EmptyLabelServiceOptimizer.java

package org.wikidata.query.rdf.blazegraph.label;

import static org.wikidata.query.rdf.blazegraph.label.LabelServiceUtils.getLabelServiceNodes;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.openrdf.model.URI;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.model.vocabulary.SKOS;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikidata.query.rdf.common.uri.SchemaDotOrg;

import com.bigdata.bop.BOp;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IVariable;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.sparql.ast.AssignmentNode;
import com.bigdata.rdf.sparql.ast.ConstantNode;
import com.bigdata.rdf.sparql.ast.IGroupMemberNode;
import com.bigdata.rdf.sparql.ast.IGroupNode;
import com.bigdata.rdf.sparql.ast.JoinGroupNode;
import com.bigdata.rdf.sparql.ast.ProjectionNode;
import com.bigdata.rdf.sparql.ast.QueryRoot;
import com.bigdata.rdf.sparql.ast.QueryType;
import com.bigdata.rdf.sparql.ast.StatementPatternNode;
import com.bigdata.rdf.sparql.ast.StaticAnalysis;
import com.bigdata.rdf.sparql.ast.SubqueryBase;
import com.bigdata.rdf.sparql.ast.VarNode;
import com.bigdata.rdf.sparql.ast.eval.AST2BOpContext;
import com.bigdata.rdf.sparql.ast.optimizers.AbstractJoinGroupOptimizer;
import com.bigdata.rdf.sparql.ast.service.ServiceNode;
import com.bigdata.rdf.store.BD;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

/**
 * Rewrites empty calls to the label service to attempt to resolve labels based
 * on the query's projection.
 */
@SuppressWarnings("rawtypes")
public class EmptyLabelServiceOptimizer extends AbstractJoinGroupOptimizer {
    private static final Logger LOG = LoggerFactory.getLogger(EmptyLabelServiceOptimizer.class);

    /**
     * Schema.org's description property as a URI.
     */
    private static final URI DESCRIPTION = new URIImpl(SchemaDotOrg.DESCRIPTION);

    private static final String LABEL_SERVICE_PROJECTION = "LabelService.projection";

    @Override
    protected void optimizeJoinGroup(AST2BOpContext ctx, StaticAnalysis sa, IBindingSet[] bSets, JoinGroupNode op) {
        final QueryRoot root = sa.getQueryRoot();
        if (root.getQueryType() == QueryType.ASK) {
            return;
        }
        if (root.getWhereClause() == op) {
            op.setProperty(LABEL_SERVICE_PROJECTION, root.getProjection());
        }
        op.getChildren(SubqueryBase.class).forEach(node -> {
            if (node.getWhereClause() != null) {
                BOp whereClause = node.getWhereClause();
                whereClause.setProperty(LABEL_SERVICE_PROJECTION, node.getProjection());
            }
        });

        // Prepare a set of vars, which might be bound both outside of the service and by LabelService
        // Fix for the issue: https://phabricator.wikimedia.org/T159723
        // See also patch for the com.bigdata.rdf.sparql.ast.eval.AST2BOpUtility.addServiceCall()
        Set<IVariable<?>> uncertainVars = collectUncertainVars(sa, bSets, op);

        getLabelServiceNodes(op).forEach(service -> {
            service.setUncertainVars(uncertainVars);
            JoinGroupNode g = (JoinGroupNode) service.getGraphPattern();
            boolean foundArg = false;
            for (BOp st : g.args()) {
                StatementPatternNode sn = (StatementPatternNode) st;
                if (sn.s().isConstant() && BD.SERVICE_PARAM.equals(sn.s().getValue())) {
                    continue;
                }
                foundArg = true;
                break;
            }

            if (restoreExtracted(service)) {
                foundArg = true;
            }

            if (!foundArg) {
                addResolutions(ctx, g, getProjectionNode(service));
            }

        });
    }

    private Set<IVariable<?>> collectUncertainVars(StaticAnalysis sa, IBindingSet[] bSets, JoinGroupNode op) {
        Set<IVariable<?>> uncertainVars = new HashSet<>();
        sa.getMaybeProducedBindings(op, uncertainVars, /* recursive */ true);
        for (IBindingSet bSet: bSets) {
            bSet.vars().forEachRemaining(v -> uncertainVars.add(v));
        }
        return uncertainVars;
    }

    @SuppressFBWarnings(
            value = "OCP_OVERLY_CONCRETE_PARAMETER",
            justification = "We only process ServiceNode's so that's the appropriate type")
    private ProjectionNode getProjectionNode(ServiceNode service) {
        IGroupNode<IGroupMemberNode> parent = service.getParent();
        while (parent != null) {
            ProjectionNode projection = (ProjectionNode) parent.annotations().get(
                    LABEL_SERVICE_PROJECTION
            );
            if (projection != null) {
                return projection;
            }
            parent = parent.getParent();
        }
        return null;
    }

    /**
     * Restore extracted statement from label service node.
     */
    @SuppressWarnings("unchecked")
    private boolean restoreExtracted(ServiceNode service) {
        boolean found = false;
        JoinGroupNode g = (JoinGroupNode) service.getGraphPattern();

        final List<StatementPatternNode> extractedList = (List<StatementPatternNode>) service
                .annotations()
                .get(LabelServiceExtractOptimizer.EXTRACTOR_ANNOTATION);
        if (extractedList != null && !extractedList.isEmpty()) {
            for (StatementPatternNode st : extractedList) {
                g.addArg(st);
            }
            found = true;
        }
        service.annotations().remove(LabelServiceExtractOptimizer.EXTRACTOR_ANNOTATION);
        return found;
    }

    /**
     * Infer that the user wanted to resolve some variables using the label
     * service.
     */
    private void addResolutions(AST2BOpContext ctx, JoinGroupNode g, ProjectionNode p) {
        if (p == null) {
            return;
        }
        for (AssignmentNode a : p) {
            IVariable<IV> v = a.getVar();
            if (a.getValueExpression() != v) {
                continue;
            }
            /*
             * Try and match a variable name we can resolve via labels. Note
             * that we should match AltLabel before Label because Label is a
             * suffix of it....
             */
            boolean replaced = addResolutionIfSuffix(ctx, g, "AltLabel", SKOS.ALT_LABEL, v)
                    || addResolutionIfSuffix(ctx, g, "Label", RDFS.LABEL, v)
                    || addResolutionIfSuffix(ctx, g, "Description", DESCRIPTION, v);
            if (replaced && LOG.isDebugEnabled()) {
                LOG.debug("Resolving {} using a label lookup.", v);
            }
        }
    }

    /**
     * Add the join group to resolve a variable if it matches a suffix,
     * returning true if it matched, false otherwise.
     */
    @SuppressFBWarnings(
            value = "OCP_OVERLY_CONCRETE_PARAMETER",
            justification = "Using AST2BOpContext makes sense since it is the only type that will ever be passed")
    private boolean addResolutionIfSuffix(AST2BOpContext ctx, JoinGroupNode g, String suffix, URI labelType,
            IVariable<IV> iVar) {
        if (!iVar.getName().endsWith(suffix)) {
            return false;
        }
        String source = iVar.getName().substring(0, iVar.getName().length() - suffix.length());
        IConstant<IV> labelTypeAsConstant = ctx.getAbstractTripleStore().getVocabulary().getConstant(labelType);
        g.addArg(new StatementPatternNode(new VarNode(source), new ConstantNode(labelTypeAsConstant), new VarNode(iVar)));
        return true;
    }
}