QueryInfo.scala
package org.wikidata.query.rdf.spark.transform.queries.sparql

import scala.collection.mutable
import scala.util.Try

import org.apache.jena.query.QueryFactory
import org.apache.jena.sparql.algebra.Algebra
import org.apache.jena.sparql.algebra.walker.Walker
import org.wikidata.query.rdf.spark.transform.queries.sparql.visitors.{AnalyzeOpVisitor, TripleInfo}
import org.wikidata.query.rdf.spark.utils.PrefixDeclarations
/**
 * Features extracted from a single SPARQL query: the reprinted algebra form,
 * plus the operators, prefixes, nodes, services, Wikidata entity names,
 * expressions, paths and triples it uses, with their occurrence counts.
 *
 * Empty collections are wrapped as None because Hive complains about
 * empty arrays and maps (see apply below).
 */
case class QueryInfo(
  queryReprinted: String,
  opList: Option[mutable.Buffer[String]],
  operators: Option[mutable.Map[String, Long]],
  prefixes: Option[mutable.Map[String, Long]],
  nodes: Option[mutable.Map[String, Long]],
  services: Option[mutable.Map[String, Long]],
  wikidataNames: Option[mutable.Map[String, Long]],
  expressions: Option[mutable.Map[String, Long]],
  paths: Option[mutable.Map[String, Long]],
  triples: Option[mutable.Buffer[TripleInfo]]
)
object QueryInfo {
  // Common prefix declarations prepended to every query before parsing.
  val prefixes = PrefixDeclarations.getPrefixDeclarations

  /**
   * Parses and analyzes a SPARQL query string, returning None if the query
   * cannot be parsed or compiled into an algebra tree.
   */
  def apply(queryString: String): Option[QueryInfo] = Try {
    val queryWithNamespaces = QueryFactory.create(prefixes + queryString)
    val prefixMapping = queryWithNamespaces.getPrologue.getPrefixMapping
    // Compile the query into its SPARQL algebra form and walk it with the
    // analysis visitor to collect operators, nodes, services, paths, etc.
    val ast = Algebra.compile(queryWithNamespaces)
    val opAnalyzer = new AnalyzeOpVisitor(prefixMapping)
    Walker.walk(ast, opAnalyzer)
    // Hive complains about empty arrays and maps,
    // so empty collections are converted to 'null' using Options.
    QueryInfo(
      //queryId,
      //queryString,
      ast.toString,
      Option(opAnalyzer.opList).filterNot(_.isEmpty),
      Option(opAnalyzer.opCount).filterNot(_.isEmpty),
      Option(opAnalyzer.nodeVisitor.prefixesCount).filterNot(_.isEmpty),
      Option(opAnalyzer.nodeVisitor.nodeCount).filterNot(_.isEmpty),
      Option(opAnalyzer.serviceVisitor.nodeCount).filterNot(_.isEmpty),
      Option(opAnalyzer.nodeVisitor.wdNodeCount).filterNot(_.isEmpty),
      Option(opAnalyzer.exprVisitor.exprVisited).filterNot(_.isEmpty),
      Option(opAnalyzer.pathVisitor.pathVisited).filterNot(_.isEmpty),
      Option(opAnalyzer.triples).filterNot(_.isEmpty)
    )
  }.toOption
}
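
// Example usage: a minimal sketch assuming a Wikidata-style SPARQL query.
// The query text below is hypothetical; apply returns None when the query
// fails to parse or compile.
//
//   val info: Option[QueryInfo] =
//     QueryInfo("SELECT ?item WHERE { ?item wdt:P31 wd:Q146 } LIMIT 5")
//   val operatorCounts: Map[String, Long] =
//     info.flatMap(_.operators).map(_.toMap).getOrElse(Map.empty)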