AnalyzeOpVisitor.scala
package org.wikidata.query.rdf.spark.transform.queries.sparql.visitors
import scala.collection.mutable
import scala.collection.JavaConverters._
import org.apache.jena.shared.PrefixMapping
import org.apache.jena.sparql.algebra.{Op, OpVisitor}
import org.apache.jena.sparql.algebra.op._
import org.apache.jena.sparql.core.Var
/**
 * One triple recorded while analyzing a query, expressed in terms of analyzed nodes.
 *
 * @param subjectNode   node information for the subject position
 * @param predicateNode node information for the predicate position; for property paths
 *                      [[AnalyzeOpVisitor]] stores a `NodeInfo("PATH", ...)` built from the
 *                      path's string form instead of a visited predicate node
 * @param objectNode    node information for the object position
 */
case class TripleInfo(
  subjectNode: NodeInfo,
  predicateNode: NodeInfo,
  objectNode: NodeInfo
)
// Number of methods is defined by parent class - turning off scalastyle check
// scalastyle:off number.of.methods
/**
 * Jena algebra [[OpVisitor]] that accumulates statistics about the operators it is shown.
 *
 * For every visited operator the per-name counter in `opCount` is incremented. Most operators
 * are additionally appended to `opList` in visit order (the ones routed through `notWorked`
 * are not). Triples found in basic graph patterns and path operators are collected in
 * `triples`; nodes, paths and expressions are handed to the dedicated helper visitors.
 *
 * The `visit` methods of this class do not descend into sub-operators themselves.
 *
 * @param prefixMapping prefix mapping handed to the node visitors created by this class
 */
class AnalyzeOpVisitor(
  prefixMapping: PrefixMapping
) extends OpVisitor {

  /** Number of plain triples seen (BGP triples and path operators without a path). */
  var tripleGlobalCount: Long = 0L

  /** Number of path operators seen that carry a non-null path. */
  var triplePathCount: Long = 0L

  /** Visit count per operator name; unknown names read as 0. */
  val opCount: mutable.Map[String, Long] = mutable.HashMap.empty[String, Long].withDefaultValue(0L)

  /** Names of the visited operators, in visit order (see `inc`). */
  val opList: mutable.Buffer[String] = mutable.ArrayBuffer.empty[String]

  /** Triples collected from BGP and path operators, in visit order. */
  val triples: mutable.Buffer[TripleInfo] = mutable.ArrayBuffer.empty[TripleInfo]

  // Helper visitors. Declaration order matters: nodeVisitor must exist before the
  // path and expression visitors that receive it.
  val nodeVisitor = new AnalyzeNodeVisitor(prefixMapping)
  // Separate node visitor used only for the service node of OpService.
  val serviceVisitor = new AnalyzeNodeVisitor(prefixMapping)
  val pathVisitor = new AnalyzePathVisitor(nodeVisitor)
  val exprVisitor = new AnalyzeExprVisitor(nodeVisitor, /*pathVisitor, */this)

  /** Adds one to the counter kept for `name`. */
  private def bump(name: String): Unit = opCount.update(name, opCount(name) + 1L)

  /** Counts the operator name and records it in `opList`. */
  private def inc(name: String): Unit = {
    bump(name)
    opList += name
  }

  /** Appends a collected triple to `triples`. */
  private def incTriple(info: TripleInfo): Unit = {
    triples += info
  }

  /** Counts the operator by name only; unlike `inc`, nothing is appended to `opList`. */
  private def notWorked(op: Op): Unit = bump(op.getName)

  /** Counts and records the operator without inspecting its content. */
  private def tbd(op: Op): Unit = inc(op.getName)

  /** Records the BGP and collects every triple of its pattern. */
  override def visit(opBGP: OpBGP): Unit = {
    inc(opBGP.getName)
    for (triple <- opBGP.getPattern.getList.asScala) {
      tripleGlobalCount += 1
      // Visit order (subject, predicate, object) is kept: nodeVisitor may be stateful.
      val subjectInfo = triple.getSubject.visitWith(nodeVisitor).asInstanceOf[NodeInfo]
      val predicateInfo = triple.getPredicate.visitWith(nodeVisitor).asInstanceOf[NodeInfo]
      val objectInfo = triple.getObject.visitWith(nodeVisitor).asInstanceOf[NodeInfo]
      incTriple(TripleInfo(subjectInfo, predicateInfo, objectInfo))
    }
  }

  /** Records the service operator and visits its service node with `serviceVisitor`. */
  override def visit(opService: OpService): Unit = {
    inc(opService.getName)
    val serviceNode = opService.getService
    serviceNode.visitWith(serviceVisitor)
  }

  /**
   * Records the table operator and visits its content: each variable is visited once
   * (the first time it is met across all rows), each bound value is visited every time.
   */
  override def visit(opTable: OpTable): Unit = {
    inc(opTable.getName)
    val alreadyVisited = mutable.HashSet.empty[Var]
    for (binding <- opTable.getTable.rows().asScala) {
      for (variable <- binding.vars().asScala) {
        // add returns true only when the variable was not in the set yet
        if (alreadyVisited.add(variable)) {
          variable.visitWith(nodeVisitor)
        }
        binding.get(variable).visitWith(nodeVisitor)
      }
    }
  }

  /**
   * Records the path operator and collects its triple. When a path is present it is visited
   * with `pathVisitor` and represented as a "PATH" node; otherwise the plain predicate node
   * is visited and the triple counts as a regular one.
   */
  override def visit(opPath: OpPath): Unit = {
    inc(opPath.getName)
    val triplePath = opPath.getTriplePath
    val subjectInfo = triplePath.getSubject.visitWith(nodeVisitor).asInstanceOf[NodeInfo]
    val objectInfo = triplePath.getObject.visitWith(nodeVisitor).asInstanceOf[NodeInfo]
    val path = triplePath.getPath
    val predicateInfo =
      if (path == null) {
        tripleGlobalCount += 1
        triplePath.getPredicate.visitWith(nodeVisitor).asInstanceOf[NodeInfo]
      } else {
        triplePathCount += 1
        path.visit(pathVisitor)
        NodeInfo("PATH", path.toString())
      }
    incTriple(TripleInfo(subjectInfo, predicateInfo, objectInfo))
  }

  /** Records the filter and visits each of its expressions with `exprVisitor`. */
  override def visit(opFilter: OpFilter): Unit = {
    inc(opFilter.getName)
    for (expr <- opFilter.getExprs.getList.asScala) {
      expr.visit(exprVisitor)
    }
  }

  /**
   * Records the extend operator; each variable is visited once with `nodeVisitor` and the
   * expression attached to it is visited with `exprVisitor`.
   */
  override def visit(opExtend: OpExtend): Unit = {
    inc(opExtend.getName)
    val varExprList = opExtend.getVarExprList
    val alreadyVisited = mutable.HashSet.empty[Var]
    for (variable <- varExprList.getVars.asScala) {
      if (alreadyVisited.add(variable)) {
        variable.visitWith(nodeVisitor)
      }
      varExprList.getExpr(variable).visit(exprVisitor)
    }
  }

  // Operators below are counted and recorded in opList; their content is not inspected.
  override def visit(opNull: OpNull): Unit = tbd(opNull)
  override def visit(opGraph: OpGraph): Unit = tbd(opGraph)
  override def visit(opLabel: OpLabel): Unit = tbd(opLabel)
  override def visit(opAssign: OpAssign): Unit = tbd(opAssign)
  override def visit(opJoin: OpJoin): Unit = tbd(opJoin)
  override def visit(opLeftJoin: OpLeftJoin): Unit = tbd(opLeftJoin)
  override def visit(opUnion: OpUnion): Unit = tbd(opUnion)
  override def visit(opDiff: OpDiff): Unit = tbd(opDiff)
  override def visit(opMinus: OpMinus): Unit = tbd(opMinus)
  override def visit(opConditional: OpConditional): Unit = tbd(opConditional)
  override def visit(opSequence: OpSequence): Unit = tbd(opSequence)
  override def visit(opDisjunction: OpDisjunction): Unit = tbd(opDisjunction)
  // The parameter name shadows the opList buffer; tbd receives the operator, not the buffer.
  override def visit(opList: OpList): Unit = tbd(opList)
  override def visit(opOrder: OpOrder): Unit = tbd(opOrder)
  override def visit(opProject: OpProject): Unit = tbd(opProject)
  override def visit(opReduced: OpReduced): Unit = tbd(opReduced)
  override def visit(opDistinct: OpDistinct): Unit = tbd(opDistinct)
  override def visit(opSlice: OpSlice): Unit = tbd(opSlice)
  override def visit(opGroup: OpGroup): Unit = tbd(opGroup)
  override def visit(opTopN: OpTopN): Unit = tbd(opTopN)

  // NOT WORKED: operators below are only counted in opCount and are not added to opList.
  override def visit(opQuadPattern: OpQuadPattern): Unit = notWorked(opQuadPattern)
  override def visit(opQuadBlock: OpQuadBlock): Unit = notWorked(opQuadBlock)
  override def visit(opTriple: OpTriple): Unit = notWorked(opTriple)
  override def visit(opQuad: OpQuad): Unit = notWorked(opQuad)
  override def visit(opProcedure: OpProcedure): Unit = notWorked(opProcedure)
  override def visit(opPropFunc: OpPropFunc): Unit = notWorked(opPropFunc)
  override def visit(opDatasetNames: OpDatasetNames): Unit = notWorked(opDatasetNames)
}
// scalastyle:on number.of.methods