// MetricProcessor.java
package org.wikidata.analyzer.Processor;
import com.google.common.collect.Iterators;
import org.json.simple.JSONObject;
import org.wikidata.analyzer.Fetcher.RefPropFetcher;
import org.wikidata.analyzer.Fetcher.WikimediasFetcher;
import org.wikidata.wdtk.datamodel.interfaces.*;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
 * Accumulates named numeric counters while item and property documents are
 * processed (statement, qualifier, reference and reference-snak counts) and
 * writes them as a JSON object to {@code metrics.json} on {@link #tearDown()}.
 *
 * @author Addshore
 */
public class MetricProcessor extends WikidataAnalyzerProcessor {

    /** Counter name to current value; serialised as one JSON object on tear-down. */
    private Map<String, Double> counters = new HashMap<>();

    /** Entity id to database name, built from the entries of {@link WikimediasFetcher}. */
    private final Map<String, String> wikimedias = new HashMap<>();

    /** Property ids that get their own {@code references.snaks.prop.<id>} counter. */
    private Set<String> referenceProperties = new HashSet<>();

    /**
     * Creates the processor and loads the lookup data used while counting.
     *
     * NOTE(review): both fetchers are constructed with {@code outputDir}, which is
     * declared outside this class; presumably it is already set once {@code super()}
     * returns - confirm against the superclass.
     */
    public MetricProcessor() {
        super();
        this.populateWikimedias();
        this.populateReferenceProperties();
    }

    /**
     * Replaces the counter map used by this processor.
     *
     * The given map is stored by reference (not copied), so later increments are
     * visible through the caller's map.
     *
     * @param counters map that will receive all subsequent counter updates
     */
    public void overrideCounters(Map<String, Double> counters) {
        this.counters = counters;
    }

    /**
     * Derives the average number of statements per item and per property from
     * the totals and counts gathered so far.
     *
     * A total or count that was never recorded is treated as zero, and an average
     * over zero documents is reported as {@code 0} rather than failing or
     * producing {@code NaN}.
     */
    public void doPostProcessing() {
        this.counters.put("item.statements.avg",
                this.average("item.statements.total", "item.count"));
        this.counters.put("property.statements.avg",
                this.average("property.statements.total", "property.count"));
    }

    /**
     * Writes all counters as a JSON object to {@code metrics.json} inside
     * {@code outputDir}.
     *
     * @return {@code true} when the file was written, {@code false} when an
     *         {@link IOException} occurred (its stack trace is printed)
     */
    public boolean tearDown() {
        File metricsJsonFile = new File(outputDir.getAbsolutePath() + File.separator + "metrics.json");
        // try-with-resources closes the writer (and the underlying stream) even when
        // serialisation fails; UTF-8 is set explicitly so the output does not depend
        // on the platform default charset.
        try (Writer metricsJsonWriter = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(metricsJsonFile), StandardCharsets.UTF_8))) {
            new JSONObject(this.counters).writeJSONString(metricsJsonWriter);
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /** Fills {@link #wikimedias} from the entries returned by {@link WikimediasFetcher}. */
    private void populateWikimedias() {
        WikimediasFetcher fetcher = new WikimediasFetcher(outputDir);
        List<HashMap<String, String>> mediawikis = fetcher.getMediawikis();
        for (HashMap<String, String> entry : mediawikis) {
            this.wikimedias.put(
                    entry.get(WikimediasFetcher.ENTITYID_KEY),
                    entry.get(WikimediasFetcher.DBNAME_KEY));
        }
    }

    /**
     * Loads the reference property ids from {@link RefPropFetcher} into a set.
     *
     * A hash set is used because membership is tested once per reference snak;
     * a {@code null} result from the fetcher is treated as "no properties".
     */
    private void populateReferenceProperties() {
        List<String> fetched = new RefPropFetcher(outputDir).getReferenceProperties();
        Set<String> properties = new HashSet<>();
        if (fetched != null) {
            properties.addAll(fetched);
        }
        this.referenceProperties = properties;
    }

    /**
     * Returns the current value of a counter, or zero when it has no value yet.
     *
     * @param counter counter name
     * @return the stored value, or {@code 0} when the counter is absent or null
     */
    private double valueOrZero(String counter) {
        Double value = this.counters.get(counter);
        return value == null ? 0 : value;
    }

    /**
     * Divides one counter by another, yielding zero when the divisor is zero.
     *
     * @param totalCounter name of the counter holding the sum
     * @param countCounter name of the counter holding the number of documents
     * @return total divided by count, or {@code 0} when count is zero or missing
     */
    private double average(String totalCounter, String countCounter) {
        double count = this.valueOrZero(countCounter);
        if (count == 0) {
            return 0;
        }
        return this.valueOrZero(totalCounter) / count;
    }

    /** Adds one to the named counter, creating it when necessary. */
    private void increment(String counter) {
        this.increment(counter, 1);
    }

    /**
     * Adds {@code quantity} to the named counter, creating it when necessary.
     *
     * @param counter  counter name
     * @param quantity amount to add
     */
    private void increment(String counter, double quantity) {
        this.counters.put(counter, this.valueOrZero(counter) + quantity);
    }

    /**
     * Counts the item and its statements, then inspects every statement.
     *
     * @param document item to count; {@code null} is ignored
     */
    public void processItemDocument(ItemDocument document) {
        if (document == null) {
            return;
        }
        this.increment("item.count");
        this.increment("item.statements.total", Iterators.size(document.getAllStatements()));
        this.processStatementDocument(document);
    }

    /**
     * Counts the property and its statements. Unlike items, the individual
     * statements of a property are not inspected further.
     *
     * @param document property to count; {@code null} is ignored
     */
    public void processPropertyDocument(PropertyDocument document) {
        if (document == null) {
            return;
        }
        this.increment("property.count");
        this.increment("property.statements.total", Iterators.size(document.getAllStatements()));
    }

    /** Runs {@link #processStatement(Statement)} on every statement of the document. */
    private void processStatementDocument(StatementDocument document) {
        Iterator<Statement> statements = document.getAllStatements();
        while (statements.hasNext()) {
            this.processStatement(statements.next());
        }
    }

    /**
     * Counts the qualifiers and references of a statement, records whether it is
     * referenced at all, and inspects each reference.
     */
    private void processStatement(Statement statement) {
        this.increment("qualifiers", Iterators.size(statement.getClaim().getAllQualifiers()));

        List<? extends Reference> references = statement.getReferences();
        this.increment("references", references.size());
        if (references.isEmpty()) {
            this.increment("statements.unreferenced");
        } else {
            this.increment("statements.referenced");
        }

        for (Reference reference : references) {
            this.processReference(reference);
        }
    }

    /** Counts the snaks of a reference and inspects each of them. */
    private void processReference(Reference reference) {
        this.increment("references.snaks", Iterators.size(reference.getAllSnaks()));
        Iterator<Snak> snaks = reference.getAllSnaks();
        while (snaks.hasNext()) {
            this.processReferenceSnak(snaks.next());
        }
    }

    /**
     * Counts a reference snak by property (only for the fetched reference
     * properties) and by snak type.
     */
    private void processReferenceSnak(Snak snak) {
        String propertyId = snak.getPropertyId().getId();

        // Only count the counts of non-"external id" property snaks intended for references
        if (this.referenceProperties.contains(propertyId)) {
            this.increment("references.snaks.prop." + propertyId);
        }

        if (snak instanceof ValueSnak) {
            this.increment("references.snaks.type.value");
            this.processReferenceValueSnak((ValueSnak) snak);
        } else if (snak instanceof SomeValueSnak) {
            this.increment("references.snaks.type.somevalue");
        } else if (snak instanceof NoValueSnak) {
            this.increment("references.snaks.type.novalue");
        }
    }

    /**
     * Counts value snaks whose value is an entity listed in {@link #wikimedias}.
     *
     * Only P143 (imported from) and P248 (stated in) are considered.
     */
    private void processReferenceValueSnak(ValueSnak snak) {
        String propertyId = snak.getPropertyId().getId();
        if (!"P143".equals(propertyId) && !"P248".equals(propertyId)) {
            return;
        }

        // These properties are expected to carry an EntityIdValue; anything else is
        // skipped instead of aborting the whole run with a ClassCastException.
        Object value = snak.getValue();
        if (!(value instanceof EntityIdValue)) {
            return;
        }

        String entityId = ((EntityIdValue) value).getId();
        if (this.wikimedias.containsKey(entityId)) {
            this.increment("references.snaks.wm");
            this.increment("references.snaks.wm." + this.wikimedias.get(entityId));
        }
    }
}