WikibaseDateExtension.java
package org.wikidata.query.rdf.blazegraph.inline.literal;
import java.util.Set;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.Duration;
import org.openrdf.model.Literal;
import org.openrdf.model.URI;
import org.openrdf.model.vocabulary.XMLSchema;
import org.wikidata.query.rdf.common.WikibaseDate;
import org.wikidata.query.rdf.common.WikibaseDate.ToStringFormat;
import com.bigdata.rdf.error.SparqlTypeErrorException;
import com.bigdata.rdf.internal.IDatatypeURIResolver;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.constraints.IMathOpHandler;
import com.bigdata.rdf.internal.constraints.MathBOp.MathOp;
import com.bigdata.rdf.internal.impl.literal.AbstractLiteralIV;
import com.bigdata.rdf.internal.impl.literal.LiteralExtensionIV;
import com.bigdata.rdf.internal.impl.literal.XSDNumericIV;
import com.bigdata.rdf.model.BigdataLiteral;
import com.bigdata.rdf.model.BigdataURI;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.BigdataValueFactory;
import com.google.common.collect.ImmutableSet;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
/**
* This implementation of {@link com.bigdata.rdf.internal.IExtension} implements
* inlining for literals that represent xsd:dateTime literals. Unlike
* {@link com.bigdata.rdf.internal.impl.extensions.DateTimeExtension} on which
* this is based, it stores the literals as time in <strong>seconds</strong>
* since the epoch. The seconds are encoded as an inline long. Also unlike
* DateTimeExtension it only supports UTC as the default time zone because UTC
* is king. This is needed because Wikidata contains dates whose
* <strong>milliseconds</strong> since epoch don't fit into a long.
*
* @param <V> Blazegraph value to expand. These are usually treated a bit
* roughly by Blazegraph - lots of rawtypes
*/
public class WikibaseDateExtension<V extends BigdataValue> extends AbstractMultiTypeExtension<V>
        implements IMathOpHandler {
    /**
     * Data types this extension can inline.
     */
    private static final Set<URI> SUPPORTED_DATA_TYPES = ImmutableSet.of(XMLSchema.DATETIME, XMLSchema.DATE);

    /**
     * Number of seconds in a day; date differences are reported in days.
     */
    private static final double SECONDS_PER_DAY = 60 * 60 * 24;

    /**
     * Datatype factory cache.
     */
    protected static final DatatypeFactory DATATYPE_FACTORY;

    static {
        try {
            DATATYPE_FACTORY = DatatypeFactory.newInstance();
        } catch (DatatypeConfigurationException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Build the extension for the supported date data types.
     *
     * @param resolver resolver handed to the superclass together with the
     *            supported data types
     */
    public WikibaseDateExtension(final IDatatypeURIResolver resolver) {
        super(resolver, SUPPORTED_DATA_TYPES);
    }

    /**
     * Attempts to convert the supplied value into an epoch representation and
     * encodes the long in a delegate {@link XSDNumericIV}, and returns an
     * {@link LiteralExtensionIV} to wrap the native type.
     */
    @Override
    @SuppressWarnings("rawtypes")
    protected AbstractLiteralIV createDelegateIV(Literal literal, BigdataURI dt) {
        WikibaseDate date = WikibaseDate.fromString(literal.stringValue()).cleanWeirdStuff();
        return new XSDNumericIV(date.secondsSinceEpoch());
    }

    /**
     * Use the long value of the {@link XSDNumericIV} delegate which represents
     * seconds since the epoch to create a WikibaseDate and then represent that
     * properly using xsd's string representations.
     */
    @Override
    @SuppressWarnings("rawtypes")
    protected BigdataLiteral safeAsValue(LiteralExtensionIV iv, BigdataValueFactory vf, BigdataURI dt) {
        WikibaseDate date = WikibaseDate.fromSecondsSinceEpoch(iv.getDelegate().longValue());
        // xsd:date gets the date-only rendering, everything else the full date-time one
        ToStringFormat format = dt.equals(XMLSchema.DATE) ? ToStringFormat.DATE : ToStringFormat.DATE_TIME;
        return vf.createLiteral(date.toString(format), dt);
    }

    /**
     * Check whether this datatype URI is one of the types we support.
     *
     * @param datatype datatype URI of a literal, may be null
     * @return true if the datatype is one of the supported date types
     */
    private boolean isWikibaseDateURI(URI datatype) {
        return datatype != null && SUPPORTED_DATA_TYPES.contains(datatype);
    }

    /**
     * Decide whether {@link #doMathOp} can be applied to the arguments.
     * Accepted combinations are exactly two literals that are either both
     * supported dates, or one supported date and one xsd:duration (in either
     * order).
     *
     * @param args literals the operation would be applied to
     * @return true if the combination of datatypes is one we handle
     */
    @SuppressWarnings("rawtypes")
    @Override
    public boolean canInvokeMathOp(final Literal... args) {
        if (args.length != 2) {
            // for now we handle only two args
            return false;
        }
        URI dt1 = args[0].getDatatype();
        URI dt2 = args[1].getDatatype();
        if (dt1 == null || dt2 == null) {
            // can not operate on literals without a datatype
            return false;
        }
        boolean d1 = isWikibaseDateURI(dt1);
        boolean d2 = isWikibaseDateURI(dt2);
        if (d1 && d2) {
            // both dates, we can handle it
            return true;
        }
        if (d1 && dt2.equals(XMLSchema.DURATION)) {
            // date and duration, is OK
            return true;
        }
        if (d2 && dt1.equals(XMLSchema.DURATION)) {
            // duration and date, is OK
            return true;
        }
        return false;
    }

    /**
     * Normalize IV - convert to LiteralExtension.
     *
     * @param l Original literal, will be parsed if IV is not inlined.
     * @param iv Original IV
     * @return Normalized IV, parsed through Wikidata if needed
     * @throws SparqlTypeErrorException if the literal has to be parsed and
     *             the conversion rejects it with an IllegalArgumentException
     */
    @SuppressWarnings("rawtypes")
    @SuppressFBWarnings(value = "LEST_LOST_EXCEPTION_STACK_TRACE", justification = "Cause is really not needed here.")
    private LiteralExtensionIV normalizeIV(Literal l, IV iv) {
        if (iv instanceof LiteralExtensionIV) {
            return (LiteralExtensionIV) iv;
        }
        try {
            return createIV(l);
        } catch (IllegalArgumentException e) {
            // Trouble converting an argument is a SPARQL type error, not a
            // failure of the whole evaluation
            throw new SparqlTypeErrorException();
        }
    }

    /**
     * Apply a math operation where at least one operand is a supported date.
     * Two dates are delegated to {@link #handleTwoDates}. Otherwise the other
     * operand's label is parsed as a duration and either added to the date
     * (PLUS, either operand order) or subtracted from it (MINUS, date on the
     * left only).
     *
     * @param l1 first operand literal
     * @param iv1 IV of the first operand
     * @param l2 second operand literal
     * @param iv2 IV of the second operand
     * @param op operation to apply
     * @param vf value factory used to materialize the resulting literal
     * @return IV holding the result of the operation
     * @throws SparqlTypeErrorException if neither operand is a date, the
     *             operation is not supported for the operand combination, or
     *             an operand could not be converted
     */
    @SuppressWarnings({"rawtypes", "checkstyle:cyclomaticcomplexity", "checkstyle:NPathComplexity"})
    @SuppressFBWarnings(value = "LEST_LOST_EXCEPTION_STACK_TRACE", justification = "Cause is really not needed here.")
    @Override
    public IV doMathOp(
            final Literal l1, final IV iv1,
            final Literal l2, final IV iv2,
            final MathOp op,
            final BigdataValueFactory vf) {
        boolean d1 = isWikibaseDateURI(l1.getDatatype());
        boolean d2 = isWikibaseDateURI(l2.getDatatype());
        if (!d1 && !d2) {
            throw new SparqlTypeErrorException();
        }
        LiteralExtensionIV liv1 = d1 ? normalizeIV(l1, iv1) : null;
        LiteralExtensionIV liv2 = d2 ? normalizeIV(l2, iv2) : null;
        if (d1 && d2) {
            return handleTwoDates(liv1, liv2, op);
        }
        try {
            // Now we have one date and one (presumed) duration
            if (op == MathOp.PLUS) {
                LiteralExtensionIV iv = d1 ? liv1 : liv2;
                Literal lduration = d1 ? l2 : l1;
                return datePlusDuration(iv, DATATYPE_FACTORY.newDuration(lduration.getLabel()), vf);
            }
            // Only "date - duration" is defined; "duration - date" has no date
            // on the left (liv1 is null) and falls through to the type error
            if (op == MathOp.MINUS && d1) {
                return datePlusDuration(liv1,
                        DATATYPE_FACTORY.newDuration(l2.getLabel()).negate(), vf);
            }
        } catch (IllegalArgumentException | UnsupportedOperationException e) {
            // If we had trouble converting any arguments, make it SPARQL error
            throw new SparqlTypeErrorException();
        }
        throw new SparqlTypeErrorException();
    }

    /**
     * Combine two dates. MIN and MAX return one of the operands (the second
     * one on a tie); MINUS returns the difference expressed in days as a
     * double.
     *
     * @param iv1 first date, delegate holds seconds since epoch
     * @param iv2 second date, delegate holds seconds since epoch
     * @param op operation to apply
     * @return one of the operands for MIN/MAX, a numeric IV for MINUS
     * @throws SparqlTypeErrorException for any other operation
     */
    @SuppressWarnings("rawtypes")
    private IV handleTwoDates(
            final LiteralExtensionIV iv1,
            final LiteralExtensionIV iv2,
            final MathOp op) {
        long ts1 = iv1.getDelegate().longValue();
        long ts2 = iv2.getDelegate().longValue();
        switch (op) {
        case MIN:
            return ts1 < ts2 ? iv1 : iv2;
        case MAX:
            return ts1 > ts2 ? iv1 : iv2;
        case MINUS:
            long diff = ts1 - ts2;
            // Subtraction overflowed iff the operands have different signs and
            // the sign of the result differs from the sign of ts1
            boolean overflowed = ((ts1 ^ ts2) & (ts1 ^ diff)) < 0;
            // Keep the exact long difference whenever it is representable;
            // only fall back to (less precise) double arithmetic on overflow
            double seconds = overflowed ? (double) ts1 - (double) ts2 : (double) diff;
            return new XSDNumericIV(seconds / SECONDS_PER_DAY);
        default:
            throw new SparqlTypeErrorException();
        }
    }

    /**
     * Retrieve data type for the extension.
     *
     * @param iv IV to find the datatype for
     * @return datatype of the cached literal value if there is one, otherwise
     *         the cached value of the extension IV
     * @throws SparqlTypeErrorException if neither of them is available
     */
    @SuppressWarnings("rawtypes")
    @SuppressFBWarnings(value = "PRMC_POSSIBLY_REDUNDANT_METHOD_CALLS", justification = "more readable with 2 calls to getValue()")
    private BigdataURI getDataType(LiteralExtensionIV iv) {
        if (iv.hasValue() && iv.getValue() instanceof BigdataLiteral) {
            // Use dataType of cached value
            return ((BigdataLiteral) iv.getValue()).getDatatype();
        } else if (iv.getExtensionIV().hasValue()) {
            // Use dataType from extension IV
            return (BigdataURI) iv.getExtensionIV().getValue();
        } else {
            // Could not identify datatype of the result
            throw new SparqlTypeErrorException();
        }
    }

    /**
     * Add Duration to date.
     *
     * @param iv date to start from, delegate holds seconds since epoch
     * @param d duration to add (negated by the caller for subtraction)
     * @param vf value factory used to build the cached literal of the result
     * @return new extension IV with the same extension IV as the input and
     *         its literal value already set
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private IV datePlusDuration(LiteralExtensionIV iv, Duration d, BigdataValueFactory vf) {
        long ts = iv.getDelegate().longValue();
        WikibaseDate newdate = WikibaseDate.fromSecondsSinceEpoch(ts).addDuration(d);
        LiteralExtensionIV result = new LiteralExtensionIV(new XSDNumericIV(newdate.secondsSinceEpoch()), iv.getExtensionIV());
        // Result keeps the datatype (xsd:date vs xsd:dateTime) of the input date
        result.setValue(safeAsValue(result, vf, getDataType(iv)));
        return result;
    }
}