// EventLoggingSchemaLoader.java
package org.wikimedia.eventutilities.core.event;
import java.net.URI;
import java.net.URISyntaxException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikimedia.eventutilities.core.json.JsonSchemaLoader;
import org.wikimedia.eventutilities.core.json.JsonLoadingException;
import com.fasterxml.jackson.core.JsonPointer;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
/**
* Loads EventLogging schemas from schema names (and/or revisions)
* by extending EventSchemaLoader and adding Mediawiki extension EventLogging
* schema specific functionality. When looking up schemas for event instances,
* this will always use the latest schema.
*
* Usage:
* <pre>{@code
* EventLoggingSchemaLoader schemaLoader = new EventLoggingSchemaLoader();
*
* // Load Test schema revision 123
* schemaLoader.getEventSchema("Test", 123);
*
* // Load the schema for JsonNode or Json String event (schema name is at /schema in event).
* schemaLoader.getEventSchema(event);
* }</pre>
*/
public class EventLoggingSchemaLoader extends EventSchemaLoader {
/**
* EventLogging schema names are in an event's `schema` field.
*/
protected static final String EVENTLOGGING_SCHEMA_FIELD = "/schema";
/**
* EventCapsule schema.
*/
protected final JsonNode eventLoggingCapsuleSchema;
private static final Logger LOG = LoggerFactory.getLogger(EventLoggingSchemaLoader.class.getName());
/**
* Returns an EventLoggingSchemaLoader that uses {@link JsonSchemaLoader} to load JSONSchemas.
*
* @param schemaLoader
* must have an underlying ResourceLoader that knows how to load
* relative schema URIs, which in EventLogging's case are MediaWiki action API params.
*/
public EventLoggingSchemaLoader(JsonSchemaLoader schemaLoader) {
super(schemaLoader, JsonPointer.compile(EVENTLOGGING_SCHEMA_FIELD));
this.eventLoggingCapsuleSchema = buildEventLoggingCapsule();
}
/**
* Adapted from https://github.com/wikimedia/eventlogging/blob/master/eventlogging/capsule.py.
*/
protected static JsonNode buildEventLoggingCapsule() {
JsonNodeFactory jf = JsonNodeFactory.instance;
ObjectNode capsuleSchema = jf.objectNode();
ObjectNode userAgentSchema = jf.objectNode();
userAgentSchema.set("browser_family", jf.objectNode().put("type", "string"));
userAgentSchema.set("browser_major", jf.objectNode().put("type", "string"));
userAgentSchema.set("browser_minor", jf.objectNode().put("type", "string"));
userAgentSchema.set("device_family", jf.objectNode().put("type", "string"));
userAgentSchema.set("is_bot", jf.objectNode().put("type", "boolean"));
userAgentSchema.set("is_mediawiki", jf.objectNode().put("type", "boolean"));
userAgentSchema.set("os_family", jf.objectNode().put("type", "string"));
userAgentSchema.set("os_major", jf.objectNode().put("type", "string"));
userAgentSchema.set("os_minor", jf.objectNode().put("type", "string"));
userAgentSchema.set("wmf_app_version", jf.objectNode().put("type", "string"));
ObjectNode userAgentField = jf.objectNode();
userAgentField.put("type", "object");
userAgentField.set("properties", userAgentSchema);
ObjectNode capsuleSchemaProperties = jf.objectNode();
capsuleSchemaProperties.set("ip", jf.objectNode().put("type", "string"));
capsuleSchemaProperties.set("userAgent", userAgentField);
capsuleSchemaProperties.set("uuid", jf.objectNode().put("type", "string"));
capsuleSchemaProperties.set("seqId", jf.objectNode().put("type", "integer"));
capsuleSchemaProperties.set("dt", jf.objectNode().put("type", "string"));
capsuleSchemaProperties.set("wiki", jf.objectNode().put("type", "string"));
capsuleSchemaProperties.set("webHost", jf.objectNode().put("type", "string"));
capsuleSchemaProperties.set("schema", jf.objectNode().put("type", "string"));
capsuleSchemaProperties.set("revision", jf.objectNode().put("type", "integer"));
capsuleSchemaProperties.set("topic", jf.objectNode().put("type", "string"));
capsuleSchemaProperties.set("recvFrom", jf.objectNode().put("type", "string"));
capsuleSchema = jf.objectNode();
capsuleSchema.put("type", "object");
capsuleSchema.set("properties", capsuleSchemaProperties);
capsuleSchema.put("additionalProperties", false);
return capsuleSchema;
}
/**
* Given a URI to an EventLogging 'event' field (un-encapsulated) schema,
* this will get the 'event' field schema at that URI, and then encapsulate
* it. This will use the schemaLoader's schema cache to cache the encapsulated schema by an
* artificial encapsulated schema URI value.
*/
@Override
public JsonNode load(URI schemaUri) throws JsonLoadingException {
URI encapsulatedSchemaUriCacheKey;
try {
// Make make an artificial 'encapsulated' URI we can use as a cache key for the
// encapsulated schema.
encapsulatedSchemaUriCacheKey = new URI(schemaUri.toString() + "&encapsulated=true");
} catch (URISyntaxException e) {
throw new IllegalArgumentException("Could not create artificial encapsulatedSchemaUri from " + schemaUri, e);
}
if (this.schemaLoader.isCached(encapsulatedSchemaUriCacheKey)) {
return this.schemaLoader.cacheGet(encapsulatedSchemaUriCacheKey);
}
JsonNode eventFieldSchema = super.load(schemaUri);
JsonNode encapsulatedSchema = this.encapsulateEventLoggingSchema(eventFieldSchema);
this.schemaLoader.cachePut(encapsulatedSchemaUriCacheKey, encapsulatedSchema);
return encapsulatedSchema;
}
/**
* Returns the latest EventLogging schema URI for this event.
* @param event should have field at schemaFieldPointer pointing at its URI.
*/
public URI getEventSchemaUri(JsonNode event) {
String schemaName = event.at(this.schemaFieldPointer).textValue();
return this.eventLoggingSchemaUriFor(schemaName);
}
/**
* Given an EventLogging event object, this extracts its schema name at /schema
* and uses it to get the latest EventLogging schema.
*/
@Override
public JsonNode getEventSchema(JsonNode event) throws JsonLoadingException {
URI schemaUri = this.getEventSchemaUri(event);
return this.load(schemaUri);
}
/**
* Given an EventLogging json event string, this parses it to a JsonNode and then
* extracts its schema name at /schema and uses it to get the latest EventLogging schema.
*/
@Override
public JsonNode getEventSchema(String eventString) throws JsonLoadingException {
JsonNode event = this.schemaLoader.parse(eventString);
return this.getEventSchema(event);
}
// EventLoggingSchemaLoader always returns the latest schema.
@Override
public JsonNode getLatestEventSchema(JsonNode event) throws JsonLoadingException {
return getEventSchema(event);
}
@Override
public JsonNode getLatestEventSchema(String eventString) throws JsonLoadingException {
return getEventSchema(eventString);
}
/**
* Given an EventLogging schema name , this will get the
* latest schema revision from EVENTLOGGING_SCHEMA_BASE_URI and encapsulate it.
*/
public JsonNode getEventLoggingSchema(String schemaName) throws JsonLoadingException {
URI eventFieldSchemaUri = this.eventLoggingSchemaUriFor(schemaName);
return this.load(eventFieldSchemaUri);
}
/**
* Given an EventLogging event schema name and revision, this will get the
* schema from EVENTLOGGING_SCHEMA_BASE_URI and encapsulate it.
* @return event schema
*/
public JsonNode getEventLoggingSchema(String schemaName, Integer revision) throws JsonLoadingException {
URI eventFieldSchemaUri = this.eventLoggingSchemaUriFor(schemaName, revision);
return this.load(eventFieldSchemaUri);
}
/**
* Builds an EventLogging Mediawiki API schema URI for the latest revision.
* @param name schema name
* @return EventLogging schema URI
*/
protected URI eventLoggingSchemaUriFor(String name) {
try {
URI schemaUri = new URI(
"?action=jsonschema&formatversion=2&format=json" +
"&title=" + name
);
LOG.debug("Built EventLogging schema URI for '{}': {}", name, schemaUri);
return schemaUri;
} catch (URISyntaxException e) {
throw new IllegalArgumentException(
"Could not build EventLogging schema URI for " + name +
" latest revision.", e
);
}
}
/**
* Builds an EventLogging Mediawiki API schema URI for a specific schema revision.
* @param name schema name
* @param revision schema revision
* @return EventLogging schema URI
*/
protected URI eventLoggingSchemaUriFor(String name, Integer revision) {
try {
URI schemaUri = new URI(
"?action=jsonschema&formatversion=2&format=json" +
"&title=" + name +
"&revid=" + revision
);
LOG.debug("Built EventLogging schema URI for '{}': {}", name, schemaUri);
return schemaUri;
} catch (URISyntaxException e) {
throw new IllegalArgumentException(
"Could not build EventLogging schema URI for " + name +
" revision " + revision + ".", e
);
}
}
/**
* Given an EventLogging schema in an ObjectNode, 'encapsulate' it in the
* eventLoggingCapsuleSchema the same way that EventLogging python would.
* @param schema the event schema to be encapsulated. Its 'properties' will be set as 'event'.
* @return encapsulated EventLogging schema
*/
protected JsonNode encapsulateEventLoggingSchema(JsonNode schema) {
ObjectNode schemaObject = (ObjectNode)schema;
// EventLogging MW API doesn't return event schema with type
// if user doesn't enter it explicitly. This happens for most EL schemas.
if (!schemaObject.has("type")) {
LOG.trace("EventLogging event schema is missing type; setting type: object.");
schemaObject.put("type", "object");
}
ObjectNode capsule = this.eventLoggingCapsuleSchema.deepCopy();
((ObjectNode)capsule.get("properties")).set("event", schemaObject);
return capsule;
}
public String toString() {
return "EventLoggingSchemaLoader(" + getResourceLoader() + ")";
}
}