Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
83.82% |
57 / 68 |
|
58.33% |
7 / 12 |
CRAP | |
0.00% |
0 / 1 |
| EventLogging | |
85.07% |
57 / 67 |
|
58.33% |
7 / 12 |
23.61 | |
0.00% |
0 / 1 |
| getLogger | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getEventSubmitter | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getMetricsPlatformClient | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| resetMetricsPlatformClient | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
| submit | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| sendBeacon | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
| logEvent | |
94.44% |
17 / 18 |
|
0.00% |
0 / 1 |
3.00 | |||
| serializeEvent | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
| schemaValidate | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
| sessionInSample | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| encapsulate | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
3 | |||
| getLegacyStreamName | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * PHP API for logging events. |
| 4 | * |
| 5 | * @file |
| 6 | * @ingroup Extensions |
| 7 | * @ingroup EventLogging |
| 8 | * |
| 9 | * @author Ori Livneh <ori@wikimedia.org> |
| 10 | */ |
| 11 | |
| 12 | namespace MediaWiki\Extension\EventLogging; |
| 13 | |
| 14 | use MediaWiki\Context\RequestContext; |
| 15 | use MediaWiki\Deferred\DeferredUpdates; |
| 16 | use MediaWiki\Extension\EventLogging\EventSubmitter\EventSubmitter; |
| 17 | use MediaWiki\Extension\EventLogging\Libs\JsonSchemaValidation\JsonSchemaException; |
| 18 | use MediaWiki\Extension\EventLogging\Libs\JsonSchemaValidation\JsonTreeRef; |
| 19 | use MediaWiki\Extension\EventLogging\MetricsPlatform\MetricsClientFactory; |
| 20 | use MediaWiki\Json\FormatJson; |
| 21 | use MediaWiki\Logger\LoggerFactory; |
| 22 | use MediaWiki\MediaWikiServices; |
| 23 | use Psr\Log\LoggerInterface; |
| 24 | use RuntimeException; |
| 25 | use Wikimedia\MetricsPlatform\MetricsClient; |
| 26 | |
/**
 * Static PHP entry points for logging events.
 *
 * Provides submit() for stream-based events, the deprecated logEvent() and
 * sendBeacon() legacy path, and helpers for event serialisation, JSON Schema
 * validation and session-based sampling.
 */
class EventLogging {

	/**
	 * Cached Metrics Platform Client; null until first requested or after
	 * resetMetricsPlatformClient() has been called.
	 *
	 * @var MetricsClient|null
	 */
	private static $metricsPlatformClient;

	/**
	 * Default logger (the 'EventLogging' log channel).
	 *
	 * @internal
	 */
	public static function getLogger(): LoggerInterface {
		return LoggerFactory::getInstance( 'EventLogging' );
	}

	/**
	 * Fetches the 'EventLogging.EventSubmitter' service from the service container.
	 */
	private static function getEventSubmitter(): EventSubmitter {
		return MediaWikiServices::getInstance()->getService( 'EventLogging.EventSubmitter' );
	}

	/**
	 * Gets the singleton instance of the Metrics Platform Client (MPC).
	 *
	 * The client is created on first use from the
	 * 'EventLogging.MetricsClientFactory' service using the main request
	 * context, and then reused for subsequent calls.
	 *
	 * @see https://wikitech.wikimedia.org/wiki/Metrics_Platform
	 */
	public static function getMetricsPlatformClient(): MetricsClient {
		if ( self::$metricsPlatformClient === null ) {
			/** @var MetricsClientFactory $metricsClientFactory */
			$metricsClientFactory =
				MediaWikiServices::getInstance()->getService( 'EventLogging.MetricsClientFactory' );

			self::$metricsPlatformClient = $metricsClientFactory->newMetricsClient( RequestContext::getMain() );
		}

		return self::$metricsPlatformClient;
	}

	/**
	 * Resets the Metrics Platform Client for testing purposes. See also the warning and note
	 * against {@link MediaWikiServices::resetServiceForTesting()}.
	 *
	 * @internal
	 *
	 * @throws RuntimeException If called outside a PHPUnit test
	 */
	public static function resetMetricsPlatformClient(): void {
		if ( !defined( 'MW_PHPUNIT_TEST' ) ) {
			throw new RuntimeException( __METHOD__ . ' may only be called during unit tests.' );
		}

		self::$metricsPlatformClient = null;
	}

	/**
	 * Submit an event according to the given stream's configuration.
	 *
	 * @param string $streamName
	 * @param array $event
	 */
	public static function submit(
		string $streamName,
		array $event
	): void {
		self::getEventSubmitter()->submit( $streamName, $event );
	}

	/**
	 * Transfer small data asynchronously using an HTTP POST.
	 * This is meant to match the Navigator.sendBeacon() API.
	 *
	 * The request itself is performed in a deferred update, so the return
	 * value only says whether the request was queued, not whether it succeeded.
	 *
	 * @see https://w3c.github.io/beacon/#sec-sendBeacon-method
	 * @param string $url
	 * @param array $data Sent as the POST body when non-empty.
	 * @return bool True if the request was queued; false if $url could not be
	 *  expanded to a full URL, in which case nothing is sent.
	 * @deprecated use submit with new Event Platform based schemas.
	 */
	public static function sendBeacon( $url, array $data = [] ) {
		$fname = __METHOD__;
		$expandedUrl = MediaWikiServices::getInstance()->getUrlUtils()
			->expand( $url, PROTO_INTERNAL );

		if ( $expandedUrl === null ) {
			// expand() could not produce a usable URL; POSTing to an empty
			// URL can never succeed, so skip the request and report failure.
			self::getLogger()->warning(
				'{method}: URL could not be expanded; request not sent',
				[ 'method' => $fname ]
			);
			return false;
		}

		DeferredUpdates::addCallableUpdate( static function () use ( $expandedUrl, $data, $fname ) {
			$options = $data ? [ 'postData' => $data ] : [];
			return MediaWikiServices::getInstance()->getHttpRequestFactory()
				->post( $expandedUrl, $options, $fname );
		} );

		return true;
	}

	/**
	 * Legacy EventLogging entrypoint.
	 *
	 * NOTE: For forwards compatibility with Event Platform schemas,
	 * we hijack the wgEventLoggingSchemas revision to encode the
	 * $schema URI. If the value for a schema defined in
	 * EventLoggingSchemas is a string, it is assumed
	 * to be an Event Platform $schema URI, not a MW revision id.
	 * In this case, the event will be POSTed to EventGate.
	 *
	 * @param string $schemaName Schema name.
	 * @param int $revId
	 *  revision ID of schema. $schemasInfo[$schemaName] will override this.
	 * @param array $eventData
	 *  Map of event keys/vals.
	 *  This is the 'event' field, as provided by the caller,
	 *  not an encapsulated real event.
	 * @param int $options This parameter is deprecated and no longer used.
	 * @return bool Whether the event was logged.
	 * @deprecated use EventLogging::submit() with new Event Platform based schemas.
	 * @see https://wikitech.wikimedia.org/wiki/Event_Platform/Instrumentation_How_To#In_PHP
	 */
	public static function logEvent( $schemaName, $revId, $eventData, $options = 0 ) {
		$config = MediaWikiServices::getInstance()->getMainConfig();

		$eventLoggingConfig = Hooks::getEventLoggingConfig( $config );
		$schemasInfo = $eventLoggingConfig['schemasInfo'];
		$eventLoggingBaseUri = $eventLoggingConfig['baseUrl'];

		// Get the configured revision id or $schema URI
		// to use with events of a particular (legacy metawiki) EventLogging schema.
		// $schemasInfo[$schemaName] overrides passed in $revId.
		$revisionOrSchemaUri = $schemasInfo[$schemaName] ?? $revId ?? -1;

		// Wrap the caller's event data together with the request metadata.
		$event = self::encapsulate(
			$schemaName,
			$revisionOrSchemaUri,
			$eventData
		);

		if ( isset( $event['$schema'] ) ) {
			// Assume that if $schema was set by self::encapsulate(), this
			// event should be POSTed to EventGate via EventServiceClient submit()
			self::submit( self::getLegacyStreamName( $schemaName ), $event );
			return true;
		}

		// Otherwise the event goes to the legacy eventlogging backend
		// via 'sendBeacon' by url encoding the json data into a query parameter.
		if ( !$eventLoggingBaseUri ) {
			return false;
		}

		$json = self::serializeEvent( $event );
		$url = $eventLoggingBaseUri . '?' . rawurlencode( $json ) . ';';

		return self::sendBeacon( $url );
	}

	/**
	 * Converts the encapsulated event from an object to a string.
	 *
	 * @param array $event Encapsulated event
	 * @return string $json
	 */
	public static function serializeEvent( $event ) {
		$eventData = $event['event'];

		if ( count( $eventData ) === 0 ) {
			// Ensure empty events are serialized as '{}' and not '[]'.
			$eventData = (object)$eventData;
		}
		$event['event'] = $eventData;

		// To make the resultant JSON easily extracted from a row of
		// space-separated values, we replace literal spaces with unicode
		// escapes. This is permitted by the JSON specs.
		return str_replace( ' ', '\u0020', FormatJson::encode( $event ) );
	}

	/**
	 * Validates object against JSON Schema.
	 *
	 * @throws JsonSchemaException If the object fails to validate.
	 * @throws RuntimeException If no schema was given and the bundled default
	 *  schema file cannot be read or decoded.
	 * @param array $object Object to be validated.
	 * @param array|null $schema Schema to validate against (default: JSON Schema).
	 * @return bool True.
	 */
	public static function schemaValidate( $object, $schema = null ) {
		if ( $schema === null ) {
			// Default to JSON Schema
			$schemaPath = dirname( __DIR__ ) . '/schemas/schemaschema.json';
			$json = file_get_contents( $schemaPath );
			$schema = $json === false ? null : FormatJson::decode( $json, true );

			if ( !is_array( $schema ) ) {
				// Fail with a clear message instead of a TypeError further down.
				throw new RuntimeException( "Unable to load the default JSON Schema from $schemaPath" );
			}
		}

		// We depart from the JSON Schema specification in disallowing by default
		// additional event fields not mentioned in the schema.
		// See <https://bugzilla.wikimedia.org/show_bug.cgi?id=44454> and
		// <https://tools.ietf.org/html/draft-zyp-json-schema-03#section-5.4>.
		if ( !array_key_exists( 'additionalProperties', $schema ) ) {
			$schema[ 'additionalProperties' ] = false;
		}

		$root = new JsonTreeRef( $object );
		$root->attachSchema( $schema );
		return $root->validate();
	}

	/**
	 * Randomise inclusion based on population size and a session ID.
	 *
	 * @param int $populationSize Return true one in this many times. This is 1/samplingRate.
	 *  A population size of 0 never samples anything in.
	 * @param string $sessionId Hexadecimal value, only the first 8 characters are used
	 * @return bool True if the event should be included (sampled in), false if not (sampled out)
	 */
	public static function sessionInSample( $populationSize, $sessionId ) {
		$populationSize = (int)$populationSize;
		if ( $populationSize === 0 ) {
			// "One in zero" can never be satisfied, and the modulo below
			// would throw DivisionByZeroError.
			return false;
		}

		$decimal = (int)base_convert( substr( $sessionId, 0, 8 ), 16, 10 );
		return $decimal % $populationSize === 0;
	}

	/**
	 * This encapsulates the event data in a wrapper object with
	 * the default metadata for the current request.
	 *
	 * NOTE: for forwards compatibility with Event Platform schemas,
	 * we hijack the wgEventLoggingSchemas revision to encode the
	 * $schema URI. If the value for a schema defined in
	 * EventLoggingSchemas is a string, it is assumed
	 * to be an Event Platform $schema URI, not a MW revision id.
	 * In this case, the event will be prepared to be POSTed to EventGate.
	 *
	 * @param string $schemaName
	 * @param int|string $revisionOrSchemaUri
	 *  The revision id or a string $schema URI for use with Event Platform.
	 * @param array $eventData un-encapsulated event data
	 * @return array encapsulated event
	 */
	private static function encapsulate( $schemaName, $revisionOrSchemaUri, $eventData ) {
		global $wgDBname;

		$event = [
			'event' => $eventData,
			'schema' => $schemaName,
			'wiki' => $wgDBname,
		];

		if ( isset( $_SERVER[ 'HTTP_HOST' ] ) ) {
			$event['webHost'] = $_SERVER['HTTP_HOST'];
		}

		if ( is_string( $revisionOrSchemaUri ) ) {
			$event['$schema'] = $revisionOrSchemaUri;
			// NOTE: `client_dt` is 'legacy' event time. `dt` is the preferred event time field
			// and is set in EventServiceClient.
			$event['client_dt'] = wfTimestamp( TS_ISO_8601 );

			// Note: some fields will have defaults set by eventgate-wikimedia.
			// See:
			// - https://gerrit.wikimedia.org/r/plugins/gitiles/eventgate-wikimedia/+/refs/heads/master/eventgate-wikimedia.js#358
			// - https://wikitech.wikimedia.org/wiki/Event_Platform/Schemas/Guidelines#Automatically_populated_fields
		} else {
			$event['revision'] = $revisionOrSchemaUri;
			$event['userAgent'] = $_SERVER[ 'HTTP_USER_AGENT' ] ?? '';
		}

		return $event;
	}

	/**
	 * Prepend "eventlogging_" to the schema name to create a stream name for a migrated legacy
	 * schema.
	 *
	 * @param string $schemaName
	 * @return string
	 */
	private static function getLegacyStreamName( string $schemaName ): string {
		return "eventlogging_$schemaName";
	}

}
| 296 | |
// Also expose the namespaced class under the global name 'EventLogging'
// (presumably so callers using the un-namespaced name keep working).
class_alias( EventLogging::class, 'EventLogging' );