Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
80.92% |
140 / 173 |
|
23.08% |
3 / 13 |
CRAP | |
0.00% |
0 / 1 |
| WikimediaPageViewService | |
80.92% |
140 / 173 |
|
23.08% |
3 / 13 |
112.01 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
| setLogger | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| setOriginalRequest | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| supports | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
| getPageData | |
80.65% |
25 / 31 |
|
0.00% |
0 / 1 |
10.73 | |||
| getSiteData | |
75.00% |
15 / 20 |
|
0.00% |
0 / 1 |
12.89 | |||
| getTopPages | |
85.71% |
12 / 14 |
|
0.00% |
0 / 1 |
9.24 | |||
| getCacheExpiry | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| verifyApiOptions | |
70.00% |
7 / 10 |
|
0.00% |
0 / 1 |
5.68 | |||
| getRequestUrl | |
93.55% |
29 / 31 |
|
0.00% |
0 / 1 |
8.02 | |||
| makeRequest | |
83.33% |
30 / 36 |
|
0.00% |
0 / 1 |
20.67 | |||
| getEmptyDateRange | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
| getStartEnd | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Extension\PageViewInfo; |
| 4 | |
| 5 | use InvalidArgumentException; |
| 6 | use MediaWiki\Http\HttpRequestFactory; |
| 7 | use MediaWiki\Json\FormatJson; |
| 8 | use MediaWiki\Language\RawMessage; |
| 9 | use MediaWiki\Page\PageReference; |
| 10 | use MediaWiki\Request\WebRequest; |
| 11 | use MediaWiki\Status\Status; |
| 12 | use MediaWiki\Title\TitleFormatter; |
| 13 | use MediaWiki\Utils\MWTimestamp; |
| 14 | use MWHttpRequest; |
| 15 | use NullHttpRequestFactory; |
| 16 | use Psr\Log\LoggerAwareInterface; |
| 17 | use Psr\Log\LoggerInterface; |
| 18 | use Psr\Log\LogLevel; |
| 19 | use Psr\Log\NullLogger; |
| 20 | use StatusValue; |
| 21 | |
/**
 * PageViewService implementation for Wikimedia wikis, using the pageview API
 * @see https://wikitech.wikimedia.org/wiki/Analytics/PageviewAPI
 */
class WikimediaPageViewService implements PageViewService, LoggerAwareInterface {
	/** @var LoggerInterface */
	protected $logger;

	/** @var string API endpoint without trailing slash */
	protected $endpoint;
	/** @var int|false Max number of pages to look up (false for unlimited) */
	protected $lookupLimit;

	/** @var string */
	protected $project;
	/** @var string 'all-access', 'desktop', 'mobile-app' or 'mobile-web' */
	protected $access;
	/** @var string 'all-agents', 'user', 'spider' or 'bot' */
	protected $agent;
	/** @var string 'hourly', 'daily' or 'monthly', allowing other options would make the interface too complex */
	protected $granularity = 'daily';
	/** @var int UNIX timestamp of 0:00 of the last day with complete data */
	protected $lastCompleteDay;

	/** @var array|null Cache for getEmptyDateRange(): the most recently computed range */
	protected $range;
	/** @var int|null The $days value which $range was computed for */
	private $rangeDays;

	/** @var WebRequest|string[] The request that asked for this data; see the originalRequest
	 *    parameter of MediaWiki\Http\HttpRequestFactory::request()
	 */
	protected $originalRequest;

	/**
	 * @param HttpRequestFactory $httpRequestFactory
	 * @param TitleFormatter $titleFormatter
	 * @param string $endpoint Wikimedia pageview API endpoint
	 * @param array $apiOptions Associative array of API URL parameters
	 *   see https://wikimedia.org/api/rest_v1/#!/Pageviews_data
	 *   project is the only required parameter. Granularity, start and end are not supported.
	 * @param int|false $lookupLimit Max number of pages to look up (false for unlimited).
	 *   Data will be returned for no more than this many titles in a getPageData() call.
	 * @throws InvalidArgumentException When $apiOptions fails verifyApiOptions()
	 */
	public function __construct(
		private readonly HttpRequestFactory $httpRequestFactory,
		private readonly TitleFormatter $titleFormatter,
		$endpoint,
		array $apiOptions,
		$lookupLimit
	) {
		$this->endpoint = rtrim( $endpoint, '/' );
		$this->lookupLimit = $lookupLimit;

		// Array union: caller-supplied values win, defaults only fill the gaps.
		$apiOptions += [
			'access' => 'all-access',
			'agent' => 'user',
		];
		$this->verifyApiOptions( $apiOptions );

		$this->project = $apiOptions['project'];
		$this->access = $apiOptions['access'];
		$this->agent = $apiOptions['agent'];

		// Skip the current day for which only partial information is available
		$this->lastCompleteDay = strtotime( '0:0 1 day ago', MWTimestamp::time() );

		$this->logger = new NullLogger();
	}

	public function setLogger( LoggerInterface $logger ): void {
		$this->logger = $logger;
	}

	/**
	 * @param WebRequest|string[] $originalRequest See the 'originalRequest' parameter of
	 *   MediaWiki\Http\HttpRequestFactory::request().
	 */
	public function setOriginalRequest( $originalRequest ) {
		$this->originalRequest = $originalRequest;
	}

	/** @inheritDoc */
	public function supports( $metric, $scope ) {
		if ( $metric === self::METRIC_VIEW ) {
			return true;
		}
		if ( $metric === self::METRIC_UNIQUE ) {
			// Unique device counts exist only site-wide, and not for the mobile apps.
			return $scope === self::SCOPE_SITE && $this->access !== 'mobile-app';
		}
		return false;
	}

	/**
	 * @inheritDoc
	 */
	public function getPageData( array $titles, $days, $metric = self::METRIC_VIEW ) {
		if ( $metric !== self::METRIC_VIEW ) {
			throw new InvalidArgumentException( 'Invalid metric: ' . $metric );
		}
		if ( !$titles ) {
			return StatusValue::newGood( [] );
		}
		if ( $this->lookupLimit !== false ) {
			$titles = array_slice( $titles, 0, $this->lookupLimit );
		}
		if ( $days <= 0 ) {
			throw new InvalidArgumentException( 'Invalid days: ' . $days );
		}

		$status = StatusValue::newGood();
		$result = [];
		foreach ( $titles as $title ) {
			/** @var PageReference $title */
			$prefixedDBkey = $this->titleFormatter->getPrefixedDBkey( $title );
			// Pre-fill with nulls so that days omitted by the API are still present.
			$result[$prefixedDBkey] = $this->getEmptyDateRange( $days );

			$requestStatus = $this->makeRequest(
				$this->getRequestUrl( self::SCOPE_ARTICLE, $prefixedDBkey, $days ) );

			$succeeded = false;
			if ( $requestStatus->isOK() ) {
				$data = $requestStatus->getValue();
				if ( isset( $data['items'] ) && is_array( $data['items'] ) ) {
					foreach ( $data['items'] as $item ) {
						$day = self::timestampToDay( $item['timestamp'] );
						$result[$prefixedDBkey][$day] = $item['views'];
					}
					$succeeded = true;
				} else {
					$status->error( 'pvi-invalidresponse' );
				}
			}
			$status->success[$prefixedDBkey] = $succeeded;
			$status->merge( $requestStatus );
		}

		$status->successCount = count( array_filter( $status->success ) );
		$status->failCount = count( $status->success ) - $status->successCount;
		// The overall lookup is considered OK as soon as one title could be fetched.
		$status->setResult( (bool)$status->successCount, $result );
		return $status;
	}

	/**
	 * @inheritDoc
	 */
	public function getSiteData( $days, $metric = self::METRIC_VIEW ) {
		if ( $metric !== self::METRIC_VIEW && $metric !== self::METRIC_UNIQUE ) {
			throw new InvalidArgumentException( 'Invalid metric: ' . $metric );
		}
		if ( $metric === self::METRIC_UNIQUE && $this->access === 'mobile-app' ) {
			throw new InvalidArgumentException(
				'Unique device counts for mobile apps are not supported' );
		}
		if ( $days <= 0 ) {
			throw new InvalidArgumentException( 'Invalid days: ' . $days );
		}

		$result = $this->getEmptyDateRange( $days );
		$status = $this->makeRequest( $this->getRequestUrl( $metric, null, $days ) );
		if ( $status->isOK() ) {
			$data = $status->getValue();
			if ( isset( $data['items'] ) && is_array( $data['items'] ) ) {
				// The two APIs report their counts under different keys.
				$countField = $metric === self::METRIC_VIEW ? 'views' : 'devices';
				foreach ( $data['items'] as $item ) {
					$result[self::timestampToDay( $item['timestamp'] )] = $item[$countField];
				}
			} else {
				$status->fatal( 'pvi-invalidresponse' );
			}
		}
		$status->setResult( $status->isOK(), $result );
		return $status;
	}

	/**
	 * @inheritDoc
	 */
	public function getTopPages( $metric = self::METRIC_VIEW ) {
		if ( $metric !== self::METRIC_VIEW ) {
			throw new InvalidArgumentException( 'Invalid metric: ' . $metric );
		}

		$result = [];
		$status = $this->makeRequest( $this->getRequestUrl( self::SCOPE_TOP ) );
		if ( $status->isOK() ) {
			$data = $status->getValue();
			if ( isset( $data['items'] ) && is_array( $data['items'] ) && !$data['items'] ) {
				// empty result set, no error; makeRequest generates this on 404
			} elseif (
				isset( $data['items'][0]['articles'] ) &&
				is_array( $data['items'][0]['articles'] )
			) {
				foreach ( $data['items'][0]['articles'] as $item ) {
					$result[$item['article']] = $item['views'];
				}
			} else {
				$status->fatal( 'pvi-invalidresponse' );
			}
		}
		$status->setResult( $status->isOK(), $result );
		return $status;
	}

	/** @inheritDoc */
	public function getCacheExpiry( $metric, $scope ) {
		// Data is valid until the end of the day. Read the clock once, and from the same
		// source for both operands, so the result is consistent (also under a fake time).
		$now = MWTimestamp::time();
		$endOfDay = strtotime( '0:0 next day', $now );
		return $endOfDay - $now;
	}

	/**
	 * Check the API options after defaults were applied; 'access' and 'agent' must be set.
	 * @param array $apiOptions
	 * @throws InvalidArgumentException
	 */
	protected function verifyApiOptions( array $apiOptions ) {
		if ( !isset( $apiOptions['project'] ) ) {
			throw new InvalidArgumentException( "'project' is required" );
		}
		$validAccess = [ 'all-access', 'desktop', 'mobile-app', 'mobile-web' ];
		if ( !in_array( $apiOptions['access'], $validAccess, true ) ) {
			throw new InvalidArgumentException( 'Invalid access: ' . $apiOptions['access'] );
		}
		$validAgents = [ 'all-agents', 'user', 'spider', 'bot' ];
		if ( !in_array( $apiOptions['agent'], $validAgents, true ) ) {
			throw new InvalidArgumentException( 'Invalid agent: ' . $apiOptions['agent'] );
		}
		if ( isset( $apiOptions['granularity'] ) ) {
			throw new InvalidArgumentException( 'Changing granularity is not supported' );
		}
	}

	/**
	 * Build the pageview API URL for the given kind of query.
	 * @param string $scope SCOPE_* constant or METRIC_UNIQUE
	 * @param string|null $prefixedDBkey Required for SCOPE_ARTICLE, unused otherwise
	 * @param int|null $days Number of days to cover; unused for SCOPE_TOP
	 * @return string
	 * @throws InvalidArgumentException On unknown scope, missing title for article scope,
	 *   or unique-device lookup with an access type which has no unique-device equivalent
	 */
	protected function getRequestUrl( $scope, ?string $prefixedDBkey = null, $days = null ) {
		[ $start, $end ] = $this->getStartEnd( $days );
		switch ( $scope ) {
			case self::SCOPE_ARTICLE:
				if ( $prefixedDBkey === null ) {
					throw new InvalidArgumentException( 'Title is required when using article scope' );
				}
				// Use plain urlencode instead of wfUrlencode because we need
				// "/" to be encoded, which wfUrlencode doesn't.
				$encodedTitle = urlencode( $prefixedDBkey );
				// YYYYMMDD
				$start = substr( $start, 0, 8 );
				$end = substr( $end, 0, 8 );
				return "$this->endpoint/metrics/pageviews/per-article/$this->project/$this->access/"
					. "$this->agent/$encodedTitle/$this->granularity/$start/$end";
			case self::METRIC_VIEW:
			case self::SCOPE_SITE:
				// YYYYMMDDHH
				$start = substr( $start, 0, 10 );
				$end = substr( $end, 0, 10 );
				return "$this->endpoint/metrics/pageviews/aggregate/$this->project/$this->access/$this->agent/"
					. "$this->granularity/$start/$end";
			case self::SCOPE_TOP:
				// Only the last complete day is queried.
				$year = substr( $end, 0, 4 );
				$month = substr( $end, 4, 2 );
				$day = substr( $end, 6, 2 );
				return "$this->endpoint/metrics/pageviews/top/$this->project/$this->access/$year/$month/$day";
			case self::METRIC_UNIQUE:
				// The unique devices API uses its own names for the access types.
				$access = match ( $this->access ) {
					'all-access' => 'all-sites',
					'desktop' => 'desktop-site',
					'mobile-web' => 'mobile-site',
					// 'mobile-app' has no counterpart; fail with the exception type used
					// everywhere else in this class rather than an UnhandledMatchError.
					default => throw new InvalidArgumentException(
						'Unique device counts for mobile apps are not supported' ),
				};
				// YYYYMMDD
				$start = substr( $start, 0, 8 );
				$end = substr( $end, 0, 8 );
				return "$this->endpoint/metrics/unique-devices/$this->project/$access/"
					. "$this->granularity/$start/$end";
			default:
				throw new InvalidArgumentException( 'Invalid scope: ' . $scope );
		}
	}

	/**
	 * Fetch and JSON-decode the given URL, logging any problems.
	 * @param string $url
	 * @return StatusValue With the decoded response (as associative array) as value on success
	 */
	protected function makeRequest( $url ) {
		if ( defined( 'MW_PHPUNIT_TEST' ) &&
			class_exists( NullHttpRequestFactory::class ) &&
			$this->httpRequestFactory instanceof NullHttpRequestFactory ) {
			// Never attempt real HTTP requests from tests that did not mock the factory.
			return StatusValue::newGood();
		}

		/** @var MWHttpRequest $request */
		$request = $this->httpRequestFactory->create( $url, [ 'timeout' => 10 ], __METHOD__ );
		if ( $this->originalRequest ) {
			$request->setOriginalRequest( $this->originalRequest );
		}
		$status = $request->execute();
		$parseStatus = FormatJson::parse( $request->getContent() ?? '', FormatJson::FORCE_ASSOC );
		if ( $status->isOK() ) {
			// Take over the parsed body as the value of the request status.
			$status->merge( $parseStatus, true );
		}

		$apiErrorData = [];
		if ( !$status->isOK() && $parseStatus->isOK() && is_array( $parseStatus->getValue() ) ) {
			// hash of: type, title, method, uri, [detail]
			$apiErrorData = $parseStatus->getValue();
			if ( isset( $apiErrorData['detail'] ) && is_array( $apiErrorData['detail'] ) ) {
				$apiErrorData['detail'] = implode( ', ', $apiErrorData['detail'] );
			}
		}

		if (
			$request->getStatus() === 404 &&
			isset( $apiErrorData['type'] ) &&
			$apiErrorData['type'] === 'https://mediawiki.org/wiki/HyperSwitch/errors/not_found'
		) {
			// the pageview API will return with a 404 when the page has 0 views :/
			$status = StatusValue::newGood( [ 'items' => [] ] );
		}

		if ( !$status->isGood() ) {
			$error = Status::wrap( $status )->getWikiText( false, false, 'en' );
			// Warnings only (still OK) are logged as info, real failures as errors.
			$severity = $status->isOK() ? LogLevel::INFO : LogLevel::ERROR;
			$msg = $status->isOK()
				? 'Problems fetching {requesturl}: {error}'
				: 'Failed fetching {requesturl}: {error}';
			$logContext = [
				'requesturl' => $url,
				'error' => $error,
			];
			foreach ( $apiErrorData as $key => $value ) {
				// Array union semantics: never overwrite a key which is already set.
				$logContext += [ 'apierror_' . $key => $value ];
			}
			$this->logger->log( $severity, $msg, $logContext );
		}
		if ( !$status->isOK() && isset( $apiErrorData['detail'] ) ) {
			$status->error( ( new RawMessage( '$1' ) )->params( $apiErrorData['detail'] ) );
		}

		return $status;
	}

	/**
	 * The pageview API omits dates if there is no data. Fill it with nulls to make client-side
	 * processing easier.
	 *
	 * The result is cached, but only reused when it was computed for the same number of days.
	 * @param int $days
	 * @return array YYYY-MM-DD => null
	 */
	protected function getEmptyDateRange( $days ) {
		if ( !$this->range || $this->rangeDays !== $days ) {
			$range = [];
			// we only care about the date part, so add some hours to avoid errors when there is a
			// leap second or some other weirdness
			$end = $this->lastCompleteDay + 12 * 3600;
			$start = $end - ( $days - 1 ) * 24 * 3600;
			for ( $ts = $start; $ts <= $end; $ts += 24 * 3600 ) {
				$range[gmdate( 'Y-m-d', $ts )] = null;
			}
			$this->range = $range;
			$this->rangeDays = $days;
		}
		return $this->range;
	}

	/**
	 * Get start and end timestamp in YYYYMMDDHH format
	 * @param int|null $days Number of days the range should cover. Null is passed for queries
	 *   which only use the end timestamp; the start timestamp is meaningless then.
	 * @return string[] [ start, end ]
	 */
	protected function getStartEnd( $days ) {
		// Midday of the last complete day; see getEmptyDateRange() for why hours are added.
		$end = $this->lastCompleteDay + 12 * 3600;
		$start = $end - ( $days - 1 ) * 24 * 3600;
		return [ gmdate( 'Ymd', $start ) . '00', gmdate( 'Ymd', $end ) . '00' ];
	}

	/**
	 * Convert a timestamp as used in API responses (starting with YYYYMMDD) to YYYY-MM-DD.
	 * @param string $timestamp
	 * @return string
	 */
	private static function timestampToDay( $timestamp ) {
		return substr( $timestamp, 0, 4 ) . '-' . substr( $timestamp, 4, 2 ) . '-'
			. substr( $timestamp, 6, 2 );
	}
}