Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
81.87% |
140 / 171 |
|
23.08% |
3 / 13 |
CRAP | |
0.00% |
0 / 1 |
WikimediaPageViewService | |
81.87% |
140 / 171 |
|
23.08% |
3 / 13 |
101.03 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
1 | |||
setLogger | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setOriginalRequest | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
supports | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
getPageData | |
83.87% |
26 / 31 |
|
0.00% |
0 / 1 |
10.42 | |||
getSiteData | |
75.00% |
15 / 20 |
|
0.00% |
0 / 1 |
12.89 | |||
getTopPages | |
85.71% |
12 / 14 |
|
0.00% |
0 / 1 |
9.24 | |||
getCacheExpiry | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
verifyApiOptions | |
70.00% |
7 / 10 |
|
0.00% |
0 / 1 |
5.68 | |||
getRequestUrl | |
93.55% |
29 / 31 |
|
0.00% |
0 / 1 |
8.02 | |||
makeRequest | |
84.38% |
27 / 32 |
|
0.00% |
0 / 1 |
16.98 | |||
getEmptyDateRange | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
getStartEnd | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\PageViewInfo; |
4 | |
5 | use InvalidArgumentException; |
6 | use MediaWiki\Http\HttpRequestFactory; |
7 | use MediaWiki\Json\FormatJson; |
8 | use MediaWiki\Language\RawMessage; |
9 | use MediaWiki\Page\PageReference; |
10 | use MediaWiki\Request\WebRequest; |
11 | use MediaWiki\Status\Status; |
12 | use MediaWiki\Title\TitleFormatter; |
13 | use MediaWiki\Utils\MWTimestamp; |
14 | use MWHttpRequest; |
15 | use Psr\Log\LoggerAwareInterface; |
16 | use Psr\Log\LoggerInterface; |
17 | use Psr\Log\LogLevel; |
18 | use Psr\Log\NullLogger; |
19 | use StatusValue; |
20 | |
21 | /** |
22 | * PageViewService implementation for Wikimedia wikis, using the pageview API |
23 | * @see https://wikitech.wikimedia.org/wiki/Analytics/PageviewAPI |
24 | */ |
class WikimediaPageViewService implements PageViewService, LoggerAwareInterface {
	/** @var HttpRequestFactory Factory used to create the outgoing pageview API requests */
	protected $httpRequestFactory;
	/** @var LoggerInterface Receives a log entry for every request that does not finish cleanly */
	protected $logger;

	private TitleFormatter $titleFormatter;

	/** @var string API endpoint, stored without a trailing slash */
	protected $endpoint;
	/** @var int|false Max number of pages to look up (false for unlimited) */
	protected $lookupLimit;

	/** @var string */
	protected $project;
	/** @var string 'all-access', 'desktop', 'mobile-app' or 'mobile-web' */
	protected $access;
	/** @var string 'all-agents', 'user', 'spider' or 'bot' */
	protected $agent;
	/** @var string 'hourly', 'daily' or 'monthly', allowing other options would make the interface too complex */
	protected $granularity = 'daily';
	/** @var int UNIX timestamp of 0:00 of the last day with complete data */
	protected $lastCompleteDay;

	/**
	 * @var array[] Cache for getEmptyDateRange(), keyed by the number of days requested;
	 *   each entry is a YYYY-MM-DD => null map
	 */
	protected $range = [];

	/** @var WebRequest|string[] The request that asked for this data; see the originalRequest
	 * parameter of MediaWiki\Http\HttpRequestFactory::request()
	 */
	protected $originalRequest;

	/**
	 * @param HttpRequestFactory $httpRequestFactory
	 * @param TitleFormatter $titleFormatter
	 * @param string $endpoint Wikimedia pageview API endpoint
	 * @param array $apiOptions Associative array of API URL parameters
	 *   see https://wikimedia.org/api/rest_v1/#!/Pageviews_data
	 *   project is the only required parameter. Granularity, start and end are not supported.
	 * @param int|false $lookupLimit Max number of pages to look up (false for unlimited).
	 *   Data will be returned for no more than this many titles in a getPageData() call.
	 * @throws InvalidArgumentException When $apiOptions fails verifyApiOptions()
	 */
	public function __construct(
		HttpRequestFactory $httpRequestFactory,
		TitleFormatter $titleFormatter,
		$endpoint,
		array $apiOptions,
		$lookupLimit
	) {
		$this->endpoint = rtrim( $endpoint, '/' );
		$this->lookupLimit = $lookupLimit;
		// Array union keeps caller-supplied values and only fills in the missing keys
		$apiOptions += [
			'access' => 'all-access',
			'agent' => 'user',
		];
		$this->verifyApiOptions( $apiOptions );

		$this->project = $apiOptions['project'];
		$this->access = $apiOptions['access'];
		$this->agent = $apiOptions['agent'];

		// Skip the current day for which only partial information is available
		$this->lastCompleteDay = strtotime( '0:0 1 day ago', MWTimestamp::time() );

		$this->httpRequestFactory = $httpRequestFactory;
		$this->titleFormatter = $titleFormatter;
		$this->logger = new NullLogger();
	}

	/**
	 * @param LoggerInterface $logger Replaces the NullLogger installed by the constructor
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * @param WebRequest|string[] $originalRequest See the 'originalRequest' parameter of
	 *   MediaWiki\Http\HttpRequestFactory::request().
	 */
	public function setOriginalRequest( $originalRequest ) {
		$this->originalRequest = $originalRequest;
	}

	/** @inheritDoc */
	public function supports( $metric, $scope ) {
		if ( $metric === self::METRIC_VIEW ) {
			return true;
		} elseif ( $metric === self::METRIC_UNIQUE ) {
			// Unique-device counts exist only site-wide and not for the mobile app
			return $scope === self::SCOPE_SITE && $this->access !== 'mobile-app';
		}
		return false;
	}

	/**
	 * @inheritDoc
	 */
	public function getPageData( array $titles, $days, $metric = self::METRIC_VIEW ) {
		if ( $metric !== self::METRIC_VIEW ) {
			throw new InvalidArgumentException( 'Invalid metric: ' . $metric );
		}
		if ( !$titles ) {
			return StatusValue::newGood( [] );
		} elseif ( $this->lookupLimit !== false ) {
			$titles = array_slice( $titles, 0, $this->lookupLimit );
		}
		if ( $days <= 0 ) {
			throw new InvalidArgumentException( 'Invalid days: ' . $days );
		}

		$status = StatusValue::newGood();
		$result = [];
		// One API request per title; per-title outcome is tracked in $status->success
		foreach ( $titles as $title ) {
			/** @var PageReference $title */
			$prefixedDBkey = $this->titleFormatter->getPrefixedDBkey( $title );
			// Start from an all-null range so that days missing from the response stay null
			$result[$prefixedDBkey] = $this->getEmptyDateRange( $days );
			$requestStatus = $this->makeRequest(
				$this->getRequestUrl( self::SCOPE_ARTICLE, $prefixedDBkey, $days ) );
			if ( $requestStatus->isOK() ) {
				$data = $requestStatus->getValue();
				if ( isset( $data['items'] ) && is_array( $data['items'] ) ) {
					foreach ( $data['items'] as $item ) {
						$day = self::timestampToDay( $item['timestamp'] );
						$result[$prefixedDBkey][$day] = $item['views'];
					}
					$status->success[$prefixedDBkey] = true;
				} else {
					$status->error( 'pvi-invalidresponse' );
					$status->success[$prefixedDBkey] = false;
				}
			} else {
				$status->success[$prefixedDBkey] = false;
			}
			$status->merge( $requestStatus );
		}
		$status->successCount = count( array_filter( $status->success ) );
		$status->failCount = count( $status->success ) - $status->successCount;
		// The overall status is OK as soon as at least one title could be fetched
		$status->setResult( (bool)$status->successCount, $result );
		return $status;
	}

	/**
	 * @inheritDoc
	 */
	public function getSiteData( $days, $metric = self::METRIC_VIEW ) {
		if ( $metric !== self::METRIC_VIEW && $metric !== self::METRIC_UNIQUE ) {
			throw new InvalidArgumentException( 'Invalid metric: ' . $metric );
		} elseif ( $metric === self::METRIC_UNIQUE && $this->access === 'mobile-app' ) {
			throw new InvalidArgumentException(
				'Unique device counts for mobile apps are not supported' );
		}
		if ( $days <= 0 ) {
			throw new InvalidArgumentException( 'Invalid days: ' . $days );
		}
		$result = $this->getEmptyDateRange( $days );
		// The metric doubles as the URL selector here, see getRequestUrl()
		$status = $this->makeRequest( $this->getRequestUrl( $metric, null, $days ) );
		if ( $status->isOK() ) {
			$data = $status->getValue();
			if ( isset( $data['items'] ) && is_array( $data['items'] ) ) {
				// View responses carry the count in 'views', unique-device responses in 'devices'
				$countField = $metric === self::METRIC_VIEW ? 'views' : 'devices';
				foreach ( $data['items'] as $item ) {
					$result[self::timestampToDay( $item['timestamp'] )] = $item[$countField];
				}
			} else {
				$status->fatal( 'pvi-invalidresponse' );
			}
		}
		$status->setResult( $status->isOK(), $result );
		return $status;
	}

	/**
	 * @inheritDoc
	 */
	public function getTopPages( $metric = self::METRIC_VIEW ) {
		$result = [];
		if ( $metric !== self::METRIC_VIEW ) {
			throw new InvalidArgumentException( 'Invalid metric: ' . $metric );
		}
		$status = $this->makeRequest( $this->getRequestUrl( self::SCOPE_TOP ) );
		if ( $status->isOK() ) {
			$data = $status->getValue();
			if ( isset( $data['items'] ) && is_array( $data['items'] ) && !$data['items'] ) {
				// empty result set, no error; makeRequest generates this on 404
			} elseif (
				isset( $data['items'][0]['articles'] ) &&
				is_array( $data['items'][0]['articles'] )
			) {
				foreach ( $data['items'][0]['articles'] as $item ) {
					$result[$item['article']] = $item['views'];
				}
			} else {
				$status->fatal( 'pvi-invalidresponse' );
			}
		}
		$status->setResult( $status->isOK(), $result );
		return $status;
	}

	/** @inheritDoc */
	public function getCacheExpiry( $metric, $scope ) {
		// data is valid until the end of the day; read the clock once so that both ends of
		// the subtraction agree (MWTimestamp::time() may differ from time() under a fake clock)
		$now = MWTimestamp::time();
		$endOfDay = strtotime( '0:0 next day', $now );
		return $endOfDay - $now;
	}

	/**
	 * Validate the API options; 'access' and 'agent' must already be present
	 * (the constructor fills in their defaults before calling this).
	 * @param array $apiOptions
	 * @throws InvalidArgumentException
	 */
	protected function verifyApiOptions( array $apiOptions ) {
		if ( !isset( $apiOptions['project'] ) ) {
			throw new InvalidArgumentException( "'project' is required" );
		} elseif ( !in_array( $apiOptions['access'],
			[ 'all-access', 'desktop', 'mobile-app', 'mobile-web' ], true ) ) {
			throw new InvalidArgumentException( 'Invalid access: ' . $apiOptions['access'] );
		} elseif ( !in_array( $apiOptions['agent'],
			[ 'all-agents', 'user', 'spider', 'bot' ], true ) ) {
			throw new InvalidArgumentException( 'Invalid agent: ' . $apiOptions['agent'] );
		} elseif ( isset( $apiOptions['granularity'] ) ) {
			throw new InvalidArgumentException( 'Changing granularity is not supported' );
		}
	}

	/**
	 * Build the pageview API URL for the given scope.
	 * @param string $scope SCOPE_* constant or METRIC_UNIQUE
	 * @param string|null $prefixedDBkey Required for SCOPE_ARTICLE, ignored otherwise
	 * @param int|null $days Length of the date range; not needed for SCOPE_TOP, which only
	 *   uses the end date
	 * @return string
	 * @throws InvalidArgumentException On an unknown scope, a missing title for the article
	 *   scope, or an access type for which unique device counts do not exist
	 */
	protected function getRequestUrl( $scope, ?string $prefixedDBkey = null, $days = null ) {
		[ $start, $end ] = $this->getStartEnd( $days );
		switch ( $scope ) {
			case self::SCOPE_ARTICLE:
				if ( $prefixedDBkey === null ) {
					throw new InvalidArgumentException( 'Title is required when using article scope' );
				}
				// Use plain urlencode instead of wfUrlencode because we need
				// "/" to be encoded, which wfUrlencode doesn't.
				$encodedTitle = urlencode( $prefixedDBkey );
				// YYYYMMDD
				$start = substr( $start, 0, 8 );
				$end = substr( $end, 0, 8 );
				return "$this->endpoint/metrics/pageviews/per-article/$this->project/$this->access/"
					. "$this->agent/$encodedTitle/$this->granularity/$start/$end";
			case self::METRIC_VIEW:
			case self::SCOPE_SITE:
				// YYYYMMDDHH
				$start = substr( $start, 0, 10 );
				$end = substr( $end, 0, 10 );
				return "$this->endpoint/metrics/pageviews/aggregate/$this->project/$this->access/$this->agent/"
					. "$this->granularity/$start/$end";
			case self::SCOPE_TOP:
				$year = substr( $end, 0, 4 );
				$month = substr( $end, 4, 2 );
				$day = substr( $end, 6, 2 );
				return "$this->endpoint/metrics/pageviews/top/$this->project/$this->access/$year/$month/$day";
			case self::METRIC_UNIQUE:
				// The unique-devices endpoint uses its own names for the access types
				$accessMap = [
					'all-access' => 'all-sites',
					'desktop' => 'desktop-site',
					'mobile-web' => 'mobile-site',
				];
				if ( !isset( $accessMap[$this->access] ) ) {
					// Fail loudly instead of emitting a warning and a URL with an empty segment
					throw new InvalidArgumentException(
						'Unique device counts are not supported for access: ' . $this->access );
				}
				$access = $accessMap[$this->access];
				// YYYYMMDD
				$start = substr( $start, 0, 8 );
				$end = substr( $end, 0, 8 );
				return "$this->endpoint/metrics/unique-devices/$this->project/$access/"
					. "$this->granularity/$start/$end";
			default:
				throw new InvalidArgumentException( 'Invalid scope: ' . $scope );
		}
	}

	/**
	 * Fetch a URL and decode the JSON response body.
	 *
	 * A 404 response whose body identifies itself as a HyperSwitch "not_found" error is
	 * converted into a good status with an empty 'items' list. Any other status that is
	 * not good is logged, and the 'detail' field of an API error body, when present, is
	 * appended to a failed status as an additional error.
	 *
	 * @param string $url
	 * @return StatusValue With the decoded response (associative array) as value when OK
	 */
	protected function makeRequest( $url ) {
		/** @var MWHttpRequest $request */
		$request = $this->httpRequestFactory->create( $url, [ 'timeout' => 10 ], __METHOD__ );
		if ( $this->originalRequest ) {
			$request->setOriginalRequest( $this->originalRequest );
		}
		$status = $request->execute();
		// Parse unconditionally: error responses may carry a JSON body that describes the failure
		$parseStatus = FormatJson::parse( $request->getContent() ?? '', FormatJson::FORCE_ASSOC );
		if ( $status->isOK() ) {
			// Take over the parsed value (and any parse errors) as the result of the request
			$status->merge( $parseStatus, true );
		}

		$apiErrorData = [];
		if ( !$status->isOK() && $parseStatus->isOK() && is_array( $parseStatus->getValue() ) ) {
			// hash of: type, title, method, uri, [detail]
			$apiErrorData = $parseStatus->getValue();
			if ( isset( $apiErrorData['detail'] ) && is_array( $apiErrorData['detail'] ) ) {
				$apiErrorData['detail'] = implode( ', ', $apiErrorData['detail'] );
			}
		}
		if (
			$request->getStatus() === 404 &&
			isset( $apiErrorData['type'] ) &&
			$apiErrorData['type'] === 'https://mediawiki.org/wiki/HyperSwitch/errors/not_found'
		) {
			// the pageview API will return with a 404 when the page has 0 views :/
			$status = StatusValue::newGood( [ 'items' => [] ] );
		}
		if ( !$status->isGood() ) {
			$error = Status::wrap( $status )->getWikiText( false, false, 'en' );
			// OK-but-not-good means warnings only; anything else is a real failure
			$severity = $status->isOK() ? LogLevel::INFO : LogLevel::ERROR;
			$msg = $status->isOK()
				? 'Problems fetching {requesturl}: {error}'
				: 'Failed fetching {requesturl}: {error}';
			// Prefix the API error fields so they cannot clash with the other context keys
			$prefixedApiErrorData = array_combine(
				array_map( static fn ( $k ) => 'apierror_' . $k, array_keys( $apiErrorData ) ),
				$apiErrorData
			);
			$this->logger->log( $severity, $msg, [
				'requesturl' => $url,
				'error' => $error,
			] + $prefixedApiErrorData );
		}
		if ( !$status->isOK() && isset( $apiErrorData['detail'] ) ) {
			$status->error( ( new RawMessage( '$1' ) )->params( $apiErrorData['detail'] ) );
		}

		return $status;
	}

	/**
	 * The pageview API omits dates if there is no data. Fill it with nulls to make client-side
	 * processing easier.
	 *
	 * Ranges are cached per $days value, so calls with different lengths on the same
	 * instance each get a range of the requested size.
	 * @param int $days
	 * @return array YYYY-MM-DD => null
	 */
	protected function getEmptyDateRange( $days ) {
		if ( !isset( $this->range[$days] ) ) {
			$emptyRange = [];
			// we only care about the date part, so add some hours to avoid errors when there is a
			// leap second or some other weirdness
			$end = $this->lastCompleteDay + 12 * 3600;
			$start = $end - ( $days - 1 ) * 24 * 3600;
			for ( $ts = $start; $ts <= $end; $ts += 24 * 3600 ) {
				$emptyRange[gmdate( 'Y-m-d', $ts )] = null;
			}
			$this->range[$days] = $emptyRange;
		}
		return $this->range[$days];
	}

	/**
	 * Get start and end timestamp in YYYYMMDDHH format (the hour part is always "00").
	 * @param int|null $days Length of the range ending on the last complete day. With null
	 *   the start value is not meaningful; getRequestUrl() relies on this for the top-pages
	 *   scope, where only the end is used.
	 * @return string[] [ start, end ]
	 */
	protected function getStartEnd( $days ) {
		// Same half-day offset as getEmptyDateRange(), so both agree on the dates
		$end = $this->lastCompleteDay + 12 * 3600;
		$start = $end - ( $days - 1 ) * 24 * 3600;
		return [ gmdate( 'Ymd', $start ) . '00', gmdate( 'Ymd', $end ) . '00' ];
	}

	/**
	 * Convert an API item timestamp into the date key used in result arrays.
	 * @param string $ts Timestamp whose first eight characters are YYYYMMDD
	 * @return string YYYY-MM-DD
	 */
	private static function timestampToDay( $ts ) {
		return substr( $ts, 0, 4 ) . '-' . substr( $ts, 4, 2 ) . '-' . substr( $ts, 6, 2 );
	}
}