Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
81.66% |
138 / 169 |
|
23.08% |
3 / 13 |
CRAP | |
0.00% |
0 / 1 |
WikimediaPageViewService | |
81.66% |
138 / 169 |
|
23.08% |
3 / 13 |
102.11 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
1 | |||
setLogger | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setOriginalRequest | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
supports | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
getPageData | |
83.33% |
25 / 30 |
|
0.00% |
0 / 1 |
10.46 | |||
getSiteData | |
75.00% |
15 / 20 |
|
0.00% |
0 / 1 |
12.89 | |||
getTopPages | |
85.71% |
12 / 14 |
|
0.00% |
0 / 1 |
9.24 | |||
getCacheExpiry | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
verifyApiOptions | |
70.00% |
7 / 10 |
|
0.00% |
0 / 1 |
5.68 | |||
getRequestUrl | |
93.55% |
29 / 31 |
|
0.00% |
0 / 1 |
8.02 | |||
makeRequest | |
84.38% |
27 / 32 |
|
0.00% |
0 / 1 |
16.98 | |||
getEmptyDateRange | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
getStartEnd | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\PageViewInfo; |
4 | |
5 | use FormatJson; |
6 | use InvalidArgumentException; |
7 | use MediaWiki\Http\HttpRequestFactory; |
8 | use MWHttpRequest; |
9 | use MWTimestamp; |
10 | use Psr\Log\LoggerAwareInterface; |
11 | use Psr\Log\LoggerInterface; |
12 | use Psr\Log\LogLevel; |
13 | use Psr\Log\NullLogger; |
14 | use RawMessage; |
15 | use Status; |
16 | use StatusValue; |
17 | use Title; |
18 | use WebRequest; |
19 | |
20 | /** |
21 | * PageViewService implementation for Wikimedia wikis, using the pageview API |
22 | * @see https://wikitech.wikimedia.org/wiki/Analytics/PageviewAPI |
23 | */ |
24 | class WikimediaPageViewService implements PageViewService, LoggerAwareInterface { |
25 | /** @var HttpRequestFactory */ |
26 | protected $httpRequestFactory; |
27 | /** @var LoggerInterface */ |
28 | protected $logger; |
29 | |
30 | /** @var string */ |
31 | protected $endpoint; |
32 | /** @var int|false Max number of pages to look up (false for unlimited) */ |
33 | protected $lookupLimit; |
34 | |
35 | /** @var string */ |
36 | protected $project; |
37 | /** @var string 'all-access', 'desktop', 'mobile-app' or 'mobile-web' */ |
38 | protected $access; |
39 | /** @var string 'all-agents', 'user', 'spider' or 'bot' */ |
40 | protected $agent; |
41 | /** @var string 'hourly', 'daily' or 'monthly' */ |
42 | protected $granularity = 'daily'; // allowing other options would make the interface too complex |
43 | /** @var int UNIX timestamp of 0:00 of the last day with complete data */ |
44 | protected $lastCompleteDay; |
45 | |
46 | /** @var array Cache for getEmptyDateRange() */ |
47 | protected $range; |
48 | |
49 | /** @var WebRequest|string[] The request that asked for this data; see the originalRequest |
50 | * parameter of MediaWiki\Http\HttpRequestFactory::request() |
51 | */ |
52 | protected $originalRequest; |
53 | |
/**
 * Set up the service for one project of the Wikimedia pageview API.
 *
 * @param HttpRequestFactory $httpRequestFactory Factory used to create the outgoing requests
 * @param string $endpoint Wikimedia pageview API endpoint; trailing slashes are stripped
 * @param array $apiOptions Associative array of API URL parameters,
 *   see https://wikimedia.org/api/rest_v1/#!/Pageviews_data
 *   'project' is the only required parameter; 'access' defaults to 'all-access' and
 *   'agent' to 'user'. Granularity, start and end are not supported.
 * @param int|false $lookupLimit Max number of pages to look up (false for unlimited).
 *   Data will be returned for no more than this many titles in a getPageData() call.
 * @throws InvalidArgumentException When verifyApiOptions() rejects the options
 */
public function __construct(
	HttpRequestFactory $httpRequestFactory,
	$endpoint,
	array $apiOptions,
	$lookupLimit
) {
	$this->endpoint = rtrim( $endpoint, '/' );
	$this->lookupLimit = $lookupLimit;

	// Array union keeps the caller's values and only fills in the missing keys.
	$defaults = [
		'access' => 'all-access',
		'agent' => 'user',
	];
	$options = $apiOptions + $defaults;
	$this->verifyApiOptions( $options );

	foreach ( [ 'project', 'access', 'agent' ] as $name ) {
		$this->$name = $options[$name];
	}

	// The current day only has partial data, so the newest usable day is yesterday.
	$now = MWTimestamp::time();
	$this->lastCompleteDay = strtotime( '0:0 1 day ago', $now );

	$this->httpRequestFactory = $httpRequestFactory;
	$this->logger = new NullLogger();
}
87 | |
/**
 * Replace the logger used by makeRequest() to report request problems.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
91 | |
/**
 * Remember the request on whose behalf data is being fetched. When set to a non-empty
 * value, makeRequest() hands it to every outgoing HTTP request.
 *
 * @param WebRequest|string[] $originalRequest See the 'originalRequest' parameter of
 *   MediaWiki\Http\HttpRequestFactory::request().
 */
public function setOriginalRequest( $originalRequest ) {
	$this->originalRequest = $originalRequest;
}
99 | |
/**
 * Tell whether this service can provide the given metric for the given scope.
 *
 * @param string $metric METRIC_* constant
 * @param string $scope SCOPE_* constant
 * @return bool True for METRIC_VIEW in any scope; for METRIC_UNIQUE only in SCOPE_SITE and
 *   only when the configured access method is not 'mobile-app'; false otherwise
 */
public function supports( $metric, $scope ) {
	if ( $metric === self::METRIC_VIEW ) {
		return true;
	}
	if ( $metric !== self::METRIC_UNIQUE ) {
		return false;
	}
	$isSiteScope = $scope === self::SCOPE_SITE;
	return $isSiteScope && $this->access !== 'mobile-app';
}
108 | |
/**
 * Fetch daily view counts for a list of pages, one API request per title.
 *
 * The value of the returned status maps each prefixed DB key to a YYYY-MM-DD => count
 * array; days the API returned no item for stay null. The status is OK when at least
 * one title could be fetched, and its success array records the outcome per title.
 *
 * @inheritDoc
 */
public function getPageData( array $titles, $days, $metric = self::METRIC_VIEW ) {
	if ( $metric !== self::METRIC_VIEW ) {
		throw new InvalidArgumentException( 'Invalid metric: ' . $metric );
	}
	if ( !$titles ) {
		return StatusValue::newGood( [] );
	}
	if ( $this->lookupLimit !== false ) {
		$titles = array_slice( $titles, 0, $this->lookupLimit );
	}
	if ( $days <= 0 ) {
		throw new InvalidArgumentException( 'Invalid days: ' . $days );
	}

	$status = StatusValue::newGood();
	$result = [];
	foreach ( $titles as $title ) {
		/** @var Title $title */
		$dbKey = $title->getPrefixedDBkey();
		$result[$dbKey] = $this->getEmptyDateRange( $days );

		$url = $this->getRequestUrl( self::SCOPE_ARTICLE, $title, $days );
		$requestStatus = $this->makeRequest( $url );

		$fetched = false;
		if ( $requestStatus->isOK() ) {
			$data = $requestStatus->getValue();
			if ( isset( $data['items'] ) && is_array( $data['items'] ) ) {
				foreach ( $data['items'] as $item ) {
					// Turn the leading YYYYMMDD of the API timestamp into YYYY-MM-DD
					$ts = $item['timestamp'];
					$day = implode( '-', [
						substr( $ts, 0, 4 ),
						substr( $ts, 4, 2 ),
						substr( $ts, 6, 2 ),
					] );
					$result[$dbKey][$day] = $item['views'];
				}
				$fetched = true;
			} else {
				$status->error( 'pvi-invalidresponse' );
			}
		}
		$status->success[$dbKey] = $fetched;
		$status->merge( $requestStatus );
	}

	$succeeded = count( array_filter( $status->success ) );
	$status->successCount = $succeeded;
	$status->failCount = count( $status->success ) - $succeeded;
	$status->setResult( (bool)$status->successCount, $result );
	return $status;
}
155 | |
/**
 * Fetch daily site-wide totals: views for METRIC_VIEW, devices for METRIC_UNIQUE.
 *
 * The value of the returned status is a YYYY-MM-DD => count array; days the API returned
 * no item for stay null.
 *
 * @inheritDoc
 */
public function getSiteData( $days, $metric = self::METRIC_VIEW ) {
	if ( !in_array( $metric, [ self::METRIC_VIEW, self::METRIC_UNIQUE ], true ) ) {
		throw new InvalidArgumentException( 'Invalid metric: ' . $metric );
	}
	if ( $metric === self::METRIC_UNIQUE && $this->access === 'mobile-app' ) {
		throw new InvalidArgumentException(
			'Unique device counts for mobile apps are not supported' );
	}
	if ( $days <= 0 ) {
		throw new InvalidArgumentException( 'Invalid days: ' . $days );
	}

	// Which field of a response item holds the number depends on the metric
	$countField = $metric === self::METRIC_VIEW ? 'views' : 'devices';

	$result = $this->getEmptyDateRange( $days );
	$status = $this->makeRequest( $this->getRequestUrl( $metric, null, $days ) );
	if ( $status->isOK() ) {
		$data = $status->getValue();
		$hasItems = isset( $data['items'] ) && is_array( $data['items'] );
		if ( !$hasItems ) {
			$status->fatal( 'pvi-invalidresponse' );
		} else {
			foreach ( $data['items'] as $item ) {
				// Turn the leading YYYYMMDD of the API timestamp into YYYY-MM-DD
				$ts = $item['timestamp'];
				$day = implode( '-', [
					substr( $ts, 0, 4 ),
					substr( $ts, 4, 2 ),
					substr( $ts, 6, 2 ),
				] );
				$result[$day] = $item[$countField];
			}
		}
	}
	$status->setResult( $status->isOK(), $result );
	return $status;
}
187 | |
/**
 * Fetch the top-pages list for the last complete day.
 *
 * The value of the returned status maps the 'article' field of each entry to its 'views'.
 *
 * @inheritDoc
 */
public function getTopPages( $metric = self::METRIC_VIEW ) {
	if ( $metric !== self::METRIC_VIEW ) {
		throw new InvalidArgumentException( 'Invalid metric: ' . $metric );
	}

	$result = [];
	$status = $this->makeRequest( $this->getRequestUrl( self::SCOPE_TOP ) );
	if ( $status->isOK() ) {
		$data = $status->getValue();
		// An empty item list is not an error; makeRequest generates this on 404
		$isEmptyResultSet = isset( $data['items'] )
			&& is_array( $data['items'] )
			&& !$data['items'];
		if ( !$isEmptyResultSet ) {
			$hasArticles = isset( $data['items'][0]['articles'] )
				&& is_array( $data['items'][0]['articles'] );
			if ( $hasArticles ) {
				foreach ( $data['items'][0]['articles'] as $entry ) {
					$result[$entry['article']] = $entry['views'];
				}
			} else {
				$status->fatal( 'pvi-invalidresponse' );
			}
		}
	}
	$status->setResult( $status->isOK(), $result );
	return $status;
}
215 | |
/**
 * Get the number of seconds for which fetched data may be cached.
 *
 * @param string $metric Not used; the expiry is the same for every metric
 * @param string $scope Not used; the expiry is the same for every scope
 * @return int Seconds from now until the next 0:00 as resolved by strtotime()
 */
public function getCacheExpiry( $metric, $scope ) {
	// Data is valid until the end of the day. Read the clock once, via MWTimestamp like the
	// constructor does, so that both ends of the subtraction are based on the same "now";
	// mixing it with the bare time() gives a wrong TTL whenever the two clocks differ.
	$now = MWTimestamp::time();
	$endOfDay = strtotime( '0:0 next day', $now );
	return $endOfDay - $now;
}
221 | |
/**
 * Check the API options after defaults have been applied.
 *
 * @param array $apiOptions Must contain 'project', plus an 'access' and an 'agent' value
 *   from the lists below; must not contain 'granularity'
 * @throws InvalidArgumentException On the first check that fails
 */
protected function verifyApiOptions( array $apiOptions ) {
	$validAccess = [ 'all-access', 'desktop', 'mobile-app', 'mobile-web' ];
	$validAgents = [ 'all-agents', 'user', 'spider', 'bot' ];

	if ( !isset( $apiOptions['project'] ) ) {
		throw new InvalidArgumentException( "'project' is required" );
	}
	if ( !in_array( $apiOptions['access'], $validAccess, true ) ) {
		throw new InvalidArgumentException( 'Invalid access: ' . $apiOptions['access'] );
	}
	if ( !in_array( $apiOptions['agent'], $validAgents, true ) ) {
		throw new InvalidArgumentException( 'Invalid agent: ' . $apiOptions['agent'] );
	}
	if ( isset( $apiOptions['granularity'] ) ) {
		throw new InvalidArgumentException( 'Changing granularity is not supported' );
	}
}
239 | |
/**
 * Build the REST API URL for the given scope.
 *
 * @param string $scope SCOPE_* constant, METRIC_VIEW (handled like SCOPE_SITE) or
 *   METRIC_UNIQUE
 * @param Title|null $title Page to look up; required for SCOPE_ARTICLE, not used otherwise
 * @param int|null $days Number of days to cover, ending with the last complete day.
 *   May be omitted for SCOPE_TOP, which only uses the end date.
 * @return string
 * @throws InvalidArgumentException On an unknown scope, on a missing title in article
 *   scope, or when unique device counts are requested for an access method that has no
 *   unique-devices equivalent
 */
protected function getRequestUrl( $scope, ?Title $title = null, $days = null ) {
	[ $start, $end ] = $this->getStartEnd( $days );
	switch ( $scope ) {
		case self::SCOPE_ARTICLE:
			if ( !$title ) {
				throw new InvalidArgumentException( 'Title is required when using article scope' );
			}
			// Use plain urlencode instead of wfUrlencode because we need
			// "/" to be encoded, which wfUrlencode doesn't.
			$encodedTitle = urlencode( $title->getPrefixedDBkey() );
			// YYYYMMDD
			$start = substr( $start, 0, 8 );
			$end = substr( $end, 0, 8 );
			return "$this->endpoint/metrics/pageviews/per-article/$this->project/$this->access/"
				. "$this->agent/$encodedTitle/$this->granularity/$start/$end";
		case self::METRIC_VIEW:
		case self::SCOPE_SITE:
			// YYYYMMDDHH
			$start = substr( $start, 0, 10 );
			$end = substr( $end, 0, 10 );
			return "$this->endpoint/metrics/pageviews/aggregate/$this->project/$this->access/$this->agent/"
				. "$this->granularity/$start/$end";
		case self::SCOPE_TOP:
			$year = substr( $end, 0, 4 );
			$month = substr( $end, 4, 2 );
			$day = substr( $end, 6, 2 );
			return "$this->endpoint/metrics/pageviews/top/$this->project/$this->access/$year/$month/$day";
		case self::METRIC_UNIQUE:
			// The unique-devices URL uses its own names for the access methods
			$accessSites = [
				'all-access' => 'all-sites',
				'desktop' => 'desktop-site',
				'mobile-web' => 'mobile-site',
			];
			if ( !isset( $accessSites[$this->access] ) ) {
				// Fail loudly instead of building a URL with an empty path segment
				throw new InvalidArgumentException(
					'Unique device counts are not supported for access: ' . $this->access );
			}
			$access = $accessSites[$this->access];
			// YYYYMMDD
			$start = substr( $start, 0, 8 );
			$end = substr( $end, 0, 8 );
			return "$this->endpoint/metrics/unique-devices/$this->project/$access/"
				. "$this->granularity/$start/$end";
		default:
			throw new InvalidArgumentException( 'Invalid scope: ' . $scope );
	}
}
288 | |
/**
 * Send a GET request to the API and decode the JSON response.
 *
 * A 404 response whose body is a HyperSwitch "not_found" error is turned into a good
 * status with an empty item list. Any status that is not good is logged, at ERROR level
 * if it is not OK and at INFO level otherwise.
 *
 * @param string $url
 * @return StatusValue On success the value is the decoded response as an associative array
 */
protected function makeRequest( $url ) {
	/** @var MWHttpRequest $request */
	$request = $this->httpRequestFactory->create( $url, [ 'timeout' => 10 ], __METHOD__ );
	if ( $this->originalRequest ) {
		$request->setOriginalRequest( $this->originalRequest );
	}
	$status = $request->execute();

	// The body is parsed even for failed requests, since it may describe the error
	$body = $request->getContent() ?? '';
	$parseStatus = FormatJson::parse( $body, FormatJson::FORCE_ASSOC );
	if ( $status->isOK() ) {
		$status->merge( $parseStatus, true );
	}

	$apiErrorData = [];
	if ( !$status->isOK() && $parseStatus->isOK() ) {
		$parsed = $parseStatus->getValue();
		if ( is_array( $parsed ) ) {
			// hash of: type, title, method, uri, [detail]
			$apiErrorData = $parsed;
			if ( isset( $apiErrorData['detail'] ) && is_array( $apiErrorData['detail'] ) ) {
				$apiErrorData['detail'] = implode( ', ', $apiErrorData['detail'] );
			}
		}
	}

	$notFoundType = 'https://mediawiki.org/wiki/HyperSwitch/errors/not_found';
	$isNotFound = $request->getStatus() === 404
		&& isset( $apiErrorData['type'] )
		&& $apiErrorData['type'] === $notFoundType;
	if ( $isNotFound ) {
		// the pageview API will return with a 404 when the page has 0 views :/
		$status = StatusValue::newGood( [ 'items' => [] ] );
	}

	if ( !$status->isGood() ) {
		$error = Status::wrap( $status )->getWikiText( false, false, 'en' );
		if ( $status->isOK() ) {
			$severity = LogLevel::INFO;
			$msg = 'Problems fetching {requesturl}: {error}';
		} else {
			$severity = LogLevel::ERROR;
			$msg = 'Failed fetching {requesturl}: {error}';
		}
		$context = [
			'requesturl' => $url,
			'error' => $error,
		];
		// Expose every field of the API error to the logger under an apierror_ prefix
		foreach ( $apiErrorData as $field => $value ) {
			$context['apierror_' . $field] = $value;
		}
		$this->logger->log( $severity, $msg, $context );
	}

	if ( !$status->isOK() && isset( $apiErrorData['detail'] ) ) {
		$status->error( ( new RawMessage( '$1' ) )->params( $apiErrorData['detail'] ) );
	}

	return $status;
}
341 | |
/**
 * The pageview API omits dates if there is no data. Fill it with nulls to make client-side
 * processing easier.
 *
 * Ranges are cached in $this->range, keyed by $days, so that calls asking for different
 * lengths on the same instance each get a range of the right length.
 *
 * @param int $days Number of days, ending with (and including) the last complete day
 * @return array YYYY-MM-DD => null
 */
protected function getEmptyDateRange( $days ) {
	if ( !isset( $this->range[$days] ) ) {
		$range = [];
		// we only care about the date part, so add some hours to avoid errors when there is a
		// leap second or some other weirdness
		$end = $this->lastCompleteDay + 12 * 3600;
		$start = $end - ( $days - 1 ) * 24 * 3600;
		for ( $ts = $start; $ts <= $end; $ts += 24 * 3600 ) {
			$range[gmdate( 'Y-m-d', $ts )] = null;
		}
		$this->range[$days] = $range;
	}
	return $this->range[$days];
}
361 | |
/**
 * Get start and end timestamp in YYYYMMDDHH format (the hour part is always 00).
 *
 * The end is the last complete day; the start is $days - 1 days before it.
 *
 * @param int|null $days Number of days to cover. getRequestUrl() passes null when no
 *   range was requested; the start value is not meaningful in that case.
 * @return string[] [ start, end ]
 */
protected function getStartEnd( $days ) {
	$secondsPerDay = 24 * 3600;
	// Work from the middle of the day, as getEmptyDateRange() does
	$endNoon = $this->lastCompleteDay + 12 * 3600;
	$startNoon = $endNoon - ( $days - 1 ) * $secondsPerDay;

	$startStamp = gmdate( 'Ymd', $startNoon ) . '00';
	$endStamp = gmdate( 'Ymd', $endNoon ) . '00';
	return [ $startStamp, $endStamp ];
}
372 | } |