Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
78.24% |
205 / 262 |
|
27.27% |
3 / 11 |
CRAP | |
0.00% |
0 / 1 |
ComputedUserImpactLookup | |
78.24% |
205 / 262 |
|
27.27% |
3 / 11 |
86.15 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
1 | |||
getUserImpact | |
94.12% |
16 / 17 |
|
0.00% |
0 / 1 |
3.00 | |||
getExpensiveUserImpact | |
89.19% |
33 / 37 |
|
0.00% |
0 / 1 |
6.05 | |||
getEditData | |
93.83% |
76 / 81 |
|
0.00% |
0 / 1 |
12.03 | |||
getThanksCount | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
getPageViewData | |
94.87% |
37 / 39 |
|
0.00% |
0 / 1 |
10.01 | |||
getPageViewDataInJobContext | |
58.62% |
17 / 29 |
|
0.00% |
0 / 1 |
10.47 | |||
getPageViewDataInWebRequestContext | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
20 | |||
logPageDataBadStatus | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
6 | |||
updateToIso8601DateKeys | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
getImage | |
28.57% |
4 / 14 |
|
0.00% |
0 / 1 |
19.12 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\UserImpact; |
4 | |
5 | use ChangeTags; |
6 | use DateTime; |
7 | use GrowthExperiments\NewcomerTasks\ConfigurationLoader\ConfigurationLoader; |
8 | use GrowthExperiments\NewcomerTasks\TaskType\TaskTypeHandlerRegistry; |
9 | use LogicException; |
10 | use MediaWiki\Config\ServiceOptions; |
11 | use MediaWiki\Extension\PageViewInfo\PageViewService; |
12 | use MediaWiki\Extension\Thanks\ThanksQueryHelper; |
13 | use MediaWiki\MainConfigNames; |
14 | use MediaWiki\Registration\ExtensionRegistry; |
15 | use MediaWiki\Revision\RevisionRecord; |
16 | use MediaWiki\Status\Status; |
17 | use MediaWiki\Storage\NameTableAccessException; |
18 | use MediaWiki\Storage\NameTableStore; |
19 | use MediaWiki\Title\MalformedTitleException; |
20 | use MediaWiki\Title\Title; |
21 | use MediaWiki\Title\TitleFactory; |
22 | use MediaWiki\Title\TitleFormatter; |
23 | use MediaWiki\Title\TitleValue; |
24 | use MediaWiki\User\User; |
25 | use MediaWiki\User\UserEditTracker; |
26 | use MediaWiki\User\UserFactory; |
27 | use MediaWiki\User\UserIdentity; |
28 | use MediaWiki\Utils\MWTimestamp; |
29 | use PageImages\PageImages; |
30 | use Psr\Log\LoggerInterface; |
31 | use Psr\Log\NullLogger; |
32 | use StatusValue; |
33 | use Wikimedia\Rdbms\DBAccessObjectUtils; |
34 | use Wikimedia\Rdbms\IConnectionProvider; |
35 | use Wikimedia\Rdbms\IDBAccessObject; |
36 | use Wikimedia\Stats\IBufferingStatsdDataFactory; |
37 | |
38 | class ComputedUserImpactLookup implements UserImpactLookup { |
39 | |
40 | public const CONSTRUCTOR_OPTIONS = [ |
41 | MainConfigNames::LocalTZoffset, |
42 | 'GEUserImpactMaxArticlesToProcessForPageviews', |
43 | 'GEUserImpactMaximumProcessTimeSeconds', |
44 | ]; |
45 | |
46 | /** |
47 | * Size in pixels of the thumb image to request to PageImages. Matches the Codex |
48 | * thumbnail component size it is rendered in. Used in the articles list (ArticlesList.vue) |
49 | * in the impact module. |
50 | */ |
51 | private const THUMBNAIL_SIZE = 40; |
52 | |
53 | /** Cutoff for edit statistics. See also DATA_ROWS_LIMIT in ScoreCards.vue. */ |
54 | private const MAX_EDITS = 1000; |
55 | |
56 | /** Cutoff for thanks count. See also DATA_ROWS_LIMIT in ScoreCards.vue. */ |
57 | private const MAX_THANKS = 1000; |
58 | |
59 | /** How many articles to use for $priorityTitles in getPageViewData(). */ |
60 | private const PRIORITY_ARTICLES_LIMIT = 5; |
61 | |
62 | /** How many days of pageview data to get. PageViewInfo supports up to 60. */ |
63 | public const PAGEVIEW_DAYS = 60; |
64 | |
65 | private ServiceOptions $config; |
66 | private IConnectionProvider $connectionProvider; |
67 | private NameTableStore $changeTagDefStore; |
68 | private UserFactory $userFactory; |
69 | private UserEditTracker $userEditTracker; |
70 | private TitleFormatter $titleFormatter; |
71 | private TitleFactory $titleFactory; |
72 | private IBufferingStatsdDataFactory $statsdDataFactory; |
73 | private ?LoggerInterface $logger; |
74 | private ?PageViewService $pageViewService; |
75 | private ?ThanksQueryHelper $thanksQueryHelper; |
76 | private TaskTypeHandlerRegistry $taskTypeHandlerRegistry; |
77 | private ConfigurationLoader $configurationLoader; |
78 | |
/**
 * Store the injected services. The three trailing arguments are optional integrations
 * and may be null.
 *
 * @param ServiceOptions $config Options listed in self::CONSTRUCTOR_OPTIONS
 * @param IConnectionProvider $connectionProvider
 * @param NameTableStore $changeTagDefStore Used to map change tag names to IDs
 * @param UserFactory $userFactory
 * @param UserEditTracker $userEditTracker
 * @param TitleFormatter $titleFormatter
 * @param TitleFactory $titleFactory
 * @param IBufferingStatsdDataFactory $statsdDataFactory
 * @param TaskTypeHandlerRegistry $taskTypeHandlerRegistry
 * @param ConfigurationLoader $configurationLoader
 * @param LoggerInterface|null $loggerFactory Despite the name this is a logger instance,
 *   not a factory. A NullLogger is used when null is passed.
 * @param PageViewService|null $pageViewService
 * @param ThanksQueryHelper|null $thanksQueryHelper
 */
public function __construct(
	ServiceOptions $config,
	IConnectionProvider $connectionProvider,
	NameTableStore $changeTagDefStore,
	UserFactory $userFactory,
	UserEditTracker $userEditTracker,
	TitleFormatter $titleFormatter,
	TitleFactory $titleFactory,
	IBufferingStatsdDataFactory $statsdDataFactory,
	TaskTypeHandlerRegistry $taskTypeHandlerRegistry,
	ConfigurationLoader $configurationLoader,
	?LoggerInterface $loggerFactory,
	?PageViewService $pageViewService,
	?ThanksQueryHelper $thanksQueryHelper
) {
	// Configuration and storage access.
	$this->config = $config;
	$this->connectionProvider = $connectionProvider;
	$this->changeTagDefStore = $changeTagDefStore;

	// User and title helpers.
	$this->userFactory = $userFactory;
	$this->userEditTracker = $userEditTracker;
	$this->titleFormatter = $titleFormatter;
	$this->titleFactory = $titleFactory;

	// Newcomer task configuration.
	$this->taskTypeHandlerRegistry = $taskTypeHandlerRegistry;
	$this->configurationLoader = $configurationLoader;

	// Metrics and logging; fall back to a no-op logger so callers never need a null check.
	$this->statsdDataFactory = $statsdDataFactory;
	$this->logger = $loggerFactory !== null ? $loggerFactory : new NullLogger();

	// Optional integrations (may be null).
	$this->pageViewService = $pageViewService;
	$this->thanksQueryHelper = $thanksQueryHelper;
}
123 | |
/** @inheritDoc */
public function getUserImpact( UserIdentity $user, int $flags = IDBAccessObject::READ_NORMAL ): ?UserImpact {
	$user = $this->userFactory->newFromUserIdentity( $user );
	// Impact data is only computed for named, non-hidden accounts.
	if ( !( $user->isNamed() && !$user->isHidden() ) ) {
		return null;
	}

	$editData = $this->getEditData( $user, $flags );
	$thanksCount = $this->getThanksCount( $user, $flags );

	$editCountByNamespace = $editData->getEditCountByNamespace();
	$editCountByDay = $editData->getEditCountByDay();
	$editCountByTaskType = $editData->getEditCountByTaskType();
	$revertedEditCount = $editData->getRevertedEditCount();
	$newcomerTaskEditCount = $editData->getNewcomerTaskEditCount();
	$lastEditTimestamp = wfTimestampOrNull( TS_UNIX, $editData->getLastEditTimestamp() );
	$longestStreak = ComputeEditingStreaks::getLongestEditingStreak( $editData->getEditCountByDay() );
	$totalEditCount = $this->userEditTracker->getUserEditCount( $user );

	return new UserImpact(
		$user,
		$thanksCount,
		$editCountByNamespace,
		$editCountByDay,
		$editCountByTaskType,
		$revertedEditCount,
		$newcomerTaskEditCount,
		$lastEditTimestamp,
		$longestStreak,
		$totalEditCount
	);
}
147 | |
/** @inheritDoc */
public function getExpensiveUserImpact(
	UserIdentity $user,
	int $flags = IDBAccessObject::READ_NORMAL,
	array $priorityArticles = []
): ?ExpensiveUserImpact {
	// Taken first so the reported timing covers the whole computation.
	$start = microtime( true );
	if ( !$this->pageViewService ) {
		// No page view service was injected, so the expensive data cannot be computed.
		return null;
	}
	$user = $this->userFactory->newFromUserIdentity( $user );
	// Impact data is only computed for named, non-hidden accounts.
	if ( !( $user->isNamed() && !$user->isHidden() ) ) {
		return null;
	}

	$editData = $this->getEditData( $user, $flags );
	$thanksCount = $this->getThanksCount( $user, $flags );

	// When the caller supplied priority articles, keep only those the user actually edited;
	// otherwise treat the edited articles themselves (newest edits first) as the priority list.
	// Non-priority articles are still queried, the priority ones are merely fetched first.
	$priorityArticles = $priorityArticles
		? array_intersect_key( $editData->getEditedArticles(), $priorityArticles )
		: $editData->getEditedArticles();

	$editedArticles = $editData->getEditedArticles();
	$topArticles = array_slice( $priorityArticles, 0, self::PRIORITY_ARTICLES_LIMIT, true );
	$pageViewData = $this->getPageViewData( $user, $editedArticles, $topArticles, self::PAGEVIEW_DAYS );
	if ( $pageViewData === null ) {
		return null;
	}

	$editCountByNamespace = $editData->getEditCountByNamespace();
	$editCountByDay = $editData->getEditCountByDay();
	$editCountByTaskType = $editData->getEditCountByTaskType();
	$revertedEditCount = $editData->getRevertedEditCount();
	$newcomerTaskEditCount = $editData->getNewcomerTaskEditCount();
	$lastEditTimestamp = wfTimestampOrNull( TS_UNIX, $editData->getLastEditTimestamp() );
	$dailyTotalViews = $pageViewData['dailyTotalViews'];
	$dailyArticleViews = $pageViewData['dailyArticleViews'];
	$longestStreak = ComputeEditingStreaks::getLongestEditingStreak( $editData->getEditCountByDay() );
	$totalEditCount = $this->userEditTracker->getUserEditCount( $user );

	$expensiveUserImpact = new ExpensiveUserImpact(
		$user,
		$thanksCount,
		$editCountByNamespace,
		$editCountByDay,
		$editCountByTaskType,
		$revertedEditCount,
		$newcomerTaskEditCount,
		$lastEditTimestamp,
		$dailyTotalViews,
		$dailyArticleViews,
		$longestStreak,
		$totalEditCount
	);

	// Only successful computations are timed; the early returns above are not reported.
	$elapsed = microtime( true ) - $start;
	$this->statsdDataFactory->timing(
		'timing.growthExperiments.ComputedUserImpactLookup.getExpensiveUserImpact', $elapsed
	);
	return $expensiveUserImpact;
}
203 | |
/**
 * Run a SQL query to fetch edit data for the user.
 *
 * The query reads at most self::MAX_EDITS rows, newest first, from the revision table,
 * LEFT JOINed against the change tags this class tracks (the task type change tags plus
 * the "reverted" tag). The join produces one row per (revision, tracked tag) pair, so a
 * revision carrying several tracked tags appears several times in the result. Tag based
 * counters are updated for every row, while per-edit statistics (namespace and day counts,
 * last edit timestamp, edited articles) are updated only once per revision.
 *
 * @param User $user
 * @param int $flags IDBAccessObject::READ_* bitfield; selects the database connection
 *   and is passed on as the query recency.
 * @return EditData
 * @throws \Exception
 */
private function getEditData( User $user, int $flags ): EditData {
	$db = DBAccessObjectUtils::getDBFromRecency( $this->connectionProvider, $flags );

	$queryBuilder = $db->newSelectQueryBuilder()
		->table( 'revision' )
		->join( 'page', null, 'rev_page = page_id' );

	$taskChangeTagNames = $this->taskTypeHandlerRegistry->getUniqueChangeTags();
	$additionalChangeTagNames = [
		ChangeTags::TAG_REVERTED
	];

	$changeTagNames = array_merge( $taskChangeTagNames, $additionalChangeTagNames );
	// Change tag ID => task type ID (null for tags that do not belong to a task type).
	$changeTagIds = [];
	// Change tag ID => change tag name.
	$changeTagIdToName = [];
	foreach ( $changeTagNames as $changeTagName ) {
		try {
			// Presume the tag is not related to a task; set $taskTypeId to the task type ID
			// if it is.
			$taskTypeId = null;
			if ( in_array( $changeTagName, $taskChangeTagNames, true ) ) {
				$taskTypeHandlerId = $this->taskTypeHandlerRegistry->getTaskTypeHandlerIdByChangeTagName(
					$changeTagName
				);
				if ( !$taskTypeHandlerId ) {
					// In theory shouldn't be possible, given that the change tag names originate from the
					// task type handler registry. Adding this to make phan happy.
					throw new LogicException(
						"Unable to find task type handler ID for change tag \"$changeTagName\""
					);
				}
				$taskTypeHandler = $this->taskTypeHandlerRegistry->get( $taskTypeHandlerId );
				$taskTypeId = $taskTypeHandler->getTaskTypeIdByChangeTagName( $changeTagName );
			}

			$tagId = $this->changeTagDefStore->getId( $changeTagName );
			$changeTagIds[$tagId] = $taskTypeId;
			$changeTagIdToName[$tagId] = $changeTagName;
		} catch ( NameTableAccessException $nameTableAccessException ) {
			// Some tags won't exist in test scenarios, and possibly in some small wikis where
			// no suggested edits have been done yet. We can safely ignore the exception,
			// it will mean that 'newcomerTaskEditCount' is 0 in the result.
		}
	}

	if ( $changeTagIds ) {
		$queryBuilder->leftJoin( 'change_tag', null, [
			'rev_id = ct_rev_id',
			'ct_tag_id' => array_keys( $changeTagIds ),
		] );
		$queryBuilder->field( 'ct_tag_id' );
	}

	// rev_id is needed to recognize rows that belong to the same revision (see the loop below).
	$queryBuilder->fields( [ 'rev_id', 'page_namespace', 'page_title', 'rev_timestamp' ] );
	$queryBuilder->where( [ 'rev_actor' => $user->getActorId() ] );
	$queryBuilder->where( $db->bitAnd( 'rev_deleted', RevisionRecord::DELETED_USER ) . ' = 0' );
	// hopefully able to use the rev_actor_timestamp index for an efficient query
	$queryBuilder->orderBy( 'rev_timestamp', 'DESC' );
	$queryBuilder->limit( self::MAX_EDITS );
	$queryBuilder->recency( $flags );
	$queryBuilder->caller( __METHOD__ );
	// T331264
	$queryBuilder->straightJoinOption();

	$editCountByNamespace = [];
	$editCountByDay = [];
	$revertedEditCount = 0;
	$editCountByTaskType = array_fill_keys( array_keys( $this->configurationLoader->getTaskTypes() ), 0 );
	$newcomerTaskEditCount = 0;
	$lastEditTimestamp = null;
	$editedArticles = [];

	// Avoid using registered user timezone preference which can be used to de-anonymize users.
	// Use anonymous UserIdentity instead which will fall back to use the wiki's default
	// timezone and local tz offset. The object does not depend on the row, so create it once.
	$anonymousUser = $this->userFactory->newAnonymous();
	// Revision IDs whose per-edit statistics have already been counted.
	$countedRevisions = [];

	foreach ( $queryBuilder->fetchResultSet() as $row ) {
		// Tag based counters: every row describes a distinct (revision, tag) pair,
		// so these are updated for each row.
		$rowTagId = $row->ct_tag_id ?? null;
		if ( $rowTagId ) {
			$taskTypeId = $changeTagIds[$rowTagId];
			if ( $taskTypeId ) {
				$newcomerTaskEditCount++;
				$editCountByTaskType[$taskTypeId] = ( $editCountByTaskType[$taskTypeId] ?? 0 ) + 1;
			}

			if ( $changeTagIdToName[$rowTagId] === ChangeTags::TAG_REVERTED ) {
				$revertedEditCount++;
			}
		}

		// Per-edit statistics: a revision with more than one tracked tag (e.g. a suggested
		// edit that was later reverted) shows up in several rows; count it only once.
		if ( isset( $countedRevisions[$row->rev_id] ) ) {
			continue;
		}
		$countedRevisions[$row->rev_id] = true;

		$editTime = new MWTimestamp( $row->rev_timestamp );
		$editTime->offsetForUser( $anonymousUser );
		$day = $editTime->format( 'Ymd' );

		$editCountByNamespace[$row->page_namespace]
			= ( $editCountByNamespace[$row->page_namespace] ?? 0 ) + 1;
		$editCountByDay[$day] = ( $editCountByDay[$day] ?? 0 ) + 1;
		// Rows are ordered newest first, so the first one seen is the last edit.
		$lastEditTimestamp ??= $row->rev_timestamp;

		// Computed values $editCountByNamespace, $editCountByDay, $newcomerTaskEditCount and $lastEditTimestamp
		// use data from all namespaces. Filter out non-article pages from the collection of returned articles
		// ($editedArticles) since they are not relevant for the user article list of recent edits.
		if ( (int)$row->page_namespace !== NS_MAIN ) {
			continue;
		}
		$linkTarget = new TitleValue( (int)$row->page_namespace, $row->page_title );
		$titleDbKey = $this->titleFormatter->getPrefixedDBkey( $linkTarget );
		// We're iterating over the result set, newest edits to oldest edits in descending order. The same
		// article can have been edited multiple times. We'll stash the revision timestamp of the oldest
		// edit made by the user to the article; we will use that later to calculate the "start date"
		// for the impact of the user for a particular article, e.g. when making a pageviews tool URL
		// or choosing the date range for page view data to display for an article.
		$editedArticles[$titleDbKey]['oldestEdit'] = $row->rev_timestamp;
		$editedArticles[$titleDbKey]['newestEdit'] ??= $row->rev_timestamp;
	}

	return new EditData(
		$editCountByNamespace,
		// Days were collected newest first; reverse so the result runs from oldest to newest.
		array_reverse( $this->updateToIso8601DateKeys( $editCountByDay ) ),
		$editCountByTaskType,
		$revertedEditCount,
		$newcomerTaskEditCount,
		$lastEditTimestamp,
		$editedArticles
	);
}
338 | |
/**
 * Count the thanks the user received, capped at self::MAX_THANKS.
 *
 * @param User $user
 * @param int $flags
 * @return int Number of thanks received for the user ID; 0 when no thanks query helper
 *   was injected.
 */
private function getThanksCount( User $user, int $flags ): int {
	if ( !$this->thanksQueryHelper ) {
		return 0;
	}
	return $this->thanksQueryHelper->getThanksReceivedCount( $user, self::MAX_THANKS, $flags );
}
349 | |
/**
 * Returns page views and other data for the given articles.
 * Must not be called when $this->pageViewService is null.
 *
 * The return type is nullable and callers check for null, but this implementation
 * always returns an array: when fetching fails the arrays simply contain less (or no) data.
 *
 * @param User $user
 * @param array[] $titles Data about edited articles. See {@see EditData::getEditedArticles()}
 *   for format.
 * @param array[] $priorityTitles A subset of $titles that should get priority treatment
 *   (in case not all the pageview data can be retrieved due to resource limits).
 * @param int $days How many days to query. No more than 60.
 * @return array|null
 *   - dailyTotalViews: (array<string, int>) daily number of total views of articles in $titles,
 *     keyed by ISO 8601 date. Days with zero total views are omitted.
 *   - dailyArticleViews: (array[]) Daily article views and other data. Keyed by
 *     prefixed DBkey; values are arrays with the following fields:
 *     - views: (int[]) daily article views, keyed by ISO 8601 date. Days with zero views
 *       are omitted (T351898), so the last day can be missing if it's still being processed.
 *     - firstEditDate: (string) ISO 8601 date of the user's first edit to the article.
 *       If the user made a very high number of total edits, it might just be some edit the
 *       user made to the article, not necessarily the first.
 *     - newestEdit: (string) MW_TS timestamp of the user's most recent edit.
 *     - imageUrl: (string) URL of a thumbnail of the article's main image. The key is
 *       omitted when no image URL is available.
 * @phan-return array{dailyTotalViews:array<string,int>,dailyArticleViews:array<string,array{views:array<string,int>,firstEditDate:string,newestEdit:string,imageUrl:?string}>}|null
 * @throws MalformedTitleException
 */
private function getPageViewData( User $user, array $titles, array $priorityTitles, int $days ): ?array {
	// Short-circuit if the user has no edits.
	if ( !$titles ) {
		return [
			'dailyTotalViews' => [],
			'dailyArticleViews' => [],
		];
	}

	// $priorityTitles is a subset of $titles but putting it to the front makes sure the data
	// for those titles is fetched even if PageViewInfo cuts off the list of titles at some
	// point, which it is allowed to do.
	$allTitles = $priorityTitles + $titles;
	$allTitleObjects = [];

	foreach ( $allTitles as $title => $data ) {
		$allTitleObjects[$title] = [
			'title' => $this->titleFactory->newFromTextThrow( $title ),
			// rev_timestamp is in TS_MW format (e.g. 20210406200220), we only want
			// the first 8 characters for comparison with Ymd format date strings.
			'rev_timestamp' => substr( $data['oldestEdit'], 0, 8 ),
			'newestEdit' => $data['newestEdit'],
			'oldestEdit' => $data['oldestEdit']
		];
	}
	if ( defined( 'MEDIAWIKI_JOB_RUNNER' ) || MW_ENTRY_POINT === 'cli' ) {
		$pageViewData = $this->getPageViewDataInJobContext( $allTitleObjects, $user, $days );
	} else {
		$pageViewData = $this->getPageViewDataInWebRequestContext( $allTitleObjects, $user, $days );
	}

	$dailyTotalViews = [];
	$dailyArticleViews = [];
	// A separate loop variable is used for the per-day data so the $days parameter is not clobbered.
	foreach ( $pageViewData as $title => $viewsByDay ) {
		// Normalize titles as PageViewInfo does not define which title format it uses :(
		$title = str_replace( ' ', '_', $title );
		if ( !isset( $allTitleObjects[$title] ) ) {
			// We have no edit data for a title we did not ask about, so no meaningful
			// firstEditDate / newestEdit could be reported for it. Skip it instead of
			// reading undefined array keys.
			$this->logger->info(
				'Ignoring page view data for unexpected title {title} for user {user}',
				[ 'user' => $user->getName(), 'title' => $title ]
			);
			continue;
		}
		$titleData = $allTitleObjects[$title];

		$mwTitle = $this->titleFactory->newFromTextThrow( $title );
		$imageUrl = $this->getImage( $mwTitle );
		if ( $imageUrl ) {
			$dailyArticleViews[$title]['imageUrl'] = $imageUrl;
		}
		$firstEditDate = new DateTime( $titleData['rev_timestamp'] );
		$dailyArticleViews[$title]['firstEditDate'] = $firstEditDate->format( 'Y-m-d' );
		$dailyArticleViews[$title]['newestEdit'] = $titleData['newestEdit'];

		foreach ( $viewsByDay as $day => $views ) {
			// NOTE: Do not insert the data if it is a zero due to JSON blob size issues (T351898)

			$todayTotalViews = ( ( $dailyTotalViews[$day] ?? 0 ) + $views );
			if ( $todayTotalViews > 0 ) {
				$dailyTotalViews[$day] = $todayTotalViews;
			}

			$todayArticleViews = ( $views ?? 0 );
			if ( $todayArticleViews > 0 ) {
				$dailyArticleViews[$title]['views'][$day] = $todayArticleViews;
			}
		}
	}

	return [
		'dailyTotalViews' => $dailyTotalViews,
		'dailyArticleViews' => $dailyArticleViews,
	];
}
439 | |
/**
 * Fetch page view data by querying the page view service repeatedly, each time for the
 * titles that have no data yet. The loop ends when every title has data, when the
 * configured article count or processing time limit is exceeded, or when a request
 * brings no new data.
 *
 * @param array[] $allTitleObjects Per-title data keyed by title; each entry has a 'title' field.
 * @param UserIdentity $user Only used for log context.
 * @param int $days How many days to query.
 * @return array Page view data collected so far, keyed by title.
 */
private function getPageViewDataInJobContext( array $allTitleObjects, UserIdentity $user, int $days ): array {
	$collected = [];
	$remaining = $allTitleObjects;
	$startedAt = microtime( true );
	while ( count( $remaining ) ) {
		$remainingBefore = count( $remaining );

		$maxArticles = $this->config->get( 'GEUserImpactMaxArticlesToProcessForPageviews' );
		if ( count( $collected ) > $maxArticles ) {
			$this->logger->info(
				'Reached article count limit while fetching page view data for {count} titles for user {user}.',
				[ 'user' => $user->getName(), 'count' => count( $allTitleObjects ) ]
			);
			break;
		}

		$elapsed = microtime( true ) - $startedAt;
		if ( $elapsed > $this->config->get( 'GEUserImpactMaximumProcessTimeSeconds' ) ) {
			$this->logger->info(
				"Reached maximum process time while fetching page view data for {count} titles for user {user}",
				[ 'user' => $user->getName(), 'count' => count( $allTitleObjects ) ]
			);
			break;
		}

		$status = $this->pageViewService->getPageData( array_column( $remaining, 'title' ), $days );
		if ( !$status->isGood() ) {
			$this->logPageDataBadStatus( $status );
		}
		if ( $status->isOK() ) {
			// Keep only the entries the service flagged as successful; existing entries win.
			$succeeded = array_filter( $status->success );
			$collected += array_intersect_key( $status->getValue(), $succeeded );
		}

		$remaining = array_diff_key( $remaining, $collected );
		if ( count( $remaining ) === $remainingBefore ) {
			// Received no new data. Abort to avoid a loop - errors are cached for a short time
			// so re-requesting them wouldn't help.
			break;
		}
	}
	return $collected;
}
479 | |
/**
 * Fetch page view data with a single request to the page view service.
 *
 * @param array[] $allTitleObjects Per-title data keyed by title; each entry has a 'title' field.
 * @param UserIdentity $user Only used for log context.
 * @param int $days How many days to query.
 * @return array Value of the returned status, or an empty array when the status is not OK.
 */
private function getPageViewDataInWebRequestContext(
	array $allTitleObjects, UserIdentity $user, int $days
): array {
	$requestedTitles = array_column( $allTitleObjects, 'title' );
	$status = $this->pageViewService->getPageData( $requestedTitles, $days );

	if ( $status->isGood() ) {
		if ( $status->successCount < count( $allTitleObjects ) ) {
			$failedTitles = array_keys( array_diff_key( $allTitleObjects, $status->success ) );
			$logContext = [
				'user' => $user->getName(),
				'count' => count( $failedTitles ),
				// Truncated so the log entry stays small.
				'failedTitles' => substr( implode( ',', $failedTitles ), 0, 250 ),
			];
			$this->logger->info(
				"Failed to get page view data for {count} titles for user {user}",
				$logContext
			);
		}
	} else {
		$this->logPageDataBadStatus( $status );
		if ( !$status->isOK() ) {
			return [];
		}
	}

	return $status->getValue();
}
501 | |
/**
 * Report a non-good page data status.
 *
 * A status whose only messages are pvi-cached-error-title is not logged (T328945); its
 * fail count is added to a statsd counter instead. Any other status is logged as an error.
 *
 * @param StatusValue $status
 * @return void
 */
private function logPageDataBadStatus( StatusValue $status ) {
	if ( !$status->hasMessagesExcept( 'pvi-cached-error-title' ) ) {
		$this->statsdDataFactory->updateCount(
			'GrowthExperiments.ComputedUserImpactLookup.PviCachedErrorTitle',
			$status->failCount
		);
		return;
	}

	$errorText = Status::wrap( $status )->getWikiText( false, false, 'en' );
	$this->logger->error( $errorText );
}
521 | |
/**
 * Re-key an array from MW_TS date prefixes (YYYYMMDD...) to ISO 8601 dates (YYYY-MM-DD).
 * Values and their order are left untouched.
 *
 * @param array $mwTsArray
 * @return array
 */
private function updateToIso8601DateKeys( array $mwTsArray ): array {
	$rekeyed = [];
	foreach ( $mwTsArray as $mwTsKey => $value ) {
		// All-digit keys are stored as integers by PHP, so work on the string form.
		$digits = (string)$mwTsKey;
		$year = substr( $digits, 0, 4 );
		$month = substr( $digits, 4, 2 );
		$dayOfMonth = substr( $digits, 6, 2 );
		$rekeyed[$year . '-' . $month . '-' . $dayOfMonth] = $value;
	}
	return $rekeyed;
}
536 | |
/**
 * Get the URL of a thumbnail of the page's main image.
 * Depends on the PageImages extension.
 *
 * Portrait and square images are requested self::THUMBNAIL_SIZE pixels wide. Landscape
 * images are requested wide enough for the thumbnail to be self::THUMBNAIL_SIZE pixels high.
 *
 * @param Title $title
 * @return ?string Thumbnail URL; null when PageImages is not loaded, the page has no
 *   page image, the image dimensions are unknown, or no thumbnail URL could be produced.
 */
private function getImage( Title $title ): ?string {
	if ( !ExtensionRegistry::getInstance()->isLoaded( 'PageImages' ) ) {
		return null;
	}

	$imageFile = PageImages::getPageImage( $title );
	if ( !$imageFile ) {
		return null;
	}

	$imageWidth = (int)$imageFile->getWidth();
	$imageHeight = (int)$imageFile->getHeight();
	if ( $imageWidth <= 0 || $imageHeight <= 0 ) {
		// Dimensions are unknown (the getters can report failure instead of a size).
		// Bail out rather than divide by zero below.
		return null;
	}

	$thumbWidth = self::THUMBNAIL_SIZE;
	if ( $imageWidth > $imageHeight ) {
		// Avoid decimals in the width because it makes the thumb url construction fail
		$thumbWidth = (int)floor( self::THUMBNAIL_SIZE / $imageHeight * $imageWidth );
	}

	$thumb = $imageFile->transform( [ 'width' => $thumbWidth ] );
	if ( !$thumb ) {
		return null;
	}
	return $thumb->getUrl() ?: null;
}
567 | |
568 | } |