Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
78.24% covered (warning)
78.24%
205 / 262
27.27% covered (danger)
27.27%
3 / 11
CRAP
0.00% covered (danger)
0.00%
0 / 1
ComputedUserImpactLookup
78.24% covered (warning)
78.24%
205 / 262
27.27% covered (danger)
27.27%
3 / 11
86.15
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
1
 getUserImpact
94.12% covered (success)
94.12%
16 / 17
0.00% covered (danger)
0.00%
0 / 1
3.00
 getExpensiveUserImpact
89.19% covered (warning)
89.19%
33 / 37
0.00% covered (danger)
0.00%
0 / 1
6.05
 getEditData
93.83% covered (success)
93.83%
76 / 81
0.00% covered (danger)
0.00%
0 / 1
12.03
 getThanksCount
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 getPageViewData
94.87% covered (success)
94.87%
37 / 39
0.00% covered (danger)
0.00%
0 / 1
10.01
 getPageViewDataInJobContext
58.62% covered (warning)
58.62%
17 / 29
0.00% covered (danger)
0.00%
0 / 1
10.47
 getPageViewDataInWebRequestContext
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
20
 logPageDataBadStatus
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
6
 updateToIso8601DateKeys
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
2
 getImage
28.57% covered (danger)
28.57%
4 / 14
0.00% covered (danger)
0.00%
0 / 1
19.12
1<?php
2
3namespace GrowthExperiments\UserImpact;
4
5use ChangeTags;
6use DateTime;
7use DBAccessObjectUtils;
8use ExtensionRegistry;
9use GrowthExperiments\NewcomerTasks\ConfigurationLoader\ConfigurationLoader;
10use GrowthExperiments\NewcomerTasks\TaskType\TaskTypeHandlerRegistry;
11use IBufferingStatsdDataFactory;
12use IDBAccessObject;
13use LogicException;
14use MediaWiki\Config\ServiceOptions;
15use MediaWiki\Extension\PageViewInfo\PageViewService;
16use MediaWiki\Extension\Thanks\ThanksQueryHelper;
17use MediaWiki\MainConfigNames;
18use MediaWiki\Revision\RevisionRecord;
19use MediaWiki\Status\Status;
20use MediaWiki\Storage\NameTableAccessException;
21use MediaWiki\Storage\NameTableStore;
22use MediaWiki\Title\MalformedTitleException;
23use MediaWiki\Title\Title;
24use MediaWiki\Title\TitleFactory;
25use MediaWiki\Title\TitleFormatter;
26use MediaWiki\Title\TitleValue;
27use MediaWiki\User\User;
28use MediaWiki\User\UserEditTracker;
29use MediaWiki\User\UserFactory;
30use MediaWiki\User\UserIdentity;
31use MediaWiki\Utils\MWTimestamp;
32use PageImages\PageImages;
33use Psr\Log\LoggerInterface;
34use Psr\Log\NullLogger;
35use StatusValue;
36use Wikimedia\Rdbms\IConnectionProvider;
37
38class ComputedUserImpactLookup implements UserImpactLookup {
39
40    public const CONSTRUCTOR_OPTIONS = [
41        MainConfigNames::LocalTZoffset,
42        'GEUserImpactMaxArticlesToProcessForPageviews',
43        'GEUserImpactMaximumProcessTimeSeconds',
44    ];
45
46    /**
47     * Size in pixels of the thumb image to request to PageImages. Matches the Codex
48     * thumbnail component size it is rendered in. Used in the articles list (ArticlesList.vue)
49     * in the impact module.
50     */
51    private const THUMBNAIL_SIZE = 40;
52
53    /** Cutoff for edit statistics. See also DATA_ROWS_LIMIT in ScoreCards.vue. */
54    private const MAX_EDITS = 1000;
55
56    /** Cutoff for thanks count. See also DATA_ROWS_LIMIT in ScoreCards.vue. */
57    private const MAX_THANKS = 1000;
58
59    /** How many articles to use for $priorityTitles in getPageViewData(). */
60    private const PRIORITY_ARTICLES_LIMIT = 5;
61
62    /** How many days of pageview data to get. PageViewInfo supports up to 60. */
63    public const PAGEVIEW_DAYS = 60;
64
65    private ServiceOptions $config;
66    private IConnectionProvider $connectionProvider;
67    private NameTableStore $changeTagDefStore;
68    private UserFactory $userFactory;
69    private UserEditTracker $userEditTracker;
70    private TitleFormatter $titleFormatter;
71    private TitleFactory $titleFactory;
72    private IBufferingStatsdDataFactory $statsdDataFactory;
73    private ?LoggerInterface $logger;
74    private ?PageViewService $pageViewService;
75    private ?ThanksQueryHelper $thanksQueryHelper;
76    private TaskTypeHandlerRegistry $taskTypeHandlerRegistry;
77    private ConfigurationLoader $configurationLoader;
78
79    /**
80     * @param ServiceOptions $config
81     * @param IConnectionProvider $connectionProvider
82     * @param NameTableStore $changeTagDefStore
83     * @param UserFactory $userFactory
84     * @param UserEditTracker $userEditTracker
85     * @param TitleFormatter $titleFormatter
86     * @param TitleFactory $titleFactory
87     * @param IBufferingStatsdDataFactory $statsdDataFactory
88     * @param TaskTypeHandlerRegistry $taskTypeHandlerRegistry
89     * @param ConfigurationLoader $configurationLoader
90     * @param LoggerInterface|null $loggerFactory
91     * @param PageViewService|null $pageViewService
92     * @param ThanksQueryHelper|null $thanksQueryHelper
93     */
94    public function __construct(
95        ServiceOptions $config,
96        IConnectionProvider $connectionProvider,
97        NameTableStore $changeTagDefStore,
98        UserFactory $userFactory,
99        UserEditTracker $userEditTracker,
100        TitleFormatter $titleFormatter,
101        TitleFactory $titleFactory,
102        IBufferingStatsdDataFactory $statsdDataFactory,
103        TaskTypeHandlerRegistry $taskTypeHandlerRegistry,
104        ConfigurationLoader $configurationLoader,
105        ?LoggerInterface $loggerFactory,
106        ?PageViewService $pageViewService,
107        ?ThanksQueryHelper $thanksQueryHelper
108    ) {
109        $this->config = $config;
110        $this->connectionProvider = $connectionProvider;
111        $this->changeTagDefStore = $changeTagDefStore;
112        $this->userFactory = $userFactory;
113        $this->userEditTracker = $userEditTracker;
114        $this->titleFormatter = $titleFormatter;
115        $this->titleFactory = $titleFactory;
116        $this->statsdDataFactory = $statsdDataFactory;
117        $this->logger = $loggerFactory ?? new NullLogger();
118        $this->pageViewService = $pageViewService;
119        $this->thanksQueryHelper = $thanksQueryHelper;
120        $this->taskTypeHandlerRegistry = $taskTypeHandlerRegistry;
121        $this->configurationLoader = $configurationLoader;
122    }
123
124    /** @inheritDoc */
125    public function getUserImpact( UserIdentity $user, int $flags = IDBAccessObject::READ_NORMAL ): ?UserImpact {
126        $user = $this->userFactory->newFromUserIdentity( $user );
127        if ( !$user->isNamed() || $user->isHidden() ) {
128            return null;
129        }
130
131        $editData = $this->getEditData( $user, $flags );
132        $thanksCount = $this->getThanksCount( $user, $flags );
133
134        return new UserImpact(
135            $user,
136            $thanksCount,
137            $editData->getEditCountByNamespace(),
138            $editData->getEditCountByDay(),
139            $editData->getEditCountByTaskType(),
140            $editData->getRevertedEditCount(),
141            $editData->getNewcomerTaskEditCount(),
142            wfTimestampOrNull( TS_UNIX, $editData->getLastEditTimestamp() ),
143            ComputeEditingStreaks::getLongestEditingStreak( $editData->getEditCountByDay() ),
144            $this->userEditTracker->getUserEditCount( $user )
145        );
146    }
147
148    /** @inheritDoc */
149    public function getExpensiveUserImpact(
150        UserIdentity $user,
151        int $flags = IDBAccessObject::READ_NORMAL,
152        array $priorityArticles = []
153    ): ?ExpensiveUserImpact {
154        $start = microtime( true );
155        if ( !$this->pageViewService ) {
156            return null;
157        }
158        $user = $this->userFactory->newFromUserIdentity( $user );
159        if ( !$user->isNamed() || $user->isHidden() ) {
160            return null;
161        }
162
163        $editData = $this->getEditData( $user, $flags );
164        $thanksCount = $this->getThanksCount( $user, $flags );
165        // Use priority articles if known, otherwise make use of the last edited articles
166        // as "top articles" .
167        // This won't exclude retrieving data for other articles, but ensures that we fetch page
168        // view data for priority (as defined by the caller) articles first.
169        if ( $priorityArticles ) {
170            $priorityArticles = array_intersect_key( $editData->getEditedArticles(), $priorityArticles );
171        } else {
172            $priorityArticles = $editData->getEditedArticles();
173        }
174        $pageViewData = $this->getPageViewData(
175            $user,
176            $editData->getEditedArticles(),
177            array_slice( $priorityArticles, 0, self::PRIORITY_ARTICLES_LIMIT, true ),
178            self::PAGEVIEW_DAYS
179        );
180        if ( $pageViewData === null ) {
181            return null;
182        }
183
184        $expensiveUserImpact = new ExpensiveUserImpact(
185            $user,
186            $thanksCount,
187            $editData->getEditCountByNamespace(),
188            $editData->getEditCountByDay(),
189            $editData->getEditCountByTaskType(),
190            $editData->getRevertedEditCount(),
191            $editData->getNewcomerTaskEditCount(),
192            wfTimestampOrNull( TS_UNIX, $editData->getLastEditTimestamp() ),
193            $pageViewData['dailyTotalViews'],
194            $pageViewData['dailyArticleViews'],
195            ComputeEditingStreaks::getLongestEditingStreak( $editData->getEditCountByDay() ),
196            $this->userEditTracker->getUserEditCount( $user )
197        );
198        $this->statsdDataFactory->timing(
199            'timing.growthExperiments.ComputedUserImpactLookup.getExpensiveUserImpact', microtime( true ) - $start
200        );
201        return $expensiveUserImpact;
202    }
203
204    /**
205     * Run a SQL query to fetch edit data for the user.
206     *
207     * @param User $user
208     * @param int $flags
209     * @return EditData
210     * @throws \Exception
211     */
212    private function getEditData( User $user, int $flags ): EditData {
213        $db = DBAccessObjectUtils::getDBFromRecency( $this->connectionProvider, $flags );
214
215        $queryBuilder = $db->newSelectQueryBuilder()
216            ->table( 'revision' )
217            ->join( 'page', null, 'rev_page = page_id' );
218
219        $taskChangeTagNames = $this->taskTypeHandlerRegistry->getUniqueChangeTags();
220        $additionalChangeTagNames = [
221            ChangeTags::TAG_REVERTED
222        ];
223
224        $changeTagNames = array_merge( $taskChangeTagNames, $additionalChangeTagNames );
225        $changeTagIds = [];
226        $changeTagIdToName = [];
227        foreach ( $changeTagNames as $changeTagName ) {
228            try {
229                // Presume the tag is not related to a task; set $taskTypeId to the task type ID
230                // if it is.
231                $taskTypeId = null;
232                if ( in_array( $changeTagName, $taskChangeTagNames ) ) {
233                    $taskTypeHandlerId = $this->taskTypeHandlerRegistry->getTaskTypeHandlerIdByChangeTagName(
234                        $changeTagName
235                    );
236                    if ( !$taskTypeHandlerId ) {
237                        // In theory shouldn't be possible, given that the change tag names originate from the
238                        // task type handler registry. Adding this to make phan happy.
239                        throw new LogicException(
240                            "Unable to find task type handler ID for change tag \"$changeTagName\""
241                        );
242                    }
243                    $taskTypeHandler = $this->taskTypeHandlerRegistry->get( $taskTypeHandlerId );
244                    $taskTypeId = $taskTypeHandler->getTaskTypeIdByChangeTagName( $changeTagName );
245                }
246
247                $tagId = $this->changeTagDefStore->getId( $changeTagName );
248                $changeTagIds[$tagId] = $taskTypeId;
249                $changeTagIdToName[$tagId] = $changeTagName;
250            } catch ( NameTableAccessException $nameTableAccessException ) {
251                // Some tags won't exist in test scenarios, and possibly in some small wikis where
252                // no suggested edits have been done yet. We can safely ignore the exception,
253                // it will mean that 'newcomerTaskEditCount' is 0 in the result.
254            }
255        }
256
257        if ( $changeTagIds ) {
258            $queryBuilder->leftJoin( 'change_tag', null, [
259                'rev_id = ct_rev_id',
260                'ct_tag_id' => array_keys( $changeTagIds ),
261            ] );
262            $queryBuilder->field( 'ct_tag_id' );
263        }
264
265        $queryBuilder->fields( [ 'page_namespace', 'page_title', 'rev_timestamp' ] );
266        $queryBuilder->where( [ 'rev_actor' => $user->getActorId() ] );
267        $queryBuilder->where( $db->bitAnd( 'rev_deleted', RevisionRecord::DELETED_USER ) . ' = 0' );
268        // hopefully able to use the rev_actor_timestamp index for an efficient query
269        $queryBuilder->orderBy( 'rev_timestamp', 'DESC' );
270        $queryBuilder->limit( self::MAX_EDITS );
271        $queryBuilder->recency( $flags );
272        $queryBuilder->caller( __METHOD__ );
273        // T331264
274        $queryBuilder->straightJoinOption();
275
276        $editCountByNamespace = [];
277        $editCountByDay = [];
278        $revertedEditCount = 0;
279        $editCountByTaskType = array_fill_keys( array_keys( $this->configurationLoader->getTaskTypes() ), 0 );
280        $newcomerTaskEditCount = 0;
281        $lastEditTimestamp = null;
282        $editedArticles = [];
283
284        foreach ( $queryBuilder->fetchResultSet() as $row ) {
285            $linkTarget = new TitleValue( (int)$row->page_namespace, $row->page_title );
286            $titleDbKey = $this->titleFormatter->getPrefixedDBkey( $linkTarget );
287            $editTime = new MWTimestamp( $row->rev_timestamp );
288            // Avoid using registered user timezone preference which can be used to de-anonymize users.
289            // Use anonymous UserIdentity instead which will fall back to use the wiki's default
290            // timezone and local tz offset.
291            $editTime->offsetForUser( $this->userFactory->newAnonymous() );
292            $day = $editTime->format( 'Ymd' );
293
294            $editCountByNamespace[$row->page_namespace]
295                = ( $editCountByNamespace[$row->page_namespace] ?? 0 ) + 1;
296            $editCountByDay[$day] = ( $editCountByDay[$day] ?? 0 ) + 1;
297            if ( $row->ct_tag_id ?? null ) {
298                $taskTypeId = $changeTagIds[$row->ct_tag_id];
299                if ( $taskTypeId ) {
300                    $newcomerTaskEditCount++;
301                    if ( !isset( $editCountByTaskType[$taskTypeId] ) ) {
302                        $editCountByTaskType[$taskTypeId] = 0;
303                    }
304                    $editCountByTaskType[$taskTypeId]++;
305                }
306
307                $changeTagName = $changeTagIdToName[$row->ct_tag_id];
308                if ( $changeTagName === ChangeTags::TAG_REVERTED ) {
309                    $revertedEditCount++;
310                }
311            }
312            $lastEditTimestamp ??= $row->rev_timestamp;
313            // Computed values $editCountByNamespace, $editCountByDay, $newcomerTaskEditCount and $lastEditTimestamp
314            // use data from all namespaces. Filter out non-article pages from the collection of returned articles
315            // ($editedArticles) since they are not relevant for the user article list of recent edits.
316            if ( (int)$row->page_namespace !== NS_MAIN ) {
317                continue;
318            }
319            // We're iterating over the result set, newest edits to oldest edits in descending order. The same
320            // article can have been edited multiple times. We'll stash the revision timestamp of the oldest
321            // edit made by the user to the article; we will use that later to calculate the "start date"
322            // for the impact of the user for a particular article, e.g. when making a pageviews tool URL
323            // or choosing the date range for page view data to display for an article.
324            $editedArticles[$titleDbKey]['oldestEdit'] = $row->rev_timestamp;
325            $editedArticles[$titleDbKey]['newestEdit'] ??= $row->rev_timestamp;
326        }
327
328        return new EditData(
329            $editCountByNamespace,
330            array_reverse( $this->updateToIso8601DateKeys( $editCountByDay ) ),
331            $editCountByTaskType,
332            $revertedEditCount,
333            $newcomerTaskEditCount,
334            $lastEditTimestamp,
335            $editedArticles
336        );
337    }
338
339    /**
340     * @param User $user
341     * @param int $flags
342     * @return int Number of thanks received for the user ID
343     */
344    private function getThanksCount( User $user, int $flags ): int {
345        return $this->thanksQueryHelper
346            ? $this->thanksQueryHelper->getThanksReceivedCount( $user, self::MAX_THANKS, $flags )
347            : 0;
348    }
349
350    /**
351     * Returns page views and other data, or null on error during data fetching.
352     * Must not be called when $this->pageViewService is null.
353     * @param User $user
354     * @param array[] $titles Data about edited articles. See {@see EditData::getEditedArticles()}
355     *   for format.
356     * @param array[] $priorityTitles A subset of $titles that should get priority treatment
357     *   (in case not all the pageview data can be retrieved due to resource limits).
358     * @param int $days How many days to query. No more than 60.
359     * @return array|null
360     *   - dailyTotalViews: (array<string, int>) daily number of total views of articles in $titles,
361     *     keyed by ISO 8601 date.
362     *   - dailyArticleViews: (array[]) Daily article views and other data. Keyed by
363     *     prefixed DBkey; values are arrays with the following fields:
364     *     - views: (int[]) daily article views, keyed by ISO 8601 date. Might be 0 for the last day
365     *       if it's still being processed.
366     *     - firstEditDate: (string) ISO 8601 date of the user's first edit to the article.
367     *       If the user made a very high number of total edits, it might just be some edit the
368     *       user made to the article, not necessarily the first.
369     *     - newestEdit: (string) MW_TS timestamp of the user's most recent edit.
370     *     - imageUrl: (string|null) URL of a thumbnail of the article's main image.
371     * @phan-return array{dailyTotalViews:array<string,int>,dailyArticleViews:array<string,array{views:array<string,int>,firstEditDate:string,newestEdit:string,imageUrl:?string}>}|null
372     * @throws MalformedTitleException
373     */
374    private function getPageViewData( User $user, array $titles, array $priorityTitles, int $days ): ?array {
375        // Short-circuit if the user has no edits.
376        if ( !$titles ) {
377            return [
378                'dailyTotalViews' => [],
379                'dailyArticleViews' => [],
380            ];
381        }
382
383        // $priorityTitles is a subset of $titles but putting it to the front makes sure the data
384        // for those titles is fetched even if PageViewInfo cuts off the list of titles at some
385        // point, which it is allowed to do.
386        $allTitles = $priorityTitles + $titles;
387        $allTitleObjects = [];
388
389        foreach ( $allTitles as $title => $data ) {
390            $allTitleObjects[$title] = [
391                'title' => $this->titleFactory->newFromTextThrow( $title ),
392                // rev_timestamp is in TS_MW format (e.g. 20210406200220), we only want
393                // the first 8 characters for comparison with Ymd format date strings.
394                'rev_timestamp' => substr( $data['oldestEdit'], 0, 8 ),
395                'newestEdit' => $data['newestEdit'],
396                'oldestEdit' => $data['oldestEdit']
397            ];
398        }
399        if ( defined( 'MEDIAWIKI_JOB_RUNNER' ) || MW_ENTRY_POINT === 'cli' ) {
400            $pageViewData = $this->getPageViewDataInJobContext( $allTitleObjects, $user, $days );
401        } else {
402            $pageViewData = $this->getPageViewDataInWebRequestContext( $allTitleObjects, $user, $days );
403        }
404
405        $dailyTotalViews = [];
406        $dailyArticleViews = [];
407        foreach ( $pageViewData as $title => $days ) {
408            // Normalize titles as PageViewInfo does not define which title format it uses :(
409            $title = str_replace( ' ', '_', $title );
410            $mwTitle = $this->titleFactory->newFromTextThrow( $title );
411            $imageUrl = $this->getImage( $mwTitle );
412            if ( $imageUrl ) {
413                $dailyArticleViews[$title]['imageUrl'] = $imageUrl;
414            }
415            $firstEditDate = new DateTime( $allTitleObjects[$title]['rev_timestamp'] );
416            $dailyArticleViews[$title]['firstEditDate'] = $firstEditDate->format( 'Y-m-d' );
417            $dailyArticleViews[$title]['newestEdit'] = $allTitleObjects[$title]['newestEdit'];
418
419            foreach ( $days as $day => $views ) {
420                // NOTE: Do not insert the data if it is a zero due to JSON blob size issues (T351898)
421
422                $todayTotalViews = ( ( $dailyTotalViews[$day] ?? 0 ) + $views );
423                if ( $todayTotalViews > 0 ) {
424                    $dailyTotalViews[$day] = $todayTotalViews;
425                }
426
427                $todayArticleViews = ( $views ?? 0 );
428                if ( $todayArticleViews > 0 ) {
429                    $dailyArticleViews[$title]['views'][$day] = $todayArticleViews;
430                }
431            }
432        }
433
434        return [
435            'dailyTotalViews' => $dailyTotalViews,
436            'dailyArticleViews' => $dailyArticleViews,
437        ];
438    }
439
440    private function getPageViewDataInJobContext( array $allTitleObjects, UserIdentity $user, int $days ): array {
441        $pageViewData = [];
442        $titleObjects = $allTitleObjects;
443        $loopStartTime = microtime( true );
444        while ( count( $titleObjects ) ) {
445            $titleObjectsCount = count( $titleObjects );
446            if ( count( $pageViewData ) > $this->config->get( 'GEUserImpactMaxArticlesToProcessForPageviews' ) ) {
447                $this->logger->info(
448                    'Reached article count limit while fetching page view data for {count} titles for user {user}.',
449                    [ 'user' => $user->getName(), 'count' => count( $allTitleObjects ) ]
450                );
451                break;
452            }
453            if ( microtime( true ) - $loopStartTime > $this->config->get( 'GEUserImpactMaximumProcessTimeSeconds' ) ) {
454                $this->logger->info(
455                    "Reached maximum process time while fetching page view data for {count} titles for user {user}",
456                    [ 'user' => $user->getName(), 'count' => count( $allTitleObjects ) ]
457                );
458                break;
459            }
460            $pageDataStatus = $this->pageViewService->getPageData(
461                array_column( $titleObjects, 'title' ), $days
462            );
463            if ( !$pageDataStatus->isGood() ) {
464                $this->logPageDataBadStatus( $pageDataStatus );
465            }
466            if ( $pageDataStatus->isOK() ) {
467                $successful = array_filter( $pageDataStatus->success );
468                $pageViewData += array_intersect_key( $pageDataStatus->getValue(), $successful );
469            }
470            $titleObjects = array_diff_key( $titleObjects, $pageViewData );
471            if ( count( $titleObjects ) === $titleObjectsCount ) {
472                // Received no new data. Abort to avoid a loop - errors are cached for a short time
473                // so re-requesting them wouldn't help.
474                return $pageViewData;
475            }
476        }
477        return $pageViewData;
478    }
479
480    private function getPageViewDataInWebRequestContext(
481        array $allTitleObjects, UserIdentity $user, int $days
482    ): array {
483        $status = $this->pageViewService->getPageData( array_column( $allTitleObjects, 'title' ), $days );
484        if ( !$status->isGood() ) {
485            $this->logPageDataBadStatus( $status );
486            if ( !$status->isOK() ) {
487                return [];
488            }
489        } elseif ( $status->successCount < count( $allTitleObjects ) ) {
490            $failedTitles = array_keys( array_diff_key( $allTitleObjects, $status->success ) );
491            $this->logger->info( "Failed to get page view data for {count} titles for user {user}",
492                [
493                    'user' => $user->getName(),
494                    'count' => count( $failedTitles ),
495                    'failedTitles' => substr( implode( ',', $failedTitles ), 0, 250 ),
496                ]
497            );
498        }
499        return $status->getValue();
500    }
501
502    /**
503     * Don't log pvi-cached-error-title messages (T328945) but track it in statsd,
504     * and log any other message that occurs.
505     *
506     * @param StatusValue $status
507     * @return void
508     */
509    private function logPageDataBadStatus( StatusValue $status ) {
510        if ( $status->hasMessagesExcept( 'pvi-cached-error-title' ) ) {
511            $this->logger->error(
512                Status::wrap( $status )->getWikiText( false, false, 'en' )
513            );
514        } else {
515            $this->statsdDataFactory->updateCount(
516                'GrowthExperiments.ComputedUserImpactLookup.PviCachedErrorTitle',
517                $status->failCount
518            );
519        }
520    }
521
522    /**
523     * Change array keys from MW_TS date prefixes to ISO 8601 dates.
524     * @param array $mwTsArray
525     * @return array
526     */
527    private function updateToIso8601DateKeys( array $mwTsArray ): array {
528        $iso8601Array = [];
529        foreach ( $mwTsArray as $mwTsKey => $value ) {
530            $iso8601Key = substr( $mwTsKey, 0, 4 ) . '-' . substr( $mwTsKey, 4, 2 )
531                . '-' . substr( $mwTsKey, 6, 2 );
532            $iso8601Array[$iso8601Key] = $value;
533        }
534        return $iso8601Array;
535    }
536
537    /**
538     * Get image URL for a page
539     * Depends on the PageImages extension.
540     *
541     * @param Title $title
542     * @return ?string
543     */
544    private function getImage( Title $title ): ?string {
545        if ( !ExtensionRegistry::getInstance()->isLoaded( 'PageImages' ) ) {
546            return null;
547        }
548
549        $imageFile = PageImages::getPageImage( $title );
550        if ( $imageFile ) {
551            $ratio = $imageFile->getWidth() / $imageFile->getHeight();
552            $options = [
553                'width' => $ratio > 1 ?
554                    // Avoid decimals in the width because it makes the thumb url construction fail
555                    floor( self::THUMBNAIL_SIZE / $imageFile->getHeight() * $imageFile->getWidth() ) :
556                    self::THUMBNAIL_SIZE
557            ];
558
559            $thumb = $imageFile->transform( $options );
560            if ( $thumb ) {
561                return $thumb->getUrl() ?: null;
562            }
563        }
564
565        return null;
566    }
567
568}