Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
80.00% covered (warning)
80.00%
48 / 60
25.00% covered (danger)
25.00%
1 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
RefreshUserImpactJob
80.00% covered (warning)
80.00%
48 / 60
25.00% covered (danger)
25.00%
1 / 4
21.89
0.00% covered (danger)
0.00%
0 / 1
 __construct
93.33% covered (success)
93.33%
14 / 15
0.00% covered (danger)
0.00%
0 / 1
2.00
 run
96.43% covered (success)
96.43%
27 / 28
0.00% covered (danger)
0.00%
0 / 1
13
 isFresh
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 computeUserImpact
37.50% covered (danger)
37.50%
6 / 16
0.00% covered (danger)
0.00%
0 / 1
5.20
1<?php
2
3namespace GrowthExperiments\UserImpact;
4
5use GenericParameterJob;
6use GrowthExperiments\GrowthExperimentsServices;
7use Job;
8use MediaWiki\Logger\LoggerFactory;
9use MediaWiki\MediaWikiServices;
10use MediaWiki\User\UserFactory;
11use MediaWiki\User\UserIdentityLookup;
12use MediaWiki\Utils\MWTimestamp;
13use Psr\Log\LoggerInterface;
14use Wikimedia\Assert\Assert;
15use Wikimedia\Assert\ParameterAssertionException;
16use Wikimedia\LightweightObjectStore\ExpirationAwareness;
17
/**
 * Job for computing and caching expensive user impact data. Can also be used to refresh the cache
 * with an already computed value.
 *
 * For every user in the batch the job either stores the impact data that was passed in (when it
 * is valid and still fresh), or recomputes the data via the UserImpactLookup service and stores
 * the result. Hidden users and users whose stored data is already up to date are skipped.
 */
class RefreshUserImpactJob extends Job implements GenericParameterJob {

    /**
     * Exclusive upper bound for accepted staleBefore values (largest signed 32-bit integer).
     * Used to reject timestamps that were accidentally passed in a non-UNIX format such as TS_MW.
     */
    private const MAX_UNIX_TIMESTAMP = 2147483647;

    private UserImpactStore $userImpactStore;
    private UserImpactLookup $userImpactLookup;
    private UserFactory $userFactory;
    private UserImpactFormatter $userImpactFormatter;
    private UserIdentityLookup $userIdentityLookup;
    private LoggerInterface $logger;

    /**
     * Map of user ID => impact data as JSON string, or null to generate in the job
     * @var (string|null)[]
     */
    private array $impactDataBatch;

    /**
     * Cached objects generated before this UNIX timestamp are considered stale and recomputed.
     * Only used when no impact data is provided for the given user.
     * @var int
     */
    private int $staleBefore;

    /**
     * @inheritDoc
     * Parameters:
     * - impactDataBatch: user impact data to write/compute, see self::$impactDataBatch
     * - staleBefore: staleness limit, see self::$staleBefore; optional, defaults to 1 day ago
     * - userId: user to refresh data for (deprecated, required if impactDataBatch not present)
     * - impactData: impact data for userId (deprecated)
     *
     * @throws ParameterAssertionException If neither impactDataBatch nor userId is provided, or
     *   if staleBefore is not a UNIX timestamp.
     */
    public function __construct( $params = null ) {
        parent::__construct( 'refreshUserImpactJob', $params );

        $services = MediaWikiServices::getInstance();
        $growthServices = GrowthExperimentsServices::wrap( $services );
        $this->userImpactStore = $growthServices->getUserImpactStore();
        $this->userImpactLookup = $growthServices->getUserImpactLookup();
        $this->userImpactFormatter = $growthServices->getUserImpactFormatter();
        $this->userIdentityLookup = $services->getUserIdentityLookup();
        $this->userFactory = $services->getUserFactory();
        $this->logger = LoggerFactory::getInstance( 'GrowthExperiments' );

        if ( isset( $params['impactDataBatch'] ) ) {
            $this->impactDataBatch = $params['impactDataBatch'];
        } else {
            // Deprecated single-user form. Fail early with a clear message instead of silently
            // building a batch keyed by an empty user ID.
            Assert::parameter( isset( $params['userId'] ),
                'userId', 'must be provided when impactDataBatch is not set' );
            $this->impactDataBatch = [ $params['userId'] => $params['impactData'] ?? null ];
        }

        $staleBefore = $params['staleBefore']
            ?? ( MWTimestamp::time() - ExpirationAwareness::TTL_DAY );
        // Prevent accidental use of TS_MW or some other non-TS_UNIX format but don't require int type
        // as e.g. wfTimestamp( TS_UNIX ) returns a string. The raw value must be checked before it
        // is stored: the property is typed as int, so assigning first would coerce the value (or
        // fail with a TypeError) before the check could run.
        Assert::parameter( is_numeric( $staleBefore ) && $staleBefore < self::MAX_UNIX_TIMESTAMP,
            'staleBefore', 'must be a UNIX timestamp' );
        $this->staleBefore = (int)$staleBefore;
    }

    /**
     * Process every user in the batch: store the provided impact data, or recompute and store
     * it when none was provided or the provided data is invalid or stale.
     *
     * @inheritDoc
     */
    public function run() {
        $preloadedUserImpacts = [];
        if ( $this->userImpactStore instanceof DatabaseUserImpactStore ) {
            // Fetch the currently stored data for the whole batch in one call.
            $preloadedUserImpacts = $this->userImpactStore->batchGetUserImpact(
                array_keys( $this->impactDataBatch )
            );
        }
        foreach ( $this->impactDataBatch as $userId => $impactJson ) {
            if ( $this->userFactory->newFromId( $userId )->isHidden() ) {
                // do not update impact data for hidden users (T337845)
                continue;
            }

            $userImpact = null;
            /** @var UserImpact|null $preloadedUserImpact */
            $preloadedUserImpact = $preloadedUserImpacts[$userId] ?? null;
            if ( $impactJson ) {
                // Null means the payload was invalid; the data is then recalculated from scratch.
                $userImpact = $this->decodeUserImpact( $impactJson );
                // Do not update the cache if it is already more recent.
                if ( $userImpact
                    && $preloadedUserImpact
                    && $preloadedUserImpact->getGeneratedAt() > $userImpact->getGeneratedAt()
                ) {
                    continue;
                }
            } elseif ( $preloadedUserImpact && $this->isFresh( $preloadedUserImpact ) ) {
                // We haven't been explicitly told to save new data, and the existing data
                // is still usable, nothing to do.
                continue;
            }

            if ( !$userImpact || !$this->isFresh( $userImpact ) ) {
                $userImpact = $this->computeUserImpact( $userId );
            }

            if ( $userImpact ) {
                $this->userImpactStore->setUserImpact( $this->trimForStorage( $userImpact ) );
            }
        }
        return true;
    }

    /**
     * Turn a JSON string passed in via the job parameters into a UserImpact object.
     *
     * @param string $impactJson
     * @return UserImpact|null Null when the string is not valid JSON for an array/object, or
     *   when UserImpact::newFromJsonArray() rejects the decoded data.
     */
    private function decodeUserImpact( string $impactJson ): ?UserImpact {
        $decoded = json_decode( $impactJson, true );
        if ( !is_array( $decoded ) ) {
            // Malformed JSON (json_decode() returned null) or a bare scalar.
            return null;
        }
        try {
            return UserImpact::newFromJsonArray( $decoded );
        } catch ( ParameterAssertionException $parameterAssertionException ) {
            // Invalid cache format used.
            return null;
        }
    }

    /**
     * Reduce the page view data of a user impact object to what needs to be stored.
     *
     * We don't want to cache all page view data captured by ::computeUserImpact; in a job queue
     * context, this can contain up to 1000 articles of PageViewData (configured via
     * GEUserImpactMaxArticlesToProcessForPageviews). Call the formatter to get just the data we
     * need, and replace the dailyArticleViews with just the top entries.
     *
     * @param UserImpact $userImpact
     * @return UserImpact New object built from the reduced data.
     */
    private function trimForStorage( UserImpact $userImpact ): UserImpact {
        $jsonData = $userImpact->jsonSerialize();
        $sortedAndFiltered = $this->userImpactFormatter->sortAndFilter( $jsonData );
        // Make sure dailyArticleViews includes both the top viewed articles and recently edited
        // articles without page views. Those will both be used by UserImpactFormatter again when
        // fetching the data to display. The array union keeps the first operand's entry when a
        // key exists in both.
        $jsonData['dailyArticleViews'] =
            $sortedAndFiltered['topViewedArticles'] + $sortedAndFiltered['recentEditsWithoutPageviews'];
        return UserImpact::newFromJsonArray( $jsonData );
    }

    /**
     * Check whether impact data is recent enough to be used without recomputing it.
     *
     * @param UserImpact $impact
     * @return bool True if the data was generated at or after self::$staleBefore.
     */
    private function isFresh( UserImpact $impact ): bool {
        return $impact->getGeneratedAt() >= $this->staleBefore;
    }

    /**
     * Compute the impact data for a user from scratch. Failures are logged as errors.
     *
     * @param int $userId
     * @return ExpensiveUserImpact|null Null if no user identity exists for the ID or the lookup
     *   service returned no data.
     */
    private function computeUserImpact( int $userId ): ?ExpensiveUserImpact {
        $loggerParams = [ 'userId' => $userId ];
        $userIdentity = $this->userIdentityLookup->getUserIdentityByUserId( $userId );
        if ( !$userIdentity ) {
            $this->logger->error(
                'Unable to get user identity in RefreshUserImpactJob.',
                $loggerParams
            );
            return null;
        }

        $userImpact = $this->userImpactLookup->getExpensiveUserImpact( $userIdentity );
        if ( !$userImpact ) {
            $this->logger->error(
                'Unable to generate user impact for user in RefreshUserImpactJob.',
                $loggerParams
            );
            return null;
        }

        return $userImpact;
    }

}