Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
80.00% |
48 / 60 |
|
25.00% |
1 / 4 |
CRAP | |
0.00% |
0 / 1 |
RefreshUserImpactJob | |
80.00% |
48 / 60 |
|
25.00% |
1 / 4 |
21.89 | |
0.00% |
0 / 1 |
__construct | |
93.33% |
14 / 15 |
|
0.00% |
0 / 1 |
2.00 | |||
run | |
96.43% |
27 / 28 |
|
0.00% |
0 / 1 |
13 | |||
isFresh | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
computeUserImpact | |
37.50% |
6 / 16 |
|
0.00% |
0 / 1 |
5.20 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\UserImpact; |
4 | |
5 | use GenericParameterJob; |
6 | use GrowthExperiments\GrowthExperimentsServices; |
7 | use Job; |
8 | use MediaWiki\Logger\LoggerFactory; |
9 | use MediaWiki\MediaWikiServices; |
10 | use MediaWiki\User\UserFactory; |
11 | use MediaWiki\User\UserIdentityLookup; |
12 | use MediaWiki\Utils\MWTimestamp; |
13 | use Psr\Log\LoggerInterface; |
14 | use Wikimedia\Assert\Assert; |
15 | use Wikimedia\Assert\ParameterAssertionException; |
16 | use Wikimedia\LightweightObjectStore\ExpirationAwareness; |
17 | |
/**
 * Job for computing and caching expensive user impact data. Can also be used to refresh the cache
 * with an already computed value.
 *
 * For every user in the batch the job either stores the impact data that was handed to it,
 * or (when none was provided, or the provided/cached data is stale) recomputes it via the
 * UserImpactLookup service, and writes the trimmed result to the UserImpactStore.
 */
class RefreshUserImpactJob extends Job implements GenericParameterJob {

	/** Store the refreshed impact data is read from (when it supports batching) and written to. */
	private UserImpactStore $userImpactStore;
	/** Lookup used to compute impact data from scratch. */
	private UserImpactLookup $userImpactLookup;
	/** Used to check whether a user is hidden before touching their data. */
	private UserFactory $userFactory;
	/** Used to reduce the page view data to the entries that are worth caching. */
	private UserImpactFormatter $userImpactFormatter;
	/** Used to resolve user IDs from the batch into user identities. */
	private UserIdentityLookup $userIdentityLookup;
	/** Logger for the 'GrowthExperiments' channel. */
	private LoggerInterface $logger;

	/**
	 * Map of user ID => impact data as JSON string, or null to generate in the job
	 * @var (string|null)[]
	 */
	private array $impactDataBatch;

	/**
	 * Cached objects generated before this UNIX timestamp are considered stale and recomputed.
	 * Only used when no impact data is provided for the given user.
	 * @var int
	 */
	private int $staleBefore;

	/**
	 * @inheritDoc
	 * Parameters:
	 * - impactDataBatch: user impact data to write/compute, see self::$impactDataBatch
	 * - staleBefore: staleness limit, see self::$staleBefore; optional, defaults to 1 day ago
	 * - userId: user to refresh data for (deprecated, required if impactDataBatch not present)
	 * - impactData: impact data for userId (deprecated)
	 *
	 * @throws ParameterAssertionException when staleBefore is not a numeric UNIX timestamp
	 */
	public function __construct( $params = null ) {
		parent::__construct( 'refreshUserImpactJob', $params );

		$services = MediaWikiServices::getInstance();
		$growthServices = GrowthExperimentsServices::wrap( $services );
		$this->userImpactStore = $growthServices->getUserImpactStore();
		$this->userImpactLookup = $growthServices->getUserImpactLookup();
		$this->userImpactFormatter = $growthServices->getUserImpactFormatter();
		$this->userIdentityLookup = $services->getUserIdentityLookup();
		$this->userFactory = $services->getUserFactory();
		$this->logger = LoggerFactory::getInstance( 'GrowthExperiments' );

		// Fall back to the deprecated single-user parameters when no batch was passed.
		$this->impactDataBatch = $params['impactDataBatch']
			// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
			?? [ $params['userId'] => $params['impactData'] ?? null ];

		$staleBefore = $params['staleBefore'] ?? MWTimestamp::time() - ExpirationAwareness::TTL_DAY;
		// Prevent accidental use of TS_MW or some other non-TS_UNIX format but don't require int type
		// as e.g. wfTimestamp( TS_UNIX ) returns a string.
		// The check must run on the raw value, *before* it is stored in the int-typed property:
		// once coerced to int, is_numeric() is trivially true, non-numeric strings would have
		// raised a TypeError instead of the assertion, and leading-numeric strings ("123abc")
		// would have been silently truncated.
		Assert::parameter( is_numeric( $staleBefore ) && $staleBefore < 2147483647,
			'staleBefore', 'must be a UNIX timestamp' );
		$this->staleBefore = (int)$staleBefore;
	}

	/**
	 * @inheritDoc
	 *
	 * Processes every entry of self::$impactDataBatch: skips hidden users, skips entries for
	 * which the store already holds newer (or, when no data was provided, still fresh) data,
	 * recomputes the impact where needed, and stores a trimmed copy of the result.
	 *
	 * @return bool Always true; per-user failures are logged and skipped.
	 */
	public function run() {
		// Fetch the currently stored data for the whole batch at once when the store supports it.
		$preloadedUserImpacts = [];
		if ( $this->userImpactStore instanceof DatabaseUserImpactStore ) {
			$preloadedUserImpacts = $this->userImpactStore->batchGetUserImpact(
				array_keys( $this->impactDataBatch )
			);
		}
		foreach ( $this->impactDataBatch as $userId => $impactJson ) {
			if ( $this->userFactory->newFromId( $userId )->isHidden() ) {
				// do not update impact data for hidden users (T337845)
				continue;
			}

			$userImpact = null;
			/** @var UserImpact|null $preloadedUserImpact */
			$preloadedUserImpact = $preloadedUserImpacts[$userId] ?? null;
			if ( $impactJson ) {
				// json_decode() yields null for malformed JSON (and a scalar for scalar JSON);
				// only hand real arrays to the factory so that a corrupt payload falls through
				// to recomputation instead of aborting the whole batch.
				// NOTE(review): assumes newFromJsonArray() requires an array - confirm.
				$impactArray = json_decode( $impactJson, true );
				if ( is_array( $impactArray ) ) {
					try {
						$userImpact = UserImpact::newFromJsonArray( $impactArray );
						// Do not update the cache if it is already more recent.
						if ( $preloadedUserImpact
							&& $preloadedUserImpact->getGeneratedAt() > $userImpact->getGeneratedAt()
						) {
							continue;
						}
					} catch ( ParameterAssertionException $parameterAssertionException ) {
						// Invalid cache format used, recalculate from scratch.
					}
				}
			} elseif ( $preloadedUserImpact && $this->isFresh( $preloadedUserImpact ) ) {
				// We haven't been explicitly told to save new data, and the existing data
				// is still usable, nothing to do.
				continue;
			}

			// Nothing usable was provided (missing, undecodable or stale): compute it now.
			if ( !$userImpact || !$this->isFresh( $userImpact ) ) {
				$userImpact = $this->computeUserImpact( $userId );
			}

			if ( $userImpact ) {
				// We don't want to cache all page view data captured by ::computeUserImpact; in a job queue
				// context, this can contain up to 1000 articles of PageViewData (configured via
				// GEUserImpactMaxArticlesToProcessForPageviews). Call
				// the formatter to get just the data we need, and replace the dailyArticleViews with just the
				// top entries.
				$jsonData = $userImpact->jsonSerialize();
				$sortedAndFiltered = $this->userImpactFormatter->sortAndFilter( $jsonData );
				$jsonData['dailyArticleViews'] =
					// Make sure dailyArticleViews includes both the top viewed articles and recently edited
					// articles without page views. Those will both be used by UserImpactFormatter again when
					// fetching the data to display.
					$sortedAndFiltered['topViewedArticles'] + $sortedAndFiltered['recentEditsWithoutPageviews'];
				$userImpact = UserImpact::newFromJsonArray( $jsonData );
				$this->userImpactStore->setUserImpact( $userImpact );
			}
		}
		return true;
	}

	/**
	 * Check whether the given impact data was generated at or after the staleness limit.
	 *
	 * @param UserImpact $impact
	 * @return bool True when the data's generation time is not older than self::$staleBefore.
	 */
	private function isFresh( UserImpact $impact ): bool {
		return $impact->getGeneratedAt() >= $this->staleBefore;
	}

	/**
	 * Compute the impact data for a user from scratch.
	 *
	 * Failures (unknown user ID, or the lookup not returning any data) are logged as errors
	 * with the user ID as context and reported to the caller as null.
	 *
	 * @param int $userId
	 * @return ExpensiveUserImpact|null The computed data, or null when it could not be generated.
	 */
	private function computeUserImpact( int $userId ): ?ExpensiveUserImpact {
		$loggerParams = [ 'userId' => $userId ];
		$userIdentity = $this->userIdentityLookup->getUserIdentityByUserId( $userId );
		if ( !$userIdentity ) {
			$this->logger->error(
				'Unable to get user identity in RefreshUserImpactJob.',
				$loggerParams
			);
			return null;
		}

		$userImpact = $this->userImpactLookup->getExpensiveUserImpact( $userIdentity );
		if ( !$userImpact ) {
			$this->logger->error(
				'Unable to generate user impact for user in RefreshUserImpactJob.',
				$loggerParams
			);
			return null;
		}

		return $userImpact;
	}

}