Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 137 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
RefreshUserImpactData | |
0.00% |
0 / 131 |
|
0.00% |
0 / 7 |
1406 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
240 | |||
initOptions | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
initServices | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
getUsers | |
0.00% |
0 / 33 |
|
0.00% |
0 / 1 |
90 | |||
getQueryBuilder | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
30 | |||
getTimestampFromRelativeDate | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\Maintenance; |
4 | |
5 | use DateTime; |
6 | use Exception; |
7 | use Generator; |
8 | use GrowthExperiments\GrowthExperimentsServices; |
9 | use GrowthExperiments\UserDatabaseHelper; |
10 | use GrowthExperiments\UserImpact\RefreshUserImpactJob; |
11 | use GrowthExperiments\UserImpact\UserImpactLookup; |
12 | use GrowthExperiments\UserImpact\UserImpactStore; |
13 | use MediaWiki\JobQueue\JobQueueGroupFactory; |
14 | use MediaWiki\Maintenance\Maintenance; |
15 | use MediaWiki\User\ActorStore; |
16 | use MediaWiki\User\UserFactory; |
17 | use MediaWiki\User\UserIdentity; |
18 | use MediaWiki\User\UserSelectQueryBuilder; |
19 | use Wikimedia\Rdbms\SelectQueryBuilder; |
20 | |
// Locate the MediaWiki installation: prefer the MW_INSTALL_PATH environment
// variable, otherwise fall back to the directory three levels above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP === false ? __DIR__ . '/../../..' : $IP;
require_once "{$IP}/maintenance/Maintenance.php";
26 | |
27 | class RefreshUserImpactData extends Maintenance { |
28 | |
29 | private ActorStore $actorStore; |
30 | private UserFactory $userFactory; |
31 | private UserImpactLookup $userImpactLookup; |
32 | private UserImpactStore $userImpactStore; |
33 | private UserDatabaseHelper $userDatabaseHelper; |
34 | |
35 | private JobQueueGroupFactory $jobQueueGroupFactory; |
36 | |
37 | /** @var int|null Ignore a user if they have data generated after this Unix timestamp. */ |
38 | private ?int $ignoreAfter = null; |
39 | |
40 | private int $totalUsers = 0; |
41 | |
/**
 * Register the script description, its command line options and the
 * default batch size (100).
 */
public function __construct() {
	parent::__construct();
	$this->requireExtension( 'GrowthExperiments' );
	$this->addDescription( 'Update data in the growthexperiments_user_impact table.' );

	// Shared explanation appended to every option that takes a relative time fragment.
	$relativeTimeHint = ' Time is a relative timestring fragment passed to DateTime, such as "30days".';

	// Optional options that take a value, in the order they are registered.
	$valueOptions = [
		'editedWithin' => 'Apply to users who have edited within the given time.' . $relativeTimeHint,
		'registeredWithin' => 'Apply to users who have registered within the given time.' . $relativeTimeHint,
		'hasEditsAtLeast' => 'Apply to users who have at least this many edits.',
		'ignoreIfUpdatedWithin' => 'Skip cache records which were stored within the given time.'
			. $relativeTimeHint,
		'fromUser' => 'Continue from the given user ID (exclusive).',
	];
	foreach ( $valueOptions as $optionName => $optionDescription ) {
		$this->addOption( $optionName, $optionDescription, false, true );
	}

	// Plain flags (no value).
	$flagOptions = [
		'use-job-queue' => 'If job queue should be used to refresh user impact data.',
		'force' => 'Run even if GERefreshUserImpactDataMaintenanceScriptEnabled is false',
		'dry-run' => 'When used, the script will only count the number of users it would update.',
		'verbose' => 'Verbose mode',
	];
	foreach ( $flagOptions as $optionName => $optionDescription ) {
		$this->addOption( $optionName, $optionDescription );
	}

	$this->setBatchSize( 100 );
}
60 | |
/**
 * @inheritDoc
 *
 * Refresh the stored impact data of every user yielded by getUsers().
 *
 * Does nothing (apart from printing a notice) when the
 * GERefreshUserImpactDataMaintenanceScriptEnabled setting is false and --force
 * was not given. Hidden users and bots are always skipped. For the remaining
 * users the behaviour depends on the options:
 *  - --dry-run: nothing is written; with --verbose the user is reported.
 *    This takes precedence over --use-job-queue.
 *  - --use-job-queue: user IDs are collected and pushed as RefreshUserImpactJob
 *    instances, one job per batch-size users, plus a final job for the remainder.
 *  - otherwise: the impact is computed and stored synchronously.
 */
public function execute() {
	if ( !$this->getConfig()->get( 'GERefreshUserImpactDataMaintenanceScriptEnabled' )
		&& !$this->hasOption( 'force' )
	) {
		$this->output(
			'GERefreshUserImpactDataMaintenanceScriptEnabled is set to false on this wiki.' .
			PHP_EOL
		);
		return;
	}
	$this->initOptions();
	$this->initServices();

	$dryRun = $this->hasOption( 'dry-run' );
	$useJobQueue = $this->hasOption( 'use-job-queue' );
	$verbose = $this->hasOption( 'verbose' );

	// User IDs (as array keys) waiting to be pushed to the job queue.
	$pendingUsers = [];
	foreach ( $this->getUsers() as $user ) {
		$realUser = $this->userFactory->newFromUserIdentity( $user );
		if ( $realUser->isHidden() ) {
			// do not update impact data for hidden users (T337845)
			$this->output( "  ...skipping user {$user->getId()}, hidden.\n" );
			continue;
		}
		if ( $realUser->isBot() ) {
			// do not update impact data for bots (T351898)
			$this->output( "  ...skipping user {$user->getId()}, bot.\n" );
			continue;
		}

		if ( $dryRun ) {
			if ( $verbose ) {
				$this->output( "  ...would refresh user impact for user {$user->getId()}\n" );
			}
			continue;
		}

		if ( $useJobQueue ) {
			$pendingUsers[$user->getId()] = null;
			if ( count( $pendingUsers ) >= $this->getBatchSize() ) {
				$this->enqueueRefreshJob( $pendingUsers, $verbose );
				$pendingUsers = [];
			}
			continue;
		}

		$userImpact = $this->userImpactLookup->getExpensiveUserImpact( $user );
		if ( $userImpact ) {
			if ( $verbose ) {
				$this->output( "  ...refreshing user impact for user {$user->getId()}\n" );
			}
			$this->userImpactStore->setUserImpact( $userImpact );
		} elseif ( $verbose ) {
			$this->output( "  ...could not generate user impact for user {$user->getId()}\n" );
		}
	}

	// Flush the final, partially filled batch. Without this, users collected
	// after the last full batch would never get a job enqueued.
	if ( $pendingUsers ) {
		$this->enqueueRefreshJob( $pendingUsers, $verbose );
	}

	if ( $this->totalUsers ) {
		$this->output( "Done. Processed $this->totalUsers users.\n" );
	}
}

/**
 * Push one RefreshUserImpactJob covering the given users onto the job queue.
 *
 * @param array<int,null> $users Map keyed by user ID; the values are null
 *  (passed through unchanged as the job's 'impactDataBatch' parameter).
 * @param bool $verbose Whether to print the list of user IDs being enqueued.
 */
private function enqueueRefreshJob( array $users, bool $verbose ): void {
	if ( $verbose ) {
		$usersText = implode( ', ', array_keys( $users ) );
		$this->output( " ... enqueueing refreshUserImpactJob for users $usersText\n" );
	}
	$this->jobQueueGroupFactory->makeJobQueueGroup()->lazyPush(
		new RefreshUserImpactJob( [
			'impactDataBatch' => $users,
			// The staleness check is left to the job; see getUsers().
			'staleBefore' => $this->ignoreAfter,
		] )
	);
}
126 | |
/**
 * Validate the selection options and derive $ignoreAfter.
 *
 * Aborts with a fatal error unless at least one of --editedWithin and
 * --registeredWithin was given. When --ignoreIfUpdatedWithin is set, it is
 * converted to a Unix timestamp and stored in $this->ignoreAfter.
 */
private function initOptions(): void {
	$hasSelector = $this->hasOption( 'editedWithin' ) || $this->hasOption( 'registeredWithin' );
	if ( !$hasSelector ) {
		$this->fatalError( 'must use at least one of --editedWithin and --registeredWithin' );
	}

	$freshnessWindow = $this->getOption( 'ignoreIfUpdatedWithin' );
	if ( !$freshnessWindow ) {
		return;
	}
	$this->ignoreAfter = $this->getTimestampFromRelativeDate( $freshnessWindow );
}
137 | |
/**
 * Populate the service properties from the MediaWiki service container and
 * the GrowthExperiments service wrapper.
 */
private function initServices(): void {
	// Core MediaWiki services.
	$coreServices = $this->getServiceContainer();
	$this->actorStore = $coreServices->getActorStore();
	$this->userFactory = $coreServices->getUserFactory();
	$this->jobQueueGroupFactory = $coreServices->getJobQueueGroupFactory();

	// GrowthExperiments services; the lookup used here is the uncached one.
	$geServices = GrowthExperimentsServices::wrap( $coreServices );
	$this->userImpactLookup = $geServices->getUncachedUserImpactLookup();
	$this->userImpactStore = $geServices->getUserImpactStore();
	$this->userDatabaseHelper = $geServices->getUserDatabaseHelper();
}
148 | |
/**
 * Iterate over the users matching the command line filters, in batches.
 *
 * User IDs are fetched in ascending order, batch-size at a time, starting
 * after --fromUser (default 0). Each batch is then resolved to UserIdentity
 * objects. When --ignoreIfUpdatedWithin is in effect and the job queue is not
 * used, users whose stored impact data is recent enough are skipped.
 * Replication is waited for after every batch, and $this->totalUsers is
 * increased by the number of IDs fetched (including skipped users).
 *
 * @return Generator<UserIdentity>
 */
private function getUsers(): Generator {
	$queryBuilder = $this->getQueryBuilder();
	$queryBuilder->select( 'actor_user' );
	// do not update impact data for temporary users (T379672)
	$queryBuilder->named();
	$queryBuilder->limit( $this->getBatchSize() );
	$queryBuilder->orderByUserId( SelectQueryBuilder::SORT_ASC );
	$queryBuilder->caller( __METHOD__ );
	$lastUserId = (int)$this->getOption( 'fromUser', 0 );
	$dbr = $this->getReplicaDB();
	do {
		$this->output( "processing {$this->getBatchSize()} users starting with $lastUserId\n" );
		$batchQueryBuilder = clone $queryBuilder;
		$batchQueryBuilder->where( $dbr->expr( 'actor_user', '>', $lastUserId ) );
		$userIds = $batchQueryBuilder->fetchFieldValues();
		if ( $userIds ) {
			// Advance the pagination cursor from the ID list itself. The lookup
			// below has no ORDER BY, so the last user it returns is not
			// guaranteed to be the one with the highest ID; relying on it could
			// make the next batch re-process users.
			$lastUserId = max( array_map( static fn ( $id ): int => (int)$id, $userIds ) );
			$users = $this->actorStore->newSelectQueryBuilder( $dbr )
				->whereUserIds( $userIds )
				->caller( __METHOD__ )
				->fetchUserIdentities();
		} else {
			$users = [];
		}
		foreach ( $users as $user ) {
			// Do staleness check, if we are not using the job queue. Jobs can run after
			// significant delays and multiple updates for the same user might get queued,
			// so we do the check when the job runs.
			if ( $this->ignoreAfter && !$this->hasOption( 'use-job-queue' ) ) {
				$cachedUserImpact = $this->userImpactStore->getExpensiveUserImpact( $user );
				if ( $cachedUserImpact && $cachedUserImpact->getGeneratedAt() >= $this->ignoreAfter ) {
					if ( $this->hasOption( 'verbose' ) ) {
						$this->output( "  ...skipping user {$user->getId()}, has recent cached entry\n" );
					}
					continue;
				}
			}
			yield $user;
		}
		$this->waitForReplication();
		$usersProcessedInThisBatch = count( $userIds );
		$this->totalUsers += $usersProcessedInThisBatch;
		if ( $usersProcessedInThisBatch > 0 ) {
			$this->output( "  processed $usersProcessedInThisBatch users\n" );
		}
		// A short batch means the result set is exhausted.
	} while ( $usersProcessedInThisBatch === $this->getBatchSize() );
}
199 | |
/**
 * Build the base user query from the --editedWithin, --registeredWithin and
 * --hasEditsAtLeast options. Callers add field selection, ordering and paging.
 *
 * @return UserSelectQueryBuilder
 */
private function getQueryBuilder(): UserSelectQueryBuilder {
	$editWindow = $this->getOption( 'editedWithin' );
	$registrationWindow = $this->getOption( 'registeredWithin' );
	$minEditCount = $this->getOption( 'hasEditsAtLeast' );

	$dbr = $this->getReplicaDB();
	$builder = $this->actorStore->newSelectQueryBuilder( $dbr );

	if ( $editWindow ) {
		// Only actors with a revision newer than the cutoff; grouped so that
		// each user appears once.
		$cutoff = $dbr->timestamp( $this->getTimestampFromRelativeDate( $editWindow ) );
		$builder
			->join( 'revision', null, [ 'rev_actor = actor_id' ] )
			->where( $dbr->expr( 'rev_timestamp', '>=', $cutoff ) )
			->groupBy( [ 'actor_user' ] );
	}

	if ( $registrationWindow ) {
		$firstUserId = $this->userDatabaseHelper->findFirstUserIdForRegistrationTimestamp(
			$this->getTimestampFromRelativeDate( $registrationWindow )
		);
		// Without a first user ID, use an always-false condition so the query
		// returns no rows.
		$builder->where(
			$firstUserId ? $dbr->expr( 'actor_user', '>=', $firstUserId ) : '0 = 1'
		);
	}

	if ( $minEditCount ) {
		$builder
			->join( 'user', null, [ 'user_id = actor_user' ] )
			->where( $dbr->expr( 'user_editcount', '>=', (int)$minEditCount ) );
	}

	return $builder;
}
229 | |
/**
 * Convert a relative time fragment such as "30days" into the Unix timestamp
 * lying that far in the past. Aborts with a fatal error, reporting the
 * exception message, if DateTime cannot parse the resulting expression.
 *
 * @param string $relativeDate A relative date string fragment; it is appended
 *   to "now - " and passed to the DateTime constructor.
 * @return int Unix timestamp
 */
private function getTimestampFromRelativeDate( string $relativeDate ): int {
	$expression = "now - {$relativeDate}";
	try {
		$pointInPast = new DateTime( $expression );
	} catch ( Exception $parseError ) {
		$this->fatalError( $parseError->getMessage() );
	}
	return $pointInPast->getTimestamp();
}
243 | |
244 | } |
245 | |
// Name the maintenance class defined in this file, then include the file named
// by the RUN_MAINTENANCE_IF_MAIN constant (presumably MediaWiki's runner, which
// executes $maintClass when this script is the entry point; confirm against
// Maintenance.php).
$maintClass = RefreshUserImpactData::class;
require_once RUN_MAINTENANCE_IF_MAIN;