Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 134 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
RefreshUserImpactData | |
0.00% |
0 / 128 |
|
0.00% |
0 / 7 |
1406 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
240 | |||
initOptions | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
initServices | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
getUsers | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
90 | |||
getQueryBuilder | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
30 | |||
getTimestampFromRelativeDate | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace GrowthExperiments\Maintenance; |
4 | |
5 | use DateTime; |
6 | use Exception; |
7 | use Generator; |
8 | use GrowthExperiments\GrowthExperimentsServices; |
9 | use GrowthExperiments\UserDatabaseHelper; |
10 | use GrowthExperiments\UserImpact\RefreshUserImpactJob; |
11 | use GrowthExperiments\UserImpact\UserImpactLookup; |
12 | use GrowthExperiments\UserImpact\UserImpactStore; |
13 | use Maintenance; |
14 | use MediaWiki\JobQueue\JobQueueGroupFactory; |
15 | use MediaWiki\MediaWikiServices; |
16 | use MediaWiki\User\ActorStore; |
17 | use MediaWiki\User\UserFactory; |
18 | use MediaWiki\User\UserIdentity; |
19 | use MediaWiki\User\UserSelectQueryBuilder; |
20 | use Wikimedia\Rdbms\SelectQueryBuilder; |
21 | |
// Resolve the MediaWiki install path: use the MW_INSTALL_PATH environment variable
// when it is set, otherwise fall back to three directory levels above this file's
// directory. The base Maintenance class is then loaded from that location.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP === false ? __DIR__ . '/../../..' : $IP;
require_once "$IP/maintenance/Maintenance.php";
27 | |
/**
 * Maintenance script that refreshes user impact data for a filtered set of users.
 *
 * Users are selected by recent edit activity and/or registration date (and optionally
 * a minimum edit count). Each selected user is either refreshed directly through the
 * user impact lookup/store, or collected into batches that are pushed to the job queue
 * as RefreshUserImpactJob instances.
 */
class RefreshUserImpactData extends Maintenance {

	private ActorStore $actorStore;
	private UserFactory $userFactory;
	private UserImpactLookup $userImpactLookup;
	private UserImpactStore $userImpactStore;
	private UserDatabaseHelper $userDatabaseHelper;

	private JobQueueGroupFactory $jobQueueGroupFactory;

	/** @var int|null Ignore a user if they have data generated after this Unix timestamp. */
	private ?int $ignoreAfter = null;

	/** @var int Number of user IDs fetched from the database so far (skipped users included). */
	private int $totalUsers = 0;

	/**
	 * Declare the script's description, command line options and default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->requireExtension( 'GrowthExperiments' );
		$this->addDescription( 'Update data in the growthexperiments_user_impact table.' );
		$this->addOption( 'editedWithin', 'Apply to users who have edited within the given time.'
			. ' Time is a relative timestring fragment passed to DateTime, such as "30days".', false, true );
		$this->addOption( 'registeredWithin', 'Apply to users who have registered within the given time.'
			. ' Time is a relative timestring fragment passed to DateTime, such as "30days".', false, true );
		$this->addOption( 'hasEditsAtLeast', 'Apply to users who have at least this many edits.', false, true );
		$this->addOption( 'ignoreIfUpdatedWithin', 'Skip cache records which were stored within the given time.'
			. ' Time is a relative timestring fragment passed to DateTime, such as "30days".', false, true );
		$this->addOption( 'fromUser', 'Continue from the given user ID (exclusive).', false, true );
		$this->addOption( 'use-job-queue', 'If job queue should be used to refresh user impact data.' );
		$this->addOption( 'force', 'Run even if GERefreshUserImpactDataMaintenanceScriptEnabled is false' );
		$this->addOption( 'dry-run', 'When used, the script will only count the number of users it would update.' );
		$this->addOption( 'verbose', 'Verbose mode' );
		$this->setBatchSize( 100 );
	}

	/** @inheritDoc */
	public function execute() {
		if ( !$this->getConfig()->get( 'GERefreshUserImpactDataMaintenanceScriptEnabled' )
			&& !$this->hasOption( 'force' )
		) {
			$this->output(
				'GERefreshUserImpactDataMaintenanceScriptEnabled is set to false on this wiki.' .
				PHP_EOL
			);
			return;
		}
		$this->initOptions();
		$this->initServices();

		// These options cannot change while the script runs, so look them up once.
		$dryRun = $this->hasOption( 'dry-run' );
		$useJobQueue = $this->hasOption( 'use-job-queue' );
		$verbose = $this->hasOption( 'verbose' );

		// User IDs (stored as array keys) waiting to be pushed to the job queue as one job.
		$users = [];
		foreach ( $this->getUsers() as $user ) {
			$realUser = $this->userFactory->newFromUserIdentity( $user );
			if ( $realUser->isHidden() ) {
				// do not update impact data for hidden users (T337845)
				$this->output( " ...skipping user {$user->getId()}, hidden.\n" );
				continue;
			}
			if ( $realUser->isBot() ) {
				// do not update impact data for bots (T351898)
				$this->output( " ...skipping user {$user->getId()}, bot.\n" );
				continue;
			}

			if ( $dryRun ) {
				if ( $verbose ) {
					$this->output( " ...would refresh user impact for user {$user->getId()}\n" );
				}
				continue;
			}

			if ( $useJobQueue ) {
				$users[$user->getId()] = null;
				if ( count( $users ) >= $this->getBatchSize() ) {
					$this->enqueueRefreshJob( $users );
					$users = [];
				}
				continue;
			}

			$userImpact = $this->userImpactLookup->getExpensiveUserImpact( $user );
			if ( $userImpact ) {
				if ( $verbose ) {
					$this->output( " ...refreshing user impact for user {$user->getId()}\n" );
				}
				$this->userImpactStore->setUserImpact( $userImpact );
			} elseif ( $verbose ) {
				$this->output( " ...could not generate user impact for user {$user->getId()}\n" );
			}
		}

		// Flush the final, partially filled batch. Without this, users collected after
		// the last full batch would never be enqueued when --use-job-queue is set.
		if ( $users ) {
			$this->enqueueRefreshJob( $users );
		}

		if ( $this->totalUsers ) {
			$this->output( "Done. Processed $this->totalUsers users.\n" );
		}
	}

	/**
	 * Push one RefreshUserImpactJob covering the given batch of users.
	 *
	 * @param array<int,null> $users Batch to refresh, keyed by user ID.
	 */
	private function enqueueRefreshJob( array $users ): void {
		if ( $this->hasOption( 'verbose' ) ) {
			$usersText = implode( ', ', array_keys( $users ) );
			$this->output( " ... enqueueing refreshUserImpactJob for users $usersText\n" );
		}
		$this->jobQueueGroupFactory->makeJobQueueGroup()->lazyPush(
			new RefreshUserImpactJob( [
				'impactDataBatch' => $users,
				'staleBefore' => $this->ignoreAfter,
			] )
		);
	}

	/**
	 * Validate the command line options and derive the staleness cutoff.
	 *
	 * Reports a fatal error unless at least one of --editedWithin / --registeredWithin
	 * is given. When --ignoreIfUpdatedWithin is set, it is converted to a Unix timestamp
	 * and stored in $ignoreAfter.
	 */
	private function initOptions(): void {
		if ( !$this->hasOption( 'editedWithin' ) && !$this->hasOption( 'registeredWithin' ) ) {
			$this->fatalError( 'must use at least one of --editedWithin and --registeredWithin' );
		}

		$ignoreIfUpdatedWithin = $this->getOption( 'ignoreIfUpdatedWithin' );
		if ( $ignoreIfUpdatedWithin ) {
			$this->ignoreAfter = $this->getTimestampFromRelativeDate( $ignoreIfUpdatedWithin );
		}
	}

	/**
	 * Fetch the MediaWiki core and GrowthExperiments services used by this script.
	 */
	private function initServices(): void {
		$services = MediaWikiServices::getInstance();
		$growthServices = GrowthExperimentsServices::wrap( $services );
		$this->actorStore = $services->getActorStore();
		$this->userFactory = $services->getUserFactory();
		$this->jobQueueGroupFactory = $services->getJobQueueGroupFactory();
		$this->userImpactLookup = $growthServices->getUncachedUserImpactLookup();
		$this->userImpactStore = $growthServices->getUserImpactStore();
		$this->userDatabaseHelper = $growthServices->getUserDatabaseHelper();
	}

	/**
	 * Iterate over the users matching the script's filters, one database batch at a time.
	 *
	 * Each batch selects up to getBatchSize() user IDs greater than the previous batch's
	 * highest ID (starting after --fromUser). When not using the job queue, users whose
	 * stored impact data was generated at or after $ignoreAfter are skipped here.
	 * $totalUsers is increased by the number of IDs fetched in each batch.
	 *
	 * @return Generator<UserIdentity>
	 */
	private function getUsers(): Generator {
		$queryBuilder = $this->getQueryBuilder();
		$queryBuilder->select( 'actor_user' );
		$queryBuilder->limit( $this->getBatchSize() );
		$queryBuilder->orderByUserId( SelectQueryBuilder::SORT_ASC );
		$lastUserId = (int)$this->getOption( 'fromUser', 0 );
		$dbr = $this->getDB( DB_REPLICA );
		do {
			$this->output( "processing {$this->getBatchSize()} users starting with $lastUserId\n" );
			$batchQueryBuilder = clone $queryBuilder;
			$batchQueryBuilder->where( $dbr->expr( 'actor_user', '>', $lastUserId ) );
			$userIds = $batchQueryBuilder->fetchFieldValues();
			if ( $userIds ) {
				// Advance the pagination cursor from the batch's own ID list rather than
				// from the order in which the (unordered) identity query returns rows, so
				// the cursor can neither stall nor move backwards.
				$lastUserId = max( array_map( 'intval', $userIds ) );
				$users = $this->actorStore->newSelectQueryBuilder( $dbr )
					->whereUserIds( $userIds )
					->fetchUserIdentities();
			} else {
				$users = [];
			}
			foreach ( $users as $user ) {
				// Do staleness check, if we are not using the job queue. Jobs can run after
				// significant delays and multiple updates for the same user might get queued,
				// so we do the check when the job runs.
				if ( $this->ignoreAfter && !$this->hasOption( 'use-job-queue' ) ) {
					$cachedUserImpact = $this->userImpactStore->getExpensiveUserImpact( $user );
					if ( $cachedUserImpact && $cachedUserImpact->getGeneratedAt() >= $this->ignoreAfter ) {
						if ( $this->hasOption( 'verbose' ) ) {
							$this->output( " ...skipping user {$user->getId()}, has recent cached entry\n" );
						}
						continue;
					}
				}
				yield $user;
			}
			$this->waitForReplication();
			$usersProcessedInThisBatch = count( $userIds );
			$this->totalUsers += $usersProcessedInThisBatch;
			if ( $usersProcessedInThisBatch > 0 ) {
				$this->output( "  processed $usersProcessedInThisBatch users\n" );
			}
			// A batch shorter than the batch size means there are no more rows to fetch.
		} while ( $usersProcessedInThisBatch === $this->getBatchSize() );
	}

	/**
	 * Build the base user query from the --editedWithin, --registeredWithin and
	 * --hasEditsAtLeast options.
	 *
	 * @return UserSelectQueryBuilder Query without field selection, limit or ordering;
	 *  the caller adds those.
	 */
	private function getQueryBuilder(): UserSelectQueryBuilder {
		$editedWithin = $this->getOption( 'editedWithin' );
		$registeredWithin = $this->getOption( 'registeredWithin' );
		$hasEditsAtLeast = $this->getOption( 'hasEditsAtLeast' );

		$dbr = $this->getDB( DB_REPLICA );
		$queryBuilder = $this->actorStore->newSelectQueryBuilder( $dbr );
		if ( $editedWithin ) {
			$timestamp = $dbr->timestamp( $this->getTimestampFromRelativeDate( $editedWithin ) );
			$queryBuilder->join( 'revision', null, [ 'rev_actor = actor_id' ] );
			$queryBuilder->where( $dbr->expr( 'rev_timestamp', '>=', $timestamp ) );
			// One row per user even when the user has several matching revisions.
			$queryBuilder->groupBy( [ 'actor_user' ] );
		}
		if ( $registeredWithin ) {
			$firstUserId = $this->userDatabaseHelper->findFirstUserIdForRegistrationTimestamp(
				$this->getTimestampFromRelativeDate( $registeredWithin )
			);
			if ( $firstUserId ) {
				$queryBuilder->where( $dbr->expr( 'actor_user', '>=', $firstUserId ) );
			} else {
				// No user ID was found for that registration time: match nothing.
				$queryBuilder->where( '0 = 1' );
			}
		}
		if ( $hasEditsAtLeast ) {
			$queryBuilder->join( 'user', null, [ 'user_id = actor_user' ] );
			$queryBuilder->where( $dbr->expr( 'user_editcount', '>=', (int)$hasEditsAtLeast ) );
		}
		return $queryBuilder;
	}

	/**
	 * Convert a relative date fragment into a Unix timestamp in the past.
	 *
	 * An exception thrown by the DateTime constructor is reported through fatalError().
	 *
	 * @param string $relativeDate A relative date string fragment that will be prefixed with a
	 *   minus sign and passed to the DateTime constructor.
	 * @return int
	 */
	private function getTimestampFromRelativeDate( string $relativeDate ): int {
		try {
			$dateTime = new DateTime( 'now - ' . $relativeDate );
		} catch ( Exception $e ) {
			$this->fatalError( $e->getMessage() );
		}
		return $dateTime->getTimestamp();
	}

}
242 | |
// Publish this script's class name in $maintClass, then include the file named by the
// RUN_MAINTENANCE_IF_MAIN constant (defined outside this file; presumably it runs the
// script when this file is the entry point -- confirm against MediaWiki core).
$maintClass = \GrowthExperiments\Maintenance\RefreshUserImpactData::class;
require_once RUN_MAINTENANCE_IF_MAIN;