Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 137
0.00% covered (danger)
0.00%
0 / 7
CRAP
0.00% covered (danger)
0.00%
0 / 1
RefreshUserImpactData
0.00% covered (danger)
0.00%
0 / 131
0.00% covered (danger)
0.00%
0 / 7
1406
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 16
0.00% covered (danger)
0.00%
0 / 1
2
 execute
0.00% covered (danger)
0.00%
0 / 44
0.00% covered (danger)
0.00%
0 / 1
240
 initOptions
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
20
 initServices
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
2
 getUsers
0.00% covered (danger)
0.00%
0 / 33
0.00% covered (danger)
0.00%
0 / 1
90
 getQueryBuilder
0.00% covered (danger)
0.00%
0 / 21
0.00% covered (danger)
0.00%
0 / 1
30
 getTimestampFromRelativeDate
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3namespace GrowthExperiments\Maintenance;
4
5use DateTime;
6use Exception;
7use Generator;
8use GrowthExperiments\GrowthExperimentsServices;
9use GrowthExperiments\UserDatabaseHelper;
10use GrowthExperiments\UserImpact\RefreshUserImpactJob;
11use GrowthExperiments\UserImpact\UserImpactLookup;
12use GrowthExperiments\UserImpact\UserImpactStore;
13use MediaWiki\JobQueue\JobQueueGroupFactory;
14use MediaWiki\Maintenance\Maintenance;
15use MediaWiki\User\ActorStore;
16use MediaWiki\User\UserFactory;
17use MediaWiki\User\UserIdentity;
18use MediaWiki\User\UserSelectQueryBuilder;
19use Wikimedia\Rdbms\SelectQueryBuilder;
20
// Locate the MediaWiki installation: use MW_INSTALL_PATH when the environment provides it,
// otherwise fall back to the directory three levels above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP !== false ? $IP : __DIR__ . '/../../..';
require_once "$IP/maintenance/Maintenance.php";
26
27class RefreshUserImpactData extends Maintenance {
28
29    private ActorStore $actorStore;
30    private UserFactory $userFactory;
31    private UserImpactLookup $userImpactLookup;
32    private UserImpactStore $userImpactStore;
33    private UserDatabaseHelper $userDatabaseHelper;
34
35    private JobQueueGroupFactory $jobQueueGroupFactory;
36
37    /** @var int|null Ignore a user if they have data generated after this Unix timestamp. */
38    private ?int $ignoreAfter = null;
39
40    private int $totalUsers = 0;
41
42    public function __construct() {
43        parent::__construct();
44        $this->requireExtension( 'GrowthExperiments' );
45        $this->addDescription( 'Update data in the growthexperiments_user_impact table.' );
46        $this->addOption( 'editedWithin', 'Apply to users who have edited within the given time.'
47            . ' Time is a relative timestring fragment passed to DateTime, such as "30days".', false, true );
48        $this->addOption( 'registeredWithin', 'Apply to users who have registered within the given time.'
49            . ' Time is a relative timestring fragment passed to DateTime, such as "30days".', false, true );
50        $this->addOption( 'hasEditsAtLeast', 'Apply to users who have at least this many edits.', false, true );
51        $this->addOption( 'ignoreIfUpdatedWithin', 'Skip cache records which were stored within the given time.'
52            . ' Time is a relative timestring fragment passed to DateTime, such as "30days".', false, true );
53        $this->addOption( 'fromUser', 'Continue from the given user ID (exclusive).', false, true );
54        $this->addOption( 'use-job-queue', 'If job queue should be used to refresh user impact data.' );
55        $this->addOption( 'force', 'Run even if GERefreshUserImpactDataMaintenanceScriptEnabled is false' );
56        $this->addOption( 'dry-run', 'When used, the script will only count the number of users it would update.' );
57        $this->addOption( 'verbose', 'Verbose mode' );
58        $this->setBatchSize( 100 );
59    }
60
61    /** @inheritDoc */
62    public function execute() {
63        if ( !$this->getConfig()->get( 'GERefreshUserImpactDataMaintenanceScriptEnabled' )
64            && !$this->hasOption( 'force' )
65        ) {
66            $this->output(
67                'GERefreshUserImpactDataMaintenanceScriptEnabled is set to false on this wiki.' .
68                PHP_EOL
69            );
70            return;
71        }
72        $this->initOptions();
73        $this->initServices();
74
75        $users = [];
76        foreach ( $this->getUsers() as $user ) {
77            $realUser = $this->userFactory->newFromUserIdentity( $user );
78            if ( $realUser->isHidden() ) {
79                // do not update impact data for hidden users (T337845)
80                $this->output( " ...skipping user {$user->getId()}, hidden.\n" );
81                continue;
82            }
83            if ( $realUser->isBot() ) {
84                // do not update impact data for bots (T351898)
85                $this->output( " ...skipping user {$user->getId()}, bot.\n" );
86                continue;
87            }
88
89            if ( $this->hasOption( 'dry-run' ) ) {
90                if ( $this->hasOption( 'verbose' ) ) {
91                    $this->output( "  ...would refresh user impact for user {$user->getId()}\n" );
92                }
93                continue;
94            } elseif ( $this->hasOption( 'use-job-queue' ) ) {
95                $users[$user->getId()] = null;
96                if ( count( $users ) >= $this->getBatchSize() ) {
97                    if ( $this->hasOption( 'verbose' ) ) {
98                        $usersText = implode( ', ', array_keys( $users ) );
99                        $this->output( " ... enqueueing refreshUserImpactJob for users $usersText\n" );
100                    }
101                    $this->jobQueueGroupFactory->makeJobQueueGroup()->lazyPush(
102                        new RefreshUserImpactJob( [
103                            'impactDataBatch' => $users,
104                            'staleBefore' => $this->ignoreAfter,
105                        ] )
106                    );
107                    $users = [];
108                }
109            } else {
110                $userImpact = $this->userImpactLookup->getExpensiveUserImpact( $user );
111                if ( $userImpact ) {
112                    if ( $this->hasOption( 'verbose' ) ) {
113                        $this->output( "  ...refreshing user impact for user {$user->getId()}\n" );
114                    }
115                    $this->userImpactStore->setUserImpact( $userImpact );
116                } elseif ( $this->hasOption( 'verbose' ) ) {
117                    $this->output( "  ...could not generate user impact for user {$user->getId()}\n" );
118                }
119            }
120        }
121
122        if ( $this->totalUsers ) {
123            $this->output( "Done. Processed $this->totalUsers users.\n" );
124        }
125    }
126
127    private function initOptions(): void {
128        if ( !$this->hasOption( 'editedWithin' ) && !$this->hasOption( 'registeredWithin' ) ) {
129            $this->fatalError( 'must use at least one of --editedWithin and --registeredWithin' );
130        }
131
132        $ignoreIfUpdatedWithin = $this->getOption( 'ignoreIfUpdatedWithin' );
133        if ( $ignoreIfUpdatedWithin ) {
134            $this->ignoreAfter = $this->getTimestampFromRelativeDate( $ignoreIfUpdatedWithin );
135        }
136    }
137
138    private function initServices(): void {
139        $services = $this->getServiceContainer();
140        $growthServices = GrowthExperimentsServices::wrap( $services );
141        $this->actorStore = $services->getActorStore();
142        $this->userFactory = $services->getUserFactory();
143        $this->jobQueueGroupFactory = $services->getJobQueueGroupFactory();
144        $this->userImpactLookup = $growthServices->getUncachedUserImpactLookup();
145        $this->userImpactStore = $growthServices->getUserImpactStore();
146        $this->userDatabaseHelper = $growthServices->getUserDatabaseHelper();
147    }
148
149    /**
150     * @return Generator<UserIdentity>
151     */
152    private function getUsers(): Generator {
153        $queryBuilder = $this->getQueryBuilder();
154        $queryBuilder->select( 'actor_user' );
155        // do not update impact data for temporary users (T379672)
156        $queryBuilder->named();
157        $queryBuilder->limit( $this->getBatchSize() );
158        $queryBuilder->orderByUserId( SelectQueryBuilder::SORT_ASC );
159        $queryBuilder->caller( __METHOD__ );
160        $lastUserId = (int)$this->getOption( 'fromUser', 0 );
161        $dbr = $this->getReplicaDB();
162        do {
163            $this->output( "processing {$this->getBatchSize()} users starting with $lastUserId\n" );
164            $batchQueryBuilder = clone $queryBuilder;
165            $batchQueryBuilder->where( $dbr->expr( 'actor_user', '>', $lastUserId ) );
166            $userIds = $batchQueryBuilder->fetchFieldValues();
167            if ( $userIds ) {
168                $users = $this->actorStore->newSelectQueryBuilder( $dbr )
169                    ->whereUserIds( $userIds )
170                    ->caller( __METHOD__ )
171                    ->fetchUserIdentities();
172            } else {
173                $users = [];
174            }
175            foreach ( $users as $user ) {
176                $lastUserId = $user->getId();
177                // Do staleness check, if we are not using the job queue. Jobs can run after
178                // significant delays and multiple updates for the same user might get queued,
179                // so we do the check when the job runs.
180                if ( $this->ignoreAfter && !$this->hasOption( 'use-job-queue' ) ) {
181                    $cachedUserImpact = $this->userImpactStore->getExpensiveUserImpact( $user );
182                    if ( $cachedUserImpact && $cachedUserImpact->getGeneratedAt() >= $this->ignoreAfter ) {
183                        if ( $this->hasOption( 'verbose' ) ) {
184                            $this->output( "  ...skipping user {$user->getId()}, has recent cached entry\n" );
185                        }
186                        continue;
187                    }
188                }
189                yield $user;
190            }
191            $this->waitForReplication();
192            $usersProcessedInThisBatch = count( $userIds );
193            $this->totalUsers += $usersProcessedInThisBatch;
194            if ( $usersProcessedInThisBatch > 0 ) {
195                $this->output( "  processed $usersProcessedInThisBatch users\n" );
196            }
197        } while ( $usersProcessedInThisBatch === $this->getBatchSize() );
198    }
199
200    private function getQueryBuilder(): UserSelectQueryBuilder {
201        $editedWithin = $this->getOption( 'editedWithin' );
202        $registeredWithin = $this->getOption( 'registeredWithin' );
203        $hasEditsAtLeast = $this->getOption( 'hasEditsAtLeast' );
204
205        $dbr = $this->getReplicaDB();
206        $queryBuilder = $this->actorStore->newSelectQueryBuilder( $dbr );
207        if ( $editedWithin ) {
208            $timestamp = $dbr->timestamp( $this->getTimestampFromRelativeDate( $editedWithin ) );
209            $queryBuilder->join( 'revision', null, [ 'rev_actor = actor_id' ] );
210            $queryBuilder->where( $dbr->expr( 'rev_timestamp', '>=', $timestamp ) );
211            $queryBuilder->groupBy( [ 'actor_user' ] );
212        }
213        if ( $registeredWithin ) {
214            $firstUserId = $this->userDatabaseHelper->findFirstUserIdForRegistrationTimestamp(
215                $this->getTimestampFromRelativeDate( $registeredWithin )
216            );
217            if ( $firstUserId ) {
218                $queryBuilder->where( $dbr->expr( 'actor_user', '>=', $firstUserId ) );
219            } else {
220                $queryBuilder->where( '0 = 1' );
221            }
222        }
223        if ( $hasEditsAtLeast ) {
224            $queryBuilder->join( 'user', null, [ 'user_id = actor_user' ] );
225            $queryBuilder->where( $dbr->expr( 'user_editcount', '>=', (int)$hasEditsAtLeast ) );
226        }
227        return $queryBuilder;
228    }
229
230    /**
231     * @param string $relativeDate A relative date string fragment that will be prefixed with a
232     *   minus sign and passed to the DateTime constructor.
233     * @return int
234     */
235    private function getTimestampFromRelativeDate( string $relativeDate ): int {
236        try {
237            $dateTime = new DateTime( 'now - ' . $relativeDate );
238        } catch ( Exception $e ) {
239            $this->fatalError( $e->getMessage() );
240        }
241        return $dateTime->getTimestamp();
242    }
243
244}
245
// Record this script's class name in $maintClass, then include the file named by the
// RUN_MAINTENANCE_IF_MAIN constant. That constant is defined outside this file; presumably
// it points at MediaWiki's runner, which executes the class when this file is the entry
// point - confirm against maintenance/Maintenance.php.
$maintClass = RefreshUserImpactData::class;
require_once RUN_MAINTENANCE_IF_MAIN;