Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 134
0.00% covered (danger)
0.00%
0 / 7
CRAP
0.00% covered (danger)
0.00%
0 / 1
RefreshUserImpactData
0.00% covered (danger)
0.00%
0 / 128
0.00% covered (danger)
0.00%
0 / 7
1406
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 16
0.00% covered (danger)
0.00%
0 / 1
2
 execute
0.00% covered (danger)
0.00%
0 / 44
0.00% covered (danger)
0.00%
0 / 1
240
 initOptions
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
20
 initServices
0.00% covered (danger)
0.00%
0 / 8
0.00% covered (danger)
0.00%
0 / 1
2
 getUsers
0.00% covered (danger)
0.00%
0 / 30
0.00% covered (danger)
0.00%
0 / 1
90
 getQueryBuilder
0.00% covered (danger)
0.00%
0 / 21
0.00% covered (danger)
0.00%
0 / 1
30
 getTimestampFromRelativeDate
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3namespace GrowthExperiments\Maintenance;
4
5use DateTime;
6use Exception;
7use Generator;
8use GrowthExperiments\GrowthExperimentsServices;
9use GrowthExperiments\UserDatabaseHelper;
10use GrowthExperiments\UserImpact\RefreshUserImpactJob;
11use GrowthExperiments\UserImpact\UserImpactLookup;
12use GrowthExperiments\UserImpact\UserImpactStore;
13use Maintenance;
14use MediaWiki\JobQueue\JobQueueGroupFactory;
15use MediaWiki\MediaWikiServices;
16use MediaWiki\User\ActorStore;
17use MediaWiki\User\UserFactory;
18use MediaWiki\User\UserIdentity;
19use MediaWiki\User\UserSelectQueryBuilder;
20use Wikimedia\Rdbms\SelectQueryBuilder;
21
// Resolve the MediaWiki root: use MW_INSTALL_PATH when it is set in the environment,
// otherwise fall back to three directories above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP === false ? __DIR__ . '/../../..' : $IP;
require_once $IP . '/maintenance/Maintenance.php';
27
/**
 * Maintenance script that updates data in the growthexperiments_user_impact table.
 *
 * Users are selected by recent edit activity (--editedWithin) and/or registration date
 * (--registeredWithin). For each selected user the impact data is either recomputed and
 * stored right away, or handed to the job queue in batches (--use-job-queue).
 */
class RefreshUserImpactData extends Maintenance {

    private ActorStore $actorStore;
    private UserFactory $userFactory;
    private UserImpactLookup $userImpactLookup;
    private UserImpactStore $userImpactStore;
    private UserDatabaseHelper $userDatabaseHelper;

    private JobQueueGroupFactory $jobQueueGroupFactory;

    /** @var int|null Ignore a user if they have data generated after this Unix timestamp. */
    private ?int $ignoreAfter = null;

    /** @var int Number of user IDs selected so far by the batch queries in getUsers(). */
    private int $totalUsers = 0;

    public function __construct() {
        parent::__construct();
        $this->requireExtension( 'GrowthExperiments' );
        $this->addDescription( 'Update data in the growthexperiments_user_impact table.' );
        $this->addOption( 'editedWithin', 'Apply to users who have edited within the given time.'
            . ' Time is a relative timestring fragment passed to DateTime, such as "30days".', false, true );
        $this->addOption( 'registeredWithin', 'Apply to users who have registered within the given time.'
            . ' Time is a relative timestring fragment passed to DateTime, such as "30days".', false, true );
        $this->addOption( 'hasEditsAtLeast', 'Apply to users who have at least this many edits.', false, true );
        $this->addOption( 'ignoreIfUpdatedWithin', 'Skip cache records which were stored within the given time.'
            . ' Time is a relative timestring fragment passed to DateTime, such as "30days".', false, true );
        $this->addOption( 'fromUser', 'Continue from the given user ID (exclusive).', false, true );
        $this->addOption( 'use-job-queue', 'If job queue should be used to refresh user impact data.' );
        $this->addOption( 'force', 'Run even if GERefreshUserImpactDataMaintenanceScriptEnabled is false' );
        $this->addOption( 'dry-run', 'When used, the script will only count the number of users it would update.' );
        $this->addOption( 'verbose', 'Verbose mode' );
        $this->setBatchSize( 100 );
    }

    /** @inheritDoc */
    public function execute() {
        if ( !$this->getConfig()->get( 'GERefreshUserImpactDataMaintenanceScriptEnabled' )
            && !$this->hasOption( 'force' )
        ) {
            $this->output(
                'GERefreshUserImpactDataMaintenanceScriptEnabled is set to false on this wiki.' .
                PHP_EOL
            );
            return;
        }
        $this->initOptions();
        $this->initServices();

        $isDryRun = $this->hasOption( 'dry-run' );
        $useJobQueue = $this->hasOption( 'use-job-queue' );
        $isVerbose = $this->hasOption( 'verbose' );

        // Users waiting to be enqueued, as [ user ID => null ]. Only filled in job queue mode.
        $users = [];
        foreach ( $this->getUsers() as $user ) {
            $realUser = $this->userFactory->newFromUserIdentity( $user );
            if ( $realUser->isHidden() ) {
                // do not update impact data for hidden users (T337845)
                $this->output( " ...skipping user {$user->getId()}, hidden.\n" );
                continue;
            }
            if ( $realUser->isBot() ) {
                // do not update impact data for bots (T351898)
                $this->output( " ...skipping user {$user->getId()}, bot.\n" );
                continue;
            }

            if ( $isDryRun ) {
                if ( $isVerbose ) {
                    $this->output( "  ...would refresh user impact for user {$user->getId()}\n" );
                }
                continue;
            }

            if ( $useJobQueue ) {
                $users[$user->getId()] = null;
                if ( count( $users ) >= $this->getBatchSize() ) {
                    $this->enqueueRefreshJob( $users );
                    $users = [];
                }
                continue;
            }

            // Synchronous mode: recompute the impact data and store it immediately.
            $userImpact = $this->userImpactLookup->getExpensiveUserImpact( $user );
            if ( $userImpact ) {
                if ( $isVerbose ) {
                    $this->output( "  ...refreshing user impact for user {$user->getId()}\n" );
                }
                $this->userImpactStore->setUserImpact( $userImpact );
            } elseif ( $isVerbose ) {
                $this->output( "  ...could not generate user impact for user {$user->getId()}\n" );
            }
        }

        // Flush the final, partially filled batch. Without this, users left over after the
        // last full batch would never be enqueued.
        if ( $users ) {
            $this->enqueueRefreshJob( $users );
        }

        if ( $this->totalUsers ) {
            $this->output( "Done. Processed $this->totalUsers users.\n" );
        }
    }

    /**
     * Push a single RefreshUserImpactJob covering the given users onto the job queue.
     *
     * @param array<int,null> $users Map whose keys are user IDs; values are null.
     */
    private function enqueueRefreshJob( array $users ): void {
        if ( $this->hasOption( 'verbose' ) ) {
            $usersText = implode( ', ', array_keys( $users ) );
            $this->output( " ... enqueueing refreshUserImpactJob for users $usersText\n" );
        }
        $this->jobQueueGroupFactory->makeJobQueueGroup()->lazyPush(
            new RefreshUserImpactJob( [
                'impactDataBatch' => $users,
                'staleBefore' => $this->ignoreAfter,
            ] )
        );
    }

    /**
     * Validate the command line options and derive $ignoreAfter from --ignoreIfUpdatedWithin.
     *
     * Aborts via fatalError() unless --editedWithin or --registeredWithin is given.
     */
    private function initOptions(): void {
        if ( !$this->hasOption( 'editedWithin' ) && !$this->hasOption( 'registeredWithin' ) ) {
            $this->fatalError( 'must use at least one of --editedWithin and --registeredWithin' );
        }

        $ignoreIfUpdatedWithin = $this->getOption( 'ignoreIfUpdatedWithin' );
        if ( $ignoreIfUpdatedWithin ) {
            $this->ignoreAfter = $this->getTimestampFromRelativeDate( $ignoreIfUpdatedWithin );
        }
    }

    /**
     * Fetch the MediaWiki core and GrowthExperiments services this script relies on.
     */
    private function initServices(): void {
        $services = MediaWikiServices::getInstance();
        $growthServices = GrowthExperimentsServices::wrap( $services );
        $this->actorStore = $services->getActorStore();
        $this->userFactory = $services->getUserFactory();
        $this->jobQueueGroupFactory = $services->getJobQueueGroupFactory();
        $this->userImpactLookup = $growthServices->getUncachedUserImpactLookup();
        $this->userImpactStore = $growthServices->getUserImpactStore();
        $this->userDatabaseHelper = $growthServices->getUserDatabaseHelper();
    }

    /**
     * Iterate over the users matching the selection options, one batch of user IDs at a time,
     * in ascending user ID order and starting after --fromUser.
     *
     * In synchronous mode with --ignoreIfUpdatedWithin, users with a sufficiently recent stored
     * entry are not yielded. $totalUsers counts every selected user ID, including skipped ones.
     *
     * @return Generator<UserIdentity>
     */
    private function getUsers(): Generator {
        $queryBuilder = $this->getQueryBuilder();
        $queryBuilder->select( 'actor_user' );
        $queryBuilder->limit( $this->getBatchSize() );
        $queryBuilder->orderByUserId( SelectQueryBuilder::SORT_ASC );
        $lastUserId = (int)$this->getOption( 'fromUser', 0 );
        $dbr = $this->getDB( DB_REPLICA );
        do {
            $this->output( "processing {$this->getBatchSize()} users starting with $lastUserId\n" );
            $batchQueryBuilder = clone $queryBuilder;
            $batchQueryBuilder->where( $dbr->expr( 'actor_user', '>', $lastUserId ) );
            $userIds = $batchQueryBuilder->fetchFieldValues();
            if ( $userIds ) {
                // The ID query is sorted ascending, so its last value is the highest ID of the
                // batch. Advance the paging cursor from it rather than from the identity lookup
                // below, which has no ORDER BY and so does not guarantee row order.
                $lastUserId = (int)end( $userIds );
                $users = $this->actorStore->newSelectQueryBuilder( $dbr )
                    ->whereUserIds( $userIds )
                    ->fetchUserIdentities();
            } else {
                $users = [];
            }
            foreach ( $users as $user ) {
                // Do staleness check, if we are not using the job queue. Jobs can run after
                // significant delays and multiple updates for the same user might get queued,
                // so we do the check when the job runs.
                if ( $this->ignoreAfter && !$this->hasOption( 'use-job-queue' ) ) {
                    $cachedUserImpact = $this->userImpactStore->getExpensiveUserImpact( $user );
                    if ( $cachedUserImpact && $cachedUserImpact->getGeneratedAt() >= $this->ignoreAfter ) {
                        if ( $this->hasOption( 'verbose' ) ) {
                            $this->output( "  ...skipping user {$user->getId()}, has recent cached entry\n" );
                        }
                        continue;
                    }
                }
                yield $user;
            }
            $this->waitForReplication();
            $usersProcessedInThisBatch = count( $userIds );
            $this->totalUsers += $usersProcessedInThisBatch;
            if ( $usersProcessedInThisBatch > 0 ) {
                $this->output( "  processed $usersProcessedInThisBatch users\n" );
            }
            // A batch smaller than the batch size means there are no more matching users.
        } while ( $usersProcessedInThisBatch === $this->getBatchSize() );
    }

    /**
     * Build the base user query from --editedWithin, --registeredWithin and --hasEditsAtLeast.
     *
     * The caller adds the selected field, ordering, limit and paging condition.
     *
     * @return UserSelectQueryBuilder
     */
    private function getQueryBuilder(): UserSelectQueryBuilder {
        $editedWithin = $this->getOption( 'editedWithin' );
        $registeredWithin = $this->getOption( 'registeredWithin' );
        $hasEditsAtLeast = $this->getOption( 'hasEditsAtLeast' );

        $dbr = $this->getDB( DB_REPLICA );
        $queryBuilder = $this->actorStore->newSelectQueryBuilder( $dbr );
        if ( $editedWithin ) {
            $timestamp = $dbr->timestamp( $this->getTimestampFromRelativeDate( $editedWithin ) );
            $queryBuilder->join( 'revision', null, [ 'rev_actor = actor_id' ] );
            $queryBuilder->where( $dbr->expr( 'rev_timestamp', '>=', $timestamp ) );
            // One row per user, however many revisions matched.
            $queryBuilder->groupBy( [ 'actor_user' ] );
        }
        if ( $registeredWithin ) {
            $firstUserId = $this->userDatabaseHelper->findFirstUserIdForRegistrationTimestamp(
                $this->getTimestampFromRelativeDate( $registeredWithin )
            );
            if ( $firstUserId ) {
                $queryBuilder->where( $dbr->expr( 'actor_user', '>=', $firstUserId ) );
            } else {
                // No starting user ID was found (presumably nobody registered in the given
                // period), so make the query match nothing.
                $queryBuilder->where( '0 = 1' );
            }
        }
        if ( $hasEditsAtLeast ) {
            $queryBuilder->join( 'user', null, [ 'user_id = actor_user' ] );
            $queryBuilder->where( $dbr->expr( 'user_editcount', '>=', (int)$hasEditsAtLeast ) );
        }
        return $queryBuilder;
    }

    /**
     * Convert a relative date such as "30days" into the Unix timestamp that far in the past.
     *
     * Aborts via fatalError() when DateTime cannot parse the resulting string.
     *
     * @param string $relativeDate A relative date string fragment that will be prefixed with a
     *   minus sign and passed to the DateTime constructor.
     * @return int
     */
    private function getTimestampFromRelativeDate( string $relativeDate ): int {
        try {
            $dateTime = new DateTime( 'now - ' . $relativeDate );
        } catch ( Exception $e ) {
            $this->fatalError( $e->getMessage() );
        }
        return $dateTime->getTimestamp();
    }

}
242
// Name the maintenance class defined in this file, then include the file named by the
// RUN_MAINTENANCE_IF_MAIN constant (presumably defined by Maintenance.php; it is expected to
// run the class when this script is the entry point — confirm against MediaWiki core).
$maintClass = RefreshUserImpactData::class;
require_once RUN_MAINTENANCE_IF_MAIN;