MediaWiki 1.40.4
findMissingActors.php
Go to the documentation of this file.
1<?php
29
30require_once __DIR__ . '/Maintenance.php';
31
38
/**
 * @var UserFactory|null Used to turn the --overwrite-with name into a user object.
 *  Populated by initializeServices().
 */
private $userFactory;

/**
 * @var UserNameUtils|null Used to canonicalize and classify the --overwrite-with name.
 *  Populated by initializeServices().
 */
private $userNameUtils;

/**
 * @var LoadBalancer|null Source of the replica and primary database connections.
 *  Populated by initializeServices().
 */
private $loadBalancer;

/**
 * @var LBFactory|null Used to wait for replication after writing.
 *  Populated by initializeServices().
 */
private $lbFactory;

/**
 * @var ActorNormalization|null Used to acquire the replacement actor ID.
 *  Populated by initializeServices().
 */
private $actorNormalization;

/**
 * @var array[]|null Lazily built by getTables(): maps a supported field name
 *  to a [ table, actor field, ID field ] triple.
 */
private $tables;
66
/**
 * Declare the script description, its command line options and the default batch size.
 *
 * Every option takes an argument; only --field is mandatory.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Find and fix invalid actor IDs.' );

	$fieldHelp = 'The name of a database field to process';

	$typeHelp = 'Which type of invalid actors to find or fix, '
		. 'missing or broken (with empty actor_name which can\'t be associated '
		. 'with an existing user).';

	$skipHelp = 'A comma-separated list of actor IDs to skip.';

	$overwriteHelp = 'Replace invalid actors with this user. '
		. 'Typically, this would be "Unknown user", but it could be any reserved '
		. 'system user (per $wgReservedUsernames) or locally registered user. '
		. 'If not given, invalid actors will only be listed, not fixed. '
		. 'You will be prompted for confirmation before data is written. ';

	// addOption( name, description, required, withArg )
	$this->addOption( 'field', $fieldHelp, true, true );
	$this->addOption( 'type', $typeHelp, false, true );
	$this->addOption( 'skip', $skipHelp, false, true );
	$this->addOption( 'overwrite-with', $overwriteHelp, false, true );

	// At most this many invalid rows are listed (and fixed) per run.
	$this->setBatchSize( 1000 );
}
88
/**
 * Populate the service properties used by this script.
 *
 * For each service the first non-null candidate wins, in this order:
 * the argument passed in, the value already stored on this object, and
 * finally the instance provided by MediaWikiServices.
 *
 * @param UserFactory|null $userFactory
 * @param UserNameUtils|null $userNameUtils
 * @param LoadBalancer|null $loadBalancer
 * @param LBFactory|null $lbFactory
 * @param ActorNormalization|null $actorNormalization
 */
public function initializeServices(
	?UserFactory $userFactory = null,
	?UserNameUtils $userNameUtils = null,
	?LoadBalancer $loadBalancer = null,
	?LBFactory $lbFactory = null,
	?ActorNormalization $actorNormalization = null
) {
	$services = MediaWikiServices::getInstance();

	$this->userFactory = $userFactory
		?? $this->userFactory
		?? $services->getUserFactory();

	$this->userNameUtils = $userNameUtils
		?? $this->userNameUtils
		?? $services->getUserNameUtils();

	$this->loadBalancer = $loadBalancer
		?? $this->loadBalancer
		?? $services->getDBLoadBalancer();

	$this->lbFactory = $lbFactory
		?? $this->lbFactory
		?? $services->getDBLoadBalancerFactory();

	$this->actorNormalization = $actorNormalization
		?? $this->actorNormalization
		?? $services->getActorNormalization();
}
105
/**
 * Return the map of supported actor fields, building it on first use.
 *
 * @return array[] Map of field name to a [ table, actor field, ID field ] triple.
 *  The ID field is the column used to identify rows when listing and updating.
 */
private function getTables() {
	if ( $this->tables ) {
		return $this->tables;
	}

	$this->tables = [
		'ar_actor' => [ 'archive', 'ar_actor', 'ar_id' ],
		// no index on ipb_by_actor!
		'ipb_by_actor' => [ 'ipblocks', 'ipb_by_actor', 'ipb_id' ],
		'img_actor' => [ 'image', 'img_actor', 'img_name' ],
		// no index on oi_archive_name!
		'oi_actor' => [ 'oldimage', 'oi_actor', 'oi_archive_name' ],
		'fa_actor' => [ 'filearchive', 'fa_actor', 'fa_id' ],
		'rc_actor' => [ 'recentchanges', 'rc_actor', 'rc_id' ],
		'log_actor' => [ 'logging', 'log_actor', 'log_id' ],
		'rev_actor' => [ 'revision', 'rev_actor', 'rev_id' ],
	];

	return $this->tables;
}
125
/**
 * Look up the table description for one supported actor field.
 *
 * @param string $field Field name as given on the command line, e.g. 'log_actor'
 * @return string[]|null A [ table, actor field, ID field ] triple,
 *  or null if the field is not supported
 */
private function getTableInfo( $field ) {
	return $this->getTables()[$field] ?? null;
}
134
/**
 * Resolve the --overwrite-with option to the actor ID that should replace invalid ones.
 *
 * The name must belong either to a registered user or to a system user, that is,
 * a valid but unusable name. Anything else terminates the script via fatalError().
 *
 * @return int|null The replacement actor ID, or null if --overwrite-with was not given
 */
private function getNewActorId() {
	$requestedName = $this->getOption( 'overwrite-with' );

	if ( $requestedName === null ) {
		// Nothing to overwrite with: the script will only list invalid actors.
		return null;
	}

	$user = $this->userFactory->newFromName( $requestedName );

	if ( !$user ) {
		$this->fatalError( "Not a valid user name: '$requestedName'" );
	}

	$canonicalName = $this->userNameUtils->getCanonical(
		$requestedName,
		UserRigorOptions::RIGOR_NONE
	);

	if ( $user->isRegistered() ) {
		$this->output( "Using existing user: '$user'\n" );
	} else {
		// Not registered: only accept names that are valid but not usable,
		// which this script treats as system users.
		if ( !$this->userNameUtils->isValid( $canonicalName ) ) {
			$this->fatalError( "Not a valid user name: '$canonicalName'" );
		} elseif ( $this->userNameUtils->isUsable( $canonicalName ) ) {
			$this->fatalError( "Unknown user: '$canonicalName'" );
		} else {
			$this->output( "Using system user: '$canonicalName'\n" );
		}
	}

	$primaryDb = $this->loadBalancer->getConnectionRef( DB_PRIMARY );
	$newActorId = $this->actorNormalization->acquireActorId( $user, $primaryDb );

	if ( !$newActorId ) {
		$this->fatalError( "Failed to acquire an actor ID for user '$user'" );
	}

	$this->output( "Replacement actor ID is $newActorId.\n" );
	return $newActorId;
}
179
/**
 * Do the actual work: validate the options, list one batch of invalid actor IDs
 * and, if --overwrite-with was given and the operator confirms, overwrite them.
 */
public function execute() {
	$this->initializeServices();

	$field = $this->getOption( 'field' );
	if ( !$this->getTableInfo( $field ) ) {
		$this->fatalError( "Unknown field: $field.\n" );
	}

	$type = $this->getOption( 'type', 'missing' );
	if ( !in_array( $type, [ 'missing', 'broken' ], true ) ) {
		$this->fatalError( "Unknown type: $type.\n" );
	}

	$skip = $this->parseIntList( $this->getOption( 'skip', '' ) );

	// Resolved before searching, so a bad --overwrite-with name fails early.
	$overwrite = $this->getNewActorId();

	$bad = $this->findBadActors( $field, $type, $skip );

	if ( $bad && $overwrite ) {
		$promptLines = [
			"\n",
			"Do you want to OVERWRITE the listed actor IDs?\n",
			"Information about the invalid IDs will be lost!\n",
			"\n",
		];
		foreach ( $promptLines as $line ) {
			$this->output( $line );
		}

		$confirm = self::readconsole( 'Type "yes" to continue: ' );

		if ( $confirm !== 'yes' ) {
			$this->fatalError( 'Aborted.' );
		} else {
			// $bad is keyed by row ID; only the keys are needed for the update.
			$this->overwriteActorIDs( $field, array_keys( $bad ), $overwrite );
		}
	}

	$this->output( "Done.\n" );
}
214
/**
 * Find and print one batch of rows whose actor ID is invalid.
 *
 * @param string $field Supported actor field, e.g. 'log_actor'
 * @param string $type 'missing' to find actor IDs with no row in the actor table;
 *  any other value selects rows whose actor has an empty actor_name
 * @param int[] $skip Actor IDs to ignore
 * @return int[] Map of row ID (value of the table's ID field) to the invalid actor ID,
 *  containing at most one batch of rows
 */
private function findBadActors( $field, $type, $skip ) {
	[ $table, $actorField, $idField ] = $this->getTableInfo( $field );
	$this->output( "Finding invalid actor IDs in $table.$actorField...\n" );

	// Read from a replica, requesting the 'vslow' query group.
	$dbr = $this->loadBalancer->getConnectionRef( DB_REPLICA, 'vslow' );
	$batchSize = $this->getBatchSize();

	/*
	 The query left-joins $table against the actor table. For $type 'missing'
	 it keeps the rows that found no partner in the actor table.

	 Example: $field is 'log_actor', hence $table is 'logging', $actorField is
	 'log_actor' and $idField is 'log_id'; $skip is [ 1, 2, 3, 4 ] and the
	 batch size is 1000:

	   SELECT log_actor, log_id
	   FROM logging
	   LEFT JOIN actor ON log_actor = actor_id
	   WHERE actor_id IS NULL
	   AND log_actor NOT IN ( 1, 2, 3, 4 )
	   LIMIT 1000;

	 For the other type, the first condition is actor_name = '' instead.
	*/
	if ( $type == 'missing' ) {
		$conds = [ 'actor_id' => null ];
	} else {
		$conds = [ 'actor_name' => '' ];
	}

	if ( $skip ) {
		$conds[] = $actorField . ' NOT IN ( ' . $dbr->makeList( $skip ) . ' ) ';
	}

	$res = $dbr->newSelectQueryBuilder()
		->table( $table )
		->fields( [ $actorField, $idField ] )
		->conds( $conds )
		->leftJoin( 'actor', null, [ "$actorField = actor_id" ] )
		->limit( $batchSize )
		->caller( __METHOD__ )
		->fetchResultSet();

	$count = $res->numRows();

	// Only print the column header when there is something to list.
	if ( $count ) {
		$this->output( "\t\tID\tACTOR\n" );
	}

	$bad = [];
	foreach ( $res as $row ) {
		$rowId = $row->$idField;
		$actorId = (int)( $row->$actorField );

		$bad[$rowId] = $actorId;
		$this->output( "\t\t$rowId\t$actorId\n" );
	}

	$this->output( "\tFound $count invalid actor IDs.\n" );

	// A full batch means there may be more invalid rows than were listed.
	if ( $count >= $batchSize ) {
		$this->output( "\tBatch size reached, run again after fixing the current batch.\n" );
	}

	return $bad;
}
287
/**
 * Overwrite the actor ID of the given rows with the replacement actor ID.
 *
 * @param string $field Supported actor field, e.g. 'log_actor'
 * @param array $ids Values of the table's ID field identifying the rows to update
 * @param int $overwrite The actor ID to write into the rows
 * @return int Number of rows affected by the update; 0 if $ids is empty
 */
private function overwriteActorIDs( $field, array $ids, int $overwrite ) {
	// An empty ID list would produce an empty value-list condition, which the
	// database layer rejects; there is nothing to update in that case anyway.
	if ( !$ids ) {
		return 0;
	}

	[ $table, $actorField, $idField ] = $this->getTableInfo( $field );

	$count = count( $ids );
	$this->output( "OVERWRITING $count actor IDs in $table.$actorField with $overwrite...\n" );

	$dbw = $this->loadBalancer->getConnectionRef( DB_PRIMARY );

	// Rows are selected by ID only: UPDATE $table SET $actorField = $overwrite WHERE $idField IN (...)
	$dbw->update( $table, [ $actorField => $overwrite ], [ $idField => $ids ], __METHOD__ );

	// Report what was actually changed, which may be fewer rows than requested.
	$count = $dbw->affectedRows();

	$this->lbFactory->waitForReplication();
	$this->output( "\tUpdated $count rows.\n" );

	return $count;
}
314
315}
316
// Name the maintenance class defined in this file, then include the runner.
// NOTE(review): presumably the runner only executes the script when this file
// is the entry point - confirm against Maintenance.php.
$maintClass = FindMissingActors::class;
require_once RUN_MAINTENANCE_IF_MAIN;
Maintenance script for finding and replacing invalid actor IDs, see T261325 and T307738.
execute()
Do the actual work.
initializeServices(?UserFactory $userFactory=null, ?UserNameUtils $userNameUtils=null, ?LoadBalancer $loadBalancer=null, ?LBFactory $lbFactory=null, ?ActorNormalization $actorNormalization=null)
__construct()
Default constructor.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
output( $out, $channel=null)
Throw some output to the user.
static readconsole( $prompt='> ')
Prompt the console for input.
getBatchSize()
Returns batch size.
parseIntList( $text)
Utility function to parse a string (perhaps from a command line option) into a list of integers (perhaps some kind of numeric IDs).
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Service locator for MediaWiki core services.
Creates User objects.
UserNameUtils service.
Service for dealing with the actor table.
Shared interface for rigor levels when dealing with User methods.
const DB_REPLICA
Definition defines.php:26
const DB_PRIMARY
Definition defines.php:28