MediaWiki REL1_37
findMissingActors.php
Go to the documentation of this file.
1<?php
28
29require_once __DIR__ . '/Maintenance.php';
30
37
/** @var UserFactory|null Assigned by initializeServices() */
41 private $userFactory;
42
47
52
/** @var LBFactory|null Assigned by initializeServices() */
56 private $lbFactory;
57
62
/** @var array[]|null Field map cached by getTables(); empty until first use */
64 private $tables;
65
/**
 * Register the script description, the command line options and the
 * default batch size.
 *
 * --field is required; --skip and --overwrite-with are optional.
 * All three options take an argument.
 */
public function __construct() {
	parent::__construct();

	// Help text for --overwrite-with, assembled from its individual sentences.
	$overwriteHelp = implode( '', [
		'Replace missing actors with this user. ',
		'Typically, this would be "Unknown user", but it could be any reserved ',
		'system user (per $wgReservedUsernames) or locally registered user. ',
		'If not given, invalid actors will only be listed, not fixed. ',
		'You will be prompted for confirmation before data is written. ',
	] );

	$this->addDescription( 'Find and fix invalid actor IDs.' );
	$this->addOption( 'field', 'The name of a database field to process', true, true );
	$this->addOption( 'skip', 'A comma-separated list of actor IDs to skip.', false, true );
	$this->addOption( 'overwrite-with', $overwriteHelp, false, true );

	$this->setBatchSize( 1000 );
}
83
/**
 * Set up the services this script depends on.
 *
 * Each dependency is resolved in this order: the argument passed in, then
 * the value already stored on this object, then the instance provided by
 * MediaWikiServices. Calling the method without arguments therefore keeps
 * any previously injected service.
 *
 * @param UserFactory|null $userFactory
 * @param UserNameUtils|null $userNameUtils
 * @param LoadBalancer|null $loadBalancer
 * @param LBFactory|null $lbFactory
 * @param ActorNormalization|null $actorNormalization
 */
public function initializeServices(
	?UserFactory $userFactory = null,
	?UserNameUtils $userNameUtils = null,
	?LoadBalancer $loadBalancer = null,
	?LBFactory $lbFactory = null,
	?ActorNormalization $actorNormalization = null
) {
	$services = MediaWikiServices::getInstance();

	$this->userFactory = $userFactory ?? $this->userFactory ?? $services->getUserFactory();
	$this->userNameUtils = $userNameUtils ?? $this->userNameUtils ?? $services->getUserNameUtils();
	$this->loadBalancer = $loadBalancer ?? $this->loadBalancer ?? $services->getDBLoadBalancer();
	$this->lbFactory = $lbFactory ?? $this->lbFactory ?? $services->getDBLoadBalancerFactory();
	$this->actorNormalization = $actorNormalization ?? $this->actorNormalization ??
		$services->getActorNormalization();
}
100
/**
 * Return the map of actor fields this script can process, building and
 * caching it on first use.
 *
 * @return array[] Map from field name to a list of
 *  [ table name, actor field name, row ID field name ]
 */
private function getTables() {
	global $wgActorTableSchemaMigrationStage;

	if ( !$this->tables ) {
		$tables = [
			'ar_actor' => [ 'archive', 'ar_actor', 'ar_id' ],
			'ipb_by_actor' => [ 'ipblocks', 'ipb_by_actor', 'ipb_id' ], // no index on ipb_by_actor!
			'img_actor' => [ 'image', 'img_actor', 'img_name' ],
			'oi_actor' => [ 'oldimage', 'oi_actor', 'oi_archive_name' ], // no index on oi_archive_name!
			'fa_actor' => [ 'filearchive', 'fa_actor', 'fa_id' ],
			'rc_actor' => [ 'recentchanges', 'rc_actor', 'rc_id' ],
			'log_actor' => [ 'logging', 'log_actor', 'log_id' ],
		];

		// The revision actor lives in a different place depending on the
		// schema migration stage, so only offer the fields that are written.
		if ( $wgActorTableSchemaMigrationStage & SCHEMA_COMPAT_WRITE_TEMP ) {
			$tables['revactor_actor'] = [ 'revision_actor_temp', 'revactor_actor', 'revactor_rev' ];
		}
		if ( $wgActorTableSchemaMigrationStage & SCHEMA_COMPAT_WRITE_NEW ) {
			$tables['rev_actor'] = [ 'revision', 'rev_actor', 'rev_id' ];
		}
		$this->tables = $tables;
	}
	return $this->tables;
}
128
/**
 * Look up the table description for an actor field.
 *
 * @param string $field Field name as passed to --field
 * @return array|null The [ table, actor field, ID field ] entry from
 *  getTables(), or null if the field is not known
 */
private function getTableInfo( $field ) {
	return $this->getTables()[$field] ?? null;
}
137
/**
 * Return the actor ID of the user given with the --overwrite-with option.
 *
 * The name is accepted if it belongs to a registered user, or if it is a
 * valid but unusable name, which is reported as a system user. Any other
 * name ends the script with a fatal error.
 *
 * @return int|null The replacement actor ID, or null if --overwrite-with
 *  was not given
 */
private function getNewActorId() {
	$name = $this->getOption( 'overwrite-with' );

	if ( $name === null ) {
		return null;
	}

	$user = $this->userFactory->newFromName( $name );

	if ( !$user ) {
		// $user is falsy here, so report the name that was typed instead.
		$this->fatalError( "Not a valid user name: '$name'" );
	}

	$name = $this->userNameUtils->getCanonical( $name, UserNameUtils::RIGOR_NONE );

	if ( $user->isRegistered() ) {
		$this->output( "Using existing user: '$user'\n" );
	} elseif ( !$this->userNameUtils->isValid( $name ) ) {
		$this->fatalError( "Not a valid user name: '$name'" );
	} elseif ( !$this->userNameUtils->isUsable( $name ) ) {
		// Valid but not usable: treated as a system user.
		$this->output( "Using system user: '$name'\n" );
	} else {
		$this->fatalError( "Unknown user: '$name'" );
	}

	$dbw = $this->loadBalancer->getConnectionRef( DB_PRIMARY );
	$actorId = $this->actorNormalization->acquireActorId( $user, $dbw );

	if ( !$actorId ) {
		$this->fatalError( "Failed to acquire an actor ID for user '$user'" );
	}

	$this->output( "Replacement actor ID is $actorId.\n" );
	return $actorId;
}
182
/**
 * Run the script: validate --field, list one batch of rows with invalid
 * actor IDs and, if --overwrite-with was given and the operator confirms,
 * overwrite those actor IDs.
 */
public function execute() {
	$this->initializeServices();

	$field = $this->getOption( 'field' );
	if ( !$this->getTableInfo( $field ) ) {
		$this->fatalError( "Unknown field: $field.\n" );
	}

	$skippedIds = $this->parseIntList( $this->getOption( 'skip', '' ) );
	$replacementId = $this->getNewActorId();

	$badActors = $this->findBadActors( $field, $skippedIds );

	// Only prompt when there is something to fix and a replacement to use.
	if ( $badActors && $replacementId ) {
		$warningLines = [
			"\n",
			"Do you want to OVERWRITE the listed actor IDs?\n",
			"Information about the invalid IDs will be lost!\n",
			"\n",
		];
		foreach ( $warningLines as $line ) {
			$this->output( $line );
		}

		$answer = $this->readconsole( 'Type "yes" to continue: ' );

		if ( $answer !== 'yes' ) {
			$this->fatalError( 'Aborted.' );
		} else {
			$this->overwriteActorIDs( $field, array_keys( $badActors ), $replacementId );
		}
	}

	$this->output( "Done.\n" );
}
212
/**
 * Find rows whose actor ID has no matching row in the actor table.
 *
 * At most one batch (see getBatchSize()) of rows is fetched and printed.
 *
 * @param string $field Name of the actor field to check, see getTables()
 * @param int[] $skip Actor IDs that should not be reported
 * @return array Map from the row's ID field value to its invalid actor ID
 */
private function findBadActors( $field, $skip ) {
	[ $table, $actorField, $idField ] = $this->getTableInfo( $field );
	$this->output( "Finding invalid actor IDs in $table.$actorField...\n" );

	// Read from a replica, using the query groups listed below.
	$dbr = $this->loadBalancer->getConnectionRef(
		DB_REPLICA,
		[ 'maintenance', 'vslow', 'slow' ]
	);

	/*
	We are building an SQL query like this one here, performing a left join
	to detect rows in $table that lack a matching row in the actor table.

	In this example, $field is 'log_actor', so $table is 'logging',
	$actorField is 'log_actor', and $idField is 'log_id'.
	Further, $skip is [ 1, 2, 3, 4 ] and the batch size is 1000.

	SELECT log_actor, log_id
	FROM logging
	LEFT JOIN actor ON log_actor = actor_id
	WHERE actor_id IS NULL
	AND log_actor NOT IN (1, 2, 3, 4)
	LIMIT 1000;
	*/

	$conds = [ 'actor_id' => null ];

	if ( $skip ) {
		$conds[] = $actorField . ' NOT IN ( ' . $dbr->makeList( $skip ) . ' ) ';
	}

	$queryBuilder = $dbr->newSelectQueryBuilder();
	$queryBuilder->table( $table )
		->fields( [ $actorField, $idField ] )
		->conds( $conds )
		->leftJoin( 'actor', null, [ "$actorField = actor_id" ] )
		->limit( $this->getBatchSize() )
		->caller( __METHOD__ );

	$res = $queryBuilder->fetchResultSet();
	$count = $res->numRows();

	$bad = [];

	if ( $count ) {
		$this->output( "\t\tID\tACTOR\n" );
	}

	foreach ( $res as $row ) {
		$id = $row->$idField;
		$actor = (int)( $row->$actorField );

		$bad[$id] = $actor;
		$this->output( "\t\t$id\t$actor\n" );
	}

	$this->output( "\tFound $count invalid actor IDs.\n" );

	// A full batch means there may be more rows than were fetched.
	if ( $count >= $this->getBatchSize() ) {
		$this->output( "\tBatch size reached, run again after fixing the current batch.\n" );
	}

	return $bad;
}
285
/**
 * Overwrite the actor ID in a given set of rows.
 *
 * @param string $field Name of the actor field to update, see getTables()
 * @param array $ids Values of the row ID field identifying the rows to change
 * @param int $overwrite Actor ID to write into those rows
 * @return int Number of affected rows as reported by the database
 */
private function overwriteActorIDs( $field, array $ids, int $overwrite ) {
	[ $table, $actorField, $idField ] = $this->getTableInfo( $field );

	$total = count( $ids );
	$this->output( "OVERWRITING $total actor IDs in $table.$actorField with $overwrite...\n" );

	$newValues = [ $actorField => $overwrite ];
	$rowFilter = [ $idField => $ids ];

	$dbw = $this->loadBalancer->getConnectionRef( DB_PRIMARY );
	$dbw->update( $table, $newValues, $rowFilter, __METHOD__ );
	$updated = $dbw->affectedRows();

	// Wait for replication before reporting the result.
	$this->lbFactory->waitForReplication();
	$this->output( "\tUpdated $updated rows.\n" );

	return $updated;
}
312
313}
314
// NOTE(review): presumably $maintClass names the class to run, and the
// included file starts it when this script is the entry point — confirm
// against Maintenance.php.
315$maintClass = FindMissingActors::class;
316require_once RUN_MAINTENANCE_IF_MAIN;
int $wgActorTableSchemaMigrationStage
Actor table schema migration stage, for migration from the temporary table revision_actor_temp to the...
const SCHEMA_COMPAT_WRITE_TEMP
Definition Defines.php:264
const SCHEMA_COMPAT_WRITE_NEW
Definition Defines.php:266
Maintenance script for finding and replacing invalid actor IDs, see T261325.
findBadActors( $field, $skip)
Find rows that have bad actor IDs.
execute()
Do the actual work.
UserNameUtils null $userNameUtils
ActorNormalization $actorNormalization
getNewActorId()
Returns the actor ID of the user specified with the --overwrite-with option, or null if --overwrite-with is not set.
initializeServices(?UserFactory $userFactory=null, ?UserNameUtils $userNameUtils=null, ?LoadBalancer $loadBalancer=null, ?LBFactory $lbFactory=null, ?ActorNormalization $actorNormalization=null)
UserFactory null $userFactory
overwriteActorIDs( $field, array $ids, int $overwrite)
Overwrite the actor ID in a given set of rows.
__construct()
Default constructor.
LoadBalancer null $loadBalancer
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
output( $out, $channel=null)
Throw some output to the user.
static readconsole( $prompt='> ')
Prompt the console for input.
getBatchSize()
Returns batch size.
parseIntList( $text)
Utility function to parse a string (perhaps from a command line option) into a list of integers (perh...
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Creates User objects.
UserNameUtils service.
An interface for generating database load balancers.
Definition LBFactory.php:42
Database connection, tracking, load balancing, and transaction manager for a cluster.
Service for dealing with the actor table.
const DB_REPLICA
Definition defines.php:25
const DB_PRIMARY
Definition defines.php:27