MediaWiki REL1_39
findMissingActors.php
Go to the documentation of this file.
1<?php
29
30require_once __DIR__ . '/Maintenance.php';
31
38
/**
 * @var UserFactory|null Null until initializeServices() has run.
 */
42 private $userFactory;
43
/**
 * @var UserNameUtils|null Null until initializeServices() has run.
 */
47 private $userNameUtils;
48
/**
 * @var LoadBalancer|null Null until initializeServices() has run.
 */
52 private $loadBalancer;
53
/**
 * @var LBFactory|null Null until initializeServices() has run.
 */
57 private $lbFactory;
58
/**
 * @var ActorNormalization|null Null until initializeServices() has run.
 */
62 private $actorNormalization;
63
/**
 * @var array|null Lazily built by getTables(): actor field name mapped to
 *  [ table name, actor field name, row ID field name ].
 */
65 private $tables;
66
/**
 * Set up the script: description, command line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Find and fix invalid actor IDs.' );

	// Help texts are built first so that the option declarations below
	// read as a compact table of ( name, help, required, withArg ).
	$fieldHelp = 'The name of a database field to process';

	$typeHelp = 'Which type of invalid actors to find or fix, '
		. 'missing or broken (with empty actor_name which can\'t be associated '
		. 'with an existing user).';

	$skipHelp = 'A comma-separated list of actor IDs to skip.';

	$overwriteHelp = 'Replace invalid actors with this user. '
		. 'Typically, this would be "Unknown user", but it could be any reserved '
		. 'system user (per $wgReservedUsernames) or locally registered user. '
		. 'If not given, invalid actors will only be listed, not fixed. '
		. 'You will be prompted for confirmation before data is written. ';

	// Only --field is mandatory; every option takes an argument.
	$this->addOption( 'field', $fieldHelp, true, true );
	$this->addOption( 'type', $typeHelp, false, true );
	$this->addOption( 'skip', $skipHelp, false, true );
	$this->addOption( 'overwrite-with', $overwriteHelp, false, true );

	$this->setBatchSize( 1000 );
}
88
/**
 * Populate the service members used by this script.
 *
 * For each service the precedence is: the instance passed in here, then
 * an instance already stored on this object, then the instance provided
 * by MediaWikiServices.
 *
 * @param UserFactory|null $userFactory
 * @param UserNameUtils|null $userNameUtils
 * @param LoadBalancer|null $loadBalancer
 * @param LBFactory|null $lbFactory
 * @param ActorNormalization|null $actorNormalization
 */
public function initializeServices(
	?UserFactory $userFactory = null,
	?UserNameUtils $userNameUtils = null,
	?LoadBalancer $loadBalancer = null,
	?LBFactory $lbFactory = null,
	?ActorNormalization $actorNormalization = null
) {
	$services = MediaWikiServices::getInstance();

	// Step 1: an explicitly injected instance replaces whatever is stored.
	$this->userFactory = $userFactory ?? $this->userFactory;
	$this->userNameUtils = $userNameUtils ?? $this->userNameUtils;
	$this->loadBalancer = $loadBalancer ?? $this->loadBalancer;
	$this->lbFactory = $lbFactory ?? $this->lbFactory;
	$this->actorNormalization = $actorNormalization ?? $this->actorNormalization;

	// Step 2: anything still unset falls back to the service container.
	$this->userFactory = $this->userFactory ?? $services->getUserFactory();
	$this->userNameUtils = $this->userNameUtils ?? $services->getUserNameUtils();
	$this->loadBalancer = $this->loadBalancer ?? $services->getDBLoadBalancer();
	$this->lbFactory = $this->lbFactory ?? $services->getDBLoadBalancerFactory();
	$this->actorNormalization = $this->actorNormalization
		?? $services->getActorNormalization();
}
105
/**
 * Return the supported actor fields, building the list on first use.
 *
 * @return array[] Map of actor field name to
 *  [ table name, actor field name, row ID field name ].
 */
private function getTables() {
	// Already built on an earlier call: reuse it.
	if ( $this->tables ) {
		return $this->tables;
	}

	$this->tables = [
		'ar_actor' => [ 'archive', 'ar_actor', 'ar_id' ],
		'ipb_by_actor' => [ 'ipblocks', 'ipb_by_actor', 'ipb_id' ], // no index on ipb_by_actor!
		'img_actor' => [ 'image', 'img_actor', 'img_name' ],
		'oi_actor' => [ 'oldimage', 'oi_actor', 'oi_archive_name' ], // no index on oi_archive_name!
		'fa_actor' => [ 'filearchive', 'fa_actor', 'fa_id' ],
		'rc_actor' => [ 'recentchanges', 'rc_actor', 'rc_id' ],
		'log_actor' => [ 'logging', 'log_actor', 'log_id' ],
		'rev_actor' => [ 'revision', 'rev_actor', 'rev_id' ],
	];

	return $this->tables;
}
125
/**
 * Look up the table description for one actor field.
 *
 * @param string $field Actor field name, e.g. 'rev_actor'.
 *
 * @return string[]|null [ table name, actor field name, row ID field name ],
 *  or null if the field is not one of those listed by getTables().
 */
private function getTableInfo( $field ) {
	return $this->getTables()[$field] ?? null;
}
134
/**
 * Resolve the --overwrite-with option to an actor ID.
 *
 * The name must belong either to a registered user or to a valid name
 * that is not usable (reported as a "system user"); anything else is a
 * fatal error.
 *
 * @return int|null The replacement actor ID, or null if --overwrite-with
 *  was not given.
 */
private function getNewActorId() {
	$name = $this->getOption( 'overwrite-with' );
	if ( $name === null ) {
		// Nothing to overwrite with: the script only lists bad actors.
		return null;
	}

	$user = $this->userFactory->newFromName( $name );
	if ( !$user ) {
		$this->fatalError( "Not a valid user name: '$name'" );
	}

	$name = $this->userNameUtils->getCanonical( $name, UserRigorOptions::RIGOR_NONE );

	if ( $user->isRegistered() ) {
		$this->output( "Using existing user: '$user'\n" );
	} else {
		// Not registered: only accept valid names that are not usable.
		if ( !$this->userNameUtils->isValid( $name ) ) {
			$this->fatalError( "Not a valid user name: '$name'" );
		} elseif ( $this->userNameUtils->isUsable( $name ) ) {
			$this->fatalError( "Unknown user: '$name'" );
		} else {
			$this->output( "Using system user: '$name'\n" );
		}
	}

	$primaryDb = $this->loadBalancer->getConnectionRef( DB_PRIMARY );
	$actorId = $this->actorNormalization->acquireActorId( $user, $primaryDb );
	if ( !$actorId ) {
		$this->fatalError( "Failed to acquire an actor ID for user '$user'" );
	}

	$this->output( "Replacement actor ID is $actorId.\n" );

	return $actorId;
}
179
/**
 * Script entry point: validate the options, list the invalid actor IDs
 * and, if a replacement was requested and confirmed, overwrite them.
 */
public function execute() {
	$this->initializeServices();

	// --field must name one of the known actor fields.
	$field = $this->getOption( 'field' );
	$tableInfo = $this->getTableInfo( $field );
	if ( !$tableInfo ) {
		$this->fatalError( "Unknown field: $field.\n" );
	}

	// --type defaults to 'missing'; 'broken' is the only alternative.
	$type = $this->getOption( 'type', 'missing' );
	if ( !in_array( $type, [ 'missing', 'broken' ], true ) ) {
		$this->fatalError( "Unknown type: $type.\n" );
	}

	$skip = $this->parseIntList( $this->getOption( 'skip', '' ) );
	$overwrite = $this->getNewActorId();

	$bad = $this->findBadActors( $field, $type, $skip );

	// Writing needs both something to fix and a replacement actor.
	if ( $bad && $overwrite ) {
		$this->output( "\n" );
		$this->output( "Do you want to OVERWRITE the listed actor IDs?\n" );
		$this->output( "Information about the invalid IDs will be lost!\n" );
		$this->output( "\n" );
		$confirm = self::readconsole( 'Type "yes" to continue: ' );

		if ( $confirm !== 'yes' ) {
			$this->fatalError( 'Aborted.' );
		} else {
			$rowIds = array_keys( $bad );
			$this->overwriteActorIDs( $field, $rowIds, $overwrite );
		}
	}

	$this->output( "Done.\n" );
}
214
/**
 * Find and list rows whose actor field holds an invalid actor ID.
 *
 * At most one batch (see getBatchSize()) of rows is returned per call.
 *
 * @param string $field The actor field to inspect; must be a key known
 *  to getTableInfo().
 * @param string $type 'missing' to find rows with no matching actor row;
 *  any other value finds rows whose actor row has an empty actor_name.
 * @param int[] $skip Actor IDs to exclude from the search.
 *
 * @return int[] Map of row ID to the invalid actor ID found in that row.
 */
private function findBadActors( $field, $type, $skip ) {
	[ $table, $actorField, $idField ] = $this->getTableInfo( $field );
	$this->output( "Finding invalid actor IDs in $table.$actorField...\n" );

	// The server index must come first; the query groups are the second
	// argument. Without DB_REPLICA the group list would be passed as the
	// server index.
	$dbr = $this->loadBalancer->getConnectionRef(
		DB_REPLICA,
		[ 'maintenance', 'vslow', 'slow' ]
	);

	/*
	We are building an SQL query like this one here, performing a left join
	to detect rows in $table that lack a matching row in the actor table.

	In this example, $field is 'log_actor', so $table is 'logging',
	$actorField is 'log_actor', and $idField is 'log_id'.
	Further, $skip is [ 1, 2, 3, 4 ] and the batch size is 1000.

	SELECT log_id
	FROM logging
	LEFT JOIN actor ON log_actor = actor_id
	WHERE actor_id IS NULL
	AND log_actor NOT IN (1, 2, 3, 4)
	LIMIT 1000;

	For any type other than 'missing', the "actor_id IS NULL" condition is
	replaced by "actor_name = ''".
	*/

	$conds = $type === 'missing'
		? [ 'actor_id' => null ]
		: [ 'actor_name' => '' ];

	if ( $skip ) {
		$conds[] = $actorField . ' NOT IN ( ' . $dbr->makeList( $skip ) . ' ) ';
	}

	$queryBuilder = $dbr->newSelectQueryBuilder();
	$queryBuilder->table( $table )
		->fields( [ $actorField, $idField ] )
		->conds( $conds )
		->leftJoin( 'actor', null, [ "$actorField = actor_id" ] )
		->limit( $this->getBatchSize() )
		->caller( __METHOD__ );

	$res = $queryBuilder->fetchResultSet();
	$count = $res->numRows();

	$bad = [];

	// Only print the table header when there is something to list.
	if ( $count ) {
		$this->output( "\t\tID\tACTOR\n" );
	}

	foreach ( $res as $row ) {
		$id = $row->$idField;
		$actor = (int)( $row->$actorField );

		$bad[$id] = $actor;
		$this->output( "\t\t$id\t$actor\n" );
	}

	$this->output( "\tFound $count invalid actor IDs.\n" );

	// A full batch means there may be more rows beyond the LIMIT.
	if ( $count >= $this->getBatchSize() ) {
		$this->output( "\tBatch size reached, run again after fixing the current batch.\n" );
	}

	return $bad;
}
290
/**
 * Replace the actor ID in the given rows with the replacement actor.
 *
 * @param string $field The actor field to update; must be a key known
 *  to getTableInfo().
 * @param array $ids IDs of the rows to update.
 * @param int $overwrite The actor ID to write into those rows.
 *
 * @return int The number of rows affected by the update.
 */
private function overwriteActorIDs( $field, array $ids, int $overwrite ) {
	[ $table, $actorField, $idField ] = $this->getTableInfo( $field );

	$requested = count( $ids );
	$this->output( "OVERWRITING $requested actor IDs in $table.$actorField with $overwrite...\n" );

	$primaryDb = $this->loadBalancer->getConnectionRef( DB_PRIMARY );

	$newValues = [ $actorField => $overwrite ];
	$rowFilter = [ $idField => $ids ];
	$primaryDb->update( $table, $newValues, $rowFilter, __METHOD__ );

	// Read the affected row count before waiting for replication.
	$updated = $primaryDb->affectedRows();

	$this->lbFactory->waitForReplication();
	$this->output( "\tUpdated $updated rows.\n" );

	return $updated;
}
317
318}
319
// Name the maintenance class defined in this file, then include the file
// named by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): presumably that file runs $maintClass only when this script
// is the entry point — confirm against Maintenance.php.
320$maintClass = FindMissingActors::class;
321require_once RUN_MAINTENANCE_IF_MAIN;
Maintenance script for finding and replacing invalid actor IDs, see T261325 and T307738.
execute()
Do the actual work.
initializeServices(?UserFactory $userFactory=null, ?UserNameUtils $userNameUtils=null, ?LoadBalancer $loadBalancer=null, ?LBFactory $lbFactory=null, ?ActorNormalization $actorNormalization=null)
__construct()
Default constructor.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
output( $out, $channel=null)
Throw some output to the user.
static readconsole( $prompt='> ')
Prompt the console for input.
getBatchSize()
Returns batch size.
parseIntList( $text)
Utility function to parse a string (perhaps from a command line option) into a list of integers (perhaps some kind of numeric IDs).
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Service locator for MediaWiki core services.
Creates User objects.
UserNameUtils service.
Service for dealing with the actor table.
Shared interface for rigor levels when dealing with User methods.
const DB_REPLICA
Definition defines.php:26
const DB_PRIMARY
Definition defines.php:28