MediaWiki REL1_35
findMissingActors.php
Go to the documentation of this file.
1<?php
27
28require_once __DIR__ . '/Maintenance.php';
29
36
/**
 * Creates User objects; populated by initializeServices().
 *
 * @var UserFactory|null
 */
private $userFactory;

/**
 * Used to canonicalize and validate the --overwrite-with user name;
 * populated by initializeServices().
 *
 * @var UserNameUtils|null
 */
private $userNameUtils;

/**
 * Source of the database connections used for reading and writing;
 * populated by initializeServices().
 *
 * @var LoadBalancer|null
 */
private $loadBalancer;

/**
 * Used to wait for replication after rows have been updated;
 * populated by initializeServices().
 *
 * @var LBFactory|null
 */
private $lbFactory;
56
/**
 * Fields that can be processed, keyed by the value accepted for the
 * 'field' option.
 *
 * Each entry is a three-element list:
 *   [ table name, actor field name, field used to identify a row ].
 * The third element is what findBadActors() reports as the row ID and what
 * overwriteActorIDs() uses in its WHERE condition.
 */
57 private const TABLES = [
58 // 'rev_actor' => [ 'revision', 'rev_actor', 'rev_id' ], // not yet used in 1.35
59 'revactor_actor' => [ 'revision_actor_temp', 'revactor_actor', 'revactor_rev' ],
60 'ar_actor' => [ 'archive', 'ar_actor', 'ar_id' ],
61 'ipb_by_actor' => [ 'ipblocks', 'ipb_by_actor', 'ipb_id' ], // no index on ipb_by_actor!
62 'img_actor' => [ 'image', 'img_actor', 'img_name' ],
63 'oi_actor' => [ 'oldimage', 'oi_actor', 'oi_archive_name' ], // no index on oi_archive_name!
64 'fa_actor' => [ 'filearchive', 'fa_actor', 'fa_id' ],
65 'rc_actor' => [ 'recentchanges', 'rc_actor', 'rc_id' ],
66 'log_actor' => [ 'logging', 'log_actor', 'log_id' ],
67 ];
68
/**
 * Declare the script's description, its command line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Find and fix invalid actor IDs.' );

	// --field is mandatory and takes a value; the help text lists every key of TABLES.
	$knownFields = implode( ', ', array_keys( self::TABLES ) );
	$fieldHelp = "The name of a database field to process. Possible values: $knownFields";
	$this->addOption( 'field', $fieldHelp, true, true );

	// --skip is optional and takes a value.
	$skipHelp = 'A comma-separated list of actor IDs to skip.';
	$this->addOption( 'skip', $skipHelp, false, true );

	// --overwrite-with is optional and takes a value.
	$overwriteHelp = implode( '', [
		'Replace missing actors with this user. ',
		'Typically, this would be "Unknown user", but it could be any reserved ',
		'system user (per $wgReservedUsernames) or locally registered user. ',
		'If not given, invalid actors will only be listed, not fixed. ',
		'You will be prompted for confirmation before data is written. ',
	] );
	$this->addOption( 'overwrite-with', $overwriteHelp, false, true );

	$this->setBatchSize( 1000 );
}
87
/**
 * Set up the service objects this script depends on.
 *
 * For each service, an explicitly passed instance wins; otherwise an
 * instance that was already set on this object is kept; otherwise the
 * instance is fetched from MediaWikiServices.
 *
 * NOTE(review): UserFactory and LoadBalancer are assumed to be imported by
 * the file's `use` block, which is not visible in this listing - confirm.
 *
 * @param UserFactory|null $userFactory
 * @param UserNameUtils|null $userNameUtils
 * @param LoadBalancer|null $loadBalancer
 * @param LBFactory|null $lbFactory
 */
public function initializeServices(
	?UserFactory $userFactory = null,
	?UserNameUtils $userNameUtils = null,
	?LoadBalancer $loadBalancer = null,
	?LBFactory $lbFactory = null
) {
	$services = MediaWikiServices::getInstance();

	$this->userFactory = $userFactory ?? $this->userFactory ?? $services->getUserFactory();
	$this->userNameUtils = $userNameUtils ?? $this->userNameUtils ?? $services->getUserNameUtils();
	$this->loadBalancer = $loadBalancer ?? $this->loadBalancer ?? $services->getDBLoadBalancer();
	$this->lbFactory = $lbFactory ?? $this->lbFactory ?? $services->getDBLoadBalancerFactory();
}
101
/**
 * Resolve the user named by the 'overwrite-with' option to an actor ID.
 *
 * The name is accepted if it belongs to a registered user, or if it is a
 * valid but not usable name (reported as a "system user"). Any other name
 * ends the script via fatalError().
 *
 * @return int|null The replacement actor ID, or null if the
 *  'overwrite-with' option was not given.
 */
private function getNewActorId() {
	$name = $this->getOption( 'overwrite-with' );

	if ( $name === null ) {
		return null;
	}

	$user = $this->userFactory->newFromName( $name );

	if ( !$user ) {
		// $user is falsy on this branch, so report the name that was
		// actually supplied rather than interpolating the empty result.
		$this->fatalError( "Not a valid user name: '$name'" );
	}

	$name = $this->userNameUtils->getCanonical( $name, UserNameUtils::RIGOR_NONE );

	if ( $user->isRegistered() ) {
		$this->output( "Using existing user: '$user'\n" );
	} elseif ( !$this->userNameUtils->isValid( $name ) ) {
		$this->fatalError( "Not a valid user name: '$name'" );
	} elseif ( !$this->userNameUtils->isUsable( $name ) ) {
		// Valid but not usable: treated as a system user.
		$this->output( "Using system user: '$name'\n" );
	} else {
		// Valid and usable, but not registered: refuse to use it.
		$this->fatalError( "Unknown user: '$name'" );
	}

	// Supply write connection to assign an actor ID if needed.
	$dbw = $this->loadBalancer->getConnectionRef( DB_MASTER );
	$actorId = $user->getActorId( $dbw );

	if ( !$actorId ) {
		$this->fatalError( "Failed to acquire an actor ID for user '$user'" );
	}

	$this->output( "Replacement actor ID is $actorId.\n" );
	return $actorId;
}
147
/**
 * Script entry point.
 *
 * Validates the 'field' option, lists one batch of rows with invalid actor
 * IDs and, if a replacement user was given and the operator types "yes",
 * overwrites the actor IDs of those rows.
 */
public function execute() {
	$this->initializeServices();

	$fieldName = $this->getOption( 'field' );
	if ( !isset( self::TABLES[$fieldName] ) ) {
		$this->fatalError( "Unknown field: $fieldName.\n" );
	}

	$skipList = $this->parseIntList( $this->getOption( 'skip', '' ) );
	$replacementId = $this->getNewActorId();
	$badRows = $this->findBadActors( $fieldName, $skipList );

	// Only prompt when there is something to fix and something to fix it with.
	if ( $replacementId && $badRows ) {
		$warning = [
			"\n",
			"Do you want to OVERWRITE the listed actor IDs?\n",
			"Information about the invalid IDs will be lost!\n",
			"\n",
		];
		foreach ( $warning as $line ) {
			$this->output( $line );
		}

		$answer = $this->readconsole( 'Type "yes" to continue: ' );

		if ( $answer !== 'yes' ) {
			$this->fatalError( 'Aborted.' );
		} else {
			$this->overwriteActorIDs( $fieldName, array_keys( $badRows ), $replacementId );
		}
	}

	$this->output( "Done.\n" );
}
180
/**
 * Find rows whose actor field has no matching row in the actor table.
 *
 * At most one batch (see getBatchSize()) of rows is reported per call;
 * every row found is printed.
 *
 * @param string $field A key of self::TABLES.
 * @param int[] $skip Actor IDs that should not be reported.
 *
 * @return int[] The invalid actor IDs found, keyed by the value of the
 *  table's ID field for the affected row.
 */
private function findBadActors( $field, $skip ) {
	[ $table, $actorField, $idField ] = self::TABLES[$field];
	$this->output( "Finding invalid actor IDs in $table.$actorField...\n" );

	// Read from a replica; the second argument lists the query groups to
	// pick the connection from.
	$dbr = $this->loadBalancer->getConnectionRef(
		DB_REPLICA,
		[ 'maintenance', 'vslow', 'slow' ]
	);

	/*
	We are building an SQL query like this one here, performing a left join
	to detect rows in $table that lack a matching row in the actor table.

	In this example, $field is 'log_actor', so $table is 'logging',
	$actorField is 'log_actor', and $idField is 'log_id'.
	Further, $skip is [ 1, 2, 3, 4 ] and the batch size is 1000.

	SELECT log_actor, log_id
	FROM logging
	LEFT JOIN actor ON log_actor = actor_id
	WHERE actor_id IS NULL
	AND log_actor NOT IN (1, 2, 3, 4)
	LIMIT 1000;
	*/

	$conds = [ 'actor_id' => null ];

	if ( $skip ) {
		$conds[] = $actorField . ' NOT IN ( ' . $dbr->makeList( $skip ) . ' ) ';
	}

	$queryBuilder = $dbr->newSelectQueryBuilder();
	$queryBuilder->table( $table )
		->fields( [ $actorField, $idField ] )
		->conds( $conds )
		->leftJoin( 'actor', null, [ "$actorField = actor_id" ] )
		->limit( $this->getBatchSize() )
		->caller( __METHOD__ );

	$res = $queryBuilder->fetchResultSet();
	$count = $res->numRows();

	$bad = [];

	if ( $count ) {
		// Column headings for the per-row listing below.
		$this->output( "\t\tID\tACTOR\n" );
	}

	foreach ( $res as $row ) {
		$id = $row->$idField;
		$actor = (int)( $row->$actorField );

		$bad[$id] = $actor;
		$this->output( "\t\t$id\t$actor\n" );
	}

	$this->output( "\tFound $count invalid actor IDs.\n" );

	if ( $count >= $this->getBatchSize() ) {
		// The LIMIT was hit, so there may be more bad rows than were listed.
		$this->output( "\tBatch size reached, run again after fixing the current batch.\n" );
	}

	return $bad;
}
253
/**
 * Replace the actor ID of the given rows with a single new actor ID.
 *
 * @param string $field A key of self::TABLES.
 * @param array $ids Values of the table's ID field selecting the rows to update.
 * @param int $overwrite The actor ID to write into those rows.
 *
 * @return int The number of rows the update affected.
 */
private function overwriteActorIDs( $field, array $ids, int $overwrite ) {
	[ $tableName, $actorColumn, $keyColumn ] = self::TABLES[$field];

	$this->output(
		'OVERWRITING ' . count( $ids )
		. " actor IDs in $tableName.$actorColumn with $overwrite...\n"
	);

	$dbw = $this->loadBalancer->getConnectionRef( DB_MASTER );
	$dbw->update(
		$tableName,
		[ $actorColumn => $overwrite ],
		[ $keyColumn => $ids ],
		__METHOD__
	);
	$updated = $dbw->affectedRows();

	// Let replicas catch up before reporting the result.
	$this->lbFactory->waitForReplication();
	$this->output( "\tUpdated $updated rows.\n" );

	return $updated;
}
280
281}
282
// Name the maintenance class defined in this file, then include the file
// that the RUN_MAINTENANCE_IF_MAIN constant points to.
// NOTE(review): presumably that include runs $maintClass when this script
// is the entry point - confirm against Maintenance.php.
283$maintClass = FindMissingActors::class;
284require_once RUN_MAINTENANCE_IF_MAIN;
const RUN_MAINTENANCE_IF_MAIN
Maintenance script for finding and replacing invalid actor IDs, see T261325.
findBadActors( $field, $skip)
Find rows that have bad actor IDs.
execute()
Do the actual work. All child classes will need to implement this. Returns bool|null|void: true for success,...
UserNameUtils null $userNameUtils
getNewActorId()
Returns the actor ID of the user specified with the --overwrite-with option, or null if --overwrite-with is not set.
initializeServices(?UserFactory $userFactory=null, ?UserNameUtils $userNameUtils=null, ?LoadBalancer $loadBalancer=null, ?LBFactory $lbFactory=null)
UserFactory null $userFactory
overwriteActorIDs( $field, array $ids, int $overwrite)
Overwrite the actor ID in a given set of rows.
__construct()
Default constructor.
LoadBalancer null $loadBalancer
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
output( $out, $channel=null)
Throw some output to the user.
static readconsole( $prompt='> ')
Prompt the console for input.
getBatchSize()
Returns batch size.
parseIntList( $text)
Utility function to parse a string (perhaps from a command line option) into a list of integers (perh...
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Set the batch size.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Creates User objects.
UserNameUtils service.
An interface for generating database load balancers.
Definition LBFactory.php:41
Database connection, tracking, load balancing, and transaction manager for a cluster.
const DB_REPLICA
Definition defines.php:25
const DB_MASTER
Definition defines.php:29