MediaWiki master
findMissingActors.php
Go to the documentation of this file.
1<?php
27
28require_once __DIR__ . '/Maintenance.php';
29
36
37 private UserFactory $userFactory;
38 private UserNameUtils $userNameUtils;
39 private ActorNormalization $actorNormalization;
40
41 private ?array $tables;
42
/**
 * Register the script description, its command line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Find and fix invalid actor IDs.' );

	// --field is the only required option; all options take an argument.
	$fieldHelp = 'The name of a database field to process';
	$this->addOption( 'field', $fieldHelp, true, true );

	$typeHelp = "Which type of invalid actors to find or fix, missing or broken "
		. "(with empty actor_name which can't be associated with an existing user).";
	$this->addOption( 'type', $typeHelp, false, true );

	$skipHelp = 'A comma-separated list of actor IDs to skip.';
	$this->addOption( 'skip', $skipHelp, false, true );

	// Single-quoted on purpose: the text contains a literal "$wgReservedUsernames".
	$overwriteHelp = 'Replace invalid actors with this user. '
		. 'Typically, this would be "Unknown user", '
		. 'but it could be any reserved system user (per $wgReservedUsernames) '
		. 'or locally registered user. '
		. 'If not given, invalid actors will only be listed, not fixed. '
		. 'You will be prompted for confirmation before data is written. ';
	$this->addOption( 'overwrite-with', $overwriteHelp, false, true );

	$this->setBatchSize( 1000 );
}
64
/**
 * Get the map of supported actor fields, building and caching it on first use.
 *
 * Each entry is keyed by the actor field name and holds a three element list:
 * [ table name, actor field name, field used to identify rows for updating ].
 * Which block table entries are included depends on the configured
 * BlockTargetMigrationStage.
 *
 * @return array[]
 */
private function getTables() {
	// $this->tables is a typed property declared without a default value, so it
	// starts out uninitialized and reading it directly would throw an Error.
	// isset() is safe on uninitialized typed properties and also covers null.
	if ( !isset( $this->tables ) ) {
		$tables = [
			'ar_actor' => [ 'archive', 'ar_actor', 'ar_id' ],
			'img_actor' => [ 'image', 'img_actor', 'img_name' ],
			'oi_actor' => [ 'oldimage', 'oi_actor', 'oi_archive_name' ], // no index on oi_archive_name!
			'fa_actor' => [ 'filearchive', 'fa_actor', 'fa_id' ],
			'rc_actor' => [ 'recentchanges', 'rc_actor', 'rc_id' ],
			'log_actor' => [ 'logging', 'log_actor', 'log_id' ],
			'rev_actor' => [ 'revision', 'rev_actor', 'rev_id' ],
		];

		// The stage is a bit field; both the old and the new block table
		// may be in use at the same time.
		$stage = $this->getServiceContainer()->getMainConfig()
			->get( MainConfigNames::BlockTargetMigrationStage );
		if ( $stage & SCHEMA_COMPAT_WRITE_OLD ) {
			$tables['ipb_by_actor'] = [ 'ipblocks', 'ipb_by_actor', 'ipb_id' ]; // no index on ipb_by_actor!
		}
		if ( $stage & SCHEMA_COMPAT_WRITE_NEW ) {
			$tables['bl_by_actor'] = [ 'block', 'bl_by_actor', 'bl_id' ]; // no index on bl_by_actor!
		}

		$this->tables = $tables;
	}

	return $this->tables;
}
91
/**
 * Look up the table information for an actor field.
 *
 * @param string $field Actor field name, used as a key into getTables()
 *
 * @return string[]|null The [ table, actor field, ID field ] entry for $field,
 *  or null if the field is not known
 */
private function getTableInfo( $field ) {
	return $this->getTables()[$field] ?? null;
}
100
/**
 * Determine the actor ID that should replace invalid actor IDs, based on
 * the --overwrite-with option.
 *
 * The script is terminated via fatalError() if the given name is not a valid
 * user name, if it is neither a registered user nor an unusable (system) name,
 * or if no actor ID could be acquired for it.
 *
 * @return int|null The replacement actor ID, or null if --overwrite-with
 *  was not given
 */
private function getNewActorId() {
	$requestedName = $this->getOption( 'overwrite-with' );
	if ( $requestedName === null ) {
		// Nothing to overwrite with: the script only lists invalid actors.
		return null;
	}

	$user = $this->userFactory->newFromName( $requestedName );
	if ( !$user ) {
		$this->fatalError( "Not a valid user name: '$requestedName'" );
	}

	$canonicalName = $this->userNameUtils->getCanonical(
		$requestedName,
		UserRigorOptions::RIGOR_NONE
	);

	if ( $user->isRegistered() ) {
		$this->output( "Using existing user: '$user'\n" );
	} else {
		// Not a registered account: only accept names that are valid but
		// not usable, which the script reports as system users.
		if ( !$this->userNameUtils->isValid( $canonicalName ) ) {
			$this->fatalError( "Not a valid user name: '$canonicalName'" );
		} elseif ( $this->userNameUtils->isUsable( $canonicalName ) ) {
			$this->fatalError( "Unknown user: '$canonicalName'" );
		} else {
			$this->output( "Using system user: '$canonicalName'\n" );
		}
	}

	$primaryDb = $this->getPrimaryDB();
	$newActorId = $this->actorNormalization->acquireActorId( $user, $primaryDb );
	if ( !$newActorId ) {
		$this->fatalError( "Failed to acquire an actor ID for user '$user'" );
	}

	$this->output( "Replacement actor ID is $newActorId.\n" );

	return $newActorId;
}
145
/**
 * Run the script: validate the options, list the invalid actor IDs found
 * in the requested field and, if a replacement user was given and the
 * operator confirms, overwrite them.
 */
public function execute() {
	$services = $this->getServiceContainer();
	$this->userFactory = $services->getUserFactory();
	$this->userNameUtils = $services->getUserNameUtils();
	$this->actorNormalization = $services->getActorNormalization();
	$this->setDBProvider( $services->getConnectionProvider() );

	$field = $this->getOption( 'field' );
	if ( !$this->getTableInfo( $field ) ) {
		$this->fatalError( "Unknown field: $field.\n" );
	}

	$type = $this->getOption( 'type', 'missing' );
	if ( !in_array( $type, [ 'missing', 'broken' ], true ) ) {
		$this->fatalError( "Unknown type: $type.\n" );
	}

	$skip = $this->parseIntList( $this->getOption( 'skip', '' ) );

	// Resolve the replacement actor before searching, so that an unusable
	// --overwrite-with value is reported before any query is run.
	$overwrite = $this->getNewActorId();

	$bad = $this->findBadActors( $field, $type, $skip );

	if ( $bad && $overwrite ) {
		$warning = [
			"\n",
			"Do you want to OVERWRITE the listed actor IDs?\n",
			"Information about the invalid IDs will be lost!\n",
			"\n",
		];
		foreach ( $warning as $line ) {
			$this->output( $line );
		}

		$answer = self::readconsole( 'Type "yes" to continue: ' );
		if ( $answer !== 'yes' ) {
			$this->fatalError( 'Aborted.' );
		} else {
			// $bad is keyed by the value of the row ID field.
			$this->overwriteActorIDs( $field, array_keys( $bad ), $overwrite );
		}
	}

	$this->output( "Done.\n" );
}
184
/**
 * Find and list rows whose actor ID is invalid.
 *
 * At most one batch (see getBatchSize()) of rows is reported per call.
 *
 * @param string $field Actor field name, used as a key into getTables()
 * @param string $type 'missing' to find actor IDs that have no row in the
 *  actor table; otherwise rows whose actor has an empty actor_name are found
 * @param int[] $skip Actor IDs to exclude from the search
 *
 * @return int[] Invalid actor IDs, keyed by the value of the row's ID field
 */
private function findBadActors( $field, $type, $skip ) {
	[ $table, $actorField, $idField ] = $this->getTableInfo( $field );
	$this->output( "Finding invalid actor IDs in $table.$actorField...\n" );

	// getConnectionRef() is deprecated; getConnection() is its replacement.
	// The 'vslow' query group is kept as in the original call.
	$dbr = $this->getServiceContainer()->getDBLoadBalancer()
		->getConnection( DB_REPLICA, 'vslow' );

	/*
	We are building an SQL query like this one here, performing a left join
	to detect rows in $table that lack a matching row in the actor table.

	In this example, $field is 'log_actor', so $table is 'logging',
	$actorField is 'log_actor', and $idField is 'log_id'.
	Further, $type is 'missing', $skip is [ 1, 2, 3, 4 ] and the batch
	size is 1000.

	SELECT log_actor, log_id
	FROM logging
	LEFT JOIN actor ON log_actor = actor_id
	WHERE actor_id IS NULL
	AND log_actor NOT IN (1, 2, 3, 4)
	LIMIT 1000;

	For any other $type, the condition on actor_id is replaced by
	actor_name = ''.
	*/

	$invalidCond = $type === 'missing'
		? [ 'actor_id' => null ]
		: [ 'actor_name' => '' ];

	$queryBuilder = $dbr->newSelectQueryBuilder()
		->select( [ $actorField, $idField ] )
		->from( $table )
		->leftJoin( 'actor', null, [ "$actorField = actor_id" ] )
		->where( $invalidCond )
		->limit( $this->getBatchSize() );

	if ( $skip ) {
		$queryBuilder->andWhere( $dbr->expr( $actorField, '!=', $skip ) );
	}

	$res = $queryBuilder->caller( __METHOD__ )->fetchResultSet();
	$count = $res->numRows();

	$bad = [];

	if ( $count ) {
		$this->output( "\t\tID\tACTOR\n" );
	}

	foreach ( $res as $row ) {
		$id = $row->$idField;
		$actor = (int)( $row->$actorField );

		$bad[$id] = $actor;
		$this->output( "\t\t$id\t$actor\n" );
	}

	$this->output( "\tFound $count invalid actor IDs.\n" );

	// A full batch means there may be more invalid rows than were listed.
	if ( $count >= $this->getBatchSize() ) {
		$this->output( "\tBatch size reached, run again after fixing the current batch.\n" );
	}

	return $bad;
}
252
/**
 * Overwrite the actor ID of the given rows with a replacement actor ID.
 *
 * @param string $field Actor field name, used as a key into getTables()
 * @param array $ids Values of the row ID field identifying the rows to update
 * @param int $overwrite The actor ID to write into the rows
 *
 * @return int The number of rows affected by the update
 */
private function overwriteActorIDs( $field, array $ids, int $overwrite ) {
	[ $table, $actorField, $idField ] = $this->getTableInfo( $field );

	$requested = count( $ids );
	$this->output( "OVERWRITING $requested actor IDs in $table.$actorField with $overwrite...\n" );

	$dbw = $this->getPrimaryDB();

	$update = $dbw->newUpdateQueryBuilder()
		->update( $table )
		->set( [ $actorField => $overwrite ] )
		->where( [ $idField => $ids ] )
		->caller( __METHOD__ );
	$update->execute();

	// Read the affected row count right after the write, before anything
	// else touches the connection.
	$updated = $dbw->affectedRows();

	$this->waitForReplication();
	$this->output( "\tUpdated $updated rows.\n" );

	return $updated;
}
283
284}
285
286$maintClass = FindMissingActors::class;
287require_once RUN_MAINTENANCE_IF_MAIN;
const SCHEMA_COMPAT_WRITE_OLD
Definition Defines.php:274
const SCHEMA_COMPAT_WRITE_NEW
Definition Defines.php:278
Maintenance script for finding and replacing invalid actor IDs, see T261325 and T307738.
execute()
Do the actual work.
__construct()
Default constructor.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
static readconsole( $prompt='> ')
Prompt the console for input.
getServiceContainer()
Returns the main service container.
getBatchSize()
Returns batch size.
parseIntList( $text)
Utility function to parse a string (perhaps from a command line option) into a list of integers (perh...
addDescription( $text)
Set the description text.
setDBProvider(IConnectionProvider $dbProvider)
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
A class containing constants representing the names of configuration variables.
Creates User objects.
UserNameUtils service.
Service for dealing with the actor table.
Shared interface for rigor levels when dealing with User methods.
const DB_REPLICA
Definition defines.php:26