MediaWiki REL1_35
MigrateActors.php
Go to the documentation of this file.
1<?php
26
27require_once __DIR__ . '/../Maintenance.php';
28
36
/** @var string[]|null Table names taken from the --tables option; null means no restriction (every table is processed) */
37 protected $tables = null;
38
/**
 * Set up the script: description, the optional --tables argument, and a
 * default batch size of 100.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( "Migrates actors from pre-1.31 columns to the 'actor' table" );
	$this->addOption(
		'tables',
		'List of tables to process, comma-separated',
		false, // not required
		true // takes an argument
	);
	$this->setBatchSize( 100 );
}
45
/**
 * Get the update key name to go in the update log table.
 *
 * @return string Name of this class
 */
protected function getUpdateKey() {
	return self::class;
}
49
/**
 * Tell whether a table is selected for processing.
 *
 * @param string $table Table name
 * @return bool True when no table list was given, or when $table is in the list
 */
protected function doTable( $table ) {
	if ( $this->tables === null ) {
		// No restriction requested: everything is processed
		return true;
	}

	return in_array( $table, $this->tables, true );
}
53
/**
 * Do the actual work.
 *
 * When `user` is selected, an actor row is created for every row of the user
 * table, in batches by user_id. Otherwise the script only checks that no user
 * lacks an actor and aborts if one does. After that, each per-table migration
 * is run and the error counts are summed.
 *
 * @return bool True if no migration reported an error, false otherwise
 */
protected function doDBUpdates() {
	$tables = $this->getOption( 'tables' );
	if ( $tables !== null ) {
		// Trim each entry so that "--tables 'user, archive'" selects both tables
		// instead of silently skipping " archive".
		$this->tables = array_map( 'trim', explode( ',', $tables ) );
	}

	if ( $this->doTable( 'user' ) ) {
		$this->output( "Creating actor entries for all registered users\n" );
		$end = 0;
		$dbw = $this->getDB( DB_MASTER );
		$max = $dbw->selectField( 'user', 'MAX(user_id)', '', __METHOD__ );
		$count = 0;
		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
		while ( $end < $max ) {
			// Each pass covers the user_id range [$start, $end]
			$start = $end + 1;
			$end = min( $start + $this->mBatchSize, $max );
			$this->output( "... $start - $end\n" );
			$dbw->insertSelect(
				'actor',
				'user',
				[ 'actor_user' => 'user_id', 'actor_name' => 'user_name' ],
				[ "user_id >= $start", "user_id <= $end" ],
				__METHOD__,
				[ 'IGNORE' ],
				[ 'ORDER BY' => [ 'user_id' ] ]
			);
			$count += $dbw->affectedRows();
			$lbFactory->waitForReplication();
		}
		$this->output( "Completed actor creation, added $count new actor(s)\n" );
	} else {
		$this->output( "Checking that actors exist for all registered users\n" );
		$dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
		// Look for any user row without a matching actor row
		$anyMissing = $dbr->selectField(
			[ 'user', 'actor' ],
			'1',
			[ 'actor_id' => null ],
			__METHOD__,
			[ 'LIMIT' => 1 ],
			[ 'actor' => [ 'LEFT JOIN', 'actor_user = user_id' ] ]
		);
		if ( $anyMissing ) {
			$this->error( 'Some users lack actors; run without --tables or include `user` in --tables.' );
			return false;
		}
		$this->output( "Ok, continuing.\n" );
	}

	$errors = 0;
	$errors += $this->migrateToTemp(
		'revision', 'rev_id', [ 'revactor_timestamp' => 'rev_timestamp', 'revactor_page' => 'rev_page' ],
		'rev_user', 'rev_user_text', 'revactor_rev', 'revactor_actor'
	);
	$errors += $this->migrate( 'archive', 'ar_id', 'ar_user', 'ar_user_text', 'ar_actor' );
	$errors += $this->migrate( 'ipblocks', 'ipb_id', 'ipb_by', 'ipb_by_text', 'ipb_by_actor' );
	$errors += $this->migrate( 'image', 'img_name', 'img_user', 'img_user_text', 'img_actor' );
	$errors += $this->migrate(
		'oldimage', [ 'oi_name', 'oi_timestamp' ], 'oi_user', 'oi_user_text', 'oi_actor'
	);
	$errors += $this->migrate( 'filearchive', 'fa_id', 'fa_user', 'fa_user_text', 'fa_actor' );
	$errors += $this->migrate( 'recentchanges', 'rc_id', 'rc_user', 'rc_user_text', 'rc_actor' );
	$errors += $this->migrate( 'logging', 'log_id', 'log_user', 'log_user_text', 'log_actor' );

	$errors += $this->migrateLogSearch();

	return $errors === 0;
}
121
/**
 * Calculate a "next" condition and a display string.
 *
 * @param IDatabase $dbw Used to quote the key values
 * @param string[] $primaryKey Primary key fields, most significant first
 * @param object $row Row whose key values mark the current position
 * @return string[] [ SQL condition selecting rows that sort after $row,
 *  display string of the form "field=value field=value" ]
 */
private function makeNextCond( $dbw, $primaryKey, $row ) {
	$cond = '';
	$labels = [];

	// Start at the least significant key field and wrap the condition built
	// so far inside the comparison for each more significant field.
	foreach ( array_reverse( $primaryKey ) as $field ) {
		$raw = $row->$field;
		// Prepending keeps the labels in primary-key order
		array_unshift( $labels, $field . '=' . $raw );
		$quoted = $dbw->addQuotes( $raw );
		$cond = $cond === ''
			? "$field > $quoted"
			: "$field > $quoted OR $field = $quoted AND ($cond)";
	}

	return [ $cond, implode( ' ', $labels ) ];
}
145
/**
 * Make the subqueries for actor_id.
 *
 * @param IDatabase $dbw
 * @param string $userField User ID field name
 * @param string $nameField User name field name
 * @return string SQL CASE expression: the actor is looked up by name when
 *  $userField is 0 or NULL, and by user ID otherwise
 */
private function makeActorIdSubquery( $dbw, $userField, $nameField ) {
	$byUserId = $dbw->buildSelectSubquery(
		'actor', 'actor_id', [ "$userField = actor_user" ], __METHOD__
	);
	$byName = $dbw->buildSelectSubquery(
		'actor', 'actor_id', [ "$nameField = actor_name" ], __METHOD__
	);

	return 'CASE WHEN ' . $userField . ' = 0 OR ' . $userField . ' IS NULL'
		. ' THEN ' . $byName
		. ' ELSE ' . $byUserId
		. ' END';
}
168
/**
 * Add actors for anons in a set of rows.
 *
 * Rows with a null actor_id whose name is a usable user name are reported
 * (once per name), counted in $countErrors and removed from $rows. For the
 * remaining rows with a null actor_id, an actor row is inserted by name and
 * the row's actor_id property is filled in from the actor table.
 *
 * @param IDatabase $dbw
 * @param string $nameField Name of the row property holding the user name
 * @param object[] &$rows Rows to process; rows that cannot get an actor are removed
 * @param array &$complainedAboutUsers Names already reported, to avoid repeated errors
 * @param int &$countErrors Incremented once for every removed row
 * @return int Number of actor rows inserted
 */
private function addActorsForRows(
	IDatabase $dbw, $nameField, array &$rows, array &$complainedAboutUsers, &$countErrors
) {
	$needActors = [];
	$countActors = 0;

	$keep = [];
	foreach ( $rows as $index => $row ) {
		$keep[$index] = true;
		if ( $row->actor_id === null ) {
			// All registered users should have an actor_id already. So
			// if we have a usable name here, it means they didn't run
			// maintenance/cleanupUsersWithNoId.php
			$name = $row->$nameField;
			if ( User::isUsableName( $name ) ) {
				if ( !isset( $complainedAboutUsers[$name] ) ) {
					$complainedAboutUsers[$name] = true;
					$this->error(
						"User name \"$name\" is usable, cannot create an anonymous actor for it."
						. " Run maintenance/cleanupUsersWithNoId.php to fix this situation.\n"
					);
				}
				unset( $keep[$index] );
				$countErrors++;
			} else {
				$needActors[$name] = 0;
			}
		}
	}
	$rows = array_intersect_key( $rows, $keep );

	if ( $needActors ) {
		// PHP converts numeric-looking string keys to integers. Cast them back
		// once so the INSERT and the lookup below both work with the same strings.
		$names = array_map( 'strval', array_keys( $needActors ) );

		$dbw->insert(
			'actor',
			array_map( static function ( $name ) {
				return [
					'actor_name' => $name,
				];
			}, $names ),
			__METHOD__,
			[ 'IGNORE' ]
		);
		$countActors += $dbw->affectedRows();

		// Read the IDs back for every needed name, not only the ones just inserted
		$res = $dbw->select(
			'actor',
			[ 'actor_id', 'actor_name' ],
			[ 'actor_name' => $names ],
			__METHOD__
		);
		foreach ( $res as $row ) {
			$needActors[$row->actor_name] = $row->actor_id;
		}
		foreach ( $rows as $row ) {
			if ( $row->actor_id === null ) {
				// Stays 0 when no actor was found; callers treat that as an error
				$row->actor_id = $needActors[$row->$nameField];
			}
		}
	}

	return $countActors;
}
242
/**
 * Migrate actors in a table.
 *
 * Walks the table in primary-key order, one batch at a time, looking only at
 * rows whose $actorField is still 0, and sets $actorField to the actor ID
 * resolved for the row.
 *
 * @param string $table Table to migrate
 * @param string|string[] $primaryKey Primary key field name(s) of the table
 * @param string $userField User ID field name
 * @param string $nameField User name field name
 * @param string $actorField Actor field name
 * @return int Number of errors
 */
protected function migrate( $table, $primaryKey, $userField, $nameField, $actorField ) {
	if ( !$this->doTable( $table ) ) {
		$this->output( "Skipping $table, not included in --tables\n" );
		return 0;
	}

	$dbw = $this->getDB( DB_MASTER );
	if ( !$dbw->fieldExists( $table, $userField, __METHOD__ ) ) {
		$this->output( "No need to migrate $table.$userField, field does not exist\n" );
		return 0;
	}

	$reportedNames = [];
	$pkFields = (array)$primaryKey;
	// Field name => position, used to pick the key columns out of a row
	$pkLookup = array_flip( $pkFields );

	$this->output(
		"Beginning migration of $table.$userField and $table.$nameField to $table.$actorField\n"
	);
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$lbFactory->waitForReplication();

	// Parts of the batch query that do not change between iterations
	$selectFields = array_merge( $pkFields, [
		$userField,
		$nameField,
		'actor_id' => $this->makeActorIdSubquery( $dbw, $userField, $nameField ),
	] );
	$selectOptions = [
		'ORDER BY' => $pkFields,
		'LIMIT' => $this->mBatchSize,
	];

	$resumeCond = '1=1';
	$updated = 0;
	$newActors = 0;
	$failures = 0;
	for ( ; ; ) {
		// Fetch the next batch of rows still lacking an actor
		$res = $dbw->select(
			$table,
			$selectFields,
			[ $actorField => 0, $resumeCond ],
			__METHOD__,
			$selectOptions
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Remember the last fetched row before addActorsForRows() may drop rows
		$batch = iterator_to_array( $res );
		$tail = end( $batch );
		$newActors += $this->addActorsForRows(
			$dbw, $nameField, $batch, $reportedNames, $failures
		);

		foreach ( $batch as $row ) {
			if ( $row->actor_id ) {
				// Match on the primary key, and only if the actor is still unset
				$where = array_intersect_key( (array)$row, $pkLookup );
				$where += [ $actorField => 0 ];
				$dbw->update(
					$table,
					[ $actorField => $row->actor_id ],
					$where,
					__METHOD__
				);
				$updated += $dbw->affectedRows();
				continue;
			}

			[ , $label ] = $this->makeNextCond( $dbw, $pkFields, $row );
			$this->error(
				"Could not make actor for row with $label "
				. "$userField={$row->$userField} $nameField={$row->$nameField}\n"
			);
			$failures++;
		}

		// Continue after the last row of this batch
		[ $resumeCond, $label ] = $this->makeNextCond( $dbw, $pkFields, $tail );
		$this->output( "... $label\n" );
		$lbFactory->waitForReplication();
	}

	$this->output(
		"Completed migration, updated $updated row(s) with $newActors new actor(s), "
		. "$failures error(s)\n"
	);
	return $failures;
}
344
/**
 * Migrate actors in a table to a temporary table.
 *
 * Rows of $table that have no matching row in "{$table}_actor_temp" are read
 * in primary-key order, one batch at a time, and a row is inserted into the
 * temporary table for each one whose actor could be resolved.
 *
 * @param string $table Table to migrate
 * @param string $primaryKey Primary key field name of $table
 * @param array $extra Extra fields to copy, as [ temp-table field => $table field ]
 * @param string $userField User ID field name
 * @param string $nameField User name field name
 * @param string $newPrimaryKey Field of the temporary table that references $primaryKey
 * @param string $actorField Actor field name in the temporary table
 * @return int Number of errors
 */
protected function migrateToTemp(
	$table, $primaryKey, $extra, $userField, $nameField, $newPrimaryKey, $actorField
) {
	if ( !$this->doTable( $table ) ) {
		$this->output( "Skipping $table, not included in --tables\n" );
		return 0;
	}

	$dbw = $this->getDB( DB_MASTER );
	if ( !$dbw->fieldExists( $table, $userField, __METHOD__ ) ) {
		$this->output( "No need to migrate $table.$userField, field does not exist\n" );
		return 0;
	}

	$reportedNames = [];
	$tempTable = $table . '_actor_temp';

	$this->output(
		"Beginning migration of $table.$userField and $table.$nameField to {$tempTable}.$actorField\n"
	);
	MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();

	// Parts of the batch query that do not change between iterations.
	// $extra is keyed by alias, so its fields are selected under the
	// temporary table's field names.
	$selectFields = [
		$primaryKey,
		$userField,
		$nameField,
		'actor_id' => $this->makeActorIdSubquery( $dbw, $userField, $nameField ),
	] + $extra;
	$selectOptions = [
		'ORDER BY' => $primaryKey,
		'LIMIT' => $this->mBatchSize,
	];
	$joinConds = [
		$tempTable => [ 'LEFT JOIN', "{$primaryKey}={$newPrimaryKey}" ],
	];

	$resume = [];
	$inserted = 0;
	$newActors = 0;
	$failures = 0;
	for ( ; ; ) {
		// Fetch the next batch of rows with no temporary-table counterpart
		$res = $dbw->select(
			[ $table, $tempTable ],
			$selectFields,
			[ $newPrimaryKey => null ] + $resume,
			__METHOD__,
			$selectOptions,
			$joinConds
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Remember the last fetched row before addActorsForRows() may drop rows
		$batch = iterator_to_array( $res );
		$tail = end( $batch );
		$newActors += $this->addActorsForRows(
			$dbw, $nameField, $batch, $reportedNames, $failures
		);

		if ( $batch ) {
			$inserts = [];
			foreach ( $batch as $row ) {
				if ( !$row->actor_id ) {
					[ , $label ] = $this->makeNextCond( $dbw, [ $primaryKey ], $row );
					$this->error(
						"Could not make actor for row with $label "
						. "$userField={$row->$userField} $nameField={$row->$nameField}\n"
					);
					$failures++;
					continue;
				}

				$ins = [
					$newPrimaryKey => $row->$primaryKey,
					$actorField => $row->actor_id,
				];
				// The extra fields were selected under their alias
				foreach ( array_keys( $extra ) as $alias ) {
					$ins[$alias] = $row->$alias;
				}
				$inserts[] = $ins;
			}

			$this->beginTransaction( $dbw, __METHOD__ );
			$dbw->insert( $tempTable, $inserts, __METHOD__ );
			$inserted += $dbw->affectedRows();
			$this->commitTransaction( $dbw, __METHOD__ );
		}

		// Continue after the last row of this batch
		[ $cond, $label ] = $this->makeNextCond( $dbw, [ $primaryKey ], $tail );
		$resume = [ $cond ];
		$this->output( "... $label\n" );
	}

	$this->output(
		"Completed migration, updated $inserted row(s) with $newActors new actor(s), "
		. "$failures error(s)\n"
	);
	return $failures;
}
456
/**
 * Migrate actors in the log_search table.
 *
 * Runs three steps:
 *  1. Deletes 'target_author_actor' rows with an empty ls_value (T215525).
 *  2. For each 'target_author_id' row, inserts a 'target_author_actor' row
 *     holding the actor ID found by joining on actor_user.
 *  3. Does the same for each 'target_author_ip' row, joining on actor_name
 *     and creating missing actors through addActorsForRows().
 *
 * @return int Number of errors
 */
protected function migrateLogSearch() {
	if ( !$this->doTable( 'log_search' ) ) {
		$this->output( "Skipping log_search, not included in --tables\n" );
		return 0;
	}

	$complainedAboutUsers = [];

	$primaryKey = [ 'ls_value', 'ls_log_id' ];
	$this->output( "Beginning migration of log_search\n" );
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$lbFactory->waitForReplication();

	$dbw = $this->getDB( DB_MASTER );
	$countInserted = 0;
	$countActors = 0;
	$countErrors = 0;

	$anyBad = $dbw->selectField(
		'log_search',
		'1',
		[ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
		__METHOD__,
		[ 'LIMIT' => 1 ]
	);
	if ( $anyBad ) {
		$this->output( "... Deleting bogus rows due to T215525\n" );
		$dbw->delete(
			'log_search',
			[ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
			__METHOD__
		);
		$ct = $dbw->affectedRows();
		$this->output( "... Deleted $ct bogus row(s) from T215525\n" );
		$lbFactory->waitForReplication();
	}

	$next = '1=1';
	while ( true ) {
		// Fetch the rows needing update
		$res = $dbw->select(
			[ 'log_search', 'actor' ],
			[ 'ls_value', 'ls_log_id', 'actor_id' ],
			[
				'ls_field' => 'target_author_id',
				$next
			],
			__METHOD__,
			[
				'ORDER BY' => $primaryKey,
				'LIMIT' => $this->mBatchSize,
			],
			[ 'actor' => [ 'LEFT JOIN', 'actor_user = ' . $dbw->buildIntegerCast( 'ls_value' ) ] ]
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Insert a 'target_author_actor' for each 'target_author_id'
		$ins = [];
		foreach ( $res as $row ) {
			$lastRow = $row;
			if ( !$row->actor_id ) {
				list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
				$this->error( "No actor for target_author_id row with $display\n" );
				$countErrors++;
				continue;
			}
			$ins[] = [
				'ls_field' => 'target_author_actor',
				'ls_value' => $row->actor_id,
				'ls_log_id' => $row->ls_log_id,
			];
		}
		// Skip the insert when every row of the batch failed. With nothing to
		// insert, affectedRows() could report an earlier statement's count and
		// inflate the total.
		if ( $ins ) {
			$dbw->insert( 'log_search', $ins, __METHOD__, [ 'IGNORE' ] );
			$countInserted += $dbw->affectedRows();
		}

		list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
		$this->output( "... target_author_id, $display\n" );
		$lbFactory->waitForReplication();
	}

	$next = '1=1';
	while ( true ) {
		// Fetch the rows needing update
		$res = $dbw->select(
			[ 'log_search', 'actor' ],
			[ 'ls_value', 'ls_log_id', 'actor_id' ],
			[
				'ls_field' => 'target_author_ip',
				$next
			],
			__METHOD__,
			[
				'ORDER BY' => $primaryKey,
				'LIMIT' => $this->mBatchSize,
			],
			[ 'actor' => [ 'LEFT JOIN', 'ls_value = actor_name' ] ]
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Insert new actors for rows that need one. The last fetched row is
		// remembered first because addActorsForRows() may remove rows.
		$rows = iterator_to_array( $res );
		$lastRow = end( $rows );
		$countActors += $this->addActorsForRows(
			$dbw, 'ls_value', $rows, $complainedAboutUsers, $countErrors
		);

		// Insert a 'target_author_actor' for each 'target_author_ip'
		$ins = [];
		foreach ( $rows as $row ) {
			if ( !$row->actor_id ) {
				list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
				$this->error( "Could not make actor for target_author_ip row with $display\n" );
				$countErrors++;
				continue;
			}
			$ins[] = [
				'ls_field' => 'target_author_actor',
				'ls_value' => $row->actor_id,
				'ls_log_id' => $row->ls_log_id,
			];
		}
		// Same guard as above: never count affected rows for an insert that did not run
		if ( $ins ) {
			$dbw->insert( 'log_search', $ins, __METHOD__, [ 'IGNORE' ] );
			$countInserted += $dbw->affectedRows();
		}

		list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
		$this->output( "... target_author_ip, $display\n" );
		$lbFactory->waitForReplication();
	}

	$this->output(
		"Completed migration, inserted $countInserted row(s) with $countActors new actor(s), "
		. "$countErrors error(s)\n"
	);
	return $countErrors;
}
600}
getDB()
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
error( $err, $die=0)
Throw an error to the user.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Set the batch size.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Maintenance script that migrates actors from pre-1.31 columns to the 'actor' table.
addActorsForRows(IDatabase $dbw, $nameField, array &$rows, array &$complainedAboutUsers, &$countErrors)
Add actors for anons in a set of rows.
doDBUpdates()
Do the actual work.
migrate( $table, $primaryKey, $userField, $nameField, $actorField)
Migrate actors in a table.
makeActorIdSubquery( $dbw, $userField, $nameField)
Make the subqueries for actor_id
migrateLogSearch()
Migrate actors in the log_search table.
__construct()
Default constructor.
makeNextCond( $dbw, $primaryKey, $row)
Calculate a "next" condition and a display string.
migrateToTemp( $table, $primaryKey, $extra, $userField, $nameField, $newPrimaryKey, $actorField)
Migrate actors in a table to a temporary table.
getUpdateKey()
Get the update key name to go in the update log table.
static isUsableName( $name)
Usernames which fail to pass this function will be blocked from user login and new account registrati...
Definition User.php:995
Basic database interface for live and lazy-loaded relational database handles.
Definition IDatabase.php:38
select( $table, $vars, $conds='', $fname=__METHOD__, $options=[], $join_conds=[])
Execute a SELECT query constructed using the various parameters provided.
affectedRows()
Get the number of rows affected by the last write query.
insert( $table, $rows, $fname=__METHOD__, $options=[])
Insert the given row(s) into a table.
const DB_REPLICA
Definition defines.php:25
const DB_MASTER
Definition defines.php:29