MediaWiki  master
MigrateActors.php
Go to the documentation of this file.
1 <?php
26 
27 require_once __DIR__ . '/../Maintenance.php';
28 
36 
/**
 * Names of the tables to process as given with --tables, or null when
 * every table should be processed.
 * @var string[]|null
 */
protected $tables;
38 
/**
 * Register the script description and the optional --tables option, and
 * set the batch size to 100.
 */
public function __construct() {
	parent::__construct();

	$summary = "Migrates actors from pre-1.31 columns to the 'actor' table";
	$this->addDescription( $summary );

	// Not required, and takes an argument (the comma-separated table list).
	$this->addOption(
		'tables',
		'List of tables to process, comma-separated',
		false,
		true
	);

	$this->setBatchSize( 100 );
}
45 
/**
 * Get the update key name to go in the update log table: the name of
 * this class.
 *
 * @return string
 */
protected function getUpdateKey() {
	return self::class;
}
49 
/**
 * Tell whether a table was selected for processing.
 *
 * @param string $table Table name
 * @return bool True when no --tables restriction is set or when $table is
 *  one of the listed names (strict comparison)
 */
protected function doTable( $table ) {
	if ( $this->tables === null ) {
		// No restriction: every table is processed.
		return true;
	}

	return in_array( $table, $this->tables, true );
}
53 
/**
 * Do the actual work.
 *
 * First makes sure every registered user has a row in the actor table:
 * the rows are created when the `user` table is selected for processing,
 * otherwise their presence is only verified and the run is aborted if any
 * is missing. Then each table is migrated in turn.
 *
 * @return bool True when every step finished without errors, false otherwise
 */
protected function doDBUpdates() {
	$tables = $this->getOption( 'tables' );
	if ( $tables !== null ) {
		// Trim each name so that `--tables "user, revision"` selects both
		// tables instead of silently never matching " revision".
		$this->tables = array_map( 'trim', explode( ',', $tables ) );
	}

	if ( $this->doTable( 'user' ) ) {
		$this->output( "Creating actor entries for all registered users\n" );
		$end = 0;
		$dbw = $this->getDB( DB_MASTER );
		$max = $dbw->selectField( 'user', 'MAX(user_id)', '', __METHOD__ );
		$count = 0;
		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
		// Walk the user_id space in inclusive [$start, $end] ranges.
		while ( $end < $max ) {
			$start = $end + 1;
			$end = min( $start + $this->mBatchSize, $max );
			$this->output( "... $start - $end\n" );
			$dbw->insertSelect(
				'actor',
				'user',
				[ 'actor_user' => 'user_id', 'actor_name' => 'user_name' ],
				[ "user_id >= $start", "user_id <= $end" ],
				__METHOD__,
				[ 'IGNORE' ],
				[ 'ORDER BY' => [ 'user_id' ] ]
			);
			$count += $dbw->affectedRows();
			$lbFactory->waitForReplication();
		}
		$this->output( "Completed actor creation, added $count new actor(s)\n" );
	} else {
		$this->output( "Checking that actors exist for all registered users\n" );
		$dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
		// Look for any user row that has no matching actor row.
		$anyMissing = $dbr->selectField(
			[ 'user', 'actor' ],
			'1',
			[ 'actor_id' => null ],
			__METHOD__,
			// Options are keyed by name; a bare 'LIMIT 1' string is not a limit.
			[ 'LIMIT' => 1 ],
			[ 'actor' => [ 'LEFT JOIN', 'actor_user = user_id' ] ]
		);
		if ( $anyMissing ) {
			$this->error( 'Some users lack actors; run without --tables or include `user` in --tables.' );
			return false;
		}
		$this->output( "Ok, continuing.\n" );
	}

	$errors = 0;
	$errors += $this->migrateToTemp(
		'revision', 'rev_id', [ 'revactor_timestamp' => 'rev_timestamp', 'revactor_page' => 'rev_page' ],
		'rev_user', 'rev_user_text', 'revactor_rev', 'revactor_actor'
	);
	$errors += $this->migrate( 'archive', 'ar_id', 'ar_user', 'ar_user_text', 'ar_actor' );
	$errors += $this->migrate( 'ipblocks', 'ipb_id', 'ipb_by', 'ipb_by_text', 'ipb_by_actor' );
	$errors += $this->migrate( 'image', 'img_name', 'img_user', 'img_user_text', 'img_actor' );
	$errors += $this->migrate(
		'oldimage', [ 'oi_name', 'oi_timestamp' ], 'oi_user', 'oi_user_text', 'oi_actor'
	);
	$errors += $this->migrate( 'filearchive', 'fa_id', 'fa_user', 'fa_user_text', 'fa_actor' );
	$errors += $this->migrate( 'recentchanges', 'rc_id', 'rc_user', 'rc_user_text', 'rc_actor' );
	$errors += $this->migrate( 'logging', 'log_id', 'log_user', 'log_user_text', 'log_actor' );

	$errors += $this->migrateLogSearch();

	return $errors === 0;
}
121 
/**
 * Calculate a "next" condition and a display string.
 *
 * The condition selects the rows that sort strictly after $row when
 * ordering by the primary key columns; it is built from the last column
 * outwards so that each earlier column wraps the condition of the later
 * ones.
 *
 * @param IDatabase $dbw Used to quote the key values
 * @param string[] $primaryKey Primary key column names, as a list
 * @param object $row Row holding a value for every primary key column
 * @return string[] [ SQL condition, "field=value ..." display string ]
 */
private function makeNextCond( $dbw, $primaryKey, $row ) {
	$cond = '';
	$labels = [];
	foreach ( array_reverse( $primaryKey ) as $column ) {
		$raw = $row->$column;
		// Prepend, so the display string lists the columns in key order.
		array_unshift( $labels, $column . '=' . $raw );
		$quoted = $dbw->addQuotes( $raw );
		$cond = $cond === ''
			? "$column > $quoted"
			: "$column > $quoted OR $column = $quoted AND ($cond)";
	}

	return [ $cond, implode( ' ', $labels ) ];
}
145 
/**
 * Make the subqueries for actor_id.
 *
 * The resulting expression looks the actor up by name when the user ID
 * column is 0 or NULL, and by user ID otherwise.
 *
 * @param IDatabase $dbw
 * @param string $userField User ID column of the table being migrated
 * @param string $nameField User name column of the table being migrated
 * @return string SQL CASE expression yielding the actor_id
 */
private function makeActorIdSubquery( $dbw, $userField, $nameField ) {
	$byUserId = $dbw->buildSelectSubquery(
		'actor', 'actor_id', [ "$userField = actor_user" ], __METHOD__
	);
	$byName = $dbw->buildSelectSubquery(
		'actor', 'actor_id', [ "$nameField = actor_name" ], __METHOD__
	);

	$noUserId = "$userField = 0 OR $userField IS NULL";

	return "CASE WHEN $noUserId THEN $byName ELSE $byUserId END";
}
168 
/**
 * Add actors for anons in a set of rows.
 *
 * Rows whose actor_id is null and whose name is a usable user name cannot
 * get an anonymous actor: they are reported (once per name), counted as
 * errors and removed from $rows. For the remaining rows without an
 * actor_id, actor rows are inserted and the new IDs are written back into
 * the row objects.
 *
 * @param IDatabase $dbw
 * @param string $nameField Property of each row that holds the user name
 * @param object[] &$rows Rows to process; filtered and updated in place
 * @param array &$complainedAboutUsers Names already reported, as name => true
 * @param int &$countErrors Incremented once for every row that is dropped
 * @return int Number of actor rows added, as reported by affectedRows()
 */
private function addActorsForRows(
	IDatabase $dbw, $nameField, array &$rows, array &$complainedAboutUsers, &$countErrors
) {
	$needActors = [];
	$countActors = 0;

	$keep = [];
	foreach ( $rows as $index => $row ) {
		$keep[$index] = true;
		if ( $row->actor_id === null ) {
			// All registered users should have an actor_id already. So
			// if we have a usable name here, it means they didn't run
			// maintenance/cleanupUsersWithNoId.php
			$name = $row->$nameField;
			if ( User::isUsableName( $name ) ) {
				if ( !isset( $complainedAboutUsers[$name] ) ) {
					$complainedAboutUsers[$name] = true;
					$this->error(
						"User name \"$name\" is usable, cannot create an anonymous actor for it."
						. " Run maintenance/cleanupUsersWithNoId.php to fix this situation.\n"
					);
				}
				unset( $keep[$index] );
				$countErrors++;
			} else {
				$needActors[$name] = 0;
			}
		}
	}
	$rows = array_intersect_key( $rows, $keep );

	if ( $needActors ) {
		// PHP turns numeric-looking array keys into integers; cast them back
		// so the INSERT and the SELECT both see the names as strings.
		$names = array_map( 'strval', array_keys( $needActors ) );

		$dbw->insert(
			'actor',
			array_map( static function ( $name ) {
				return [
					'actor_name' => $name,
				];
			}, $names ),
			__METHOD__
		);
		$countActors += $dbw->affectedRows();

		// Read back the IDs of the actors that were just created.
		$res = $dbw->select(
			'actor',
			[ 'actor_id', 'actor_name' ],
			[ 'actor_name' => $names ],
			__METHOD__
		);
		foreach ( $res as $row ) {
			$needActors[$row->actor_name] = $row->actor_id;
		}
		foreach ( $rows as $row ) {
			if ( $row->actor_id === null ) {
				$row->actor_id = $needActors[$row->$nameField];
			}
		}
	}

	return $countActors;
}
241 
/**
 * Migrate actors in a table.
 *
 * Works through the rows whose actor column is still 0, in primary key
 * order and one batch at a time, creating anonymous actors where needed
 * and writing the actor ID into each row.
 *
 * @param string $table Table to migrate
 * @param string|string[] $primaryKey Primary key column(s) of the table
 * @param string $userField User ID column being migrated away from
 * @param string $nameField User name column being migrated away from
 * @param string $actorField Actor ID column to fill in
 * @return int Number of errors
 */
protected function migrate( $table, $primaryKey, $userField, $nameField, $actorField ) {
	if ( !$this->doTable( $table ) ) {
		$this->output( "Skipping $table, not included in --tables\n" );
		return 0;
	}

	$dbw = $this->getDB( DB_MASTER );
	if ( !$dbw->fieldExists( $table, $userField, __METHOD__ ) ) {
		$this->output( "No need to migrate $table.$userField, field does not exist\n" );
		return 0;
	}

	$reportedNames = [];
	$keyColumns = (array)$primaryKey;
	$keyLookup = array_flip( $keyColumns );

	$this->output(
		"Beginning migration of $table.$userField and $table.$nameField to $table.$actorField\n"
	);
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$lbFactory->waitForReplication();

	// The selected fields and the query options are the same for every batch.
	$fields = array_merge(
		$keyColumns,
		[ $userField, $nameField, 'actor_id' => $this->makeActorIdSubquery( $dbw, $userField, $nameField ) ]
	);
	$queryOptions = [
		'ORDER BY' => $keyColumns,
		'LIMIT' => $this->mBatchSize,
	];

	$resumeCond = '1=1';
	$updated = 0;
	$actorsAdded = 0;
	$failures = 0;

	for ( ; ; ) {
		// Next batch of rows that still have no actor
		$res = $dbw->select(
			$table,
			$fields,
			[ $actorField => 0, $resumeCond ],
			__METHOD__,
			$queryOptions
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Remember the last row before rows get filtered out, so the next
		// batch resumes after it even if that row turns out to be an error.
		$batch = iterator_to_array( $res );
		$lastRow = end( $batch );
		$actorsAdded += $this->addActorsForRows(
			$dbw, $nameField, $batch, $reportedNames, $failures
		);

		foreach ( $batch as $row ) {
			if ( $row->actor_id ) {
				// Match on the primary key, and only touch rows not migrated yet.
				$where = array_intersect_key( (array)$row, $keyLookup ) + [
					$actorField => 0
				];
				$dbw->update( $table, [ $actorField => $row->actor_id ], $where, __METHOD__ );
				$updated += $dbw->affectedRows();
			} else {
				list( , $display ) = $this->makeNextCond( $dbw, $keyColumns, $row );
				$this->error(
					"Could not make actor for row with $display "
					. "$userField={$row->$userField} $nameField={$row->$nameField}\n"
				);
				$failures++;
			}
		}

		list( $resumeCond, $display ) = $this->makeNextCond( $dbw, $keyColumns, $lastRow );
		$this->output( "... $display\n" );
		$lbFactory->waitForReplication();
	}

	$this->output(
		"Completed migration, updated $updated row(s) with $actorsAdded new actor(s), "
		. "$failures error(s)\n"
	);
	return $failures;
}
343 
/**
 * Migrate actors in a table to a temporary table.
 *
 * Rows of $table that have no matching row in "{$table}_actor_temp" are
 * processed in primary key order, one batch at a time; for each of them a
 * row holding the actor ID is inserted into the temporary table.
 *
 * @param string $table Table to migrate
 * @param string $primaryKey Primary key column of $table
 * @param array $extra Extra columns to copy, as new-table column => $table column
 * @param string $userField User ID column being migrated away from
 * @param string $nameField User name column being migrated away from
 * @param string $newPrimaryKey Column of the new table that references $primaryKey
 * @param string $actorField Actor ID column of the new table
 * @return int Number of errors
 */
protected function migrateToTemp(
	$table, $primaryKey, $extra, $userField, $nameField, $newPrimaryKey, $actorField
) {
	if ( !$this->doTable( $table ) ) {
		$this->output( "Skipping $table, not included in --tables\n" );
		return 0;
	}

	$dbw = $this->getDB( DB_MASTER );
	if ( !$dbw->fieldExists( $table, $userField, __METHOD__ ) ) {
		$this->output( "No need to migrate $table.$userField, field does not exist\n" );
		return 0;
	}

	$complainedAboutUsers = [];

	$newTable = $table . '_actor_temp';
	$this->output(
		"Beginning migration of $table.$userField and $table.$nameField to $newTable.$actorField\n"
	);
	MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();

	$actorIdSubquery = $this->makeActorIdSubquery( $dbw, $userField, $nameField );
	$next = [];
	$countUpdated = 0;
	$countActors = 0;
	$countErrors = 0;
	while ( true ) {
		// Fetch the rows needing update: those without a row in the new table
		$res = $dbw->select(
			[ $table, $newTable ],
			[ $primaryKey, $userField, $nameField, 'actor_id' => $actorIdSubquery ] + $extra,
			// @phan-suppress-next-line PhanSuspiciousBinaryAddLists
			[ $newPrimaryKey => null ] + $next,
			__METHOD__,
			[
				'ORDER BY' => $primaryKey,
				'LIMIT' => $this->mBatchSize,
			],
			[
				$newTable => [ 'LEFT JOIN', "{$primaryKey}={$newPrimaryKey}" ],
			]
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Insert new actors for rows that need one
		$rows = iterator_to_array( $res );
		$lastRow = end( $rows );
		$countActors += $this->addActorsForRows(
			$dbw, $nameField, $rows, $complainedAboutUsers, $countErrors
		);

		// Build the rows for the new table
		$inserts = [];
		foreach ( $rows as $row ) {
			if ( !$row->actor_id ) {
				list( , $display ) = $this->makeNextCond( $dbw, [ $primaryKey ], $row );
				$this->error(
					"Could not make actor for row with $display "
					. "$userField={$row->$userField} $nameField={$row->$nameField}\n"
				);
				$countErrors++;
				continue;
			}
			$ins = [
				$newPrimaryKey => $row->$primaryKey,
				$actorField => $row->actor_id,
			];
			foreach ( $extra as $to => $from ) {
				// It's aliased
				$ins[$to] = $row->$to;
			}
			$inserts[] = $ins;
		}

		// Only write when there is something to insert. Otherwise no write
		// happens and affectedRows() would report the count of an earlier
		// write, inflating $countUpdated.
		if ( $inserts ) {
			$this->beginTransaction( $dbw, __METHOD__ );
			$dbw->insert( $newTable, $inserts, __METHOD__ );
			$countUpdated += $dbw->affectedRows();
			$this->commitTransaction( $dbw, __METHOD__ );
		}

		// Calculate the "next" condition
		list( $n, $display ) = $this->makeNextCond( $dbw, [ $primaryKey ], $lastRow );
		$next = [ $n ];
		$this->output( "... $display\n" );
	}

	$this->output(
		"Completed migration, updated $countUpdated row(s) with $countActors new actor(s), "
		. "$countErrors error(s)\n"
	);
	return $countErrors;
}
456 
/**
 * Migrate actors in the log_search table.
 *
 * Runs in three steps:
 *  - delete 'target_author_actor' rows with an empty value (T215525);
 *  - for every 'target_author_id' row, insert a 'target_author_actor' row
 *    for the actor whose actor_user matches the value;
 *  - for every 'target_author_ip' row, insert a 'target_author_actor' row
 *    for the actor whose actor_name matches the value, creating the actor
 *    when it does not exist yet.
 *
 * @return int Number of errors
 */
protected function migrateLogSearch() {
	if ( !$this->doTable( 'log_search' ) ) {
		$this->output( "Skipping log_search, not included in --tables\n" );
		return 0;
	}

	$complainedAboutUsers = [];

	$primaryKey = [ 'ls_value', 'ls_log_id' ];
	$this->output( "Beginning migration of log_search\n" );
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$lbFactory->waitForReplication();

	$dbw = $this->getDB( DB_MASTER );
	$countInserted = 0;
	$countActors = 0;
	$countErrors = 0;

	$anyBad = $dbw->selectField(
		'log_search',
		'1',
		[ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
		__METHOD__,
		[ 'LIMIT' => 1 ]
	);
	if ( $anyBad ) {
		$this->output( "... Deleting bogus rows due to T215525\n" );
		$dbw->delete(
			'log_search',
			[ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
			__METHOD__
		);
		$ct = $dbw->affectedRows();
		$this->output( "... Deleted $ct bogus row(s) from T215525\n" );
		$lbFactory->waitForReplication();
	}

	$next = '1=1';
	while ( true ) {
		// Fetch the rows needing update
		$res = $dbw->select(
			[ 'log_search', 'actor' ],
			[ 'ls_value', 'ls_log_id', 'actor_id' ],
			[
				'ls_field' => 'target_author_id',
				$next
			],
			__METHOD__,
			[
				'ORDER BY' => $primaryKey,
				'LIMIT' => $this->mBatchSize,
			],
			[ 'actor' => [ 'LEFT JOIN', 'actor_user = ' . $dbw->buildIntegerCast( 'ls_value' ) ] ]
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Insert a 'target_author_actor' for each 'target_author_id'
		$ins = [];
		foreach ( $res as $row ) {
			$lastRow = $row;
			if ( !$row->actor_id ) {
				list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
				$this->error( "No actor for target_author_id row with $display\n" );
				$countErrors++;
				continue;
			}
			$ins[] = [
				'ls_field' => 'target_author_actor',
				'ls_value' => $row->actor_id,
				'ls_log_id' => $row->ls_log_id,
			];
		}
		// Skip the write when every row of the batch failed; otherwise
		// affectedRows() would report the count of an earlier write.
		if ( $ins ) {
			$dbw->insert( 'log_search', $ins, __METHOD__, [ 'IGNORE' ] );
			$countInserted += $dbw->affectedRows();
		}

		list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
		$this->output( "... target_author_id, $display\n" );
		$lbFactory->waitForReplication();
	}

	$next = '1=1';
	while ( true ) {
		// Fetch the rows needing update
		$res = $dbw->select(
			[ 'log_search', 'actor' ],
			[ 'ls_value', 'ls_log_id', 'actor_id' ],
			[
				'ls_field' => 'target_author_ip',
				$next
			],
			__METHOD__,
			[
				'ORDER BY' => $primaryKey,
				'LIMIT' => $this->mBatchSize,
			],
			[ 'actor' => [ 'LEFT JOIN', 'ls_value = actor_name' ] ]
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Insert new actors for rows that need one
		$rows = iterator_to_array( $res );
		$lastRow = end( $rows );
		$countActors += $this->addActorsForRows(
			$dbw, 'ls_value', $rows, $complainedAboutUsers, $countErrors
		);

		// Insert a 'target_author_actor' for each 'target_author_ip'
		$ins = [];
		foreach ( $rows as $row ) {
			if ( !$row->actor_id ) {
				list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
				$this->error( "Could not make actor for target_author_ip row with $display\n" );
				$countErrors++;
				continue;
			}
			$ins[] = [
				'ls_field' => 'target_author_actor',
				'ls_value' => $row->actor_id,
				'ls_log_id' => $row->ls_log_id,
			];
		}
		// Same guard as above: with nothing to insert, affectedRows() would
		// otherwise report the count of the actor insert done just before.
		if ( $ins ) {
			$dbw->insert( 'log_search', $ins, __METHOD__, [ 'IGNORE' ] );
			$countInserted += $dbw->affectedRows();
		}

		list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
		$this->output( "... target_author_ip, $display\n" );
		$lbFactory->waitForReplication();
	}

	$this->output(
		"Completed migration, inserted $countInserted row(s) with $countActors new actor(s), "
		. "$countErrors error(s)\n"
	);
	return $countErrors;
}
600 }
Wikimedia\Rdbms\IDatabase\affectedRows
affectedRows()
Get the number of rows affected by the last write query.
MigrateActors\migrateLogSearch
migrateLogSearch()
Migrate actors in the log_search table.
Definition: MigrateActors.php:461
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:154
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:327
$res
$res
Definition: testCompression.php:57
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relational database handles.
Definition: IDatabase.php:38
$dbr
$dbr
Definition: testCompression.php:54
Maintenance\beginTransaction
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
Definition: Maintenance.php:1397
MigrateActors\$tables
$tables
Definition: MigrateActors.php:37
LoggedUpdateMaintenance
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
Definition: LoggedUpdateMaintenance.php:26
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
Definition: Maintenance.php:245
MigrateActors\migrateToTemp
migrateToTemp( $table, $primaryKey, $extra, $userField, $nameField, $newPrimaryKey, $actorField)
Migrate actors in a table to a temporary table.
Definition: MigrateActors.php:361
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
DB_MASTER
const DB_MASTER
Definition: defines.php:26
MigrateActors\makeActorIdSubquery
makeActorIdSubquery( $dbw, $userField, $nameField)
Make the subqueries for actor_id
Definition: MigrateActors.php:153
MigrateActors\doDBUpdates
doDBUpdates()
Do the actual work.
Definition: MigrateActors.php:54
MigrateActors\getUpdateKey
getUpdateKey()
Get the update key name to go in the update log table.
Definition: MigrateActors.php:46
MigrateActors\migrate
migrate( $table, $primaryKey, $userField, $nameField, $actorField)
Migrate actors in a table.
Definition: MigrateActors.php:255
Maintenance\commitTransaction
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
Definition: Maintenance.php:1412
Maintenance\getDB
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
Definition: Maintenance.php:1366
Wikimedia\Rdbms\IDatabase\insert
insert( $table, $rows, $fname=__METHOD__, $options=[])
Insert the given row(s) into a table.
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:281
Wikimedia\Rdbms\IDatabase\select
select( $table, $vars, $conds='', $fname=__METHOD__, $options=[], $join_conds=[])
Execute a SELECT query constructed using the various parameters provided.
MigrateActors
Maintenance script that migrates actors from pre-1.31 columns to the 'actor' table.
Definition: MigrateActors.php:35
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:463
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:434
MigrateActors\addActorsForRows
addActorsForRows(IDatabase $dbw, $nameField, array &$rows, array &$complainedAboutUsers, &$countErrors)
Add actors for anons in a set of rows.
Definition: MigrateActors.php:180
MigrateActors\makeNextCond
makeNextCond( $dbw, $primaryKey, $row)
Calculate a "next" condition and a display string.
Definition: MigrateActors.php:129
User\isUsableName
static isUsableName( $name)
Usernames which fail to pass this function will be blocked from user login and new account registrati...
Definition: User.php:992
MigrateActors\__construct
__construct()
Default constructor.
Definition: MigrateActors.php:39
MigrateActors\doTable
doTable( $table)
Definition: MigrateActors.php:50
Maintenance\setBatchSize
setBatchSize( $s=0)
Set the batch size.
Definition: Maintenance.php:374