MediaWiki  master
MigrateActors.php
Go to the documentation of this file.
1 <?php
26 
27 require_once __DIR__ . '/../Maintenance.php';
28 
36 
/**
 * Tables selected through the --tables option, or null when no filter was
 * given and every table is to be processed.
 *
 * @var string[]|null
 */
protected $tables = null;
38 
/**
 * Register the script description, the optional --tables filter and a
 * default batch size of 100.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( "Migrates actors from pre-1.31 columns to the 'actor' table" );
	$this->addOption(
		'tables',
		'List of tables to process, comma-separated',
		// Not required...
		false,
		// ...but takes an argument when given.
		true
	);
	$this->setBatchSize( 100 );
}
45 
/**
 * Get the update key name to go in the update log table.
 *
 * @return string The name of this class
 */
protected function getUpdateKey() {
	return self::class;
}
49 
/**
 * Tell whether a table has been selected for processing.
 *
 * @param string $table Table name
 * @return bool True when no table filter is set, or when $table is listed in it
 */
protected function doTable( $table ) {
	if ( $this->tables === null ) {
		// No filter: everything is processed.
		return true;
	}

	return in_array( $table, $this->tables, true );
}
53 
/**
 * Do the actual work.
 *
 * First makes sure every registered user has an actor row: the rows are
 * created when the `user` table is selected, and merely checked for
 * otherwise. Then each table is migrated in turn.
 *
 * @return bool True when no migration step reported an error; false when
 *  errors occurred or when users without actors were found
 */
protected function doDBUpdates() {
	$requested = $this->getOption( 'tables' );
	if ( $requested !== null ) {
		$this->tables = explode( ',', $requested );
	}

	if ( !$this->doTable( 'user' ) ) {
		// `user` is excluded from this run: only verify that no user lacks an actor.
		$this->output( "Checking that actors exist for all registered users\n" );
		$dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
		$missing = $dbr->selectField(
			[ 'user', 'actor' ],
			'1',
			[ 'actor_id' => null ],
			__METHOD__,
			[],
			[ 'actor' => [ 'LEFT JOIN', 'actor_user = user_id' ] ]
		);
		if ( (bool)$missing ) {
			$this->error( 'Some users lack actors; run without --tables or include `user` in --tables.' );
			return false;
		}
		$this->output( "Ok, continuing.\n" );
	} else {
		$this->output( "Creating actor entries for all registered users\n" );
		$dbw = $this->getDB( DB_PRIMARY );
		$maxUserId = $dbw->selectField( 'user', 'MAX(user_id)', '', __METHOD__ );
		$added = 0;
		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

		// Walk the user_id range in consecutive windows up to the maximum id.
		for ( $last = 0; $last < $maxUserId; ) {
			$first = $last + 1;
			$last = min( $first + $this->mBatchSize, $maxUserId );
			$this->output( "... $first - $last\n" );
			$dbw->insertSelect(
				'actor',
				'user',
				[ 'actor_user' => 'user_id', 'actor_name' => 'user_name' ],
				[ "user_id >= $first", "user_id <= $last" ],
				__METHOD__,
				[ 'IGNORE' ],
				[ 'ORDER BY' => [ 'user_id' ] ]
			);
			$added += $dbw->affectedRows();
			$lbFactory->waitForReplication();
		}
		$this->output( "Completed actor creation, added $added new actor(s)\n" );
	}

	$errorCount = 0;
	$errorCount += $this->migrateToTemp(
		'revision',
		'rev_id',
		[ 'revactor_timestamp' => 'rev_timestamp', 'revactor_page' => 'rev_page' ],
		'rev_user',
		'rev_user_text',
		'revactor_rev',
		'revactor_actor'
	);

	// Each entry: [ table, primary key, user id field, user name field, actor field ]
	$inPlaceMigrations = [
		[ 'archive', 'ar_id', 'ar_user', 'ar_user_text', 'ar_actor' ],
		[ 'ipblocks', 'ipb_id', 'ipb_by', 'ipb_by_text', 'ipb_by_actor' ],
		[ 'image', 'img_name', 'img_user', 'img_user_text', 'img_actor' ],
		[ 'oldimage', [ 'oi_name', 'oi_timestamp' ], 'oi_user', 'oi_user_text', 'oi_actor' ],
		[ 'filearchive', 'fa_id', 'fa_user', 'fa_user_text', 'fa_actor' ],
		[ 'recentchanges', 'rc_id', 'rc_user', 'rc_user_text', 'rc_actor' ],
		[ 'logging', 'log_id', 'log_user', 'log_user_text', 'log_actor' ],
	];
	foreach ( $inPlaceMigrations as $spec ) {
		$errorCount += $this->migrate( ...$spec );
	}

	$errorCount += $this->migrateLogSearch();

	return $errorCount === 0;
}
121 
/**
 * Calculate a "next" condition and a display string.
 *
 * The condition selects the rows that sort after $row when ordering by the
 * primary key columns; the display string lists "column=value" pairs in
 * primary key order, separated by spaces.
 *
 * @param object $dbw Database handle; only addQuotes() is called on it here
 * @param string[] $primaryKey List of primary key column names
 * @param object $row Row holding a value for each primary key column
 * @return string[] [ SQL condition, display text ]
 */
private function makeNextCond( $dbw, $primaryKey, $row ) {
	$cond = '';
	$shown = [];

	// Build from the last key column outwards so each earlier column wraps the rest.
	foreach ( array_reverse( $primaryKey ) as $column ) {
		$raw = $row->$column;
		array_unshift( $shown, $column . '=' . $raw );

		$quoted = $dbw->addQuotes( $raw );
		if ( $cond !== '' ) {
			$cond = "$column > $quoted OR $column = $quoted AND ($cond)";
		} else {
			$cond = "$column > $quoted";
		}
	}

	return [ $cond, implode( ' ', $shown ) ];
}
145 
/**
 * Make the subqueries for actor_id.
 *
 * The returned SQL expression looks the actor up by name when the user id
 * field is 0 or NULL, and by user id otherwise.
 *
 * @param object $dbw Database handle; only buildSelectSubquery() is called on it here
 * @param string $userField Name of the user id column
 * @param string $nameField Name of the user name column
 * @return string SQL CASE expression yielding an actor_id
 */
private function makeActorIdSubquery( $dbw, $userField, $nameField ) {
	$fname = __METHOD__;

	$byUserId = $dbw->buildSelectSubquery(
		'actor', 'actor_id', [ "$userField = actor_user" ], $fname
	);
	$byName = $dbw->buildSelectSubquery(
		'actor', 'actor_id', [ "$nameField = actor_name" ], $fname
	);

	return "CASE WHEN $userField = 0 OR $userField IS NULL "
		. "THEN $byName ELSE $byUserId END";
}
168 
/**
 * Add actors for anons in a set of rows.
 *
 * Rows whose actor_id is null and whose name is a usable user name are
 * reported (once per name), counted as errors and removed from $rows. For the
 * remaining rows without an actor_id, actor rows are inserted by name and the
 * new ids are written back into the row objects.
 *
 * @param IDatabase $dbw
 * @param string $nameField Name of the row property holding the user name
 * @param object[] &$rows Rows to process; rejected rows are removed
 * @param array &$complainedAboutUsers Names already reported, as name => true
 * @param int &$countErrors Incremented once per rejected row
 * @return int Number of affected rows reported for the actor insert
 */
private function addActorsForRows(
	IDatabase $dbw, $nameField, array &$rows, array &$complainedAboutUsers, &$countErrors
) {
	$userNameUtils = MediaWikiServices::getInstance()->getUserNameUtils();

	// Actor ids keyed by name; 0 until the actor row has been read back.
	$actorIdByName = [];
	// Indexes of the rows to drop from $rows.
	$rejected = [];

	foreach ( $rows as $index => $row ) {
		if ( $row->actor_id !== null ) {
			continue;
		}

		$name = $row->$nameField;
		if ( !$userNameUtils->isUsable( $name ) ) {
			$actorIdByName[$name] = 0;
			continue;
		}

		// Every registered user should already have an actor_id, so a
		// usable name at this point means that
		// maintenance/cleanupUsersWithNoId.php has not been run.
		if ( !isset( $complainedAboutUsers[$name] ) ) {
			$complainedAboutUsers[$name] = true;
			$this->error(
				"User name \"$name\" is usable, cannot create an anonymous actor for it."
				. " Run maintenance/cleanupUsersWithNoId.php to fix this situation.\n"
			);
		}
		$rejected[$index] = true;
		$countErrors++;
	}
	$rows = array_diff_key( $rows, $rejected );

	if ( !$actorIdByName ) {
		return 0;
	}

	$names = array_keys( $actorIdByName );
	$newActors = [];
	foreach ( $names as $actorName ) {
		$newActors[] = [ 'actor_name' => $actorName ];
	}
	$dbw->insert( 'actor', $newActors, __METHOD__ );
	$countActors = $dbw->affectedRows();

	// Read the ids back and hand them to the rows that still lack one.
	$res = $dbw->select(
		'actor',
		[ 'actor_id', 'actor_name' ],
		[ 'actor_name' => array_map( 'strval', $names ) ],
		__METHOD__
	);
	foreach ( $res as $actor ) {
		$actorIdByName[$actor->actor_name] = $actor->actor_id;
	}
	foreach ( $rows as $row ) {
		if ( $row->actor_id === null ) {
			$row->actor_id = $actorIdByName[$row->$nameField];
		}
	}

	return $countActors;
}
241 
/**
 * Migrate actors in a table.
 *
 * Rows whose actor field is 0 are read in batches ordered by primary key, and
 * each one is updated in place with the actor id resolved from its user id or
 * user name.
 *
 * @param string $table Table to migrate
 * @param string|string[] $primaryKey Primary key column(s) of $table
 * @param string $userField Name of the user id column
 * @param string $nameField Name of the user name column
 * @param string $actorField Name of the actor column to fill
 * @return int Number of errors
 */
protected function migrate( $table, $primaryKey, $userField, $nameField, $actorField ) {
	if ( !$this->doTable( $table ) ) {
		$this->output( "Skipping $table, not included in --tables\n" );
		return 0;
	}

	$dbw = $this->getDB( DB_PRIMARY );
	if ( !$dbw->fieldExists( $table, $userField, __METHOD__ ) ) {
		$this->output( "No need to migrate $table.$userField, field does not exist\n" );
		return 0;
	}

	$complainedAboutUsers = [];
	$pkColumns = (array)$primaryKey;
	$pkFilter = array_fill_keys( $pkColumns, true );
	$unmigrated = [ $actorField => 0 ];

	$this->output(
		"Beginning migration of $table.$userField and $table.$nameField to $table.$actorField\n"
	);
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$lbFactory->waitForReplication();

	$selectFields = array_merge(
		$pkColumns,
		[
			$userField,
			$nameField,
			'actor_id' => $this->makeActorIdSubquery( $dbw, $userField, $nameField ),
		]
	);

	$countUpdated = 0;
	$countActors = 0;
	$countErrors = 0;
	$next = '1=1';
	for ( ; ; ) {
		// Fetch the next batch of rows that still need an actor.
		$res = $dbw->select(
			$table,
			$selectFields,
			[ $actorField => 0, $next ],
			__METHOD__,
			[ 'ORDER BY' => $pkColumns, 'LIMIT' => $this->mBatchSize ]
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Remember the last fetched row before rows may be dropped below.
		$rows = iterator_to_array( $res );
		$lastRow = end( $rows );
		$countActors += $this->addActorsForRows(
			$dbw, $nameField, $rows, $complainedAboutUsers, $countErrors
		);

		foreach ( $rows as $row ) {
			if ( $row->actor_id ) {
				// Match on the primary key, and only while the row is still unmigrated.
				$where = array_intersect_key( (array)$row, $pkFilter ) + $unmigrated;
				$dbw->update( $table, [ $actorField => $row->actor_id ], $where, __METHOD__ );
				$countUpdated += $dbw->affectedRows();
			} else {
				$display = $this->makeNextCond( $dbw, $pkColumns, $row )[1];
				$this->error(
					"Could not make actor for row with $display "
					. "$userField={$row->$userField} $nameField={$row->$nameField}\n"
				);
				$countErrors++;
			}
		}

		$progress = $this->makeNextCond( $dbw, $pkColumns, $lastRow );
		$next = $progress[0];
		$this->output( "... {$progress[1]}\n" );
		$lbFactory->waitForReplication();
	}

	$this->output(
		"Completed migration, updated $countUpdated row(s) with $countActors new actor(s), "
		. "$countErrors error(s)\n"
	);
	return $countErrors;
}
343 
/**
 * Migrate actors in a table to a temporary table.
 *
 * Rows of $table that have no matching row in "{$table}_actor_temp" are read
 * in batches ordered by $primaryKey, and one row per source row is inserted
 * into the temporary table.
 *
 * @param string $table Source table
 * @param string $primaryKey Single primary key column of $table
 * @param array $extra Extra columns to copy, as [ temp table column => source column ]
 * @param string $userField Name of the user id column in $table
 * @param string $nameField Name of the user name column in $table
 * @param string $newPrimaryKey Temp table column that receives the $primaryKey value
 * @param string $actorField Temp table column that receives the actor id
 * @return int Number of errors
 */
protected function migrateToTemp(
	$table, $primaryKey, $extra, $userField, $nameField, $newPrimaryKey, $actorField
) {
	if ( !$this->doTable( $table ) ) {
		$this->output( "Skipping $table, not included in --tables\n" );
		return 0;
	}

	$dbw = $this->getDB( DB_PRIMARY );
	if ( !$dbw->fieldExists( $table, $userField, __METHOD__ ) ) {
		$this->output( "No need to migrate $table.$userField, field does not exist\n" );
		return 0;
	}

	$complainedAboutUsers = [];

	$newTable = $table . '_actor_temp';
	$this->output(
		"Beginning migration of $table.$userField and $table.$nameField to $newTable.$actorField\n"
	);
	MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();

	$actorIdSubquery = $this->makeActorIdSubquery( $dbw, $userField, $nameField );
	$next = [];
	$countUpdated = 0;
	$countActors = 0;
	$countErrors = 0;
	while ( true ) {
		// Fetch the rows needing update: those without a row in the temp table yet.
		// The $extra columns are selected under their temp table names.
		$res = $dbw->select(
			[ $table, $newTable ],
			[ $primaryKey, $userField, $nameField, 'actor_id' => $actorIdSubquery ] + $extra,
			[ $newPrimaryKey => null ] + $next,
			__METHOD__,
			[
				'ORDER BY' => $primaryKey,
				'LIMIT' => $this->mBatchSize,
			],
			[
				$newTable => [ 'LEFT JOIN', "{$primaryKey}={$newPrimaryKey}" ],
			]
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Insert new actors for rows that need one. $lastRow is taken first
		// because addActorsForRows() may remove rows from $rows.
		$rows = iterator_to_array( $res );
		$lastRow = end( $rows );
		$countActors += $this->addActorsForRows(
			$dbw, $nameField, $rows, $complainedAboutUsers, $countErrors
		);

		// Build the temp table rows
		$inserts = [];
		foreach ( $rows as $row ) {
			if ( !$row->actor_id ) {
				list( , $display ) = $this->makeNextCond( $dbw, [ $primaryKey ], $row );
				$this->error(
					"Could not make actor for row with $display "
					. "$userField={$row->$userField} $nameField={$row->$nameField}\n"
				);
				$countErrors++;
				continue;
			}
			$ins = [
				$newPrimaryKey => $row->$primaryKey,
				$actorField => $row->actor_id,
			];
			foreach ( $extra as $to => $from ) {
				// It's aliased
				$ins[$to] = $row->$to;
			}
			$inserts[] = $ins;
		}

		// Only write when there is something to insert: with no rows the insert
		// would do nothing and affectedRows() would not describe it, so
		// $countUpdated would be wrong.
		if ( $inserts ) {
			$this->beginTransaction( $dbw, __METHOD__ );
			$dbw->insert( $newTable, $inserts, __METHOD__ );
			$countUpdated += $dbw->affectedRows();
			$this->commitTransaction( $dbw, __METHOD__ );
		}

		// Calculate the "next" condition
		list( $n, $display ) = $this->makeNextCond( $dbw, [ $primaryKey ], $lastRow );
		$next = [ $n ];
		$this->output( "... $display\n" );
	}

	$this->output(
		"Completed migration, updated $countUpdated row(s) with $countActors new actor(s), "
		. "$countErrors error(s)\n"
	);
	return $countErrors;
}
455 
/**
 * Migrate actors in the log_search table.
 *
 * For every `target_author_id` and `target_author_ip` row, a matching
 * `target_author_actor` row holding the actor id is inserted. Actors are
 * created for `target_author_ip` values that have none.
 *
 * @return int Number of errors
 */
protected function migrateLogSearch() {
	if ( !$this->doTable( 'log_search' ) ) {
		$this->output( "Skipping log_search, not included in --tables\n" );
		return 0;
	}

	$complainedAboutUsers = [];

	$primaryKey = [ 'ls_value', 'ls_log_id' ];
	$this->output( "Beginning migration of log_search\n" );
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$lbFactory->waitForReplication();

	$dbw = $this->getDB( DB_PRIMARY );
	$countInserted = 0;
	$countActors = 0;
	$countErrors = 0;

	// Remove 'target_author_actor' rows that have an empty value (T215525).
	$anyBad = (bool)$dbw->selectField( 'log_search', '1',
		[ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
		__METHOD__
	);
	if ( $anyBad ) {
		$this->output( "... Deleting bogus rows due to T215525\n" );
		$dbw->delete(
			'log_search',
			[ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
			__METHOD__
		);
		$ct = $dbw->affectedRows();
		$this->output( "... Deleted $ct bogus row(s) from T215525\n" );
		$lbFactory->waitForReplication();
	}

	// Pass 1: 'target_author_id' rows, matched to actors by user id.
	$next = '1=1';
	while ( true ) {
		// Fetch the rows needing update
		$res = $dbw->select(
			[ 'log_search', 'actor' ],
			[ 'ls_value', 'ls_log_id', 'actor_id' ],
			[
				'ls_field' => 'target_author_id',
				$next
			],
			__METHOD__,
			[
				'ORDER BY' => $primaryKey,
				'LIMIT' => $this->mBatchSize,
			],
			[ 'actor' => [ 'LEFT JOIN', 'actor_user = ' . $dbw->buildIntegerCast( 'ls_value' ) ] ]
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Insert a 'target_author_actor' for each 'target_author_id'
		$ins = [];
		foreach ( $res as $row ) {
			$lastRow = $row;
			if ( !$row->actor_id ) {
				list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
				$this->error( "No actor for target_author_id row with $display\n" );
				$countErrors++;
				continue;
			}
			$ins[] = [
				'ls_field' => 'target_author_actor',
				'ls_value' => $row->actor_id,
				'ls_log_id' => $row->ls_log_id,
			];
		}
		// Skip the write when every row of the batch failed: affectedRows()
		// would then not describe this insert and would inflate the count.
		if ( $ins ) {
			$dbw->insert( 'log_search', $ins, __METHOD__, [ 'IGNORE' ] );
			$countInserted += $dbw->affectedRows();
		}

		list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
		$this->output( "... target_author_id, $display\n" );
		$lbFactory->waitForReplication();
	}

	// Pass 2: 'target_author_ip' rows, matched to actors by name.
	$next = '1=1';
	while ( true ) {
		// Fetch the rows needing update
		$res = $dbw->select(
			[ 'log_search', 'actor' ],
			[ 'ls_value', 'ls_log_id', 'actor_id' ],
			[
				'ls_field' => 'target_author_ip',
				$next
			],
			__METHOD__,
			[
				'ORDER BY' => $primaryKey,
				'LIMIT' => $this->mBatchSize,
			],
			[ 'actor' => [ 'LEFT JOIN', 'ls_value = actor_name' ] ]
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Insert new actors for rows that need one. $lastRow is taken first
		// because addActorsForRows() may remove rows from $rows.
		$rows = iterator_to_array( $res );
		$lastRow = end( $rows );
		$countActors += $this->addActorsForRows(
			$dbw, 'ls_value', $rows, $complainedAboutUsers, $countErrors
		);

		// Insert a 'target_author_actor' for each 'target_author_ip'
		$ins = [];
		foreach ( $rows as $row ) {
			if ( !$row->actor_id ) {
				list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
				$this->error( "Could not make actor for target_author_ip row with $display\n" );
				$countErrors++;
				continue;
			}
			$ins[] = [
				'ls_field' => 'target_author_actor',
				'ls_value' => $row->actor_id,
				'ls_log_id' => $row->ls_log_id,
			];
		}
		// Same guard as in the first pass: nothing to insert, nothing to count.
		if ( $ins ) {
			$dbw->insert( 'log_search', $ins, __METHOD__, [ 'IGNORE' ] );
			$countInserted += $dbw->affectedRows();
		}

		list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
		$this->output( "... target_author_ip, $display\n" );
		$lbFactory->waitForReplication();
	}

	$this->output(
		"Completed migration, inserted $countInserted row(s) with $countActors new actor(s), "
		. "$countErrors error(s)\n"
	);
	return $countErrors;
}
596 }
Wikimedia\Rdbms\IDatabase\affectedRows
affectedRows()
Get the number of rows affected by the last write query.
MigrateActors\migrateLogSearch
migrateLogSearch()
Migrate actors in the log_search table.
Definition: MigrateActors.php:460
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:200
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:329
$res
$res
Definition: testCompression.php:57
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relational database handles.
Definition: IDatabase.php:38
$dbr
$dbr
Definition: testCompression.php:54
Maintenance\beginTransaction
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
Definition: Maintenance.php:1406
MigrateActors\$tables
$tables
Definition: MigrateActors.php:37
LoggedUpdateMaintenance
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
Definition: LoggedUpdateMaintenance.php:26
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
Definition: Maintenance.php:249
MigrateActors\migrateToTemp
migrateToTemp( $table, $primaryKey, $extra, $userField, $nameField, $newPrimaryKey, $actorField)
Migrate actors in a table to a temporary table.
Definition: MigrateActors.php:361
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
MigrateActors\makeActorIdSubquery
makeActorIdSubquery( $dbw, $userField, $nameField)
Make the subqueries for actor_id
Definition: MigrateActors.php:153
MigrateActors\doDBUpdates
doDBUpdates()
Do the actual work.
Definition: MigrateActors.php:54
MigrateActors\getUpdateKey
getUpdateKey()
Get the update key name to go in the update log table.
Definition: MigrateActors.php:46
DB_PRIMARY
const DB_PRIMARY
Definition: defines.php:27
MigrateActors\migrate
migrate( $table, $primaryKey, $userField, $nameField, $actorField)
Migrate actors in a table.
Definition: MigrateActors.php:255
Maintenance\commitTransaction
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
Definition: Maintenance.php:1421
Maintenance\getDB
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
Definition: Maintenance.php:1375
Wikimedia\Rdbms\IDatabase\insert
insert( $table, $rows, $fname=__METHOD__, $options=[])
Insert the given row(s) into a table.
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:286
Wikimedia\Rdbms\IDatabase\select
select( $table, $vars, $conds='', $fname=__METHOD__, $options=[], $join_conds=[])
Execute a SELECT query constructed using the various parameters provided.
MigrateActors
Maintenance script that migrates actors from pre-1.31 columns to the 'actor' table.
Definition: MigrateActors.php:35
Maintenance\error
error( $err, $die=0)
Throw an error to the user.
Definition: Maintenance.php:464
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:435
MigrateActors\addActorsForRows
addActorsForRows(IDatabase $dbw, $nameField, array &$rows, array &$complainedAboutUsers, &$countErrors)
Add actors for anons in a set of rows.
Definition: MigrateActors.php:179
MigrateActors\makeNextCond
makeNextCond( $dbw, $primaryKey, $row)
Calculate a "next" condition and a display string.
Definition: MigrateActors.php:129
MigrateActors\__construct
__construct()
Default constructor.
Definition: MigrateActors.php:39
MigrateActors\doTable
doTable( $table)
Definition: MigrateActors.php:50
Maintenance\setBatchSize
setBatchSize( $s=0)
Definition: Maintenance.php:375