MediaWiki REL1_39
MigrateActors.php
Go to the documentation of this file.
1<?php
26
27require_once __DIR__ . '/../Maintenance.php';
28
36
37 protected $tables = null;
38
/**
 * Set up the script: description, the optional --tables option and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription(
		'Migrates actors from pre-1.31 columns to the \'actor\' table'
	);
	// Not required, but takes an argument when given.
	$this->addOption(
		'tables',
		'List of tables to process, comma-separated',
		false,
		true
	);
	$this->setBatchSize( 100 );
}
45
/**
 * Get the update key name to go in the update log table.
 *
 * @return string The name of this class
 */
protected function getUpdateKey() {
	return self::class;
}
49
/**
 * Tell whether a table should be processed in this run.
 *
 * @param string $table Table name to check
 * @return bool True when no table list was given, or when $table is in the list
 */
protected function doTable( $table ) {
	if ( $this->tables === null ) {
		// No restriction configured: every table is processed.
		return true;
	}

	return in_array( $table, $this->tables, true );
}
53
/**
 * Do the actual work.
 *
 * First makes sure every registered user has an actor row (creating them in
 * batches when the `user` table is selected, otherwise only verifying), then
 * migrates each supported table and finally log_search.
 *
 * @return bool True if every step completed without errors, false otherwise
 */
protected function doDBUpdates() {
	$tables = $this->getOption( 'tables' );
	if ( $tables !== null ) {
		// Trim each entry so that `--tables "user, revision"` is not silently
		// treated as naming a table called " revision".
		$this->tables = array_map( 'trim', explode( ',', $tables ) );
	}

	if ( $this->doTable( 'user' ) ) {
		$this->output( "Creating actor entries for all registered users\n" );
		$end = 0;
		$dbw = $this->getDB( DB_PRIMARY );
		$max = (int)$dbw->selectField( 'user', 'MAX(user_id)', '', __METHOD__ );
		$count = 0;
		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
		// Guard against a zero/negative batch size, which would stop $end advancing.
		$batchSize = max( 1, (int)$this->mBatchSize );
		while ( $end < $max ) {
			$start = $end + 1;
			// Both bounds are inclusive, so subtract one to cover exactly
			// $batchSize user IDs per iteration.
			$end = min( $start + $batchSize - 1, $max );
			$this->output( "... $start - $end\n" );
			$dbw->insertSelect(
				'actor',
				'user',
				[ 'actor_user' => 'user_id', 'actor_name' => 'user_name' ],
				[ "user_id >= $start", "user_id <= $end" ],
				__METHOD__,
				[ 'IGNORE' ],
				[ 'ORDER BY' => [ 'user_id' ] ]
			);
			$count += $dbw->affectedRows();
			$lbFactory->waitForReplication();
		}
		$this->output( "Completed actor creation, added $count new actor(s)\n" );
	} else {
		$this->output( "Checking that actors exist for all registered users\n" );
		$dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
		// A user row with no matching actor row yields a NULL actor_id here.
		$anyMissing = (bool)$dbr->selectField(
			[ 'user', 'actor' ],
			'1',
			[ 'actor_id' => null ],
			__METHOD__,
			[],
			[ 'actor' => [ 'LEFT JOIN', 'actor_user = user_id' ] ]
		);
		if ( $anyMissing ) {
			$this->error( 'Some users lack actors; run without --tables or include `user` in --tables.' );
			return false;
		}
		$this->output( "Ok, continuing.\n" );
	}

	$errors = 0;
	$errors += $this->migrate( 'revision', 'rev_id', 'rev_user', 'rev_user_text', 'rev_actor' );
	$errors += $this->migrate( 'archive', 'ar_id', 'ar_user', 'ar_user_text', 'ar_actor' );
	$errors += $this->migrate( 'ipblocks', 'ipb_id', 'ipb_by', 'ipb_by_text', 'ipb_by_actor' );
	$errors += $this->migrate( 'image', 'img_name', 'img_user', 'img_user_text', 'img_actor' );
	$errors += $this->migrate(
		'oldimage', [ 'oi_name', 'oi_timestamp' ], 'oi_user', 'oi_user_text', 'oi_actor'
	);
	$errors += $this->migrate( 'filearchive', 'fa_id', 'fa_user', 'fa_user_text', 'fa_actor' );
	$errors += $this->migrate( 'recentchanges', 'rc_id', 'rc_user', 'rc_user_text', 'rc_actor' );
	$errors += $this->migrate( 'logging', 'log_id', 'log_user', 'log_user_text', 'log_actor' );

	$errors += $this->migrateLogSearch();

	return $errors === 0;
}
118
/**
 * Build the paging condition that selects rows strictly after $row in
 * primary-key order, plus a human-readable "field=value" summary of $row.
 *
 * For a multi-column key the condition is nested from the last column
 * outwards: "a > x OR a = x AND (b > y)".
 *
 * @param IDatabase $dbw Used to quote the values
 * @param string[] $primaryKey Primary key fields, in order
 * @param stdClass $row Row holding a value for each primary key field
 * @return array [ string SQL condition, string display text ]
 */
private function makeNextCond( $dbw, $primaryKey, $row ) {
	$cond = '';
	$parts = [];

	// Walk the key from its last column to its first so each outer column
	// can wrap the condition built for the columns after it.
	foreach ( array_reverse( $primaryKey ) as $column ) {
		$raw = $row->$column;
		// Prepend so the display text ends up in primary-key order.
		array_unshift( $parts, $column . '=' . $raw );

		$quoted = $dbw->addQuotes( $raw );
		$cond = ( $cond === '' )
			? "$column > $quoted"
			: "$column > $quoted OR $column = $quoted AND ($cond)";
	}

	return [ $cond, implode( ' ', $parts ) ];
}
142
/**
 * Build a SQL expression that yields the actor_id for a row.
 *
 * The actor is looked up by name when the user field is 0 or NULL,
 * and by user ID otherwise.
 *
 * @param IDatabase $dbw Used to build the subqueries
 * @param string $userField User ID field of the table being migrated
 * @param string $nameField User name field of the table being migrated
 * @return string SQL CASE expression
 */
private function makeActorIdSubquery( $dbw, $userField, $nameField ) {
	$byUserId = $dbw->buildSelectSubquery(
		'actor', 'actor_id', [ "$userField = actor_user" ], __METHOD__
	);
	$byName = $dbw->buildSelectSubquery(
		'actor', 'actor_id', [ "$nameField = actor_name" ], __METHOD__
	);

	return 'CASE WHEN ' . $userField . ' = 0 OR ' . $userField . ' IS NULL'
		. ' THEN ' . $byName
		. ' ELSE ' . $byUserId
		. ' END';
}
165
/**
 * Create actor rows for the rows that do not have one yet.
 *
 * Rows whose actor_id is null and whose name is a usable user name cannot
 * get an anonymous actor: they are reported (once per name), counted as
 * errors and removed from $rows. For the remaining rows without an actor,
 * an actor is inserted by name and the row's actor_id is filled in.
 *
 * @param IDatabase $dbw
 * @param string $nameField Field of each row that holds the user name
 * @param stdClass[] &$rows Rows to process; rejected rows are removed
 * @param array &$complainedAboutUsers Names already reported, as keys
 * @param int &$countErrors Incremented once per rejected row
 * @return int Number of actor rows inserted
 */
private function addActorsForRows(
	IDatabase $dbw, $nameField, array &$rows, array &$complainedAboutUsers, &$countErrors
) {
	$userNameUtils = MediaWikiServices::getInstance()->getUserNameUtils();

	$pendingNames = [];
	$rejected = [];
	foreach ( $rows as $index => $row ) {
		if ( $row->actor_id !== null ) {
			continue;
		}

		$name = $row->$nameField;
		if ( !$userNameUtils->isUsable( $name ) ) {
			$pendingNames[$name] = 0;
			continue;
		}

		// All registered users should have an actor_id already, so a usable
		// name here means maintenance/cleanupUsersWithNoId.php was not run.
		if ( !isset( $complainedAboutUsers[$name] ) ) {
			$complainedAboutUsers[$name] = true;
			$this->error(
				"User name \"$name\" is usable, cannot create an anonymous actor for it."
				. " Run maintenance/cleanupUsersWithNoId.php to fix this situation.\n"
			);
		}
		$rejected[$index] = true;
		$countErrors++;
	}
	$rows = array_diff_key( $rows, $rejected );

	if ( !$pendingNames ) {
		return 0;
	}

	$newActorRows = [];
	foreach ( array_keys( $pendingNames ) as $actorName ) {
		$newActorRows[] = [ 'actor_name' => $actorName ];
	}
	$dbw->insert( 'actor', $newActorRows, __METHOD__, [ 'IGNORE' ] );
	$created = $dbw->affectedRows();

	// Read the IDs back for all needed names, including ones that already existed.
	// Array keys that look like integers were cast by PHP, hence the strval.
	$found = $dbw->select(
		'actor',
		[ 'actor_id', 'actor_name' ],
		[ 'actor_name' => array_map( 'strval', array_keys( $pendingNames ) ) ],
		__METHOD__
	);
	foreach ( $found as $actorRow ) {
		$pendingNames[$actorRow->actor_name] = $actorRow->actor_id;
	}

	foreach ( $rows as $row ) {
		if ( $row->actor_id === null ) {
			$row->actor_id = $pendingNames[$row->$nameField];
		}
	}

	return $created;
}
239
/**
 * Migrate actors in a table.
 *
 * Processes rows whose actor field is 0 in primary-key order, one batch at
 * a time: resolves (or creates) the actor for each row and writes it to the
 * actor field.
 *
 * @param string $table Table to migrate
 * @param string|string[] $primaryKey Primary key field(s) of $table
 * @param string $userField User ID field
 * @param string $nameField User name field
 * @param string $actorField Actor field to populate
 * @return int Number of errors
 */
protected function migrate( $table, $primaryKey, $userField, $nameField, $actorField ) {
	if ( !$this->doTable( $table ) ) {
		$this->output( "Skipping $table, not included in --tables\n" );
		return 0;
	}

	$dbw = $this->getDB( DB_PRIMARY );
	if ( !$dbw->fieldExists( $table, $userField, __METHOD__ ) ) {
		$this->output( "No need to migrate $table.$userField, field does not exist\n" );
		return 0;
	}

	$complainedAboutUsers = [];
	$primaryKey = (array)$primaryKey;
	$pkFilter = array_fill_keys( $primaryKey, true );

	$this->output(
		"Beginning migration of $table.$userField and $table.$nameField to $table.$actorField\n"
	);
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$lbFactory->waitForReplication();

	// The selected fields and query options are the same for every batch.
	$fields = array_merge(
		$primaryKey,
		[
			$userField,
			$nameField,
			'actor_id' => $this->makeActorIdSubquery( $dbw, $userField, $nameField ),
		]
	);
	$selectOptions = [
		'ORDER BY' => $primaryKey,
		'LIMIT' => $this->mBatchSize,
	];

	$resumeCond = '1=1';
	$countUpdated = 0;
	$countActors = 0;
	$countErrors = 0;
	for ( ; ; ) {
		// Fetch the next batch of rows that still need an actor
		$res = $dbw->select(
			$table,
			$fields,
			[ $actorField => 0, $resumeCond ],
			__METHOD__,
			$selectOptions
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Remember the last fetched row before addActorsForRows() can drop rows.
		$rows = iterator_to_array( $res );
		$lastRow = end( $rows );
		$countActors += $this->addActorsForRows(
			$dbw, $nameField, $rows, $complainedAboutUsers, $countErrors
		);

		foreach ( $rows as $row ) {
			if ( $row->actor_id ) {
				// Match on the primary key, and only while the field is still unset.
				$where = array_intersect_key( (array)$row, $pkFilter ) + [
					$actorField => 0
				];
				$dbw->update( $table, [ $actorField => $row->actor_id ], $where, __METHOD__ );
				$countUpdated += $dbw->affectedRows();
				continue;
			}

			[ , $display ] = $this->makeNextCond( $dbw, $primaryKey, $row );
			$this->error(
				"Could not make actor for row with $display "
				. "$userField={$row->$userField} $nameField={$row->$nameField}\n"
			);
			$countErrors++;
		}

		[ $resumeCond, $display ] = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
		$this->output( "... $display\n" );
		$lbFactory->waitForReplication();
	}

	$this->output(
		"Completed migration, updated $countUpdated row(s) with $countActors new actor(s), "
		. "$countErrors error(s)\n"
	);
	return $countErrors;
}
341
/**
 * Migrate actors in the log_search table.
 *
 * For every 'target_author_id' and 'target_author_ip' row, a matching
 * 'target_author_actor' row is inserted. Rows with ls_field
 * 'target_author_actor' and an empty ls_value are deleted first.
 *
 * @return int Number of errors
 */
protected function migrateLogSearch() {
	if ( !$this->doTable( 'log_search' ) ) {
		$this->output( "Skipping log_search, not included in --tables\n" );
		return 0;
	}

	$complainedAboutUsers = [];

	$primaryKey = [ 'ls_value', 'ls_log_id' ];
	$this->output( "Beginning migration of log_search\n" );
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$lbFactory->waitForReplication();

	$dbw = $this->getDB( DB_PRIMARY );
	$countInserted = 0;
	$countActors = 0;
	$countErrors = 0;

	$anyBad = (bool)$dbw->selectField( 'log_search', '1',
		[ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
		__METHOD__
	);
	if ( $anyBad ) {
		$this->output( "... Deleting bogus rows due to T215525\n" );
		$dbw->delete(
			'log_search',
			[ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
			__METHOD__
		);
		$ct = $dbw->affectedRows();
		$this->output( "... Deleted $ct bogus row(s) from T215525\n" );
		$lbFactory->waitForReplication();
	}

	$next = '1=1';
	while ( true ) {
		// Fetch the rows needing update
		$res = $dbw->select(
			[ 'log_search', 'actor' ],
			[ 'ls_value', 'ls_log_id', 'actor_id' ],
			[
				'ls_field' => 'target_author_id',
				$next
			],
			__METHOD__,
			[
				'ORDER BY' => $primaryKey,
				'LIMIT' => $this->mBatchSize,
			],
			[ 'actor' => [ 'LEFT JOIN', 'actor_user = ' . $dbw->buildIntegerCast( 'ls_value' ) ] ]
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Materialize the batch so the last row is known up front
		$rows = iterator_to_array( $res );
		$lastRow = end( $rows );

		// Insert a 'target_author_actor' for each 'target_author_id'
		$ins = [];
		foreach ( $rows as $row ) {
			if ( !$row->actor_id ) {
				list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
				$this->error( "No actor for target_author_id row with $display\n" );
				$countErrors++;
				continue;
			}
			$ins[] = [
				'ls_field' => 'target_author_actor',
				'ls_value' => $row->actor_id,
				'ls_log_id' => $row->ls_log_id,
			];
		}
		// Skip the write when every row in the batch failed: affectedRows()
		// reports the last write query, so calling it without a fresh insert
		// would count an earlier write again.
		if ( $ins ) {
			$dbw->insert( 'log_search', $ins, __METHOD__, [ 'IGNORE' ] );
			$countInserted += $dbw->affectedRows();
		}

		list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
		$this->output( "... target_author_id, $display\n" );
		$lbFactory->waitForReplication();
	}

	$next = '1=1';
	while ( true ) {
		// Fetch the rows needing update
		$res = $dbw->select(
			[ 'log_search', 'actor' ],
			[ 'ls_value', 'ls_log_id', 'actor_id' ],
			[
				'ls_field' => 'target_author_ip',
				$next
			],
			__METHOD__,
			[
				'ORDER BY' => $primaryKey,
				'LIMIT' => $this->mBatchSize,
			],
			[ 'actor' => [ 'LEFT JOIN', 'ls_value = actor_name' ] ]
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Insert new actors for rows that need one
		$rows = iterator_to_array( $res );
		$lastRow = end( $rows );
		$countActors += $this->addActorsForRows(
			$dbw, 'ls_value', $rows, $complainedAboutUsers, $countErrors
		);

		// Insert a 'target_author_actor' for each 'target_author_ip'
		$ins = [];
		foreach ( $rows as $row ) {
			if ( !$row->actor_id ) {
				list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
				$this->error( "Could not make actor for target_author_ip row with $display\n" );
				$countErrors++;
				continue;
			}
			$ins[] = [
				'ls_field' => 'target_author_actor',
				'ls_value' => $row->actor_id,
				'ls_log_id' => $row->ls_log_id,
			];
		}
		// Same guard as above: only count rows from an insert that actually ran.
		if ( $ins ) {
			$dbw->insert( 'log_search', $ins, __METHOD__, [ 'IGNORE' ] );
			$countInserted += $dbw->affectedRows();
		}

		list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
		$this->output( "... target_author_ip, $display\n" );
		$lbFactory->waitForReplication();
	}

	$this->output(
		"Completed migration, inserted $countInserted row(s) with $countActors new actor(s), "
		. "$countErrors error(s)\n"
	);
	return $countErrors;
}
484}
getDB()
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
MediaWikiServices
Service locator for MediaWiki core services.
MigrateActors
Maintenance script that migrates actors from pre-1.31 columns to the 'actor' table.
doDBUpdates()
Do the actual work.
migrate( $table, $primaryKey, $userField, $nameField, $actorField)
Migrate actors in a table.
migrateLogSearch()
Migrate actors in the log_search table.
__construct()
Default constructor.
getUpdateKey()
Get the update key name to go in the update log table.
addQuotes( $s)
Escape and quote a raw value string for use in a SQL query.
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:39
select( $table, $vars, $conds='', $fname=__METHOD__, $options=[], $join_conds=[])
Execute a SELECT query constructed using the various parameters provided.
insertSelect( $destTable, $srcTable, $varMap, $conds, $fname=__METHOD__, $insertOptions=[], $selectOptions=[], $selectJoinConds=[])
INSERT SELECT wrapper.
affectedRows()
Get the number of rows affected by the last write query.
delete( $table, $conds, $fname=__METHOD__)
Delete all rows in a table that match a condition.
update( $table, $set, $conds, $fname=__METHOD__, $options=[])
Update all rows in a table that match a given condition.
selectField( $table, $var, $cond='', $fname=__METHOD__, $options=[], $join_conds=[])
A SELECT wrapper which returns a single field from a single result row.
insert( $table, $rows, $fname=__METHOD__, $options=[])
Insert row(s) into a table, in the provided order.
buildSelectSubquery( $table, $vars, $conds='', $fname=__METHOD__, $options=[], $join_conds=[])
Equivalent to IDatabase::selectSQLText() except wraps the result in Subquery.
const DB_REPLICA
Definition defines.php:26
const DB_PRIMARY
Definition defines.php:28