MediaWiki  master
uppercaseTitlesForUnicodeTransition.php
Go to the documentation of this file.
1 <?php
27 
28 require_once __DIR__ . '/Maintenance.php';
29 
37 
/** @var bool Whether moves and updates are really performed (--run); otherwise this is a dry run */
private $run = false;

/** @var array Usable character map loaded from the --charmap file: original character => replacement */
private $charmap = [];

/** @var User|null System user performing the moves; only set when --run is given */
private $user;

/** @var string Reason used when moving pages */
private $reason = 'Uppercasing title for Unicode upgrade';

/** @var string[] Change tags applied when moving pages */
private $tags = [];

/** @var bool[] Cache of user-existence lookups, keyed by the base part of the page title */
private $seenUsers = [];

/** @var int[]|null Namespaces to process; filled lazily by getNamespaces() */
private $namespaces = null;

/** @var string|null Text put in front of a target title that is already taken (--prefix) */
private $prefix = null;

/** @var string|null Text appended to a target title that is already taken (--suffix) */
private $suffix = null;

/** @var int|null Namespace parsed out of --prefix */
private $prefixNs = null;

/** @var string[]|null Tables named in --tables; null means no restriction */
private $tables = null;
67 
/**
 * Register the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( implode( "\n", [
		'Rename titles when changing behavior of Language::ucfirst().',
		'',
		'This script skips User and User_talk pages for registered users, as renaming of users '
			. 'is too complex to try to implement here. Use something like Extension:Renameuser to '
			. 'clean those up; this script can provide a list of user names affected.',
	] ) );

	// Each entry: option name, help text, required?, takes an argument?
	// Registration order is kept as-is.
	$options = [
		[
			'charmap',
			'Character map generated by maintenance/language/generateUcfirstOverrides.php',
			true,
			true,
		],
		[
			'user',
			'System user to use to do the renames. Default is "Maintenance script".',
			false,
			true,
		],
		[
			'steal',
			'If the username specified by --user exists, specify this to force conversion to a system user.',
			false,
			false,
		],
		[
			'run',
			'If not specified, the script will not actually perform any moves (i.e. it will dry-run).',
			false,
			false,
		],
		[ 'prefix', 'When the new title already exists, add this prefix.', false, true ],
		[ 'suffix', 'When the new title already exists, add this suffix.', false, true ],
		[ 'reason', 'Reason to use when moving pages.', false, true ],
		[ 'tag', 'Change tag to apply when moving pages.', false, true ],
		[ 'tables', 'Comma-separated list of database tables to process.', false, true ],
		[ 'userlist', 'Filename to which to output usernames needing rename.', false, true ],
	];
	foreach ( $options as list( $name, $help, $required, $withArg ) ) {
		$this->addOption( $name, $help, $required, $withArg );
	}

	$this->setBatchSize( 1000 );
}
105 
/**
 * Script entry point: validate the options, load the character map, then
 * process every title-bearing table and finally the user list.
 */
public function execute() {
	$this->run = $this->getOption( 'run', false );

	// The system user is only looked up when pages will really be moved.
	if ( $this->run ) {
		$systemUserName = $this->getOption( 'user', 'Maintenance script' );
		$stealAccount = $this->getOption( 'steal', false );
		$this->user = User::newSystemUser( $systemUserName, [ 'steal' => $stealAccount ] );
		if ( !$this->user ) {
			// Give a more specific message when the name belongs to an existing account.
			$existing = User::newFromName( $systemUserName );
			if ( !$stealAccount && $existing && $existing->isLoggedIn() ) {
				$this->fatalError( "User $systemUserName already exists.\n"
					. "Use --steal if you really want to steal it from the human who currently owns it."
				);
			}
			$this->fatalError( "Could not obtain system user $systemUserName." );
		}
	}

	$tableList = $this->getOption( 'tables' );
	if ( $tableList !== null ) {
		$this->tables = explode( ',', $tableList );
	}

	$prefixOption = $this->getOption( 'prefix' );
	if ( $prefixOption !== null ) {
		// Parse the prefix with a placeholder page name 'X' appended; the X has
		// to survive as the last character of the resulting DB key.
		$probe = Title::newFromText( $prefixOption . 'X' );
		if ( !$probe || substr( $probe->getDBkey(), -1 ) !== 'X' ) {
			$this->fatalError( 'Invalid --prefix.' );
		}
		if ( $probe->getNamespace() <= NS_MAIN || $probe->isExternal() ) {
			$this->fatalError( 'Invalid --prefix. It must not be in namespace 0 and must not be external' );
		}
		$this->prefixNs = $probe->getNamespace();
		// Strip the placeholder again.
		$this->prefix = substr( $probe->getText(), 0, -1 );
	}
	$this->suffix = $this->getOption( 'suffix' );

	$this->reason = $this->getOption( 'reason' ) ?: $this->reason;
	$this->tags = (array)$this->getOption( 'tag', null );

	$charmapFile = $this->getOption( 'charmap' );
	if ( !file_exists( $charmapFile ) ) {
		$this->fatalError( "Charmap file $charmapFile does not exist." );
	}
	if ( !is_file( $charmapFile ) || !is_readable( $charmapFile ) ) {
		$this->fatalError( "Charmap file $charmapFile is not readable." );
	}
	// The charmap file is a PHP file that returns the map.
	$this->charmap = require $charmapFile;
	if ( !is_array( $this->charmap ) ) {
		$this->fatalError( "Charmap file $charmapFile did not return a PHP array." );
	}

	// Keep only single-character keys that map to something different.
	$usable = [];
	foreach ( $this->charmap as $from => $to ) {
		if ( mb_strlen( $from ) !== 1 ) {
			$this->error( "Ignoring mapping from multi-character key '$from' to '$to'" );
			continue;
		}
		if ( $from !== $to ) {
			$usable[$from] = $to;
		}
	}
	$this->charmap = $usable;
	if ( $usable === [] ) {
		$this->fatalError( "Charmap file $charmapFile did not contain any usable character mappings." );
	}

	$db = $this->getDB( $this->run ? DB_MASTER : DB_REPLICA );

	// Each entry: move pages?, table, namespace field (or fixed namespace),
	// title field, extra ordering/continuation fields.
	$jobs = [
		[ true, 'page', 'page_namespace', 'page_title', [ 'page_id' ] ],
		[ true, 'image', NS_FILE, 'img_name', [] ],
		[ false, 'archive', 'ar_namespace', 'ar_title', [ 'ar_timestamp', 'ar_id' ] ],
		[ false, 'filearchive', NS_FILE, 'fa_name', [ 'fa_timestamp', 'fa_id' ] ],
		[ false, 'logging', 'log_namespace', 'log_title', [ 'log_id' ] ],
		[ false, 'redirect', 'rd_namespace', 'rd_title', [ 'rd_from' ] ],
		[ false, 'protected_titles', 'pt_namespace', 'pt_title', [] ],
	];
	foreach ( $jobs as list( $doMove, $table, $nsField, $titleField, $pkFields ) ) {
		$this->processTable( $db, $doMove, $table, $nsField, $titleField, $pkFields );
	}

	$this->processUsers( $db );
}
184 
/**
 * Get batched LIKE conditions from the charmap.
 *
 * One "field LIKE 'c%'" condition is built per charmap key; the conditions
 * are ORed together in groups of $batchSize.
 *
 * @param IDatabase $db Database handle used to build the SQL fragments
 * @param string $field Field name to match against
 * @param int $batchSize Number of LIKE alternatives per returned condition
 * @return string[] SQL conditions, one per batch
 */
private function getLikeBatches( IDatabase $db, $field, $batchSize = 100 ) {
	$batches = [];
	$pending = [];

	foreach ( array_keys( $this->charmap ) as $char ) {
		$pending[] = $field . $db->buildLike( $char, $db->anyString() );
		if ( count( $pending ) < $batchSize ) {
			continue;
		}
		// Batch is full: emit it and start a new one.
		$batches[] = $db->makeList( $pending, $db::LIST_OR );
		$pending = [];
	}

	// Emit the final, partially filled batch.
	if ( $pending !== [] ) {
		$batches[] = $db->makeList( $pending, $db::LIST_OR );
	}

	return $batches;
}
207 
/**
 * Get the list of namespaces to operate on.
 *
 * Only namespaces that are both movable and capitalized are included. The
 * result is computed once and cached in $this->namespaces.
 *
 * @return int[] Namespace indexes, ordered by subject namespace, with the
 *  subject namespace itself ahead of its non-subject counterpart
 */
private function getNamespaces() {
	if ( $this->namespaces !== null ) {
		return $this->namespaces;
	}

	$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();

	$candidates = [];
	foreach ( array_keys( $nsInfo->getCanonicalNamespaces() ) as $index ) {
		if ( $nsInfo->isMovable( $index ) && $nsInfo->isCapitalized( $index ) ) {
			$candidates[] = $index;
		}
	}
	$this->namespaces = $candidates;

	usort( $this->namespaces, static function ( $a, $b ) use ( $nsInfo ) {
		if ( $a === $b ) {
			return 0;
		}

		$subjectA = $nsInfo->getSubject( $a );
		$subjectB = $nsInfo->getSubject( $b );

		// Primary key: the subject namespace number.
		if ( $subjectA !== $subjectB ) {
			return $subjectA < $subjectB ? -1 : 1;
		}

		// Same subject: the subject namespace itself sorts first.
		if ( $subjectA === $a ) {
			return -1;
		}
		if ( $subjectB === $b ) {
			return 1;
		}

		// Several non-subject namespaces sharing a subject: relative
		// order does not matter.
		return 0;
	} );

	return $this->namespaces;
}
254 
/**
 * Check if a ns+title is a registered user's page.
 *
 * Lookups are cached in $this->seenUsers, keyed by the part of the title
 * before the first slash.
 *
 * @param IDatabase $db Database handle used for the user lookup
 * @param int $ns Namespace index
 * @param string $title Page title without namespace
 * @return bool True if the namespace is User or User_talk and a user row
 *  exists for the base page name
 */
private function isUserPage( IDatabase $db, $ns, $title ) {
	if ( !in_array( $ns, [ NS_USER, NS_USER_TALK ], true ) ) {
		return false;
	}

	// Subpages belong to the user named by the base page.
	$baseName = explode( '/', $title, 2 )[0];

	if ( !isset( $this->seenUsers[$baseName] ) ) {
		// Can't use User directly because it might uppercase the name
		$userId = $db->selectField(
			'user',
			'user_id',
			[ 'user_name' => str_replace( '_', ' ', $baseName ) ],
			__METHOD__
		);
		$this->seenUsers[$baseName] = (bool)$userId;
	}

	return $this->seenUsers[$baseName];
}
279 
/**
 * Munge a target title, if necessary.
 *
 * The target needs munging when it is the page of an existing user or when
 * MovePage reports 'articleexists' for the move. In that case it is replaced
 * by a title built from --prefix and/or --suffix.
 *
 * @param IDatabase $db Database handle used for the user-page check
 * @param Title $oldTitle Title being renamed
 * @param Title &$newTitle Target title; replaced in place when munged
 * @return bool True if $newTitle can be used, false if an error was reported
 */
private function mungeTitle( IDatabase $db, Title $oldTitle, Title &$newTitle ) {
	// Remember the unmunged target for error messages.
	$originalTarget = $newTitle->getPrefixedText();

	if ( $this->isUserPage( $db, $newTitle->getNamespace(), $newTitle->getText() ) ) {
		$why = "Target title's user exists";
	} else {
		$check = ( new MovePage( $oldTitle, $newTitle ) )->isValidMove();
		$why = ( !$check->isOK() && $check->hasMessage( 'articleexists' ) )
			? 'Target title exists'
			: false;
	}
	if ( !$why ) {
		// Nothing in the way, keep the target unchanged.
		return true;
	}

	if ( $this->prefix === null && $this->suffix === null ) {
		$this->error( sprintf(
			'Cannot move %s → %s: %s and no --prefix or --suffix was given',
			$oldTitle->getPrefixedText(),
			$originalTarget,
			$why
		) );
		return false;
	}

	if ( $this->prefix !== null ) {
		// Prefix mode: the whole old prefixed title goes under the prefix's namespace.
		$newTitle = Title::makeTitle(
			$this->prefixNs,
			$this->prefix . $oldTitle->getPrefixedText() . ( $this->suffix ?? '' )
		);
	} else {
		// Suffix-only mode: same namespace, suffix appended to the new text.
		$newTitle = Title::makeTitle( $newTitle->getNamespace(), $newTitle->getText() . $this->suffix );
	}

	if ( !$newTitle->canExist() ) {
		$this->error( sprintf(
			"Cannot move %s → %s: %s and munged title '%s' is not valid",
			$oldTitle->getPrefixedText(),
			$originalTarget,
			$why,
			$newTitle->getPrefixedText()
		) );
		return false;
	}

	if ( $newTitle->exists() ) {
		$this->error( sprintf(
			"Cannot move %s → %s: %s and munged title '%s' also exists",
			$oldTitle->getPrefixedText(),
			$originalTarget,
			$why,
			$newTitle->getPrefixedText()
		) );
		return false;
	}

	return true;
}
336 
/**
 * Use MovePage to move a title.
 *
 * @param IDatabase $db Database handle used for the user-page checks
 * @param int $ns Namespace of the page
 * @param string $title Title of the page, without namespace
 * @return bool|null True if the page was moved (or would be, in a dry run),
 *  false if an error was reported, null if the page was skipped
 */
private function doMove( IDatabase $db, $ns, $title ) {
	$firstChar = mb_substr( $title, 0, 1 );
	if ( !array_key_exists( $firstChar, $this->charmap ) ) {
		$this->error(
			"Query returned NS$ns $title, which does not begin with a character in the charmap."
		);
		return false;
	}

	if ( $this->isUserPage( $db, $ns, $title ) ) {
		$this->output( "... Skipping user page NS$ns $title\n" );
		return null;
	}

	// Target title: same namespace, first character replaced per the charmap.
	$source = Title::makeTitle( $ns, $title );
	$target = Title::makeTitle( $ns, $this->charmap[$firstChar] . mb_substr( $title, 1 ) );
	if ( !$this->mungeTitle( $db, $source, $target ) ) {
		return false;
	}

	$mover = new MovePage( $source, $target );
	$validity = $mover->isValidMove();
	if ( !$validity->isOK() ) {
		$this->error(
			"Invalid move {$source->getPrefixedText()} → {$target->getPrefixedText()}: "
			. $validity->getMessage( false, false, 'en' )->useDatabase( false )->plain()
		);
		return false;
	}

	if ( !$this->run ) {
		// Dry run: report only.
		$this->output(
			"Would rename {$source->getPrefixedText()} → {$target->getPrefixedText()}\n"
		);
		return true;
	}

	$result = $mover->move( $this->user, $this->reason, false, $this->tags );
	if ( $result->isOK() ) {
		return true;
	}

	$this->error(
		"Move {$source->getPrefixedText()} → {$target->getPrefixedText()} failed: "
		. $result->getMessage( false, false, 'en' )->useDatabase( false )->plain()
	);
	return false;
}
390 
/**
 * Directly update a database row.
 *
 * @param IDatabase $db Database handle; written to only when --run is set
 * @param string $table Table containing the row
 * @param string|int $nsField Name of the namespace field, or an integer
 *  namespace when the table has no namespace field
 * @param string $titleField Name of the title field
 * @param stdClass $row Row as selected by processTable(); all of its fields
 *  are used as the UPDATE conditions
 * @return bool|null True if the row was updated (or would be, in a dry run),
 *  false if an error was reported, null if the row was skipped
 */
private function doUpdate( IDatabase $db, $table, $nsField, $titleField, $row ) {
	$fixedNs = is_int( $nsField );
	$ns = $fixedNs ? $nsField : (int)$row->$nsField;
	$title = $row->$titleField;

	$firstChar = mb_substr( $title, 0, 1 );
	if ( !array_key_exists( $firstChar, $this->charmap ) ) {
		$encoded = json_encode( $row, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE );
		$this->error(
			"Query returned $encoded, but title does not begin with a character in the charmap."
		);
		return false;
	}

	if ( $this->isUserPage( $db, $ns, $title ) ) {
		$this->output( "... Skipping user page NS$ns $title\n" );
		return null;
	}

	// Target title: same namespace, first character replaced per the charmap.
	$source = Title::makeTitle( $ns, $title );
	$target = Title::makeTitle( $ns, $this->charmap[$firstChar] . mb_substr( $title, 1 ) );
	if ( !$this->mungeTitle( $db, $source, $target ) ) {
		return false;
	}

	if ( !$this->run ) {
		// Dry run: report only.
		$encoded = json_encode( $row, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE );
		$this->output( "Would set $encoded to {$target->getPrefixedText()}\n" );
		return true;
	}

	// mungeTitle() may have put the target into another namespace, so the
	// namespace field is written too when the table has one.
	$set = [];
	if ( !$fixedNs ) {
		$set[$nsField] = $target->getNamespace();
	}
	$set[$titleField] = $target->getDBkey();

	$db->update( $table, $set, (array)$row, __METHOD__ );

	return true;
}
440 
/**
 * Rename entries in one table.
 *
 * For every namespace from getNamespaces() and every LIKE batch from
 * getLikeBatches(), matching rows are read in batches ordered by title and
 * $pkFields. Each row is handed to doMove() or doUpdate().
 *
 * @param IDatabase $db Database handle
 * @param bool $doMove True to move pages with doMove(), false to update the
 *  rows directly with doUpdate()
 * @param string $table Table to process
 * @param string|int $nsField Name of the namespace field, or an integer
 *  namespace when the table has no namespace field
 * @param string $titleField Name of the title field
 * @param string[] $pkFields Further fields which, after the title, order the
 *  rows and identify where to continue
 */
private function processTable( IDatabase $db, $doMove, $table, $nsField, $titleField, $pkFields ) {
	if ( $this->tables !== null && !in_array( $table, $this->tables, true ) ) {
		$this->output( "Skipping table `$table`, not in --tables.\n" );
		return;
	}

	$batchSize = $this->getBatchSize();
	$namespaces = $this->getNamespaces();
	$likes = $this->getLikeBatches( $db, $titleField );
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

	// A table without a namespace field only ever holds one namespace.
	if ( is_int( $nsField ) ) {
		$namespaces = array_intersect( $namespaces, [ $nsField ] );
	}

	if ( !$namespaces ) {
		$this->output( "Skipping table `$table`, no valid namespaces.\n" );
		return;
	}

	$this->output( "Processing table `$table`...\n" );

	$selectFields = array_merge(
		is_int( $nsField ) ? [] : [ $nsField ],
		[ $titleField ],
		$pkFields
	);
	$orderFields = array_merge( [ $titleField ], $pkFields );
	// The continuation condition is built from the least significant
	// ordering field outwards.
	$contFields = array_reverse( $orderFields );

	$lastReplicationWait = 0.0;
	$count = 0;
	$errors = 0;
	foreach ( $namespaces as $ns ) {
		foreach ( $likes as $like ) {
			$cont = [];
			do {
				// With an integer $nsField the first condition reads "N = N",
				// which is always true because $namespaces was intersected above.
				$res = $db->select(
					$table,
					$selectFields,
					array_merge( [ "$nsField = $ns", $like ], $cont ),
					__METHOD__,
					[ 'ORDER BY' => $orderFields, 'LIMIT' => $batchSize ]
				);

				$lastRow = null;
				foreach ( $res as $row ) {
					$lastRow = $row;

					if ( $doMove ) {
						// Kept in its own variable so the outer loop's $ns,
						// used by the next query, is never overwritten.
						$rowNs = is_int( $nsField ) ? $nsField : (int)$row->$nsField;
						$ret = $this->doMove( $db, $rowNs, $row->$titleField );
					} else {
						$ret = $this->doUpdate( $db, $table, $nsField, $titleField, $row );
					}
					if ( $ret === true ) {
						$count++;
					} elseif ( $ret === false ) {
						$errors++;
					}
				}

				// Only the last row of the batch decides where the next batch
				// starts, so the condition is built once per batch.
				$cont = [];
				if ( $lastRow !== null ) {
					$cond = '';
					foreach ( $contFields as $field ) {
						$v = $db->addQuotes( $lastRow->$field );
						$cond = $cond === ''
							? "$field > $v"
							: "$field > $v OR ($field = $v AND ($cond))";
					}
					$cont = [ $cond ];
				}

				if ( $this->run ) {
					$r = $lastRow !== null
						? json_encode( $lastRow, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE )
						: '<end>';
					$this->output( "... $table: $count renames, $errors errors at $r\n" );
					$lbFactory->waitForReplication(
						[ 'timeout' => 30, 'ifWritesSince' => $lastReplicationWait ]
					);
					$lastReplicationWait = microtime( true );
				}
				// An empty batch leaves $cont empty and ends the loop.
			} while ( $cont );
		}
	}

	$this->output( "Done processing table `$table`.\n" );
}
534 
/**
 * List users needing renaming.
 *
 * Writes one "old name<TAB>new name" line to the --userlist file for every
 * user whose name begins with a character in the charmap. Does nothing
 * beyond a notice when --userlist was not given.
 *
 * @param IDatabase $db Database handle used to read user names
 */
private function processUsers( IDatabase $db ) {
	$userlistFile = $this->getOption( 'userlist' );
	if ( $userlistFile === null ) {
		$this->output( "Not generating user list, --userlist was not specified.\n" );
		return;
	}

	$fh = fopen( $userlistFile, 'wb' );
	if ( !$fh ) {
		$this->error( "Could not open user list file $userlistFile" );
		return;
	}

	$this->output( "Generating user list...\n" );
	$count = 0;
	$batchSize = $this->getBatchSize();
	$writeFailed = false;

	// try/finally closes the handle even when a query throws.
	try {
		foreach ( $this->getLikeBatches( $db, 'user_name' ) as $like ) {
			$cont = [];
			while ( true ) {
				$names = $db->selectFieldValues(
					'user',
					'user_name',
					array_merge( [ $like ], $cont ),
					__METHOD__,
					[ 'ORDER BY' => 'user_name', 'LIMIT' => $batchSize ]
				);
				if ( !$names ) {
					break;
				}

				// Continue after the last name of this batch.
				$last = end( $names );
				$cont = [ 'user_name > ' . $db->addQuotes( $last ) ];
				foreach ( $names as $name ) {
					$char = mb_substr( $name, 0, 1 );
					if ( !array_key_exists( $char, $this->charmap ) ) {
						$this->error(
							"Query returned $name, but user name does not begin with a character in the charmap."
						);
						continue;
					}
					$newName = $this->charmap[$char] . mb_substr( $name, 1 );
					$line = "$name\t$newName\n";
					// fwrite() reports failure, so a failed or short write can
					// be detected here; stop instead of producing a list that
					// silently lacks entries.
					if ( fwrite( $fh, $line ) !== strlen( $line ) ) {
						$this->error( "Could not write to user list file $userlistFile" );
						$writeFailed = true;
						// Leave the names loop, the batch loop and the LIKE loop.
						break 3;
					}
					$count++;
				}
				$this->output( "... at $last, $count names so far\n" );
			}
		}
	} finally {
		if ( !fclose( $fh ) ) {
			$this->error( "fclose on $userlistFile failed" );
		}
	}

	if ( $writeFailed ) {
		// The file is incomplete; do not report it as a finished list.
		return;
	}
	$this->output( "User list output to $userlistFile, $count users need renaming.\n" );
}
592 }
593 
// Name the class implementing this script, then include the runner file
// identified by RUN_MAINTENANCE_IF_MAIN (a constant defined in Maintenance.php).
$maintClass = UppercaseTitlesForUnicodeTransition::class;
require_once RUN_MAINTENANCE_IF_MAIN;
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:39
const NS_MAIN
Definition: Defines.php:60
getText()
Get the text form (spaces not underscores) of the main part.
Definition: Title.php:996
error( $err, $die=0)
Throw an error to the user.
selectField( $table, $var, $cond='', $fname=__METHOD__, $options=[], $join_conds=[])
A SELECT wrapper which returns a single field from a single result row.
makeList(array $a, $mode=self::LIST_COMMA)
Makes an encoded list of strings from an array.
getNamespaces()
Get the list of namespaces to operate on.
getOption( $name, $default=null)
Get an option, or return the default.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:86
isLoggedIn()
Get whether the user is logged in.
Definition: User.php:3580
setBatchSize( $s=0)
Set the batch size.
getPrefixedText()
Get the prefixed title with spaces.
Definition: Title.php:1853
const DB_MASTER
Definition: defines.php:26
anyString()
Returns a token for buildLike() that denotes a '%' to be used in a LIKE query.
getLikeBatches(IDatabase $db, $field, $batchSize=100)
Get batched LIKE conditions from the charmap.
canExist()
Can this title represent a page in the wiki's database?
Definition: Title.php:1189
float $lastReplicationWait
UNIX timestamp.
addDescription( $text)
Set the description text.
getNamespace()
Get the namespace index, i.e.
Definition: Title.php:1035
const NS_FILE
Definition: Defines.php:66
output( $out, $channel=null)
Throw some output to the user.
doUpdate(IDatabase $db, $table, $nsField, $titleField, $row)
Directly update a database row.
isUserPage(IDatabase $db, $ns, $title)
Check if a ns+title is a registered user's page.
const LIST_OR
Definition: Defines.php:42
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:584
mungeTitle(IDatabase $db, Title $oldTitle, Title &$newTitle)
Munge a target title, if necessary.
selectFieldValues( $table, $var, $cond='', $fname=__METHOD__, $options=[], $join_conds=[])
A SELECT wrapper which returns a list of single field values from result rows.
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
Maintenance script to rename titles affected by changes to Unicode (or otherwise to Language::ucfirst).
getBatchSize()
Returns batch size.
exists( $flags=0)
Check if page exists.
Definition: Title.php:4132
processTable(IDatabase $db, $doMove, $table, $nsField, $titleField, $pkFields)
Rename entries in other tables.
select( $table, $vars, $conds='', $fname=__METHOD__, $options=[], $join_conds=[])
Execute a SELECT query constructed using the various parameters provided.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
doMove(IDatabase $db, $ns, $title)
Use MovePage to move a title.
const DB_REPLICA
Definition: defines.php:25
update( $table, $values, $conds, $fname=__METHOD__, $options=[])
UPDATE wrapper.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
static newFromName( $name, $validate='valid')
Static factory method for creation from username.
Definition: User.php:536
const NS_USER_TALK
Definition: Defines.php:63
addQuotes( $s)
Escape and quote a raw value string for use in a SQL query.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
buildLike( $param,... $params)
LIKE statement wrapper.
static newSystemUser( $name, $options=[])
Static factory method for creation of a "system" user from username.
Definition: User.php:758
processUsers(IDatabase $db)
List users needing renaming.
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:317