MediaWiki  master
namespaceDupes.php
Go to the documentation of this file.
1 <?php
27 require_once __DIR__ . '/Maintenance.php';
28 
33 
40 class NamespaceDupes extends Maintenance {
41 
/**
 * Database handle.
 * NOTE(review): no method visible in this file reads or writes this
 * property (they all call getDB() directly) - confirm whether it is still needed.
 * @var IMaintainableDatabase
 */
45  protected $db;
46 
/** @var int Number of conflicting pages for which the chosen action succeeded (or would, in a dry run) */
47  private $resolvablePages = 0;
/** @var int Number of conflicting pages found by checkNamespace() */
48  private $totalPages = 0;
49 
/** @var int Number of conflicting link rows for which a valid destination title was found */
50  private $resolvableLinks = 0;
/** @var int Number of conflicting link rows seen by checkLinkTable() */
51  private $totalLinks = 0;
52 
/**
 * Set the script description and register every supported command-line option.
 */
public function __construct() {
    parent::__construct();
    $this->addDescription( 'Find and fix pages affected by namespace addition/removal' );

    // Each entry is [ option name, help text, whether the option takes a value ].
    // Options are registered in the order they are listed here.
    $optionSpecs = [
        [ 'fix', 'Attempt to automatically fix errors', false ],
        [
            'merge',
            "Instead of renaming conflicts, do a history merge with " .
                "the correct title",
            false
        ],
        [
            'add-suffix',
            "Dupes will be renamed with correct namespace with " .
                "<text> appended after the article name",
            true
        ],
        [
            'add-prefix',
            "Dupes will be renamed with correct namespace with " .
                "<text> prepended before the article name",
            true
        ],
        [
            'source-pseudo-namespace',
            "Move all pages with the given source " .
                "prefix (with an implied colon following it). If --dest-namespace is not specified, " .
                "the colon will be replaced with a hyphen.",
            true
        ],
        [
            'dest-namespace',
            "In combination with --source-pseudo-namespace, " .
                "specify the namespace ID of the destination.",
            true
        ],
        [
            'move-talk',
            "If this is specified, pages in the Talk namespace that " .
                "begin with a conflicting prefix will be renamed, for example " .
                "Talk:File:Foo -> File_Talk:Foo",
            false
        ],
    ];

    foreach ( $optionSpecs as $spec ) {
        list( $optionName, $helpText, $takesValue ) = $spec;
        // None of the options is required, hence the constant "false".
        $this->addOption( $optionName, $helpText, false, $takesValue );
    }
}
73 
/**
 * Script entry point: gather the command-line options, run either the
 * single-prefix check or the full namespace check, and print a verdict.
 */
public function execute() {
    $pseudoNamespace = $this->getOption( 'source-pseudo-namespace', '' );

    $options = [
        'fix' => $this->hasOption( 'fix' ),
        'merge' => $this->hasOption( 'merge' ),
        'add-suffix' => $this->getOption( 'add-suffix', '' ),
        'add-prefix' => $this->getOption( 'add-prefix', '' ),
        'move-talk' => $this->hasOption( 'move-talk' ),
        'source-pseudo-namespace' => $pseudoNamespace,
        'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) ),
    ];

    // A pseudo-namespace on the command line restricts the run to that prefix.
    $allGood = $pseudoNamespace !== ''
        ? $this->checkPrefix( $options )
        : $this->checkAll( $options );

    $this->output( $allGood ? "\nLooks good!\n" : "\nOh noeees\n" );
}
96 
/**
 * Check all namespaces.
 *
 * Builds a map of every known prefix (interwiki prefixes, canonical and
 * localised namespace names, aliases, and case variants of all of them) to
 * its namespace index, then checks pages and link tables for each prefix.
 *
 * @param array $options Associative array of validated command-line options
 * @return bool True if every call to checkNamespace() reported success
 */
private function checkAll( $options ) {
    $services = MediaWikiServices::getInstance();
    $contLang = $services->getContentLanguage();
    $config = $this->getConfig();

    // Interwiki prefixes go in first (mapped to index 0) so that any
    // local namespace with the same name overrides them below.
    $spaces = [];
    foreach ( $this->getInterwikiList() as $prefix ) {
        $spaces[$contLang->ucfirst( $prefix )] = 0;
    }

    // Canonical names (this includes $wgExtraNamespaces), then localised names.
    // Both sources are keyed by namespace index; empty names are skipped.
    $indexedNameSources = [
        $services->getNamespaceInfo()->getCanonicalNamespaces(),
        $contLang->getNamespaces(),
    ];
    foreach ( $indexedNameSources as $namesByIndex ) {
        foreach ( $namesByIndex as $ns => $name ) {
            if ( $name === '' ) {
                continue;
            }
            $spaces[$name] = $ns;
        }
    }

    // Configured aliases, then language aliases. Both are keyed by name.
    $aliasSources = [
        $config->get( 'NamespaceAliases' ),
        $contLang->getNamespaceAliases(),
    ];
    foreach ( $aliasSources as $indexByName ) {
        foreach ( $indexByName as $name => $ns ) {
            $spaces[$name] = $ns;
        }
    }

    // The database searches are case-sensitive, so differently-cased
    // spellings of every prefix have to be checked as well.
    $capitalLinks = $config->get( 'CapitalLinks' );
    foreach ( $spaces as $name => $ns ) {
        $lowered = $contLang->lc( $name );
        $variants = [
            $contLang->uc( $name ),
            $contLang->ucfirst( $lowered ),
            $contLang->ucwords( $name ),
            $contLang->ucwords( $lowered ),
            $contLang->ucwordbreaks( $name ),
            $contLang->ucwordbreaks( $lowered ),
        ];
        if ( !$capitalLinks ) {
            // Titles may start with a lowercase letter on this wiki.
            $lcFirstVariants = [];
            foreach ( $variants as $variant ) {
                $lcFirstVariants[] = $contLang->lcfirst( $variant );
            }
            $variants = array_merge(
                $variants,
                $lcFirstVariants,
                [ $contLang->lcfirst( $name ) ]
            );
        }
        foreach ( array_unique( $variants ) as $variant ) {
            if ( $variant === $name ) {
                continue;
            }
            $spaces[$variant] = $ns;
        }
    }

    // Order by namespace index; equal indexes are ordered by name.
    $indexOf = $spaces;
    uksort( $spaces, function ( $left, $right ) use ( $indexOf ) {
        $byIndex = $indexOf[$left] <=> $indexOf[$right];
        return $byIndex !== 0 ? $byIndex : $left <=> $right;
    } );

    $ok = true;
    foreach ( $spaces as $name => $ns ) {
        // Every prefix is checked even after an earlier failure.
        if ( !$this->checkNamespace( $ns, $name, $options ) ) {
            $ok = false;
        }
    }

    $this->output( "{$this->totalPages} pages to fix, " .
        "{$this->resolvablePages} were resolvable.\n\n" );

    foreach ( $spaces as $name => $ns ) {
        if ( $ns == 0 ) {
            /* Link destinations are only repaired for real namespaces.
             *
             * Example: a page contains [[Foo:Bar]] and a Foo namespace is
             * introduced afterwards; pagelinks then has to be updated to
             * carry page_namespace = NS_FOO.
             *
             * Had "Foo" been introduced as an interwiki prefix instead, the
             * link would belong in the iwlinks table; for a new language
             * "Foo", or a pagelink [[fr:Bar]] with interlanguage magic links
             * enabled, it would belong in langlinks. Those cases are left in
             * the too-hard basket for now; the consequences are not
             * especially severe.
             * @fixme Handle interwiki links, and pagelinks to Category:, File:
             * which probably need reparsing.
             */
            continue;
        }

        $this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options );
        $this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options );

        // The redirect table mixes interwiki redirects in with local ones
        // and those must be filtered out: [[w:Foo:Bar]] is stored with
        // rd_interwiki=w and rd_namespace=0, and would otherwise match the
        // query for a conflicting namespace "Foo". Local rows are matched
        // once with a NULL and once with an empty rd_interwiki.
        foreach ( [ null, '' ] as $localInterwikiValue ) {
            $this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
                [ 'rd_interwiki' => $localInterwikiValue ] );
        }
    }

    $this->output( "{$this->totalLinks} links to fix, " .
        "{$this->resolvableLinks} were resolvable.\n" );

    return $ok;
}
215 
/**
 * Get the interwiki list.
 *
 * @return string[] The iw_prefix value of every row returned by the interwiki lookup
 */
private function getInterwikiList() {
    $rows = MediaWikiServices::getInstance()->getInterwikiLookup()->getAllPrefixes();

    $extractPrefix = function ( $row ) {
        return $row['iw_prefix'];
    };

    // array_values() yields a plain 0-based list regardless of the row keys.
    return array_values( array_map( $extractPrefix, $rows ) );
}
230 
/**
 * Check a given prefix and try to move it into the given destination namespace.
 *
 * For every page whose title starts with "$name:", an action is chosen
 * (move, merge or abort), logged, and - only when the 'fix' option is
 * set - carried out.
 *
 * @param int $ns Destination namespace id
 * @param string $name The prefix to look for
 * @param array $options Associative array of validated command-line options
 * @return bool True if no matching page exists or every matching page was resolvable
 */
private function checkNamespace( $ns, $name, $options ) {
    $targets = $this->getTargetList( $ns, $name, $options );
    $found = $targets->numRows();
    $this->totalPages += $found;
    if ( $found == 0 ) {
        return true;
    }

    $dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
    $allResolved = true;

    foreach ( $targets as $row ) {
        // Step 1: work out the destination title and what to do with the page.
        // The action stays 'abort' unless a branch below finds a way forward.
        $newTitle = $this->getDestinationTitle(
            $ns, $name, $row->page_namespace, $row->page_title );
        $logStatus = false;
        $action = 'abort';

        if ( !$newTitle ) {
            $logStatus = 'invalid title';
        } elseif ( !$newTitle->exists() ) {
            $action = 'move';
            $logStatus = 'no conflict';
        } elseif ( $options['merge'] ) {
            // canMerge() fills in $logStatus when it refuses.
            if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
                $action = 'merge';
            }
        } elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
            $logStatus = 'dest title exists and --add-prefix not specified';
        } else {
            $newTitle = $this->getAlternateTitle( $newTitle, $options );
            if ( !$newTitle ) {
                $logStatus = 'alternate title is invalid';
            } elseif ( $newTitle->exists() ) {
                $logStatus = 'title conflict';
            } else {
                $action = 'move';
                $logStatus = 'alternate';
            }
        }

        // Step 2: log the decision and, unless this is a dry run, act on it.
        $logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
        $pageOK = true;

        if ( $action === 'abort' ) {
            $this->output( "$logTitle *** $logStatus\n" );
            $pageOK = false;
        } elseif ( $action === 'move' ) {
            $this->output( "$logTitle -> " .
                $newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );
            if ( $options['fix'] ) {
                $pageOK = $this->movePage( $row->page_id, $newTitle );
            }
        } elseif ( $action === 'merge' ) {
            $this->output( "$logTitle => " .
                $newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );
            if ( $options['fix'] ) {
                $pageOK = $this->mergePage( $row, $newTitle );
            }
        }

        if ( $pageOK ) {
            $this->resolvablePages++;
        } else {
            $allResolved = false;
        }
    }

    return $allResolved;
}
324 
/**
 * Check and repair the destination fields in a link table.
 *
 * Rows are read in batches of 500, ordered by (title, from), and each batch
 * resumes after the last row of the previous one.
 *
 * @param string $table The link table name
 * @param string $fieldPrefix The field prefix in the link table
 * @param int $ns Destination namespace id
 * @param string $name The prefix to look for
 * @param array $options Associative array of validated command-line options
 * @param array $extraConds Extra conditions for the SQL query
 */
private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
    $extraConds = []
) {
    $dbw = $this->getDB( DB_MASTER );

    $fromField = "{$fieldPrefix}_from";
    $namespaceField = "{$fieldPrefix}_namespace";
    $titleField = "{$fieldPrefix}_title";
    $batchSize = 500;

    // Empty for the first batch; afterwards it holds the "resume after" condition.
    $batchConds = [];

    for ( ; ; ) {
        $conds = array_merge( $batchConds, $extraConds, [
            $namespaceField => 0,
            $titleField . $dbw->buildLike( "$name:", $dbw->anyString() )
        ] );
        $res = $dbw->select(
            $table,
            [ $fromField, $namespaceField, $titleField ],
            $conds,
            __METHOD__,
            [
                'ORDER BY' => [ $titleField, $fromField ],
                'LIMIT' => $batchSize
            ]
        );

        if ( $res->numRows() == 0 ) {
            return;
        }

        $lastRow = null;
        foreach ( $res as $row ) {
            $lastRow = $row;

            $logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
                "dbk={$row->$titleField}";
            $destTitle = $this->getDestinationTitle(
                $ns, $name, $row->$namespaceField, $row->$titleField );

            $this->totalLinks++;
            if ( !$destTitle ) {
                $this->output( "$table $logTitle *** INVALID\n" );
                continue;
            }
            $this->resolvableLinks++;

            $logLine = "$table $logTitle -> " . $destTitle->getPrefixedDBkey();
            if ( !$options['fix'] ) {
                $this->output( $logLine . " DRY RUN\n" );
                continue;
            }

            $newValues = [
                $namespaceField => $destTitle->getNamespace(),
                $titleField => $destTitle->getDBkey()
            ];
            $rowMatch = [
                $namespaceField => 0,
                $titleField => $row->$titleField,
                $fromField => $row->$fromField
            ];
            // The update is issued with the IGNORE option.
            $dbw->update( $table, $newValues, $rowMatch, __METHOD__, [ 'IGNORE' ] );
            $this->output( $logLine . "\n" );
        }

        // Continue strictly after the last (title, from) pair just processed.
        $encLastTitle = $dbw->addQuotes( $lastRow->$titleField );
        $encLastFrom = $dbw->addQuotes( $lastRow->$fromField );
        $batchConds = [
            "$titleField > $encLastTitle " .
            "OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)"
        ];

        wfWaitForSlaves();
    }
}
407 
/**
 * Check the single pseudo-namespace given by the 'source-pseudo-namespace'
 * option against the namespace given by the 'dest-namespace' option.
 *
 * @param array $options Associative array of validated command-line options
 * @return bool The result of checkNamespace() for that prefix
 */
private function checkPrefix( $options ) {
    $pseudoNamespace = $options['source-pseudo-namespace'];
    $destNamespace = $options['dest-namespace'];

    $this->output( "Checking prefix \"$pseudoNamespace\" vs namespace $destNamespace\n" );

    return $this->checkNamespace( $destNamespace, $pseudoNamespace, $options );
}
422 
/**
 * Find pages whose title starts with "$name:" in the main namespace and,
 * when the 'move-talk' option is set and $ns is a subject namespace,
 * in the Talk namespace as well.
 *
 * @param int $ns Destination namespace id
 * @param string $name Prefix that is being made a namespace
 * @param array $options Associative array of validated command-line options
 * @return IResultWrapper Rows with page_id, page_title and page_namespace
 */
private function getTargetList( $ns, $name, $options ) {
    $dbw = $this->getDB( DB_MASTER );

    $includeTalk = $options['move-talk'] &&
        MediaWikiServices::getInstance()->getNamespaceInfo()->isSubject( $ns );
    $checkNamespaces = $includeTalk ? [ NS_MAIN, NS_TALK ] : NS_MAIN;

    $fields = [ 'page_id', 'page_title', 'page_namespace' ];
    $conds = [
        'page_namespace' => $checkNamespaces,
        'page_title' . $dbw->buildLike( "$name:", $dbw->anyString() ),
    ];

    return $dbw->select( 'page', $fields, $conds, __METHOD__ );
}
458 
/**
 * Get the preferred destination title for a given target page.
 *
 * @param int $ns The destination namespace ID
 * @param string $name The conflicting prefix
 * @param int $sourceNs The source namespace
 * @param string $sourceDbk The source DB key (i.e. page_title)
 * @return Title|false False if no valid title that can exist could be built
 */
private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk ) {
    // Drop the leading "<name>:" from the stored DB key.
    $prefixLength = strlen( "$name:" );
    $dbk = substr( $sourceDbk, $prefixLength );
    if ( $ns == 0 ) {
        // An interwiki; try an alternate encoding with '-' for ':'
        $dbk = "$name-" . $dbk;
    }

    $nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
    $movingTalkPage = $sourceNs == NS_TALK && $nsInfo->isSubject( $ns );
    // A talk page picked up by --move-talk goes to the talk namespace
    // associated with the destination.
    $destNS = $movingTalkPage ? $nsInfo->getTalk( $ns ) : $ns;

    $newTitle = Title::makeTitleSafe( $destNS, $dbk );

    return ( $newTitle && $newTitle->canExist() ) ? $newTitle : false;
}
485 
/**
 * Get an alternative title to move a page to.
 *
 * The 'add-prefix' / 'add-suffix' options are applied to the DB key. If the
 * resulting title is already taken, they are applied again to that title,
 * and so on, until a free title is found or the key stops being a valid
 * title.
 *
 * @param LinkTarget $linkTarget The preferred (already existing) destination
 * @param array $options Associative array of validated command-line options
 * @return Title|bool A title that does not exist yet, or false if neither
 *   option is set or no valid title could be built
 */
private function getAlternateTitle( LinkTarget $linkTarget, $options ) {
    $prefix = $options['add-prefix'];
    $suffix = $options['add-suffix'];
    if ( $prefix == '' && $suffix == '' ) {
        return false;
    }

    $base = $linkTarget;
    while ( true ) {
        $dbk = $prefix . $base->getDBkey() . $suffix;
        $title = Title::makeTitleSafe( $base->getNamespace(), $dbk );
        if ( !$title ) {
            return false;
        }
        if ( !$title->exists() ) {
            return $title;
        }
        // The candidate is taken. Build the next one from it, so the key
        // changes on every pass; rebuilding from the unchanged input would
        // produce the same existing title forever. The loop ends when a free
        // title is found or makeTitleSafe() rejects the ever-longer key.
        $base = $title;
    }
}
511 
/**
 * Move a page by rewriting its row in the page table, then bring the
 * *_from_namespace columns of the link tables in line with the new namespace.
 *
 * @param int $id The page_id
 * @param LinkTarget $newLinkTarget The new title link target
 * @return bool Always true
 */
private function movePage( $id, LinkTarget $newLinkTarget ) {
    $dbw = $this->getDB( DB_MASTER );
    $newNamespace = $newLinkTarget->getNamespace();

    $dbw->update(
        'page',
        [
            'page_namespace' => $newNamespace,
            'page_title' => $newLinkTarget->getDBkey(),
        ],
        [ 'page_id' => $id ],
        __METHOD__
    );

    // Link tables that record the namespace of the linking page: table => field prefix.
    $fromNamespaceTables = [
        'pagelinks' => 'pl',
        'templatelinks' => 'tl',
        'imagelinks' => 'il',
    ];
    foreach ( $fromNamespaceTables as $table => $fieldPrefix ) {
        $dbw->update(
            $table,
            [ "{$fieldPrefix}_from_namespace" => $newNamespace ],
            [ "{$fieldPrefix}_from" => $id ],
            __METHOD__
        );
    }

    return true;
}
549 
/**
 * Determine if we can merge a page.
 *
 * A merge is refused when the latest revision of the source page is newer
 * than the latest revision of the destination, or when either of those
 * revisions cannot be loaded.
 *
 * @param int $id The page_id of the source page
 * @param LinkTarget $linkTarget The destination title
 * @param string|bool &$logStatus Set to the reason when the merge is refused
 * @return bool
 */
private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
    $latestDest = Revision::newFromTitle( $linkTarget, 0, Revision::READ_LATEST );
    $latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST );

    // Either lookup returns null when no revision is found; without this
    // guard the timestamp comparison below would be a fatal error.
    if ( !$latestDest || !$latestSource ) {
        $logStatus = 'cannot merge since the latest revision of the source ' .
            'or destination could not be loaded';
        return false;
    }

    if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
        $logStatus = 'cannot merge since source is later';
        return false;
    }

    return true;
}
572 
/**
 * Merge page histories.
 *
 * Re-points every revision of the source page at the destination page,
 * deletes the source page row, and then runs a LinksDeletionUpdate for the
 * source page.
 *
 * @param stdClass $row Page row of the source page; page_id, page_namespace
 *   and page_title are read
 * @param Title $newTitle The destination page
 * @return bool Always true
 */
private function mergePage( $row, Title $newTitle ) {
    $dbw = $this->getDB( DB_MASTER );

    $id = $row->page_id;

    // Construct the WikiPage object we will need later, while the
    // page_id still exists. Note that this cannot use makeTitleSafe(),
    // we are deliberately constructing an invalid title.
    $sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
    $sourceTitle->resetArticleID( $id );
    $wikiPage = new WikiPage( $sourceTitle );
    $wikiPage->loadPageData( 'fromdbmaster' );

    $destId = $newTitle->getArticleID();
    $this->beginTransaction( $dbw, __METHOD__ );
    $dbw->update( 'revision',
        // SET
        [ 'rev_page' => $destId ],
        // WHERE
        [ 'rev_page' => $id ],
        __METHOD__ );

    $dbw->delete( 'page', [ 'page_id' => $id ], __METHOD__ );

    $this->commitTransaction( $dbw, __METHOD__ );

    /* Call LinksDeletionUpdate to delete outgoing links from the old title,
     * and update category counts.
     *
     * Calling external code with a fake broken Title is a fairly dubious
     * idea. It's necessary because it's quite a lot of code to duplicate,
     * but that also makes it fragile since it would be easy for someone to
     * accidentally introduce an assumption of title validity to the code we
     * are calling.
     */
    DeferredUpdates::addUpdate( new LinksDeletionUpdate( $wikiPage ) );
    // Run the queued update now rather than leaving it on the deferred list.
    DeferredUpdates::doUpdates();

    return true;
}
620 }
621 
// Name the class defined in this file in $maintClass, then load the file
// named by the RUN_MAINTENANCE_IF_MAIN constant.
// NOTE(review): presumably that file instantiates $maintClass and runs it
// when this script is the entry point - confirm against Maintenance.php.
622 $maintClass = NamespaceDupes::class;
623 require_once RUN_MAINTENANCE_IF_MAIN;
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
getArticleID( $flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition: Title.php:3161
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:39
const NS_MAIN
Definition: Defines.php:60
getOption( $name, $default=null)
Get an option, or return the default.
getTargetList( $ns, $name, $options)
Find pages in main and talk namespaces that have a prefix of the new namespace so we know titles that...
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: Maintenance.php:86
static newFromPageId( $pageId, $revId=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given page ID...
Definition: Revision.php:158
hasOption( $name)
Checks to see if a particular option exists.
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:139
const DB_MASTER
Definition: defines.php:26
getNamespace()
Get the namespace index.
wfWaitForSlaves( $ifWritesSince=null, $wiki=false, $cluster=false, $timeout=null)
Waits for the replica DBs to catch up to the master position.
addDescription( $text)
Set the description text.
getDBkey()
Get the main part with underscores.
checkAll( $options)
Check all namespaces.
movePage( $id, LinkTarget $newLinkTarget)
Move a page.
getInterwikiList()
Get the interwiki list.
checkPrefix( $options)
Move the given pseudo-namespace, either replacing the colon with a hyphen (useful for pseudo-namespac...
output( $out, $channel=null)
Throw some output to the user.
mergePage( $row, Title $newTitle)
Merge page histories.
getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk)
Get the preferred destination title for a given target page.
checkLinkTable( $table, $fieldPrefix, $ns, $name, $options, $extraConds=[])
Check and repair the destination fields in a link table.
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:610
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:584
canMerge( $id, LinkTarget $linkTarget, &$logStatus)
Determine if we can merge a page.
static doUpdates( $mode='run', $stage=self::ALL)
Do any deferred updates and clear the list.
static addUpdate(DeferrableUpdate $update, $stage=self::POSTSEND)
Add an update to the deferred list to be run later by execute()
checkNamespace( $ns, $name, $options)
Check a given prefix and try to move it into the given destination namespace.
IMaintainableDatabase $db
$maintClass
getAlternateTitle(LinkTarget $linkTarget, $options)
Get an alternative title to move a page to.
Maintenance script that checks for articles to fix after adding/deleting namespaces.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
const NS_TALK
Definition: Defines.php:61
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.