MediaWiki  master
namespaceDupes.php
Go to the documentation of this file.
1 <?php
27 require_once __DIR__ . '/Maintenance.php';
28 
36 
/**
 * Maintenance script that checks for articles to fix after adding/deleting namespaces.
 *
 * Pages (and link-table rows) stored in the main namespace whose title starts
 * with "<Name>:" are reported and, with --fix, moved to the namespace or
 * pseudo-namespace that "<Name>" now refers to.
 *
 * @ingroup Maintenance
 */
class NamespaceDupes extends Maintenance {

	/**
	 * Not referenced by the code in this class.
	 *
	 * @var IMaintainableDatabase
	 */
	protected $db;

	/**
	 * Number of pages examined by checkNamespace() whose action succeeded
	 * (or would succeed, in a dry run).
	 *
	 * @var int
	 */
	private $resolvablePages = 0;

	/**
	 * Number of pages returned by getTargetList() over all checked names.
	 *
	 * @var int
	 */
	private $totalPages = 0;

	/**
	 * Number of link rows for which a valid destination title was found,
	 * minus the rows that were subsequently deleted.
	 *
	 * @var int
	 */
	private $resolvableLinks = 0;

	/**
	 * Number of link rows examined by checkLinkTable().
	 *
	 * @var int
	 */
	private $totalLinks = 0;

	/**
	 * Number of link rows deleted by checkLinkTable() because they still
	 * existed under their old target after the UPDATE IGNORE.
	 *
	 * @var int
	 */
	private $deletedLinks = 0;

	/**
	 * Register the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Find and fix pages affected by namespace addition/removal' );
		$this->addOption( 'fix', 'Attempt to automatically fix errors and delete broken links' );
		$this->addOption( 'merge', "Instead of renaming conflicts, do a history merge with " .
			"the correct title" );
		$this->addOption( 'add-suffix', "Dupes will be renamed with correct namespace with " .
			"<text> appended after the article name", false, true );
		$this->addOption( 'add-prefix', "Dupes will be renamed with correct namespace with " .
			"<text> prepended before the article name", false, true );
		$this->addOption( 'source-pseudo-namespace', "Move all pages with the given source " .
			"prefix (with an implied colon following it). If --dest-namespace is not specified, " .
			"the colon will be replaced with a hyphen.",
			false, true );
		$this->addOption( 'dest-namespace', "In combination with --source-pseudo-namespace, " .
			"specify the namespace ID of the destination.", false, true );
		$this->addOption( 'move-talk', "If this is specified, pages in the Talk namespace that " .
			"begin with a conflicting prefix will be renamed, for example " .
			"Talk:File:Foo -> File_Talk:Foo" );
	}

	/**
	 * Collect the command line options and run either the single-prefix
	 * check or the check over every known namespace name, then print a
	 * one-line verdict.
	 */
	public function execute() {
		$options = [
			'fix' => $this->hasOption( 'fix' ),
			'merge' => $this->hasOption( 'merge' ),
			'add-suffix' => $this->getOption( 'add-suffix', '' ),
			'add-prefix' => $this->getOption( 'add-prefix', '' ),
			'move-talk' => $this->hasOption( 'move-talk' ),
			'source-pseudo-namespace' => $this->getOption( 'source-pseudo-namespace', '' ),
			'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) )
		];

		if ( $options['source-pseudo-namespace'] !== '' ) {
			$retval = $this->checkPrefix( $options );
		} else {
			$retval = $this->checkAll( $options );
		}

		if ( $retval ) {
			$this->output( "\nLooks good!\n" );
		} else {
			$this->output( "\nOh noeees\n" );
		}
	}

	/**
	 * Check all namespaces: interwiki prefixes, canonical names, localised
	 * names and aliases, plus their case variants.
	 *
	 * @param array $options Associative array of validated command-line options
	 *   (as built by execute())
	 * @return bool True if every checked name came back OK from checkNamespace()
	 */
	private function checkAll( $options ) {
		$contLang = MediaWikiServices::getInstance()->getContentLanguage();
		$spaces = [];

		// List interwikis first, so they'll be overridden
		// by any conflicting local namespaces.
		foreach ( $this->getInterwikiList() as $prefix ) {
			$name = $contLang->ucfirst( $prefix );
			$spaces[$name] = 0;
		}

		// Now pull in all canonical and alias namespaces...
		foreach (
			MediaWikiServices::getInstance()->getNamespaceInfo()->getCanonicalNamespaces()
			as $ns => $name
		) {
			// This includes $wgExtraNamespaces
			if ( $name !== '' ) {
				$spaces[$name] = $ns;
			}
		}
		foreach ( $contLang->getNamespaces() as $ns => $name ) {
			if ( $name !== '' ) {
				$spaces[$name] = $ns;
			}
		}
		foreach ( $contLang->getNamespaceAliases() as $name => $ns ) {
			$spaces[$name] = $ns;
		}

		// We'll need to check for lowercase keys as well,
		// since we're doing case-sensitive searches in the db.
		$capitalLinks = $this->getConfig()->get( MainConfigNames::CapitalLinks );
		foreach ( $spaces as $name => $ns ) {
			$moreNames = [];
			$moreNames[] = $contLang->uc( $name );
			$moreNames[] = $contLang->ucfirst( $contLang->lc( $name ) );
			$moreNames[] = $contLang->ucwords( $name );
			$moreNames[] = $contLang->ucwords( $contLang->lc( $name ) );
			$moreNames[] = $contLang->ucwordbreaks( $name );
			$moreNames[] = $contLang->ucwordbreaks( $contLang->lc( $name ) );
			if ( !$capitalLinks ) {
				// foreach iterates over the array as it was when the loop
				// started, so the appended entries are not revisited.
				foreach ( $moreNames as $altName ) {
					$moreNames[] = $contLang->lcfirst( $altName );
				}
				$moreNames[] = $contLang->lcfirst( $name );
			}
			foreach ( array_unique( $moreNames ) as $altName ) {
				if ( $altName !== $name ) {
					$spaces[$altName] = $ns;
				}
			}
		}

		// Sort by namespace index, and if there are two with the same index,
		// break the tie by sorting by name
		$origSpaces = $spaces;
		uksort( $spaces, static function ( $a, $b ) use ( $origSpaces ) {
			return $origSpaces[$a] <=> $origSpaces[$b]
				?: $a <=> $b;
		} );

		$ok = true;
		foreach ( $spaces as $name => $ns ) {
			// checkNamespace() is evaluated first so that every name is
			// processed even after an earlier failure.
			$ok = $this->checkNamespace( $ns, $name, $options ) && $ok;
		}

		$this->output(
			"{$this->totalPages} pages to fix, " .
			"{$this->resolvablePages} were resolvable.\n\n"
		);

		foreach ( $spaces as $name => $ns ) {
			if ( $ns != 0 ) {
				/* Fix up link destinations for non-interwiki links only.
				 *
				 * For example if a page has [[Foo:Bar]] and then a Foo namespace
				 * is introduced, pagelinks needs to be updated to have
				 * page_namespace = NS_FOO.
				 *
				 * If instead an interwiki prefix was introduced called "Foo",
				 * the link should instead be moved to the iwlinks table. If a new
				 * language is introduced called "Foo", or if there is a pagelink
				 * [[fr:Bar]] when interlanguage magic links are turned on, the
				 * link would have to be moved to the langlinks table. Let's put
				 * those cases in the too-hard basket for now. The consequences are
				 * not especially severe.
				 * @fixme Handle interwiki links, and pagelinks to Category:, File:
				 * which probably need reparsing.
				 */

				$this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options );
				$this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options );

				// The redirect table has interwiki links randomly mixed in, we
				// need to filter those out. For example [[w:Foo:Bar]] would
				// have rd_interwiki=w and rd_namespace=0, which would match the
				// query for a conflicting namespace "Foo" if filtering wasn't done.
				$this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
					[ 'rd_interwiki' => null ] );
				$this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
					[ 'rd_interwiki' => '' ] );
			}
		}

		$this->output(
			"{$this->totalLinks} links to fix, " .
			"{$this->resolvableLinks} were resolvable, " .
			"{$this->deletedLinks} were deleted.\n"
		);

		return $ok;
	}

	/**
	 * Get the interwiki prefixes known to the interwiki lookup service.
	 *
	 * @return string[] The 'iw_prefix' column of every row returned by getAllPrefixes()
	 */
	private function getInterwikiList() {
		$result = MediaWikiServices::getInstance()->getInterwikiLookup()->getAllPrefixes();
		return array_column( $result, 'iw_prefix' );
	}

	/**
	 * Check a given prefix and try to move the matching pages to the given
	 * destination namespace.
	 *
	 * @param int $ns Destination namespace id
	 * @param string $name Title prefix (without the trailing colon)
	 * @param array $options Associative array of validated command-line options
	 * @return bool True if there was nothing to do or every page was handled
	 *   successfully; false if at least one page was aborted or failed
	 */
	private function checkNamespace( $ns, $name, $options ) {
		$targets = $this->getTargetList( $ns, $name, $options );
		$count = $targets->numRows();
		$this->totalPages += $count;
		if ( $count == 0 ) {
			return true;
		}

		$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';

		$ok = true;
		foreach ( $targets as $row ) {
			// Find the new title and determine the action to take

			$newTitle = $this->getDestinationTitle(
				$ns, $name, $row->page_namespace, $row->page_title );
			$logStatus = false;
			if ( !$newTitle ) {
				if ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
					$logStatus = 'invalid title and --add-prefix not specified';
					$action = 'abort';
				} else {
					$action = 'alternate';
				}
			} elseif ( $newTitle->exists() ) {
				if ( $options['merge'] ) {
					// canMerge() fills in $logStatus when it refuses
					if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
						$action = 'merge';
					} else {
						$action = 'abort';
					}
				} elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
					$action = 'abort';
					$logStatus = 'dest title exists and --add-prefix not specified';
				} else {
					$action = 'alternate';
				}
			} else {
				$action = 'move';
				$logStatus = 'no conflict';
			}
			if ( $action === 'alternate' ) {
				// Use a row-local variable for the destination namespace: the
				// value returned here may be the talk namespace, and writing it
				// back into $ns would make every later row in this loop resolve
				// against the wrong namespace.
				[ $destNS, $dbk ] = $this->getDestination( $ns, $name, $row->page_namespace,
					$row->page_title );
				$newTitle = $this->getAlternateTitle( $destNS, $dbk, $options );
				if ( !$newTitle ) {
					$action = 'abort';
					$logStatus = 'alternate title is invalid';
				} elseif ( $newTitle->exists() ) {
					$action = 'abort';
					$logStatus = 'alternate title conflicts';
				} else {
					$action = 'move';
					$logStatus = 'alternate';
				}
			}

			// Take the action or log a dry run message

			$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
			$pageOK = true;

			switch ( $action ) {
				case 'abort':
					$this->output( "$logTitle *** $logStatus\n" );
					$pageOK = false;
					break;
				case 'move':
					$this->output( "$logTitle -> " .
						$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );

					if ( $options['fix'] ) {
						$pageOK = $this->movePage( $row->page_id, $newTitle );
					}
					break;
				case 'merge':
					$this->output( "$logTitle => " .
						$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );

					if ( $options['fix'] ) {
						$pageOK = $this->mergePage( $row, $newTitle );
					}
					break;
			}

			if ( $pageOK ) {
				$this->resolvablePages++;
			} else {
				$ok = false;
			}
		}

		return $ok;
	}

	/**
	 * Check and repair the destination fields in a link table.
	 *
	 * Rows in namespace 0 whose title starts with "<name>:" are processed in
	 * batches. With --fix each row is re-pointed with UPDATE IGNORE; rows that
	 * still exist under the old target afterwards are deleted.
	 *
	 * @param string $table The link table name
	 * @param string $fieldPrefix The field prefix in the link table
	 * @param int $ns Destination namespace id
	 * @param string $name Title prefix (without the trailing colon)
	 * @param array $options Associative array of validated command-line options
	 * @param array $extraConds Extra conditions for the SELECT query
	 */
	private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
		$extraConds = []
	) {
		$dbw = $this->getDB( DB_PRIMARY );

		$batchConds = [];
		$fromField = "{$fieldPrefix}_from";
		$batchSize = 500;
		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
		$linksMigration = MediaWikiServices::getInstance()->getLinksMigration();
		// Whether this table's title fields are described by LinksMigration
		$isMigrated = isset( $linksMigration::$mapping[$table] );
		if ( $isMigrated ) {
			$queryInfo = $linksMigration->getQueryInfo( $table );
			[ $namespaceField, $titleField ] = $linksMigration->getTitleFields( $table );
		} else {
			$queryInfo = [
				'tables' => [ $table ],
				'fields' => [
					"{$fieldPrefix}_namespace",
					"{$fieldPrefix}_title"
				],
				'joins' => []
			];
			$namespaceField = "{$fieldPrefix}_namespace";
			$titleField = "{$fieldPrefix}_title";
		}

		while ( true ) {
			$res = $dbw->select(
				$queryInfo['tables'],
				array_merge( [ $fromField ], $queryInfo['fields'] ),
				array_merge(
					$batchConds,
					$extraConds,
					[
						$namespaceField => 0,
						$titleField . $dbw->buildLike( "$name:", $dbw->anyString() )
					]
				),
				__METHOD__,
				[
					'ORDER BY' => [ $titleField, $fromField ],
					'LIMIT' => $batchSize
				],
				$queryInfo['joins']
			);

			if ( $res->numRows() == 0 ) {
				break;
			}

			$rowsToDeleteIfStillExists = [];

			foreach ( $res as $row ) {
				$logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
					"dbk={$row->$titleField}";
				$destTitle = $this->getDestinationTitle(
					$ns, $name, $row->$namespaceField, $row->$titleField );
				$this->totalLinks++;
				if ( !$destTitle ) {
					$this->output( "$table $logTitle *** INVALID\n" );
					continue;
				}
				$this->resolvableLinks++;
				if ( !$options['fix'] ) {
					$this->output( "$table $logTitle -> " .
						$destTitle->getPrefixedDBkey() . " DRY RUN\n" );
					continue;
				}

				if ( $isMigrated ) {
					$setValue = $linksMigration->getLinksConditions( $table, $destTitle );
					$whereCondition = $linksMigration->getLinksConditions(
						$table,
						new TitleValue( 0, $row->$titleField )
					);
					$deleteCondition = $linksMigration->getLinksConditions(
						$table,
						new TitleValue( (int)$row->$namespaceField, $row->$titleField )
					);
				} else {
					$setValue = [
						$namespaceField => $destTitle->getNamespace(),
						$titleField => $destTitle->getDBkey()
					];
					$whereCondition = [
						$namespaceField => 0,
						$titleField => $row->$titleField
					];
					$deleteCondition = [
						$namespaceField => $row->$namespaceField,
						$titleField => $row->$titleField,
					];
				}

				// IGNORE: if the destination row already exists the update is
				// skipped and the old row is left behind for the delete below.
				$dbw->update( $table,
					// SET
					$setValue,
					// WHERE
					array_merge( [ $fromField => $row->$fromField ], $whereCondition ),
					__METHOD__,
					[ 'IGNORE' ]
				);

				// Each entry must be an AND-joined condition identifying one
				// row; the entries are OR-ed together for the batch delete.
				$rowsToDeleteIfStillExists[] = $dbw->makeList(
					array_merge( [ $fromField => $row->$fromField ], $deleteCondition ),
					IDatabase::LIST_AND
				);

				$this->output( "$table $logTitle -> " .
					$destTitle->getPrefixedDBkey() . "\n"
				);
			}

			if ( $options['fix'] && count( $rowsToDeleteIfStillExists ) > 0 ) {
				$dbw->delete(
					$table,
					$dbw->makeList( $rowsToDeleteIfStillExists, IDatabase::LIST_OR ),
					__METHOD__
				);

				$deleted = $dbw->affectedRows();
				$this->deletedLinks += $deleted;
				$this->resolvableLinks -= $deleted;
			}

			// Continue after the last row of this batch, in (title, from) order
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
			$encLastTitle = $dbw->addQuotes( $row->$titleField );
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
			$encLastFrom = $dbw->addQuotes( $row->$fromField );

			$batchConds = [
				"$titleField > $encLastTitle " .
				"OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)"
			];

			$lbFactory->waitForReplication();
		}
	}

	/**
	 * Move the given pseudo-namespace, either replacing the colon with a hyphen
	 * (useful for pseudo-namespaces that conflict with interwiki links) or move
	 * them to another namespace if specified.
	 *
	 * @param array $options Associative array of validated command-line options
	 * @return bool Result of checkNamespace() for the configured prefix
	 */
	private function checkPrefix( $options ) {
		$prefix = $options['source-pseudo-namespace'];
		$ns = $options['dest-namespace'];
		$this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" );

		return $this->checkNamespace( $ns, $prefix, $options );
	}

	/**
	 * Find pages in the main namespace (and, with --move-talk and a subject
	 * destination namespace, also the Talk namespace) whose titles begin
	 * with "<name>:".
	 *
	 * @param int $ns Destination namespace id
	 * @param string $name Prefix that is being made a namespace
	 * @param array $options Associative array of validated command-line options
	 * @return IResultWrapper Rows with page_id, page_title and page_namespace
	 */
	private function getTargetList( $ns, $name, $options ) {
		$dbw = $this->getDB( DB_PRIMARY );

		if (
			$options['move-talk'] &&
			MediaWikiServices::getInstance()->getNamespaceInfo()->isSubject( $ns )
		) {
			$checkNamespaces = [ NS_MAIN, NS_TALK ];
		} else {
			$checkNamespaces = NS_MAIN;
		}

		return $dbw->select( 'page',
			[
				'page_id',
				'page_title',
				'page_namespace',
			],
			[
				'page_namespace' => $checkNamespaces,
				'page_title' . $dbw->buildLike( "$name:", $dbw->anyString() ),
			],
			__METHOD__
		);
	}

	/**
	 * Compute the destination namespace and DB key for a source row.
	 *
	 * @param int $ns The destination namespace ID
	 * @param string $name The conflicting prefix
	 * @param int $sourceNs The source namespace
	 * @param string $sourceDbk The source DB key (i.e. page_title)
	 * @return array [ destination namespace id, destination DB key ]
	 */
	private function getDestination( $ns, $name, $sourceNs, $sourceDbk ) {
		// Strip the leading "<name>:" from the source key
		$dbk = substr( $sourceDbk, strlen( "$name:" ) );
		if ( $ns == 0 ) {
			// An interwiki; try an alternate encoding with '-' for ':'
			$dbk = "$name-" . $dbk;
		}
		$destNS = $ns;
		$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
		if ( $sourceNs == NS_TALK && $nsInfo->isSubject( $ns ) ) {
			// This is an associated talk page moved with the --move-talk feature.
			$destNS = $nsInfo->getTalk( $destNS );
		}
		return [ $destNS, $dbk ];
	}

	/**
	 * Get the preferred destination title for a given target page.
	 *
	 * @param int $ns The destination namespace ID
	 * @param string $name The conflicting prefix
	 * @param int $sourceNs The source namespace
	 * @param string $sourceDbk The source DB key (i.e. page_title)
	 * @return Title|false False if no safe title can be made or it cannot exist
	 */
	private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk ) {
		[ $destNS, $dbk ] = $this->getDestination( $ns, $name, $sourceNs, $sourceDbk );
		$newTitle = Title::makeTitleSafe( $destNS, $dbk );
		if ( !$newTitle || !$newTitle->canExist() ) {
			return false;
		}
		return $newTitle;
	}

	/**
	 * Get an alternative title to move a page to, built from the
	 * --add-prefix and --add-suffix options.
	 *
	 * @param int $ns The destination namespace ID
	 * @param string $dbk The source DB key (i.e. page_title)
	 * @param array $options Associative array of validated command-line options
	 * @return Title|null|false False when neither option is set; otherwise
	 *   whatever Title::makeTitleSafe() returns for the decorated key
	 */
	private function getAlternateTitle( $ns, $dbk, $options ) {
		$prefix = $options['add-prefix'];
		$suffix = $options['add-suffix'];
		if ( $prefix == '' && $suffix == '' ) {
			return false;
		}
		$newDbk = $prefix . $dbk . $suffix;
		return Title::makeTitleSafe( $ns, $newDbk );
	}

	/**
	 * Move a page by rewriting its page row, and update the
	 * *_from_namespace columns of the link tables that point from it.
	 *
	 * @param int $id The page_id
	 * @param LinkTarget $newLinkTarget The new title link target
	 * @return bool Always true
	 */
	private function movePage( $id, LinkTarget $newLinkTarget ) {
		$dbw = $this->getDB( DB_PRIMARY );

		$dbw->update( 'page',
			[
				"page_namespace" => $newLinkTarget->getNamespace(),
				"page_title" => $newLinkTarget->getDBkey(),
			],
			[
				"page_id" => $id,
			],
			__METHOD__
		);

		// Update *_from_namespace in links tables
		$fromNamespaceTables = [
			[ 'pagelinks', 'pl' ],
			[ 'templatelinks', 'tl' ],
			[ 'imagelinks', 'il' ]
		];
		foreach ( $fromNamespaceTables as [ $table, $fieldPrefix ] ) {
			$dbw->update( $table,
				// SET
				[ "{$fieldPrefix}_from_namespace" => $newLinkTarget->getNamespace() ],
				// WHERE
				[ "{$fieldPrefix}_from" => $id ],
				__METHOD__
			);
		}

		return true;
	}

	/**
	 * Determine if we can merge a page.
	 *
	 * The merge is refused when the latest revision of the source is newer
	 * than the latest revision of the destination, or when either latest
	 * revision cannot be loaded.
	 *
	 * @param int $id The page_id of the source page
	 * @param LinkTarget $linkTarget The destination title
	 * @param string|false &$logStatus Set to a reason when the merge is refused
	 * @return bool
	 */
	private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
		$revisionLookup = MediaWikiServices::getInstance()->getRevisionLookup();
		$latestDest = $revisionLookup->getRevisionByTitle( $linkTarget, 0,
			IDBAccessObject::READ_LATEST );
		$latestSource = $revisionLookup->getRevisionByPageId( $id, 0,
			IDBAccessObject::READ_LATEST );
		if ( !$latestSource || !$latestDest ) {
			// Either lookup can come back empty; refuse instead of
			// dereferencing null below.
			$logStatus = 'cannot merge since a latest revision could not be loaded';
			return false;
		}
		if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
			$logStatus = 'cannot merge since source is later';
			return false;
		} else {
			return true;
		}
	}

	/**
	 * Merge page histories: re-point the source page's revisions at the
	 * destination page, delete the source page row, then clean up the
	 * links of the deleted page.
	 *
	 * @param stdClass $row Page row with page_id, page_namespace and page_title
	 * @param Title $newTitle The destination title
	 * @return bool Always true
	 */
	private function mergePage( $row, Title $newTitle ) {
		$dbw = $this->getDB( DB_PRIMARY );

		$id = $row->page_id;

		// Construct the WikiPage object we will need later, while the
		// page_id still exists. Note that this cannot use makeTitleSafe(),
		// we are deliberately constructing an invalid title.
		$sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		$sourceTitle->resetArticleID( $id );
		$wikiPage = MediaWikiServices::getInstance()->getWikiPageFactory()->newFromTitle( $sourceTitle );
		$wikiPage->loadPageData( WikiPage::READ_LATEST );

		$destId = $newTitle->getArticleID();
		$this->beginTransaction( $dbw, __METHOD__ );
		$dbw->update( 'revision',
			// SET
			[ 'rev_page' => $destId ],
			// WHERE
			[ 'rev_page' => $id ],
			__METHOD__
		);

		$dbw->delete( 'page', [ 'page_id' => $id ], __METHOD__ );

		$this->commitTransaction( $dbw, __METHOD__ );

		/* Call LinksDeletionUpdate to delete outgoing links from the old title,
		 * and update category counts.
		 *
		 * Calling external code with a fake broken Title is a fairly dubious
		 * idea. It's necessary because it's quite a lot of code to duplicate,
		 * but that also makes it fragile since it would be easy for someone to
		 * accidentally introduce an assumption of title validity to the code we
		 * are calling.
		 */
		DeferredUpdates::addUpdate(
			new \MediaWiki\Deferred\LinksUpdate\LinksDeletionUpdate( $wikiPage )
		);
		DeferredUpdates::doUpdates();

		return true;
	}
}
730 
731 $maintClass = NamespaceDupes::class;
732 require_once RUN_MAINTENANCE_IF_MAIN;
const NS_MAIN
Definition: Defines.php:64
const LIST_OR
Definition: Defines.php:46
const NS_TALK
Definition: Defines.php:65
const LIST_AND
Definition: Defines.php:43
static addUpdate(DeferrableUpdate $update, $stage=self::POSTSEND)
Add an update to the pending update queue for execution at the appropriate time.
static doUpdates( $unused=null, $stage=self::ALL)
Consume and execute all pending updates.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: Maintenance.php:66
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
Update object handling the cleanup of links tables after a page was deleted.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Maintenance script that checks for articles to fix after adding/deleting namespaces.
execute()
Do the actual work.
__construct()
Default constructor.
IMaintainableDatabase $db
Represents a page (or page fragment) title within MediaWiki.
Definition: TitleValue.php:40
Represents a title within MediaWiki.
Definition: Title.php:49
exists( $flags=0)
Check if page exists.
Definition: Title.php:3478
getPrefixedDBkey()
Get the prefixed database key form.
Definition: Title.php:1876
canExist()
Can this title represent a page in the wiki's database?
Definition: Title.php:1232
getArticleID( $flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition: Title.php:2825
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:664
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:638
getNamespace()
Get the namespace index.
getDBkey()
Get the main part of the link target, in canonical database form.
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:39
Advanced database interface for IDatabase handles that include maintenance methods.
Result wrapper for grabbing data queried from an IDatabase object.
$maintClass
const DB_PRIMARY
Definition: defines.php:28