MediaWiki REL1_40
namespaceDupes.php
Go to the documentation of this file.
1<?php
27require_once __DIR__ . '/Maintenance.php';
28
37
45
49 protected $db;
50
55 private $resolvablePages = 0;
56
61 private $totalPages = 0;
62
67 private $resolvableLinks = 0;
68
73 private $totalLinks = 0;
74
80 private $deletedLinks = 0;
81
/**
 * Set the script description and register every command-line option.
 *
 * Options are registered in a fixed order; the third element of each
 * definition says whether the option takes an argument.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Find and fix pages affected by namespace addition/removal' );

	// [ option name, help text, takes an argument? ]
	$optionDefinitions = [
		[
			'fix',
			'Attempt to automatically fix errors and delete broken links',
			false,
		],
		[
			'merge',
			'Instead of renaming conflicts, do a history merge with the correct title',
			false,
		],
		[
			'add-suffix',
			'Dupes will be renamed with correct namespace with <text> appended ' .
				'after the article name',
			true,
		],
		[
			'add-prefix',
			'Dupes will be renamed with correct namespace with <text> prepended ' .
				'before the article name',
			true,
		],
		[
			'source-pseudo-namespace',
			'Move all pages with the given source prefix (with an implied colon ' .
				'following it). If --dest-namespace is not specified, the colon will ' .
				'be replaced with a hyphen.',
			true,
		],
		[
			'dest-namespace',
			'In combination with --source-pseudo-namespace, specify the namespace ' .
				'ID of the destination.',
			true,
		],
		[
			'move-talk',
			'If this is specified, pages in the Talk namespace that begin with a ' .
				'conflicting prefix will be renamed, for example ' .
				'Talk:File:Foo -> File_Talk:Foo',
			false,
		],
	];

	foreach ( $optionDefinitions as [ $optionName, $helpText, $takesArgument ] ) {
		// None of the options is required.
		$this->addOption( $optionName, $helpText, false, $takesArgument );
	}
}
102
/**
 * Entry point: collect the command-line options, run either the single-prefix
 * check or the full namespace check, and print the overall verdict.
 */
public function execute() {
	$pseudoNamespace = $this->getOption( 'source-pseudo-namespace', '' );

	$options = [
		'fix' => $this->hasOption( 'fix' ),
		'merge' => $this->hasOption( 'merge' ),
		'add-suffix' => $this->getOption( 'add-suffix', '' ),
		'add-prefix' => $this->getOption( 'add-prefix', '' ),
		'move-talk' => $this->hasOption( 'move-talk' ),
		'source-pseudo-namespace' => $pseudoNamespace,
		'dest-namespace' => (int)$this->getOption( 'dest-namespace', 0 ),
	];

	// An explicit pseudo-namespace restricts the run to that one prefix.
	$allResolved = $pseudoNamespace !== ''
		? $this->checkPrefix( $options )
		: $this->checkAll( $options );

	$this->output( $allResolved ? "\nLooks good!\n" : "\nOh noeees\n" );
}
126
/**
 * Check pages and link-table rows against every interwiki prefix, namespace
 * name, namespace alias and their case variants.
 *
 * @param array $options Option map built by execute()
 * @return bool False if checkNamespace() reported a failure for any name
 */
private function checkAll( $options ) {
	$contLang = MediaWikiServices::getInstance()->getContentLanguage();

	// Interwiki prefixes are registered first with index 0, so that a local
	// namespace of the same name replaces them further down.
	$spaces = [];
	foreach ( $this->getInterwikiList() as $iwPrefix ) {
		$spaces[$contLang->ucfirst( $iwPrefix )] = 0;
	}

	// Canonical names (this includes $wgExtraNamespaces) followed by the
	// content language's own names; later entries win on conflict.
	$indexToNameLists = [
		MediaWikiServices::getInstance()->getNamespaceInfo()->getCanonicalNamespaces(),
		$contLang->getNamespaces(),
	];
	foreach ( $indexToNameLists as $indexToName ) {
		foreach ( $indexToName as $index => $label ) {
			if ( $label !== '' ) {
				$spaces[$label] = $index;
			}
		}
	}
	foreach ( $contLang->getNamespaceAliases() as $alias => $index ) {
		$spaces[$alias] = $index;
	}

	// The database search is case-sensitive, so differently-cased spellings
	// of every name have to be looked up as well.
	$capitalLinks = $this->getConfig()->get( MainConfigNames::CapitalLinks );
	foreach ( $spaces as $name => $ns ) {
		$lowered = $contLang->lc( $name );
		$variants = [
			$contLang->uc( $name ),
			$contLang->ucfirst( $lowered ),
			$contLang->ucwords( $name ),
			$contLang->ucwords( $lowered ),
			$contLang->ucwordbreaks( $name ),
			$contLang->ucwordbreaks( $lowered ),
		];
		if ( !$capitalLinks ) {
			// Titles may start with a lowercase letter on this wiki.
			$lowerFirstVariants = array_map(
				static function ( $variant ) use ( $contLang ) {
					return $contLang->lcfirst( $variant );
				},
				$variants
			);
			$variants = array_merge( $variants, $lowerFirstVariants );
			$variants[] = $contLang->lcfirst( $name );
		}
		foreach ( array_unique( $variants ) as $variant ) {
			if ( $variant !== $name ) {
				$spaces[$variant] = $ns;
			}
		}
	}

	// Order by namespace index; names sharing an index are ordered by name.
	$indexByName = $spaces;
	uksort( $spaces, static function ( $left, $right ) use ( $indexByName ) {
		$byIndex = $indexByName[$left] <=> $indexByName[$right];
		return $byIndex ?: $left <=> $right;
	} );

	$ok = true;
	foreach ( $spaces as $name => $ns ) {
		if ( !$this->checkNamespace( $ns, $name, $options ) ) {
			$ok = false;
		}
	}

	$this->output(
		"{$this->totalPages} pages to fix, " .
		"{$this->resolvablePages} were resolvable.\n\n"
	);

	foreach ( $spaces as $name => $ns ) {
		if ( $ns == 0 ) {
			// Index 0 marks an interwiki prefix; see below for why those are skipped.
			continue;
		}

		/* Only link destinations that now belong to a local namespace are
		 * fixed up here.
		 *
		 * Example: a page contains [[Foo:Bar]] and a Foo namespace is added
		 * afterwards; pagelinks then has to be updated to carry
		 * page_namespace = NS_FOO.
		 *
		 * Had "Foo" been introduced as an interwiki prefix instead, the link
		 * would belong in the iwlinks table. Had it been introduced as a
		 * language, or for a pagelink [[fr:Bar]] with interlanguage magic
		 * links enabled, it would belong in the langlinks table. Those cases
		 * are left in the too-hard basket for now; the consequences are not
		 * especially severe.
		 * @fixme Handle interwiki links, and pagelinks to Category:, File:
		 *   which probably need reparsing.
		 */
		$this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options );
		$this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options );

		// The redirect table also holds interwiki redirects, which must be
		// filtered out: [[w:Foo:Bar]] is stored with rd_interwiki=w and
		// rd_namespace=0 and would otherwise match the query for a
		// conflicting namespace "Foo". Both the NULL and the empty-string
		// form of "no interwiki" are checked.
		foreach ( [ null, '' ] as $noInterwiki ) {
			$this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
				[ 'rd_interwiki' => $noInterwiki ] );
		}
	}

	$this->output(
		"{$this->totalLinks} links to fix, " .
		"{$this->resolvableLinks} were resolvable, " .
		"{$this->deletedLinks} were deleted.\n"
	);

	return $ok;
}
247
/**
 * Fetch the 'iw_prefix' value of every entry returned by the interwiki lookup.
 *
 * @return array List of interwiki prefixes
 */
private function getInterwikiList() {
	$interwikiLookup = MediaWikiServices::getInstance()->getInterwikiLookup();

	return array_column( $interwikiLookup->getAllPrefixes(), 'iw_prefix' );
}
255
/**
 * Check all pages whose title starts with "$name:" and, depending on the
 * options, move, merge or report each of them.
 *
 * @param int $ns Destination namespace index (0 is used for interwiki prefixes)
 * @param string $name Prefix name to look for
 * @param array $options Option map built by execute()
 * @return bool True if every page found was resolvable (or there were none)
 */
private function checkNamespace( $ns, $name, $options ) {
	$targets = $this->getTargetList( $ns, $name, $options );
	$count = $targets->numRows();
	$this->totalPages += $count;
	if ( $count == 0 ) {
		return true;
	}

	$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
	// Renaming to an alternate title is only possible with a prefix or suffix.
	$canRename = $options['add-prefix'] != '' || $options['add-suffix'] != '';

	$ok = true;
	foreach ( $targets as $row ) {
		// Find the new title and determine the action to take

		$newTitle = $this->getDestinationTitle(
			$ns, $name, $row->page_namespace, $row->page_title );
		$logStatus = false;
		if ( !$newTitle ) {
			if ( $canRename ) {
				$action = 'alternate';
			} else {
				$logStatus = 'invalid title and --add-prefix not specified';
				$action = 'abort';
			}
		} elseif ( $newTitle->exists() ) {
			if ( $options['merge'] ) {
				// canMerge() fills in $logStatus when it refuses.
				if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
					$action = 'merge';
				} else {
					$action = 'abort';
				}
			} elseif ( $canRename ) {
				$action = 'alternate';
			} else {
				$action = 'abort';
				$logStatus = 'dest title exists and --add-prefix not specified';
			}
		} else {
			$action = 'move';
			$logStatus = 'no conflict';
		}

		if ( $action === 'alternate' ) {
			// Keep the per-row destination namespace in its own variable.
			// Writing it back into $ns would leak a talk namespace (chosen
			// for a Talk: row under --move-talk) into every following row.
			[ $destNs, $dbk ] = $this->getDestination( $ns, $name, $row->page_namespace,
				$row->page_title );
			$newTitle = $this->getAlternateTitle( $destNs, $dbk, $options );
			if ( !$newTitle ) {
				$action = 'abort';
				$logStatus = 'alternate title is invalid';
			} elseif ( $newTitle->exists() ) {
				$action = 'abort';
				$logStatus = 'alternate title conflicts';
			} else {
				$action = 'move';
				$logStatus = 'alternate';
			}
		}

		// Take the action or log a dry run message

		$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
		$pageOK = true;

		switch ( $action ) {
			case 'abort':
				$this->output( "$logTitle *** $logStatus\n" );
				$pageOK = false;
				break;
			case 'move':
				$this->output( "$logTitle -> " .
					$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );

				if ( $options['fix'] ) {
					$pageOK = $this->movePage( $row->page_id, $newTitle );
				}
				break;
			case 'merge':
				$this->output( "$logTitle => " .
					$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );

				if ( $options['fix'] ) {
					$pageOK = $this->mergePage( $row, $newTitle );
				}
				break;
		}

		if ( $pageOK ) {
			$this->resolvablePages++;
		} else {
			$ok = false;
		}
	}

	return $ok;
}
358
/**
 * Scan a link table in batches for main-namespace targets starting with
 * "$name:" and, when fixing, repoint them at the resolved destination title.
 *
 * Rows that cannot be updated (the update runs with IGNORE) are deleted
 * afterwards and counted as deleted rather than resolved.
 *
 * @param string $table Link table name
 * @param string $fieldPrefix Column prefix of that table, e.g. "pl"
 * @param int $ns Destination namespace index
 * @param string $name Prefix name to look for
 * @param array $options Option map built by execute()
 * @param array $extraConds Additional query conditions
 */
private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
	$extraConds = []
) {
	$dbw = $this->getDB( DB_PRIMARY );
	$linksMigration = MediaWikiServices::getInstance()->getLinksMigration();
	// Tables known to LinksMigration get their fields and conditions from it.
	$isMigratedTable = isset( $linksMigration::$mapping[$table] );

	$fromField = "{$fieldPrefix}_from";
	if ( $isMigratedTable ) {
		$queryInfo = $linksMigration->getQueryInfo( $table );
		[ $namespaceField, $titleField ] = $linksMigration->getTitleFields( $table );
	} else {
		$namespaceField = "{$fieldPrefix}_namespace";
		$titleField = "{$fieldPrefix}_title";
		$queryInfo = [
			'tables' => [ $table ],
			'fields' => [ $namespaceField, $titleField ],
			'joins' => [],
		];
	}

	// Parts of the batch query that do not change between iterations.
	$selectFields = array_merge( [ $fromField ], $queryInfo['fields'] );
	$prefixConds = [
		$namespaceField => 0,
		$titleField . $dbw->buildLike( "$name:", $dbw->anyString() ),
	];
	$selectOptions = [
		'ORDER BY' => [ $titleField, $fromField ],
		'LIMIT' => 500,
	];

	$batchConds = [];
	while ( true ) {
		$batch = $dbw->select(
			$queryInfo['tables'],
			$selectFields,
			array_merge( $batchConds, $extraConds, $prefixConds ),
			__METHOD__,
			$selectOptions,
			$queryInfo['joins']
		);

		if ( $batch->numRows() == 0 ) {
			break;
		}

		// Conditions matching the old rows, in case the update left them behind.
		$leftoverRowConds = [];

		foreach ( $batch as $row ) {
			$fromId = $row->$fromField;
			$oldNamespace = $row->$namespaceField;
			$oldDbKey = $row->$titleField;

			$logTitle = "from={$fromId} ns={$oldNamespace} " .
				"dbk={$oldDbKey}";
			$destTitle = $this->getDestinationTitle( $ns, $name, $oldNamespace, $oldDbKey );
			$this->totalLinks++;
			if ( !$destTitle ) {
				$this->output( "$table $logTitle *** INVALID\n" );
				continue;
			}
			$this->resolvableLinks++;
			if ( !$options['fix'] ) {
				$this->output( "$table $logTitle -> " .
					$destTitle->getPrefixedDBkey() . " DRY RUN\n" );
				continue;
			}

			if ( $isMigratedTable ) {
				$setValue = $linksMigration->getLinksConditions( $table, $destTitle );
				$whereCondition = $linksMigration->getLinksConditions(
					$table,
					new TitleValue( 0, $oldDbKey )
				);
				$deleteCondition = $linksMigration->getLinksConditions(
					$table,
					new TitleValue( (int)$oldNamespace, $oldDbKey )
				);
			} else {
				$setValue = [
					$namespaceField => $destTitle->getNamespace(),
					$titleField => $destTitle->getDBkey(),
				];
				$whereCondition = [
					$namespaceField => 0,
					$titleField => $oldDbKey,
				];
				$deleteCondition = [
					$namespaceField => $oldNamespace,
					$titleField => $oldDbKey,
				];
			}

			$fromCondition = [ $fromField => $fromId ];

			$dbw->update(
				$table,
				$setValue,
				array_merge( $fromCondition, $whereCondition ),
				__METHOD__,
				[ 'IGNORE' ]
			);

			$leftoverRowConds[] = $dbw->makeList(
				array_merge( $fromCondition, $deleteCondition ),
				IDatabase::LIST_AND
			);

			$this->output( "$table $logTitle -> " .
				$destTitle->getPrefixedDBkey() . "\n"
			);
		}

		if ( $options['fix'] && count( $leftoverRowConds ) > 0 ) {
			$dbw->delete(
				$table,
				$dbw->makeList( $leftoverRowConds, IDatabase::LIST_OR ),
				__METHOD__
			);

			// Deleted rows were counted as resolvable above; move them over.
			$deletedCount = $dbw->affectedRows();
			$this->deletedLinks += $deletedCount;
			$this->resolvableLinks -= $deletedCount;
		}

		// Continue after the last row of this batch, in ORDER BY order.
		$batchConds = [
			$dbw->buildComparison( '>', [
				// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
				$titleField => $row->$titleField,
				// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
				$fromField => $row->$fromField,
			] )
		];

		$this->waitForReplication();
	}
}
503
/**
 * Check a single user-supplied pseudo-namespace prefix against the
 * destination namespace given on the command line.
 *
 * @param array $options Option map built by execute()
 * @return bool Result of checkNamespace() for that prefix
 */
private function checkPrefix( $options ) {
	[
		'source-pseudo-namespace' => $prefix,
		'dest-namespace' => $ns,
	] = $options;

	$this->output( 'Checking prefix "' . $prefix . '" vs namespace ' . $ns . "\n" );

	return $this->checkNamespace( $ns, $prefix, $options );
}
518
/**
 * Select the pages whose title starts with "$name:".
 *
 * The main namespace is always searched; NS_TALK is added when --move-talk
 * is set and $ns is a subject namespace.
 *
 * @param int $ns Destination namespace index
 * @param string $name Prefix name to look for
 * @param array $options Option map built by execute()
 * @return IResultWrapper Rows with page_id, page_title and page_namespace
 */
private function getTargetList( $ns, $name, $options ) {
	$dbw = $this->getDB( DB_PRIMARY );

	$alsoCheckTalk = $options['move-talk'] &&
		MediaWikiServices::getInstance()->getNamespaceInfo()->isSubject( $ns );
	$checkNamespaces = $alsoCheckTalk ? [ NS_MAIN, NS_TALK ] : NS_MAIN;

	$fields = [ 'page_id', 'page_title', 'page_namespace' ];
	$conds = [
		'page_namespace' => $checkNamespaces,
		'page_title' . $dbw->buildLike( "$name:", $dbw->anyString() ),
	];

	return $dbw->select( 'page', $fields, $conds, __METHOD__ );
}
554
/**
 * Work out the namespace and DB key a conflicting title should end up at.
 *
 * @param int $ns Destination namespace index (0 for an interwiki prefix)
 * @param string $name Prefix that is stripped from the source DB key
 * @param int $sourceNs Namespace the source row currently lives in
 * @param string $sourceDbk DB key of the source row, starting with "$name:"
 * @return array [ destination namespace, destination DB key ]
 */
private function getDestination( $ns, $name, $sourceNs, $sourceDbk ) {
	// Everything after the "<name>:" prefix.
	$remainder = substr( $sourceDbk, strlen( "$name:" ) );

	// An interwiki; try an alternate encoding with '-' for ':'
	$dbk = $ns == 0 ? "$name-" . $remainder : $remainder;

	$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
	// A Talk: row for a subject namespace is an associated talk page being
	// moved by the --move-talk feature, so it goes to the talk namespace.
	$isAssociatedTalk = $sourceNs == NS_TALK && $nsInfo->isSubject( $ns );
	$destNS = $isAssociatedTalk ? $nsInfo->getTalk( $ns ) : $ns;

	return [ $destNS, $dbk ];
}
577
/**
 * Build the destination Title for a conflicting row.
 *
 * @param int $ns Destination namespace index
 * @param string $name Prefix that is stripped from the source DB key
 * @param int $sourceNs Namespace the source row currently lives in
 * @param string $sourceDbk DB key of the source row
 * @return Title|false False when no safe title can be made or it cannot exist
 */
private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk ) {
	[ $destNS, $dbk ] = $this->getDestination( $ns, $name, $sourceNs, $sourceDbk );
	$candidate = Title::makeTitleSafe( $destNS, $dbk );

	if ( $candidate && $candidate->canExist() ) {
		return $candidate;
	}

	return false;
}
594
/**
 * Build the fallback title using the --add-prefix / --add-suffix texts.
 *
 * @param int $ns Destination namespace index
 * @param string $dbk Destination DB key before decoration
 * @param array $options Option map built by execute()
 * @return Title|null|false False when neither a prefix nor a suffix was given;
 *   otherwise whatever Title::makeTitleSafe() returns
 */
private function getAlternateTitle( $ns, $dbk, $options ) {
	[ 'add-prefix' => $prefix, 'add-suffix' => $suffix ] = $options;

	if ( $prefix != '' || $suffix != '' ) {
		return Title::makeTitleSafe( $ns, $prefix . $dbk . $suffix );
	}

	return false;
}
613
/**
 * Rewrite the page row to the new namespace and title, and update the
 * source-namespace column of the link tables to match.
 *
 * @param int $id Page ID of the page to move
 * @param LinkTarget $newLinkTarget Destination namespace and DB key
 * @return bool Always true
 */
private function movePage( $id, LinkTarget $newLinkTarget ) {
	$dbw = $this->getDB( DB_PRIMARY );
	$destNamespace = $newLinkTarget->getNamespace();

	$dbw->update(
		'page',
		[
			'page_namespace' => $destNamespace,
			'page_title' => $newLinkTarget->getDBkey(),
		],
		[ 'page_id' => $id ],
		__METHOD__
	);

	// Update *_from_namespace in links tables (table name => column prefix).
	$fromNamespaceTables = [
		'pagelinks' => 'pl',
		'templatelinks' => 'tl',
		'imagelinks' => 'il',
	];
	foreach ( $fromNamespaceTables as $table => $fieldPrefix ) {
		$dbw->update(
			$table,
			[ "{$fieldPrefix}_from_namespace" => $destNamespace ],
			[ "{$fieldPrefix}_from" => $id ],
			__METHOD__
		);
	}

	return true;
}
653
/**
 * Decide whether the source page's history can be merged into the destination.
 *
 * The merge is refused when the source's latest revision is newer than the
 * destination's, or when either latest revision cannot be loaded.
 *
 * @param int $id Page ID of the source page
 * @param LinkTarget $linkTarget Destination title
 * @param string|false &$logStatus Set to the reason whenever false is returned
 * @return bool
 */
private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
	$revisionLookup = MediaWikiServices::getInstance()->getRevisionLookup();
	$latestDest = $revisionLookup->getRevisionByTitle( $linkTarget, 0,
		IDBAccessObject::READ_LATEST );
	$latestSource = $revisionLookup->getRevisionByPageId( $id, 0,
		IDBAccessObject::READ_LATEST );

	// Either lookup may come back empty (e.g. a page row without a usable
	// latest revision). Abort the merge rather than calling a method on null.
	if ( !$latestSource ) {
		$logStatus = 'cannot merge since source has no loadable latest revision';
		return false;
	}
	if ( !$latestDest ) {
		$logStatus = 'cannot merge since destination has no loadable latest revision';
		return false;
	}

	if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
		$logStatus = 'cannot merge since source is later';
		return false;
	}

	return true;
}
679
/**
 * Merge the history of a conflicting page into the page at its correct title.
 *
 * All revisions of the source page are reassigned to the destination page,
 * the source page row is deleted, and a links deletion update is run for the
 * old page.
 *
 * @param stdClass $row Page row with page_id, page_namespace and page_title
 * @param Title $newTitle Existing destination title
 * @return bool Always true
 */
private function mergePage( $row, Title $newTitle ) {
	$dbw = $this->getDB( DB_PRIMARY );

	$id = $row->page_id;

	// Construct the WikiPage object we will need later, while the
	// page_id still exists. Note that this cannot use makeTitleSafe(),
	// we are deliberately constructing an invalid title.
	$sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	$sourceTitle->resetArticleID( $id );
	$wikiPage = MediaWikiServices::getInstance()->getWikiPageFactory()->newFromTitle( $sourceTitle );
	$wikiPage->loadPageData( WikiPage::READ_LATEST );

	$destId = $newTitle->getArticleID();
	$this->beginTransaction( $dbw, __METHOD__ );
	$dbw->update( 'revision',
		// SET
		[ 'rev_page' => $destId ],
		// WHERE
		[ 'rev_page' => $id ],
		__METHOD__
	);

	$dbw->delete( 'page', [ 'page_id' => $id ], __METHOD__ );

	$this->commitTransaction( $dbw, __METHOD__ );

	/* Call LinksDeletionUpdate to delete outgoing links from the old title,
	 * and update category counts.
	 *
	 * Calling external code with a fake broken Title is a fairly dubious
	 * idea. It's necessary because it's quite a lot of code to duplicate,
	 * but that also makes it fragile since it would be easy for someone to
	 * accidentally introduce an assumption of title validity to the code we
	 * are calling.
	 */
	// Fully-qualified names are used so this does not depend on the file's
	// `use` statements.
	// NOTE(review): confirm the LinksDeletionUpdate namespace for this branch.
	\DeferredUpdates::addUpdate(
		new \MediaWiki\Deferred\LinksUpdate\LinksDeletionUpdate( $wikiPage )
	);
	// Run the queued update now instead of leaving it pending.
	\DeferredUpdates::doUpdates();

	return true;
}
728}
729
730$maintClass = NamespaceDupes::class;
731require_once RUN_MAINTENANCE_IF_MAIN;
getDB()
const NS_MAIN
Definition Defines.php:64
const NS_TALK
Definition Defines.php:65
static addUpdate(DeferrableUpdate $update, $stage=self::POSTSEND)
Add an update to the pending update queue for execution at the appropriate time.
static doUpdates( $stage=self::ALL)
Consume and execute all pending updates.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
hasOption( $name)
Checks to see if a particular option was set.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
Update object handling the cleanup of links tables after a page was deleted.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Represents a title within MediaWiki.
Definition Title.php:82
canExist()
Can this title represent a page in the wiki's database?
Definition Title.php:1265
exists( $flags=0)
Check if page exists.
Definition Title.php:3523
getArticleID( $flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition Title.php:2870
getPrefixedDBkey()
Get the prefixed database key form.
Definition Title.php:1911
Maintenance script that checks for articles to fix after adding/deleting namespaces.
execute()
Do the actual work.
__construct()
Default constructor.
IMaintainableDatabase $db
Represents a page (or page fragment) title within MediaWiki.
getNamespace()
Get the namespace index.
getDBkey()
Get the main part of the link target, in canonical database form.
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:36
Advanced database interface for IDatabase handles that include maintenance methods.
Result wrapper for grabbing data queried from an IDatabase object.
$maintClass
const DB_PRIMARY
Definition defines.php:28