MediaWiki REL1_39
namespaceDupes.php
Go to the documentation of this file.
1<?php
27require_once __DIR__ . '/Maintenance.php';
28
36
44
/**
 * Database handle. The generated documentation for this file lists it as
 * "IMaintainableDatabase $db". The methods visible in this listing do not read
 * it; they obtain their handle through getDB( DB_PRIMARY ) instead.
 */
48 protected $db;
49
/**
 * Number of pages that could be handled: incremented in checkNamespace() for
 * every row whose action was not aborted and whose move/merge (if run) succeeded.
 * Dry-run rows that would be moved or merged are counted too.
 */
54 private $resolvablePages = 0;
55
/**
 * Number of candidate pages found: checkNamespace() adds the row count of each
 * getTargetList() result.
 */
60 private $totalPages = 0;
61
/**
 * Number of link rows for which a valid destination title was found.
 * checkLinkTable() increments it per such row and then subtracts the number of
 * rows it had to delete.
 */
66 private $resolvableLinks = 0;
67
/**
 * Number of link rows examined by checkLinkTable(), valid or not.
 */
72 private $totalLinks = 0;
73
/**
 * Number of link rows removed by the delete query in checkLinkTable()
 * (accumulated from affectedRows()).
 */
79 private $deletedLinks = 0;
80
/**
 * Set the script description and register the command-line options.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Find and fix pages affected by namespace addition/removal' );

	// Each entry: [ option name, help text, whether the option takes a value ].
	// The options are registered in exactly this order.
	$optionSpecs = [
		[
			'fix',
			'Attempt to automatically fix errors and delete broken links',
			false
		],
		[
			'merge',
			'Instead of renaming conflicts, do a history merge with ' .
				'the correct title',
			false
		],
		[
			'add-suffix',
			'Dupes will be renamed with correct namespace with ' .
				'<text> appended after the article name',
			true
		],
		[
			'add-prefix',
			'Dupes will be renamed with correct namespace with ' .
				'<text> prepended before the article name',
			true
		],
		[
			'source-pseudo-namespace',
			'Move all pages with the given source ' .
				'prefix (with an implied colon following it). If --dest-namespace is not specified, ' .
				'the colon will be replaced with a hyphen.',
			true
		],
		[
			'dest-namespace',
			'In combination with --source-pseudo-namespace, ' .
				'specify the namespace ID of the destination.',
			true
		],
		[
			'move-talk',
			'If this is specified, pages in the Talk namespace that ' .
				'begin with a conflicting prefix will be renamed, for example ' .
				'Talk:File:Foo -> File_Talk:Foo',
			false
		],
	];

	foreach ( $optionSpecs as [ $optionName, $helpText, $takesValue ] ) {
		// Third argument is "required": none of these options is mandatory.
		$this->addOption( $optionName, $helpText, false, $takesValue );
	}
}
101
/**
 * Script entry point: collect the command-line options, run either the
 * single-prefix check or the full namespace check, and print a verdict.
 */
public function execute() {
	$options = [];
	$options['fix'] = $this->hasOption( 'fix' );
	$options['merge'] = $this->hasOption( 'merge' );
	$options['add-suffix'] = $this->getOption( 'add-suffix', '' );
	$options['add-prefix'] = $this->getOption( 'add-prefix', '' );
	$options['move-talk'] = $this->hasOption( 'move-talk' );
	$options['source-pseudo-namespace'] = $this->getOption( 'source-pseudo-namespace', '' );
	$options['dest-namespace'] = (int)$this->getOption( 'dest-namespace', 0 );

	// A non-empty pseudo-namespace restricts the run to that one prefix.
	$prefixOnly = $options['source-pseudo-namespace'] !== '';
	$retval = $prefixOnly
		? $this->checkPrefix( $options )
		: $this->checkAll( $options );

	$this->output( $retval ? "\nLooks good!\n" : "\nOh noeees\n" );
}
125
/**
 * Check every interwiki prefix and every namespace name, alias and case
 * variant for conflicting page titles, then check the link tables for
 * every non-zero namespace.
 *
 * @param array $options Associative array of command-line options as built
 *  by execute()
 * @return bool True if checkNamespace() reported success for every prefix
 */
private function checkAll( $options ) {
	$services = MediaWikiServices::getInstance();
	$contLang = $services->getContentLanguage();

	// Map of prefix text => namespace ID. Interwiki prefixes go in first with
	// ID 0 so that a local namespace of the same name overrides them below.
	$spaces = [];
	foreach ( $this->getInterwikiList() as $iwPrefix ) {
		$spaces[$contLang->ucfirst( $iwPrefix )] = 0;
	}

	// Canonical namespace names (this includes $wgExtraNamespaces) ...
	$canonicalNames = $services->getNamespaceInfo()->getCanonicalNamespaces();
	foreach ( $canonicalNames as $ns => $name ) {
		if ( $name === '' ) {
			continue;
		}
		$spaces[$name] = $ns;
	}
	// ... then the content language's own names ...
	foreach ( $contLang->getNamespaces() as $ns => $name ) {
		if ( $name === '' ) {
			continue;
		}
		$spaces[$name] = $ns;
	}
	// ... and finally its aliases, which are keyed by name rather than by ID.
	foreach ( $contLang->getNamespaceAliases() as $name => $ns ) {
		$spaces[$name] = $ns;
	}

	// The database searches are case-sensitive, so add the case variants of
	// every name. foreach walks the array as it was when the loop started, so
	// the variants added here are not themselves expanded again.
	$capitalLinks = $this->getConfig()->get( MainConfigNames::CapitalLinks );
	foreach ( $spaces as $name => $ns ) {
		$lowerName = $contLang->lc( $name );
		$variants = [
			$contLang->uc( $name ),
			$contLang->ucfirst( $lowerName ),
			$contLang->ucwords( $name ),
			$contLang->ucwords( $lowerName ),
			$contLang->ucwordbreaks( $name ),
			$contLang->ucwordbreaks( $lowerName ),
		];
		if ( !$capitalLinks ) {
			// Also cover titles whose first letter is lower-case.
			$lowerFirstVariants = [];
			foreach ( $variants as $variant ) {
				$lowerFirstVariants[] = $contLang->lcfirst( $variant );
			}
			$lowerFirstVariants[] = $contLang->lcfirst( $name );
			$variants = array_merge( $variants, $lowerFirstVariants );
		}
		foreach ( array_unique( $variants ) as $variant ) {
			if ( $variant === $name ) {
				continue;
			}
			$spaces[$variant] = $ns;
		}
	}

	// Order by namespace index; names sharing an index are ordered by name.
	$origSpaces = $spaces;
	uksort( $spaces, static function ( $a, $b ) use ( $origSpaces ) {
		$byIndex = $origSpaces[$a] <=> $origSpaces[$b];
		if ( $byIndex ) {
			return $byIndex;
		}
		return $a <=> $b;
	} );

	// Every prefix is checked even after a failure; failures only clear $ok.
	$ok = true;
	foreach ( $spaces as $name => $ns ) {
		if ( !$this->checkNamespace( $ns, $name, $options ) ) {
			$ok = false;
		}
	}

	$this->output(
		"{$this->totalPages} pages to fix, " .
		"{$this->resolvablePages} were resolvable.\n\n"
	);

	foreach ( $spaces as $name => $ns ) {
		if ( $ns == 0 ) {
			// Interwiki prefixes are skipped: link destinations are only fixed
			// up for real namespaces. E.g. if a page has [[Foo:Bar]] and a Foo
			// namespace is then introduced, pagelinks must be updated to point
			// at NS_FOO. Had "Foo" become an interwiki or language prefix
			// instead, the link would have to move to the iwlinks or langlinks
			// table; that case is left unhandled, the consequences are not
			// especially severe.
			// @fixme Handle interwiki links, and pagelinks to Category:, File:
			// which probably need reparsing.
			continue;
		}

		$this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options );
		$this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options );

		// The redirect table has interwiki links mixed in, which must be
		// filtered out. For example [[w:Foo:Bar]] would have rd_interwiki=w
		// and rd_namespace=0, and would otherwise match the query for a
		// conflicting namespace "Foo". Local redirects are matched with
		// rd_interwiki either NULL or the empty string, one pass for each.
		foreach ( [ null, '' ] as $localInterwikiValue ) {
			$this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
				[ 'rd_interwiki' => $localInterwikiValue ] );
		}
	}

	$this->output(
		"{$this->totalLinks} links to fix, " .
		"{$this->resolvableLinks} were resolvable, " .
		"{$this->deletedLinks} were deleted.\n"
	);

	return $ok;
}
246
/**
 * Collect the 'iw_prefix' value of every entry returned by the interwiki
 * lookup's getAllPrefixes().
 *
 * @return array List of interwiki prefixes
 */
private function getInterwikiList() {
	$interwikiLookup = MediaWikiServices::getInstance()->getInterwikiLookup();

	return array_column( $interwikiLookup->getAllPrefixes(), 'iw_prefix' );
}
254
/**
 * Find the pages whose title starts with "$name:" and, for each one, decide
 * whether it can be moved, merged, moved to an alternate title, or must be
 * left alone. The chosen action is only carried out when --fix was given;
 * otherwise it is just logged as a dry run.
 *
 * @param int $ns Destination namespace ID (0 for an interwiki prefix)
 * @param string $name Prefix text to look for, without the trailing colon
 * @param array $options Associative array of command-line options as built
 *  by execute()
 * @return bool True if no page had to be aborted and no move/merge failed
 */
private function checkNamespace( $ns, $name, $options ) {
	$targets = $this->getTargetList( $ns, $name, $options );
	$count = $targets->numRows();
	$this->totalPages += $count;
	if ( $count == 0 ) {
		return true;
	}

	$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';

	$ok = true;
	foreach ( $targets as $row ) {
		// Find the new title and determine the action to take

		$newTitle = $this->getDestinationTitle(
			$ns, $name, $row->page_namespace, $row->page_title );
		$logStatus = false;
		if ( !$newTitle ) {
			if ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
				$logStatus = 'invalid title and --add-prefix not specified';
				$action = 'abort';
			} else {
				$action = 'alternate';
			}
		} elseif ( $newTitle->exists() ) {
			if ( $options['merge'] ) {
				// canMerge() fills in $logStatus when it refuses the merge.
				if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
					$action = 'merge';
				} else {
					$action = 'abort';
				}
			} elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
				$action = 'abort';
				$logStatus = 'dest title exists and --add-prefix not specified';
			} else {
				$action = 'alternate';
			}
		} else {
			$action = 'move';
			$logStatus = 'no conflict';
		}
		if ( $action === 'alternate' ) {
			// Keep the resolved namespace in its own variable. Assigning it to
			// $ns would change the namespace used for every following row of
			// this loop (e.g. after one talk page handled via --move-talk, all
			// later pages would be sent to the talk namespace).
			[ $destNs, $dbk ] = $this->getDestination( $ns, $name, $row->page_namespace,
				$row->page_title );
			$newTitle = $this->getAlternateTitle( $destNs, $dbk, $options );
			if ( !$newTitle ) {
				$action = 'abort';
				$logStatus = 'alternate title is invalid';
			} elseif ( $newTitle->exists() ) {
				$action = 'abort';
				$logStatus = 'alternate title conflicts';
			} else {
				$action = 'move';
				$logStatus = 'alternate';
			}
		}

		// Take the action or log a dry run message

		$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
		$pageOK = true;

		switch ( $action ) {
			case 'abort':
				$this->output( "$logTitle *** $logStatus\n" );
				$pageOK = false;
				break;
			case 'move':
				$this->output( "$logTitle -> " .
					$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );

				if ( $options['fix'] ) {
					$pageOK = $this->movePage( $row->page_id, $newTitle );
				}
				break;
			case 'merge':
				$this->output( "$logTitle => " .
					$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );

				if ( $options['fix'] ) {
					$pageOK = $this->mergePage( $row, $newTitle );
				}
				break;
		}

		// A dry-run move/merge leaves $pageOK true, so it counts as resolvable.
		if ( $pageOK ) {
			$this->resolvablePages++;
		} else {
			$ok = false;
		}
	}

	return $ok;
}
357
/**
 * Walk one link table in batches of 500 rows, looking for links recorded in
 * namespace 0 whose title starts with "$name:", and (with --fix) repoint
 * them at the real namespace. Rows that still match after the update is
 * attempted with IGNORE are deleted.
 *
 * @param string $table Link table name
 * @param string $fieldPrefix Column prefix of that table, e.g. 'pl'
 * @param int $ns Destination namespace ID
 * @param string $name Prefix text to look for, without the trailing colon
 * @param array $options Associative array of command-line options as built
 *  by execute()
 * @param array $extraConds Extra conditions merged into the SELECT only
 */
private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
	$extraConds = []
) {
	$dbw = $this->getDB( DB_PRIMARY );
	$services = MediaWikiServices::getInstance();
	$lbFactory = $services->getDBLoadBalancerFactory();
	$linksMigration = $services->getLinksMigration();

	$fromField = "{$fieldPrefix}_from";
	$batchSize = 500;

	// Tables known to LinksMigration get their query layout and title fields
	// from it; any other table is read directly using the plain column names.
	$usesLinksMigration = isset( $linksMigration::$mapping[$table] );
	if ( $usesLinksMigration ) {
		$queryInfo = $linksMigration->getQueryInfo( $table );
		[ $namespaceField, $titleField ] = $linksMigration->getTitleFields( $table );
	} else {
		$namespaceField = "{$fieldPrefix}_namespace";
		$titleField = "{$fieldPrefix}_title";
		$queryInfo = [
			'tables' => [ $table ],
			'fields' => [ $namespaceField, $titleField ],
			'joins' => []
		];
	}

	$selectFields = array_merge( [ $fromField ], $queryInfo['fields'] );
	$selectOptions = [
		'ORDER BY' => [ $titleField, $fromField ],
		'LIMIT' => $batchSize
	];

	// Continuation condition; empty for the first batch.
	$batchConds = [];

	do {
		$conds = array_merge(
			$batchConds,
			$extraConds,
			[
				$namespaceField => 0,
				$titleField . $dbw->buildLike( "$name:", $dbw->anyString() )
			]
		);
		$res = $dbw->select(
			$queryInfo['tables'],
			$selectFields,
			$conds,
			__METHOD__,
			$selectOptions,
			$queryInfo['joins']
		);

		if ( $res->numRows() == 0 ) {
			break;
		}

		$rowsToDeleteIfStillExists = [];

		foreach ( $res as $row ) {
			$linkFrom = $row->$fromField;
			$linkNs = $row->$namespaceField;
			$linkDbk = $row->$titleField;

			$logTitle = "from={$linkFrom} ns={$linkNs} dbk={$linkDbk}";
			$destTitle = $this->getDestinationTitle( $ns, $name, $linkNs, $linkDbk );
			$this->totalLinks++;
			if ( !$destTitle ) {
				$this->output( "$table $logTitle *** INVALID\n" );
				continue;
			}
			$this->resolvableLinks++;
			if ( !$options['fix'] ) {
				$this->output( "$table $logTitle -> " .
					$destTitle->getPrefixedDBkey() . " DRY RUN\n" );
				continue;
			}

			if ( $usesLinksMigration ) {
				$setValue = $linksMigration->getLinksConditions( $table, $destTitle );
				$whereCondition = $linksMigration->getLinksConditions(
					$table,
					new TitleValue( 0, $linkDbk )
				);
				$deleteCondition = $linksMigration->getLinksConditions(
					$table,
					new TitleValue( (int)$linkNs, $linkDbk )
				);
			} else {
				$setValue = [
					$namespaceField => $destTitle->getNamespace(),
					$titleField => $destTitle->getDBkey()
				];
				$whereCondition = [
					$namespaceField => 0,
					$titleField => $linkDbk
				];
				$deleteCondition = [
					$namespaceField => $linkNs,
					$titleField => $linkDbk,
				];
			}

			// Repoint the link. IGNORE is passed so a failing row does not
			// abort the run; such a row is removed by the delete below.
			$dbw->update(
				$table,
				$setValue,
				array_merge( [ $fromField => $linkFrom ], $whereCondition ),
				__METHOD__,
				[ 'IGNORE' ]
			);

			$rowsToDeleteIfStillExists[] = $dbw->makeList(
				array_merge( [ $fromField => $linkFrom ], $deleteCondition ),
				IDatabase::LIST_AND
			);

			$this->output( "$table $logTitle -> " .
				$destTitle->getPrefixedDBkey() . "\n"
			);
		}

		if ( $options['fix'] && $rowsToDeleteIfStillExists !== [] ) {
			$dbw->delete(
				$table,
				$dbw->makeList( $rowsToDeleteIfStillExists, IDatabase::LIST_OR ),
				__METHOD__
			);

			// Deleted rows were counted as resolvable above; move them over
			// to the deleted tally instead.
			$deletedNow = $dbw->affectedRows();
			$this->deletedLinks += $deletedNow;
			$this->resolvableLinks -= $deletedNow;
		}

		// Resume the next batch just after the last row seen in this one.
		// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
		$encLastTitle = $dbw->addQuotes( $row->$titleField );
		// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
		$encLastFrom = $dbw->addQuotes( $row->$fromField );

		$batchConds = [
			"$titleField > $encLastTitle " .
			"OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)"
		];

		$lbFactory->waitForReplication();
	} while ( true );
}
504
/**
 * Check the single pseudo-namespace prefix given on the command line
 * against the requested destination namespace.
 *
 * @param array $options Associative array of command-line options as built
 *  by execute()
 * @return bool Result of checkNamespace() for that prefix
 */
private function checkPrefix( $options ) {
	$pseudoNamespace = $options['source-pseudo-namespace'];
	$destNamespace = $options['dest-namespace'];

	$this->output( "Checking prefix \"$pseudoNamespace\" vs namespace $destNamespace\n" );

	return $this->checkNamespace( $destNamespace, $pseudoNamespace, $options );
}
519
/**
 * Select the pages whose title starts with "$name:". Only the main
 * namespace is searched, unless --move-talk was given and $ns is a subject
 * namespace, in which case the Talk namespace is searched as well.
 *
 * @param int $ns Destination namespace ID
 * @param string $name Prefix text to look for, without the trailing colon
 * @param array $options Associative array of command-line options as built
 *  by execute()
 * @return IResultWrapper Rows with page_id, page_title and page_namespace
 */
private function getTargetList( $ns, $name, $options ) {
	$dbw = $this->getDB( DB_PRIMARY );

	$checkNamespaces = NS_MAIN;
	if ( $options['move-talk'] ) {
		$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
		if ( $nsInfo->isSubject( $ns ) ) {
			$checkNamespaces = [ NS_MAIN, NS_TALK ];
		}
	}

	$fields = [ 'page_id', 'page_title', 'page_namespace' ];
	$conds = [
		'page_namespace' => $checkNamespaces,
		'page_title' . $dbw->buildLike( "$name:", $dbw->anyString() ),
	];

	return $dbw->select( 'page', $fields, $conds, __METHOD__ );
}
555
/**
 * Work out the namespace and DB key a conflicting title should end up with.
 *
 * @param int $ns Destination namespace ID (0 for an interwiki prefix)
 * @param string $name Prefix text, without the trailing colon
 * @param int $sourceNs Namespace the page or link currently sits in
 * @param string $sourceDbk Current DB key, starting with "$name:"
 * @return array [ destination namespace ID, destination DB key ]
 */
private function getDestination( $ns, $name, $sourceNs, $sourceDbk ) {
	// Drop the leading "$name:" from the current DB key.
	$remainder = substr( $sourceDbk, strlen( "$name:" ) );

	// For an interwiki prefix the title stays in namespace 0, so keep the
	// prefix and use '-' in place of ':'.
	$dbk = ( $ns == 0 ) ? "$name-" . $remainder : $remainder;

	$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
	// A page found in Talk: via --move-talk goes to the talk namespace that
	// is associated with the destination.
	$isTalkMove = $sourceNs == NS_TALK && $nsInfo->isSubject( $ns );
	$destNS = $isTalkMove ? $nsInfo->getTalk( $ns ) : $ns;

	return [ $destNS, $dbk ];
}
578
/**
 * Build the Title a conflicting page or link should be moved to.
 *
 * @param int $ns Destination namespace ID (0 for an interwiki prefix)
 * @param string $name Prefix text, without the trailing colon
 * @param int $sourceNs Namespace the page or link currently sits in
 * @param string $sourceDbk Current DB key, starting with "$name:"
 * @return Title|false False if no title could be made or it cannot exist
 */
private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk ) {
	[ $destNS, $dbk ] = $this->getDestination( $ns, $name, $sourceNs, $sourceDbk );
	$candidate = Title::makeTitleSafe( $destNS, $dbk );

	return ( $candidate && $candidate->canExist() ) ? $candidate : false;
}
595
/**
 * Build a fallback title by wrapping the DB key in the --add-prefix and
 * --add-suffix texts.
 *
 * @param int $ns Namespace ID of the fallback title
 * @param string $dbk DB key to wrap
 * @param array $options Associative array of command-line options as built
 *  by execute()
 * @return Title|null|false False when neither prefix nor suffix was given,
 *  otherwise whatever Title::makeTitleSafe() returns
 */
private function getAlternateTitle( $ns, $dbk, $options ) {
	$prefix = $options['add-prefix'];
	$suffix = $options['add-suffix'];

	if ( $prefix != '' || $suffix != '' ) {
		return Title::makeTitleSafe( $ns, $prefix . $dbk . $suffix );
	}

	return false;
}
614
/**
 * Move a page by rewriting its page row directly, then bring the
 * *_from_namespace columns of the link tables in line with the new namespace.
 *
 * @param int $id Page ID of the page to move
 * @param LinkTarget $newLinkTarget Destination title
 * @return bool Always true
 */
private function movePage( $id, LinkTarget $newLinkTarget ) {
	$dbw = $this->getDB( DB_PRIMARY );
	$destNamespace = $newLinkTarget->getNamespace();

	$dbw->update(
		'page',
		[
			'page_namespace' => $destNamespace,
			'page_title' => $newLinkTarget->getDBkey(),
		],
		[ 'page_id' => $id ],
		__METHOD__
	);

	// Link tables that record the namespace of the linking page, keyed by
	// table name with the column prefix as value.
	$fieldPrefixByTable = [
		'pagelinks' => 'pl',
		'templatelinks' => 'tl',
		'imagelinks' => 'il',
	];
	foreach ( $fieldPrefixByTable as $table => $fieldPrefix ) {
		$dbw->update(
			$table,
			[ "{$fieldPrefix}_from_namespace" => $destNamespace ],
			[ "{$fieldPrefix}_from" => $id ],
			__METHOD__
		);
	}

	return true;
}
654
/**
 * Decide whether the history of the misplaced page can be merged into the
 * existing destination page. A merge is refused when the source page's
 * latest revision is newer than the destination's, or when either latest
 * revision cannot be loaded.
 *
 * @param int $id Page ID of the misplaced (source) page
 * @param LinkTarget $linkTarget Existing destination title
 * @param string|false &$logStatus Set to the reason when the merge is
 *  refused; left untouched otherwise
 * @return bool True if the merge may go ahead
 */
private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
	$revisionLookup = MediaWikiServices::getInstance()->getRevisionLookup();
	$latestDest = $revisionLookup->getRevisionByTitle( $linkTarget, 0,
		IDBAccessObject::READ_LATEST );
	$latestSource = $revisionLookup->getRevisionByPageId( $id, 0,
		IDBAccessObject::READ_LATEST );

	// Either lookup returns null when no revision is found (for instance a
	// page row without revisions). Refuse the merge instead of failing with
	// a call to getTimestamp() on null.
	if ( !$latestSource || !$latestDest ) {
		$logStatus = 'cannot merge since a latest revision could not be loaded';
		return false;
	}

	if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
		$logStatus = 'cannot merge since source is later';
		return false;
	}

	return true;
}
680
/**
 * Merge the misplaced page into the existing destination page: reassign all
 * of its revisions to the destination page ID, delete its page row, and then
 * run the links cleanup for the deleted page.
 *
 * @param stdClass $row Page row with page_id, page_namespace and page_title
 * @param Title $newTitle Existing destination title
 * @return bool Always true
 */
private function mergePage( $row, Title $newTitle ) {
	$dbw = $this->getDB( DB_PRIMARY );

	$id = $row->page_id;

	// Construct the WikiPage object we will need later, while the
	// page_id still exists. Note that this cannot use makeTitleSafe(),
	// we are deliberately constructing an invalid title.
	$sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	$sourceTitle->resetArticleID( $id );
	$wikiPage = MediaWikiServices::getInstance()->getWikiPageFactory()->newFromTitle( $sourceTitle );
	$wikiPage->loadPageData( WikiPage::READ_LATEST );

	$destId = $newTitle->getArticleID();

	// Reassign the revisions and drop the old page row in one transaction.
	$this->beginTransaction( $dbw, __METHOD__ );
	$dbw->update( 'revision',
		// SET
		[ 'rev_page' => $destId ],
		// WHERE
		[ 'rev_page' => $id ],
		__METHOD__
	);

	$dbw->delete( 'page', [ 'page_id' => $id ], __METHOD__ );

	$this->commitTransaction( $dbw, __METHOD__ );

	/* Call LinksDeletionUpdate to delete outgoing links from the old title,
	 * and update category counts.
	 *
	 * Calling external code with a fake broken Title is a fairly dubious
	 * idea. It's necessary because it's quite a lot of code to duplicate,
	 * but that also makes it fragile since it would be easy for someone to
	 * accidentally introduce an assumption of title validity to the code we
	 * are calling.
	 */
	DeferredUpdates::addUpdate( new LinksDeletionUpdate( $wikiPage ) );
	// Run the queued update now rather than leaving it pending.
	DeferredUpdates::doUpdates();

	return true;
}
729}
730
// Name the class implementing this script, then load the file named by
// RUN_MAINTENANCE_IF_MAIN (presumably the runner that executes $maintClass
// when this file is the script being invoked; defined outside this file).
731$maintClass = NamespaceDupes::class;
732require_once RUN_MAINTENANCE_IF_MAIN;
getDB()
const NS_MAIN
Definition Defines.php:64
const NS_TALK
Definition Defines.php:65
static addUpdate(DeferrableUpdate $update, $stage=self::POSTSEND)
Add an update to the pending update queue for execution at the appropriate time.
static doUpdates( $unused=null, $stage=self::ALL)
Consume and execute all pending updates.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
Update object handling the cleanup of links tables after a page was deleted.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Maintenance script that checks for articles to fix after adding/deleting namespaces.
execute()
Do the actual work.
__construct()
Default constructor.
IMaintainableDatabase $db
Represents a page (or page fragment) title within MediaWiki.
Represents a title within MediaWiki.
Definition Title.php:49
exists( $flags=0)
Check if page exists.
Definition Title.php:3477
getPrefixedDBkey()
Get the prefixed database key form.
Definition Title.php:1878
canExist()
Can this title represent a page in the wiki's database?
Definition Title.php:1232
getArticleID( $flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition Title.php:2824
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition Title.php:664
static makeTitle( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition Title.php:638
getNamespace()
Get the namespace index.
getDBkey()
Get the main part of the link target, in canonical database form.
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:39
Advanced database interface for IDatabase handles that include maintenance methods.
Result wrapper for grabbing data queried from an IDatabase object.
$maintClass
const DB_PRIMARY
Definition defines.php:28