MediaWiki REL1_35
namespaceDupes.php
Go to the documentation of this file.
1<?php
27require_once __DIR__ . '/Maintenance.php';
28
34
42
/**
 * @var IMaintainableDatabase
 */
46 protected $db;
47
/**
 * Total number of pages that need fixing that are automatically resolvable.
 * Incremented in checkNamespace() for every page whose action succeeded
 * (or would succeed in a dry run).
 * @var int
 */
52 private $resolvablePages = 0;
53
/**
 * Total number of pages that need fixing; checkNamespace() adds the number
 * of conflicting rows found for each prefix.
 * @var int
 */
58 private $totalPages = 0;
59
/**
 * Total number of links that need fixing that are automatically resolvable.
 * checkLinkTable() decrements it again for rows it ends up deleting.
 * @var int
 */
64 private $resolvableLinks = 0;
65
/**
 * Total number of erroneous links seen by checkLinkTable().
 * @var int
 */
70 private $totalLinks = 0;
71
/**
 * Total number of link rows deleted by checkLinkTable() because they still
 * existed after the UPDATE IGNORE (presumably because the corrected target
 * row already existed -- confirm against the table's unique indexes).
 * @var int
 */
77 private $deletedLinks = 0;
78
/**
 * Register the script description and all supported command-line options.
 */
public function __construct() {
    parent::__construct();
    $this->addDescription( 'Find and fix pages affected by namespace addition/removal' );

    // Option name => [ help text, whether the option takes an argument ].
    // None of the options is required. The order here is the registration order.
    $optionSpecs = [
        'fix' => [
            'Attempt to automatically fix errors and delete broken links',
            false
        ],
        'merge' => [
            "Instead of renaming conflicts, do a history merge with " .
                "the correct title",
            false
        ],
        'add-suffix' => [
            "Dupes will be renamed with correct namespace with " .
                "<text> appended after the article name",
            true
        ],
        'add-prefix' => [
            "Dupes will be renamed with correct namespace with " .
                "<text> prepended before the article name",
            true
        ],
        'source-pseudo-namespace' => [
            "Move all pages with the given source " .
                "prefix (with an implied colon following it). If --dest-namespace is not specified, " .
                "the colon will be replaced with a hyphen.",
            true
        ],
        'dest-namespace' => [
            "In combination with --source-pseudo-namespace, " .
                "specify the namespace ID of the destination.",
            true
        ],
        'move-talk' => [
            "If this is specified, pages in the Talk namespace that " .
                "begin with a conflicting prefix will be renamed, for example " .
                "Talk:File:Foo -> File_Talk:Foo",
            false
        ],
    ];

    foreach ( $optionSpecs as $optionName => list( $helpText, $takesArgument ) ) {
        $this->addOption( $optionName, $helpText, false, $takesArgument );
    }
}
99
/**
 * Entry point: collect the command-line options, run either the single
 * pseudo-namespace check or the full check, and print the overall verdict.
 */
public function execute() {
    $pseudoNamespace = $this->getOption( 'source-pseudo-namespace', '' );

    $options = [
        'fix' => $this->hasOption( 'fix' ),
        'merge' => $this->hasOption( 'merge' ),
        'add-suffix' => $this->getOption( 'add-suffix', '' ),
        'add-prefix' => $this->getOption( 'add-prefix', '' ),
        'move-talk' => $this->hasOption( 'move-talk' ),
        'source-pseudo-namespace' => $pseudoNamespace,
        'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) )
    ];

    // An explicit source prefix restricts the run to that one prefix;
    // otherwise every known namespace name and interwiki prefix is checked.
    $retval = $pseudoNamespace !== ''
        ? $this->checkPrefix( $options )
        : $this->checkAll( $options );

    $this->output( $retval ? "\nLooks good!\n" : "\nOh noeees\n" );
}
123
/**
 * Check all namespaces.
 *
 * Builds a map of every interwiki prefix, canonical/localised namespace
 * name, alias and case variant, checks each one for conflicting pages, and
 * then repairs link-table rows for the non-interwiki entries.
 *
 * @param array $options Associative array of validated command-line options
 * @return bool False if checkNamespace() failed for any name; link-table
 *  results do not influence the return value
 */
private function checkAll( $options ) {
    $services = MediaWikiServices::getInstance();
    $contLang = $services->getContentLanguage();

    // Prefix text => namespace index. Interwiki prefixes are recorded with
    // index 0 and are listed first, so they'll be overridden by any
    // conflicting local namespaces added below.
    $spaces = [];
    foreach ( $this->getInterwikiList() as $prefix ) {
        $spaces[$contLang->ucfirst( $prefix )] = 0;
    }

    // Now pull in all canonical (this includes $wgExtraNamespaces) and
    // content-language namespace names...
    $namespaceSources = [
        $services->getNamespaceInfo()->getCanonicalNamespaces(),
        $contLang->getNamespaces(),
    ];
    foreach ( $namespaceSources as $namespaceNames ) {
        foreach ( $namespaceNames as $ns => $name ) {
            if ( $name !== '' ) {
                $spaces[$name] = $ns;
            }
        }
    }
    // ...and the aliases, which are keyed by name rather than by index.
    foreach ( $contLang->getNamespaceAliases() as $name => $ns ) {
        $spaces[$name] = $ns;
    }

    // We'll need to check for differently-cased keys as well,
    // since we're doing case-sensitive searches in the db.
    $capitalLinks = $this->getConfig()->get( 'CapitalLinks' );
    foreach ( $spaces as $name => $ns ) {
        $lowered = $contLang->lc( $name );
        $variants = [
            $contLang->uc( $name ),
            $contLang->ucfirst( $lowered ),
            $contLang->ucwords( $name ),
            $contLang->ucwords( $lowered ),
            $contLang->ucwordbreaks( $name ),
            $contLang->ucwordbreaks( $lowered ),
        ];
        if ( !$capitalLinks ) {
            // Titles may start with a lowercase letter, so add the
            // lowercase-first form of every variant and of the name itself.
            $variants = array_merge(
                $variants,
                array_map( [ $contLang, 'lcfirst' ], $variants ),
                [ $contLang->lcfirst( $name ) ]
            );
        }
        foreach ( array_unique( $variants ) as $variant ) {
            if ( $variant !== $name ) {
                $spaces[$variant] = $ns;
            }
        }
    }

    // Sort by namespace index, and if there are two with the same index,
    // break the tie by sorting by name
    $indexByName = $spaces;
    uksort( $spaces, function ( $left, $right ) use ( $indexByName ) {
        $byIndex = $indexByName[$left] <=> $indexByName[$right];
        return $byIndex !== 0 ? $byIndex : $left <=> $right;
    } );

    // Every name is checked even after a failure; the result only records
    // whether any of them failed.
    $ok = true;
    foreach ( $spaces as $name => $ns ) {
        if ( !$this->checkNamespace( $ns, $name, $options ) ) {
            $ok = false;
        }
    }

    $this->output(
        "{$this->totalPages} pages to fix, " .
        "{$this->resolvablePages} were resolvable.\n\n"
    );

    // Link tables to repair: [ table, field prefix, extra conditions ].
    //
    // The redirect table has interwiki links randomly mixed in, we need to
    // filter those out. For example [[w:Foo:Bar]] would have rd_interwiki=w
    // and rd_namespace=0, which would match the query for a conflicting
    // namespace "Foo" if filtering wasn't done. Both NULL and the empty
    // string are queried as "no interwiki", in separate passes.
    $linkTables = [
        [ 'pagelinks', 'pl', [] ],
        [ 'templatelinks', 'tl', [] ],
        [ 'redirect', 'rd', [ 'rd_interwiki' => null ] ],
        [ 'redirect', 'rd', [ 'rd_interwiki' => '' ] ],
    ];

    foreach ( $spaces as $name => $ns ) {
        /* Fix up link destinations for non-interwiki links only.
         *
         * For example if a page has [[Foo:Bar]] and then a Foo namespace
         * is introduced, pagelinks needs to be updated to have
         * page_namespace = NS_FOO.
         *
         * If instead an interwiki prefix was introduced called "Foo",
         * the link should instead be moved to the iwlinks table. If a new
         * language is introduced called "Foo", or if there is a pagelink
         * [[fr:Bar]] when interlanguage magic links are turned on, the
         * link would have to be moved to the langlinks table. Let's put
         * those cases in the too-hard basket for now. The consequences are
         * not especially severe.
         * @fixme Handle interwiki links, and pagelinks to Category:, File:
         * which probably need reparsing.
         */
        if ( $ns == 0 ) {
            continue;
        }
        foreach ( $linkTables as list( $table, $fieldPrefix, $extraConds ) ) {
            $this->checkLinkTable( $table, $fieldPrefix, $ns, $name, $options, $extraConds );
        }
    }

    $this->output(
        "{$this->totalLinks} links to fix, " .
        "{$this->resolvableLinks} were resolvable, " .
        "{$this->deletedLinks} were deleted.\n"
    );

    return $ok;
}
244
/**
 * Get the interwiki list.
 *
 * @return array List of the 'iw_prefix' value of every row returned by the
 *  interwiki lookup service
 */
private function getInterwikiList() {
    $rows = MediaWikiServices::getInstance()->getInterwikiLookup()->getAllPrefixes();

    // array_values() first so the result is a plain 0-based list regardless
    // of how the lookup keyed its rows.
    return array_map(
        function ( $row ) {
            return $row['iw_prefix'];
        },
        array_values( $rows )
    );
}
259
/**
 * Check a given prefix and try to move it into the given destination namespace.
 *
 * For every page whose title starts with "<name>:" this decides whether the
 * page can be moved, merged into the existing destination, or must be left
 * alone, logs the decision, and (with --fix) carries it out.
 *
 * @param int $ns Destination namespace id
 * @param string $name Prefix text to look for
 * @param array $options Associative array of validated command-line options
 * @return bool True if every matching page was (or, in a dry run, could be)
 *  handled; also true when nothing matched
 */
private function checkNamespace( $ns, $name, $options ) {
    $targets = $this->getTargetList( $ns, $name, $options );
    $count = $targets->numRows();
    $this->totalPages += $count;
    if ( $count == 0 ) {
        return true;
    }

    $dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
    $allResolved = true;

    foreach ( $targets as $row ) {
        // Find the new title and determine the action to take. Start from
        // "abort" and only upgrade to move/merge when that is known to be safe.
        $action = 'abort';
        $logStatus = false;
        $newTitle = $this->getDestinationTitle(
            $ns, $name, $row->page_namespace, $row->page_title );

        if ( !$newTitle ) {
            $logStatus = 'invalid title';
        } elseif ( !$newTitle->exists() ) {
            $action = 'move';
            $logStatus = 'no conflict';
        } elseif ( $options['merge'] ) {
            // canMerge() fills in $logStatus when it refuses.
            if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
                $action = 'merge';
            }
        } elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
            $logStatus = 'dest title exists and --add-prefix not specified';
        } else {
            $newTitle = $this->getAlternateTitle( $newTitle, $options );
            if ( !$newTitle ) {
                $logStatus = 'alternate title is invalid';
            } elseif ( $newTitle->exists() ) {
                $logStatus = 'title conflict';
            } else {
                $action = 'move';
                $logStatus = 'alternate';
            }
        }

        // Take the action or log a dry run message
        $logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
        $pageOK = true;

        if ( $action === 'abort' ) {
            $this->output( "$logTitle *** $logStatus\n" );
            $pageOK = false;
        } elseif ( $action === 'move' ) {
            $this->output( "$logTitle -> " .
                $newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );
            if ( $options['fix'] ) {
                $pageOK = $this->movePage( $row->page_id, $newTitle );
            }
        } elseif ( $action === 'merge' ) {
            $this->output( "$logTitle => " .
                $newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );
            if ( $options['fix'] ) {
                $pageOK = $this->mergePage( $row, $newTitle );
            }
        }

        if ( $pageOK ) {
            $this->resolvablePages++;
        } else {
            $allResolved = false;
        }
    }

    return $allResolved;
}
353
/**
 * Check and repair the destination fields in a link table.
 *
 * Walks, in batches ordered by (title, from), over all rows of $table that
 * are filed under namespace 0 with a title starting with "<name>:" and
 * rewrites them to point at the real namespace. Without --fix the rows are
 * only reported.
 *
 * @param string $table The table name
 * @param string $fieldPrefix The field prefix, e.g. "pl" for pagelinks
 * @param int $ns Destination namespace id
 * @param string $name Prefix text that is interpreted as a namespace name
 * @param array $options Associative array of validated command-line options
 * @param array $extraConds Extra conditions for the select query
 */
private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
    $extraConds = []
) {
    $dbw = $this->getDB( DB_MASTER );
    $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

    $fromField = "{$fieldPrefix}_from";
    $namespaceField = "{$fieldPrefix}_namespace";
    $titleField = "{$fieldPrefix}_title";

    // Parts of the batch query that are the same for every batch.
    $batchSize = 500;
    $selectFields = [ $fromField, $namespaceField, $titleField ];
    $selectOptions = [
        'ORDER BY' => [ $titleField, $fromField ],
        'LIMIT' => $batchSize
    ];
    $prefixConds = [
        $namespaceField => 0,
        $titleField . $dbw->buildLike( "$name:", $dbw->anyString() )
    ];

    // Continuation condition; empty for the first batch.
    $batchConds = [];

    while ( true ) {
        $res = $dbw->select(
            $table,
            $selectFields,
            array_merge( $batchConds, $extraConds, $prefixConds ),
            __METHOD__,
            $selectOptions
        );

        if ( $res->numRows() == 0 ) {
            break;
        }

        $rowsToDeleteIfStillExists = [];
        $lastTitle = null;
        $lastFrom = null;

        foreach ( $res as $row ) {
            // Remember the position of the most recent row so the next batch
            // can continue after it.
            $lastTitle = $row->$titleField;
            $lastFrom = $row->$fromField;

            $logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
                "dbk={$row->$titleField}";
            $destTitle = $this->getDestinationTitle(
                $ns, $name, $row->$namespaceField, $row->$titleField );
            $this->totalLinks++;

            if ( !$destTitle ) {
                $this->output( "$table $logTitle *** INVALID\n" );
                continue;
            }
            $this->resolvableLinks++;

            if ( $options['fix'] ) {
                $dbw->update( $table,
                    // SET
                    [
                        $namespaceField => $destTitle->getNamespace(),
                        $titleField => $destTitle->getDBkey()
                    ],
                    // WHERE
                    [
                        $namespaceField => 0,
                        $titleField => $row->$titleField,
                        $fromField => $row->$fromField
                    ],
                    __METHOD__,
                    [ 'IGNORE' ]
                );

                // Condition matching the row in its original, uncorrected form.
                $rowsToDeleteIfStillExists[] = $dbw->makeList(
                    [
                        $fromField => $row->$fromField,
                        $namespaceField => $row->$namespaceField,
                        $titleField => $row->$titleField,
                    ],
                    IDatabase::LIST_AND
                );
            }

            $this->output( "$table $logTitle -> " .
                $destTitle->getPrefixedDBkey() . ( $options['fix'] ? "\n" : " DRY RUN\n" ) );
        }

        if ( $options['fix'] && $rowsToDeleteIfStillExists !== [] ) {
            // Rows that still match in their old form were not rewritten by
            // the UPDATE IGNORE above (presumably because the corrected row
            // already exists), so remove them and count them as deleted
            // instead of resolved.
            $dbw->delete(
                $table,
                $dbw->makeList( $rowsToDeleteIfStillExists, IDatabase::LIST_OR ),
                __METHOD__
            );

            $deletedCount = $dbw->affectedRows();
            $this->deletedLinks += $deletedCount;
            $this->resolvableLinks -= $deletedCount;
        }

        $encLastTitle = $dbw->addQuotes( $lastTitle );
        $encLastFrom = $dbw->addQuotes( $lastFrom );

        $batchConds = [
            "$titleField > $encLastTitle " .
            "OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)"
        ];

        $lbFactory->waitForReplication();
    }
}
468
/**
 * Check a single pseudo-namespace prefix, taken from
 * --source-pseudo-namespace, against the namespace id given by
 * --dest-namespace.
 *
 * @param array $options Associative array of validated command-line options
 * @return bool Result of checkNamespace() for that prefix
 */
private function checkPrefix( $options ) {
    $ns = $options['dest-namespace'];
    $prefix = $options['source-pseudo-namespace'];

    $this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" );
    $result = $this->checkNamespace( $ns, $prefix, $options );

    return $result;
}
483
/**
 * Find pages whose title starts with "<name>:" in the main namespace and,
 * when --move-talk is set and $ns is a subject namespace, in the Talk
 * namespace as well.
 *
 * @param int $ns Destination namespace id
 * @param string $name Prefix that is being made a namespace
 * @param array $options Associative array of validated command-line options
 * @return IResultWrapper Rows with page_id, page_title and page_namespace
 */
private function getTargetList( $ns, $name, $options ) {
    $dbw = $this->getDB( DB_MASTER );

    // The subject check is only evaluated when --move-talk was requested.
    $includeTalk = $options['move-talk'] &&
        MediaWikiServices::getInstance()->getNamespaceInfo()->isSubject( $ns );
    $checkNamespaces = $includeTalk ? [ NS_MAIN, NS_TALK ] : NS_MAIN;

    $fields = [
        'page_id',
        'page_title',
        'page_namespace',
    ];
    $conds = [
        'page_namespace' => $checkNamespaces,
        'page_title' . $dbw->buildLike( "$name:", $dbw->anyString() ),
    ];

    return $dbw->select( 'page', $fields, $conds, __METHOD__ );
}
519
/**
 * Get the preferred destination title for a given target page.
 *
 * @param int $ns The destination namespace ID
 * @param string $name The conflicting prefix
 * @param int $sourceNs The source namespace
 * @param string $sourceDbk The source DB key (i.e. page_title)
 * @return Title|false The destination title, or false if it is invalid or
 *  cannot exist
 */
private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk ) {
    // Everything after the "<name>:" prefix.
    $remainder = substr( $sourceDbk, strlen( "$name:" ) );

    // An interwiki (index 0); try an alternate encoding with '-' for ':'
    $dbk = $ns == 0 ? "$name-" . $remainder : $remainder;

    // A Talk-namespace source is an associated talk page moved with the
    // --move-talk feature, so it goes to the talk side of the destination.
    $nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
    $movingTalkPage = $sourceNs == NS_TALK && $nsInfo->isSubject( $ns );
    $destNS = $movingTalkPage ? $nsInfo->getTalk( $ns ) : $ns;

    $newTitle = Title::makeTitleSafe( $destNS, $dbk );
    if ( $newTitle && $newTitle->canExist() ) {
        return $newTitle;
    }

    return false;
}
546
/**
 * Get an alternative title to move a page to.
 *
 * The candidate is the destination DB key with --add-prefix prepended and
 * --add-suffix appended, in the same namespace. The candidate is built
 * exactly once: whether it already exists is NOT checked here, the caller
 * is expected to test exists() and report a title conflict.
 *
 * (The previous implementation retried inside a `while ( true )` loop whose
 * inputs never changed, so an already-existing candidate made the script
 * spin forever instead of reporting the conflict.)
 *
 * @param LinkTarget $linkTarget The preferred (already taken) destination
 * @param array $options Associative array of validated command-line options
 * @return Title|false The candidate title, or false if neither a prefix nor
 *  a suffix was given or the resulting title is invalid
 */
private function getAlternateTitle( LinkTarget $linkTarget, $options ) {
    $prefix = $options['add-prefix'];
    $suffix = $options['add-suffix'];
    if ( $prefix == '' && $suffix == '' ) {
        return false;
    }

    $dbk = $prefix . $linkTarget->getDBkey() . $suffix;
    $title = Title::makeTitleSafe( $linkTarget->getNamespace(), $dbk );
    if ( !$title ) {
        // makeTitleSafe() rejected the key (e.g. too long or illegal characters).
        return false;
    }

    return $title;
}
572
/**
 * Move a page by rewriting its row in the page table directly, then bring
 * the denormalised *_from_namespace columns of the link tables in line.
 *
 * @param int $id The page_id
 * @param LinkTarget $newLinkTarget The new title link target
 * @return bool Always true
 */
private function movePage( $id, LinkTarget $newLinkTarget ) {
    $dbw = $this->getDB( DB_MASTER );
    $newNamespace = $newLinkTarget->getNamespace();

    $dbw->update( 'page',
        // SET
        [
            "page_namespace" => $newNamespace,
            "page_title" => $newLinkTarget->getDBkey(),
        ],
        // WHERE
        [ "page_id" => $id ],
        __METHOD__
    );

    // Update *_from_namespace in links tables (table name => field prefix)
    $fromNamespaceTables = [
        'pagelinks' => 'pl',
        'templatelinks' => 'tl',
        'imagelinks' => 'il',
    ];
    foreach ( $fromNamespaceTables as $table => $fieldPrefix ) {
        $dbw->update( $table,
            // SET
            [ "{$fieldPrefix}_from_namespace" => $newNamespace ],
            // WHERE
            [ "{$fieldPrefix}_from" => $id ],
            __METHOD__
        );
    }

    return true;
}
613
/**
 * Determine if we can merge a page.
 *
 * A merge is only allowed when the source page's latest revision is not
 * newer than the destination's latest revision. Both revisions are read
 * with READ_LATEST.
 *
 * @param int $id The page_id of the source page
 * @param LinkTarget $linkTarget The destination title
 * @param string|bool &$logStatus Set to a human-readable reason when the
 *  merge is refused; left untouched otherwise
 * @return bool True if the merge may go ahead
 */
private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
    $revisionLookup = MediaWikiServices::getInstance()->getRevisionLookup();
    $latestDest = $revisionLookup->getRevisionByTitle( $linkTarget, 0,
        IDBAccessObject::READ_LATEST );
    $latestSource = $revisionLookup->getRevisionByPageId( $id, 0,
        IDBAccessObject::READ_LATEST );

    // Either lookup yields null when no revision can be loaded (e.g. a page
    // row without revisions). Refuse the merge instead of dying on a
    // null method call.
    if ( !$latestSource || !$latestDest ) {
        $logStatus = 'cannot merge since a latest revision could not be loaded';
        return false;
    }

    if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
        $logStatus = 'cannot merge since source is later';
        return false;
    }

    return true;
}
639
/**
 * Merge page histories: re-point every revision of the source page at the
 * destination page, delete the source page row, then run the links
 * deletion update for the removed page.
 *
 * @param stdClass $row Page row of the source page (page_id, page_namespace
 *  and page_title are read)
 * @param Title $newTitle The destination title; must already exist
 * @return bool Always true
 */
private function mergePage( $row, Title $newTitle ) {
    $sourceId = $row->page_id;

    // Build the WikiPage for the source now, while its page_id still exists;
    // it is needed after the row has been deleted. makeTitleSafe() cannot be
    // used here because the title being constructed is deliberately invalid.
    $sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
    $sourceTitle->resetArticleID( $sourceId );
    $sourcePage = new WikiPage( $sourceTitle );
    $sourcePage->loadPageData( 'fromdbmaster' );

    $destId = $newTitle->getArticleID();

    // Reassign the revisions and drop the old page row in one transaction.
    $dbw = $this->getDB( DB_MASTER );
    $this->beginTransaction( $dbw, __METHOD__ );
    $dbw->update( 'revision',
        // SET
        [ 'rev_page' => $destId ],
        // WHERE
        [ 'rev_page' => $sourceId ],
        __METHOD__
    );
    $dbw->delete( 'page', [ 'page_id' => $sourceId ], __METHOD__ );
    $this->commitTransaction( $dbw, __METHOD__ );

    /* Call LinksDeletionUpdate to delete outgoing links from the old title,
     * and update category counts.
     *
     * Handing a fake, broken Title to external code is fragile: the called
     * code could start assuming title validity at any time. It is done
     * anyway because duplicating that code here would be worse.
     */
    DeferredUpdates::addUpdate( new LinksDeletionUpdate( $sourcePage ) );
    DeferredUpdates::doUpdates();

    return true;
}
688}
689
// Name the class implementing this script, then include the file named by
// RUN_MAINTENANCE_IF_MAIN -- presumably the runner that instantiates
// $maintClass when this file is the entry point (confirm in Maintenance.php).
690$maintClass = NamespaceDupes::class;
691require_once RUN_MAINTENANCE_IF_MAIN;
getDB()
const RUN_MAINTENANCE_IF_MAIN
Update object handling the cleanup of links tables after a page was deleted.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Maintenance script that checks for articles to fix after adding/deleting namespaces.
getAlternateTitle(LinkTarget $linkTarget, $options)
Get an alternative title to move a page to.
checkPrefix( $options)
Move the given pseudo-namespace, either replacing the colon with a hyphen (useful for pseudo-namespac...
checkLinkTable( $table, $fieldPrefix, $ns, $name, $options, $extraConds=[])
Check and repair the destination fields in a link table.
mergePage( $row, Title $newTitle)
Merge page histories.
canMerge( $id, LinkTarget $linkTarget, &$logStatus)
Determine if we can merge a page.
execute()
Do the actual work.
movePage( $id, LinkTarget $newLinkTarget)
Move a page.
__construct()
Default constructor.
int $resolvableLinks
Total number of links that need fixing that are automatically resolvable.
int $totalPages
Total number of pages that need fixing.
checkNamespace( $ns, $name, $options)
Check a given prefix and try to move it into the given destination namespace.
IMaintainableDatabase $db
checkAll( $options)
Check all namespaces.
getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk)
Get the preferred destination title for a given target page.
getTargetList( $ns, $name, $options)
Find pages in main and talk namespaces that have a prefix of the new namespace so we know titles that...
int $resolvablePages
Total number of pages that need fixing that are automatically resolvable.
getInterwikiList()
Get the interwiki list.
int $totalLinks
Total number of erroneous links.
int $deletedLinks
Total number of links deleted because they weren't automatically resolvable due to the target alread...
Represents a title within MediaWiki.
Definition Title.php:42
getArticleID( $flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition Title.php:3225
Class representing a MediaWiki article and history.
Definition WikiPage.php:51
const NS_MAIN
Definition Defines.php:70
const NS_TALK
Definition Defines.php:71
getNamespace()
Get the namespace index.
getDBkey()
Get the main part with underscores.
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:38
Advanced database interface for IDatabase handles that include maintenance methods.
Result wrapper for grabbing data queried from an IDatabase object.
$maintClass
const DB_MASTER
Definition defines.php:29