MediaWiki REL1_34
namespaceDupes.php
Go to the documentation of this file.
1<?php
27require_once __DIR__ . '/Maintenance.php';
28
33
41
/**
 * Database handle.
 *
 * @var IMaintainableDatabase
 */
protected $db;

/** @var int Number of affected pages that were (or, in a dry run, could be) handled */
private $resolvablePages = 0;

/** @var int Number of affected pages found */
private $totalPages = 0;

/** @var int Number of link-table rows for which a valid destination title was found */
private $resolvableLinks = 0;

/** @var int Number of affected link-table rows found */
private $totalLinks = 0;
52
/**
 * Register the script description and its command-line options.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Find and fix pages affected by namespace addition/removal' );

	// Each entry is: option name, help text, whether the option takes a value.
	// No option is required. The registration order below is significant only
	// in that it is the order in which the options were originally declared.
	$optionSpecs = [
		[
			'fix',
			'Attempt to automatically fix errors',
			false
		],
		[
			'merge',
			"Instead of renaming conflicts, do a history merge with the correct title",
			false
		],
		[
			'add-suffix',
			"Dupes will be renamed with correct namespace with <text> appended " .
				"after the article name",
			true
		],
		[
			'add-prefix',
			"Dupes will be renamed with correct namespace with <text> prepended " .
				"before the article name",
			true
		],
		[
			'source-pseudo-namespace',
			"Move all pages with the given source prefix (with an implied colon " .
				"following it). If --dest-namespace is not specified, the colon " .
				"will be replaced with a hyphen.",
			true
		],
		[
			'dest-namespace',
			"In combination with --source-pseudo-namespace, specify the namespace " .
				"ID of the destination.",
			true
		],
		[
			'move-talk',
			"If this is specified, pages in the Talk namespace that begin with a " .
				"conflicting prefix will be renamed, for example " .
				"Talk:File:Foo -> File_Talk:Foo",
			false
		],
	];

	foreach ( $optionSpecs as list( $optName, $helpText, $takesValue ) ) {
		$this->addOption( $optName, $helpText, false, $takesValue );
	}
}
73
/**
 * Entry point: collect the command-line options, run either the single
 * pseudo-namespace check or the full namespace check, and print a verdict.
 */
public function execute() {
	$sourcePrefix = $this->getOption( 'source-pseudo-namespace', '' );

	$options = [
		'fix' => $this->hasOption( 'fix' ),
		'merge' => $this->hasOption( 'merge' ),
		'add-suffix' => $this->getOption( 'add-suffix', '' ),
		'add-prefix' => $this->getOption( 'add-prefix', '' ),
		'move-talk' => $this->hasOption( 'move-talk' ),
		'source-pseudo-namespace' => $sourcePrefix,
		'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) ),
	];

	// An explicit source prefix restricts the run to that one prefix.
	$allGood = ( $sourcePrefix !== '' )
		? $this->checkPrefix( $options )
		: $this->checkAll( $options );

	$this->output( $allGood ? "\nLooks good!\n" : "\nOh noeees\n" );
}
96
/**
 * Check every known namespace name, alias and interwiki prefix (plus their
 * case variants) for pages and link-table rows stored under the wrong
 * namespace, and report the totals.
 *
 * @param array $options Associative array of command-line options as built
 *  by execute()
 * @return bool True if checkNamespace() reported success for every name
 */
private function checkAll( $options ) {
	$services = MediaWikiServices::getInstance();
	$contLang = $services->getContentLanguage();

	// Interwiki prefixes are registered first with index 0 so that any local
	// namespace of the same name, added below, takes precedence over them.
	$spaces = [];
	foreach ( $this->getInterwikiList() as $iwPrefix ) {
		$spaces[$contLang->ucfirst( $iwPrefix )] = 0;
	}

	// Canonical names (which include $wgExtraNamespaces), then the content
	// language's localised names. Both lists map index => name.
	$nameLists = [
		$services->getNamespaceInfo()->getCanonicalNamespaces(),
		$contLang->getNamespaces(),
	];
	foreach ( $nameLists as $nameList ) {
		foreach ( $nameList as $ns => $name ) {
			if ( $name !== '' ) {
				$spaces[$name] = $ns;
			}
		}
	}

	// Configured aliases, then the content language's aliases. Both lists
	// map name => index.
	$aliasLists = [
		$this->getConfig()->get( 'NamespaceAliases' ),
		$contLang->getNamespaceAliases(),
	];
	foreach ( $aliasLists as $aliasList ) {
		foreach ( $aliasList as $name => $ns ) {
			$spaces[$name] = $ns;
		}
	}

	// The database lookups are case-sensitive, so every plausible
	// capitalisation of each name has to be checked too. The loop walks the
	// names collected so far; variants added inside it are not revisited.
	$capitalLinks = $this->getConfig()->get( 'CapitalLinks' );
	foreach ( $spaces as $name => $ns ) {
		$lowered = $contLang->lc( $name );
		$variants = [
			$contLang->uc( $name ),
			$contLang->ucfirst( $lowered ),
			$contLang->ucwords( $name ),
			$contLang->ucwords( $lowered ),
			$contLang->ucwordbreaks( $name ),
			$contLang->ucwordbreaks( $lowered ),
		];
		if ( !$capitalLinks ) {
			// Titles may start with a lowercase letter on this wiki, so add
			// the lowercase-first form of every variant and of the name itself.
			$lcFirstVariants = [];
			foreach ( $variants as $variant ) {
				$lcFirstVariants[] = $contLang->lcfirst( $variant );
			}
			$lcFirstVariants[] = $contLang->lcfirst( $name );
			$variants = array_merge( $variants, $lcFirstVariants );
		}
		foreach ( array_unique( $variants ) as $variant ) {
			if ( $variant !== $name ) {
				$spaces[$variant] = $ns;
			}
		}
	}

	// Order by namespace index; names sharing an index are ordered by name.
	$indexByName = $spaces;
	uksort( $spaces, function ( $left, $right ) use ( $indexByName ) {
		$byIndex = $indexByName[$left] <=> $indexByName[$right];
		return $byIndex ?: ( $left <=> $right );
	} );

	// Pass 1: pages whose title starts with "<name>:".
	$allOk = true;
	foreach ( $spaces as $name => $ns ) {
		if ( !$this->checkNamespace( $ns, $name, $options ) ) {
			$allOk = false;
		}
	}

	$this->output( "{$this->totalPages} pages to fix, " .
		"{$this->resolvablePages} were resolvable.\n\n" );

	// Pass 2: link destinations.
	//
	// The redirect table mixes interwiki redirects in with local ones, e.g.
	// [[w:Foo:Bar]] is stored with rd_interwiki=w and rd_namespace=0 and
	// would match a conflicting namespace "Foo" unless filtered out. Hence
	// the redirect table is queried twice, once for rd_interwiki IS NULL and
	// once for rd_interwiki = ''.
	$linkTables = [
		[ 'pagelinks', 'pl', [] ],
		[ 'templatelinks', 'tl', [] ],
		[ 'redirect', 'rd', [ 'rd_interwiki' => null ] ],
		[ 'redirect', 'rd', [ 'rd_interwiki' => '' ] ],
	];
	foreach ( $spaces as $name => $ns ) {
		/* Only names with a non-zero index are processed, i.e. not the
		 * interwiki prefixes.
		 *
		 * When a page contains [[Foo:Bar]] and a Foo namespace is later
		 * introduced, the stored link target has to move into that
		 * namespace. Had "Foo" instead become an interwiki or language
		 * prefix, the row would belong in iwlinks or langlinks; that is
		 * deliberately left unhandled, as the consequences are not
		 * especially severe.
		 * @fixme Handle interwiki links, and pagelinks to Category:, File:
		 * which probably need reparsing.
		 */
		if ( $ns == 0 ) {
			continue;
		}
		foreach ( $linkTables as list( $table, $fieldPrefix, $extraConds ) ) {
			$this->checkLinkTable( $table, $fieldPrefix, $ns, $name, $options, $extraConds );
		}
	}

	$this->output( "{$this->totalLinks} links to fix, " .
		"{$this->resolvableLinks} were resolvable.\n" );

	return $allOk;
}
215
/**
 * Get the list of interwiki prefixes known to the interwiki lookup service.
 *
 * @return array List of the 'iw_prefix' value of every row returned by
 *  InterwikiLookup::getAllPrefixes()
 */
private function getInterwikiList() {
	$rows = MediaWikiServices::getInstance()->getInterwikiLookup()->getAllPrefixes();

	// array_values() gives a plain 0-based list regardless of the row keys.
	return array_values( array_map(
		static function ( $row ) {
			return $row['iw_prefix'];
		},
		$rows
	) );
}
230
/**
 * Check a given prefix and try to move the pages carrying it into the given
 * destination namespace.
 *
 * For each page returned by getTargetList() an action is chosen (move, merge
 * or abort), logged, and carried out only when the 'fix' option is set.
 *
 * @param int $ns Destination namespace index
 * @param string $name Prefix (without the trailing colon) to look for
 * @param array $options Associative array of command-line options
 * @return bool True if there was nothing to do or every page was handled
 *  without an abort or a failed move/merge
 */
private function checkNamespace( $ns, $name, $options ) {
	$targets = $this->getTargetList( $ns, $name, $options );
	$found = $targets->numRows();
	$this->totalPages += $found;
	if ( $found == 0 ) {
		return true;
	}

	$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
	$allOk = true;

	foreach ( $targets as $row ) {
		// Step 1: work out the destination and what to do. The action stays
		// 'abort' unless one of the branches below finds a viable plan.
		$action = 'abort';
		$logStatus = false;
		$newTitle = $this->getDestinationTitle(
			$ns, $name, $row->page_namespace, $row->page_title );

		if ( !$newTitle ) {
			$logStatus = 'invalid title';
		} elseif ( !$newTitle->exists() ) {
			$action = 'move';
			$logStatus = 'no conflict';
		} elseif ( $options['merge'] ) {
			// canMerge() fills in $logStatus when it refuses.
			if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
				$action = 'merge';
			}
		} elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
			$logStatus = 'dest title exists and --add-prefix not specified';
		} else {
			$newTitle = $this->getAlternateTitle( $newTitle, $options );
			if ( !$newTitle ) {
				$logStatus = 'alternate title is invalid';
			} elseif ( $newTitle->exists() ) {
				$logStatus = 'title conflict';
			} else {
				$action = 'move';
				$logStatus = 'alternate';
			}
		}

		// Step 2: log the decision and, unless this is a dry run, act on it.
		$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
		$pageOK = true;

		if ( $action == 'abort' ) {
			$this->output( "$logTitle *** $logStatus\n" );
			$pageOK = false;
		} elseif ( $action == 'move' ) {
			$this->output( "$logTitle -> " .
				$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );
			if ( $options['fix'] ) {
				$pageOK = $this->movePage( $row->page_id, $newTitle );
			}
		} elseif ( $action == 'merge' ) {
			$this->output( "$logTitle => " .
				$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );
			if ( $options['fix'] ) {
				$pageOK = $this->mergePage( $row, $newTitle );
			}
		}

		if ( $pageOK ) {
			$this->resolvablePages++;
		} else {
			$allOk = false;
		}
	}

	return $allOk;
}
324
/**
 * Check and repair the destination fields in a link table.
 *
 * Rows whose target is stored in namespace 0 with a title starting with
 * "<name>:" are walked in batches ordered by (title, from). Each row is
 * counted, logged, and - when the 'fix' option is set - rewritten to point
 * at the title computed by getDestinationTitle().
 *
 * @param string $table Table name
 * @param string $fieldPrefix Column name prefix, e.g. 'pl' for pagelinks
 * @param int $ns Destination namespace index
 * @param string $name Prefix (without the trailing colon) to look for
 * @param array $options Associative array of command-line options
 * @param array $extraConds Extra query conditions ANDed into the select
 */
private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
	$extraConds = []
) {
	$dbw = $this->getDB( DB_MASTER );

	$batchConds = [];
	$fromField = "{$fieldPrefix}_from";
	$namespaceField = "{$fieldPrefix}_namespace";
	$titleField = "{$fieldPrefix}_title";
	$batchSize = 500;
	while ( true ) {
		$res = $dbw->select(
			$table,
			[ $fromField, $namespaceField, $titleField ],
			array_merge( $batchConds, $extraConds, [
				$namespaceField => 0,
				$titleField . $dbw->buildLike( "$name:", $dbw->anyString() )
			] ),
			__METHOD__,
			[
				'ORDER BY' => [ $titleField, $fromField ],
				'LIMIT' => $batchSize
			]
		);

		if ( $res->numRows() == 0 ) {
			break;
		}
		foreach ( $res as $row ) {
			$logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
				"dbk={$row->$titleField}";
			$destTitle = $this->getDestinationTitle(
				$ns, $name, $row->$namespaceField, $row->$titleField );
			$this->totalLinks++;
			if ( !$destTitle ) {
				$this->output( "$table $logTitle *** INVALID\n" );
				continue;
			}
			$this->resolvableLinks++;
			if ( !$options['fix'] ) {
				$this->output( "$table $logTitle -> " .
					$destTitle->getPrefixedDBkey() . " DRY RUN\n" );
				continue;
			}

			// UPDATE IGNORE: a row that would collide with an existing key
			// is left untouched rather than aborting the run.
			$dbw->update( $table,
				// SET
				[
					$namespaceField => $destTitle->getNamespace(),
					$titleField => $destTitle->getDBkey()
				],
				// WHERE
				[
					$namespaceField => 0,
					$titleField => $row->$titleField,
					$fromField => $row->$fromField
				],
				__METHOD__,
				[ 'IGNORE' ]
			);
			$this->output( "$table $logTitle -> " .
				$destTitle->getPrefixedDBkey() . "\n" );
		}

		// $row still holds the last row of this batch; the next batch
		// resumes strictly after its (title, from) position.
		$encLastTitle = $dbw->addQuotes( $row->$titleField );
		$encLastFrom = $dbw->addQuotes( $row->$fromField );

		$batchConds = [
			"$titleField > $encLastTitle " .
			"OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)" ];

		// Let the replica DBs catch up before starting the next batch of
		// writes, so a large run does not build up replication lag.
		wfWaitForSlaves();
	}
}
407
/**
 * Check the single pseudo-namespace given by the 'source-pseudo-namespace'
 * option against the namespace given by the 'dest-namespace' option.
 *
 * @param array $options Associative array of command-line options
 * @return bool Result of checkNamespace() for that prefix
 */
private function checkPrefix( $options ) {
	$sourcePrefix = $options['source-pseudo-namespace'];
	$destNs = $options['dest-namespace'];

	$this->output( 'Checking prefix "' . $sourcePrefix . '" vs namespace ' . $destNs . "\n" );

	return $this->checkNamespace( $destNs, $sourcePrefix, $options );
}
422
/**
 * Find pages in the main namespace (and, with --move-talk, the Talk
 * namespace) whose title starts with "<name>:".
 *
 * @param int $ns Destination namespace index
 * @param string $name Prefix (without the trailing colon) to look for
 * @param array $options Associative array of command-line options
 * @return IResultWrapper Rows with page_id, page_title and page_namespace
 */
private function getTargetList( $ns, $name, $options ) {
	$dbw = $this->getDB( DB_MASTER );

	// Talk pages are only searched when requested and when the destination
	// is a subject namespace.
	$includeTalk = $options['move-talk'] &&
		MediaWikiServices::getInstance()->getNamespaceInfo()->isSubject( $ns );
	$checkNamespaces = $includeTalk ? [ NS_MAIN, NS_TALK ] : NS_MAIN;

	$fields = [ 'page_id', 'page_title', 'page_namespace' ];
	$conds = [
		'page_namespace' => $checkNamespaces,
		'page_title' . $dbw->buildLike( "$name:", $dbw->anyString() ),
	];

	return $dbw->select( 'page', $fields, $conds, __METHOD__ );
}
458
/**
 * Get the preferred destination title for a given target page.
 *
 * The leading "<name>:" is stripped from the source DB key. For destination
 * index 0 the prefix is put back with a hyphen in place of the colon. A
 * source in NS_TALK with a subject destination goes to the destination's
 * talk namespace.
 *
 * @param int $ns Destination namespace index
 * @param string $name Prefix (without the trailing colon) being resolved
 * @param int $sourceNs Namespace the page or link is currently stored under
 * @param string $sourceDbk DB key the page or link is currently stored under
 * @return Title|false The destination, or false if no valid title that can
 *  exist could be built
 */
private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk ) {
	$remainder = substr( $sourceDbk, strlen( "$name:" ) );
	// Index 0 means an interwiki prefix: use '-' as an alternate encoding
	// of ':' so the page stays in the main namespace.
	$dbk = ( $ns == 0 ) ? "$name-" . $remainder : $remainder;

	$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
	$destNS = $ns;
	if ( $sourceNs == NS_TALK && $nsInfo->isSubject( $ns ) ) {
		// An associated talk page picked up by the --move-talk feature.
		$destNS = $nsInfo->getTalk( $destNS );
	}

	$candidate = Title::makeTitleSafe( $destNS, $dbk );

	return ( $candidate && $candidate->canExist() ) ? $candidate : false;
}
485
/**
 * Get an alternative title to move a page to.
 *
 * The 'add-prefix' and 'add-suffix' options are wrapped around the DB key
 * of the given target. If the resulting title already exists, the prefix
 * and suffix are applied again to that result, and so on, until a free
 * title is found or no valid title can be built.
 *
 * Previously every pass rebuilt the very same key from $linkTarget, so an
 * existing alternate title made the loop spin forever.
 *
 * @param LinkTarget $linkTarget Title that is already taken
 * @param array $options Associative array of command-line options
 * @return Title|bool A title that does not exist yet, or false if neither
 *  option is set or no usable title could be derived
 */
private function getAlternateTitle( LinkTarget $linkTarget, $options ) {
	$prefix = $options['add-prefix'];
	$suffix = $options['add-suffix'];
	if ( $prefix == '' && $suffix == '' ) {
		return false;
	}

	$ns = $linkTarget->getNamespace();
	$dbk = $linkTarget->getDBkey();
	while ( true ) {
		$title = Title::makeTitleSafe( $ns, $prefix . $dbk . $suffix );
		if ( !$title ) {
			// Invalid; this is also expected to be where an ever-growing
			// key ends up once it is too long to be a title - TODO confirm
			// the length limit enforced by makeTitleSafe().
			return false;
		}
		if ( !$title->exists() ) {
			return $title;
		}

		// Taken as well: build the next candidate on top of this one. If
		// title normalisation swallowed the additions (e.g. a prefix made
		// only of underscores), the key has not grown and retrying could
		// never produce anything new, so give up instead of looping.
		$nextDbk = $title->getDBkey();
		if ( strlen( $nextDbk ) <= strlen( $dbk ) ) {
			return false;
		}
		$dbk = $nextDbk;
	}
}
511
/**
 * Move a page by rewriting its row in the page table, then bring the
 * *_from_namespace columns of the link tables in line.
 *
 * @param int $id Page ID of the page to move
 * @param LinkTarget $newLinkTarget Destination namespace and DB key
 * @return bool Always true
 */
private function movePage( $id, LinkTarget $newLinkTarget ) {
	$dbw = $this->getDB( DB_MASTER );
	$destNs = $newLinkTarget->getNamespace();

	$dbw->update(
		'page',
		// SET
		[
			'page_namespace' => $destNs,
			'page_title' => $newLinkTarget->getDBkey(),
		],
		// WHERE
		[ 'page_id' => $id ],
		__METHOD__
	);

	// Link tables that record the namespace of the linking page,
	// as table name => column prefix.
	$fromNamespaceTables = [
		'pagelinks' => 'pl',
		'templatelinks' => 'tl',
		'imagelinks' => 'il',
	];
	foreach ( $fromNamespaceTables as $table => $fieldPrefix ) {
		$dbw->update(
			$table,
			// SET
			[ "{$fieldPrefix}_from_namespace" => $destNs ],
			// WHERE
			[ "{$fieldPrefix}_from" => $id ],
			__METHOD__
		);
	}

	return true;
}
549
/**
 * Determine if we can merge a page.
 *
 * A merge is refused when the latest revision of the source page is newer
 * than the latest revision of the destination, or when either of those
 * revisions cannot be loaded.
 *
 * @param int $id Page ID of the source page
 * @param LinkTarget $linkTarget Destination title
 * @param string|bool &$logStatus Set to the reason when the merge is
 *  refused; left untouched otherwise
 * @return bool True if the histories can be merged
 */
private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
	$latestDest = Revision::newFromTitle( $linkTarget, 0, Revision::READ_LATEST );
	$latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST );

	// Either lookup may come back empty (e.g. a page row without a usable
	// latest revision). Report that instead of dying on a null object.
	if ( !$latestDest || !$latestSource ) {
		$logStatus = 'cannot merge since the latest revision could not be loaded';
		return false;
	}

	if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
		$logStatus = 'cannot merge since source is later';
		return false;
	}

	return true;
}
572
/**
 * Merge page histories: reassign every revision of the source page to the
 * destination page, delete the source page row, and queue the link cleanup
 * for the old title.
 *
 * @param stdClass $row Source page row with page_id, page_namespace and
 *  page_title
 * @param Title $newTitle Existing destination title
 * @return bool Always true
 */
private function mergePage( $row, Title $newTitle ) {
	$dbw = $this->getDB( DB_MASTER );
	$sourceId = $row->page_id;

	// The WikiPage for the source has to be built and loaded now, while its
	// page row is still there. makeTitleSafe() cannot be used here: the
	// title being constructed is deliberately an invalid one.
	$oldTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	$oldTitle->resetArticleID( $sourceId );
	$oldPage = new WikiPage( $oldTitle );
	$oldPage->loadPageData( 'fromdbmaster' );

	$targetId = $newTitle->getArticleID();

	// Move the revisions and drop the source page in one transaction.
	$this->beginTransaction( $dbw, __METHOD__ );
	$dbw->update(
		'revision',
		// SET
		[ 'rev_page' => $targetId ],
		// WHERE
		[ 'rev_page' => $sourceId ],
		__METHOD__
	);
	$dbw->delete( 'page', [ 'page_id' => $sourceId ], __METHOD__ );
	$this->commitTransaction( $dbw, __METHOD__ );

	/* Run LinksDeletionUpdate to delete outgoing links from the old title
	 * and update category counts.
	 *
	 * Handing a fake, broken Title to external code is fairly dubious. It
	 * is done because duplicating that code would be a lot of work, but it
	 * is fragile: the called code could easily start assuming that the
	 * title is valid.
	 */
	DeferredUpdates::addUpdate( new LinksDeletionUpdate( $oldPage ) );
	DeferredUpdates::doUpdates();

	return true;
}
620}
621
622$maintClass = NamespaceDupes::class;
623require_once RUN_MAINTENANCE_IF_MAIN;
getDB()
wfWaitForSlaves( $ifWritesSince=null, $wiki=false, $cluster=false, $timeout=null)
Waits for the replica DBs to catch up to the master position.
const RUN_MAINTENANCE_IF_MAIN
Update object handling the cleanup of links tables after a page was deleted.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option exists.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Maintenance script that checks for articles to fix after adding/deleting namespaces.
getAlternateTitle(LinkTarget $linkTarget, $options)
Get an alternative title to move a page to.
checkPrefix( $options)
Move the given pseudo-namespace, either replacing the colon with a hyphen (useful for pseudo-namespac...
checkLinkTable( $table, $fieldPrefix, $ns, $name, $options, $extraConds=[])
Check and repair the destination fields in a link table.
mergePage( $row, Title $newTitle)
Merge page histories.
canMerge( $id, LinkTarget $linkTarget, &$logStatus)
Determine if we can merge a page.
execute()
Do the actual work.
movePage( $id, LinkTarget $newLinkTarget)
Move a page.
__construct()
Default constructor.
checkNamespace( $ns, $name, $options)
Check a given prefix and try to move it into the given destination namespace.
IMaintainableDatabase $db
checkAll( $options)
Check all namespaces.
getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk)
Get the preferred destination title for a given target page.
getTargetList( $ns, $name, $options)
Find pages in main and talk namespaces that have a prefix of the new namespace so we know titles that...
getInterwikiList()
Get the interwiki list.
static newFromPageId( $pageId, $revId=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given page ID.
Definition Revision.php:157
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target.
Definition Revision.php:138
Represents a title within MediaWiki.
Definition Title.php:42
getArticleID( $flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition Title.php:3126
Class representing a MediaWiki article and history.
Definition WikiPage.php:47
const NS_MAIN
Definition Defines.php:69
const NS_TALK
Definition Defines.php:70
getNamespace()
Get the namespace index.
getDBkey()
Get the main part with underscores.
Advanced database interface for IDatabase handles that include maintenance methods.
Result wrapper for grabbing data queried from an IDatabase object.
$maintClass
const DB_MASTER
Definition defines.php:26