MediaWiki REL1_32
namespaceDupes.php
Go to the documentation of this file.
1<?php
27require_once __DIR__ . '/Maintenance.php';
28
33
41
/** @var IMaintainableDatabase Database handle; assigned in execute() via getDB( DB_MASTER ) */
45 protected $db;
46
/** @var int Number of pages counted as resolvable by checkNamespace() */
47 private $resolvablePages = 0;
/** @var int Number of conflicting pages found by checkNamespace() */
48 private $totalPages = 0;
49
/** @var int Number of link rows for which checkLinkTable() found a valid destination */
50 private $resolvableLinks = 0;
/** @var int Number of conflicting link rows found by checkLinkTable() */
51 private $totalLinks = 0;
52
/**
 * Register the script description and its command-line options.
 */
public function __construct() {
    parent::__construct();
    $this->addDescription( 'Find and fix pages affected by namespace addition/removal' );

    // Option name => [ help text, whether the option takes an argument ].
    // Registration order is kept as listed here.
    $optionSpecs = [
        'fix' => [
            'Attempt to automatically fix errors',
            false
        ],
        'merge' => [
            "Instead of renaming conflicts, do a history merge with " .
                "the correct title",
            false
        ],
        'add-suffix' => [
            "Dupes will be renamed with correct namespace with " .
                "<text> appended after the article name",
            true
        ],
        'add-prefix' => [
            "Dupes will be renamed with correct namespace with " .
                "<text> prepended before the article name",
            true
        ],
        'source-pseudo-namespace' => [
            "Move all pages with the given source " .
                "prefix (with an implied colon following it). If --dest-namespace is not specified, " .
                "the colon will be replaced with a hyphen.",
            true
        ],
        'dest-namespace' => [
            "In combination with --source-pseudo-namespace, " .
                "specify the namespace ID of the destination.",
            true
        ],
        'move-talk' => [
            "If this is specified, pages in the Talk namespace that " .
                "begin with a conflicting prefix will be renamed, for example " .
                "Talk:File:Foo -> File_Talk:Foo",
            false
        ],
    ];

    foreach ( $optionSpecs as $optionName => $spec ) {
        // None of the options is required, hence the constant false.
        $this->addOption( $optionName, $spec[0], false, $spec[1] );
    }
}
73
/**
 * Entry point: collect the command-line options, run either the
 * single-prefix check or the full check, and print a one-line verdict.
 */
public function execute() {
    $this->db = $this->getDB( DB_MASTER );

    // Normalise all options into one array that is handed to every helper.
    $options = [
        'fix' => $this->hasOption( 'fix' ),
        'merge' => $this->hasOption( 'merge' ),
        'add-suffix' => $this->getOption( 'add-suffix', '' ),
        'add-prefix' => $this->getOption( 'add-prefix', '' ),
        'move-talk' => $this->hasOption( 'move-talk' ),
        'source-pseudo-namespace' => $this->getOption( 'source-pseudo-namespace', '' ),
        'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) ),
    ];

    // An explicit pseudo-namespace restricts the run to that one prefix.
    $allResolved = $options['source-pseudo-namespace'] !== ''
        ? $this->checkPrefix( $options )
        : $this->checkAll( $options );

    $this->output( $allResolved ? "\nLooks good!\n" : "\nOh noeees\n" );
}
98
/**
 * Check all namespaces.
 *
 * Builds a map from every known prefix (interwiki prefixes, canonical and
 * content-language namespace names, configured and language aliases, plus
 * case variants of each) to a namespace ID, then runs checkNamespace() for
 * every entry and checkLinkTable() for every entry with a non-zero ID.
 *
 * @param array $options Associative array of validated command-line options
 * @return bool True if checkNamespace() succeeded for every prefix
 */
private function checkAll( $options ) {
    // Both settings are read below. Without this declaration they would be
    // undefined locals: configured aliases would be silently ignored and the
    // lowercase-first variants would always be generated.
    global $wgCapitalLinks, $wgNamespaceAliases;

    $contLang = MediaWikiServices::getInstance()->getContentLanguage();
    $spaces = [];

    // List interwikis first, so they'll be overridden
    // by any conflicting local namespaces.
    foreach ( $this->getInterwikiList() as $prefix ) {
        $name = $contLang->ucfirst( $prefix );
        $spaces[$name] = 0;
    }

    // Now pull in all canonical and alias namespaces...
    foreach ( MWNamespace::getCanonicalNamespaces() as $ns => $name ) {
        // This includes $wgExtraNamespaces
        if ( $name !== '' ) {
            $spaces[$name] = $ns;
        }
    }
    foreach ( $contLang->getNamespaces() as $ns => $name ) {
        if ( $name !== '' ) {
            $spaces[$name] = $ns;
        }
    }
    foreach ( $wgNamespaceAliases as $name => $ns ) {
        $spaces[$name] = $ns;
    }
    foreach ( $contLang->getNamespaceAliases() as $name => $ns ) {
        $spaces[$name] = $ns;
    }

    // We'll need to check for lowercase keys as well,
    // since we're doing case-sensitive searches in the db.
    foreach ( $spaces as $name => $ns ) {
        $moreNames = [];
        $moreNames[] = $contLang->uc( $name );
        $moreNames[] = $contLang->ucfirst( $contLang->lc( $name ) );
        $moreNames[] = $contLang->ucwords( $name );
        $moreNames[] = $contLang->ucwords( $contLang->lc( $name ) );
        $moreNames[] = $contLang->ucwordbreaks( $name );
        $moreNames[] = $contLang->ucwordbreaks( $contLang->lc( $name ) );
        if ( !$wgCapitalLinks ) {
            // foreach iterates over a snapshot of the array, so appending
            // inside the loop does not extend the iteration.
            foreach ( $moreNames as $altName ) {
                $moreNames[] = $contLang->lcfirst( $altName );
            }
            $moreNames[] = $contLang->lcfirst( $name );
        }
        foreach ( array_unique( $moreNames ) as $altName ) {
            if ( $altName !== $name ) {
                $spaces[$altName] = $ns;
            }
        }
    }

    // Sort by namespace index, and if there are two with the same index,
    // break the tie by sorting by name
    $origSpaces = $spaces;
    uksort( $spaces, function ( $a, $b ) use ( $origSpaces ) {
        return $origSpaces[$a] <=> $origSpaces[$b]
            ?: $a <=> $b;
    } );

    $ok = true;
    foreach ( $spaces as $name => $ns ) {
        // checkNamespace() is evaluated first so that every prefix is
        // processed even after an earlier failure.
        $ok = $this->checkNamespace( $ns, $name, $options ) && $ok;
    }

    $this->output( "{$this->totalPages} pages to fix, " .
        "{$this->resolvablePages} were resolvable.\n\n" );

    foreach ( $spaces as $name => $ns ) {
        if ( $ns != 0 ) {
            /* Fix up link destinations for non-interwiki links only.
             *
             * For example if a page has [[Foo:Bar]] and then a Foo namespace
             * is introduced, pagelinks needs to be updated to have
             * page_namespace = NS_FOO.
             *
             * If instead an interwiki prefix was introduced called "Foo",
             * the link should instead be moved to the iwlinks table. If a new
             * language is introduced called "Foo", or if there is a pagelink
             * [[fr:Bar]] when interlanguage magic links are turned on, the
             * link would have to be moved to the langlinks table. Let's put
             * those cases in the too-hard basket for now. The consequences are
             * not especially severe.
             * @fixme Handle interwiki links, and pagelinks to Category:, File:
             * which probably need reparsing.
             */

            $this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options );
            $this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options );

            // The redirect table has interwiki links randomly mixed in, we
            // need to filter those out. For example [[w:Foo:Bar]] would
            // have rd_interwiki=w and rd_namespace=0, which would match the
            // query for a conflicting namespace "Foo" if filtering wasn't done.
            $this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
                [ 'rd_interwiki' => null ] );
            $this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
                [ 'rd_interwiki' => '' ] );
        }
    }

    $this->output( "{$this->totalLinks} links to fix, " .
        "{$this->resolvableLinks} were resolvable.\n" );

    // Only page-level results feed the return value; link-table results
    // are reported in the output above but do not affect it.
    return $ok;
}
215
/**
 * Get the interwiki list.
 *
 * @return array List of the 'iw_prefix' value of every row returned by the
 *  interwiki lookup service
 */
private function getInterwikiList() {
    $rows = MediaWikiServices::getInstance()->getInterwikiLookup()->getAllPrefixes();

    // array_values() discards whatever keys the lookup result carried, so
    // the caller always receives a plain zero-based list.
    return array_values( array_map( function ( $row ) {
        return $row['iw_prefix'];
    }, $rows ) );
}
230
/**
 * Check a given prefix and try to move it into the given destination namespace.
 *
 * For every page returned by getTargetList() this picks one of three
 * actions (abort, move, merge), logs it, and carries it out when the
 * 'fix' option is set.
 *
 * @param int $ns Destination namespace ID
 * @param string $name Prefix to look for (without the trailing colon)
 * @param array $options Associative array of validated command-line options
 * @return bool True if no matching page was aborted or failed
 */
private function checkNamespace( $ns, $name, $options ) {
    $targets = $this->getTargetList( $ns, $name, $options );
    $found = $targets->numRows();
    $this->totalPages += $found;
    if ( $found == 0 ) {
        return true;
    }

    $dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
    $allOk = true;

    foreach ( $targets as $row ) {
        // Step 1: decide what to do. Start from "abort" and upgrade to
        // move/merge only when a usable destination is found.
        $action = 'abort';
        $logStatus = false;
        $newTitle = $this->getDestinationTitle( $ns, $name,
            $row->page_namespace, $row->page_title, $options );

        if ( !$newTitle ) {
            $logStatus = 'invalid title';
        } elseif ( !$newTitle->exists() ) {
            $action = 'move';
            $logStatus = 'no conflict';
        } elseif ( $options['merge'] ) {
            // canMerge() fills in $logStatus itself when it refuses.
            if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
                $action = 'merge';
            }
        } elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
            $logStatus = 'dest title exists and --add-prefix not specified';
        } else {
            $newTitle = $this->getAlternateTitle( $newTitle, $options );
            if ( !$newTitle ) {
                $logStatus = 'alternate title is invalid';
            } elseif ( $newTitle->exists() ) {
                $logStatus = 'title conflict';
            } else {
                $action = 'move';
                $logStatus = 'alternate';
            }
        }

        // Step 2: log the decision and, outside of dry-run mode, apply it.
        $logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
        $pageOK = true;

        if ( $action === 'abort' ) {
            $this->output( "$logTitle *** $logStatus\n" );
            $pageOK = false;
        } elseif ( $action === 'move' ) {
            $this->output( "$logTitle -> " .
                $newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );
            if ( $options['fix'] ) {
                $pageOK = $this->movePage( $row->page_id, $newTitle );
            }
        } else {
            // Only 'merge' remains.
            $this->output( "$logTitle => " .
                $newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );
            if ( $options['fix'] ) {
                $pageOK = $this->mergePage( $row, $newTitle );
            }
        }

        if ( $pageOK ) {
            $this->resolvablePages++;
        } else {
            $allOk = false;
        }
    }

    return $allOk;
}
324
/**
 * Check and repair the destination fields in a link table.
 *
 * Scans the table in batches for rows whose namespace field is 0 and whose
 * title field starts with "$name:", and points them at the title returned
 * by getDestinationTitle(). Rows are only rewritten when the 'fix' option
 * is set; otherwise a dry-run line is printed for each.
 *
 * @param string $table The link table name
 * @param string $fieldPrefix The field prefix, e.g. 'pl' for pagelinks
 * @param int $ns Destination namespace ID
 * @param string $name Conflicting prefix (without the trailing colon)
 * @param array $options Associative array of validated command-line options
 * @param array $extraConds Extra conditions for the select query
 */
private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
    $extraConds = []
) {
    $batchConds = [];
    $fromField = "{$fieldPrefix}_from";
    $namespaceField = "{$fieldPrefix}_namespace";
    $titleField = "{$fieldPrefix}_title";
    $batchSize = 500;
    while ( true ) {
        $res = $this->db->select(
            $table,
            [ $fromField, $namespaceField, $titleField ],
            array_merge( $batchConds, $extraConds, [
                $namespaceField => 0,
                $titleField . $this->db->buildLike( "$name:", $this->db->anyString() )
            ] ),
            __METHOD__,
            [
                'ORDER BY' => [ $titleField, $fromField ],
                'LIMIT' => $batchSize
            ]
        );

        if ( $res->numRows() == 0 ) {
            break;
        }
        foreach ( $res as $row ) {
            $logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
                "dbk={$row->$titleField}";
            $destTitle = $this->getDestinationTitle( $ns, $name,
                $row->$namespaceField, $row->$titleField, $options );
            $this->totalLinks++;
            if ( !$destTitle ) {
                $this->output( "$table $logTitle *** INVALID\n" );
                continue;
            }
            $this->resolvableLinks++;
            if ( !$options['fix'] ) {
                $this->output( "$table $logTitle -> " .
                    $destTitle->getPrefixedDBkey() . " DRY RUN\n" );
                continue;
            }

            $this->db->update( $table,
                // SET
                [
                    $namespaceField => $destTitle->getNamespace(),
                    $titleField => $destTitle->getDBkey()
                ],
                // WHERE
                [
                    $namespaceField => 0,
                    $titleField => $row->$titleField,
                    $fromField => $row->$fromField
                ],
                __METHOD__,
                [ 'IGNORE' ]
            );
            $this->output( "$table $logTitle -> " .
                $destTitle->getPrefixedDBkey() . "\n" );
        }

        // $row still holds the last row of this batch; continue strictly
        // after its (title, from) pair, matching the ORDER BY above.
        $encLastTitle = $this->db->addQuotes( $row->$titleField );
        $encLastFrom = $this->db->addQuotes( $row->$fromField );

        $batchConds = [
            "$titleField > $encLastTitle " .
            "OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)" ];

        // Let replica DBs catch up between write batches. A dry run
        // writes nothing, so there is nothing to wait for.
        if ( $options['fix'] ) {
            wfWaitForSlaves();
        }
    }
}
405
/**
 * Check a single pseudo-namespace prefix against the destination namespace
 * given on the command line.
 *
 * @param array $options Associative array of validated command-line options;
 *  'source-pseudo-namespace' and 'dest-namespace' are read here
 * @return bool Result of checkNamespace() for that prefix
 */
private function checkPrefix( $options ) {
    $pseudoNamespace = $options['source-pseudo-namespace'];
    $destNamespace = $options['dest-namespace'];

    $this->output( "Checking prefix \"$pseudoNamespace\" vs namespace $destNamespace\n" );

    return $this->checkNamespace( $destNamespace, $pseudoNamespace, $options );
}
420
/**
 * Find pages whose title starts with "$name:" in the main namespace, and
 * also in the Talk namespace when 'move-talk' is set and $ns is a subject
 * namespace.
 *
 * @param int $ns Destination namespace ID
 * @param string $name Prefix to look for (without the trailing colon)
 * @param array $options Associative array of validated command-line options
 * @return mixed Result of the select query, with page_id, page_title and
 *  page_namespace for each matching row
 */
private function getTargetList( $ns, $name, $options ) {
    $searchNamespaces = ( $options['move-talk'] && MWNamespace::isSubject( $ns ) )
        ? [ NS_MAIN, NS_TALK ]
        : NS_MAIN;

    $fields = [ 'page_id', 'page_title', 'page_namespace' ];
    $conds = [
        'page_namespace' => $searchNamespaces,
        'page_title' . $this->db->buildLike( "$name:", $this->db->anyString() ),
    ];

    return $this->db->select( 'page', $fields, $conds, __METHOD__ );
}
451
/**
 * Get the preferred destination title for a given target page.
 *
 * @param int $ns Destination namespace ID; 0 is treated as an interwiki
 *  prefix, for which the colon is replaced by a hyphen instead
 * @param string $name The conflicting prefix (without the trailing colon)
 * @param int $sourceNs Namespace the row currently sits in
 * @param string $sourceDbk DB key of the row, beginning with "$name:"
 * @param array $options Associative array of validated command-line options
 *  (not read here)
 * @return Title|false The destination title, or false if none can be built
 */
private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk, $options ) {
    // Everything after the "<prefix>:" part of the stored key.
    $remainder = substr( $sourceDbk, strlen( "$name:" ) );

    // An interwiki; try an alternate encoding with '-' for ':'
    $destDbk = ( $ns == 0 ) ? "$name-" . $remainder : $remainder;

    // A Talk-namespace source paired with a subject namespace is an
    // associated talk page moved with the --move-talk feature.
    $movingTalkPage = $sourceNs == NS_TALK && MWNamespace::isSubject( $ns );
    $destNS = $movingTalkPage ? MWNamespace::getTalk( $ns ) : $ns;

    $candidate = Title::makeTitleSafe( $destNS, $destDbk );

    return ( $candidate && $candidate->canExist() ) ? $candidate : false;
}
478
/**
 * Get an alternative title to move a page to.
 *
 * Wraps the DB key of $linkTarget in the configured --add-prefix and
 * --add-suffix. The result is NOT checked for existence: the inputs never
 * change, so retrying here could only loop forever on a taken title. The
 * caller is expected to test exists() on the returned title and treat a hit
 * as a title conflict.
 *
 * @param LinkTarget $linkTarget Title that is already taken
 * @param array $options Associative array of validated command-line options
 * @return Title|bool The alternate title, or false if neither a prefix nor
 *  a suffix was given or the resulting title is invalid
 */
private function getAlternateTitle( LinkTarget $linkTarget, $options ) {
    $prefix = $options['add-prefix'];
    $suffix = $options['add-suffix'];
    if ( $prefix == '' && $suffix == '' ) {
        return false;
    }

    $dbk = $prefix . $linkTarget->getDBkey() . $suffix;
    $title = Title::makeTitleSafe( $linkTarget->getNamespace(), $dbk );

    // Keep the documented "false on failure" contract for an invalid title.
    return $title ?: false;
}
504
/**
 * Move a page by rewriting its row in the page table directly, then bring
 * the *_from_namespace columns of the link tables in line with the new
 * namespace.
 *
 * @param int $id The page_id
 * @param LinkTarget $newLinkTarget The new title link target
 * @return bool Always true
 */
private function movePage( $id, LinkTarget $newLinkTarget ) {
    $targetNamespace = $newLinkTarget->getNamespace();

    $this->db->update( 'page',
        [
            "page_namespace" => $targetNamespace,
            "page_title" => $newLinkTarget->getDBkey(),
        ],
        [
            "page_id" => $id,
        ],
        __METHOD__ );

    // Update *_from_namespace in links tables (table name => field prefix)
    $linkTables = [
        'pagelinks' => 'pl',
        'templatelinks' => 'tl',
        'imagelinks' => 'il',
    ];
    foreach ( $linkTables as $table => $fieldPrefix ) {
        $this->db->update( $table,
            // SET
            [ "{$fieldPrefix}_from_namespace" => $targetNamespace ],
            // WHERE
            [ "{$fieldPrefix}_from" => $id ],
            __METHOD__ );
    }

    return true;
}
540
/**
 * Determine if we can merge a page.
 *
 * A merge is refused when the latest revision of the source page is newer
 * than the latest revision of the destination, or when either latest
 * revision cannot be loaded.
 *
 * @param int $id The page_id of the source page
 * @param LinkTarget $linkTarget The destination title
 * @param string|bool &$logStatus Set to the reason for refusal; left
 *  untouched when the merge is allowed
 * @return bool True if the merge may go ahead
 */
private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
    $latestDest = Revision::newFromTitle( $linkTarget, 0, Revision::READ_LATEST );
    $latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST );

    // Either lookup can come back empty; report that instead of crashing
    // on the getTimestamp() calls below.
    if ( !$latestSource ) {
        $logStatus = 'cannot merge since source has no latest revision';
        return false;
    }
    if ( !$latestDest ) {
        $logStatus = 'cannot merge since destination has no latest revision';
        return false;
    }

    if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
        $logStatus = 'cannot merge since source is later';
        return false;
    }

    return true;
}
563
/**
 * Merge page histories.
 *
 * Reassigns every revision of the source page to the destination page,
 * deletes the source page row, then runs a LinksDeletionUpdate for the
 * source page.
 *
 * @param stdClass $row Page row with page_id, page_namespace and page_title
 * @param Title $newTitle The destination title
 * @return bool Always true
 */
private function mergePage( $row, Title $newTitle ) {
    $sourceId = $row->page_id;

    // Construct the WikiPage object we will need later, while the
    // page_id still exists. Note that this cannot use makeTitleSafe(),
    // we are deliberately constructing an invalid title.
    $brokenTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
    $brokenTitle->resetArticleID( $sourceId );
    $sourcePage = new WikiPage( $brokenTitle );
    $sourcePage->loadPageData( 'fromdbmaster' );

    $destId = $newTitle->getArticleID();

    // Re-home the revisions and drop the source page in one transaction.
    $this->beginTransaction( $this->db, __METHOD__ );
    $this->db->update(
        'revision',
        // SET
        [ 'rev_page' => $destId ],
        // WHERE
        [ 'rev_page' => $sourceId ],
        __METHOD__
    );
    $this->db->delete( 'page', [ 'page_id' => $sourceId ], __METHOD__ );
    $this->commitTransaction( $this->db, __METHOD__ );

    /* Call LinksDeletionUpdate to delete outgoing links from the old title,
     * and update category counts.
     *
     * Calling external code with a fake broken Title is a fairly dubious
     * idea. It's necessary because it's quite a lot of code to duplicate,
     * but that also makes it fragile since it would be easy for someone to
     * accidentally introduce an assumption of title validity to the code we
     * are calling.
     */
    DeferredUpdates::addUpdate( new LinksDeletionUpdate( $sourcePage ) );
    DeferredUpdates::doUpdates();

    return true;
}
609}
610
// Record this script's class name in $maintClass, then include the file
// named by RUN_MAINTENANCE_IF_MAIN (presumably the runner that instantiates
// $maintClass when the script is invoked directly; confirm in Maintenance.php).
611$maintClass = NamespaceDupes::class;
612require_once RUN_MAINTENANCE_IF_MAIN;
$wgCapitalLinks
Set this to false to avoid forcing the first letter of links to capitals.
$wgNamespaceAliases
Namespace aliases.
wfWaitForSlaves( $ifWritesSince=null, $wiki=false, $cluster=false, $timeout=null)
Waits for the replica DBs to catch up to the master position.
Update object handling the cleanup of links tables after a page was deleted.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
getDB( $db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
hasOption( $name)
Checks to see if a particular option exists.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Maintenance script that checks for articles to fix after adding/deleting namespaces.
getAlternateTitle(LinkTarget $linkTarget, $options)
Get an alternative title to move a page to.
checkPrefix( $options)
Move the given pseudo-namespace, either replacing the colon with a hyphen (useful for pseudo-namespac...
checkLinkTable( $table, $fieldPrefix, $ns, $name, $options, $extraConds=[])
Check and repair the destination fields in a link table.
mergePage( $row, Title $newTitle)
Merge page histories.
canMerge( $id, LinkTarget $linkTarget, &$logStatus)
Determine if we can merge a page.
execute()
Do the actual work.
getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk, $options)
Get the preferred destination title for a given target page.
movePage( $id, LinkTarget $newLinkTarget)
Move a page.
__construct()
Default constructor.
checkNamespace( $ns, $name, $options)
Check a given prefix and try to move it into the given destination namespace.
IMaintainableDatabase $db
checkAll( $options)
Check all namespaces.
getTargetList( $ns, $name, $options)
Find pages in main and talk namespaces that have a prefix of the new namespace so we know titles that...
getInterwikiList()
Get the interwiki list.
static newFromPageId( $pageId, $revId=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given page ID.
Definition Revision.php:152
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target.
Definition Revision.php:133
Represents a title within MediaWiki.
Definition Title.php:39
getArticleID( $flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition Title.php:3566
Class representing a MediaWiki article and history.
Definition WikiPage.php:44
Result wrapper for grabbing data queried from an IDatabase object.
$res
Definition database.txt:21
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account incomplete not yet checked for validity & $retval
Definition hooks.txt:266
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:2050
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:994
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:302
const NS_MAIN
Definition Defines.php:64
const NS_TALK
Definition Defines.php:65
getNamespace()
Get the namespace index.
getDBkey()
Get the main part with underscores.
Advanced database interface for IDatabase handles that include maintenance methods.
require_once RUN_MAINTENANCE_IF_MAIN
$maintClass
const DB_MASTER
Definition defines.php:26