MediaWiki REL1_31
namespaceDupes.php
Go to the documentation of this file.
1<?php
27require_once __DIR__ . '/Maintenance.php';
28
33
41
/**
 * @var IMaintainableDatabase Database handle used for every query and
 *  write in this script; assigned at the start of execute().
 */
protected $db;

/** @var int Number of conflicting pages whose processing succeeded (or would succeed in a dry run) */
private $resolvablePages = 0;

/** @var int Number of conflicting pages found, summed over all checked prefixes */
private $totalPages = 0;

/** @var int Number of link-table rows for which a valid destination title was found */
private $resolvableLinks = 0;

/** @var int Number of conflicting link-table rows examined */
private $totalLinks = 0;
52
/**
 * Default constructor: registers the script description and every
 * command-line option the script understands.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Find and fix pages affected by namespace addition/removal' );

	// Readability aliases for the positional addOption() arguments
	// ($required, $withArg) used by the options that take a value.
	$optional = false;
	$takesValue = true;

	// Plain switches
	$this->addOption( 'fix', 'Attempt to automatically fix errors' );
	$this->addOption(
		'merge',
		'Instead of renaming conflicts, do a history merge with the correct title'
	);

	// Options carrying a value
	$this->addOption(
		'add-suffix',
		'Dupes will be renamed with correct namespace with <text> appended after the article name',
		$optional,
		$takesValue
	);
	$this->addOption(
		'add-prefix',
		'Dupes will be renamed with correct namespace with <text> prepended before the article name',
		$optional,
		$takesValue
	);
	$this->addOption(
		'source-pseudo-namespace',
		'Move all pages with the given source prefix (with an implied colon following it). ' .
			'If --dest-namespace is not specified, the colon will be replaced with a hyphen.',
		$optional,
		$takesValue
	);
	$this->addOption(
		'dest-namespace',
		'In combination with --source-pseudo-namespace, specify the namespace ID of the destination.',
		$optional,
		$takesValue
	);

	// Another plain switch; kept last so the option order is unchanged
	$this->addOption(
		'move-talk',
		'If this is specified, pages in the Talk namespace that begin with a conflicting ' .
			'prefix will be renamed, for example Talk:File:Foo -> File_Talk:Foo'
	);
}
73
/**
 * Do the actual work: collect the command-line options, run either the
 * single pseudo-namespace check or the full namespace check, and print
 * the overall verdict.
 */
public function execute() {
	$this->db = $this->getDB( DB_MASTER );

	$options = [];

	// Boolean switches: true when the flag was passed
	foreach ( [ 'fix', 'merge', 'move-talk' ] as $flag ) {
		$options[$flag] = $this->hasOption( $flag );
	}

	// String-valued options: empty string when not supplied
	foreach ( [ 'add-suffix', 'add-prefix', 'source-pseudo-namespace' ] as $key ) {
		$options[$key] = $this->getOption( $key, '' );
	}

	// Destination namespace ID, defaulting to 0
	$options['dest-namespace'] = intval( $this->getOption( 'dest-namespace', 0 ) );

	// An explicit source prefix restricts the run to that one prefix
	$succeeded = $options['source-pseudo-namespace'] !== ''
		? $this->checkPrefix( $options )
		: $this->checkAll( $options );

	$this->output( $succeeded ? "\nLooks good!\n" : "\nOh noeees\n" );
}
98
/**
 * Check all namespaces.
 *
 * Builds a map from every known prefix (interwiki prefixes, canonical and
 * content-language namespace names, configured and language aliases, plus
 * case variants of all of those) to a namespace index, runs
 * checkNamespace() for each prefix, and then runs checkLinkTable() on the
 * pagelinks, templatelinks and redirect tables for every prefix whose
 * index is non-zero.
 *
 * @param array $options Associative array of validated command-line options
 * @return bool True if checkNamespace() reported success for every prefix
 */
private function checkAll( $options ) {
	// These are configuration globals. Without this declaration they are
	// undefined local variables and the first $wgContLang call is fatal.
	global $wgContLang, $wgNamespaceAliases, $wgCapitalLinks;

	$spaces = [];

	// List interwikis first, so they'll be overridden
	// by any conflicting local namespaces.
	foreach ( $this->getInterwikiList() as $prefix ) {
		$name = $wgContLang->ucfirst( $prefix );
		$spaces[$name] = 0;
	}

	// Now pull in all canonical and alias namespaces...
	foreach ( MWNamespace::getCanonicalNamespaces() as $ns => $name ) {
		// This includes $wgExtraNamespaces
		if ( $name !== '' ) {
			$spaces[$name] = $ns;
		}
	}
	foreach ( $wgContLang->getNamespaces() as $ns => $name ) {
		if ( $name !== '' ) {
			$spaces[$name] = $ns;
		}
	}
	foreach ( $wgNamespaceAliases as $name => $ns ) {
		$spaces[$name] = $ns;
	}
	foreach ( $wgContLang->getNamespaceAliases() as $name => $ns ) {
		$spaces[$name] = $ns;
	}

	// We'll need to check for lowercase keys as well,
	// since we're doing case-sensitive searches in the db.
	foreach ( $spaces as $name => $ns ) {
		$moreNames = [];
		$moreNames[] = $wgContLang->uc( $name );
		$moreNames[] = $wgContLang->ucfirst( $wgContLang->lc( $name ) );
		$moreNames[] = $wgContLang->ucwords( $name );
		$moreNames[] = $wgContLang->ucwords( $wgContLang->lc( $name ) );
		$moreNames[] = $wgContLang->ucwordbreaks( $name );
		$moreNames[] = $wgContLang->ucwordbreaks( $wgContLang->lc( $name ) );
		if ( !$wgCapitalLinks ) {
			// First letters are not forced to capitals on this wiki, so
			// lower-case-first variants are distinct titles as well.
			foreach ( $moreNames as $altName ) {
				$moreNames[] = $wgContLang->lcfirst( $altName );
			}
			$moreNames[] = $wgContLang->lcfirst( $name );
		}
		foreach ( array_unique( $moreNames ) as $altName ) {
			if ( $altName !== $name ) {
				$spaces[$altName] = $ns;
			}
		}
	}

	// Sort by namespace index, and if there are two with the same index,
	// break the tie by sorting by name
	$origSpaces = $spaces;
	uksort( $spaces, function ( $a, $b ) use ( $origSpaces ) {
		if ( $origSpaces[$a] < $origSpaces[$b] ) {
			return -1;
		} elseif ( $origSpaces[$a] > $origSpaces[$b] ) {
			return 1;
		} elseif ( $a < $b ) {
			return -1;
		} elseif ( $a > $b ) {
			return 1;
		} else {
			return 0;
		}
	} );

	// Check every prefix even after a failure, so that all problems are
	// reported; $ok only records whether any of them failed.
	$ok = true;
	foreach ( $spaces as $name => $ns ) {
		$ok = $this->checkNamespace( $ns, $name, $options ) && $ok;
	}

	$this->output( "{$this->totalPages} pages to fix, " .
		"{$this->resolvablePages} were resolvable.\n\n" );

	foreach ( $spaces as $name => $ns ) {
		if ( $ns != 0 ) {
			/* Fix up link destinations for non-interwiki links only.
			 *
			 * For example if a page has [[Foo:Bar]] and then a Foo namespace
			 * is introduced, pagelinks needs to be updated to have
			 * page_namespace = NS_FOO.
			 *
			 * If instead an interwiki prefix was introduced called "Foo",
			 * the link should instead be moved to the iwlinks table. If a new
			 * language is introduced called "Foo", or if there is a pagelink
			 * [[fr:Bar]] when interlanguage magic links are turned on, the
			 * link would have to be moved to the langlinks table. Let's put
			 * those cases in the too-hard basket for now. The consequences are
			 * not especially severe.
			 * @fixme Handle interwiki links, and pagelinks to Category:, File:
			 * which probably need reparsing.
			 */

			$this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options );
			$this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options );

			// The redirect table has interwiki links randomly mixed in, we
			// need to filter those out. For example [[w:Foo:Bar]] would
			// have rd_interwiki=w and rd_namespace=0, which would match the
			// query for a conflicting namespace "Foo" if filtering wasn't done.
			$this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
				[ 'rd_interwiki' => null ] );
			$this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
				[ 'rd_interwiki' => '' ] );
		}
	}

	$this->output( "{$this->totalLinks} links to fix, " .
		"{$this->resolvableLinks} were resolvable.\n" );

	return $ok;
}
223
/**
 * Get the interwiki list.
 *
 * @return string[] The iw_prefix value of every row returned by the
 *  interwiki lookup service, as a plain list
 */
private function getInterwikiList() {
	$rows = MediaWikiServices::getInstance()->getInterwikiLookup()->getAllPrefixes();

	// Keep only the prefix column; array_values() yields a 0-based list
	// regardless of how the lookup keyed its rows.
	return array_values( array_map( function ( $row ) {
		return $row['iw_prefix'];
	}, $rows ) );
}
238
/**
 * Check a given prefix and try to move the pages carrying it into the
 * given destination namespace.
 *
 * For each conflicting page this decides between moving, merging and
 * aborting, logs the decision, and (only when the 'fix' option is set)
 * carries it out.
 *
 * @param int $ns Destination namespace ID
 * @param string $name Prefix that conflicts with the namespace
 * @param array $options Associative array of validated command-line options
 * @return bool True if no page had to be aborted or failed to be fixed
 */
private function checkNamespace( $ns, $name, $options ) {
	$targets = $this->getTargetList( $ns, $name, $options );
	$numTargets = $targets->numRows();
	$this->totalPages += $numTargets;
	if ( $numTargets == 0 ) {
		return true;
	}

	$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
	$allResolved = true;

	foreach ( $targets as $row ) {
		// Step 1: find the new title and decide what to do with the page.
		// 'abort' is the default; the branches below upgrade it on success.
		$action = 'abort';
		$logStatus = false;
		$newTitle = $this->getDestinationTitle( $ns, $name,
			$row->page_namespace, $row->page_title, $options );

		if ( !$newTitle ) {
			$logStatus = 'invalid title';
		} elseif ( !$newTitle->exists() ) {
			$action = 'move';
			$logStatus = 'no conflict';
		} elseif ( $options['merge'] ) {
			// canMerge() fills in $logStatus itself when it refuses
			if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
				$action = 'merge';
			}
		} elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
			$logStatus = 'dest title exists and --add-prefix not specified';
		} else {
			$newTitle = $this->getAlternateTitle( $newTitle, $options );
			if ( !$newTitle ) {
				$logStatus = 'alternate title is invalid';
			} elseif ( $newTitle->exists() ) {
				$logStatus = 'title conflict';
			} else {
				$action = 'move';
				$logStatus = 'alternate';
			}
		}

		// Step 2: take the action, or just log it on a dry run.
		$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
		$pageOK = true;

		if ( $action === 'abort' ) {
			$this->output( "$logTitle *** $logStatus\n" );
			$pageOK = false;
		} elseif ( $action === 'move' ) {
			$this->output( "$logTitle -> " .
				$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );
			if ( $options['fix'] ) {
				$pageOK = $this->movePage( $row->page_id, $newTitle );
			}
		} elseif ( $action === 'merge' ) {
			$this->output( "$logTitle => " .
				$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );
			if ( $options['fix'] ) {
				$pageOK = $this->mergePage( $row, $newTitle );
			}
		}

		// Step 3: book-keeping for the summary line and the return value.
		if ( $pageOK ) {
			$this->resolvablePages++;
		} else {
			$allResolved = false;
		}
	}

	return $allResolved;
}
332
/**
 * Check and repair the destination fields in a link table.
 *
 * Scans $table in batches for rows whose target is in namespace 0 and
 * whose title starts with "$name:", and (when the 'fix' option is set)
 * rewrites the namespace and title fields to point at the real
 * destination title in namespace $ns.
 *
 * @param string $table The link table name
 * @param string $fieldPrefix The field prefix in the link table (e.g. "pl")
 * @param int $ns Destination namespace ID
 * @param string $name Prefix that conflicts with the namespace
 * @param array $options Associative array of validated command-line options
 * @param array $extraConds Extra conditions added to the select query
 */
private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
	$extraConds = []
) {
	$batchConds = [];
	$fromField = "{$fieldPrefix}_from";
	$namespaceField = "{$fieldPrefix}_namespace";
	$titleField = "{$fieldPrefix}_title";
	$batchSize = 500;
	while ( true ) {
		$res = $this->db->select(
			$table,
			[ $fromField, $namespaceField, $titleField ],
			array_merge( $batchConds, $extraConds, [
				$namespaceField => 0,
				$titleField . $this->db->buildLike( "$name:", $this->db->anyString() )
			] ),
			__METHOD__,
			[
				'ORDER BY' => [ $titleField, $fromField ],
				'LIMIT' => $batchSize
			]
		);

		if ( $res->numRows() == 0 ) {
			break;
		}
		foreach ( $res as $row ) {
			$logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
				"dbk={$row->$titleField}";
			$destTitle = $this->getDestinationTitle( $ns, $name,
				$row->$namespaceField, $row->$titleField, $options );
			$this->totalLinks++;
			if ( !$destTitle ) {
				$this->output( "$table $logTitle *** INVALID\n" );
				continue;
			}
			$this->resolvableLinks++;
			if ( !$options['fix'] ) {
				$this->output( "$table $logTitle -> " .
					$destTitle->getPrefixedDBkey() . " DRY RUN\n" );
				continue;
			}

			$this->db->update( $table,
				// SET
				[
					$namespaceField => $destTitle->getNamespace(),
					$titleField => $destTitle->getDBkey()
				],
				// WHERE
				[
					$namespaceField => 0,
					$titleField => $row->$titleField,
					$fromField => $row->$fromField
				],
				__METHOD__,
				[ 'IGNORE' ]
			);
			$this->output( "$table $logTitle -> " .
				$destTitle->getPrefixedDBkey() . "\n" );
		}

		// Resume the next batch strictly after the last (title, from) pair
		// seen, matching the ORDER BY of the select above. $row still holds
		// the final row of this non-empty batch.
		$encLastTitle = $this->db->addQuotes( $row->$titleField );
		$encLastFrom = $this->db->addQuotes( $row->$fromField );

		$batchConds = [
			"$titleField > $encLastTitle " .
			"OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)" ];

		// Let the replica DBs catch up before issuing the next batch of
		// writes, so a large run does not build up replication lag.
		wfWaitForSlaves();
	}
}
413
/**
 * Check the single pseudo-namespace named by the 'source-pseudo-namespace'
 * option against the namespace given by the 'dest-namespace' option.
 *
 * @param array $options Associative array of validated command-line options
 * @return bool The result of checkNamespace() for that prefix
 */
private function checkPrefix( $options ) {
	$sourcePrefix = $options['source-pseudo-namespace'];
	$destNs = $options['dest-namespace'];

	$this->output(
		sprintf( "Checking prefix \"%s\" vs namespace %s\n", $sourcePrefix, $destNs )
	);

	return $this->checkNamespace( $destNs, $sourcePrefix, $options );
}
428
/**
 * Find pages whose stored title begins with "$name:", i.e. titles that
 * carry the prefix as plain text instead of living in the namespace.
 *
 * Only the main namespace is searched, unless the 'move-talk' option is
 * set and $ns is a subject namespace, in which case the Talk namespace
 * is searched as well.
 *
 * @param int $ns Destination namespace ID
 * @param string $name Prefix that conflicts with the namespace
 * @param array $options Associative array of validated command-line options
 * @return ResultWrapper Rows with page_id, page_title and page_namespace
 */
private function getTargetList( $ns, $name, $options ) {
	$includeTalk = $options['move-talk'] && MWNamespace::isSubject( $ns );
	$checkNamespaces = $includeTalk ? [ NS_MAIN, NS_TALK ] : NS_MAIN;

	$fields = [ 'page_id', 'page_title', 'page_namespace' ];
	$titleLike = 'page_title' . $this->db->buildLike( "$name:", $this->db->anyString() );
	$conds = [
		'page_namespace' => $checkNamespaces,
		$titleLike,
	];

	return $this->db->select( 'page', $fields, $conds, __METHOD__ );
}
459
/**
 * Get the preferred destination title for a given target page.
 *
 * @param int $ns Destination namespace ID; 0 marks an interwiki prefix
 * @param string $name Prefix that conflicts with the namespace
 * @param int $sourceNs Namespace the row is currently stored in
 * @param string $sourceDbk DB key the row is currently stored under
 * @param array $options Associative array of validated command-line options
 * @return Title|bool The destination title, or false if it is invalid
 *  or cannot exist
 */
private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk, $options ) {
	// Drop the leading "$name:" from the stored key
	$dbk = substr( $sourceDbk, strlen( "$name:" ) );

	if ( $ns == 0 ) {
		// An interwiki; try an alternate encoding with '-' for ':'
		$dbk = "{$name}-{$dbk}";
	}

	// A page found in Talk via the --move-talk feature belongs in the talk
	// namespace associated with the destination, not the destination itself.
	$isMovedTalkPage = $sourceNs == NS_TALK && MWNamespace::isSubject( $ns );
	$destNS = $isMovedTalkPage ? MWNamespace::getTalk( $ns ) : $ns;

	$newTitle = Title::makeTitleSafe( $destNS, $dbk );

	return ( $newTitle && $newTitle->canExist() ) ? $newTitle : false;
}
486
/**
 * Get an alternative title to move a page to, built by wrapping the DB
 * key of $linkTarget in the 'add-prefix' and 'add-suffix' options.
 *
 * The candidate is computed exactly once. Looping "until a free title is
 * found" can never terminate here, because the inputs do not change
 * between iterations and so every iteration would test the same title.
 * The returned title may therefore already exist; the caller is expected
 * to check exists() and report a title conflict.
 *
 * @param LinkTarget $linkTarget Title that is already taken
 * @param array $options Associative array of validated command-line options
 * @return Title|bool The alternate title, or false if neither a prefix nor
 *  a suffix was given or the resulting title is invalid
 */
private function getAlternateTitle( LinkTarget $linkTarget, $options ) {
	$prefix = $options['add-prefix'];
	$suffix = $options['add-suffix'];
	if ( $prefix == '' && $suffix == '' ) {
		return false;
	}

	$dbk = $prefix . $linkTarget->getDBkey() . $suffix;
	$title = Title::makeTitleSafe( $linkTarget->getNamespace(), $dbk );
	if ( !$title ) {
		return false;
	}

	return $title;
}
512
/**
 * Move a page by rewriting its row in the page table, then bring the
 * *_from_namespace fields of the link tables in line with the new
 * namespace.
 *
 * @param int $id The page_id of the page to move
 * @param LinkTarget $newLinkTarget The new title
 * @return bool Always true
 */
private function movePage( $id, LinkTarget $newLinkTarget ) {
	$destNs = $newLinkTarget->getNamespace();

	$this->db->update(
		'page',
		// SET
		[
			'page_namespace' => $destNs,
			'page_title' => $newLinkTarget->getDBkey(),
		],
		// WHERE
		[ 'page_id' => $id ],
		__METHOD__
	);

	// Update *_from_namespace in links tables (table name => field prefix)
	$linkTables = [
		'pagelinks' => 'pl',
		'templatelinks' => 'tl',
		'imagelinks' => 'il',
	];
	foreach ( $linkTables as $table => $fieldPrefix ) {
		$this->db->update(
			$table,
			// SET
			[ "{$fieldPrefix}_from_namespace" => $destNs ],
			// WHERE
			[ "{$fieldPrefix}_from" => $id ],
			__METHOD__
		);
	}

	return true;
}
548
/**
 * Determine if we can merge a page.
 *
 * A merge is allowed only when the latest revision of the source page is
 * not newer than the latest revision of the destination.
 *
 * @param int $id The page_id of the source page
 * @param LinkTarget $linkTarget The destination title
 * @param string &$logStatus Set to a log status message on failure
 * @return bool True if the merge may go ahead
 */
private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
	$latestDest = Revision::newFromTitle( $linkTarget, 0, Revision::READ_LATEST );
	$latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST );

	// Either lookup can come back empty (e.g. a page row without a usable
	// latest revision). Abort the merge for this page instead of calling
	// a method on null and killing the whole run.
	if ( !$latestDest || !$latestSource ) {
		$logStatus = 'cannot merge since a latest revision could not be loaded';
		return false;
	}

	if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
		$logStatus = 'cannot merge since source is later';
		return false;
	}

	return true;
}
571
/**
 * Merge page histories: reassign every revision of the source page to the
 * destination page, delete the source page row, then run the link cleanup
 * for the deleted page.
 *
 * @param stdClass $row Page row of the source page (page_id,
 *  page_namespace and page_title are read)
 * @param Title $newTitle The destination title
 * @return bool Always true
 */
private function mergePage( $row, Title $newTitle ) {
	$sourceId = $row->page_id;

	// The WikiPage object is needed after the page row is gone, so build
	// and load it now, while the page_id still exists. makeTitleSafe()
	// cannot be used here: the title is deliberately an invalid one.
	$sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	$sourceTitle->resetArticleID( $sourceId );
	$sourcePage = new WikiPage( $sourceTitle );
	$sourcePage->loadPageData( 'fromdbmaster' );

	$destId = $newTitle->getArticleID();

	// Re-home the revisions and drop the source row in one transaction
	$this->beginTransaction( $this->db, __METHOD__ );
	$this->db->update(
		'revision',
		// SET
		[ 'rev_page' => $destId ],
		// WHERE
		[ 'rev_page' => $sourceId ],
		__METHOD__
	);
	$this->db->delete( 'page', [ 'page_id' => $sourceId ], __METHOD__ );
	$this->commitTransaction( $this->db, __METHOD__ );

	/* Call LinksDeletionUpdate to delete outgoing links from the old title,
	 * and update category counts.
	 *
	 * Handing a fake, broken Title to external code is a fairly dubious
	 * idea. It is done because the alternative is duplicating quite a lot
	 * of code, but it is fragile: the called code could easily start
	 * assuming that the title is valid.
	 */
	DeferredUpdates::addUpdate( new LinksDeletionUpdate( $sourcePage ) );
	DeferredUpdates::doUpdates();

	return true;
}
617}
618
619$maintClass = NamespaceConflictChecker::class;
620require_once RUN_MAINTENANCE_IF_MAIN;
$wgCapitalLinks
Set this to false to avoid forcing the first letter of links to capitals.
$wgNamespaceAliases
Namespace aliases.
wfWaitForSlaves( $ifWritesSince=null, $wiki=false, $cluster=false, $timeout=null)
Waits for the replica DBs to catch up to the master position.
Update object handling the cleanup of links tables after a page was deleted.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
getDB( $db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
hasOption( $name)
Checks to see if a particular param exists.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Maintenance script that checks for articles to fix after adding/deleting namespaces.
checkLinkTable( $table, $fieldPrefix, $ns, $name, $options, $extraConds=[])
Check and repair the destination fields in a link table.
getAlternateTitle(LinkTarget $linkTarget, $options)
Get an alternative title to move a page to.
execute()
Do the actual work.
IMaintainableDatabase $db
getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk, $options)
Get the preferred destination title for a given target page.
movePage( $id, LinkTarget $newLinkTarget)
Move a page.
getInterwikiList()
Get the interwiki list.
checkNamespace( $ns, $name, $options)
Check a given prefix and try to move it into the given destination namespace.
canMerge( $id, LinkTarget $linkTarget, &$logStatus)
Determine if we can merge a page.
getTargetList( $ns, $name, $options)
Find pages in main and talk namespaces that have a prefix of the new namespace so we know titles that...
__construct()
Default constructor.
checkAll( $options)
Check all namespaces.
checkPrefix( $options)
Move the given pseudo-namespace, either replacing the colon with a hyphen (useful for pseudo-namespac...
mergePage( $row, Title $newTitle)
Merge page histories.
Represents a title within MediaWiki.
Definition Title.php:39
getArticleID( $flags=0)
Get the article ID for this Title from the link cache, adding it if necessary.
Definition Title.php:3436
Class representing a MediaWiki article and history.
Definition WikiPage.php:37
Result wrapper for grabbing data queried from an IDatabase object.
$res
Definition database.txt:21
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition design.txt:57
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling output() to send it all. It could be easily changed to send incrementally if that becomes useful
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account incomplete not yet checked for validity & $retval
Definition hooks.txt:266
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:2001
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:964
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:302
const NS_MAIN
Definition Defines.php:74
const NS_TALK
Definition Defines.php:75
getNamespace()
Get the namespace index.
getDBkey()
Get the main part with underscores.
Advanced database interface for IDatabase handles that include maintenance methods.
require_once RUN_MAINTENANCE_IF_MAIN
$maintClass
const DB_MASTER
Definition defines.php:29