Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 390 |
|
0.00% |
0 / 14 |
CRAP | |
0.00% |
0 / 1 |
NamespaceDupes | |
0.00% |
0 / 390 |
|
0.00% |
0 / 14 |
6972 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
12 | |||
checkAll | |
0.00% |
0 / 54 |
|
0.00% |
0 / 1 |
306 | |||
getInterwikiList | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
checkNamespace | |
0.00% |
0 / 61 |
|
0.00% |
0 / 1 |
462 | |||
checkLinkTable | |
0.00% |
0 / 113 |
|
0.00% |
0 / 1 |
240 | |||
checkPrefix | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
getTargetList | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 | |||
getDestination | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
20 | |||
getDestinationTitle | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
getAlternateTitle | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
movePage | |
0.00% |
0 / 46 |
|
0.00% |
0 / 1 |
42 | |||
canMerge | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
6 | |||
mergePage | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | /** |
3 | * Check for articles to fix after adding/deleting namespaces |
4 | * |
5 | * Copyright © 2005-2007 Brooke Vibber <bvibber@wikimedia.org> |
6 | * https://www.mediawiki.org/ |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | * http://www.gnu.org/copyleft/gpl.html |
22 | * |
23 | * @file |
24 | * @ingroup Maintenance |
25 | */ |
26 | |
27 | // @codeCoverageIgnoreStart |
28 | require_once __DIR__ . '/Maintenance.php'; |
29 | // @codeCoverageIgnoreEnd |
30 | |
31 | use MediaWiki\Deferred\DeferredUpdates; |
32 | use MediaWiki\Deferred\LinksUpdate\LinksDeletionUpdate; |
33 | use MediaWiki\Linker\LinkTarget; |
34 | use MediaWiki\MainConfigNames; |
35 | use MediaWiki\Maintenance\Maintenance; |
36 | use MediaWiki\Title\Title; |
37 | use MediaWiki\Title\TitleValue; |
38 | use Wikimedia\Rdbms\IDBAccessObject; |
39 | use Wikimedia\Rdbms\IExpression; |
40 | use Wikimedia\Rdbms\IResultWrapper; |
41 | use Wikimedia\Rdbms\LikeValue; |
42 | |
/**
 * Maintenance script that checks for articles to fix after
 * adding/deleting namespaces.
 *
 * Pages in the main (and optionally talk) namespace, and rows in the link
 * tables, whose title starts with "<prefix>:" are reported. With --fix they are
 * moved, merged or retargeted to the namespace that the prefix now denotes.
 *
 * @ingroup Maintenance
 */
class NamespaceDupes extends Maintenance {

	/**
	 * Total number of pages that need fixing that are automatically resolvable
	 * @var int
	 */
	private $resolvablePages = 0;

	/**
	 * Total number of pages that need fixing
	 * @var int
	 */
	private $totalPages = 0;

	/**
	 * Total number of links that need fixing that are automatically resolvable
	 * @var int
	 */
	private $resolvableLinks = 0;

	/**
	 * Total number of erroneous links
	 * @var int
	 */
	private $totalLinks = 0;

	/**
	 * Total number of links deleted because they weren't automatically resolvable due to the
	 * target already existing
	 * @var int
	 */
	private $deletedLinks = 0;

	/**
	 * Register the script description and its command-line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Find and fix pages affected by namespace addition/removal' );
		$this->addOption( 'fix', 'Attempt to automatically fix errors and delete broken links' );
		$this->addOption( 'merge', "Instead of renaming conflicts, do a history merge with " .
			"the correct title" );
		$this->addOption( 'add-suffix', "Dupes will be renamed with correct namespace with " .
			"<text> appended after the article name", false, true );
		$this->addOption( 'add-prefix', "Dupes will be renamed with correct namespace with " .
			"<text> prepended before the article name", false, true );
		$this->addOption( 'source-pseudo-namespace', "Move all pages with the given source " .
			"prefix (with an implied colon following it). If --dest-namespace is not specified, " .
			"the colon will be replaced with a hyphen.",
			false, true );
		$this->addOption( 'dest-namespace', "In combination with --source-pseudo-namespace, " .
			"specify the namespace ID of the destination.", false, true );
		$this->addOption( 'move-talk', "If this is specified, pages in the Talk namespace that " .
			"begin with a conflicting prefix will be renamed, for example " .
			"Talk:File:Foo -> File_Talk:Foo" );
	}

	/**
	 * Collect the command-line options, run either the single-prefix check
	 * (when --source-pseudo-namespace is given) or the check of all known
	 * namespaces, and print a one-line verdict.
	 */
	public function execute() {
		$options = [
			'fix' => $this->hasOption( 'fix' ),
			'merge' => $this->hasOption( 'merge' ),
			'add-suffix' => $this->getOption( 'add-suffix', '' ),
			'add-prefix' => $this->getOption( 'add-prefix', '' ),
			'move-talk' => $this->hasOption( 'move-talk' ),
			'source-pseudo-namespace' => $this->getOption( 'source-pseudo-namespace', '' ),
			'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) )
		];

		if ( $options['source-pseudo-namespace'] !== '' ) {
			$retval = $this->checkPrefix( $options );
		} else {
			$retval = $this->checkAll( $options );
		}

		if ( $retval ) {
			$this->output( "\nLooks good!\n" );
		} else {
			$this->output( "\nOh noeees\n" );
		}
	}

	/**
	 * Check all namespaces
	 *
	 * Builds a map of every known prefix (interwiki prefixes, canonical and
	 * localised namespace names, aliases, and their case variants) to its
	 * namespace ID, checks pages for each prefix, and then checks the
	 * pagelinks, templatelinks and redirect tables for each non-zero namespace.
	 *
	 * @param array $options Associative array of validated command-line options
	 *
	 * @return bool True if every page found could be handled
	 */
	private function checkAll( $options ): bool {
		$contLang = $this->getServiceContainer()->getContentLanguage();
		$spaces = [];

		// List interwikis first, so they'll be overridden
		// by any conflicting local namespaces.
		foreach ( $this->getInterwikiList() as $prefix ) {
			$name = $contLang->ucfirst( $prefix );
			$spaces[$name] = 0;
		}

		// Now pull in all canonical and alias namespaces...
		foreach (
			$this->getServiceContainer()->getNamespaceInfo()->getCanonicalNamespaces()
			as $ns => $name
		) {
			// This includes $wgExtraNamespaces
			if ( $name !== '' ) {
				$spaces[$name] = $ns;
			}
		}
		foreach ( $contLang->getNamespaces() as $ns => $name ) {
			if ( $name !== '' ) {
				$spaces[$name] = $ns;
			}
		}
		foreach ( $contLang->getNamespaceAliases() as $name => $ns ) {
			$spaces[$name] = $ns;
		}

		// We'll need to check for lowercase keys as well,
		// since we're doing case-sensitive searches in the db.
		$capitalLinks = $this->getConfig()->get( MainConfigNames::CapitalLinks );
		foreach ( $spaces as $name => $ns ) {
			$moreNames = [];
			$moreNames[] = $contLang->uc( $name );
			$moreNames[] = $contLang->ucfirst( $contLang->lc( $name ) );
			$moreNames[] = $contLang->ucwords( $name );
			$moreNames[] = $contLang->ucwords( $contLang->lc( $name ) );
			$moreNames[] = $contLang->ucwordbreaks( $name );
			$moreNames[] = $contLang->ucwordbreaks( $contLang->lc( $name ) );
			if ( !$capitalLinks ) {
				// foreach iterates over a copy, so appending here does not
				// extend the iteration.
				foreach ( $moreNames as $altName ) {
					$moreNames[] = $contLang->lcfirst( $altName );
				}
				$moreNames[] = $contLang->lcfirst( $name );
			}
			foreach ( array_unique( $moreNames ) as $altName ) {
				if ( $altName !== $name ) {
					$spaces[$altName] = $ns;
				}
			}
		}

		// Sort by namespace index, and if there are two with the same index,
		// break the tie by sorting by name
		$origSpaces = $spaces;
		uksort( $spaces, static function ( $a, $b ) use ( $origSpaces ) {
			return $origSpaces[$a] <=> $origSpaces[$b]
				?: $a <=> $b;
		} );

		$ok = true;
		foreach ( $spaces as $name => $ns ) {
			// Call checkNamespace() first so it always runs, even after a failure.
			$ok = $this->checkNamespace( $ns, $name, $options ) && $ok;
		}

		$this->output(
			"{$this->totalPages} pages to fix, " .
			"{$this->resolvablePages} were resolvable.\n\n"
		);

		foreach ( $spaces as $name => $ns ) {
			if ( $ns != 0 ) {
				/* Fix up link destinations for non-interwiki links only.
				 *
				 * For example if a page has [[Foo:Bar]] and then a Foo namespace
				 * is introduced, pagelinks needs to be updated to have
				 * page_namespace = NS_FOO.
				 *
				 * If instead an interwiki prefix was introduced called "Foo",
				 * the link should instead be moved to the iwlinks table. If a new
				 * language is introduced called "Foo", or if there is a pagelink
				 * [[fr:Bar]] when interlanguage magic links are turned on, the
				 * link would have to be moved to the langlinks table. Let's put
				 * those cases in the too-hard basket for now. The consequences are
				 * not especially severe.
				 * @fixme Handle interwiki links, and pagelinks to Category:, File:
				 * which probably need reparsing.
				 */

				$this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options );
				$this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options );

				// The redirect table has interwiki links randomly mixed in, we
				// need to filter those out. For example [[w:Foo:Bar]] would
				// have rd_interwiki=w and rd_namespace=0, which would match the
				// query for a conflicting namespace "Foo" if filtering wasn't done.
				$this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
					[ 'rd_interwiki' => '' ] );
			}
		}

		$this->output(
			"{$this->totalLinks} links to fix, " .
			"{$this->resolvableLinks} were resolvable, " .
			"{$this->deletedLinks} were deleted.\n"
		);

		return $ok;
	}

	/**
	 * Get the list of interwiki prefixes known to the interwiki lookup service.
	 *
	 * @return string[]
	 */
	private function getInterwikiList(): array {
		$result = $this->getServiceContainer()->getInterwikiLookup()->getAllPrefixes();
		return array_column( $result, 'iw_prefix' );
	}

	/**
	 * Check the pages whose title starts with a given prefix and, for each one,
	 * decide whether to move it, merge it or give up. The action is only carried
	 * out when the 'fix' option is set; otherwise it is just logged as a dry run.
	 *
	 * @param int $ns Destination namespace id
	 * @param string $name The conflicting prefix
	 * @param array $options Associative array of validated command-line options
	 * @return bool True if every matching page was handled (or none matched)
	 */
	private function checkNamespace( $ns, $name, $options ): bool {
		$targets = $this->getTargetList( $ns, $name, $options );
		$count = $targets->numRows();
		$this->totalPages += $count;
		if ( $count == 0 ) {
			return true;
		}

		$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';

		$ok = true;
		foreach ( $targets as $row ) {
			// Find the new title and determine the action to take

			$newTitle = $this->getDestinationTitle(
				$ns, $name, $row->page_namespace, $row->page_title );
			$logStatus = false;
			if ( !$newTitle ) {
				if ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
					$logStatus = 'invalid title and --add-prefix not specified';
					$action = 'abort';
				} else {
					$action = 'alternate';
				}
			} elseif ( $newTitle->exists( IDBAccessObject::READ_LATEST ) ) {
				if ( $options['merge'] ) {
					// canMerge() fills in $logStatus when it refuses the merge
					if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
						$action = 'merge';
					} else {
						$action = 'abort';
					}
				} elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
					$action = 'abort';
					$logStatus = 'dest title exists and --add-prefix not specified';
				} else {
					$action = 'alternate';
				}
			} else {
				$action = 'move';
				$logStatus = 'no conflict';
			}
			if ( $action === 'alternate' ) {
				// Keep the result in dedicated locals: overwriting $ns here would
				// change the destination namespace for every remaining row of this
				// loop (e.g. after a talk page was handled with --move-talk).
				[ $altNs, $altDbk ] = $this->getDestination( $ns, $name, $row->page_namespace,
					$row->page_title );
				$newTitle = $this->getAlternateTitle( $altNs, $altDbk, $options );
				if ( !$newTitle ) {
					$action = 'abort';
					$logStatus = 'alternate title is invalid';
				} elseif ( $newTitle->exists( IDBAccessObject::READ_LATEST ) ) {
					// Same read flags as the check on the preferred title above
					$action = 'abort';
					$logStatus = 'alternate title conflicts';
				} else {
					$action = 'move';
					$logStatus = 'alternate';
				}
			}

			// Take the action or log a dry run message

			$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
			$pageOK = true;

			switch ( $action ) {
				case 'abort':
					$this->output( "$logTitle *** $logStatus\n" );
					$pageOK = false;
					break;
				case 'move':
					$this->output( "$logTitle -> " .
						$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );

					if ( $options['fix'] ) {
						$pageOK = $this->movePage( $row->page_id, $newTitle );
					}
					break;
				case 'merge':
					$this->output( "$logTitle => " .
						$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );

					if ( $options['fix'] ) {
						$pageOK = $this->mergePage( $row, $newTitle );
					}
					break;
			}

			if ( $pageOK ) {
				$this->resolvablePages++;
			} else {
				$ok = false;
			}
		}

		return $ok;
	}

	/**
	 * Check and repair the destination fields in a link table
	 *
	 * Rows pointing at namespace 0 with a title starting with "$name:" are read
	 * in batches of 100, ordered by title and source page. With the 'fix' option
	 * each row is retargeted with UPDATE IGNORE; rows that still exist afterwards
	 * (because the update hit a key conflict) are deleted.
	 *
	 * @param string $table The link table name
	 * @param string $fieldPrefix The field prefix in the link table
	 * @param int $ns Destination namespace id
	 * @param string $name The conflicting prefix
	 * @param array $options Associative array of validated command-line options
	 * @param array $extraConds Extra conditions for the SQL query
	 */
	private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
		$extraConds = []
	) {
		$dbw = $this->getPrimaryDB();

		$batchConds = [];
		$fromField = "{$fieldPrefix}_from";
		$batchSize = 100;
		$sqb = $dbw->newSelectQueryBuilder()
			->select( $fromField )
			->where( $extraConds )
			->limit( $batchSize );

		$linksMigration = $this->getServiceContainer()->getLinksMigration();
		// Whether this table is handled by the links migration mapping
		$usesLinksMigration = isset( $linksMigration::$mapping[$table] );
		if ( $usesLinksMigration ) {
			$sqb->queryInfo( $linksMigration->getQueryInfo( $table ) );
			[ $namespaceField, $titleField ] = $linksMigration->getTitleFields( $table );
			$schemaMigrationStage = $linksMigration::$mapping[$table]['config'] === -1
				? MIGRATION_NEW
				: $this->getConfig()->get( $linksMigration::$mapping[$table]['config'] );
			$linkTargetLookup = $this->getServiceContainer()->getLinkTargetLookup();
			$targetIdField = $linksMigration::$mapping[$table]['target_id'];
		} else {
			$sqb->table( $table );
			$namespaceField = "{$fieldPrefix}_namespace";
			$titleField = "{$fieldPrefix}_title";
			$sqb->fields( [ $namespaceField, $titleField ] );
			// Variables only used for links migration, init only
			$schemaMigrationStage = -1;
			$linkTargetLookup = null;
			$targetIdField = '';
		}
		$sqb->andWhere( [
				$namespaceField => 0,
				$dbw->expr( $titleField, IExpression::LIKE, new LikeValue( "$name:", $dbw->anyString() ) ),
			] )
			->orderBy( [ $titleField, $fromField ] )
			->caller( __METHOD__ );

		$updateRowsPerQuery = $this->getConfig()->get( MainConfigNames::UpdateRowsPerQuery );
		while ( true ) {
			// Clone so the continuation condition of one batch does not pile up
			// on the base query for the next batch.
			$res = ( clone $sqb )
				->andWhere( $batchConds )
				->fetchResultSet();
			if ( $res->numRows() == 0 ) {
				break;
			}

			$rowsToDeleteIfStillExists = [];

			foreach ( $res as $row ) {
				$logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
					"dbk={$row->$titleField}";
				$destTitle = $this->getDestinationTitle(
					$ns, $name, $row->$namespaceField, $row->$titleField );
				$this->totalLinks++;
				if ( !$destTitle ) {
					$this->output( "$table $logTitle *** INVALID\n" );
					continue;
				}
				$this->resolvableLinks++;
				if ( !$options['fix'] ) {
					$this->output( "$table $logTitle -> " .
						$destTitle->getPrefixedDBkey() . " DRY RUN\n" );
					continue;
				}

				if ( $usesLinksMigration ) {
					$setValue = [];
					if ( $schemaMigrationStage & SCHEMA_COMPAT_WRITE_NEW ) {
						$setValue[$targetIdField] = $linkTargetLookup->acquireLinkTargetId( $destTitle, $dbw );
					}
					if ( $schemaMigrationStage & SCHEMA_COMPAT_WRITE_OLD ) {
						$setValue["{$fieldPrefix}_namespace"] = $destTitle->getNamespace();
						$setValue["{$fieldPrefix}_title"] = $destTitle->getDBkey();
					}
					$whereCondition = $linksMigration->getLinksConditions(
						$table,
						new TitleValue( 0, $row->$titleField )
					);
					$deleteCondition = $linksMigration->getLinksConditions(
						$table,
						new TitleValue( (int)$row->$namespaceField, $row->$titleField )
					);
				} else {
					$setValue = [
						$namespaceField => $destTitle->getNamespace(),
						$titleField => $destTitle->getDBkey()
					];
					$whereCondition = [
						$namespaceField => 0,
						$titleField => $row->$titleField
					];
					$deleteCondition = [
						$namespaceField => $row->$namespaceField,
						$titleField => $row->$titleField,
					];
				}

				$dbw->newUpdateQueryBuilder()
					->update( $table )
					->ignore()
					->set( $setValue )
					->where( [ $fromField => $row->$fromField ] )
					->andWhere( $whereCondition )
					->caller( __METHOD__ )
					->execute();

				// In case there is a key conflict on UPDATE IGNORE the row needs deletion
				$rowsToDeleteIfStillExists[] = array_merge( [ $fromField => $row->$fromField ], $deleteCondition );

				$this->output( "$table $logTitle -> " .
					$destTitle->getPrefixedDBkey() . "\n"
				);
			}

			if ( $options['fix'] && count( $rowsToDeleteIfStillExists ) > 0 ) {
				$affectedRows = 0;
				$deleteBatches = array_chunk( $rowsToDeleteIfStillExists, $updateRowsPerQuery );
				foreach ( $deleteBatches as $deleteBatch ) {
					$dbw->newDeleteQueryBuilder()
						->deleteFrom( $table )
						->where( $dbw->factorConds( $deleteBatch ) )
						->caller( __METHOD__ )
						->execute();
					$affectedRows += $dbw->affectedRows();
					if ( count( $deleteBatches ) > 1 ) {
						$this->waitForReplication();
					}
				}

				// Rows that had to be deleted were counted as resolvable above;
				// move them to the "deleted" tally instead.
				$this->deletedLinks += $affectedRows;
				$this->resolvableLinks -= $affectedRows;
			}

			// Continue after the last row of this batch, in (title, from) order
			$batchConds = [
				$dbw->buildComparison( '>', [
					// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
					$titleField => $row->$titleField,
					// @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
					$fromField => $row->$fromField,
				] )
			];

			$this->waitForReplication();
		}
	}

	/**
	 * Move the given pseudo-namespace, either replacing the colon with a hyphen
	 * (useful for pseudo-namespaces that conflict with interwiki links) or move
	 * them to another namespace if specified.
	 * @param array $options Associative array of validated command-line options
	 * @return bool
	 */
	private function checkPrefix( $options ): bool {
		$prefix = $options['source-pseudo-namespace'];
		$ns = $options['dest-namespace'];
		$this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" );

		return $this->checkNamespace( $ns, $prefix, $options );
	}

	/**
	 * Find pages in main and talk namespaces that have a prefix of the new
	 * namespace so we know titles that will need migrating
	 *
	 * The talk namespace is only searched when the 'move-talk' option is set
	 * and $ns is a subject namespace.
	 *
	 * @param int $ns Destination namespace id
	 * @param string $name Prefix that is being made a namespace
	 * @param array $options Associative array of validated command-line options
	 *
	 * @return IResultWrapper Rows with page_id, page_title and page_namespace
	 */
	private function getTargetList( $ns, $name, $options ): IResultWrapper {
		$dbw = $this->getPrimaryDB();

		if (
			$options['move-talk'] &&
			$this->getServiceContainer()->getNamespaceInfo()->isSubject( $ns )
		) {
			$checkNamespaces = [ NS_MAIN, NS_TALK ];
		} else {
			$checkNamespaces = NS_MAIN;
		}

		return $dbw->newSelectQueryBuilder()
			->select( [ 'page_id', 'page_title', 'page_namespace' ] )
			->from( 'page' )
			->where( [
				'page_namespace' => $checkNamespaces,
				$dbw->expr( 'page_title', IExpression::LIKE, new LikeValue( "$name:", $dbw->anyString() ) ),
			] )
			->caller( __METHOD__ )->fetchResultSet();
	}

	/**
	 * Get the preferred destination for a given target page.
	 * @param int $ns The destination namespace ID
	 * @param string $name The conflicting prefix
	 * @param int $sourceNs The source namespace
	 * @param string $sourceDbk The source DB key (i.e. page_title)
	 * @return array [ ns, dbkey ], not necessarily valid
	 */
	private function getDestination( $ns, $name, $sourceNs, $sourceDbk ): array {
		// Strip the leading "<prefix>:" from the source title
		$dbk = substr( $sourceDbk, strlen( "$name:" ) );
		if ( $ns <= 0 ) {
			// An interwiki or an illegal namespace like "Special" or "Media"
			// try an alternate encoding with '-' for ':'
			$dbk = "$name-" . $dbk;
			$ns = 0;
		}
		$destNS = $ns;
		$nsInfo = $this->getServiceContainer()->getNamespaceInfo();
		if ( $sourceNs == NS_TALK && $nsInfo->isSubject( $ns ) ) {
			// This is an associated talk page moved with the --move-talk feature.
			$destNS = $nsInfo->getTalk( $destNS );
		}
		return [ $destNS, $dbk ];
	}

	/**
	 * Get the preferred destination title for a given target page.
	 * @param int $ns The destination namespace ID
	 * @param string $name The conflicting prefix
	 * @param int $sourceNs The source namespace
	 * @param string $sourceDbk The source DB key (i.e. page_title)
	 * @return Title|false False if the title is invalid or cannot exist
	 */
	private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk ) {
		[ $destNS, $dbk ] = $this->getDestination( $ns, $name, $sourceNs, $sourceDbk );
		$newTitle = Title::makeTitleSafe( $destNS, $dbk );
		if ( !$newTitle || !$newTitle->canExist() ) {
			return false;
		}
		return $newTitle;
	}

	/**
	 * Get an alternative title to move a page to. This is used if the
	 * preferred destination title already exists.
	 *
	 * @param int $ns The destination namespace ID
	 * @param string $dbk The destination DB key, before the prefix/suffix is applied
	 * @param array $options Associative array of validated command-line options
	 * @return Title|false|null False if neither 'add-prefix' nor 'add-suffix' is set;
	 *   otherwise whatever Title::makeTitleSafe() returns for the decorated key
	 */
	private function getAlternateTitle( $ns, $dbk, $options ) {
		$prefix = $options['add-prefix'];
		$suffix = $options['add-suffix'];
		if ( $prefix == '' && $suffix == '' ) {
			return false;
		}
		$newDbk = $prefix . $dbk . $suffix;
		return Title::makeTitleSafe( $ns, $newDbk );
	}

	/**
	 * Move a page
	 *
	 * Rewrites page_namespace/page_title of the page row directly, then brings
	 * the *_from_namespace columns of templatelinks, imagelinks and pagelinks
	 * in line with the new namespace.
	 *
	 * @param int $id The page_id
	 * @param LinkTarget $newLinkTarget The new title link target
	 * @return bool Always true
	 */
	private function movePage( $id, LinkTarget $newLinkTarget ): bool {
		$dbw = $this->getPrimaryDB();

		$dbw->newUpdateQueryBuilder()
			->update( 'page' )
			->set( [
				"page_namespace" => $newLinkTarget->getNamespace(),
				"page_title" => $newLinkTarget->getDBkey(),
			] )
			->where( [
				"page_id" => $id,
			] )
			->caller( __METHOD__ )
			->execute();

		// Update *_from_namespace in links tables
		$fromNamespaceTables = [
			[ 'templatelinks', 'tl', [ 'tl_target_id' ] ],
			[ 'imagelinks', 'il', [ 'il_to' ] ]
		];
		if ( $this->getConfig()->get( MainConfigNames::PageLinksSchemaMigrationStage ) & SCHEMA_COMPAT_WRITE_OLD ) {
			$fromNamespaceTables[] = [ 'pagelinks', 'pl', [ 'pl_namespace', 'pl_title' ] ];
		} else {
			$fromNamespaceTables[] = [ 'pagelinks', 'pl', [ 'pl_target_id' ] ];
		}
		$updateRowsPerQuery = $this->getConfig()->get( MainConfigNames::UpdateRowsPerQuery );
		foreach ( $fromNamespaceTables as [ $table, $fieldPrefix, $additionalPrimaryKeyFields ] ) {
			$fromField = "{$fieldPrefix}_from";
			$fromNamespaceField = "{$fieldPrefix}_from_namespace";

			// fetchResultSet() always yields a result wrapper; an empty one
			// simply produces no update batches below.
			$res = $dbw->newSelectQueryBuilder()
				->select( $additionalPrimaryKeyFields )
				->from( $table )
				->where( [ $fromField => $id ] )
				->andWhere( $dbw->expr( $fromNamespaceField, '!=', $newLinkTarget->getNamespace() ) )
				->caller( __METHOD__ )
				->fetchResultSet();

			// One condition set per row, so each batch stays within $updateRowsPerQuery rows
			$updateConds = [];
			foreach ( $res as $row ) {
				$updateConds[] = array_merge( [ $fromField => $id ], (array)$row );
			}
			$updateBatches = array_chunk( $updateConds, $updateRowsPerQuery );
			foreach ( $updateBatches as $updateBatch ) {
				$this->beginTransactionRound( __METHOD__ );
				$dbw->newUpdateQueryBuilder()
					->update( $table )
					->set( [ $fromNamespaceField => $newLinkTarget->getNamespace() ] )
					->where( $dbw->factorConds( $updateBatch ) )
					->caller( __METHOD__ )
					->execute();
				$this->commitTransactionRound( __METHOD__ );
			}
		}

		return true;
	}

	/**
	 * Determine if we can merge a page.
	 * We check if an inaccessible revision would become the latest and
	 * deny the merge if so -- it's theoretically possible to update the
	 * latest revision, but opens a can of worms -- search engine updates,
	 * recentchanges review, etc.
	 *
	 * Concretely, the merge is denied when the source's latest revision is newer
	 * than the destination's, or when either latest revision cannot be loaded.
	 *
	 * @param int $id The page_id
	 * @param LinkTarget $linkTarget The new link target
	 * @param string &$logStatus This is set to the log status message on failure @phan-output-reference
	 * @return bool
	 */
	private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ): bool {
		$revisionLookup = $this->getServiceContainer()->getRevisionLookup();
		$latestDest = $revisionLookup->getRevisionByTitle( $linkTarget, 0,
			IDBAccessObject::READ_LATEST );
		$latestSource = $revisionLookup->getRevisionByPageId( $id, 0,
			IDBAccessObject::READ_LATEST );
		if ( !$latestSource || !$latestDest ) {
			// Without both revisions the timestamps cannot be compared; refuse
			// the merge rather than failing with a fatal error mid-run.
			$logStatus = 'cannot merge since the latest revision of the source or destination is missing';
			return false;
		}
		if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
			$logStatus = 'cannot merge since source is later';
			return false;
		}
		return true;
	}

	/**
	 * Merge page histories
	 *
	 * Reassigns every revision of the source page to the destination page,
	 * deletes the source page row, and then runs a LinksDeletionUpdate for the
	 * source page.
	 *
	 * @param stdClass $row Page row
	 * @param Title $newTitle
	 * @return bool Always true
	 */
	private function mergePage( $row, Title $newTitle ): bool {
		$updateRowsPerQuery = $this->getConfig()->get( MainConfigNames::UpdateRowsPerQuery );

		$id = $row->page_id;

		// Construct the WikiPage object we will need later, while the
		// page_id still exists. Note that this cannot use makeTitleSafe(),
		// we are deliberately constructing an invalid title.
		$sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		$sourceTitle->resetArticleID( $id );
		$wikiPage = $this->getServiceContainer()->getWikiPageFactory()->newFromTitle( $sourceTitle );
		$wikiPage->loadPageData( IDBAccessObject::READ_LATEST );
		$destId = $newTitle->getArticleID();

		$dbw = $this->getPrimaryDB();
		$this->beginTransactionRound( __METHOD__ );
		$revIds = $dbw->newSelectQueryBuilder()
			->select( 'rev_id' )
			->from( 'revision' )
			->where( [ 'rev_page' => $id ] )
			->caller( __METHOD__ )
			->fetchFieldValues();
		$updateBatches = array_chunk( array_map( 'intval', $revIds ), $updateRowsPerQuery );
		foreach ( $updateBatches as $updateBatch ) {
			$dbw->newUpdateQueryBuilder()
				->update( 'revision' )
				->set( [ 'rev_page' => $destId ] )
				->where( [ 'rev_id' => $updateBatch ] )
				->caller( __METHOD__ )
				->execute();
			if ( count( $updateBatches ) > 1 ) {
				// Commit between batches so a large history is not moved in one transaction
				$this->commitTransactionRound( __METHOD__ );
				$this->beginTransactionRound( __METHOD__ );
			}
		}
		$dbw->newDeleteQueryBuilder()
			->deleteFrom( 'page' )
			->where( [ 'page_id' => $id ] )
			->caller( __METHOD__ )
			->execute();
		$this->commitTransactionRound( __METHOD__ );

		/* Call LinksDeletionUpdate to delete outgoing links from the old title,
		 * and update category counts.
		 *
		 * Calling external code with a fake broken Title is a fairly dubious
		 * idea. It's necessary because it's quite a lot of code to duplicate,
		 * but that also makes it fragile since it would be easy for someone to
		 * accidentally introduce an assumption of title validity to the code we
		 * are calling.
		 */
		DeferredUpdates::addUpdate( new LinksDeletionUpdate( $wikiPage ) );
		DeferredUpdates::doUpdates();

		return true;
	}
}
780 | |
// @codeCoverageIgnoreStart
// Name this script's class in $maintClass, then include the file named by
// RUN_MAINTENANCE_IF_MAIN. NOTE(review): presumably that file runs the class
// only when this script is the entry point -- confirm in Maintenance.php.
$maintClass = NamespaceDupes::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd