Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 435 |
|
0.00% |
0 / 16 |
CRAP | |
0.00% |
0 / 1 |
NamespaceDupes | |
0.00% |
0 / 435 |
|
0.00% |
0 / 16 |
9506 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
12 | |||
checkAll | |
0.00% |
0 / 54 |
|
0.00% |
0 / 1 |
306 | |||
getInterwikiList | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
isSingleRevRedirectTo | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
deletePage | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
checkNamespace | |
0.00% |
0 / 92 |
|
0.00% |
0 / 1 |
930 | |||
checkLinkTable | |
0.00% |
0 / 113 |
|
0.00% |
0 / 1 |
240 | |||
checkPrefix | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
getTargetList | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 | |||
getDestination | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
20 | |||
getDestinationTitle | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
getAlternateTitle | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
movePage | |
0.00% |
0 / 44 |
|
0.00% |
0 / 1 |
30 | |||
canMerge | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
6 | |||
mergePage | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | /** |
3 | * Check for articles to fix after adding/deleting namespaces |
4 | * |
5 | * Copyright © 2005-2007 Brooke Vibber <bvibber@wikimedia.org> |
6 | * https://www.mediawiki.org/ |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | * http://www.gnu.org/copyleft/gpl.html |
22 | * |
23 | * @file |
24 | * @ingroup Maintenance |
25 | */ |
26 | |
27 | // @codeCoverageIgnoreStart |
28 | require_once __DIR__ . '/Maintenance.php'; |
29 | // @codeCoverageIgnoreEnd |
30 | |
31 | use MediaWiki\Deferred\DeferredUpdates; |
32 | use MediaWiki\Deferred\LinksUpdate\LinksDeletionUpdate; |
33 | use MediaWiki\Linker\LinkTarget; |
34 | use MediaWiki\MainConfigNames; |
35 | use MediaWiki\Maintenance\Maintenance; |
36 | use MediaWiki\Page\PageIdentity; |
37 | use MediaWiki\Revision\SlotRecord; |
38 | use MediaWiki\Status\Status; |
39 | use MediaWiki\Title\Title; |
40 | use MediaWiki\Title\TitleValue; |
41 | use Wikimedia\Rdbms\IDBAccessObject; |
42 | use Wikimedia\Rdbms\IExpression; |
43 | use Wikimedia\Rdbms\IResultWrapper; |
44 | use Wikimedia\Rdbms\LikeValue; |
45 | |
46 | /** |
47 | * Maintenance script that checks for articles to fix after |
48 | * adding/deleting namespaces. |
49 | * |
50 | * @ingroup Maintenance |
51 | */ |
52 | class NamespaceDupes extends Maintenance { |
53 | |
54 | /** |
55 | * Total number of pages that need fixing that are automatically resolvable |
56 | * @var int |
57 | */ |
58 | private $resolvablePages = 0; |
59 | |
60 | /** |
61 | * Total number of pages that need fixing |
62 | * @var int |
63 | */ |
64 | private $totalPages = 0; |
65 | |
66 | /** |
67 | * Total number of links that need fixing that are automatically resolvable |
68 | * @var int |
69 | */ |
70 | private $resolvableLinks = 0; |
71 | |
72 | /** |
73 | * Total number of erroneous links |
74 | * @var int |
75 | */ |
76 | private $totalLinks = 0; |
77 | |
78 | /** |
79 | * Total number of links deleted because they weren't automatically resolvable due to the |
80 | * target already existing |
81 | * @var int |
82 | */ |
83 | private $deletedLinks = 0; |
84 | |
/**
 * Register the script description and all supported command-line options.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Find and fix pages affected by namespace addition/removal' );

	// Behaviour switches (no argument)
	$this->addOption(
		'fix',
		'Attempt to automatically fix errors and delete broken links'
	);
	$this->addOption(
		'merge',
		'Instead of renaming conflicts, do a history merge with the correct title'
	);

	// Renaming helpers (each takes an argument)
	$this->addOption(
		'add-suffix',
		'Dupes will be renamed with correct namespace with <text> appended ' .
			'after the article name',
		false,
		true
	);
	$this->addOption(
		'add-prefix',
		'Dupes will be renamed with correct namespace with <text> prepended ' .
			'before the article name',
		false,
		true
	);

	// Pseudo-namespace mode (each takes an argument)
	$this->addOption(
		'source-pseudo-namespace',
		'Move all pages with the given source prefix (with an implied colon ' .
			'following it). If --dest-namespace is not specified, the colon will ' .
			'be replaced with a hyphen.',
		false,
		true
	);
	$this->addOption(
		'dest-namespace',
		'In combination with --source-pseudo-namespace, specify the namespace ' .
			'ID of the destination.',
		false,
		true
	);

	$this->addOption(
		'move-talk',
		'If this is specified, pages in the Talk namespace that begin with a ' .
			'conflicting prefix will be renamed, for example ' .
			'Talk:File:Foo -> File_Talk:Foo'
	);
}
105 | |
/**
 * Entry point: collect the command-line options, run either the
 * pseudo-namespace check or the full namespace check, and print a verdict.
 */
public function execute() {
	$sourcePrefix = $this->getOption( 'source-pseudo-namespace', '' );
	$destNamespace = intval( $this->getOption( 'dest-namespace', 0 ) );

	$options = [
		'fix' => $this->hasOption( 'fix' ),
		'merge' => $this->hasOption( 'merge' ),
		'add-suffix' => $this->getOption( 'add-suffix', '' ),
		'add-prefix' => $this->getOption( 'add-prefix', '' ),
		'move-talk' => $this->hasOption( 'move-talk' ),
		'source-pseudo-namespace' => $sourcePrefix,
		'dest-namespace' => $destNamespace,
	];

	// A source prefix selects the single-prefix mode; otherwise scan everything.
	$allGood = $sourcePrefix !== ''
		? $this->checkPrefix( $options )
		: $this->checkAll( $options );

	$this->output( $allGood ? "\nLooks good!\n" : "\nOh noeees\n" );
}
129 | |
/**
 * Check all namespaces
 *
 * Builds a map of every known prefix (interwiki prefixes, canonical names,
 * content-language names and aliases, plus their case variants) to a
 * namespace ID, checks pages for each prefix, and then checks the link
 * tables for every prefix whose namespace ID is non-zero.
 *
 * @param array $options Associative array of validated command-line options
 *
 * @return bool
 */
private function checkAll( $options ) {
	$services = $this->getServiceContainer();
	$contLang = $services->getContentLanguage();

	// Prefix => namespace ID. Interwiki prefixes go in first (as ID 0) so
	// that any conflicting local namespace added below overrides them.
	$prefixMap = [];
	foreach ( $this->getInterwikiList() as $iwPrefix ) {
		$prefixMap[$contLang->ucfirst( $iwPrefix )] = 0;
	}

	// Canonical names (this includes $wgExtraNamespaces), then the content
	// language's own names; empty names are skipped.
	$nameLists = [
		$services->getNamespaceInfo()->getCanonicalNamespaces(),
		$contLang->getNamespaces(),
	];
	foreach ( $nameLists as $nameList ) {
		foreach ( $nameList as $nsId => $nsName ) {
			if ( $nsName !== '' ) {
				$prefixMap[$nsName] = $nsId;
			}
		}
	}
	foreach ( $contLang->getNamespaceAliases() as $alias => $nsId ) {
		$prefixMap[$alias] = $nsId;
	}

	// The database searches are case-sensitive, so register the case
	// variants of each prefix as well. The loop iterates over a snapshot
	// of the map, so variants added here are not themselves expanded.
	$capitalLinks = $this->getConfig()->get( MainConfigNames::CapitalLinks );
	foreach ( $prefixMap as $prefix => $nsId ) {
		$lowered = $contLang->lc( $prefix );
		$variants = [
			$contLang->uc( $prefix ),
			$contLang->ucfirst( $lowered ),
			$contLang->ucwords( $prefix ),
			$contLang->ucwords( $lowered ),
			$contLang->ucwordbreaks( $prefix ),
			$contLang->ucwordbreaks( $lowered ),
		];
		if ( !$capitalLinks ) {
			$lowerFirst = [];
			foreach ( $variants as $variant ) {
				$lowerFirst[] = $contLang->lcfirst( $variant );
			}
			$lowerFirst[] = $contLang->lcfirst( $prefix );
			$variants = array_merge( $variants, $lowerFirst );
		}
		foreach ( array_unique( $variants ) as $variant ) {
			if ( $variant === $prefix ) {
				continue;
			}
			$prefixMap[$variant] = $nsId;
		}
	}

	// Order by namespace index; ties are broken by prefix name.
	$unsorted = $prefixMap;
	uksort( $prefixMap, static function ( $left, $right ) use ( $unsorted ) {
		$byIndex = $unsorted[$left] <=> $unsorted[$right];
		if ( $byIndex ) {
			return $byIndex;
		}
		return $left <=> $right;
	} );

	$ok = true;
	foreach ( $prefixMap as $prefix => $nsId ) {
		if ( !$this->checkNamespace( $nsId, $prefix, $options ) ) {
			$ok = false;
		}
	}

	$this->output(
		"{$this->totalPages} pages to fix, {$this->resolvablePages} were resolvable.\n\n"
	);

	foreach ( $prefixMap as $prefix => $nsId ) {
		if ( $nsId == 0 ) {
			continue;
		}

		/* Fix up link destinations for non-interwiki links only.
		 *
		 * For example if a page has [[Foo:Bar]] and then a Foo namespace
		 * is introduced, pagelinks needs to be updated to have
		 * page_namespace = NS_FOO.
		 *
		 * If instead an interwiki prefix was introduced called "Foo",
		 * the link should instead be moved to the iwlinks table. If a new
		 * language is introduced called "Foo", or if there is a pagelink
		 * [[fr:Bar]] when interlanguage magic links are turned on, the
		 * link would have to be moved to the langlinks table. Let's put
		 * those cases in the too-hard basket for now. The consequences are
		 * not especially severe.
		 * @fixme Handle interwiki links, and pagelinks to Category:, File:
		 * which probably need reparsing.
		 */
		$this->checkLinkTable( 'pagelinks', 'pl', $nsId, $prefix, $options );
		$this->checkLinkTable( 'templatelinks', 'tl', $nsId, $prefix, $options );

		// The redirect table has interwiki links randomly mixed in, which
		// must be filtered out. For example [[w:Foo:Bar]] would have
		// rd_interwiki=w and rd_namespace=0, which would match the query
		// for a conflicting namespace "Foo" if filtering wasn't done.
		$this->checkLinkTable(
			'redirect', 'rd', $nsId, $prefix, $options,
			[ 'rd_interwiki' => '' ]
		);
	}

	$this->output(
		"{$this->totalLinks} links to fix, {$this->resolvableLinks} were resolvable, " .
		"{$this->deletedLinks} were deleted.\n"
	);

	return $ok;
}
248 | |
/**
 * List the prefixes of all interwiki entries known to the interwiki lookup.
 *
 * @return string[]
 */
private function getInterwikiList() {
	$interwikiLookup = $this->getServiceContainer()->getInterwikiLookup();
	$entries = $interwikiLookup->getAllPrefixes();

	return array_column( $entries, 'iw_prefix' );
}
256 | |
/**
 * Tell whether $oldTitle is a single-revision redirect whose main-slot
 * content redirects to $newTitle.
 *
 * @param Title $oldTitle Page to inspect
 * @param Title $newTitle Expected redirect target
 * @return bool
 */
private function isSingleRevRedirectTo( Title $oldTitle, Title $newTitle ): bool {
	if ( !$oldTitle->isSingleRevRedirect() ) {
		return false;
	}

	$latestRev = $this->getServiceContainer()
		->getRevisionStore()
		->getRevisionByTitle( $oldTitle, 0, IDBAccessObject::READ_LATEST );

	// Each step may come back empty; an empty step means "not a redirect to $newTitle".
	$mainContent = $latestRev ? $latestRev->getContent( SlotRecord::MAIN ) : null;
	$redirectTarget = $mainContent ? $mainContent->getRedirectTarget() : null;

	return $redirectTarget && $redirectTarget->equals( $newTitle );
}
273 | |
/**
 * Delete a page on behalf of the "Maintenance script" system user,
 * using the unsafe (no permission check) deletion entry point.
 *
 * @param Title $pageToDelete
 * @param string $reason Deletion reason passed to the deletion
 * @return Status
 */
private function deletePage( Title $pageToDelete, string $reason ): Status {
	$services = $this->getServiceContainer();

	return $services->getDeletePageFactory()
		->newDeletePage(
			$services->getWikiPageFactory()->newFromTitle( $pageToDelete ),
			User::newSystemUser( "Maintenance script" )
		)
		->deleteUnsafe( $reason );
}
281 | |
/**
 * Check a given prefix and try to move it into the given destination namespace
 *
 * For every page returned by getTargetList(), one action is chosen (move,
 * merge, delete-old, delete-new or abort), logged, and - when --fix was
 * given - carried out.
 *
 * @param int $ns Destination namespace id
 * @param string $name
 * @param array $options Associative array of validated command-line options
 * @return bool False if any page was aborted or its action failed
 */
private function checkNamespace( $ns, $name, $options ) {
	$targets = $this->getTargetList( $ns, $name, $options );
	$count = $targets->numRows();
	$this->totalPages += $count;
	if ( $count == 0 ) {
		return true;
	}

	$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
	// Without --add-prefix/--add-suffix there is no alternate title to fall back on
	$noAlternate = $options['add-prefix'] == '' && $options['add-suffix'] == '';

	$ok = true;
	foreach ( $targets as $row ) {
		// Find the new title and determine the action to take

		$newTitle = $this->getDestinationTitle(
			$ns, $name, $row->page_namespace, $row->page_title );
		$logStatus = false;
		// $oldTitle is not a valid title by definition but the methods used
		// here shouldn't care
		$oldTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		if ( !$newTitle ) {
			if ( $noAlternate ) {
				$logStatus = 'invalid title and --add-prefix not specified';
				$action = 'abort';
			} else {
				$action = 'alternate';
			}
		} elseif ( $newTitle->exists( IDBAccessObject::READ_LATEST ) ) {
			if ( $this->isSingleRevRedirectTo( $newTitle, $newTitle ) ) {
				// Conceptually this is the new title redirecting to the old title,
				// except that the redirect target is parsed as wikitext, so it
				// actually appears to redirect to itself
				$action = 'delete-new';
			} elseif ( $options['merge'] ) {
				// canMerge() fills $logStatus on failure
				if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
					$action = 'merge';
				} else {
					$action = 'abort';
				}
			} elseif ( $noAlternate ) {
				$action = 'abort';
				$logStatus = 'dest title exists and --add-prefix not specified';
			} else {
				$action = 'alternate';
			}
		} else {
			$action = 'move';
			$logStatus = 'no conflict';
		}
		if ( $action === 'alternate' ) {
			// Keep the result in separate locals: overwriting $ns here would
			// change the destination namespace for every remaining row.
			[ $altNs, $altDbk ] = $this->getDestination( $ns, $name, $row->page_namespace,
				$row->page_title );
			$altTitle = $this->getAlternateTitle( $altNs, $altDbk, $options );
			if ( !$altTitle ) {
				$action = 'abort';
				$logStatus = 'alternate title is invalid';
			} elseif ( $altTitle->exists() ) {
				$action = 'abort';
				$logStatus = 'alternate title conflicts';
			} elseif ( $newTitle && $this->isSingleRevRedirectTo( $oldTitle, $newTitle ) ) {
				// $newTitle is false when the preferred destination was invalid;
				// the redirect check only makes sense for a real title.
				$action = 'delete-old';
				$newTitle = $altTitle;
			} else {
				$action = 'move';
				$logStatus = 'alternate';
				$newTitle = $altTitle;
			}
		}

		// Take the action or log a dry run message

		$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
		$pageOK = true;

		switch ( $action ) {
			case 'delete-old':
				$this->output( "$logTitle move to " . $newTitle->getPrefixedDBkey() .
					" then delete as single-revision redirect to new home$dryRunNote\n" );
				if ( $options['fix'] ) {
					// First move the page so the delete command gets a valid title
					$pageOK = $this->movePage( $row->page_id, $newTitle );
					if ( $pageOK ) {
						$status = $this->deletePage(
							$newTitle,
							"Non-normalized title already redirects to new form"
						);
						if ( !$status->isOK() ) {
							$this->error( $status );
							$pageOK = false;
						}
					}
				}
				break;
			case 'delete-new':
				$this->output( "$logTitle -> " .
					$newTitle->getPrefixedDBkey() . " delete existing page $dryRunNote\n" );
				if ( $options['fix'] ) {
					$status = $this->deletePage( $newTitle, "Delete circular redirect to make way for move" );
					$pageOK = $status->isOK();
					if ( $pageOK ) {
						$pageOK = $this->movePage( $row->page_id, $newTitle );
					} else {
						$this->error( $status );
					}
				}
				break;
			case 'abort':
				$this->output( "$logTitle *** $logStatus\n" );
				$pageOK = false;
				break;
			case 'move':
				$this->output( "$logTitle -> " .
					$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );

				if ( $options['fix'] ) {
					$pageOK = $this->movePage( $row->page_id, $newTitle );
				}
				break;
			case 'merge':
				$this->output( "$logTitle => " .
					$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );

				if ( $options['fix'] ) {
					$pageOK = $this->mergePage( $row, $newTitle );
				}
				break;
		}

		if ( $pageOK ) {
			$this->resolvablePages++;
		} else {
			$ok = false;
		}
	}

	return $ok;
}
427 | |
/**
 * Check and repair the destination fields in a link table
 *
 * Rows whose target is in namespace 0 with a title starting with "$name:"
 * are read in batches of 100, ordered by title and source page. In fix
 * mode each row is rewritten with UPDATE IGNORE to the proper destination;
 * rows that still carry the old target afterwards are deleted.
 *
 * @param string $table The link table name
 * @param string $fieldPrefix The field prefix in the link table
 * @param int $ns Destination namespace id
 * @param string $name
 * @param array $options Associative array of validated command-line options
 * @param array $extraConds Extra conditions for the SQL query
 */
private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
	$extraConds = []
) {
	$dbw = $this->getPrimaryDB();
	$services = $this->getServiceContainer();

	$fromField = "{$fieldPrefix}_from";
	$batchSize = 100;
	$baseQuery = $dbw->newSelectQueryBuilder()
		->select( $fromField )
		->where( $extraConds )
		->limit( $batchSize );

	$linksMigration = $services->getLinksMigration();
	$usesLinksMigration = isset( $linksMigration::$mapping[$table] );
	if ( $usesLinksMigration ) {
		$tableMapping = $linksMigration::$mapping[$table];
		$baseQuery->queryInfo( $linksMigration->getQueryInfo( $table ) );
		[ $namespaceField, $titleField ] = $linksMigration->getTitleFields( $table );
		if ( $tableMapping['config'] === -1 ) {
			$schemaMigrationStage = MIGRATION_NEW;
		} else {
			$schemaMigrationStage = $this->getConfig()->get( $tableMapping['config'] );
		}
		$linkTargetLookup = $services->getLinkTargetLookup();
		$targetIdField = $tableMapping['target_id'];
	} else {
		$namespaceField = "{$fieldPrefix}_namespace";
		$titleField = "{$fieldPrefix}_title";
		$baseQuery->table( $table );
		$baseQuery->fields( [ $namespaceField, $titleField ] );
		// Only meaningful for links-migration tables; initialised for completeness
		$schemaMigrationStage = -1;
		$linkTargetLookup = null;
		$targetIdField = '';
	}

	$titlePattern = new LikeValue( "$name:", $dbw->anyString() );
	$baseQuery
		->andWhere( [
			$namespaceField => 0,
			$dbw->expr( $titleField, IExpression::LIKE, $titlePattern ),
		] )
		->orderBy( [ $titleField, $fromField ] )
		->caller( __METHOD__ );

	$updateRowsPerQuery = $this->getConfig()->get( MainConfigNames::UpdateRowsPerQuery );

	// Empty for the first batch; afterwards "(title, from) > last row seen"
	$continueConds = [];
	while ( true ) {
		$batchQuery = clone $baseQuery;
		$res = $batchQuery->andWhere( $continueConds )->fetchResultSet();
		if ( $res->numRows() == 0 ) {
			break;
		}

		$pendingDeletes = [];
		$lastRow = null;

		foreach ( $res as $row ) {
			$lastRow = $row;
			$rowFrom = $row->$fromField;
			$rowNs = $row->$namespaceField;
			$rowTitle = $row->$titleField;

			$logTitle = "from={$rowFrom} ns={$rowNs} dbk={$rowTitle}";
			$destTitle = $this->getDestinationTitle( $ns, $name, $rowNs, $rowTitle );
			$this->totalLinks++;
			if ( !$destTitle ) {
				$this->output( "$table $logTitle *** INVALID\n" );
				continue;
			}
			$this->resolvableLinks++;
			$destText = $destTitle->getPrefixedDBkey();
			if ( !$options['fix'] ) {
				$this->output( "$table $logTitle -> " . $destText . " DRY RUN\n" );
				continue;
			}

			if ( $usesLinksMigration ) {
				$setValue = [];
				if ( $schemaMigrationStage & SCHEMA_COMPAT_WRITE_NEW ) {
					$setValue[$targetIdField] = $linkTargetLookup->acquireLinkTargetId( $destTitle, $dbw );
				}
				if ( $schemaMigrationStage & SCHEMA_COMPAT_WRITE_OLD ) {
					$setValue["{$fieldPrefix}_namespace"] = $destTitle->getNamespace();
					$setValue["{$fieldPrefix}_title"] = $destTitle->getDBkey();
				}
				$whereCondition = $linksMigration->getLinksConditions(
					$table,
					new TitleValue( 0, $rowTitle )
				);
				$deleteCondition = $linksMigration->getLinksConditions(
					$table,
					new TitleValue( (int)$rowNs, $rowTitle )
				);
			} else {
				$setValue = [
					$namespaceField => $destTitle->getNamespace(),
					$titleField => $destTitle->getDBkey(),
				];
				$whereCondition = [
					$namespaceField => 0,
					$titleField => $rowTitle,
				];
				$deleteCondition = [
					$namespaceField => $rowNs,
					$titleField => $rowTitle,
				];
			}

			$dbw->newUpdateQueryBuilder()
				->update( $table )
				->ignore()
				->set( $setValue )
				->where( [ $fromField => $rowFrom ] )
				->andWhere( $whereCondition )
				->caller( __METHOD__ )
				->execute();

			// In case there is a key conflict on UPDATE IGNORE the row needs deletion
			$pendingDeletes[] = array_merge( [ $fromField => $rowFrom ], $deleteCondition );

			$this->output( "$table $logTitle -> " . $destText . "\n" );
		}

		if ( $options['fix'] && count( $pendingDeletes ) > 0 ) {
			$deleteBatches = array_chunk( $pendingDeletes, $updateRowsPerQuery );
			$multipleBatches = count( $deleteBatches ) > 1;
			$affectedRows = 0;
			foreach ( $deleteBatches as $deleteBatch ) {
				$dbw->newDeleteQueryBuilder()
					->deleteFrom( $table )
					->where( $dbw->factorConds( $deleteBatch ) )
					->caller( __METHOD__ )
					->execute();
				$affectedRows += $dbw->affectedRows();
				if ( $multipleBatches ) {
					$this->waitForReplication();
				}
			}

			// Deleted rows were counted as resolvable above; move them over
			$this->deletedLinks += $affectedRows;
			$this->resolvableLinks -= $affectedRows;
		}

		// The batch had at least one row, so $lastRow is set here
		$continueConds = [
			$dbw->buildComparison( '>', [
				$titleField => $lastRow->$titleField,
				$fromField => $lastRow->$fromField,
			] )
		];

		$this->waitForReplication();
	}
}
584 | |
/**
 * Move the given pseudo-namespace, either replacing the colon with a hyphen
 * (useful for pseudo-namespaces that conflict with interwiki links) or move
 * them to another namespace if specified.
 *
 * Reads the prefix from 'source-pseudo-namespace' and the destination from
 * 'dest-namespace', then hands both to checkNamespace().
 *
 * @param array $options Associative array of validated command-line options
 * @return bool
 */
private function checkPrefix( $options ) {
	[ 'source-pseudo-namespace' => $prefix, 'dest-namespace' => $ns ] = $options;

	$this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" );

	return $this->checkNamespace( $ns, $prefix, $options );
}
599 | |
/**
 * Find pages in main and talk namespaces that have a prefix of the new
 * namespace so we know titles that will need migrating
 *
 * The talk namespace is only searched when --move-talk was given and the
 * destination is a subject namespace.
 *
 * @param int $ns Destination namespace id
 * @param string $name Prefix that is being made a namespace
 * @param array $options Associative array of validated command-line options
 *
 * @return IResultWrapper
 */
private function getTargetList( $ns, $name, $options ) {
	$dbw = $this->getPrimaryDB();

	$includeTalk = $options['move-talk'] &&
		$this->getServiceContainer()->getNamespaceInfo()->isSubject( $ns );
	$checkNamespaces = $includeTalk ? [ NS_MAIN, NS_TALK ] : NS_MAIN;

	$titlePattern = new LikeValue( "$name:", $dbw->anyString() );

	$query = $dbw->newSelectQueryBuilder()
		->select( [ 'page_id', 'page_title', 'page_namespace' ] )
		->from( 'page' )
		->where( [
			'page_namespace' => $checkNamespaces,
			$dbw->expr( 'page_title', IExpression::LIKE, $titlePattern ),
		] )
		->caller( __METHOD__ );

	return $query->fetchResultSet();
}
631 | |
/**
 * Get the preferred destination for a given target page.
 *
 * The "$name:" prefix is cut off the source DB key. For a destination
 * namespace <= 0 the page stays in namespace 0 and the prefix is put back
 * with '-' in place of ':'.
 *
 * @param int $ns The destination namespace ID
 * @param string $name The conflicting prefix
 * @param int $sourceNs The source namespace
 * @param string $sourceDbk The source DB key (i.e. page_title)
 * @return array [ ns, dbkey ], not necessarily valid
 */
private function getDestination( $ns, $name, $sourceNs, $sourceDbk ) {
	$remainder = substr( $sourceDbk, strlen( "$name:" ) );

	// An interwiki or an illegal namespace like "Special" or "Media":
	// try an alternate encoding with '-' for ':'
	$isIllegal = $ns <= 0;
	$destNS = $isIllegal ? 0 : $ns;
	$dbk = $isIllegal ? "$name-" . $remainder : $remainder;

	if ( $sourceNs == NS_TALK ) {
		$nsInfo = $this->getServiceContainer()->getNamespaceInfo();
		if ( $nsInfo->isSubject( $destNS ) ) {
			// This is an associated talk page moved with the --move-talk feature.
			$destNS = $nsInfo->getTalk( $destNS );
		}
	}

	return [ $destNS, $dbk ];
}
656 | |
/**
 * Get the preferred destination title for a given target page.
 *
 * @param int $ns The destination namespace ID
 * @param string $name The conflicting prefix
 * @param int $sourceNs The source namespace
 * @param string $sourceDbk The source DB key (i.e. page_title)
 * @return Title|false False if the destination is not a valid title or cannot exist
 */
private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk ) {
	[ $destNS, $dbk ] = $this->getDestination( $ns, $name, $sourceNs, $sourceDbk );
	$candidate = Title::makeTitleSafe( $destNS, $dbk );

	return ( $candidate && $candidate->canExist() ) ? $candidate : false;
}
673 | |
/**
 * Get an alternative title to move a page to. This is used if the
 * preferred destination title already exists.
 *
 * The alternative is built by wrapping the DB key in the --add-prefix and
 * --add-suffix values.
 *
 * @param int $ns The destination namespace ID
 * @param string $dbk The source DB key (i.e. page_title)
 * @param array $options Associative array of validated command-line options
 * @return Title|false False if neither option is set or the result is not a valid title
 */
private function getAlternateTitle( $ns, $dbk, $options ) {
	$prefix = $options['add-prefix'];
	$suffix = $options['add-suffix'];
	if ( $prefix == '' && $suffix == '' ) {
		return false;
	}

	// Title::makeTitleSafe() yields null for an invalid title; normalise that
	// to false so the documented Title|false contract holds.
	$altTitle = Title::makeTitleSafe( $ns, $prefix . $dbk . $suffix );

	return $altTitle ?: false;
}
692 | |
/**
 * Move a page
 *
 * Rewrites the namespace and title of the page row directly, then brings
 * the *_from_namespace column of the templatelinks, imagelinks and
 * pagelinks rows originating from that page in line with the new namespace.
 *
 * @param int $id The page_id
 * @param LinkTarget $newLinkTarget The new title link target
 * @return bool
 */
private function movePage( $id, LinkTarget $newLinkTarget ) {
	$dbw = $this->getPrimaryDB();
	$destNs = $newLinkTarget->getNamespace();

	$dbw->newUpdateQueryBuilder()
		->update( 'page' )
		->set( [
			'page_namespace' => $destNs,
			'page_title' => $newLinkTarget->getDBkey(),
		] )
		->where( [ 'page_id' => $id ] )
		->caller( __METHOD__ )
		->execute();

	// Update *_from_namespace in links tables.
	// Each entry: table, field prefix, extra fields selected to address individual rows.
	$linkTables = [
		[ 'templatelinks', 'tl', [ 'tl_target_id' ] ],
		[ 'imagelinks', 'il', [ 'il_to' ] ],
		[ 'pagelinks', 'pl', [ 'pl_target_id' ] ],
	];
	$updateRowsPerQuery = $this->getConfig()->get( MainConfigNames::UpdateRowsPerQuery );

	foreach ( $linkTables as [ $table, $fieldPrefix, $keyFields ] ) {
		$fromField = "{$fieldPrefix}_from";
		$fromNamespaceField = "{$fieldPrefix}_from_namespace";

		// Rows from this page that still carry a different source namespace
		$staleRows = $dbw->newSelectQueryBuilder()
			->select( $keyFields )
			->from( $table )
			->where( [ $fromField => $id ] )
			->andWhere( $dbw->expr( $fromNamespaceField, '!=', $destNs ) )
			->caller( __METHOD__ )
			->fetchResultSet();
		if ( !$staleRows ) {
			continue;
		}

		$rowConds = [];
		foreach ( $staleRows as $staleRow ) {
			$rowConds[] = array_merge( [ $fromField => $id ], (array)$staleRow );
		}

		// One transaction round per chunk of at most $updateRowsPerQuery rows
		foreach ( array_chunk( $rowConds, $updateRowsPerQuery ) as $chunk ) {
			$this->beginTransactionRound( __METHOD__ );
			$dbw->newUpdateQueryBuilder()
				->update( $table )
				->set( [ $fromNamespaceField => $destNs ] )
				->where( $dbw->factorConds( $chunk ) )
				->caller( __METHOD__ )
				->execute();
			$this->commitTransactionRound( __METHOD__ );
		}
	}

	return true;
}
756 | |
/**
 * Determine if we can merge a page.
 * We check if an inaccessible revision would become the latest and
 * deny the merge if so -- it's theoretically possible to update the
 * latest revision, but opens a can of worms -- search engine updates,
 * recentchanges review, etc.
 *
 * The merge is also denied when the latest revision of either page cannot
 * be loaded, since the timestamps cannot be compared in that case.
 *
 * @param int $id The page_id
 * @param PageIdentity $page
 * @param string &$logStatus This is set to the log status message on failure @phan-output-reference
 * @return bool
 */
private function canMerge( $id, PageIdentity $page, &$logStatus ) {
	$revisionLookup = $this->getServiceContainer()->getRevisionLookup();
	$latestDest = $revisionLookup->getRevisionByTitle( $page, 0,
		IDBAccessObject::READ_LATEST );
	$latestSource = $revisionLookup->getRevisionByPageId( $id, 0,
		IDBAccessObject::READ_LATEST );

	// Either lookup may find nothing; refuse the merge instead of
	// dereferencing a missing revision.
	if ( !$latestDest || !$latestSource ) {
		$logStatus = 'cannot merge since a latest revision could not be loaded';
		return false;
	}

	if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
		$logStatus = 'cannot merge since source is later';
		return false;
	}

	return true;
}
782 | |
/**
 * Merge page histories
 *
 * Reassigns every revision of the source page to the destination page,
 * deletes the source page row, and then runs a LinksDeletionUpdate for the
 * source page.
 *
 * @param stdClass $row Page row
 * @param Title $newTitle
 * @return bool False if the destination page ID could not be determined
 */
private function mergePage( $row, Title $newTitle ) {
	$updateRowsPerQuery = $this->getConfig()->get( MainConfigNames::UpdateRowsPerQuery );

	$id = $row->page_id;

	// Construct the WikiPage object we will need later, while the
	// page_id still exists. Note that this cannot use makeTitleSafe(),
	// we are deliberately constructing an invalid title.
	$sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	$sourceTitle->resetArticleID( $id );
	$wikiPage = $this->getServiceContainer()->getWikiPageFactory()->newFromTitle( $sourceTitle );
	$wikiPage->loadPageData( IDBAccessObject::READ_LATEST );

	// Read the destination ID from the primary DB and refuse to continue
	// without one: the writes below would otherwise attach all source
	// revisions to a non-existent page and then delete the source page row.
	$destId = $newTitle->getArticleID( IDBAccessObject::READ_LATEST );
	if ( $destId <= 0 ) {
		$this->error( "Cannot merge page id=$id: destination " .
			$newTitle->getPrefixedDBkey() . " has no page ID" );
		return false;
	}

	$dbw = $this->getPrimaryDB();
	$this->beginTransactionRound( __METHOD__ );
	$revIds = $dbw->newSelectQueryBuilder()
		->select( 'rev_id' )
		->from( 'revision' )
		->where( [ 'rev_page' => $id ] )
		->caller( __METHOD__ )
		->fetchFieldValues();
	$updateBatches = array_chunk( array_map( 'intval', $revIds ), $updateRowsPerQuery );
	foreach ( $updateBatches as $updateBatch ) {
		$dbw->newUpdateQueryBuilder()
			->update( 'revision' )
			->set( [ 'rev_page' => $destId ] )
			->where( [ 'rev_id' => $updateBatch ] )
			->caller( __METHOD__ )
			->execute();
		// With several batches, commit after each one to keep transactions small
		if ( count( $updateBatches ) > 1 ) {
			$this->commitTransactionRound( __METHOD__ );
			$this->beginTransactionRound( __METHOD__ );
		}
	}
	$dbw->newDeleteQueryBuilder()
		->deleteFrom( 'page' )
		->where( [ 'page_id' => $id ] )
		->caller( __METHOD__ )
		->execute();
	$this->commitTransactionRound( __METHOD__ );

	/* Call LinksDeletionUpdate to delete outgoing links from the old title,
	 * and update category counts.
	 *
	 * Calling external code with a fake broken Title is a fairly dubious
	 * idea. It's necessary because it's quite a lot of code to duplicate,
	 * but that also makes it fragile since it would be easy for someone to
	 * accidentally introduce an assumption of title validity to the code we
	 * are calling.
	 */
	DeferredUpdates::addUpdate( new LinksDeletionUpdate( $wikiPage ) );
	DeferredUpdates::doUpdates();

	return true;
}
846 | } |
847 | |
848 | // @codeCoverageIgnoreStart |
849 | $maintClass = NamespaceDupes::class; |
850 | require_once RUN_MAINTENANCE_IF_MAIN; |
851 | // @codeCoverageIgnoreEnd |