Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 451 |
|
0.00% |
0 / 16 |
CRAP | |
0.00% |
0 / 1 |
| NamespaceDupes | |
0.00% |
0 / 451 |
|
0.00% |
0 / 16 |
9900 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
2 | |||
| execute | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
12 | |||
| checkAll | |
0.00% |
0 / 54 |
|
0.00% |
0 / 1 |
306 | |||
| getInterwikiList | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| isSingleRevRedirectTo | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
30 | |||
| deletePage | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
| checkNamespace | |
0.00% |
0 / 92 |
|
0.00% |
0 / 1 |
930 | |||
| checkLinkTable | |
0.00% |
0 / 121 |
|
0.00% |
0 / 1 |
272 | |||
| checkPrefix | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
| getTargetList | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 | |||
| getDestination | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
20 | |||
| getDestinationTitle | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
| getAlternateTitle | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
| movePage | |
0.00% |
0 / 52 |
|
0.00% |
0 / 1 |
42 | |||
| canMerge | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
6 | |||
| mergePage | |
0.00% |
0 / 35 |
|
0.00% |
0 / 1 |
12 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Check for articles to fix after adding/deleting namespaces |
| 4 | * |
| 5 | * Copyright © 2005-2007 Brooke Vibber <bvibber@wikimedia.org> |
| 6 | * https://www.mediawiki.org/ |
| 7 | * |
| 8 | * @license GPL-2.0-or-later |
| 9 | * @file |
| 10 | * @ingroup Maintenance |
| 11 | */ |
| 12 | |
| 13 | // @codeCoverageIgnoreStart |
| 14 | require_once __DIR__ . '/Maintenance.php'; |
| 15 | // @codeCoverageIgnoreEnd |
| 16 | |
| 17 | use MediaWiki\Deferred\DeferredUpdates; |
| 18 | use MediaWiki\Deferred\LinksUpdate\ImageLinksTable; |
| 19 | use MediaWiki\Deferred\LinksUpdate\LinksDeletionUpdate; |
| 20 | use MediaWiki\Deferred\LinksUpdate\PageLinksTable; |
| 21 | use MediaWiki\Deferred\LinksUpdate\TemplateLinksTable; |
| 22 | use MediaWiki\Linker\LinkTarget; |
| 23 | use MediaWiki\MainConfigNames; |
| 24 | use MediaWiki\Maintenance\Maintenance; |
| 25 | use MediaWiki\Page\PageIdentity; |
| 26 | use MediaWiki\Revision\SlotRecord; |
| 27 | use MediaWiki\Status\Status; |
| 28 | use MediaWiki\Title\Title; |
| 29 | use MediaWiki\Title\TitleValue; |
| 30 | use Wikimedia\Rdbms\IDBAccessObject; |
| 31 | use Wikimedia\Rdbms\IExpression; |
| 32 | use Wikimedia\Rdbms\IResultWrapper; |
| 33 | use Wikimedia\Rdbms\LikeValue; |
| 34 | |
| 35 | /** |
| 36 | * Maintenance script that checks for articles to fix after |
| 37 | * adding/deleting namespaces. |
| 38 | * |
| 39 | * @ingroup Maintenance |
| 40 | */ |
| 41 | class NamespaceDupes extends Maintenance { |
| 42 | |
/**
 * Number of pages needing a fix that could be resolved automatically.
 * Incremented in checkNamespace() for every page handled successfully.
 */
private int $resolvablePages = 0;

/**
 * Total number of pages that need fixing.
 */
private int $totalPages = 0;

/**
 * Number of links needing a fix that could be resolved automatically.
 * Reduced again in checkLinkTable() by the number of rows that had to be
 * deleted instead of updated.
 */
private int $resolvableLinks = 0;

/**
 * Total number of erroneous links.
 */
private int $totalLinks = 0;

/**
 * Number of links deleted because they could not be resolved
 * automatically due to the target row already existing.
 */
private int $deletedLinks = 0;
| 73 | |
/**
 * Register the script description and its command-line options.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Find and fix pages affected by namespace addition/removal' );

	// Each entry is [ option name, help text, whether the option takes an argument ].
	// The order here is the order in which the options are registered.
	$optionSpecs = [
		[ 'fix', 'Attempt to automatically fix errors and delete broken links', false ],
		[
			'merge',
			"Instead of renaming conflicts, do a history merge with " .
				"the correct title",
			false,
		],
		[
			'add-suffix',
			"Dupes will be renamed with correct namespace with " .
				"<text> appended after the article name",
			true,
		],
		[
			'add-prefix',
			"Dupes will be renamed with correct namespace with " .
				"<text> prepended before the article name",
			true,
		],
		[
			'source-pseudo-namespace',
			"Move all pages with the given source " .
				"prefix (with an implied colon following it). If --dest-namespace is not specified, " .
				"the colon will be replaced with a hyphen.",
			true,
		],
		[
			'dest-namespace',
			"In combination with --source-pseudo-namespace, " .
				"specify the namespace ID of the destination.",
			true,
		],
		[
			'move-talk',
			"If this is specified, pages in the Talk namespace that " .
				"begin with a conflicting prefix will be renamed, for example " .
				"Talk:File:Foo -> File_Talk:Foo",
			false,
		],
	];

	foreach ( $optionSpecs as [ $optionName, $helpText, $takesArgument ] ) {
		// None of the options is required.
		$this->addOption( $optionName, $helpText, false, $takesArgument );
	}
}
| 94 | |
/**
 * Entry point: collect the command-line options, run either the
 * single-prefix check or the check of all namespaces, and print a verdict.
 */
public function execute() {
	$options = [
		'fix' => $this->hasOption( 'fix' ),
		'merge' => $this->hasOption( 'merge' ),
		'add-suffix' => $this->getOption( 'add-suffix', '' ),
		'add-prefix' => $this->getOption( 'add-prefix', '' ),
		'move-talk' => $this->hasOption( 'move-talk' ),
		'source-pseudo-namespace' => $this->getOption( 'source-pseudo-namespace', '' ),
		'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) )
	];

	// An explicit pseudo-namespace restricts the run to that one prefix.
	$retval = $options['source-pseudo-namespace'] !== ''
		? $this->checkPrefix( $options )
		: $this->checkAll( $options );

	$this->output( $retval ? "\nLooks good!\n" : "\nOh noeees\n" );
}
| 118 | |
/**
 * Check every known interwiki prefix, namespace name and namespace alias.
 *
 * Builds a map of prefix => namespace ID (interwiki prefixes map to 0),
 * adds case variants of each prefix, then checks the page table and the
 * link tables for titles that start with one of those prefixes.
 *
 * @param array $options Associative array of validated command-line options
 *
 * @return bool True if checkNamespace() reported success for every prefix
 */
private function checkAll( $options ) {
	$services = $this->getServiceContainer();
	$contLang = $services->getContentLanguage();

	// Interwiki prefixes are added first so that a conflicting local
	// namespace name replaces them below.
	$prefixToNs = [];
	foreach ( $this->getInterwikiList() as $iwPrefix ) {
		$prefixToNs[$contLang->ucfirst( $iwPrefix )] = 0;
	}

	// Canonical names first (this includes $wgExtraNamespaces), then the
	// content language's names. Empty names are skipped.
	$nameLists = [
		$services->getNamespaceInfo()->getCanonicalNamespaces(),
		$contLang->getNamespaces(),
	];
	foreach ( $nameLists as $nameList ) {
		foreach ( $nameList as $nsId => $nsName ) {
			if ( $nsName !== '' ) {
				$prefixToNs[$nsName] = $nsId;
			}
		}
	}
	foreach ( $contLang->getNamespaceAliases() as $alias => $nsId ) {
		$prefixToNs[$alias] = $nsId;
	}

	// Case variants have to be checked as well, because the database
	// searches are case-sensitive.
	$capitalLinks = $this->getConfig()->get( MainConfigNames::CapitalLinks );
	foreach ( $prefixToNs as $prefix => $nsId ) {
		$lowered = $contLang->lc( $prefix );
		$variants = [
			$contLang->uc( $prefix ),
			$contLang->ucfirst( $lowered ),
			$contLang->ucwords( $prefix ),
			$contLang->ucwords( $lowered ),
			$contLang->ucwordbreaks( $prefix ),
			$contLang->ucwordbreaks( $lowered ),
		];
		if ( !$capitalLinks ) {
			// Titles may start with a lowercase letter, so add the
			// lowercase-first form of every variant and of the prefix itself.
			$variants = array_merge(
				$variants,
				array_map( [ $contLang, 'lcfirst' ], $variants )
			);
			$variants[] = $contLang->lcfirst( $prefix );
		}
		foreach ( array_unique( $variants ) as $variant ) {
			if ( $variant !== $prefix ) {
				$prefixToNs[$variant] = $nsId;
			}
		}
	}

	// Order by namespace index; prefixes sharing an index are ordered by name.
	$nsByPrefix = $prefixToNs;
	uksort( $prefixToNs, static function ( $left, $right ) use ( $nsByPrefix ) {
		$byIndex = $nsByPrefix[$left] <=> $nsByPrefix[$right];
		return $byIndex ?: $left <=> $right;
	} );

	$ok = true;
	foreach ( $prefixToNs as $prefix => $nsId ) {
		// checkNamespace() must run for every prefix, even after a failure.
		$ok = $this->checkNamespace( $nsId, $prefix, $options ) && $ok;
	}

	$this->output(
		"{$this->totalPages} pages to fix, " .
		"{$this->resolvablePages} were resolvable.\n\n"
	);

	foreach ( $prefixToNs as $prefix => $nsId ) {
		if ( $nsId == 0 ) {
			continue;
		}

		/* Fix up link destinations for non-interwiki links only.
		 *
		 * For example if a page has [[Foo:Bar]] and then a Foo namespace
		 * is introduced, pagelinks needs to be updated to have
		 * page_namespace = NS_FOO.
		 *
		 * If instead an interwiki prefix was introduced called "Foo",
		 * the link should instead be moved to the iwlinks table. If a new
		 * language is introduced called "Foo", or if there is a pagelink
		 * [[fr:Bar]] when interlanguage magic links are turned on, the
		 * link would have to be moved to the langlinks table. Let's put
		 * those cases in the too-hard basket for now. The consequences are
		 * not especially severe.
		 * @fixme Handle interwiki links, and pagelinks to Category:, File:
		 * which probably need reparsing.
		 */
		$this->checkLinkTable( 'pagelinks', 'pl', $nsId, $prefix, $options );
		$this->checkLinkTable( 'templatelinks', 'tl', $nsId, $prefix, $options );

		// The redirect table has interwiki links randomly mixed in, we
		// need to filter those out. For example [[w:Foo:Bar]] would
		// have rd_interwiki=w and rd_namespace=0, which would match the
		// query for a conflicting namespace "Foo" if filtering wasn't done.
		$this->checkLinkTable( 'redirect', 'rd', $nsId, $prefix, $options,
			[ 'rd_interwiki' => '' ] );
	}

	$this->output(
		"{$this->totalLinks} links to fix, " .
		"{$this->resolvableLinks} were resolvable, " .
		"{$this->deletedLinks} were deleted.\n"
	);

	return $ok;
}
| 237 | |
/**
 * List the prefixes known to the interwiki lookup service.
 *
 * @return string[] The 'iw_prefix' value of every entry returned by getAllPrefixes()
 */
private function getInterwikiList() {
	return array_column(
		$this->getServiceContainer()->getInterwikiLookup()->getAllPrefixes(),
		'iw_prefix'
	);
}
| 245 | |
/**
 * Tell whether $oldTitle is a single-revision redirect whose main-slot
 * content redirects to $newTitle.
 *
 * @param Title $oldTitle Page to inspect
 * @param Title $newTitle Expected redirect target
 * @return bool False if the page is not a single-revision redirect, if its
 *  latest revision or main-slot content cannot be loaded, or if the
 *  redirect target differs from $newTitle
 */
private function isSingleRevRedirectTo( Title $oldTitle, Title $newTitle ): bool {
	if ( !$oldTitle->isSingleRevRedirect() ) {
		return false;
	}

	$latestRev = $this->getServiceContainer()
		->getRevisionStore()
		->getRevisionByTitle( $oldTitle, 0, IDBAccessObject::READ_LATEST );
	$mainContent = $latestRev ? $latestRev->getContent( SlotRecord::MAIN ) : null;
	if ( !$mainContent ) {
		return false;
	}

	$redirectTarget = $mainContent->getRedirectTarget();
	return $redirectTarget ? $redirectTarget->equals( $newTitle ) : false;
}
| 262 | |
/**
 * Delete a page on behalf of the "Maintenance script" system user.
 *
 * @param Title $pageToDelete Page to delete
 * @param string $reason Deletion reason passed to deleteUnsafe()
 * @return Status Result returned by deleteUnsafe()
 */
private function deletePage( Title $pageToDelete, string $reason ): Status {
	$services = $this->getServiceContainer();
	$wikiPage = $services->getWikiPageFactory()->newFromTitle( $pageToDelete );
	$performer = User::newSystemUser( "Maintenance script" );

	return $services->getDeletePageFactory()
		->newDeletePage( $wikiPage, $performer )
		->deleteUnsafe( $reason );
}
| 270 | |
/**
 * Check a given prefix and try to move it into the given destination namespace
 *
 * For every page returned by getTargetList() an action is chosen:
 *  - 'move': the destination title is free (or an alternate title is used)
 *  - 'merge': the destination exists and --merge allows a history merge
 *  - 'delete-new': the destination is a single-revision redirect to itself
 *  - 'delete-old': the source is a single-revision redirect to the destination
 *  - 'abort': nothing can be done automatically
 * The action is carried out only when the 'fix' option is set; otherwise a
 * dry-run message is printed.
 *
 * @param int $ns Destination namespace id
 * @param string $name
 * @param array $options Associative array of validated command-line options
 * @return bool True if every page could be handled, false if any was aborted or failed
 */
private function checkNamespace( $ns, $name, $options ) {
	$targets = $this->getTargetList( $ns, $name, $options );
	$count = $targets->numRows();
	$this->totalPages += $count;
	if ( $count == 0 ) {
		return true;
	}

	$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';

	$ok = true;
	foreach ( $targets as $row ) {
		// Find the new title and determine the action to take

		$newTitle = $this->getDestinationTitle(
			$ns, $name, $row->page_namespace, $row->page_title );
		$logStatus = false;
		// $oldTitle is not a valid title by definition but the methods I use here
		// shouldn't care
		$oldTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		if ( !$newTitle ) {
			if ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
				$logStatus = 'invalid title and --add-prefix not specified';
				$action = 'abort';
			} else {
				$action = 'alternate';
			}
		} elseif ( $newTitle->exists( IDBAccessObject::READ_LATEST ) ) {
			if ( $this->isSingleRevRedirectTo( $newTitle, $newTitle ) ) {
				// Conceptually this is the new title redirecting to the old title
				// except that the redirect target is parsed as wikitext so it
				// actually appears to redirect to itself
				$action = 'delete-new';
			} elseif ( $options['merge'] ) {
				// canMerge() fills in $logStatus when it refuses the merge
				if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
					$action = 'merge';
				} else {
					$action = 'abort';
				}
			} elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
				$action = 'abort';
				$logStatus = 'dest title exists and --add-prefix not specified';
			} else {
				$action = 'alternate';
			}
		} else {
			$action = 'move';
			$logStatus = 'no conflict';
		}
		if ( $action === 'alternate' ) {
			// Use a row-local namespace variable: assigning to $ns here would
			// change the destination namespace for all following rows.
			[ $altNs, $dbk ] = $this->getDestination( $ns, $name, $row->page_namespace,
				$row->page_title );
			$altTitle = $this->getAlternateTitle( $altNs, $dbk, $options );
			if ( !$altTitle ) {
				$action = 'abort';
				$logStatus = 'alternate title is invalid';
			} elseif ( $altTitle->exists() ) {
				$action = 'abort';
				$logStatus = 'alternate title conflicts';
			} elseif ( $newTitle && $this->isSingleRevRedirectTo( $oldTitle, $newTitle ) ) {
				// $newTitle is false when the preferred destination is invalid;
				// in that case the old page cannot be a redirect to it.
				$action = 'delete-old';
				$newTitle = $altTitle;
			} else {
				$action = 'move';
				$logStatus = 'alternate';
				$newTitle = $altTitle;
			}
		}

		// Take the action or log a dry run message

		$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
		$pageOK = true;

		switch ( $action ) {
			case 'delete-old':
				$this->output( "$logTitle move to " . $newTitle->getPrefixedDBKey() .
					" then delete as single-revision redirect to new home$dryRunNote\n" );
				if ( $options['fix'] ) {
					// First move the page so the delete command gets a valid title
					$pageOK = $this->movePage( $row->page_id, $newTitle );
					if ( $pageOK ) {
						$status = $this->deletePage(
							$newTitle,
							"Non-normalized title already redirects to new form"
						);
						if ( !$status->isOK() ) {
							$this->error( $status );
							$pageOK = false;
						}
					}
				}
				break;
			case "delete-new":
				$this->output( "$logTitle -> " .
					$newTitle->getPrefixedDBkey() . " delete existing page $dryRunNote\n" );
				if ( $options['fix'] ) {
					$status = $this->deletePage( $newTitle, "Delete circular redirect to make way for move" );
					$pageOK = $status->isOK();
					if ( $pageOK ) {
						$pageOK = $this->movePage( $row->page_id, $newTitle );
					} else {
						$this->error( $status );
					}
				}
				break;
			case 'abort':
				$this->output( "$logTitle *** $logStatus\n" );
				$pageOK = false;
				break;
			case 'move':
				$this->output( "$logTitle -> " .
					$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );

				if ( $options['fix'] ) {
					$pageOK = $this->movePage( $row->page_id, $newTitle );
				}
				break;
			case 'merge':
				$this->output( "$logTitle => " .
					$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );

				if ( $options['fix'] ) {
					$pageOK = $this->mergePage( $row, $newTitle );
				}
				break;
		}

		if ( $pageOK ) {
			$this->resolvablePages++;
		} else {
			$ok = false;
		}
	}

	return $ok;
}
| 416 | |
/**
 * Check and repair the destination fields in a link table.
 *
 * Selects rows whose target is in namespace 0 with a title starting with
 * "$name:", in batches of 100 ordered by title and source page. Each row
 * is counted in $totalLinks; rows with a valid destination title are
 * counted in $resolvableLinks. With the 'fix' option the row is rewritten
 * with UPDATE IGNORE, and any row still carrying the old target afterwards
 * is deleted and moved from the resolvable count to $deletedLinks.
 *
 * @param string $table The link table name
 * @param string $fieldPrefix The field prefix in the link table
 * @param int $ns Destination namespace id
 * @param string $name
 * @param array $options Associative array of validated command-line options
 * @param array $extraConds Extra conditions for the SQL query
 */
private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
	$extraConds = []
) {
	$services = $this->getServiceContainer();

	// These link tables are reached through their own virtual domain;
	// every other table uses the regular primary connection.
	$virtualDomains = [
		'templatelinks' => TemplateLinksTable::VIRTUAL_DOMAIN,
		'imagelinks' => ImageLinksTable::VIRTUAL_DOMAIN,
		'pagelinks' => PageLinksTable::VIRTUAL_DOMAIN,
	];
	$dbw = isset( $virtualDomains[$table] )
		? $services->getConnectionProvider()->getPrimaryDatabase( $virtualDomains[$table] )
		: $this->getPrimaryDB();

	$fromField = "{$fieldPrefix}_from";
	$batchSize = 100;
	$sqb = $dbw->newSelectQueryBuilder()
		->select( $fromField )
		->where( $extraConds )
		->limit( $batchSize );

	$linksMigration = $services->getLinksMigration();
	$isMigratedTable = isset( $linksMigration::$mapping[$table] );
	if ( $isMigratedTable ) {
		$tableMapping = $linksMigration::$mapping[$table];
		$sqb->queryInfo( $linksMigration->getQueryInfo( $table ) );
		[ $namespaceField, $titleField ] = $linksMigration->getTitleFields( $table );
		$schemaMigrationStage = $tableMapping['config'] === -1
			? MIGRATION_NEW
			// @phan-suppress-next-line PhanTypeMismatchArgument
			: $this->getConfig()->get( $tableMapping['config'] );
		$linkTargetLookup = $services->getLinkTargetLookup();
		$targetIdField = $tableMapping['target_id'];
	} else {
		$namespaceField = "{$fieldPrefix}_namespace";
		$titleField = "{$fieldPrefix}_title";
		$sqb->table( $table );
		$sqb->fields( [ $namespaceField, $titleField ] );
		// Variables only used for links migration, init only
		$schemaMigrationStage = -1;
		$linkTargetLookup = null;
		$targetIdField = '';
	}
	$sqb->andWhere( [
			$namespaceField => 0,
			$dbw->expr( $titleField, IExpression::LIKE, new LikeValue( "$name:", $dbw->anyString() ) ),
		] )
		->orderBy( [ $titleField, $fromField ] )
		->caller( __METHOD__ );

	$updateRowsPerQuery = $this->getConfig()->get( MainConfigNames::UpdateRowsPerQuery );

	// Paging condition; empty for the first batch.
	$batchConds = [];
	while ( true ) {
		$res = ( clone $sqb )
			->andWhere( $batchConds )
			->fetchResultSet();
		if ( $res->numRows() == 0 ) {
			break;
		}

		$rowsToDeleteIfStillExists = [];
		$lastRow = null;

		foreach ( $res as $row ) {
			// Remember the row so the next batch can continue after it.
			$lastRow = $row;

			$fromId = $row->$fromField;
			$oldNs = $row->$namespaceField;
			$oldDbk = $row->$titleField;
			$logTitle = "from={$fromId} ns={$oldNs} dbk={$oldDbk}";

			$destTitle = $this->getDestinationTitle( $ns, $name, $oldNs, $oldDbk );
			$this->totalLinks++;
			if ( !$destTitle ) {
				$this->output( "$table $logTitle *** INVALID\n" );
				continue;
			}
			$this->resolvableLinks++;
			if ( !$options['fix'] ) {
				$this->output( "$table $logTitle -> " .
					$destTitle->getPrefixedDBkey() . " DRY RUN\n" );
				continue;
			}

			if ( $isMigratedTable ) {
				// Write whichever of the new and old target columns the
				// migration stage says are written.
				$setValue = [];
				if ( $schemaMigrationStage & SCHEMA_COMPAT_WRITE_NEW ) {
					$setValue[$targetIdField] = $linkTargetLookup->acquireLinkTargetId( $destTitle, $dbw );
				}
				if ( $schemaMigrationStage & SCHEMA_COMPAT_WRITE_OLD ) {
					$setValue["{$fieldPrefix}_namespace"] = $destTitle->getNamespace();
					$setValue["{$fieldPrefix}_title"] = $destTitle->getDBkey();
				}
				$whereCondition = $linksMigration->getLinksConditions(
					$table,
					new TitleValue( 0, $oldDbk )
				);
				$deleteCondition = $linksMigration->getLinksConditions(
					$table,
					new TitleValue( (int)$oldNs, $oldDbk )
				);
			} else {
				$setValue = [
					$namespaceField => $destTitle->getNamespace(),
					$titleField => $destTitle->getDBkey()
				];
				$whereCondition = [
					$namespaceField => 0,
					$titleField => $oldDbk
				];
				$deleteCondition = [
					$namespaceField => $oldNs,
					$titleField => $oldDbk,
				];
			}

			$dbw->newUpdateQueryBuilder()
				->update( $table )
				->ignore()
				->set( $setValue )
				->where( [ $fromField => $fromId ] )
				->andWhere( $whereCondition )
				->caller( __METHOD__ )
				->execute();

			// In case there is a key conflict on UPDATE IGNORE the row needs deletion
			$rowsToDeleteIfStillExists[] = array_merge( [ $fromField => $fromId ], $deleteCondition );

			$this->output( "$table $logTitle -> " .
				$destTitle->getPrefixedDBkey() . "\n"
			);
		}

		if ( $options['fix'] && count( $rowsToDeleteIfStillExists ) > 0 ) {
			$affectedRows = 0;
			$deleteBatches = array_chunk( $rowsToDeleteIfStillExists, $updateRowsPerQuery );
			$severalDeleteBatches = count( $deleteBatches ) > 1;
			foreach ( $deleteBatches as $deleteBatch ) {
				$dbw->newDeleteQueryBuilder()
					->deleteFrom( $table )
					->where( $dbw->factorConds( $deleteBatch ) )
					->caller( __METHOD__ )
					->execute();
				$affectedRows += $dbw->affectedRows();
				if ( $severalDeleteBatches ) {
					$this->waitForReplication();
				}
			}

			// Rows that had to be deleted were not resolved after all.
			$this->deletedLinks += $affectedRows;
			$this->resolvableLinks -= $affectedRows;
		}

		// The result set was non-empty, so $lastRow is set at this point.
		$batchConds = [
			$dbw->buildComparison( '>', [
				// @phan-suppress-next-line PhanTypeExpectedObjectPropAccessButGotNull
				$titleField => $lastRow->$titleField,
				// @phan-suppress-next-line PhanTypeExpectedObjectPropAccessButGotNull
				$fromField => $lastRow->$fromField,
			] )
		];

		$this->waitForReplication();
	}
}
| 584 | |
/**
 * Check a single pseudo-namespace prefix given on the command line.
 *
 * Pages are either moved to the namespace given with --dest-namespace or,
 * when the destination is not a usable namespace, renamed with the colon
 * replaced by a hyphen (useful for pseudo-namespaces that conflict with
 * interwiki links).
 *
 * @param array $options Associative array of validated command-line options
 * @return bool Result of checkNamespace() for the prefix
 */
private function checkPrefix( $options ) {
	[
		'source-pseudo-namespace' => $prefix,
		'dest-namespace' => $ns,
	] = $options;

	$this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" );

	return $this->checkNamespace( $ns, $prefix, $options );
}
| 599 | |
/**
 * Find the pages whose title starts with "$name:" and therefore needs
 * migrating now that the prefix is a namespace.
 *
 * The main namespace is always searched. The Talk namespace is searched
 * too when the 'move-talk' option is set and $ns is a subject namespace.
 *
 * @param int $ns Destination namespace id
 * @param string $name Prefix that is being made a namespace
 * @param array $options Associative array of validated command-line options
 *
 * @return IResultWrapper Rows with page_id, page_title and page_namespace
 */
private function getTargetList( $ns, $name, $options ) {
	$includeTalk = $options['move-talk']
		&& $this->getServiceContainer()->getNamespaceInfo()->isSubject( $ns );
	$checkNamespaces = $includeTalk ? [ NS_MAIN, NS_TALK ] : NS_MAIN;

	$dbw = $this->getPrimaryDB();
	$titleHasPrefix = $dbw->expr(
		'page_title',
		IExpression::LIKE,
		new LikeValue( "$name:", $dbw->anyString() )
	);

	return $dbw->newSelectQueryBuilder()
		->select( [ 'page_id', 'page_title', 'page_namespace' ] )
		->from( 'page' )
		->where( [
			'page_namespace' => $checkNamespaces,
			$titleHasPrefix,
		] )
		->caller( __METHOD__ )
		->fetchResultSet();
}
| 631 | |
/**
 * Work out the preferred namespace and DB key for a given target page.
 *
 * The "$name:" prefix is removed from the source DB key. If $ns is zero or
 * negative, the page stays in namespace 0 and the prefix is put back with
 * '-' in place of ':'.
 *
 * @param int $ns The destination namespace ID
 * @param string $name The conflicting prefix
 * @param int $sourceNs The source namespace
 * @param string $sourceDbk The source DB key (i.e. page_title)
 * @return array [ ns, dbkey ], not necessarily valid
 */
private function getDestination( $ns, $name, $sourceNs, $sourceDbk ) {
	$remainder = substr( $sourceDbk, strlen( "$name:" ) );

	// An interwiki or an illegal namespace like "Special" or "Media":
	// try an alternate encoding with '-' for ':'
	$useHyphenForm = $ns <= 0;
	$destNS = $useHyphenForm ? 0 : $ns;
	$dbk = $useHyphenForm ? "$name-" . $remainder : $remainder;

	$nsInfo = $this->getServiceContainer()->getNamespaceInfo();
	if ( $sourceNs == NS_TALK && $nsInfo->isSubject( $destNS ) ) {
		// This is an associated talk page moved with the --move-talk feature.
		$destNS = $nsInfo->getTalk( $destNS );
	}

	return [ $destNS, $dbk ];
}
| 656 | |
/**
 * Build the preferred destination title for a given target page.
 *
 * @param int $ns The destination namespace ID
 * @param string $name The conflicting prefix
 * @param int $sourceNs The source namespace
 * @param string $sourceDbk The source DB key (i.e. page_title)
 * @return Title|false False if no title can be made from the destination
 *  or if the resulting title cannot exist
 */
private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk ) {
	[ $destNS, $dbk ] = $this->getDestination( $ns, $name, $sourceNs, $sourceDbk );
	$candidate = Title::makeTitleSafe( $destNS, $dbk );

	return ( $candidate && $candidate->canExist() ) ? $candidate : false;
}
| 673 | |
/**
 * Get an alternative title to move a page to. This is used if the
 * preferred destination title already exists.
 *
 * The alternative is the DB key wrapped in the --add-prefix and
 * --add-suffix texts.
 *
 * @param int $ns The destination namespace ID
 * @param string $dbk The source DB key (i.e. page_title)
 * @param array $options Associative array of validated command-line options
 * @return Title|false|null False if neither a prefix nor a suffix was given;
 *  otherwise whatever Title::makeTitleSafe() returns for the new DB key
 */
private function getAlternateTitle( $ns, $dbk, $options ) {
	[ 'add-prefix' => $prefix, 'add-suffix' => $suffix ] = $options;

	$hasAffix = !( $prefix == '' && $suffix == '' );
	if ( !$hasAffix ) {
		return false;
	}

	return Title::makeTitleSafe( $ns, $prefix . $dbk . $suffix );
}
| 692 | |
/**
 * Move a page by rewriting its row in the page table, then bring the
 * *_from_namespace column of its outgoing link rows in line with the new
 * namespace.
 *
 * @param int $id The page_id
 * @param LinkTarget $newLinkTarget The new title link target
 * @return bool Always true
 */
private function movePage( $id, LinkTarget $newLinkTarget ) {
	$newNamespace = $newLinkTarget->getNamespace();

	$this->getPrimaryDB()->newUpdateQueryBuilder()
		->update( 'page' )
		->set( [
			"page_namespace" => $newNamespace,
			"page_title" => $newLinkTarget->getDBkey(),
		] )
		->where( [ "page_id" => $id ] )
		->caller( __METHOD__ )
		->execute();

	// Update *_from_namespace in links tables.
	// table => [ field prefix, additional primary key fields, virtual domain ]
	$linkTables = [
		'templatelinks' => [ 'tl', [ 'tl_target_id' ], TemplateLinksTable::VIRTUAL_DOMAIN ],
		'imagelinks' => [ 'il', [ 'il_to' ], ImageLinksTable::VIRTUAL_DOMAIN ],
		'pagelinks' => [ 'pl', [ 'pl_target_id' ], PageLinksTable::VIRTUAL_DOMAIN ],
	];
	$updateRowsPerQuery = $this->getConfig()->get( MainConfigNames::UpdateRowsPerQuery );

	foreach ( $linkTables as $table => [ $fieldPrefix, $additionalPrimaryKeyFields, $domain ] ) {
		$linksDbw = $this->getServiceContainer()
			->getConnectionProvider()
			->getPrimaryDatabase( $domain );

		$fromField = "{$fieldPrefix}_from";
		$fromNamespaceField = "{$fieldPrefix}_from_namespace";

		// Rows of this page that still carry a different source namespace.
		$staleRows = $linksDbw->newSelectQueryBuilder()
			->select( $additionalPrimaryKeyFields )
			->from( $table )
			->where( [ $fromField => $id ] )
			->andWhere( $linksDbw->expr( $fromNamespaceField, '!=', $newNamespace ) )
			->caller( __METHOD__ )
			->fetchResultSet();
		if ( !$staleRows ) {
			continue;
		}

		// One condition set per row, made of the source page ID plus the
		// selected key fields.
		$rowConds = [];
		foreach ( $staleRows as $staleRow ) {
			$rowConds[] = array_merge( [ $fromField => $id ], (array)$staleRow );
		}

		foreach ( array_chunk( $rowConds, $updateRowsPerQuery ) as $condBatch ) {
			$this->beginTransactionRound( __METHOD__ );
			$linksDbw->newUpdateQueryBuilder()
				->update( $table )
				->set( [ $fromNamespaceField => $newNamespace ] )
				->where( $linksDbw->factorConds( $condBatch ) )
				->caller( __METHOD__ )
				->execute();
			$this->commitTransactionRound( __METHOD__ );
		}
	}

	return true;
}
| 769 | |
/**
 * Determine if we can merge a page.
 * We check if an inaccessible revision would become the latest and
 * deny the merge if so -- it's theoretically possible to update the
 * latest revision, but opens a can of worms -- search engine updates,
 * recentchanges review, etc.
 *
 * The merge is also denied when the latest revision of either page cannot
 * be loaded, since the timestamps cannot be compared in that case.
 *
 * @param int $id The page_id
 * @param PageIdentity $page
 * @param string &$logStatus This is set to the log status message on failure @phan-output-reference
 * @return bool
 */
private function canMerge( $id, PageIdentity $page, &$logStatus ) {
	$revisionLookup = $this->getServiceContainer()->getRevisionLookup();
	$latestDest = $revisionLookup->getRevisionByTitle( $page, 0,
		IDBAccessObject::READ_LATEST );
	$latestSource = $revisionLookup->getRevisionByPageId( $id, 0,
		IDBAccessObject::READ_LATEST );

	// Either lookup may come back empty (e.g. a page row without a usable
	// latest revision); report that instead of failing on a null dereference.
	if ( !$latestSource || !$latestDest ) {
		$logStatus = 'cannot merge since the latest revision could not be loaded';
		return false;
	}

	if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
		$logStatus = 'cannot merge since source is later';
		return false;
	}

	return true;
}
| 795 | |
/**
 * Merge page histories: reassign all revisions of the source page to the
 * destination page, delete the source page row, then run a links deletion
 * update for the source page.
 *
 * @param stdClass $row Page row (page_id, page_namespace, page_title)
 * @param Title $newTitle Destination page
 * @return bool Always true
 */
private function mergePage( $row, Title $newTitle ) {
	$sourceId = $row->page_id;
	$updateRowsPerQuery = $this->getConfig()->get( MainConfigNames::UpdateRowsPerQuery );

	// Construct the WikiPage object we will need later, while the
	// page_id still exists. Note that this cannot use makeTitleSafe(),
	// we are deliberately constructing an invalid title.
	$brokenTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	$brokenTitle->resetArticleID( $sourceId );
	$sourcePage = $this->getServiceContainer()->getWikiPageFactory()->newFromTitle( $brokenTitle );
	$sourcePage->loadPageData( IDBAccessObject::READ_LATEST );
	$destId = $newTitle->getArticleID();

	$dbw = $this->getPrimaryDB();
	$this->beginTransactionRound( __METHOD__ );
	$revIds = $dbw->newSelectQueryBuilder()
		->select( 'rev_id' )
		->from( 'revision' )
		->where( [ 'rev_page' => $sourceId ] )
		->caller( __METHOD__ )
		->fetchFieldValues();

	$revIdBatches = array_chunk( array_map( 'intval', $revIds ), $updateRowsPerQuery );
	// With more than one batch, each batch gets its own transaction round.
	$commitAfterEachBatch = count( $revIdBatches ) > 1;
	foreach ( $revIdBatches as $revIdBatch ) {
		$dbw->newUpdateQueryBuilder()
			->update( 'revision' )
			->set( [ 'rev_page' => $destId ] )
			->where( [ 'rev_id' => $revIdBatch ] )
			->caller( __METHOD__ )
			->execute();
		if ( $commitAfterEachBatch ) {
			$this->commitTransactionRound( __METHOD__ );
			$this->beginTransactionRound( __METHOD__ );
		}
	}

	$dbw->newDeleteQueryBuilder()
		->deleteFrom( 'page' )
		->where( [ 'page_id' => $sourceId ] )
		->caller( __METHOD__ )
		->execute();
	$this->commitTransactionRound( __METHOD__ );

	/* Call LinksDeletionUpdate to delete outgoing links from the old title,
	 * and update category counts.
	 *
	 * Calling external code with a fake broken Title is a fairly dubious
	 * idea. It's necessary because it's quite a lot of code to duplicate,
	 * but that also makes it fragile since it would be easy for someone to
	 * accidentally introduce an assumption of title validity to the code we
	 * are calling.
	 */
	DeferredUpdates::addUpdate( new LinksDeletionUpdate( $sourcePage ) );
	DeferredUpdates::doUpdates();

	return true;
}
| 859 | } |
| 860 | |
| 861 | // @codeCoverageIgnoreStart |
| 862 | $maintClass = NamespaceDupes::class; |
| 863 | require_once RUN_MAINTENANCE_IF_MAIN; |
| 864 | // @codeCoverageIgnoreEnd |