// Running totals accumulated while checking; every counter starts at zero.
// Pages counted as resolvable (incremented in checkNamespace()).
51 private $resolvablePages = 0;
// Candidate pages found (checkNamespace() adds each result set's row count).
57 private $totalPages = 0;
// Link-table rows counted as resolvable (incremented per row in
// checkLinkTable(), then reduced by the number of rows that were deleted).
63 private $resolvableLinks = 0;
// Reported as "links to fix" in the summary printed by checkAll().
69 private $totalLinks = 0;
// Link-table rows removed by the batched deletes in checkLinkTable().
76 private $deletedLinks = 0;
// Register the script description and its command-line options.
79 parent::__construct();
80 $this->
addDescription(
'Find and fix pages affected by namespace addition/removal' );
// --fix: read as $options['fix'] by the check methods to decide between
// applying changes and only reporting them.
81 $this->
addOption(
'fix',
'Attempt to automatically fix errors and delete broken links' );
// --merge: read as $options['merge'] in checkNamespace() when the
// destination title already exists.
82 $this->
addOption(
'merge',
"Instead of renaming conflicts, do a history merge with " .
83 "the correct title" );
// --add-suffix / --add-prefix: passed with the extra arguments (false, true);
// presumably "not required, takes a value" — confirm against
// Maintenance::addOption().
84 $this->
addOption(
'add-suffix',
"Dupes will be renamed with correct namespace with " .
85 "<text> appended after the article name",
false,
true );
86 $this->
addOption(
'add-prefix',
"Dupes will be renamed with correct namespace with " .
87 "<text> prepended before the article name",
false,
true );
// --source-pseudo-namespace: when non-empty, only this prefix is checked
// (via checkPrefix()) instead of every known namespace name.
88 $this->
addOption(
'source-pseudo-namespace',
"Move all pages with the given source " .
89 "prefix (with an implied colon following it). If --dest-namespace is not specified, " .
90 "the colon will be replaced with a hyphen.",
// --dest-namespace: destination namespace ID used together with
// --source-pseudo-namespace.
92 $this->
addOption(
'dest-namespace',
"In combination with --source-pseudo-namespace, " .
93 "specify the namespace ID of the destination.",
false,
true );
// --move-talk: flag (no value arguments passed); read via hasOption().
94 $this->
addOption(
'move-talk',
"If this is specified, pages in the Talk namespace that " .
95 "begin with a conflicting prefix will be renamed, for example " .
96 "Talk:File:Foo -> File_Talk:Foo" );
// Option values gathered into the $options array that is handed to the
// check methods. The string-valued options fall back to '' when absent.
103 'add-suffix' => $this->
getOption(
'add-suffix',
'' ),
104 'add-prefix' => $this->
getOption(
'add-prefix',
'' ),
// move-talk is a plain presence flag.
105 'move-talk' => $this->
hasOption(
'move-talk' ),
106 'source-pseudo-namespace' => $this->
getOption(
'source-pseudo-namespace',
'' ),
// dest-namespace is coerced to an integer and defaults to 0.
107 'dest-namespace' => intval( $this->
getOption(
'dest-namespace', 0 ) )
// A non-empty source prefix selects the single-prefix check; checkAll() is
// presumably the fallback branch — TODO confirm.
110 if ( $options[
'source-pseudo-namespace'] !==
'' ) {
111 $retval = $this->checkPrefix( $options );
113 $retval = $this->checkAll( $options );
// Final status message; presumably chosen from $retval — TODO confirm.
117 $this->
output(
"\nLooks good!\n" );
119 $this->
output(
"\nOh noeees\n" );
// Check all known namespace names, aliases, interwiki prefixes and their
// case variants: first for conflicting pages, then for affected link-table
// rows. $options is the option array forwarded to checkNamespace() and
// checkLinkTable().
130 private function checkAll( $options ) {
// Interwiki prefixes are normalised with the content language's ucfirst().
136 foreach ( $this->getInterwikiList() as $prefix ) {
137 $name = $contLang->ucfirst( $prefix );
// $spaces maps a name to $ns; empty names are skipped.
147 if ( $name !==
'' ) {
148 $spaces[$name] = $ns;
// Namespace names known to the content language (empty names skipped).
151 foreach ( $contLang->getNamespaces() as $ns => $name ) {
152 if ( $name !==
'' ) {
153 $spaces[$name] = $ns;
// Namespace aliases are keyed by alias name already.
156 foreach ( $contLang->getNamespaceAliases() as $name => $ns ) {
157 $spaces[$name] = $ns;
162 $capitalLinks = $this->
getConfig()->get( MainConfigNames::CapitalLinks );
// For every collected name, generate differently-cased spellings.
163 foreach ( $spaces as $name => $ns ) {
165 $moreNames[] = $contLang->uc( $name );
166 $moreNames[] = $contLang->ucfirst( $contLang->lc( $name ) );
167 $moreNames[] = $contLang->ucwords( $name );
168 $moreNames[] = $contLang->ucwords( $contLang->lc( $name ) );
169 $moreNames[] = $contLang->ucwordbreaks( $name );
170 $moreNames[] = $contLang->ucwordbreaks( $contLang->lc( $name ) );
// When CapitalLinks is off, also add lcfirst() forms of every variant and of
// the name itself. A by-value foreach walks a snapshot of the array, so the
// appends inside the loop do not lengthen the iteration.
171 if ( !$capitalLinks ) {
172 foreach ( $moreNames as $altName ) {
173 $moreNames[] = $contLang->lcfirst( $altName );
175 $moreNames[] = $contLang->lcfirst( $name );
// Register each distinct variant that differs from the name it came from.
177 foreach ( array_unique( $moreNames ) as $altName ) {
178 if ( $altName !== $name ) {
179 $spaces[$altName] = $ns;
// Sort the names by their mapped $ns value; the copy is needed because the
// comparator looks up values while uksort() reorders $spaces.
186 $origSpaces = $spaces;
187 uksort( $spaces,
static function ( $a, $b ) use ( $origSpaces ) {
188 return $origSpaces[$a] <=> $origSpaces[$b]
// checkNamespace() is the left operand of &&, so it runs for every name even
// after $ok has become false.
193 foreach ( $spaces as $name => $ns ) {
194 $ok = $this->checkNamespace( $ns, $name, $options ) && $ok;
// Page summary.
198 "{$this->totalPages} pages to fix, " .
199 "{$this->resolvablePages} were resolvable.\n\n"
// Second pass: link tables, for the same set of names.
202 foreach ( $spaces as $name => $ns ) {
221 $this->checkLinkTable(
'pagelinks',
'pl', $ns, $name, $options );
222 $this->checkLinkTable(
'templatelinks',
'tl', $ns, $name, $options );
// redirect rows are restricted to those with an empty rd_interwiki value.
228 $this->checkLinkTable(
'redirect',
'rd', $ns, $name, $options,
229 [
'rd_interwiki' =>
'' ] );
// Link summary.
234 "{$this->totalLinks} links to fix, " .
235 "{$this->resolvableLinks} were resolvable, " .
236 "{$this->deletedLinks} were deleted.\n"
// Return the list of interwiki prefixes: the 'iw_prefix' column extracted
// from each entry of $result.
245 private function getInterwikiList() {
247 return array_column( $result,
'iw_prefix' );
// Check one namespace name: find the pages whose title starts with "$name:",
// work out a destination title for each and report (or, with
// $options['fix'], apply) a move or merge.
// $ns is the namespace value mapped to $name; $options is the option array.
258 private function checkNamespace( $ns, $name, $options ) {
259 $targets = $this->getTargetList( $ns, $name, $options );
260 $count = $targets->numRows();
261 $this->totalPages += $count;
// Suffix for log lines when no changes will be written.
266 $dryRunNote = $options[
'fix'] ?
'' :
' DRY RUN ONLY';
269 foreach ( $targets as $row ) {
272 $newTitle = $this->getDestinationTitle(
273 $ns, $name, $row->page_namespace, $row->page_title );
// Without --add-prefix or --add-suffix there is no fallback name to try.
276 if ( $options[
'add-prefix'] ==
'' && $options[
'add-suffix'] ==
'' ) {
277 $logStatus =
'invalid title and --add-prefix not specified';
280 $action =
'alternate';
// The destination title is already taken by an existing page.
282 } elseif ( $newTitle->
exists() ) {
283 if ( $options[
'merge'] ) {
// canMerge() receives $logStatus by reference and may set a failure reason.
284 if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
289 } elseif ( $options[
'add-prefix'] ==
'' && $options[
'add-suffix'] ==
'' ) {
291 $logStatus =
'dest title exists and --add-prefix not specified';
293 $action =
'alternate';
297 $logStatus =
'no conflict';
// 'alternate': rebuild the destination with the configured prefix/suffix.
// Note that $ns is overwritten here by the destructuring assignment.
299 if ( $action ===
'alternate' ) {
300 [ $ns, $dbk ] = $this->getDestination( $ns, $name, $row->page_namespace,
302 $newTitle = $this->getAlternateTitle( $ns, $dbk, $options );
305 $logStatus =
'alternate title is invalid';
306 } elseif ( $newTitle->
exists() ) {
308 $logStatus =
'alternate title conflicts';
311 $logStatus =
'alternate';
// Identifier of the source page used as the prefix of every log line.
317 $logTitle =
"id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
// "***" lines report a status only; "->" precedes a movePage() call and
// "=>" precedes a mergePage() call.
322 $this->
output(
"$logTitle *** $logStatus\n" );
326 $this->
output(
"$logTitle -> " .
329 if ( $options[
'fix'] ) {
330 $pageOK = $this->movePage( $row->page_id, $newTitle );
334 $this->
output(
"$logTitle => " .
337 if ( $options[
'fix'] ) {
338 $pageOK = $this->mergePage( $row, $newTitle );
344 $this->resolvablePages++;
// Check one link table for rows whose target is stored in namespace 0 with
// a title starting with "$name:", and report or (with $options['fix'])
// rewrite them to the proper destination title. Rows are processed in
// batches ordered by title and source page.
// $table / $fieldPrefix: table name and its column prefix (e.g. 'pagelinks', 'pl').
// $ns / $name: namespace value and the name being checked.
// $options: the option array.
362 private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
368 $fromField =
"{$fieldPrefix}_from";
// Base query; $extraConds adds caller-supplied conditions.
370 $sqb = $dbw->newSelectQueryBuilder()
371 ->select( $fromField )
372 ->where( $extraConds )
373 ->limit( $batchSize );
// Tables known to LinksMigration take their query info, title fields and
// target-ID column from its mapping.
376 if ( isset( $linksMigration::$mapping[$table] ) ) {
377 $sqb->queryInfo( $linksMigration->getQueryInfo( $table ) );
378 [ $namespaceField, $titleField ] = $linksMigration->getTitleFields( $table );
379 $schemaMigrationStage = $linksMigration::$mapping[$table][
'config'] === -1
381 : $this->
getConfig()->get( $linksMigration::$mapping[$table][
'config'] );
383 $targetIdField = $linksMigration::$mapping[$table][
'target_id'];
// Unmapped tables (presumably the else branch — TODO confirm) use the plain
// {prefix}_namespace / {prefix}_title columns.
385 $sqb->table( $table );
386 $namespaceField =
"{$fieldPrefix}_namespace";
387 $titleField =
"{$fieldPrefix}_title";
388 $sqb->fields( [ $namespaceField, $titleField ] );
390 $schemaMigrationStage = -1;
391 $linkTargetLookup =
null;
// Match targets in namespace 0 whose title begins with "$name:".
395 $namespaceField => 0,
396 $dbw->expr( $titleField, IExpression::LIKE,
new LikeValue(
"$name:", $dbw->anyString() ) ),
// Ordering by (title, from) matches the continuation comparison built at the
// end of each batch.
398 ->orderBy( [ $titleField, $fromField ] )
399 ->caller( __METHOD__ );
401 $updateRowsPerQuery = $this->
getConfig()->get( MainConfigNames::UpdateRowsPerQuery );
// Each batch runs on a clone so the base builder stays reusable.
403 $res = ( clone $sqb )
404 ->andWhere( $batchConds )
406 if ( $res->numRows() == 0 ) {
410 $rowsToDeleteIfStillExists = [];
412 foreach ( $res as $row ) {
413 $logTitle =
"from={$row->$fromField} ns={$row->$namespaceField} " .
414 "dbk={$row->$titleField}";
415 $destTitle = $this->getDestinationTitle(
416 $ns, $name, $row->$namespaceField, $row->$titleField );
419 $this->
output(
"$table $logTitle *** INVALID\n" );
422 $this->resolvableLinks++;
// Dry run: only report what would change.
423 if ( !$options[
'fix'] ) {
424 $this->
output(
"$table $logTitle -> " .
425 $destTitle->getPrefixedDBkey() .
" DRY RUN\n" );
// Build the new values plus the conditions identifying the row to update
// and the row to delete afterwards.
429 if ( isset( $linksMigration::$mapping[$table] ) ) {
432 $setValue[$targetIdField] = $linkTargetLookup->acquireLinkTargetId( $destTitle, $dbw );
435 $setValue[
"{$fieldPrefix}_namespace"] = $destTitle->getNamespace();
436 $setValue[
"{$fieldPrefix}_title"] = $destTitle->getDBkey();
438 $whereCondition = $linksMigration->getLinksConditions(
442 $deleteCondition = $linksMigration->getLinksConditions(
444 new TitleValue( (
int)$row->$namespaceField, $row->$titleField )
448 $namespaceField => $destTitle->getNamespace(),
449 $titleField => $destTitle->getDBkey()
452 $namespaceField => 0,
453 $titleField => $row->$titleField
456 $namespaceField => $row->$namespaceField,
457 $titleField => $row->$titleField,
461 $dbw->newUpdateQueryBuilder()
465 ->where( [ $fromField => $row->$fromField ] )
466 ->andWhere( $whereCondition )
467 ->caller( __METHOD__ )
// Remember the old row's key; presumably rows whose update did not take
// effect are the ones still matching when the delete runs — TODO confirm.
471 $rowsToDeleteIfStillExists[] = array_merge( [ $fromField => $row->$fromField ], $deleteCondition );
473 $this->
output(
"$table $logTitle -> " .
474 $destTitle->getPrefixedDBkey() .
"\n"
// Delete the remembered rows in chunks of at most UpdateRowsPerQuery.
478 if ( $options[
'fix'] && count( $rowsToDeleteIfStillExists ) > 0 ) {
480 $deleteBatches = array_chunk( $rowsToDeleteIfStillExists, $updateRowsPerQuery );
481 foreach ( $deleteBatches as $deleteBatch ) {
482 $dbw->newDeleteQueryBuilder()
483 ->deleteFrom( $table )
484 ->where( $dbw->factorConds( $deleteBatch ) )
485 ->caller( __METHOD__ )
487 $affectedRows += $dbw->affectedRows();
488 if ( count( $deleteBatches ) > 1 ) {
// Deleted rows move from the "resolvable" tally to the "deleted" tally.
493 $this->deletedLinks += $affectedRows;
494 $this->resolvableLinks -= $affectedRows;
// Continuation: a '>' comparison on the title and from values of $row, the
// last row iterated in this batch.
498 $dbw->buildComparison(
'>', [
500 $titleField => $row->$titleField,
502 $fromField => $row->$fromField,
// Check a single user-supplied prefix: read the source prefix and the
// destination namespace from $options, announce the check, and return the
// result of checkNamespace() for that pair.
517 private function checkPrefix( $options ) {
518 $prefix = $options[
'source-pseudo-namespace'];
519 $ns = $options[
'dest-namespace'];
520 $this->
output(
"Checking prefix \"$prefix\" vs namespace $ns\n" );
522 return $this->checkNamespace( $ns, $prefix, $options );
// Fetch the candidate pages for a name: rows (page_id, page_title,
// page_namespace) whose namespace is in $checkNamespaces and whose title
// starts with "$name:". Returns the query's result set.
535 private function getTargetList( $ns, $name, $options ) {
// $options['move-talk'] is one of the conditions used here; presumably it
// decides whether the Talk namespace is searched as well — TODO confirm.
539 $options[
'move-talk'] &&
547 return $dbw->newSelectQueryBuilder()
548 ->select( [
'page_id',
'page_title',
'page_namespace' ] )
551 'page_namespace' => $checkNamespaces,
552 $dbw->expr(
'page_title', IExpression::LIKE,
new LikeValue(
"$name:", $dbw->anyString() ) ),
554 ->caller( __METHOD__ )->fetchResultSet();
// Compute the destination for a conflicting title.
// Returns a two-element array [ destination namespace, destination DB key ].
565 private function getDestination( $ns, $name, $sourceNs, $sourceDbk ) {
// Drop the leading "$name:" from the source DB key (byte-length based).
566 $dbk = substr( $sourceDbk, strlen(
"$name:" ) );
// In some case the prefix is kept as part of the title, joined by a hyphen
// instead of a colon; the option help text suggests this applies when no
// destination namespace is given — TODO confirm.
570 $dbk =
"$name-" . $dbk;
// A source page in the Talk namespace whose target $ns is a subject
// namespace is sent to the matching talk namespace.
575 if ( $sourceNs ==
NS_TALK && $nsInfo->isSubject( $ns ) ) {
577 $destNS = $nsInfo->getTalk( $destNS );
579 return [ $destNS, $dbk ];
// Build a Title for the destination computed by getDestination().
// Callers treat a falsy return value as "invalid title".
590 private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk ) {
591 [ $destNS, $dbk ] = $this->getDestination( $ns, $name, $sourceNs, $sourceDbk );
592 $newTitle = Title::makeTitleSafe( $destNS, $dbk );
// Special handling when no title could be made or the title cannot exist as
// a page.
593 if ( !$newTitle || !$newTitle->
canExist() ) {
// Build the fallback title: the --add-prefix value, then the DB key, then
// the --add-suffix value, in namespace $ns. Returns whatever
// Title::makeTitleSafe() yields for that key.
608 private function getAlternateTitle( $ns, $dbk, $options ) {
609 $prefix = $options[
'add-prefix'];
610 $suffix = $options[
'add-suffix'];
// With neither a prefix nor a suffix there is no alternate name to build.
611 if ( $prefix ==
'' && $suffix ==
'' ) {
614 $newDbk = $prefix . $dbk . $suffix;
615 return Title::makeTitleSafe( $ns, $newDbk );
// Move the page with ID $id to $newLinkTarget by updating database rows
// directly, then bring the *_from_namespace columns of the link tables in
// line with the new namespace.
625 private function movePage( $id,
LinkTarget $newLinkTarget ) {
// Rewrite the page row's title to the new DB key.
628 $dbw->newUpdateQueryBuilder()
632 "page_title" => $newLinkTarget->
getDBkey(),
637 ->caller( __METHOD__ )
// Each entry is [ table, field prefix, additional primary-key fields ].
641 $fromNamespaceTables = [
642 [
'templatelinks',
'tl', [
'tl_target_id' ] ],
643 [
'imagelinks',
'il', [
'il_to' ] ]
// pagelinks is registered with either (pl_namespace, pl_title) or
// pl_target_id as key fields; presumably chosen by the schema migration
// stage — TODO confirm.
646 $fromNamespaceTables[] = [
'pagelinks',
'pl', [
'pl_namespace',
'pl_title' ] ];
648 $fromNamespaceTables[] = [
'pagelinks',
'pl', [
'pl_target_id' ] ];
650 $updateRowsPerQuery = $this->
getConfig()->get( MainConfigNames::UpdateRowsPerQuery );
651 foreach ( $fromNamespaceTables as [ $table, $fieldPrefix, $additionalPrimaryKeyFields ] ) {
652 $fromField =
"{$fieldPrefix}_from";
653 $fromNamespaceField =
"{$fieldPrefix}_from_namespace";
// Select this page's rows whose stored from-namespace differs from the new one.
655 $res = $dbw->newSelectQueryBuilder()
656 ->select( $additionalPrimaryKeyFields )
658 ->where( [ $fromField => $id ] )
659 ->andWhere( $dbw->expr( $fromNamespaceField,
'!=', $newLinkTarget->
getNamespace() ) )
660 ->caller( __METHOD__ )
// One condition set per row: the from field plus the selected key fields.
667 foreach ( $res as $row ) {
668 $updateConds[] = array_merge( [ $fromField => $id ], (array)$row );
// Apply the namespace change in chunks of at most UpdateRowsPerQuery rows.
670 $updateBatches = array_chunk( $updateConds, $updateRowsPerQuery );
671 foreach ( $updateBatches as $updateBatch ) {
672 $dbw->newUpdateQueryBuilder()
674 ->set( [ $fromNamespaceField => $newLinkTarget->
getNamespace() ] )
675 ->where( $dbw->factorConds( $updateBatch ) )
676 ->caller( __METHOD__ )
678 if ( count( $updateBatches ) > 1 ) {
// Decide whether the page with ID $id can be history-merged into
// $linkTarget. $logStatus is passed by reference and receives the reason
// when the merge is refused.
699 private function canMerge( $id,
LinkTarget $linkTarget, &$logStatus ) {
// Latest revisions of destination and source, read with READ_LATEST.
// NOTE(review): these lookups may return null when no revision is found,
// which would make the getTimestamp() calls below fail — confirm against
// the RevisionLookup documentation.
701 $latestDest = $revisionLookup->getRevisionByTitle( $linkTarget, 0,
702 IDBAccessObject::READ_LATEST );
703 $latestSource = $revisionLookup->getRevisionByPageId( $id, 0,
704 IDBAccessObject::READ_LATEST );
// A source whose latest revision is newer than the destination's is refused.
705 if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
706 $logStatus =
'cannot merge since source is later';
// Merge the page described by $row into $newTitle: reassign the source
// page's revisions to the destination page ID, delete the source page row,
// then run deferred updates.
720 private function mergePage( $row,
Title $newTitle ) {
722 $updateRowsPerQuery = $this->
getConfig()->get( MainConfigNames::UpdateRowsPerQuery );
// Build a Title for the source from its raw row and set its article ID to
// $id explicitly before creating the WikiPage.
729 $sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
730 $sourceTitle->resetArticleID( $id );
731 $wikiPage = $this->
getServiceContainer()->getWikiPageFactory()->newFromTitle( $sourceTitle );
732 $wikiPage->loadPageData( IDBAccessObject::READ_LATEST );
// Collect values for the source page's revisions (rev_page = $id); they are
// used as rev_id values below.
736 $revIds = $dbw->newSelectQueryBuilder()
739 ->where( [
'rev_page' => $id ] )
740 ->caller( __METHOD__ )
741 ->fetchFieldValues();
// Re-point the revisions at the destination page in chunks of at most
// UpdateRowsPerQuery; the IDs are cast to int first.
742 $updateBatches = array_chunk( array_map(
'intval', $revIds ), $updateRowsPerQuery );
743 foreach ( $updateBatches as $updateBatch ) {
744 $dbw->newUpdateQueryBuilder()
745 ->update(
'revision' )
746 ->set( [
'rev_page' => $destId ] )
747 ->where( [
'rev_id' => $updateBatch ] )
748 ->caller( __METHOD__ )
750 if ( count( $updateBatches ) > 1 ) {
// Remove the source page row.
755 $dbw->newDeleteQueryBuilder()
756 ->deleteFrom(
'page' )
757 ->where( [
'page_id' => $id ] )
758 ->caller( __METHOD__ )
// Run any queued deferred updates now.
773 DeferredUpdates::doUpdates();