Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
61 / 61
100.00% covered (success)
100.00%
2 / 2
CRAP
100.00% covered (success)
100.00%
1 / 1
FixMergeHistoryCorruption
100.00% covered (success)
100.00%
61 / 61
100.00% covered (success)
100.00%
2 / 2
14
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 execute
100.00% covered (success)
100.00%
56 / 56
100.00% covered (success)
100.00%
1 / 1
13
1<?php
2/**
3 * @license GPL-2.0-or-later
4 * @file
5 * @ingroup Maintenance
6 */
7
8// @codeCoverageIgnoreStart
9require_once __DIR__ . '/Maintenance.php';
10// @codeCoverageIgnoreEnd
11
12use MediaWiki\Maintenance\Maintenance;
13use MediaWiki\Title\Title;
14
15/**
16 * Maintenance script that cleans up rows of pages corrupted by MergeHistory: pages
17 * that 'exist' but have no visible revision are repaired or deleted.
18 *
19 * These pages are completely inaccessible via the UI due to revision/title mismatch
20 * exceptions in RevisionStore and elsewhere.
21 *
22 * These are rows in the page table whose 'page_latest' matches an existing
23 * 'rev_id', but whose page_id differs from that revision's 'rev_page'. Such rows
24 * create ghost pages because their 'page_latest' revision actually lives on a
25 * different page (the one its 'rev_page' in the revision table now points to).
26 *
27 * @see https://phabricator.wikimedia.org/T263340
28 * @see https://phabricator.wikimedia.org/T259022
29 */
class FixMergeHistoryCorruption extends Maintenance {

    /**
     * Register the script description and its command-line options:
     * 'ns' (read as an integer namespace in execute()), 'dry-run' and 'delete'.
     */
    public function __construct() {
        parent::__construct();
        $this->addDescription( 'Delete pages corrupted by MergeHistory' );
        $this->addOption( 'ns', 'Namespace to restrict the query', false, true );
        $this->addOption( 'dry-run', 'Run in dry-mode' );
        $this->addOption( 'delete', 'Actually delete the found rows' );
    }

    /**
     * Find page rows whose page_latest revision is attached to another page,
     * then report them (--dry-run) or fix them (--delete).
     *
     * For every affected row, the highest rev_id that still has this page as its
     * rev_page becomes the new page_latest. When no such revision is left, the
     * page row itself is deleted.
     */
    public function execute() {
        $dbr = $this->getReplicaDB();
        $dbw = $this->getPrimaryDB();

        // Exactly one of the two mode switches has to be present.
        $wantsDryRun = $this->hasOption( 'dry-run' );
        $wantsDelete = $this->hasOption( 'delete' );
        if ( $wantsDryRun && $wantsDelete ) {
            $this->fatalError( 'Cannot do both --dry-run and --delete.' );
        } elseif ( !$wantsDryRun && !$wantsDelete ) {
            $this->fatalError( 'Either --dry-run or --delete must be specified.' );
        }
        // Writes are performed only when --delete was given on its own.
        $dryRun = $wantsDryRun || !$wantsDelete;

        // A row is corrupted when its latest revision points at another page.
        $mismatchConds = [ 'page_id<>rev_page' ];
        if ( $this->hasOption( 'ns' ) ) {
            $mismatchConds['page_namespace'] = (int)$this->getOption( 'ns' );
        }

        $corruptedPages = $dbr->newSelectQueryBuilder()
            ->from( 'page' )
            ->join( 'revision', null, 'page_latest=rev_id' )
            ->fields( [ 'page_namespace', 'page_title', 'page_id' ] )
            ->where( $mismatchConds )
            ->caller( __METHOD__ )
            ->fetchResultSet();

        if ( !$corruptedPages->numRows() ) {
            $this->output( "Nothing was found, no page matches the criteria.\n" );
            return;
        }

        $numUpdated = 0;
        $numDeleted = 0;

        foreach ( $corruptedPages as $row ) {
            $title = Title::makeTitleSafe( $row->page_namespace, $row->page_title );
            if ( !$title ) {
                $this->output( "Skipping invalid title with page_id: $row->page_id\n" );
                continue;
            }
            $titleText = $title->getPrefixedDBkey();

            // The largest rev_id still owned by this page should be its newest
            // remaining revision, if it has any at all.
            $revId = $dbr->newSelectQueryBuilder()
                ->select( 'MAX(rev_id)' )
                ->from( 'revision' )
                ->where( [ 'rev_page' => $row->page_id ] )
                ->caller( __METHOD__ )->fetchField();

            if ( $revId ) {
                // A revision remains: repoint page_latest at it.
                if ( !$dryRun ) {
                    $this->output( "Updating page_id $row->page_id to page_latest $revId\n" );
                    $dbw->newUpdateQueryBuilder()
                        ->update( 'page' )
                        ->set( [ 'page_latest' => $revId ] )
                        ->where( [ 'page_id' => $row->page_id ] )
                        ->caller( __METHOD__ )->execute();
                } else {
                    $this->output( "Would update page_id $row->page_id to page_latest $revId\n" );
                }
                $numUpdated++;
                continue;
            }

            // No revision is left for this page: the row is a pure ghost.
            if ( !$dryRun ) {
                $this->output( "Deleting $titleText with page_id: $row->page_id\n" );
                $dbw->newDeleteQueryBuilder()
                    ->deleteFrom( 'page' )
                    ->where( [ 'page_id' => $row->page_id ] )
                    ->caller( __METHOD__ )->execute();
            } else {
                $this->output( "Would delete $titleText with page_id: $row->page_id\n" );
            }
            $numDeleted++;
        }

        // The summary line is printed only after real changes were made.
        if ( $dryRun ) {
            return;
        }
        $this->output( "Updated $numUpdated row(s), deleted $numDeleted row(s)\n" );
    }
}
123
124// @codeCoverageIgnoreStart
125$maintClass = FixMergeHistoryCorruption::class;
126require_once RUN_MAINTENANCE_IF_MAIN;
127// @codeCoverageIgnoreEnd