Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
89.90% covered (warning)
89.90%
89 / 99
33.33% covered (danger)
33.33%
1 / 3
CRAP
0.00% covered (danger)
0.00%
0 / 1
PurgeDeletedCognatePages
94.68% covered (success)
94.68%
89 / 94
33.33% covered (danger)
33.33%
1 / 3
17.04
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 execute
86.67% covered (warning)
86.67%
26 / 30
0.00% covered (danger)
0.00%
0 / 1
5.06
 executeMainLoop
98.31% covered (success)
98.31%
58 / 59
0.00% covered (danger)
0.00%
0 / 1
11
1<?php
2
3namespace Cognate;
4
5use Maintenance;
6use MediaWiki\MediaWikiServices;
7use RuntimeException;
8use Wikimedia\Rdbms\IDatabase;
9use Wikimedia\Rdbms\IReadableDatabase;
10use Wikimedia\Rdbms\SelectQueryBuilder;
11
// Load the Maintenance base class: from MW_INSTALL_PATH when that environment
// variable is set, otherwise from a path three directories above this file.
require_once (
    getenv( 'MW_INSTALL_PATH' ) !== false
        ? getenv( 'MW_INSTALL_PATH' )
        : __DIR__ . '/../../..'
) . '/maintenance/Maintenance.php';
17
/**
 * Maintenance script for removing entries from the cognate_pages table that do not currently exist
 * on the wiki. For example, due to pages being deleted while Cognate has been in read-only mode.
 *
 * The script walks the rows of the current site in ascending cgpa_title order, one batch at a
 * time, looks each batch up in the local page table and deletes the Cognate rows that have no
 * matching page. With --dry-run the same work is done but no DELETE is issued.
 *
 * @license GPL-2.0-or-later
 * @author Addshore
 */
class PurgeDeletedCognatePages extends Maintenance {

    /**
     * Registers the script description, the dry-run option, a default batch size of 100
     * and the dependency on the Cognate extension.
     */
    public function __construct() {
        parent::__construct();

        $this->addDescription( 'Purge deleted pages from the cognate_pages table' );
        $this->addOption( 'dry-run', 'Do not perform writes' );
        $this->setBatchSize( 100 );
        $this->requireExtension( 'Cognate' );
    }

    /**
     * Entry point: purges, batch by batch, all cognate_pages rows of this wiki that have
     * no corresponding row in the page table.
     *
     * @return bool Always true when the run completes
     * @throws RuntimeException If the batch size is below 2, or if a batch cannot advance
     *  (see executeMainLoop())
     */
    public function execute() {
        // Consecutive batches overlap by one row (the continuation key is re-selected),
        // so a batch size of 1 could never move forward.
        if ( $this->getBatchSize() <= 1 ) {
            throw new RuntimeException( 'batch-size must be set to a value of 2 or more.' );
        }

        $services = MediaWikiServices::getInstance();

        $connectionProvider = $services->getConnectionProvider();
        $dbrCognate = $connectionProvider->getReplicaDatabase( CognateServices::VIRTUAL_DOMAIN );

        // Rows of this wiki are identified by the hash of its database name.
        $dbName = $services->getMainConfig()->get( 'DBname' );
        $stringHasher = new StringHasher();
        $siteKey = $stringHasher->hash( $dbName );

        $this->output( "Started processing.\n" );
        if ( $this->hasOption( 'dry-run' ) ) {
            $this->output( "In DRY RUN mode.\n" );
        }

        $start = $dbrCognate->newSelectQueryBuilder()
            ->select( 'MIN(cgpa_title)' )
            ->from( 'cognate_pages' )
            ->where( [
                'cgpa_site' => $siteKey,
            ] )
            ->caller( __METHOD__ )
            ->fetchField();
        // Compare strictly: an empty table yields null/false, whereas a key of 0 is a
        // legitimate starting point and must not be mistaken for "no rows".
        if ( $start === null || $start === false ) {
            $this->output( "Nothing to do.\n" );
            return true;
        }

        $loadBalancerFactory = $services->getDBLoadBalancerFactory();
        $dbwCognate = $connectionProvider->getPrimaryDatabase( CognateServices::VIRTUAL_DOMAIN );
        $dbr = $this->getDB( DB_REPLICA );

        // executeMainLoop() returns false once there is nothing left to process.
        while ( $start !== false ) {
            $start = $this->executeMainLoop( $dbr, $dbrCognate, $dbwCognate, $siteKey, $start );
            // The next batch is read from a replica, so let it catch up with any deletes first.
            $loadBalancerFactory->waitForReplication();
        }

        $this->output( "Done.\n" );
        return true;
    }

    /**
     * Processes one batch of cognate_pages rows starting at (and including) $start.
     *
     * @param IReadableDatabase $dbr Replica connection used to read the page table
     * @param IReadableDatabase $dbrCognate Replica connection used to read the Cognate tables
     * @param IDatabase $dbwCognate Primary connection used to delete from the Cognate tables
     * @param int $siteKey
     * @param string $start cgpa_title to start from (inclusive)
     *
     * @return bool|string cgpa_title to continue from or false if no more rows to process
     * @throws RuntimeException If a full batch consists only of rows sharing the cgpa_title
     *  in $start and none of them was deleted, because the next batch would be identical
     */
    private function executeMainLoop(
        IReadableDatabase $dbr,
        IReadableDatabase $dbrCognate,
        IDatabase $dbwCognate,
        $siteKey,
        $start
    ) {
        $batchSize = $this->getBatchSize();

        // Select a batch of pages that are in the cognate page table
        $cognateRows = $dbrCognate->newSelectQueryBuilder()
            ->select( [ 'cgpa_namespace', 'cgpa_title', 'cgti_raw' ] )
            ->from( CognateStore::TITLES_TABLE_NAME )
            ->join( CognateStore::PAGES_TABLE_NAME, null, 'cgpa_title = cgti_raw_key' )
            ->where( [
                'cgpa_site' => $siteKey,
                $dbrCognate->expr( 'cgpa_title', '>=', $start ),
            ] )
            ->orderBy( 'cgpa_title', SelectQueryBuilder::SORT_ASC )
            ->limit( $batchSize )
            ->caller( __METHOD__ )
            ->fetchResultSet();

        $rowCount = $cognateRows->numRows();
        if ( !$rowCount ) {
            return false;
        }

        // Build a namespace => [ raw title text => title key ] map to select with,
        // remembering the last (greatest) key as the continuation point.
        $cognateData = [];
        $lastKey = $start;
        foreach ( $cognateRows as $row ) {
            $cognateData[$row->cgpa_namespace][$row->cgti_raw] = $row->cgpa_title;
            $lastKey = $row->cgpa_title;
        }

        // Select pages that exist in mediawiki with the given titles
        $pageRows = $dbr->newSelectQueryBuilder()
            ->select( [ 'page_namespace', 'page_title' ] )
            ->from( 'page' )
            ->where( $dbr->makeWhereFrom2d( $cognateData, 'page_namespace', 'page_title' ) )
            ->caller( __METHOD__ )
            ->fetchResultSet();
        // Remove pages that do exist on wiki from the cognate data
        foreach ( $pageRows as $row ) {
            unset( $cognateData[$row->page_namespace][$row->page_title] );
        }

        // Re-key what is left as namespace => [ title key => null ] to delete with
        $cognateDeletionData = [];
        $rowsDeleting = 0;
        foreach ( $cognateData as $namespaceId => $titles ) {
            $cognateDeletionData[$namespaceId] = array_fill_keys( $titles, null );
            $rowsDeleting += count( $titles );
        }

        // Delete any remaining titles from the cognate pages table
        $performedDelete = !$this->hasOption( 'dry-run' ) && $rowsDeleting > 0;
        if ( $performedDelete ) {
            $dbwCognate->newDeleteQueryBuilder()
                ->deleteFrom( CognateStore::PAGES_TABLE_NAME )
                ->where( [
                    'cgpa_site' => $siteKey,
                    // Build the condition with the connection that runs the query.
                    $dbwCognate->makeWhereFrom2d(
                        $cognateDeletionData,
                        'cgpa_namespace',
                        'cgpa_title'
                    )
                ] )
                ->caller( __METHOD__ )
                ->execute();
        }

        $this->output(
            $rowCount . " rows processed, " .
            $rowsDeleting . " rows deleted\n"
        );

        // A single row is just the continuation row of the previous batch (or the only row).
        if ( $rowCount <= 1 ) {
            return false;
        }

        // Rows are sorted ascending from $start, so a last key equal to $start means every
        // row in the batch carries that key. If none of them was removed, re-running the
        // same query would return the same rows again.
        if ( !$performedDelete && (string)$lastKey === (string)$start ) {
            if ( $rowCount < $batchSize ) {
                // The batch was not full, so these were the final rows.
                return false;
            }
            throw new RuntimeException(
                "batch-size is too small: at least $rowCount rows share the same cgpa_title, " .
                'so processing cannot advance. Re-run with a larger batch-size.'
            );
        }

        return $lastKey;
    }

}
181
// Name the class this script runs, then include the file referenced by the
// RUN_MAINTENANCE_IF_MAIN constant (which is not defined in this file).
$maintClass = \Cognate\PurgeDeletedCognatePages::class;
require_once RUN_MAINTENANCE_IF_MAIN;