Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
89.90% |
89 / 99 |
|
33.33% |
1 / 3 |
CRAP | |
0.00% |
0 / 1 |
PurgeDeletedCognatePages | |
94.68% |
89 / 94 |
|
33.33% |
1 / 3 |
17.04 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
execute | |
86.67% |
26 / 30 |
|
0.00% |
0 / 1 |
5.06 | |||
executeMainLoop | |
98.31% |
58 / 59 |
|
0.00% |
0 / 1 |
11 |
1 | <?php |
2 | |
3 | namespace Cognate; |
4 | |
5 | use Maintenance; |
6 | use MediaWiki\MediaWikiServices; |
7 | use RuntimeException; |
8 | use Wikimedia\Rdbms\IDatabase; |
9 | use Wikimedia\Rdbms\IReadableDatabase; |
10 | use Wikimedia\Rdbms\SelectQueryBuilder; |
11 | |
// Load the core Maintenance base class: from MW_INSTALL_PATH when that environment
// variable is set, otherwise from the location relative to this file.
require_once (
	getenv( 'MW_INSTALL_PATH' ) !== false
		? getenv( 'MW_INSTALL_PATH' )
		: __DIR__ . '/../../..'
) . '/maintenance/Maintenance.php';
17 | |
/**
 * Maintenance script for removing entries from the cognate_pages table that do not currently exist
 * on the wiki. For example, due to pages being deleted while Cognate has been in read-only mode.
 *
 * The rows for this wiki are walked in ascending cgpa_title order, one batch at a time. Each batch
 * is compared against the local page table, and rows without a matching page are deleted
 * (unless --dry-run is given).
 *
 * @license GPL-2.0-or-later
 * @author Addshore
 */
class PurgeDeletedCognatePages extends Maintenance {

	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Purge deleted pages from the cognate_pages table' );
		$this->addOption( 'dry-run', 'Do not perform writes' );
		$this->setBatchSize( 100 );
		$this->requireExtension( 'Cognate' );
	}

	/**
	 * Entry point: finds the smallest cgpa_title for this wiki and processes batches
	 * until executeMainLoop() reports that nothing is left.
	 *
	 * @return bool Always true.
	 * @throws RuntimeException If the batch size is smaller than 2.
	 */
	public function execute() {
		// Consecutive batches overlap by one row (the continuation key is selected again
		// with ">="), so a batch size of 1 could never advance.
		if ( $this->mBatchSize <= 1 ) {
			throw new RuntimeException( 'batch-size must be set to a value of 2 or more.' );
		}

		$services = MediaWikiServices::getInstance();

		$connectionProvider = $services->getConnectionProvider();
		$dbrCognate = $connectionProvider->getReplicaDatabase( CognateServices::VIRTUAL_DOMAIN );

		$dbName = $services->getMainConfig()->get( 'DBname' );
		$stringHasher = new StringHasher();
		$siteKey = $stringHasher->hash( $dbName );

		$this->output( "Started processing.\n" );
		if ( $this->hasOption( 'dry-run' ) ) {
			$this->output( "In DRY RUN mode.\n" );
		}

		$start = $dbrCognate->newSelectQueryBuilder()
			->select( 'MIN(cgpa_title)' )
			->from( 'cognate_pages' )
			->where( [
				'cgpa_site' => $siteKey,
			] )
			->caller( __METHOD__ )
			->fetchField();
		// fetchField() yields false when there is no row and MIN() yields NULL when nothing
		// matched. Compare strictly so that a key of 0 / "0" is not mistaken for "no rows".
		if ( $start === false || $start === null ) {
			$this->output( "Nothing to do.\n" );
			return true;
		}

		$loadBalancerFactory = $services->getDBLoadBalancerFactory();
		$dbwCognate = $connectionProvider->getPrimaryDatabase( CognateServices::VIRTUAL_DOMAIN );
		$dbr = $this->getDB( DB_REPLICA );

		while ( $start !== false ) {
			$start = $this->executeMainLoop( $dbr, $dbrCognate, $dbwCognate, $siteKey, $start );
			$loadBalancerFactory->waitForReplication();
		}

		$this->output( "Done.\n" );
		return true;
	}

	/**
	 * Processes one batch of cognate_pages rows starting at (and including) $start.
	 *
	 * @param IReadableDatabase $dbr Replica connection used to read the local page table
	 * @param IReadableDatabase $dbrCognate Replica connection used to read the Cognate tables
	 * @param IDatabase $dbwCognate Primary connection used to delete from the Cognate pages table
	 * @param int $siteKey
	 * @param string $start
	 *
	 * @return bool|string cgpa_title to continue from or false if no more rows to process
	 */
	private function executeMainLoop(
		IReadableDatabase $dbr,
		IReadableDatabase $dbrCognate,
		IDatabase $dbwCognate,
		$siteKey,
		$start
	) {
		// Select a batch of pages that are in the cognate page table
		$cognateRows = $dbrCognate->newSelectQueryBuilder()
			->select( [ 'cgpa_namespace', 'cgpa_title', 'cgti_raw' ] )
			->from( CognateStore::TITLES_TABLE_NAME )
			->join( CognateStore::PAGES_TABLE_NAME, null, 'cgpa_title = cgti_raw_key' )
			->where( [
				'cgpa_site' => $siteKey,
				$dbrCognate->expr( 'cgpa_title', '>=', $start ),
			] )
			->orderBy( 'cgpa_title', SelectQueryBuilder::SORT_ASC )
			->limit( $this->mBatchSize )
			->caller( __METHOD__ )
			->fetchResultSet();

		$rowCount = $cognateRows->numRows();
		if ( !$rowCount ) {
			return false;
		}

		// Get an array to select with: namespace => [ raw title text => title key ]
		$cognateData = [];
		$lastKey = $start;
		foreach ( $cognateRows as $row ) {
			$cognateData[$row->cgpa_namespace][$row->cgti_raw] = $row->cgpa_title;
			// Rows are ordered ascending, so the last one seen is the continuation key
			$lastKey = $row->cgpa_title;
		}

		// Select pages that exist in mediawiki with the given titles
		$pageRows = $dbr->newSelectQueryBuilder()
			->select( [ 'page_namespace', 'page_title' ] )
			->from( 'page' )
			->where( $dbr->makeWhereFrom2d( $cognateData, 'page_namespace', 'page_title' ) )
			->caller( __METHOD__ )
			->fetchResultSet();
		// Remove pages that do exist on wiki from the cognate data
		foreach ( $pageRows as $row ) {
			unset( $cognateData[$row->page_namespace][$row->page_title] );
		}

		// Get an array to delete with: namespace => [ title key => null ]
		$cognateDeletionData = [];
		$rowsDeleting = 0;
		foreach ( $cognateData as $namespaceId => $titles ) {
			$cognateDeletionData[$namespaceId] = [];
			foreach ( $titles as $rawTitleKey ) {
				$cognateDeletionData[$namespaceId][$rawTitleKey] = null;
				$rowsDeleting++;
			}
		}

		// Delete any remaining titles from the cognate pages table
		if ( !$this->hasOption( 'dry-run' ) && $rowsDeleting > 0 ) {
			$dbwCognate->newDeleteQueryBuilder()
				->deleteFrom( CognateStore::PAGES_TABLE_NAME )
				->where( [
					'cgpa_site' => $siteKey,
					// Build the condition with the connection that executes the query
					$dbwCognate->makeWhereFrom2d(
						$cognateDeletionData,
						'cgpa_namespace',
						'cgpa_title'
					)
				] )
				->caller( __METHOD__ )
				->execute();
		}

		$this->output(
			$rowCount . " rows processed, " .
			$rowsDeleting . " rows deleted\n"
		);

		// A single row can only be the continuation row itself, so nothing is left
		if ( $rowCount <= 1 ) {
			return false;
		}

		if ( (string)$lastKey === (string)$start ) {
			// Every row in this batch carries the key we started from (the same title key
			// in several namespaces). Returning it again would select the identical batch
			// forever, so jump to the next strictly greater key instead.
			// NOTE(review): if more rows share one key than fit in a batch, the surplus
			// rows for that key are not visited; raise --batch-size if that can happen.
			$next = $dbrCognate->newSelectQueryBuilder()
				->select( 'MIN(cgpa_title)' )
				->from( CognateStore::PAGES_TABLE_NAME )
				->where( [
					'cgpa_site' => $siteKey,
					$dbrCognate->expr( 'cgpa_title', '>', $start ),
				] )
				->caller( __METHOD__ )
				->fetchField();

			return ( $next === false || $next === null ) ? false : $next;
		}

		return $lastKey;
	}

}
181 | |
// Name the maintenance class defined in this file, then include the file
// identified by the RUN_MAINTENANCE_IF_MAIN constant.
$maintClass = PurgeDeletedCognatePages::class;
require_once RUN_MAINTENANCE_IF_MAIN;