MediaWiki master
cleanupBlocks.php
Go to the documentation of this file.
1<?php
2
4use Wikimedia\IPUtils;
5
6require_once __DIR__ . '/Maintenance.php';
7
9 private bool $dryRun = false;
10
11 public function __construct() {
12 parent::__construct();
13 $this->addDescription( 'Fix referential integrity issues in block and block_target tables' );
14 $this->addOption( 'dry-run', 'Just report, don\'t fix anything' );
15 }
16
17 public function execute() {
18 $this->dryRun = $this->hasOption( 'dry-run' );
19
20 $this->deleteOrphanBlockTargets();
21 $this->deleteTargetlessBlocks();
22 $this->normalizeAddresses();
23 $this->mergeDuplicateBlockTargets();
24 $this->fixTargetCounts();
25 }
26
30 private function deleteOrphanBlockTargets() {
31 $dbr = $this->getReplicaDB();
32 $badIds = $dbr->newSelectQueryBuilder()
33 ->select( 'bt_id' )
34 ->from( 'block_target' )
35 ->leftJoin( 'block', null, 'bt_id=bl_target' )
36 ->where( [ 'bl_target' => null ] )
37 ->caller( __METHOD__ )
38 ->fetchFieldValues();
39
40 foreach ( $badIds as $id ) {
41 $this->deleteOrphanBlockTarget( (int)$id );
42 }
43 }
44
49 private function deleteOrphanBlockTarget( int $id ) {
50 $this->output( "Deleting orphan bt_id=$id: " );
51 if ( $this->dryRun ) {
52 $this->output( "dry run\n" );
53 return;
54 }
55 $dbw = $this->getPrimaryDB();
56 $dbw->startAtomic( __METHOD__ );
57 $lockingUsage = $dbw->newSelectQueryBuilder()
58 ->select( 'COUNT(*)' )
59 ->from( 'block' )
60 ->where( [ 'bl_target' => $id ] )
61 ->forUpdate()
62 ->caller( __METHOD__ )
63 ->fetchField();
64 if ( $lockingUsage ) {
65 $dbw->endAtomic( __METHOD__ );
66 $this->output( "primary usage count is non-zero\n" );
67 return;
68 }
69 $dbw->newDeleteQueryBuilder()
70 ->deleteFrom( 'block_target' )
71 ->where( [ 'bt_id' => $id ] )
72 ->caller( __METHOD__ )
73 ->execute();
74 $affected = $dbw->affectedRows();
75 $dbw->endAtomic( __METHOD__ );
76 $this->output( $affected ? "OK\n" : "no rows affected\n" );
77 }
78
82 private function deleteTargetlessBlocks() {
83 $dbr = $this->getReplicaDB();
84 $res = $dbr->newSelectQueryBuilder()
85 ->select( [ 'bl_id', 'bl_target' ] )
86 ->from( 'block' )
87 ->leftJoin( 'block_target', null, 'bt_id=bl_target' )
88 ->where( [ 'bt_id' => null ] )
89 ->caller( __METHOD__ )
90 ->fetchResultSet();
91 foreach ( $res as $row ) {
92 $this->deleteTargetlessBlock( (int)$row->bl_id, (int)$row->bl_target );
93 }
94 }
95
102 private function deleteTargetlessBlock( int $blockId, int $targetId ) {
103 $this->output( "Deleting block $blockId on non-existent target $targetId: " );
104 if ( $this->dryRun ) {
105 $this->output( "dry run\n" );
106 return;
107 }
108 $dbw = $this->getPrimaryDB();
109 $dbw->startAtomic( __METHOD__ );
110 $lockingTargetCount = $dbw->newSelectQueryBuilder()
111 ->select( 'COUNT(*)' )
112 ->from( 'block_target' )
113 ->where( [ 'bt_id' => $targetId ] )
114 ->forUpdate()
115 ->caller( __METHOD__ )
116 ->fetchField();
117 if ( $lockingTargetCount ) {
118 $this->output( "target exists in primary\n" );
119 $dbw->endAtomic( __METHOD__ );
120 return;
121 }
122 $dbw->newDeleteQueryBuilder()
123 ->deleteFrom( 'block' )
124 ->where( [ 'bl_id' => $blockId, 'bl_target' => $targetId ] )
125 ->caller( __METHOD__ )
126 ->execute();
127 $affected = $dbw->affectedRows();
128 $dbw->endAtomic( __METHOD__ );
129 $this->output( $affected ? "OK\n" : "no rows affected\n" );
130 }
131
138 private function normalizeAddresses() {
139 $dbr = $this->getReplicaDB();
140 $dbType = $dbr->getType();
141 if ( $dbType !== 'mysql' ) {
142 $this->output( "Skipping IP address normalization: not implemented on $dbType\n" );
143 return;
144 }
145 $res = $dbr->newSelectQueryBuilder()
146 ->select( [ 'bt_id', 'bt_address' ] )
147 ->from( 'block_target' )
148 ->where( [ 'bt_user' => null ] )
149 ->andWhere( 'bt_range_start IS NOT NULL OR ' .
150 'bt_address RLIKE \'(^|[.:])0[0-9]|[a-f]|::\'' )
151 ->caller( __METHOD__ )
152 ->fetchResultSet();
153 $writeDone = false;
154 foreach ( $res as $row ) {
155 $addr = $row->bt_address;
156 if ( IPUtils::isValid( $addr ) ) {
157 $norm = IPUtils::sanitizeIP( $addr );
158 } elseif ( IPUtils::isValidRange( $addr ) ) {
159 $norm = IPUtils::sanitizeRange( $addr );
160 } else {
161 continue;
162 }
163 if ( $addr !== $norm && is_string( $norm ) ) {
164 $this->normalizeAddress( (int)$row->bt_id, $addr, $norm );
165 $writeDone = true;
166 }
167 }
168 if ( $writeDone ) {
169 // Ensure that mergeDuplicateBlockTargets() sees our changes
170 $this->waitForReplication();
171 }
172 }
173
181 private function normalizeAddress( int $targetId, string $address, string $normalizedAddress ) {
182 $this->output( "Normalizing bt_id=$targetId $address -> $normalizedAddress: " );
183 if ( $this->dryRun ) {
184 $this->output( "dry run\n" );
185 return;
186 }
187 $dbw = $this->getPrimaryDB();
188 $dbw->startAtomic( __METHOD__ );
189 $primaryAddr = $dbw->newSelectQueryBuilder()
190 ->select( 'bt_address' )
191 ->from( 'block_target' )
192 ->where( [ 'bt_id' => $targetId ] )
193 ->forUpdate()
194 ->caller( __METHOD__ )
195 ->fetchField();
196 if ( $primaryAddr === false ) {
197 $this->output( "missing in primary\n" );
198 return;
199 }
200 if ( $primaryAddr !== $address ) {
201 $this->output( "changed in primary\n" );
202 return;
203 }
204 $dbw->newUpdateQueryBuilder()
205 ->update( 'block_target' )
206 ->set( [ 'bt_address' => $normalizedAddress ] )
207 ->where( [ 'bt_id' => $targetId ] )
208 ->caller( __METHOD__ )
209 ->execute();
210 $dbw->endAtomic( __METHOD__ );
211 $this->output( "done\n" );
212 }
213
217 private function mergeDuplicateBlockTargets() {
218 $dbr = $this->getReplicaDB();
219 $rawGroups = $this->getReplicaDB()->newSelectQueryBuilder()
220 ->select( 'GROUP_CONCAT(bt_id)' )
221 ->from( 'block_target' )
222 ->where( $dbr->expr( 'bt_user', '!=', null ) )
223 ->groupBy( 'bt_user' )
224 ->having( 'COUNT(*) > 1' )
225 ->caller( __METHOD__ )
226 ->fetchFieldValues();
227 $this->processIdGroups( $rawGroups );
228
229 $rawGroups = $this->getReplicaDB()->newSelectQueryBuilder()
230 ->select( 'GROUP_CONCAT(bt_id)' )
231 ->from( 'block_target' )
232 ->where( $dbr->expr( 'bt_address', '!=', null ) )
233 ->groupBy( [ 'bt_auto', 'bt_address' ] )
234 ->having( 'COUNT(*) > 1' )
235 ->caller( __METHOD__ )
236 ->fetchFieldValues();
237 $this->processIdGroups( $rawGroups );
238 }
239
244 private function processIdGroups( $rawGroups ) {
245 foreach ( $rawGroups as $blob ) {
246 $group = array_map( 'intval', explode( ',', $blob ) );
247 sort( $group );
248 $main = array_shift( $group );
249 $this->mergeGroup( $main, $group );
250 }
251 }
252
258 private function mergeGroup( int $mainId, array $badIds ) {
259 $this->output( 'Merging bt_id ' . implode( ',', $badIds ) . " into $mainId: " );
260 if ( $this->dryRun ) {
261 $this->output( "dry run\n" );
262 return;
263 }
264
265 $dbw = $this->getPrimaryDB();
266 $dbw->startAtomic( __METHOD__ );
267
268 // Check that the targets are identical in the primary
269 $fieldsToTest = [ 'bt_address', 'bt_user', 'bt_user_text', 'bt_auto' ];
270 $mainRow = $dbw->newSelectQueryBuilder()
271 ->select( $fieldsToTest )
272 ->from( 'block_target' )
273 ->where( [ 'bt_id' => $mainId ] )
274 ->forUpdate()
275 ->caller( __METHOD__ )
276 ->fetchRow();
277 $badRows = $dbw->newSelectQueryBuilder()
278 ->select( $fieldsToTest )
279 ->select( 'bt_id' )
280 ->from( 'block_target' )
281 ->where( [ 'bt_id' => $badIds ] )
282 ->forUpdate()
283 ->caller( __METHOD__ )
284 ->fetchResultSet();
285
286 if ( $badRows->numRows() !== count( $badIds ) ) {
287 $this->output( "some IDs are not present in the primary\n" );
288 $dbw->endAtomic( __METHOD__ );
289 return;
290 }
291
292 foreach ( $badRows as $badRow ) {
293 foreach ( $fieldsToTest as $field ) {
294 if ( $mainRow->$field !== $badRow->$field ) {
295 $this->output( "mismatch in $field for bt_id={$badRow->bt_id}\n" );
296 $dbw->endAtomic( __METHOD__ );
297 return;
298 }
299 }
300 }
301
302 // Update the block rows for the targets to be deleted
303 $dbw->newUpdateQueryBuilder()
304 ->update( 'block' )
305 ->set( [ 'bl_target' => $mainId ] )
306 ->where( [ 'bl_target' => $badIds ] )
307 ->caller( __METHOD__ )
308 ->execute();
309 $blockCount = $dbw->affectedRows();
310
311 // Delete the bad targets
312 $dbw->newDeleteQueryBuilder()
313 ->delete( 'block_target' )
314 ->where( [ 'bt_id' => $badIds ] )
315 ->caller( __METHOD__ )
316 ->execute();
317
318 // Update bt_count for the remaining target
319 $dbw->newUpdateQueryBuilder()
320 ->update( 'block_target' )
321 ->set( 'bt_count=bt_count + ' . $blockCount )
322 ->where( [ 'bt_id' => $mainId ] )
323 ->caller( __METHOD__ )
324 ->execute();
325
326 $dbw->endAtomic( __METHOD__ );
327 $this->output( "done\n" );
328 }
329
333 private function fixTargetCounts() {
334 $dbr = $this->getReplicaDB();
335 $res = $dbr->newSelectQueryBuilder()
336 ->select( [ 'bt_id', 'bt_count', 'real_count' => 'COUNT(*)' ] )
337 ->from( 'block' )
338 ->join( 'block_target', null, 'bt_id=bl_target' )
339 ->groupBy( [ 'bt_id', 'bt_count' ] )
340 ->having( 'COUNT(*) != bt_count' )
341 ->caller( __METHOD__ )
342 ->fetchResultSet();
343
344 foreach ( $res as $row ) {
345 $this->fixTargetCount( (int)$row->bt_id, (int)$row->bt_count, (int)$row->real_count );
346 }
347 }
348
356 private function fixTargetCount( int $targetId, int $badCount, int $replicaCount ) {
357 $this->output( "Fixing bt_id=$targetId count $badCount -> $replicaCount: " );
358 if ( $this->dryRun ) {
359 $this->output( "dry run\n" );
360 return;
361 }
362
363 $dbw = $this->getPrimaryDB();
364 $dbw->startAtomic( __METHOD__ );
365 $primaryCount = (int)$dbw->newSelectQueryBuilder()
366 ->select( 'COUNT(*)' )
367 ->from( 'block' )
368 ->where( [ 'bl_target' => $targetId ] )
369 ->forUpdate()
370 ->caller( __METHOD__ )
371 ->fetchField();
372
373 if ( $primaryCount !== $replicaCount ) {
374 $dbw->endAtomic( __METHOD__ );
375 $this->output( "changed in primary, skipping\n" );
376 return;
377 }
378
379 $dbw->newUpdateQueryBuilder()
380 ->update( 'block_target' )
381 ->set( [ 'bt_count' => $primaryCount ] )
382 ->where( [
383 'bt_id' => $targetId,
384 'bt_count' => $badCount
385 ] )
386 ->caller( __METHOD__ )
387 ->execute();
388 $affected = $dbw->affectedRows();
389 $dbw->endAtomic( __METHOD__ );
390 $this->output( $affected ? "OK\n" : "no rows affected\n" );
391 }
392}
393
394$maintClass = CleanupBlocks::class;
395require_once RUN_MAINTENANCE_IF_MAIN;
execute()
Do the actual work.
__construct()
Default constructor.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
output( $out, $channel=null)
Throw some output to the user.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
waitForReplication()
Wait for replica DB servers to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getReplicaDB(string|false $virtualDomain=false)
getPrimaryDB(string|false $virtualDomain=false)
addDescription( $text)
Set the description text.
$maintClass