Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
70.52% |
177 / 251 |
|
53.85% |
7 / 13 |
CRAP | |
0.00% |
0 / 1 |
| CleanupBlocks | |
71.37% |
177 / 248 |
|
53.85% |
7 / 13 |
80.44 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| execute | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
| deleteOrphanBlockTargets | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
| deleteOrphanBlockTarget | |
80.00% |
20 / 25 |
|
0.00% |
0 / 1 |
4.13 | |||
| deleteTargetlessBlocks | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
| deleteTargetlessBlock | |
80.00% |
20 / 25 |
|
0.00% |
0 / 1 |
4.13 | |||
| normalizeAddresses | |
19.23% |
5 / 26 |
|
0.00% |
0 / 1 |
41.72 | |||
| normalizeAddress | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
20 | |||
| mergeDuplicateBlockTargets | |
100.00% |
19 / 19 |
|
100.00% |
1 / 1 |
1 | |||
| processIdGroups | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
| mergeGroup | |
84.62% |
44 / 52 |
|
0.00% |
0 / 1 |
6.13 | |||
| fixTargetCounts | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
2 | |||
| fixTargetCount | |
82.76% |
24 / 29 |
|
0.00% |
0 / 1 |
4.08 | |||
| 1 | <?php |
| 2 | |
| 3 | use MediaWiki\Maintenance\Maintenance; |
| 4 | use Wikimedia\IPUtils; |
| 5 | |
| 6 | require_once __DIR__ . '/Maintenance.php'; |
| 7 | |
| 8 | class CleanupBlocks extends Maintenance { |
| 9 | private bool $dryRun = false; |
| 10 | |
| 11 | public function __construct() { |
| 12 | parent::__construct(); |
| 13 | $this->addDescription( 'Fix referential integrity issues in block and block_target tables' ); |
| 14 | $this->addOption( 'dry-run', 'Just report, don\'t fix anything' ); |
| 15 | } |
| 16 | |
| 17 | public function execute() { |
| 18 | $this->dryRun = $this->hasOption( 'dry-run' ); |
| 19 | |
| 20 | $this->deleteOrphanBlockTargets(); |
| 21 | $this->deleteTargetlessBlocks(); |
| 22 | $this->normalizeAddresses(); |
| 23 | $this->mergeDuplicateBlockTargets(); |
| 24 | $this->fixTargetCounts(); |
| 25 | } |
| 26 | |
| 27 | /** |
| 28 | * Delete any block_target rows that have no corresponding blocks |
| 29 | */ |
| 30 | private function deleteOrphanBlockTargets() { |
| 31 | $dbr = $this->getReplicaDB(); |
| 32 | $badIds = $dbr->newSelectQueryBuilder() |
| 33 | ->select( 'bt_id' ) |
| 34 | ->from( 'block_target' ) |
| 35 | ->leftJoin( 'block', null, 'bt_id=bl_target' ) |
| 36 | ->where( [ 'bl_target' => null ] ) |
| 37 | ->caller( __METHOD__ ) |
| 38 | ->fetchFieldValues(); |
| 39 | |
| 40 | foreach ( $badIds as $id ) { |
| 41 | $this->deleteOrphanBlockTarget( (int)$id ); |
| 42 | } |
| 43 | } |
| 44 | |
| 45 | /** |
| 46 | * Verify and delete an orphan block_target row |
| 47 | * @param int $id |
| 48 | */ |
| 49 | private function deleteOrphanBlockTarget( int $id ) { |
| 50 | $this->output( "Deleting orphan bt_id=$id: " ); |
| 51 | if ( $this->dryRun ) { |
| 52 | $this->output( "dry run\n" ); |
| 53 | return; |
| 54 | } |
| 55 | $dbw = $this->getPrimaryDB(); |
| 56 | $dbw->startAtomic( __METHOD__ ); |
| 57 | $lockingUsage = $dbw->newSelectQueryBuilder() |
| 58 | ->select( 'COUNT(*)' ) |
| 59 | ->from( 'block' ) |
| 60 | ->where( [ 'bl_target' => $id ] ) |
| 61 | ->forUpdate() |
| 62 | ->caller( __METHOD__ ) |
| 63 | ->fetchField(); |
| 64 | if ( $lockingUsage ) { |
| 65 | $dbw->endAtomic( __METHOD__ ); |
| 66 | $this->output( "primary usage count is non-zero\n" ); |
| 67 | return; |
| 68 | } |
| 69 | $dbw->newDeleteQueryBuilder() |
| 70 | ->deleteFrom( 'block_target' ) |
| 71 | ->where( [ 'bt_id' => $id ] ) |
| 72 | ->caller( __METHOD__ ) |
| 73 | ->execute(); |
| 74 | $affected = $dbw->affectedRows(); |
| 75 | $dbw->endAtomic( __METHOD__ ); |
| 76 | $this->output( $affected ? "OK\n" : "no rows affected\n" ); |
| 77 | } |
| 78 | |
| 79 | /** |
| 80 | * Delete blocks which have a bl_target pointing to a non-existent bt_id |
| 81 | */ |
| 82 | private function deleteTargetlessBlocks() { |
| 83 | $dbr = $this->getReplicaDB(); |
| 84 | $res = $dbr->newSelectQueryBuilder() |
| 85 | ->select( [ 'bl_id', 'bl_target' ] ) |
| 86 | ->from( 'block' ) |
| 87 | ->leftJoin( 'block_target', null, 'bt_id=bl_target' ) |
| 88 | ->where( [ 'bt_id' => null ] ) |
| 89 | ->caller( __METHOD__ ) |
| 90 | ->fetchResultSet(); |
| 91 | foreach ( $res as $row ) { |
| 92 | $this->deleteTargetlessBlock( (int)$row->bl_id, (int)$row->bl_target ); |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | /** |
| 97 | * Verify and delete a block with no target |
| 98 | * |
| 99 | * @param int $blockId |
| 100 | * @param int $targetId |
| 101 | */ |
| 102 | private function deleteTargetlessBlock( int $blockId, int $targetId ) { |
| 103 | $this->output( "Deleting block $blockId on non-existent target $targetId: " ); |
| 104 | if ( $this->dryRun ) { |
| 105 | $this->output( "dry run\n" ); |
| 106 | return; |
| 107 | } |
| 108 | $dbw = $this->getPrimaryDB(); |
| 109 | $dbw->startAtomic( __METHOD__ ); |
| 110 | $lockingTargetCount = $dbw->newSelectQueryBuilder() |
| 111 | ->select( 'COUNT(*)' ) |
| 112 | ->from( 'block_target' ) |
| 113 | ->where( [ 'bt_id' => $targetId ] ) |
| 114 | ->forUpdate() |
| 115 | ->caller( __METHOD__ ) |
| 116 | ->fetchField(); |
| 117 | if ( $lockingTargetCount ) { |
| 118 | $this->output( "target exists in primary\n" ); |
| 119 | $dbw->endAtomic( __METHOD__ ); |
| 120 | return; |
| 121 | } |
| 122 | $dbw->newDeleteQueryBuilder() |
| 123 | ->deleteFrom( 'block' ) |
| 124 | ->where( [ 'bl_id' => $blockId, 'bl_target' => $targetId ] ) |
| 125 | ->caller( __METHOD__ ) |
| 126 | ->execute(); |
| 127 | $affected = $dbw->affectedRows(); |
| 128 | $dbw->endAtomic( __METHOD__ ); |
| 129 | $this->output( $affected ? "OK\n" : "no rows affected\n" ); |
| 130 | } |
| 131 | |
| 132 | /** |
| 133 | * Fix IP address normalization issues: |
| 134 | * - Leading zeroes like 1.1.1.001 |
| 135 | * - Lower-case IPv6 addresses like 200e:: |
| 136 | * - Non-zero range suffixes like 1.1.1.111/24 |
| 137 | */ |
| 138 | private function normalizeAddresses() { |
| 139 | $dbr = $this->getReplicaDB(); |
| 140 | $dbType = $dbr->getType(); |
| 141 | if ( $dbType !== 'mysql' ) { |
| 142 | $this->output( "Skipping IP address normalization: not implemented on $dbType\n" ); |
| 143 | return; |
| 144 | } |
| 145 | $res = $dbr->newSelectQueryBuilder() |
| 146 | ->select( [ 'bt_id', 'bt_address' ] ) |
| 147 | ->from( 'block_target' ) |
| 148 | ->where( [ 'bt_user' => null ] ) |
| 149 | ->andWhere( 'bt_range_start IS NOT NULL OR ' . |
| 150 | 'bt_address RLIKE \'(^|[.:])0[0-9]|[a-f]|::\'' ) |
| 151 | ->caller( __METHOD__ ) |
| 152 | ->fetchResultSet(); |
| 153 | $writeDone = false; |
| 154 | foreach ( $res as $row ) { |
| 155 | $addr = $row->bt_address; |
| 156 | if ( IPUtils::isValid( $addr ) ) { |
| 157 | $norm = IPUtils::sanitizeIP( $addr ); |
| 158 | } elseif ( IPUtils::isValidRange( $addr ) ) { |
| 159 | $norm = IPUtils::sanitizeRange( $addr ); |
| 160 | } else { |
| 161 | continue; |
| 162 | } |
| 163 | if ( $addr !== $norm && is_string( $norm ) ) { |
| 164 | $this->normalizeAddress( (int)$row->bt_id, $addr, $norm ); |
| 165 | $writeDone = true; |
| 166 | } |
| 167 | } |
| 168 | if ( $writeDone ) { |
| 169 | // Ensure that mergeDuplicateBlockTargets() sees our changes |
| 170 | $this->waitForReplication(); |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | /** |
| 175 | * Normalize the IP address in a single block_target row |
| 176 | * |
| 177 | * @param int $targetId |
| 178 | * @param string $address |
| 179 | * @param string $normalizedAddress |
| 180 | */ |
| 181 | private function normalizeAddress( int $targetId, string $address, string $normalizedAddress ) { |
| 182 | $this->output( "Normalizing bt_id=$targetId $address -> $normalizedAddress: " ); |
| 183 | if ( $this->dryRun ) { |
| 184 | $this->output( "dry run\n" ); |
| 185 | return; |
| 186 | } |
| 187 | $dbw = $this->getPrimaryDB(); |
| 188 | $dbw->startAtomic( __METHOD__ ); |
| 189 | $primaryAddr = $dbw->newSelectQueryBuilder() |
| 190 | ->select( 'bt_address' ) |
| 191 | ->from( 'block_target' ) |
| 192 | ->where( [ 'bt_id' => $targetId ] ) |
| 193 | ->forUpdate() |
| 194 | ->caller( __METHOD__ ) |
| 195 | ->fetchField(); |
| 196 | if ( $primaryAddr === false ) { |
| 197 | $this->output( "missing in primary\n" ); |
| 198 | return; |
| 199 | } |
| 200 | if ( $primaryAddr !== $address ) { |
| 201 | $this->output( "changed in primary\n" ); |
| 202 | return; |
| 203 | } |
| 204 | $dbw->newUpdateQueryBuilder() |
| 205 | ->update( 'block_target' ) |
| 206 | ->set( [ 'bt_address' => $normalizedAddress ] ) |
| 207 | ->where( [ 'bt_id' => $targetId ] ) |
| 208 | ->caller( __METHOD__ ) |
| 209 | ->execute(); |
| 210 | $dbw->endAtomic( __METHOD__ ); |
| 211 | $this->output( "done\n" ); |
| 212 | } |
| 213 | |
| 214 | /** |
| 215 | * Merge block_target rows referring to the same user, IP address or range |
| 216 | */ |
| 217 | private function mergeDuplicateBlockTargets() { |
| 218 | $dbr = $this->getReplicaDB(); |
| 219 | $rawGroups = $this->getReplicaDB()->newSelectQueryBuilder() |
| 220 | ->select( 'GROUP_CONCAT(bt_id)' ) |
| 221 | ->from( 'block_target' ) |
| 222 | ->where( $dbr->expr( 'bt_user', '!=', null ) ) |
| 223 | ->groupBy( 'bt_user' ) |
| 224 | ->having( 'COUNT(*) > 1' ) |
| 225 | ->caller( __METHOD__ ) |
| 226 | ->fetchFieldValues(); |
| 227 | $this->processIdGroups( $rawGroups ); |
| 228 | |
| 229 | $rawGroups = $this->getReplicaDB()->newSelectQueryBuilder() |
| 230 | ->select( 'GROUP_CONCAT(bt_id)' ) |
| 231 | ->from( 'block_target' ) |
| 232 | ->where( $dbr->expr( 'bt_address', '!=', null ) ) |
| 233 | ->groupBy( [ 'bt_auto', 'bt_address' ] ) |
| 234 | ->having( 'COUNT(*) > 1' ) |
| 235 | ->caller( __METHOD__ ) |
| 236 | ->fetchFieldValues(); |
| 237 | $this->processIdGroups( $rawGroups ); |
| 238 | } |
| 239 | |
| 240 | /** |
| 241 | * Process a set of duplicate targets |
| 242 | * @param string[] $rawGroups the ID groups, delimited by commas |
| 243 | */ |
| 244 | private function processIdGroups( $rawGroups ) { |
| 245 | foreach ( $rawGroups as $blob ) { |
| 246 | $group = array_map( 'intval', explode( ',', $blob ) ); |
| 247 | sort( $group ); |
| 248 | $main = array_shift( $group ); |
| 249 | $this->mergeGroup( $main, $group ); |
| 250 | } |
| 251 | } |
| 252 | |
| 253 | /** |
| 254 | * Merge a group of duplicate targets |
| 255 | * @param int $mainId The ID to merge into |
| 256 | * @param int[] $badIds The IDs to delete |
| 257 | */ |
| 258 | private function mergeGroup( int $mainId, array $badIds ) { |
| 259 | $this->output( 'Merging bt_id ' . implode( ',', $badIds ) . " into $mainId: " ); |
| 260 | if ( $this->dryRun ) { |
| 261 | $this->output( "dry run\n" ); |
| 262 | return; |
| 263 | } |
| 264 | |
| 265 | $dbw = $this->getPrimaryDB(); |
| 266 | $dbw->startAtomic( __METHOD__ ); |
| 267 | |
| 268 | // Check that the targets are identical in the primary |
| 269 | $fieldsToTest = [ 'bt_address', 'bt_user', 'bt_user_text', 'bt_auto' ]; |
| 270 | $mainRow = $dbw->newSelectQueryBuilder() |
| 271 | ->select( $fieldsToTest ) |
| 272 | ->from( 'block_target' ) |
| 273 | ->where( [ 'bt_id' => $mainId ] ) |
| 274 | ->forUpdate() |
| 275 | ->caller( __METHOD__ ) |
| 276 | ->fetchRow(); |
| 277 | $badRows = $dbw->newSelectQueryBuilder() |
| 278 | ->select( $fieldsToTest ) |
| 279 | ->select( 'bt_id' ) |
| 280 | ->from( 'block_target' ) |
| 281 | ->where( [ 'bt_id' => $badIds ] ) |
| 282 | ->forUpdate() |
| 283 | ->caller( __METHOD__ ) |
| 284 | ->fetchResultSet(); |
| 285 | |
| 286 | if ( $badRows->numRows() !== count( $badIds ) ) { |
| 287 | $this->output( "some IDs are not present in the primary\n" ); |
| 288 | $dbw->endAtomic( __METHOD__ ); |
| 289 | return; |
| 290 | } |
| 291 | |
| 292 | foreach ( $badRows as $badRow ) { |
| 293 | foreach ( $fieldsToTest as $field ) { |
| 294 | if ( $mainRow->$field !== $badRow->$field ) { |
| 295 | $this->output( "mismatch in $field for bt_id={$badRow->bt_id}\n" ); |
| 296 | $dbw->endAtomic( __METHOD__ ); |
| 297 | return; |
| 298 | } |
| 299 | } |
| 300 | } |
| 301 | |
| 302 | // Update the block rows for the targets to be deleted |
| 303 | $dbw->newUpdateQueryBuilder() |
| 304 | ->update( 'block' ) |
| 305 | ->set( [ 'bl_target' => $mainId ] ) |
| 306 | ->where( [ 'bl_target' => $badIds ] ) |
| 307 | ->caller( __METHOD__ ) |
| 308 | ->execute(); |
| 309 | $blockCount = $dbw->affectedRows(); |
| 310 | |
| 311 | // Delete the bad targets |
| 312 | $dbw->newDeleteQueryBuilder() |
| 313 | ->delete( 'block_target' ) |
| 314 | ->where( [ 'bt_id' => $badIds ] ) |
| 315 | ->caller( __METHOD__ ) |
| 316 | ->execute(); |
| 317 | |
| 318 | // Update bt_count for the remaining target |
| 319 | $dbw->newUpdateQueryBuilder() |
| 320 | ->update( 'block_target' ) |
| 321 | ->set( 'bt_count=bt_count + ' . $blockCount ) |
| 322 | ->where( [ 'bt_id' => $mainId ] ) |
| 323 | ->caller( __METHOD__ ) |
| 324 | ->execute(); |
| 325 | |
| 326 | $dbw->endAtomic( __METHOD__ ); |
| 327 | $this->output( "done\n" ); |
| 328 | } |
| 329 | |
| 330 | /** |
| 331 | * Find and fix incorrect bt_count values |
| 332 | */ |
| 333 | private function fixTargetCounts() { |
| 334 | $dbr = $this->getReplicaDB(); |
| 335 | $res = $dbr->newSelectQueryBuilder() |
| 336 | ->select( [ 'bt_id', 'bt_count', 'real_count' => 'COUNT(*)' ] ) |
| 337 | ->from( 'block' ) |
| 338 | ->join( 'block_target', null, 'bt_id=bl_target' ) |
| 339 | ->groupBy( [ 'bt_id', 'bt_count' ] ) |
| 340 | ->having( 'COUNT(*) != bt_count' ) |
| 341 | ->caller( __METHOD__ ) |
| 342 | ->fetchResultSet(); |
| 343 | |
| 344 | foreach ( $res as $row ) { |
| 345 | $this->fixTargetCount( (int)$row->bt_id, (int)$row->bt_count, (int)$row->real_count ); |
| 346 | } |
| 347 | } |
| 348 | |
| 349 | /** |
| 350 | * Fix an incorrect target count |
| 351 | * |
| 352 | * @param int $targetId The bt_id value |
| 353 | * @param int $badCount The bt_count value, from the replica |
| 354 | * @param int $replicaCount The number of associated block rows, from the replica |
| 355 | */ |
| 356 | private function fixTargetCount( int $targetId, int $badCount, int $replicaCount ) { |
| 357 | $this->output( "Fixing bt_id=$targetId count $badCount -> $replicaCount: " ); |
| 358 | if ( $this->dryRun ) { |
| 359 | $this->output( "dry run\n" ); |
| 360 | return; |
| 361 | } |
| 362 | |
| 363 | $dbw = $this->getPrimaryDB(); |
| 364 | $dbw->startAtomic( __METHOD__ ); |
| 365 | $primaryCount = (int)$dbw->newSelectQueryBuilder() |
| 366 | ->select( 'COUNT(*)' ) |
| 367 | ->from( 'block' ) |
| 368 | ->where( [ 'bl_target' => $targetId ] ) |
| 369 | ->forUpdate() |
| 370 | ->caller( __METHOD__ ) |
| 371 | ->fetchField(); |
| 372 | |
| 373 | if ( $primaryCount !== $replicaCount ) { |
| 374 | $dbw->endAtomic( __METHOD__ ); |
| 375 | $this->output( "changed in primary, skipping\n" ); |
| 376 | return; |
| 377 | } |
| 378 | |
| 379 | $dbw->newUpdateQueryBuilder() |
| 380 | ->update( 'block_target' ) |
| 381 | ->set( [ 'bt_count' => $primaryCount ] ) |
| 382 | ->where( [ |
| 383 | 'bt_id' => $targetId, |
| 384 | 'bt_count' => $badCount |
| 385 | ] ) |
| 386 | ->caller( __METHOD__ ) |
| 387 | ->execute(); |
| 388 | $affected = $dbw->affectedRows(); |
| 389 | $dbw->endAtomic( __METHOD__ ); |
| 390 | $this->output( $affected ? "OK\n" : "no rows affected\n" ); |
| 391 | } |
| 392 | } |
| 393 | |
| 394 | $maintClass = CleanupBlocks::class; |
| 395 | require_once RUN_MAINTENANCE_IF_MAIN; |