Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
96.30% covered (success)
96.30%
156 / 162
100.00% covered (success)
100.00%
4 / 4
CRAP
100.00% covered (success)
100.00%
1 / 1
FixGlobalBlockWhitelist
100.00% covered (success)
100.00%
156 / 156
100.00% covered (success)
100.00%
4 / 4
24
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 execute
100.00% covered (success)
100.00%
53 / 53
100.00% covered (success)
100.00%
1 / 1
9
 fixBrokenWhitelist
100.00% covered (success)
100.00%
83 / 83
100.00% covered (success)
100.00%
1 / 1
10
 handleDeletions
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
4
1<?php
2
3namespace MediaWiki\Extension\GlobalBlocking\Maintenance;
4
5use Maintenance;
6use MediaWiki\Extension\GlobalBlocking\GlobalBlockingServices;
7
// Resolve the MediaWiki installation root: use the MW_INSTALL_PATH environment variable when it is set,
// otherwise fall back to the directory three levels above this script. The core Maintenance base class
// is then loaded from that installation.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP === false ? __DIR__ . '/../../..' : $IP;
require_once "$IP/maintenance/Maintenance.php";
13
/**
 * Maintenance script which repairs the local global_block_whitelist table.
 *
 * If there is a whitelisted target with a corresponding global block row but the ids do not
 * match, this script makes the ids the same so that the whitelist entry is effective again.
 * Optionally, entries in the whitelist table with no corresponding global block row are deleted
 * when the 'delete' option is given. With the 'dry-run' option the script only reports what it
 * would do and performs no writes.
 *
 * See https://phabricator.wikimedia.org/T56496.
 */
class FixGlobalBlockWhitelist extends Maintenance {

    /** Batch size used when no batch size is available from the maintenance framework. */
    private const DEFAULT_BATCH_SIZE = 500;

    /** Whether the script only reports what it would change instead of writing to the database. */
    protected bool $dryRun = false;

    /**
     * Registers the 'delete' and 'dry-run' options, sets the default batch size and declares the
     * dependency on the GlobalBlocking extension.
     */
    public function __construct() {
        parent::__construct();
        $this->addOption( 'delete', 'Delete whitelist entries with no corresponding global block' );
        $this->addOption( 'dry-run', 'Run the script without any modifications' );
        $this->setBatchSize( self::DEFAULT_BATCH_SIZE );

        $this->requireExtension( 'GlobalBlocking' );
    }

    /**
     * Scans global_block_whitelist in batches ordered by gbw_id, classifies every entry as either
     * fixable (a global block with the same target exists but under a different id) or unfixable
     * (no global block with that target exists), then fixes the former and, if the 'delete'
     * option was given, deletes the latter.
     */
    public function execute() {
        $this->dryRun = $this->getOption( 'dry-run', false ) !== false;
        $localDbr = $this->getReplicaDB();

        // First check if there are any rows in global_block_whitelist. If there are no rows, then exit now as
        // there is nothing for this script to do.
        $rowsExist = $localDbr->newSelectQueryBuilder()
            ->select( 'gbw_id' )
            ->from( 'global_block_whitelist' )
            ->caller( __METHOD__ )
            ->limit( 1 )
            ->fetchRowCount();

        if ( !$rowsExist ) {
            $this->output( "No whitelist entries.\n" );
            return;
        }

        $batchSize = $this->getBatchSize() ?? self::DEFAULT_BATCH_SIZE;
        // The connection does not change between batches, so resolve it once.
        $globalBlockingDbr = $this->getGlobalBlockingReplicaDb();

        $lastWhitelistId = 0;
        $fixableBroken = [];
        $unfixableBroken = [];
        do {
            // Select a batch of whitelist entries to check which start from a gbw_id greater than the greatest
            // gbw_id from the last batch.
            $localWhitelistEntries = $localDbr->newSelectQueryBuilder()
                ->select( [ 'gbw_id', 'gbw_address' ] )
                ->from( 'global_block_whitelist' )
                ->where( $localDbr->expr( 'gbw_id', '>', $lastWhitelistId ) )
                ->orderBy( 'gbw_id' )
                ->limit( $batchSize )
                ->caller( __METHOD__ )
                ->fetchResultSet();

            // Map of gbw_id => gbw_address for this batch.
            $whitelistEntries = [];
            foreach ( $localWhitelistEntries as $row ) {
                $whitelistEntries[ $row->gbw_id ] = $row->gbw_address;
                $lastWhitelistId = $row->gbw_id;
            }

            $whitelistedIPs = array_values( $whitelistEntries );

            // If there were no whitelist entries in the batch, then exit now as there is nothing more to do.
            if ( !count( $whitelistedIPs ) ) {
                break;
            }

            // Find the associated global block rows for the whitelist entries in this batch.
            $gblocks = $globalBlockingDbr->newSelectQueryBuilder()
                ->select( [ 'gb_id', 'gb_address' ] )
                ->from( 'globalblocks' )
                ->where( [ 'gb_address' => $whitelistedIPs ] )
                ->caller( __METHOD__ )
                ->fetchResultSet();

            // Map of gb_id => gb_address for the global blocks that match this batch.
            $gblockEntries = [];
            foreach ( $gblocks as $gblock ) {
                $gblockEntries[ $gblock->gb_id ] = $gblock->gb_address;
            }

            // Try to match the whitelist entries with the global block entries.
            foreach ( $gblockEntries as $gblockId => $gblockAddress ) {
                // Strict comparison so that numeric-looking targets (e.g. "1e3" and "1000") are never
                // treated as the same address. The first matching whitelist entry is used on purpose: if
                // an older duplicate exists it is reported as broken even when a newer entry already
                // has the right id, and fixBrokenWhitelist() then removes the duplicate.
                $whitelistId = array_search( $gblockAddress, $whitelistEntries, true );
                if ( $whitelistId !== false && $whitelistId !== $gblockId ) {
                    // If there is a whitelist entry which has the same target as a global block, but the IDs
                    // of these do not match, then this is a broken whitelist entry which can be fixed.
                    $fixableBroken[ $gblockId ] = $whitelistEntries[ $whitelistId ];
                }
            }

            // Find any whitelist entries that do not have a corresponding global block. These are broken entries
            // but cannot be fixed and will be deleted if the 'delete' option is specified.
            $unfixableBroken = array_merge(
                $unfixableBroken,
                array_diff( $whitelistedIPs, array_values( $gblockEntries ) )
            );
        } while ( $localWhitelistEntries->numRows() === $batchSize );

        $this->fixBrokenWhitelist( $fixableBroken );

        if ( $this->getOption( 'delete' ) ) {
            $this->handleDeletions( $unfixableBroken );
        }
    }

    /**
     * Fixes broken whitelist entries which have a corresponding global block but the IDs do not match.
     *
     * For every entry one of the following happens:
     *  - if a whitelist row already uses the wanted id, all other rows for the target are deleted;
     *  - otherwise duplicate rows for the target are reduced to the one with the highest gbw_id and
     *    that row is updated to use the id, expiry and target central ID of the global block.
     * In dry-run mode the same decisions are reported but nothing is written.
     *
     * @param array $brokenEntries An array of whitelist entries which have a corresponding global block but the IDs
     *   do not match. The key is the global block ID for the currently applied block and the value is the target of
     *   that block.
     * @return void
     */
    protected function fixBrokenWhitelist( array $brokenEntries ) {
        $brokenCount = count( $brokenEntries );
        if ( $brokenCount === 0 ) {
            // Return early if there are no broken whitelist entries that can be fixed.
            $this->output( "No broken whitelist entries which can be fixed.\n" );
            return;
        }

        // Start processing the broken whitelist entries that can be fixed.
        $this->output( "Found $brokenCount broken whitelist entries which can be fixed.\n" );
        $batchSize = $this->getBatchSize() ?? self::DEFAULT_BATCH_SIZE;
        $processedInBatch = 0;
        $lbFactory = $this->getServiceContainer()->getDBLoadBalancerFactory();
        $localDbr = $this->getReplicaDB();
        $localDbw = $this->getPrimaryDB();
        // Resolved lazily on first use, because a dry run never needs to read the global block row.
        $globalBlockingDbr = null;

        foreach ( $brokenEntries as $newId => $address ) {
            if ( !$this->dryRun && $processedInBatch === $batchSize ) {
                // Wait for replication if we have processed a batch of entries
                // and this is not a dry run.
                $lbFactory->waitForReplication();
                $processedInBatch = 0;
            }
            $processedInBatch++;

            // Check if there is already a whitelist entry using the id we want to use.
            $entryAlreadyExists = (bool)$localDbr->newSelectQueryBuilder()
                ->select( '1' )
                ->from( 'global_block_whitelist' )
                ->where( [ 'gbw_id' => $newId ] )
                ->caller( __METHOD__ )
                ->fetchField();
            if ( $entryAlreadyExists ) {
                if ( $this->dryRun ) {
                    $this->output( " Would delete broken entries for $address: id $newId already is whitelisted.\n" );
                    continue;
                }
                // If a whitelist entry already exists with the gbw_id we want to use, then we cannot update this
                // broken whitelist entry and should instead delete it.
                $localDbw->newDeleteQueryBuilder()
                    ->deleteFrom( 'global_block_whitelist' )
                    ->where( [
                        'gbw_address' => $address,
                        // Only delete the broken entries and not the unbroken entry.
                        $localDbw->expr( 'gbw_id', '!=', $newId )
                    ] )
                    ->caller( __METHOD__ )
                    ->execute();
                $this->output( " Deleted broken entries for $address: id $newId already is whitelisted.\n" );
                continue;
            }

            // Delete any duplicate whitelist entries with the same address, keeping the one with the highest
            // gbw_id as this should be the most recent entry.
            $brokenEntriesForThisAddress = $localDbr->newSelectQueryBuilder()
                ->select( 'gbw_id' )
                ->from( 'global_block_whitelist' )
                ->where( [ 'gbw_address' => $address ] )
                ->caller( __METHOD__ )
                ->fetchFieldValues();
            if ( count( $brokenEntriesForThisAddress ) > 1 ) {
                // If there are multiple broken entries for this address, then delete all but the one with the
                // highest gbw_id as this will likely be the most relevant entry (as it was for the most recent
                // global block on this target).
                $maxIdForThisAddress = max( $brokenEntriesForThisAddress );
                if ( $this->dryRun ) {
                    $this->output(
                        " Would delete all whitelist entries for $address except the entry with gbw_id as " .
                        "$maxIdForThisAddress: only one row can be updated to use id $newId.\n"
                    );
                } else {
                    $localDbw->newDeleteQueryBuilder()
                        ->deleteFrom( 'global_block_whitelist' )
                        ->where( [
                            'gbw_address' => $address,
                            $localDbw->expr( 'gbw_id', '!=', $maxIdForThisAddress )
                        ] )
                        ->caller( __METHOD__ )
                        ->execute();
                    $this->output(
                        " Deleted all whitelist entries for $address except the entry with gbw_id as " .
                        "$maxIdForThisAddress: only one row can be updated to use id $newId.\n"
                    );
                }
            }

            // Update the one remaining broken whitelist entry to use the correct id, and also to match the expiry
            // and target central ID of the associated global block.
            if ( $this->dryRun ) {
                $this->output( " Whitelist broken $address: current gb_id is $newId\n" );
                continue;
            }
            $globalBlockingDbr ??= $this->getGlobalBlockingReplicaDb();
            $associatedGlobalBlockEntry = $globalBlockingDbr->newSelectQueryBuilder()
                ->select( [ 'gb_expiry', 'gb_target_central_id' ] )
                ->from( 'globalblocks' )
                ->where( [ 'gb_id' => $newId ] )
                ->caller( __METHOD__ )
                ->fetchRow();
            if ( !$associatedGlobalBlockEntry ) {
                // The global block row was found while scanning but is gone now (for example it was removed in
                // the meantime). There is nothing to copy the expiry and central ID from, so leave the entry.
                $this->output( " Skipped $address: global block with id $newId no longer exists.\n" );
                continue;
            }
            $localDbw->newUpdateQueryBuilder()
                ->update( 'global_block_whitelist' )
                ->set( [
                    'gbw_id' => $newId,
                    'gbw_expiry' => $associatedGlobalBlockEntry->gb_expiry,
                    'gbw_target_central_id' => $associatedGlobalBlockEntry->gb_target_central_id
                ] )
                ->where( [ 'gbw_address' => $address ] )
                ->caller( __METHOD__ )
                ->execute();
            $this->output( " Fixed $address: id changed to $newId\n" );
        }
        $this->output( "Finished processing broken whitelist entries.\n" );
    }

    /**
     * Handles the deletion of whitelist entries which have no corresponding global block.
     * Only called if the 'delete' option is specified. The affected targets are always listed in the
     * output; the rows are only deleted when this is not a dry run.
     *
     * @param array $nonExistent An array of targets which have whitelist entries but no corresponding global block.
     * @return void
     */
    protected function handleDeletions( array $nonExistent ) {
        $nonExistentCount = count( $nonExistent );
        if ( $nonExistentCount === 0 ) {
            // Return early if there are no whitelist entries to be deleted.
            $this->output( "All whitelist entries have corresponding global blocks.\n" );
            return;
        }
        $this->output( "Found $nonExistentCount whitelist entries with no corresponding global blocks:\n"
            . implode( "\n", $nonExistent ) . "\n"
        );
        if ( $this->dryRun ) {
            return;
        }

        // Delete the whitelist entries which have no corresponding global block in batches of 'batch-size'
        // targets, waiting for replication after each batch in the same way as fixBrokenWhitelist() does.
        $batchSize = $this->getBatchSize() ?? self::DEFAULT_BATCH_SIZE;
        $localDbw = $this->getPrimaryDB();
        $lbFactory = $this->getServiceContainer()->getDBLoadBalancerFactory();
        foreach ( array_chunk( $nonExistent, $batchSize ) as $chunk ) {
            $localDbw->newDeleteQueryBuilder()
                ->deleteFrom( 'global_block_whitelist' )
                ->where( [ 'gbw_address' => $chunk ] )
                ->caller( __METHOD__ )
                ->execute();
            $lbFactory->waitForReplication();
        }
        $this->output( "Finished deleting whitelist entries with no corresponding global blocks.\n" );
    }

    /**
     * Returns the replica connection used by this script to read the globalblocks table, as provided by the
     * GlobalBlocking connection provider service.
     */
    private function getGlobalBlockingReplicaDb() {
        return GlobalBlockingServices::wrap( $this->getServiceContainer() )
            ->getGlobalBlockingConnectionProvider()
            ->getReplicaGlobalBlockingDatabase();
    }
}
275
// Register this script's class name in $maintClass and include the file named by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is not defined in this file; presumably it comes from the
// Maintenance.php include above and runs $maintClass when this script is the entry point - confirm.
276$maintClass = FixGlobalBlockWhitelist::class;
277require_once RUN_MAINTENANCE_IF_MAIN;