Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
96.27% covered (success)
96.27%
155 / 161
100.00% covered (success)
100.00%
4 / 4
CRAP
100.00% covered (success)
100.00%
1 / 1
FixGlobalBlockWhitelist
100.00% covered (success)
100.00%
155 / 155
100.00% covered (success)
100.00%
4 / 4
24
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 execute
100.00% covered (success)
100.00%
53 / 53
100.00% covered (success)
100.00%
1 / 1
9
 fixBrokenWhitelist
100.00% covered (success)
100.00%
82 / 82
100.00% covered (success)
100.00%
1 / 1
10
 handleDeletions
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
4
1<?php
2
3namespace MediaWiki\Extension\GlobalBlocking\Maintenance;
4
5use Maintenance;
6use MediaWiki\Extension\GlobalBlocking\GlobalBlockingServices;
7
// Resolve the MediaWiki installation directory: use the MW_INSTALL_PATH environment variable when it is set,
// otherwise fall back to the directory three levels above this file. The core Maintenance base class is then
// loaded from that installation.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP === false ? __DIR__ . '/../../..' : $IP;
require_once $IP . '/maintenance/Maintenance.php';
13
14/**
15 * If there is a whitelisted IP address or range with a corresponding global block
16 * row but if the ids do not match, this script can be used to make the ids same so
17 * that the whitelist is effective. Optionally, entries in the whitelist table with
18 * no corresponding global block row can be deleted if the 'delete' option is enabled.
19 * See https://phabricator.wikimedia.org/T56496.
20 */
class FixGlobalBlockWhitelist extends Maintenance {

    /** Batch size used when no batch size is set on the script. */
    private const DEFAULT_BATCH_SIZE = 500;

    /** @var bool Whether the 'dry-run' option was given, in which case no rows are modified. */
    protected bool $dryRun = false;

    /**
     * Registers the 'delete' and 'dry-run' options, sets the default batch size and declares that the
     * GlobalBlocking extension is required.
     */
    public function __construct() {
        parent::__construct();
        $this->addOption( 'delete', 'Delete whitelist entries with no corresponding global block' );
        $this->addOption( 'dry-run', 'Run the script without any modifications' );
        $this->setBatchSize( self::DEFAULT_BATCH_SIZE );

        $this->requireExtension( 'GlobalBlocking' );
    }

    /**
     * Scans global_block_whitelist in batches ordered by gbw_id and sorts every entry into one of two groups:
     * entries whose target has a global block under a different id (fixable), and entries whose target has no
     * global block at all (unfixable). The fixable entries are then passed to ::fixBrokenWhitelist and, if the
     * 'delete' option is set, the unfixable ones are passed to ::handleDeletions.
     *
     * @return void
     */
    public function execute() {
        $this->dryRun = $this->getOption( 'dry-run', false ) !== false;
        $localDbr = $this->getReplicaDB();

        // First check if there are any rows in global_block_whitelist. If there are no rows, then exit now as there is
        // nothing for this script to do.
        $rowsExist = $localDbr->newSelectQueryBuilder()
            ->select( 'gbw_id' )
            ->from( 'global_block_whitelist' )
            ->caller( __METHOD__ )
            ->limit( 1 )
            ->fetchRowCount();

        if ( !$rowsExist ) {
            $this->output( "No whitelist entries.\n" );
            return;
        }

        // These do not change between batches, so look them up once.
        $batchSize = $this->getBatchSize() ?? self::DEFAULT_BATCH_SIZE;
        $globalBlockingDbr = GlobalBlockingServices::wrap( $this->getServiceContainer() )
            ->getGlobalBlockingConnectionProvider()
            ->getReplicaGlobalBlockingDatabase();

        $lastWhitelistId = 0;
        $fixableBroken = [];
        $unfixableBroken = [];
        do {
            // Select a batch of whitelist entries to check which start from a gbw_id greater than the greatest gbw_id
            // from the last batch.
            $localWhitelistEntries = $localDbr->newSelectQueryBuilder()
                ->select( [ 'gbw_id', 'gbw_address' ] )
                ->from( 'global_block_whitelist' )
                ->where( $localDbr->expr( 'gbw_id', '>', $lastWhitelistId ) )
                ->orderBy( 'gbw_id' )
                ->limit( $batchSize )
                ->caller( __METHOD__ )
                ->fetchResultSet();

            // Map of gbw_id => gbw_address for this batch.
            $whitelistEntries = [];
            foreach ( $localWhitelistEntries as $row ) {
                $whitelistEntries[ $row->gbw_id ] = $row->gbw_address;
                $lastWhitelistId = $row->gbw_id;
            }

            $whitelistedIPs = array_values( $whitelistEntries );

            // If there were no whitelist entries in the batch, then exit now as there is nothing more to do.
            if ( !count( $whitelistedIPs ) ) {
                break;
            }

            // Find the associated global block rows for the whitelist entries in this batch.
            $gblocks = $globalBlockingDbr->newSelectQueryBuilder()
                ->select( [ 'gb_id', 'gb_address' ] )
                ->from( 'globalblocks' )
                ->where( [ 'gb_address' => $whitelistedIPs ] )
                ->caller( __METHOD__ )
                ->fetchResultSet();

            // Map of gb_id => gb_address for the blocks that target something in this batch.
            $gblockEntries = [];
            foreach ( $gblocks as $gblock ) {
                $gblockEntries[ $gblock->gb_id ] = $gblock->gb_address;
            }

            // Try to match the whitelist entries with the global block entries.
            foreach ( $gblockEntries as $gblockId => $gblockAddress ) {
                // Strict comparison so that distinct targets which merely look numerically equal (for example
                // "1e3" and "1000") are never treated as the same target.
                $whitelistId = array_search( $gblockAddress, $whitelistEntries, true );
                if ( $whitelistId !== false && $whitelistId !== $gblockId ) {
                    // If there is a whitelist entry which has the same target as a global block, but the IDs of these
                    // do not match, then this is a broken whitelist entry which can be fixed.
                    $fixableBroken[ $gblockId ] = $whitelistEntries[ $whitelistId ];
                }
            }

            // Find any whitelist entries that do not have a corresponding global block. These are broken entries but
            // cannot be fixed and will be deleted if the 'delete' option is specified.
            $unfixableBroken = array_merge(
                $unfixableBroken,
                array_diff( $whitelistedIPs, array_values( $gblockEntries ) )
            );
            // A batch that came back short means the table has been fully scanned.
        } while ( $localWhitelistEntries->numRows() === $batchSize );

        $this->fixBrokenWhitelist( $fixableBroken );

        if ( $this->getOption( 'delete' ) ) {
            $this->handleDeletions( $unfixableBroken );
        }
    }

    /**
     * Fixes broken whitelist entries which have a corresponding global block but the IDs do not match.
     *
     * For each entry: if a whitelist row already uses the wanted id, the other rows for that target are deleted.
     * Otherwise all but the highest-id row for the target are deleted and the remaining row is updated to use the
     * id, expiry and target central id of the global block. In dry-run mode the planned actions are only printed.
     *
     * @param array $brokenEntries An array of whitelist entries which have a corresponding global block but the IDs
     *   do not match. The key is the global block ID for the currently applied block and the value is the target of
     *   that block.
     * @return void
     */
    protected function fixBrokenWhitelist( array $brokenEntries ) {
        $brokenCount = count( $brokenEntries );
        if ( $brokenCount === 0 ) {
            // Return early if there are no broken whitelist entries that can be fixed.
            $this->output( "No broken whitelist entries which can be fixed.\n" );
            return;
        }

        // Start processing the broken whitelist entries that can be fixed.
        $this->output( "Found $brokenCount broken whitelist entries which can be fixed.\n" );
        $count = 0;
        $batchSize = $this->getBatchSize() ?? self::DEFAULT_BATCH_SIZE;
        $lbFactory = $this->getServiceContainer()->getDBLoadBalancerFactory();
        $localDbr = $this->getReplicaDB();
        $localDbw = $this->getPrimaryDB();
        // Only looked up once an entry actually needs updating, so a dry run never requests it.
        $globalBlockingDbr = null;

        foreach ( $brokenEntries as $newId => $address ) {
            if ( !$this->dryRun && $count === $batchSize ) {
                // Wait for replication if we have processed a batch of entries
                // and this is not a dry run.
                $lbFactory->waitForReplication();
                $count = 0;
            }
            $count++;

            // Check if there is already a whitelist entry using the id we want to use.
            $entryAlreadyExists = (bool)$localDbr->newSelectQueryBuilder()
                ->select( '1' )
                ->from( 'global_block_whitelist' )
                ->where( [ 'gbw_id' => $newId ] )
                ->caller( __METHOD__ )
                ->fetchField();
            if ( $entryAlreadyExists ) {
                if ( $this->dryRun ) {
                    $this->output( " Would delete broken entries for $address: id $newId already is whitelisted.\n" );
                    continue;
                }
                // If a whitelist entry already exists with the gbw_id we want to use, then we cannot update this
                // broken whitelist entry and should instead delete it.
                $localDbw->newDeleteQueryBuilder()
                    ->deleteFrom( 'global_block_whitelist' )
                    ->where( [
                        'gbw_address' => $address,
                        // Only delete the broken entries and not the unbroken entry.
                        $localDbw->expr( 'gbw_id', '!=', $newId )
                    ] )
                    ->caller( __METHOD__ )
                    ->execute();
                $this->output( " Deleted broken entries for $address: id $newId already is whitelisted.\n" );
                continue;
            }

            // Delete any duplicate whitelist entries with the same address, keeping the one with the highest
            // gbw_id as this should be the most recent entry.
            $brokenEntriesForThisAddress = $localDbr->newSelectQueryBuilder()
                ->select( 'gbw_id' )
                ->from( 'global_block_whitelist' )
                ->where( [ 'gbw_address' => $address ] )
                ->caller( __METHOD__ )
                ->fetchFieldValues();
            if ( count( $brokenEntriesForThisAddress ) > 1 ) {
                // If there are multiple broken entries for this address, then delete all but the one with the highest
                // gbw_id as this will likely be the most relevant entry (as it was for the most recent global block
                // on this target).
                $maxIdForThisAddress = max( $brokenEntriesForThisAddress );
                if ( $this->dryRun ) {
                    $this->output(
                        " Would delete all whitelist entries for $address except the entry with gbw_id as " .
                        "$maxIdForThisAddress: only one row can be updated to use id $newId.\n"
                    );
                } else {
                    $localDbw->newDeleteQueryBuilder()
                        ->deleteFrom( 'global_block_whitelist' )
                        ->where( [
                            'gbw_address' => $address,
                            $localDbw->expr( 'gbw_id', '!=', $maxIdForThisAddress )
                        ] )
                        ->caller( __METHOD__ )
                        ->execute();
                    $this->output(
                        " Deleted all whitelist entries for $address except the entry with gbw_id as " .
                        "$maxIdForThisAddress: only one row can be updated to use id $newId.\n"
                    );
                }
            }

            // Update the one remaining broken whitelist entry to use the correct id, and also to match the expiry
            // and target central ID of the associated global block.
            if ( $this->dryRun ) {
                $this->output( " Whitelist broken $address: current gb_id is $newId\n" );
                continue;
            }
            $globalBlockingDbr ??= GlobalBlockingServices::wrap( $this->getServiceContainer() )
                ->getGlobalBlockingConnectionProvider()
                ->getReplicaGlobalBlockingDatabase();
            $associatedGlobalBlockEntry = $globalBlockingDbr->newSelectQueryBuilder()
                ->select( [ 'gb_expiry', 'gb_target_central_id' ] )
                ->from( 'globalblocks' )
                ->where( [ 'gb_id' => $newId ] )
                ->caller( __METHOD__ )
                ->fetchRow();
            if ( !$associatedGlobalBlockEntry ) {
                // The global block was found when the whitelist was scanned but cannot be read any more (for
                // example because it was removed in the meantime), so there is nothing to copy the values from.
                $this->output( " Skipped $address: global block with id $newId could not be found.\n" );
                continue;
            }
            $localDbw->newUpdateQueryBuilder()
                ->update( 'global_block_whitelist' )
                ->set( [
                    'gbw_id' => $newId,
                    'gbw_expiry' => $associatedGlobalBlockEntry->gb_expiry,
                    'gbw_target_central_id' => $associatedGlobalBlockEntry->gb_target_central_id
                ] )
                ->where( [ 'gbw_address' => $address ] )
                ->caller( __METHOD__ )
                ->execute();
            $this->output( " Fixed $address: id changed to $newId\n" );
        }
        $this->output( "Finished processing broken whitelist entries.\n" );
    }

    /**
     * Handles the deletion of whitelist entries which have no corresponding global block.
     * Only called if the 'delete' option is specified. The affected targets are always listed in the output;
     * the rows are only deleted when this is not a dry run.
     *
     * @param array $nonExistent An array of targets which have whitelist entries but no corresponding global block.
     * @return void
     */
    protected function handleDeletions( array $nonExistent ) {
        $nonExistentCount = count( $nonExistent );
        if ( $nonExistentCount === 0 ) {
            // Return early if there are no whitelist entries to be deleted.
            $this->output( "All whitelist entries have corresponding global blocks.\n" );
            return;
        }
        $this->output( "Found $nonExistentCount whitelist entries with no corresponding global blocks:\n"
            . implode( "\n", $nonExistent ) . "\n"
        );
        if ( !$this->dryRun ) {
            // Delete the whitelist entries which have no corresponding global block in batches of 'batch-size'
            // targets.
            $batchSize = $this->getBatchSize() ?? self::DEFAULT_BATCH_SIZE;
            foreach ( array_chunk( $nonExistent, $batchSize ) as $chunk ) {
                $this->getPrimaryDB()->newDeleteQueryBuilder()
                    ->deleteFrom( 'global_block_whitelist' )
                    ->where( [ 'gbw_address' => $chunk ] )
                    ->caller( __METHOD__ )
                    ->execute();
            }
            $this->output( "Finished deleting whitelist entries with no corresponding global blocks.\n" );
        }
    }
}
274
// Record this script's class name in $maintClass and include the file named by the RUN_MAINTENANCE_IF_MAIN
// constant. NOTE(review): presumably that file is MediaWiki's runner, which reads $maintClass and executes the
// class when this file is the entry point - confirm against maintenance/Maintenance.php.
$maintClass = FixGlobalBlockWhitelist::class;
require_once RUN_MAINTENANCE_IF_MAIN;