Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 2 |
n/a |
0 / 0 |
CRAP | n/a |
0 / 0 |
||
UpdateVarDumps | n/a |
0 / 0 |
n/a |
0 / 0 |
81 | n/a |
0 / 0 |
|||
__construct | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
getUpdateKey | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
doDBUpdates | n/a |
0 / 0 |
n/a |
0 / 0 |
4 | |||||
fixMissingDumps | n/a |
0 / 0 |
n/a |
0 / 0 |
2 | |||||
doFixMissingDumps | n/a |
0 / 0 |
n/a |
0 / 0 |
8 | |||||
moveToText | n/a |
0 / 0 |
n/a |
0 / 0 |
2 | |||||
doMoveToText | n/a |
0 / 0 |
n/a |
0 / 0 |
9 | |||||
restoreTruncatedDump | n/a |
0 / 0 |
n/a |
0 / 0 |
17 | |||||
updateText | n/a |
0 / 0 |
n/a |
0 / 0 |
5 | |||||
doUpdateText | n/a |
0 / 0 |
n/a |
0 / 0 |
16 | |||||
updateVariables | n/a |
0 / 0 |
n/a |
0 / 0 |
4 | |||||
getBuiltinVarNames | n/a |
0 / 0 |
n/a |
0 / 0 |
4 | |||||
updateAflVarDump | n/a |
0 / 0 |
n/a |
0 / 0 |
3 | |||||
maybePrintProgress | n/a |
0 / 0 |
n/a |
0 / 0 |
3 | |||||
maybeSleep | n/a |
0 / 0 |
n/a |
0 / 0 |
2 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\AbuseFilter\Maintenance; |
4 | |
5 | use ExternalStoreAccess; |
6 | use FormatJson; |
7 | use LoggedUpdateMaintenance; |
8 | use MediaWiki\Extension\AbuseFilter\AbuseFilterServices; |
9 | use MediaWiki\Extension\AbuseFilter\KeywordsManager; |
10 | use MediaWiki\Extension\AbuseFilter\Variables\VariableHolder; |
11 | use MediaWiki\Extension\AbuseFilter\Variables\VariablesBlobStore; |
12 | use MediaWiki\MediaWikiServices; |
13 | use MediaWiki\Title\Title; |
14 | use UnexpectedValueException; |
15 | use Wikimedia\Rdbms\IMaintainableDatabase; |
16 | use Wikimedia\Rdbms\IResultWrapper; |
17 | |
// @codeCoverageIgnoreStart
// Resolve the MediaWiki installation directory: use MW_INSTALL_PATH when the
// environment provides it, otherwise fall back to three directories above this script.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = $IP === false ? __DIR__ . '/../../..' : $IP;
require_once "$IP/maintenance/Maintenance.php";
// @codeCoverageIgnoreEnd
25 | |
/**
 * Performs several tasks aiming to update the stored var dumps for filter hits.
 *
 * See T213006 for a list.
 *
 * The work is split into four sequential passes over abuse_filter_log, each batched by afl_id:
 *  1. fixMissingDumps()  - delete or rebuild rows with an empty afl_var_dump
 *  2. moveToText()       - move serialized dumps stored inline to the blob store
 *  3. updateText()       - re-store serialized text rows as JSON
 *  4. updateAflVarDump() - rewrite the 'stored-text:' prefix to 'tt:'
 *
 * @codeCoverageIgnore
 * This script used to be covered by a test, but it was removed: the script was single-use, so
 * no more testing is needed. OTOH, maintaining the test was too hard because we needed to create
 * fixtures with serialized classes, which quickly becomes unsustainable.
 */
class UpdateVarDumps extends LoggedUpdateMaintenance {
	/** @var IMaintainableDatabase A connection to replica */
	private $dbr;
	/** @var IMaintainableDatabase A connection to the primary database */
	private $dbw;
	/** @var bool Whether we're performing a dry run */
	private $dryRun = false;
	/**
	 * @var int Highest afl_id in the abuse_filter_log table (0 if the table is empty).
	 * Despite the name this is not a row count: it is the upper bound used for ID-based batching.
	 */
	private $allRowsCount;
	/** @var bool Whether to print progress markers */
	private $progressMarkers;
	/** @var string|null File to which the URLs of orphaned ExternalStore records are appended */
	private $printOrphanedFile;
	/** @var int|null How many seconds to sleep after each batch. */
	private $sleep;
	/** @var KeywordsManager */
	private $keywordsManager;
	/** @var VariablesBlobStore */
	private $varBlobStore;

	/**
	 * @inheritDoc
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Update AbuseFilter var dumps - T213006' );
		$this->addOption( 'dry-run-verbose', 'Perform a verbose dry run' );
		$this->addOption( 'dry-run', 'Perform a dry run' );
		$this->addOption( 'progress-markers', 'Print progress markers every 10 batches' );
		$this->addOption(
			'print-orphaned-records-to',
			'Print ExternalStore urls of orphaned ExternalStore records (if any) ' .
			'to the given file. Can use stdout, but it\'s not recommended for big databases.',
			false,
			true
		);
		$this->addOption( 'sleep', 'Sleep this many seconds after each batch', false, true );
		$this->requireExtension( 'Abuse Filter' );
		$this->setBatchSize( 500 );
	}

	/**
	 * @inheritDoc
	 */
	public function getUpdateKey() {
		return 'UpdateVarDumps';
	}

	/**
	 * Read the command line options, set up the services and connections, then run the
	 * four update passes in order.
	 *
	 * @inheritDoc
	 * @return bool False for dry runs, true otherwise
	 */
	public function doDBUpdates() {
		if ( $this->hasOption( 'dry-run-verbose' ) || $this->hasOption( 'dry-run' ) ) {
			// This way the script can be called with dry-run-verbose only and we can check for dry-run
			$this->dryRun = true;
		}
		$this->progressMarkers = $this->hasOption( 'progress-markers' );
		$this->printOrphanedFile = $this->getOption( 'print-orphaned-records-to' );
		// Option values arrive as strings; normalize so the property matches its int|null type
		// and sleep() always receives an integer.
		$sleep = $this->getOption( 'sleep' );
		$this->sleep = $sleep === null ? null : (int)$sleep;

		$this->keywordsManager = AbuseFilterServices::getKeywordsManager();
		$this->varBlobStore = AbuseFilterServices::getVariablesBlobStore();

		// Faulty rows aren't inserted anymore, hence we can query the replica and update the primary database.
		$this->dbr = $this->getDB( DB_REPLICA );
		$this->dbw = $this->getDB( DB_PRIMARY );

		// Control batching with the primary key to keep the queries performant and allow gaps
		$this->allRowsCount = (int)$this->dbr->selectField(
			'abuse_filter_log',
			'MAX(afl_id)',
			[],
			__METHOD__
		);

		if ( $this->allRowsCount === 0 ) {
			$this->output( "...the abuse_filter_log table is empty.\n" );
			return !$this->dryRun;
		}

		// Do the actual work. Note that several actions are superfluous (e.g. in fixMissingDumps
		// we use "stored-text" but then we replace it in updateAflVarDump), but that's because of SRP.

		// First, ensure that afl_var_dump isn't empty
		$this->fixMissingDumps();
		// Then, ensure that abuse_filter_log.afl_var_dump only contains "stored-text:xxxx"
		$this->moveToText();
		// Then update the storage format in the text table
		$this->updateText();
		// Finally, replace "stored-text:xxxx" with "tt:xxxx" for all rows
		$this->updateAflVarDump();

		// NOTE(review): presumably returning false keeps a dry run from being logged as a
		// completed update - confirm against LoggedUpdateMaintenance.
		return !$this->dryRun;
	}

	/**
	 * Handle empty afl_var_dump. gerrit/16527 fixed a bug which caused an extra abuse_filter_log
	 * row to be inserted without the var dump for a given action. If we find a row identical to
	 * the current one but with a valid dump, just delete the current one. Otherwise, store a
	 * very basic var dump for sanity.
	 * This handles point 7. of T213006.
	 */
	private function fixMissingDumps() {
		$this->output( "...Checking for missing dumps (1/4)\n" );
		$batchSize = $this->getBatchSize();

		// Each iteration handles the half-open ID range ( $prevID, $curID ]
		$prevID = 0;
		$curID = $batchSize;
		$deleted = $rebuilt = 0;
		do {
			$this->maybePrintProgress( $prevID );
			$brokenRows = $this->dbr->select(
				'abuse_filter_log',
				'*',
				[
					'afl_var_dump' => '',
					"afl_id > $prevID",
					"afl_id <= $curID"
				],
				__METHOD__,
				[ 'ORDER BY' => 'afl_id ASC' ]
			);
			$prevID = $curID;
			$curID += $batchSize;

			$res = $this->doFixMissingDumps( $brokenRows );
			$deleted += $res['deleted'];
			$rebuilt += $res['rebuilt'];
			$this->waitForReplication();
			$this->maybeSleep();
		} while ( $prevID <= $this->allRowsCount );

		if ( $this->dryRun ) {
			$this->output(
				"...found $deleted rows with blank afl_var_dump to delete, and " .
				"$rebuilt rows to rebuild.\n"
			);
		} else {
			$this->output(
				"...deleted $deleted rows with blank afl_var_dump, and rebuilt " .
				"$rebuilt rows.\n"
			);
		}
	}

	/**
	 * Process one batch of rows with an empty afl_var_dump: delete those which have a later
	 * duplicate, and rebuild a minimal dump for the others. Nothing is written in dry-run mode,
	 * but the counters are still computed.
	 *
	 * @param IResultWrapper $brokenRows
	 * @return int[] With keys 'rebuilt' and 'deleted'
	 */
	private function doFixMissingDumps( IResultWrapper $brokenRows ) {
		$deleted = 0;
		foreach ( $brokenRows as $row ) {
			if ( $row->afl_var_dump === '' ) {
				// Match on every column except the dump itself and the primary key
				$findRow = array_diff_key(
					get_object_vars( $row ),
					[ 'afl_var_dump' => true, 'afl_id' => true ]
				);
				// This is the case where we may have a duplicate row. The wrong insertion happened
				// right before the correct one, so their afl_id should only differ by 1, but let's
				// play safe and only assume it's greater. Note that the two entries are guaranteed
				// to have the same timestamp.
				$findRow[] = 'afl_id > ' . $this->dbr->addQuotes( $row->afl_id );
				$saneDuplicate = $this->dbr->selectRow(
					'abuse_filter_log',
					'1',
					$findRow,
					__METHOD__
				);

				if ( $saneDuplicate ) {
					// Just delete the row!
					$deleted++;
					if ( !$this->dryRun ) {
						$this->dbw->newDeleteQueryBuilder()
							->deleteFrom( 'abuse_filter_log' )
							->where( [ 'afl_id' => $row->afl_id ] )
							->caller( __METHOD__ )
							->execute();
					}
					continue;
				}
			}
			if ( $this->dryRun ) {
				continue;
			}
			// Build a VariableHolder with the only values we can be sure of
			$vars = VariableHolder::newFromArray( [
				'timestamp' => wfTimestamp( TS_UNIX, $row->afl_timestamp ),
				'action' => $row->afl_action
			] );
			// Add some action-specific variables
			if ( strpos( $row->afl_action, 'createaccount' ) !== false ) {
				$vars->setVar( 'accountname', $row->afl_user_text );
			} else {
				$vars->setVar( 'user_name', $row->afl_user_text );
				$title = Title::makeTitle( $row->afl_namespace, $row->afl_title );
				if ( $row->afl_action !== 'move' ) {
					$vars->setVar( 'page_title', $title->getText() );
					$vars->setVar( 'page_prefixedtitle', $title->getPrefixedText() );
				} else {
					$vars->setVar( 'moved_from_title', $title->getText() );
					$vars->setVar( 'moved_from_prefixedtitle', $title->getPrefixedText() );
				}
			}

			$storedID = $this->varBlobStore->storeVarDump( $vars );
			$this->dbw->update(
				'abuse_filter_log',
				[ 'afl_var_dump' => $storedID ],
				[ 'afl_id' => $row->afl_id ],
				__METHOD__
			);
		}
		// Every row that wasn't deleted has been (or, in a dry run, would be) rebuilt
		$rebuilt = $brokenRows->numRows() - $deleted;
		return [ 'rebuilt' => $rebuilt, 'deleted' => $deleted ];
	}

	/**
	 * If afl_var_dump contains serialized data, move the dump to the text table.
	 * This handles point 1. of T213006.
	 */
	private function moveToText() {
		$this->output( "...Moving serialized data away from the abuse_filter_log table (2/4).\n" );
		$batchSize = $this->getBatchSize();

		$prevID = 0;
		$curID = $batchSize;
		$changeRows = $truncatedDumps = 0;
		do {
			$this->maybePrintProgress( $prevID );
			// Anything which is not a 'stored-text:' or 'tt:' reference is expected to be inline data
			$res = $this->dbr->select(
				'abuse_filter_log',
				[ 'afl_id', 'afl_var_dump' ],
				[
					'afl_var_dump NOT ' . $this->dbr->buildLike(
						'stored-text:',
						$this->dbr->anyString()
					),
					'afl_var_dump NOT ' . $this->dbr->buildLike(
						'tt:',
						$this->dbr->anyString()
					),
					"afl_id > $prevID",
					"afl_id <= $curID"
				],
				__METHOD__,
				[ 'ORDER BY' => 'afl_id ASC' ]
			);

			$prevID = $curID;
			$curID += $batchSize;

			$result = $this->doMoveToText( $res );
			$changeRows += $result['change'];
			$truncatedDumps += $result['truncated'];
			$this->waitForReplication();
			$this->maybeSleep();
		} while ( $prevID <= $this->allRowsCount );

		$msg = $this->dryRun ?
			"...found $changeRows abuse_filter_log rows with serialized data and $truncatedDumps " .
			"truncated dumps to rebuild.\n" :
			"...moved $changeRows abuse_filter_log rows and rebuilt $truncatedDumps " .
			"truncated dumps.\n";

		$this->output( $msg );
	}

	/**
	 * Process one batch of rows holding inline serialized dumps: validate each dump, restore it
	 * if it was truncated, and (unless in dry-run mode) re-store it via the blob store.
	 *
	 * @param IResultWrapper $rows
	 * @return int[] With keys 'change' (rows handled) and 'truncated' (dumps which needed restoring)
	 */
	private function doMoveToText( IResultWrapper $rows ) {
		$changeRows = $truncatedDumps = 0;
		foreach ( $rows as $row ) {
			// Sanity: perform a very raw check to confirm that the dump is indeed a serialized value
			$re = '/^(a:\d+:{|O:25:"[Aa]buse[Ff]ilter[Vv]ariable[Hh]older":\d+:{)/';
			if ( !preg_match( $re, $row->afl_var_dump ) ) {
				$this->fatalError(
					"...found a value in afl_var_dump for afl_id {$row->afl_id} which is " .
					"neither a reference to the text table or a serialized value: {$row->afl_var_dump}.\n"
				);
			}

			// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
			$stored = @unserialize( $row->afl_var_dump );
			// Compare against false explicitly: a valid empty array ("a:0:{}") is falsy, but it is
			// not a truncated dump and must not be counted as one.
			if ( $stored === false ) {
				$re = '/^O:25:"[Aa]buse[Ff]ilter[Vv]ariable[Hh]older":\d+:{/';
				if ( preg_match( $re, $row->afl_var_dump ) ) {
					// restoreTruncatedDump() can only handle arrays, not objects
					$this->fatalError(
						"...found a corrupted afl_var_dump for afl_id {$row->afl_id} containing " .
						"a truncated object: {$row->afl_var_dump}.\n"
					);
				}
				$stored = $this->restoreTruncatedDump( $row->afl_var_dump );
				$truncatedDumps++;
			}
			if ( !is_array( $stored ) && !( $stored instanceof VariableHolder ) ) {
				$this->fatalError(
					'...found unexpected data type ( ' . gettype( $stored ) . ' ) in ' .
					"afl_var_dump for afl_id {$row->afl_id}.\n"
				);
			}
			$changeRows++;

			if ( !$this->dryRun ) {
				$holder = is_array( $stored ) ? VariableHolder::newFromArray( $stored ) : $stored;
				// Note: this will upgrade to the new JSON format, so we use tt:
				$newDump = $this->varBlobStore->storeVarDump( $holder );
				$this->dbw->update(
					'abuse_filter_log',
					[ 'afl_var_dump' => $newDump ],
					[ 'afl_id' => $row->afl_id ],
					__METHOD__
				);
			}
		}
		return [ 'change' => $changeRows, 'truncated' => $truncatedDumps ];
	}

	/**
	 * Try to restore a truncated dump. This could happen for very old rows, where afl_var_dump
	 * was a blob instead of a longblob, and we tried to insert very long strings there.
	 * This handles point 9. of T214193.
	 *
	 * Parsing stops at the first element which is incomplete or of an unsupported type; all the
	 * key/value pairs fully parsed up to that point are returned.
	 *
	 * @param string $dump The broken serialized dump
	 * @return array With everything that we can restore from $dump on success
	 * @throws UnexpectedValueException If an integer, bool or null is found where a key is expected
	 */
	private function restoreTruncatedDump( $dump ) {
		// This method makes various assumptions:
		// 1 - Everything is wrapped inside an array
		// 2 - Array elements can only be strings, integers, bools or null
		// 3 - Array keys can only be strings
		// As this is what a serialized dump should look like.
		$string = preg_replace( '/^a:\d+:{/', '', $dump );

		$ret = [];
		// Name of the key whose value we're about to read, or null if a key is expected next
		$key = null;

		// Every token is longer than 2 characters, except for the null token "N;"
		while ( strlen( $string ) > 2 || $string === 'N;' ) {
			$type = substr( $string, 0, 2 );
			switch ( $type ) {
				case 's:':
					// Quotes aren't escaped, so we need to figure out how many characters to include
					$matches = [];
					if ( !preg_match( '/^s:(\d+):"/', $string, $matches ) ) {
						break 2;
					}
					$len = (int)$matches[1];
					$val = substr( $string, strlen( $matches[0] ), $len );
					if ( strlen( $val ) === $len ) {
						if ( $key === null ) {
							// It's an array key
							$key = $val;
						} else {
							$ret[$key] = $val;
							$key = null;
						}
						// Header + content + the closing quote and semicolon
						$offset = strlen( $matches[0] ) + $len + 2;
						break;
					} else {
						// The truncation happened in the middle of the string
						break 2;
					}
				case 'i:':
					if ( preg_match( '/^i:(-?\d+);/', $string, $matches ) ) {
						if ( $key === null ) {
							throw new UnexpectedValueException( "Unexpected integer key: $string" );
						}
						$ret[$key] = intval( $matches[1] );
						$key = null;
						$offset = strlen( $matches[0] );
						break;
					} else {
						break 2;
					}
				case 'b:':
					if ( preg_match( '/^b:([01]);/', $string, $matches ) ) {
						if ( $key === null ) {
							throw new UnexpectedValueException( "Unexpected bool key: $string" );
						}
						$ret[$key] = (bool)$matches[1];
						$key = null;
						// Length of "b:0;" / "b:1;"
						$offset = 4;
						break;
					} else {
						break 2;
					}
				case 'N;':
					if ( $key === null ) {
						throw new UnexpectedValueException( "Unexpected null key: $string" );
					}
					$ret[$key] = null;
					$key = null;
					$offset = 2;
					break;
				default:
					break 2;
			}

			// Remove the value we have just parsed
			// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable
			// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal
			$string = substr( $string, $offset );
		}

		if ( $this->hasOption( 'dry-run-verbose' ) ) {
			$this->output(
				"...converted the following corrupted dump:\n\n$dump\n\n to this:\n\n" .
				var_export( $ret, true ) . "\n\n"
			);
		}

		return $ret;
	}

	/**
	 * If the text table (or the External Storage) contains a serialized variable holder
	 * or array, re-store it as a JSON-encoded array. This assumes that afl_var_dump rows starting
	 * with 'tt:' already point to JSON dumps, and afl_var_dump rows starting with 'stored-text:'
	 * only point to serialized dumps.
	 * This handles point 2. and 6. of T213006.
	 */
	private function updateText() {
		$this->output(
			"...Re-storing serialized dumps as JSON-encoded arrays for all rows (3/4).\n"
		);
		if ( $this->printOrphanedFile !== null && !$this->dryRun ) {
			$this->output( "Printing orphaned records to $this->printOrphanedFile.\n" );
			file_put_contents(
				$this->printOrphanedFile,
				"Records orphaned by AbuseFilter's updateVarDumps script\n",
				FILE_APPEND
			);
		}

		$batchSize = $this->getBatchSize();
		$prevID = 0;
		$curID = $batchSize;
		$count = 0;

		// SQL expression extracting the numeric text ID from a 'stored-text:<id>' reference
		$idSQL = $this->dbr->buildIntegerCast( $this->dbr->strreplace(
			'afl_var_dump',
			$this->dbr->addQuotes( 'stored-text:' ),
			$this->dbr->addQuotes( '' )
		) );

		$dumpLike = $this->dbr->buildLike( 'stored-text:', $this->dbr->anyString() );
		$esAccess = MediaWikiServices::getInstance()->getExternalStoreAccess();
		do {
			$this->maybePrintProgress( $prevID );
			$res = $this->dbr->select(
				[ 'text', 'abuse_filter_log' ],
				[ 'old_id', 'old_text', 'old_flags' ],
				[
					"afl_var_dump $dumpLike",
					"afl_id > $prevID",
					"afl_id <= $curID"
				],
				__METHOD__,
				[ 'DISTINCT', 'ORDER BY' => 'old_id ASC' ],
				[ 'abuse_filter_log' => [ 'JOIN', "old_id = $idSQL" ] ]
			);

			$prevID = $curID;
			$curID += $batchSize;
			$count += $res->numRows();

			if ( !$this->dryRun ) {
				$this->doUpdateText( $res, $esAccess );
				$this->waitForReplication();
			}
			$this->maybeSleep();
		} while ( $prevID <= $this->allRowsCount );

		$msg = $this->dryRun
			? "...found $count text rows to update.\n"
			: "...updated $count text rows.\n";
		$this->output( $msg );
	}

	/**
	 * Convert one batch of text rows from serialized data to JSON, preserving the 'gzip' and
	 * 'external' storage flags. Rows which already hold JSON are skipped. The old URLs of
	 * re-inserted external records are appended to the orphaned records file, if one was given.
	 *
	 * @param IResultWrapper $res text rows
	 * @param ExternalStoreAccess $esAccess
	 * @throws UnexpectedValueException If a blob is neither an array nor a VariableHolder
	 */
	private function doUpdateText( IResultWrapper $res, ExternalStoreAccess $esAccess ) {
		$orphaned = [];
		foreach ( $res as $row ) {
			// This is copied from the old AbuseFilter::loadVarDump
			$oldFlags = explode( ',', $row->old_flags );
			$text = $row->old_text;
			if ( in_array( 'external', $oldFlags, true ) ) {
				$text = $esAccess->fetchFromURL( $row->old_text );
			}
			if ( in_array( 'gzip', $oldFlags, true ) ) {
				$text = gzinflate( $text );
			}

			if ( FormatJson::decode( $text ) !== null ) {
				// Already in the new format, apparently.
				if (
					!in_array( 'utf-8', $oldFlags, true ) ||
					in_array( 'nativeDataArray', $oldFlags, true )
				) {
					// Sanity
					$this->fatalError( "Row {$row->old_id} is JSON-encoded with wrong flags: {$row->old_flags}" );
				}
				continue;
			}

			// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
			$obj = @unserialize( $text );

			if ( !$obj ) {
				// Under certain conditions, there might be a truncated dump here, see T264513
				$obj = $this->restoreTruncatedDump( $text );
			}

			if ( $obj instanceof VariableHolder ) {
				$varManager = AbuseFilterServices::getVariablesManager();
				$varArray = $varManager->dumpAllVars( $obj, [ 'old_wikitext', 'new_wikitext' ] );
			} elseif ( is_array( $obj ) ) {
				$varArray = $obj;
			} else {
				$type = is_object( $obj ) ? get_class( $obj ) : gettype( $obj );
				throw new UnexpectedValueException( "Unexpected type for stored blob: $type" );
			}
			$varArray = $this->updateVariables( $varArray );
			// Recreating flags will also ensure that we don't add 'nativeDataArray'
			$newFlags = [ 'utf-8' ];
			// This is copied from the old AbuseFilter::storeVarDump
			$toStore = FormatJson::encode( $varArray );
			if ( in_array( 'gzip', $oldFlags, true ) && function_exists( 'gzdeflate' ) ) {
				$toStore = gzdeflate( $toStore );
				$newFlags[] = 'gzip';
			}
			if ( in_array( 'external', $oldFlags, true ) ) {
				// A new external record is inserted, so the old one is no longer referenced
				$orphaned[] = $row->old_text;
				$toStore = $esAccess->insert( $toStore );
				$newFlags[] = 'external';
			}

			$this->dbw->update(
				'text',
				[
					'old_text' => $toStore,
					'old_flags' => implode( ',', $newFlags )
				],
				[ 'old_id' => $row->old_id ],
				__METHOD__
			);
		}
		if ( $this->printOrphanedFile !== null && $orphaned ) {
			file_put_contents( $this->printOrphanedFile, implode( ', ', $orphaned ) . "\n", FILE_APPEND );
		}
	}

	/**
	 * Given a stored object, removes some disabled variables and update deprecated ones.
	 * Also ensure that core variables are lowercase.
	 * Handles points 4., 5. and 8. of T213006.
	 *
	 * @param array $vars The stored vars.
	 * @return array The same values, keyed by the updated variable names
	 */
	private function updateVariables( array $vars ) {
		// Remove all variables used in the past to store metadata
		unset( $vars['context'], $vars['logged_local_ids'], $vars['logged_global_ids'] );

		$builtinVars = $this->getBuiltinVarNames();
		// Map of [ deprecated name => new name ]; it doesn't depend on the variable being
		// processed, so fetch it once rather than on every iteration.
		$deprecatedVars = $this->keywordsManager->getDeprecatedVariables();
		$newVars = [];
		foreach ( $vars as $oldName => $value ) {
			$lowerName = strtolower( $oldName );
			// Only lowercase the names of builtin variables, leave any other name untouched
			if ( $lowerName !== $oldName && array_key_exists( $lowerName, $builtinVars ) ) {
				$oldName = $lowerName;
			}
			$newName = $deprecatedVars[$oldName] ?? $oldName;
			$newVars[$newName] = $value;
		}
		return $newVars;
	}

	/**
	 * Get a set of builtin variable names. Copied from VariableHolder::dumpAllVars.
	 * The set is built on first use and then kept in a static variable.
	 *
	 * @return array [ varname => true ] for instantaneous search. All names are lowercase
	 */
	private function getBuiltinVarNames() {
		global $wgRestrictionTypes;

		static $coreVariables = null;

		if ( $coreVariables ) {
			return $coreVariables;
		}

		$activeVariables = array_keys( $this->keywordsManager->getVarsMappings() );
		$deprecatedVariables = array_keys( $this->keywordsManager->getDeprecatedVariables() );
		$disabledVariables = array_keys( $this->keywordsManager->getDisabledVariables() );
		$coreVariables = array_merge( $activeVariables, $deprecatedVariables, $disabledVariables );

		// Add the per-restriction-type variables, e.g. "page_restrictions_<action>"
		$prefixes = [ 'moved_from', 'moved_to', 'page' ];
		foreach ( $wgRestrictionTypes as $action ) {
			foreach ( $prefixes as $prefix ) {
				$coreVariables[] = "{$prefix}_restrictions_$action";
			}
		}

		// Turn the list into a lowercase-keyed set
		$coreVariables = array_fill_keys( $coreVariables, true );
		$coreVariables = array_change_key_case( $coreVariables );

		return $coreVariables;
	}

	/**
	 * Replace 'stored-text:' with 'tt:' in afl_var_dump. Handles point 3. of T213006.
	 */
	private function updateAflVarDump() {
		$this->output(
			"...Replacing the 'stored-text:' prefix with 'tt:' (4/4).\n"
		);

		$batchSize = $this->getBatchSize();

		// Use native SQL functions so that we can update all rows at the same time.
		$newIdSQL = $this->dbw->strreplace(
			'afl_var_dump',
			$this->dbr->addQuotes( 'stored-text:' ),
			$this->dbr->addQuotes( 'tt:' )
		);

		$prevID = 0;
		$curID = $batchSize;
		$numRows = 0;
		do {
			$this->maybePrintProgress( $prevID );
			// The same argument list is used both for the UPDATE and, in dry-run mode, for
			// counting the rows which would be updated.
			$args = [
				'abuse_filter_log',
				[ "afl_var_dump = $newIdSQL" ],
				[
					"afl_id > $prevID",
					"afl_id <= $curID",
					'afl_var_dump ' . $this->dbr->buildLike( 'stored-text:', $this->dbr->anyString() )
				],
				__METHOD__,
				[ 'ORDER BY' => 'afl_id ASC' ]
			];
			if ( $this->dryRun ) {
				$numRows += $this->dbr->selectRowCount( ...$args );
			} else {
				$this->dbw->update( ...$args );
				$numRows += $this->dbw->affectedRows();
				$this->waitForReplication();
			}

			$prevID = $curID;
			$curID += $batchSize;
			$this->maybeSleep();
		} while ( $prevID <= $this->allRowsCount );

		if ( $this->dryRun ) {
			$this->output( "...would change afl_var_dump for $numRows rows.\n" );
		} else {
			$this->output( "...updated afl_var_dump prefix for $numRows rows.\n" );
		}
	}

	/**
	 * Print a progress marker if the respective option is enabled. Markers are only printed
	 * when $start is a multiple of 10 batches.
	 *
	 * @param int $start Lower bound (exclusive) of the ID range about to be processed
	 */
	private function maybePrintProgress( int $start ): void {
		if ( $this->progressMarkers && $start % ( 10 * $this->getBatchSize() ) === 0 ) {
			$end = $start + $this->getBatchSize();
			$this->output( "...Doing range $start - $end\n" );
		}
	}

	/**
	 * Sleep for a while, if required. Note: checking the value is several
	 * orders of magnitude faster than calling sleep(0).
	 */
	private function maybeSleep(): void {
		if ( $this->sleep ) {
			sleep( $this->sleep );
		}
	}
}
726 | |
// Tell the maintenance framework which class implements this script.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN presumably runs $maintClass only when this file is
// the script being executed directly - confirm against maintenance/Maintenance.php.
$maintClass = UpdateVarDumps::class;
require_once RUN_MAINTENANCE_IF_MAIN;