MediaWiki master
checkStorage.php
Go to the documentation of this file.
1<?php
17
18// @codeCoverageIgnoreStart
19require_once __DIR__ . '/../Maintenance.php';
20// @codeCoverageIgnoreEnd
21
22// ----------------------------------------------------------------------------------
23
// Serialized-object prefix expected at the start of a concatenated gzip history blob.
// It is compared case-insensitively (strcasecmp) against the leading bytes of blob rows.
31 private const CONCAT_HEADER = 'O:27:"concatenatedgziphistoryblob"';
32
// Map of text row ID (old_id) => list of revision IDs that use it; reset for each chunk in check().
33 public array $oldIdMap;
// Map of error category => [ rev_id => true ]; initialised in check() and filled by addError().
34 public array $errors;
35
// External store database object, fetched lazily from the ExternalStoreFactory; null until first needed.
37 public $dbStore = null;
38
/**
 * Declare the script's command line: an optional "fix" switch and an
 * optional positional argument naming an XML dump.
 */
public function __construct() {
	parent::__construct();

	$this->addOption( 'fix', 'Fix errors if possible' );
	// The dump is only needed when text has to be restored, so it is not required.
	$this->addArg( 'xml', 'Path to an XML dump', required: false );
}
45
/**
 * Entry point: read the "fix" option and the "xml" argument and pass
 * them on to check().
 */
public function execute() {
	$this->check(
		$this->hasOption( 'fix' ),
		$this->getArg( 'xml', false )
	);
}
51
54 'restore text' => 'Damaged text, need to be restored from a backup',
55 'restore revision' => 'Damaged revision row, need to be restored from a backup',
56 'unfixable' => 'Unexpected errors with no automated fixing method',
57 'fixed' => 'Errors already fixed',
58 'fixable' => 'Errors which would already be fixed if --fix was specified',
59 ];
60
/**
 * Walk the revision table in chunks of 1000 rev_ids and verify that the text
 * of every revision can be located.
 *
 * For each chunk this checks that the referenced text rows exist, that their
 * old_flags are recognised, that external store URLs are well formed and
 * point at existing blob rows, and that locally stored serialized objects
 * carry a sane header. Problems are printed as they are found, then
 * summarised per category, followed by flag and object-class statistics.
 *
 * @param bool $fix Repair errors that can be fixed automatically (old_flags
 *   set to '0') and, if $xml is given, try to restore damaged text from it
 * @param string|false $xml Path to an XML dump to restore damaged text from;
 *   '' or false when no dump is available
 */
public function check( bool $fix = false, string|false $xml = '' ) {
	$dbr = $this->getReplicaDB();
	if ( $fix ) {
		print "Checking, will fix errors if possible...\n";
	} else {
		print "Checking...\n";
	}
	$maxRevId = $dbr->newSelectQueryBuilder()
		->select( 'MAX(rev_id)' )
		->from( 'revision' )
		->caller( __METHOD__ )->fetchField();
	$chunkSize = 1000;
	$flagStats = [];
	$objectStats = [];
	$knownFlags = [ 'external', 'gzip', 'object', 'utf-8' ];
	$this->errors = [
		'restore text' => [],
		'restore revision' => [],
		'unfixable' => [],
		'fixed' => [],
		'fixable' => [],
	];

	// The bound is inclusive so that a final chunk which starts exactly at
	// $maxRevId (1, 1001, 2001, ...) is still checked.
	for ( $chunkStart = 1; $chunkStart <= $maxRevId; $chunkStart += $chunkSize ) {
		$chunkEnd = $chunkStart + $chunkSize - 1;

		$this->oldIdMap = [];
		$dbr->ping();

		// Fetch revision rows
		$res = $dbr->newSelectQueryBuilder()
			->select( [ 'slot_revision_id', 'content_address' ] )
			->from( 'slots' )
			->join( 'content', null, 'content_id = slot_content_id' )
			->where( [
				$dbr->expr( 'slot_revision_id', '>=', $chunkStart ),
				$dbr->expr( 'slot_revision_id', '<=', $chunkEnd ),
			] )
			->caller( __METHOD__ )->fetchResultSet();
		$blobStore = $this->getServiceContainer()->getBlobStore();
		'@phan-var \MediaWiki\Storage\SqlBlobStore $blobStore';
		// Build the map of text row ID => revision IDs for this chunk.
		// Addresses that do not resolve to a text ID are ignored.
		foreach ( $res as $row ) {
			$textId = $blobStore->getTextIdFromAddress( $row->content_address );
			if ( $textId ) {
				if ( !isset( $this->oldIdMap[$textId] ) ) {
					$this->oldIdMap[ $textId ] = [ $row->slot_revision_id ];
				} elseif ( !in_array( $row->slot_revision_id, $this->oldIdMap[$textId] ) ) {
					$this->oldIdMap[ $textId ][] = $row->slot_revision_id;
				}
			}
		}

		if ( !count( $this->oldIdMap ) ) {
			continue;
		}

		// Fetch old_flags
		$missingTextRows = $this->oldIdMap;
		$externalRevs = [];
		$objectRevs = [];
		$res = $dbr->newSelectQueryBuilder()
			->select( [ 'old_id', 'old_flags' ] )
			->from( 'text' )
			->where( [ 'old_id' => array_keys( $this->oldIdMap ) ] )
			->caller( __METHOD__ )->fetchResultSet();
		foreach ( $res as $row ) {
			$flags = $row->old_flags;
			$id = $row->old_id;

			// Create flagStats row if it doesn't exist
			$flagStats += [ $flags => 0 ];
			// Increment counter
			$flagStats[$flags]++;

			// Not missing
			unset( $missingTextRows[$row->old_id] );

			// Check for external or object
			if ( $flags == '' ) {
				$flagArray = [];
			} else {
				$flagArray = explode( ',', $flags );
			}
			if ( in_array( 'external', $flagArray ) ) {
				$externalRevs[] = $id;
			} elseif ( in_array( 'object', $flagArray ) ) {
				$objectRevs[] = $id;
			}

			// Check for unrecognised flags
			if ( $flags == '0' ) {
				// This is a known bug from 2004
				// It's safe to just erase the old_flags field
				if ( $fix ) {
					$this->addError( 'fixed', "Warning: old_flags set to 0", $id );
					$dbw = $this->getPrimaryDB();
					$dbw->ping();
					$dbw->newUpdateQueryBuilder()
						->update( 'text' )
						->set( [ 'old_flags' => '' ] )
						->where( [ 'old_id' => $id ] )
						->caller( __METHOD__ )
						->execute();
					echo "Fixed\n";
				} else {
					$this->addError( 'fixable', "Warning: old_flags set to 0", $id );
				}
			} elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
				$this->addError( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
			}
		}

		// Output errors for any missing text rows
		foreach ( array_keys( $missingTextRows ) as $oldId ) {
			$this->addError( 'restore revision', "Error: missing text row", $oldId );
		}

		// Verify external revisions
		// Both maps are keyed cluster => blob ID => list of old_ids.
		$externalConcatBlobs = [];
		$externalNormalBlobs = [];
		if ( count( $externalRevs ) ) {
			$res = $dbr->newSelectQueryBuilder()
				->select( [ 'old_id', 'old_flags', 'old_text' ] )
				->from( 'text' )
				->where( [ 'old_id' => $externalRevs ] )
				->caller( __METHOD__ )->fetchResultSet();
			foreach ( $res as $row ) {
				$urlParts = explode( '://', $row->old_text, 2 );
				if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
					$this->addError( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
					continue;
				}
				$proto = $urlParts[0];
				if ( $proto != 'DB' ) {
					$this->addError(
						'restore text',
						"Error: invalid external protocol \"$proto\"",
						$row->old_id );
					continue;
				}
				// "DB://cluster/id[/item]" splits into [ 'DB:', '', cluster, id, item? ]
				$path = explode( '/', $row->old_text );
				if ( !isset( $path[3] ) ) {
					// A cluster without a blob ID cannot be looked up
					$this->addError( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
					continue;
				}
				$cluster = $path[2];
				$id = $path[3];
				if ( isset( $path[4] ) ) {
					$externalConcatBlobs[$cluster][$id][] = $row->old_id;
				} else {
					$externalNormalBlobs[$cluster][$id][] = $row->old_id;
				}
			}
		}

		// Check external concat blobs for the right header
		$this->checkExternalConcatBlobs( $externalConcatBlobs );

		// Check external normal blobs for existence
		if ( count( $externalNormalBlobs ) ) {
			if ( $this->dbStore === null ) {
				$esFactory = $this->getServiceContainer()->getExternalStoreFactory();
				$this->dbStore = $esFactory->getDatabaseStore();
			}
			// Iterate the one-part ("normal") blobs here; the concat blobs
			// were already handled by checkExternalConcatBlobs() above.
			foreach ( $externalNormalBlobs as $cluster => $xBlobIds ) {
				$blobIds = array_keys( $xBlobIds );
				$extDb = $this->dbStore->getReplica( $cluster );
				$blobsTable = $this->dbStore->getTable( $cluster );
				$res = $extDb->newSelectQueryBuilder()
					->select( [ 'blob_id' ] )
					->from( $blobsTable )
					->where( [ 'blob_id' => $blobIds ] )
					->caller( __METHOD__ )->fetchResultSet();
				foreach ( $res as $row ) {
					unset( $xBlobIds[$row->blob_id] );
				}
				// Print errors for missing blobs rows
				foreach ( $xBlobIds as $blobId => $oldIds ) {
					$this->addError(
						'restore text',
						"Error: missing target $blobId for one-part ES URL",
						$oldIds );
				}
			}
		}

		// Check local objects
		$dbr->ping();
		$concatBlobs = [];
		$curIds = [];
		if ( count( $objectRevs ) ) {
			$headerLength = 300;
			$res = $dbr->newSelectQueryBuilder()
				->select( [ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ] )
				->from( 'text' )
				->where( [ 'old_id' => $objectRevs ] )
				->caller( __METHOD__ )->fetchResultSet();
			foreach ( $res as $row ) {
				$oldId = $row->old_id;
				$matches = [];
				if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
					$this->addError( 'restore text', "Error: invalid object header", $oldId );
					continue;
				}

				$className = strtolower( $matches[2] );
				if ( strlen( $className ) != $matches[1] ) {
					$this->addError(
						'restore text',
						"Error: invalid object header, wrong class name length",
						$oldId
					);
					continue;
				}

				$objectStats += [ $className => 0 ];
				$objectStats[$className]++;

				switch ( $className ) {
					case 'concatenatedgziphistoryblob':
						// Good
						break;
					case 'historyblobstub':
					case 'historyblobcurstub':
						// A header that fills the whole LEFT() window may have
						// been truncated, so it cannot be unserialized reliably.
						if ( strlen( $row->header ) == $headerLength ) {
							$this->addError( 'unfixable', "Error: overlong stub header", $oldId );
							break;
						}
						// NOTE(review): unserialize() on database content can
						// instantiate arbitrary classes; consider restricting
						// it with the allowed_classes option.
						$stubObj = unserialize( $row->header );
						if ( !is_object( $stubObj ) ) {
							$this->addError( 'restore text', "Error: unable to unserialize stub object", $oldId );
							break;
						}
						if ( $className == 'historyblobstub' ) {
							$concatBlobs[$stubObj->getLocation()][] = $oldId;
						} else {
							$curIds[$stubObj->mCurId][] = $oldId;
						}
						break;
					default:
						$this->addError( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
				}
			}
		}

		// Check local concat blob validity
		$externalConcatBlobs = [];
		if ( count( $concatBlobs ) ) {
			$headerLength = 300;
			$res = $dbr->newSelectQueryBuilder()
				->select( [ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ] )
				->from( 'text' )
				->where( [ 'old_id' => array_keys( $concatBlobs ) ] )
				->caller( __METHOD__ )->fetchResultSet();
			foreach ( $res as $row ) {
				$flags = explode( ',', $row->old_flags );
				if ( in_array( 'external', $flags ) ) {
					// Concat blob is in external storage?
					if ( in_array( 'object', $flags ) ) {
						$urlParts = explode( '/', $row->header );
						if ( $urlParts[0] != 'DB:' ) {
							$this->addError(
								'unfixable',
								"Error: unrecognised external storage type \"{$urlParts[0]}\"",
								$row->old_id
							);
						} else {
							$cluster = $urlParts[2];
							$id = $urlParts[3];
							if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
								$externalConcatBlobs[$cluster][$id] = [];
							}
							$externalConcatBlobs[$cluster][$id] = array_merge(
								$externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
							);
						}
					} else {
						$this->addError(
							'unfixable',
							"Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
							$concatBlobs[$row->old_id] );
					}
				} elseif ( strcasecmp(
					substr( $row->header, 0, strlen( self::CONCAT_HEADER ) ),
					self::CONCAT_HEADER
				) ) {
					$this->addError(
						'restore text',
						"Error: Incorrect object header for concat bulk row {$row->old_id}",
						$concatBlobs[$row->old_id]
					);
				}

				unset( $concatBlobs[$row->old_id] );
			}
		}

		// Check targets of unresolved stubs
		$this->checkExternalConcatBlobs( $externalConcatBlobs );
		// next chunk
	}

	print "\n\nErrors:\n";
	foreach ( $this->errors as $name => $errors ) {
		if ( count( $errors ) ) {
			$description = $this->errorDescriptions[$name];
			echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
		}
	}

	// Restoring text requires both --fix and an XML dump to read from
	if ( count( $this->errors['restore text'] ) && $fix ) {
		if ( (string)$xml !== '' ) {
			$this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
		} else {
			echo "Can't fix text, no XML backup specified\n";
		}
	}

	print "\nFlag statistics:\n";
	$total = array_sum( $flagStats );
	foreach ( $flagStats as $flag => $count ) {
		printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
	}
	print "\nLocal object statistics:\n";
	$total = array_sum( $objectStats );
	foreach ( $objectStats as $className => $count ) {
		printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
	}
}
391
/**
 * Print an error message and record every affected revision under the
 * given error category.
 *
 * @param string $type Key of $this->errors to file the revisions under
 * @param string $msg Message to print
 * @param mixed $ids A single text row ID, or an array of text row IDs; each
 *   must be a key of $this->oldIdMap
 */
private function addError( string $type, string $msg, $ids ) {
	// A one-element list is reported exactly like a bare ID
	if ( is_array( $ids ) && count( $ids ) == 1 ) {
		$ids = reset( $ids );
	}

	if ( !is_array( $ids ) ) {
		$textId = $ids;
		$affected = $this->oldIdMap[$textId];
		if ( count( $affected ) != 1 ) {
			print "$msg in old_id $textId, revisions " . implode( ', ', $affected ) . "\n";
		} else {
			print "$msg in old_id $textId, rev_id {$affected[0]}\n";
		}
	} else {
		// Collect the distinct revisions behind all of the listed text rows
		$affected = [];
		foreach ( $ids as $textId ) {
			$affected = array_unique( array_merge( $affected, $this->oldIdMap[$textId] ) );
		}
		print "$msg in text rows " . implode( ', ', $ids ) .
			", revisions " . implode( ', ', $affected ) . "\n";
	}

	// Keyed by rev_id so that a revision is listed once per category
	$this->errors[$type] += array_fill_keys( $affected, true );
}
419
/**
 * Verify that the external store rows targeted by two-part ES URLs exist
 * and start with the concatenated-blob header.
 *
 * @param array $externalConcatBlobs Map of cluster => blob ID => list of
 *   old_ids whose text points into that blob
 */
private function checkExternalConcatBlobs( array $externalConcatBlobs ) {
	if ( $externalConcatBlobs === [] ) {
		return;
	}

	// Fetch the external store lazily; it is kept for later calls
	$this->dbStore ??= $this->getServiceContainer()
		->getExternalStoreFactory()
		->getDatabaseStore();

	$prefixLength = strlen( self::CONCAT_HEADER );

	foreach ( $externalConcatBlobs as $cluster => $pending ) {
		$extDb = $this->dbStore->getReplica( $cluster );
		$rows = $extDb->newSelectQueryBuilder()
			->select( [ 'blob_id', "LEFT(blob_text, $prefixLength) AS header" ] )
			->from( $this->dbStore->getTable( $cluster ) )
			->where( [ 'blob_id' => array_keys( $pending ) ] )
			->caller( __METHOD__ )
			->fetchResultSet();

		foreach ( $rows as $blobRow ) {
			$blobId = $blobRow->blob_id;
			if ( strcasecmp( $blobRow->header, self::CONCAT_HEADER ) !== 0 ) {
				$this->addError(
					'restore text',
					"Error: invalid header on target $cluster/{$blobRow->blob_id} of two-part ES URL",
					$pending[$blobId]
				);
			}
			// The row exists, so it is no longer pending
			unset( $pending[$blobId] );
		}

		// Whatever is still pending had no row in the blobs table
		foreach ( $pending as $blobId => $textIds ) {
			$this->addError(
				'restore text',
				"Error: missing target $cluster/$blobId for two-part ES URL",
				$textIds
			);
		}
	}
}
461
/**
 * Restore the text of the given revisions from an XML dump.
 *
 * The revision IDs are written to a list file in the temp directory, the
 * dump is filtered down to those revisions with the external "mwdumper"
 * tool, and the filtered dump is then imported with importRevision() as the
 * per-revision callback.
 *
 * @param array $revIds Revision IDs whose text should be restored
 * @param string $xml Path to the XML dump to read from
 */
private function restoreText( array $revIds, string $xml ) {
	global $wgDBname;

	if ( !count( $revIds ) ) {
		return;
	}

	$tmpDir = wfTempDir();

	print "Restoring text from XML backup...\n";

	$revFileName = "$tmpDir/broken-revlist-$wgDBname";
	$filteredXmlFileName = "$tmpDir/filtered-$wgDBname.xml";

	// Write revision list
	if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) {
		echo "Error writing revision list, can't restore text\n";

		return;
	}

	// Run mwdumper
	echo "Filtering XML dump...\n";
	$exitStatus = 0;
	// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.passthru
	passthru( 'mwdumper ' .
		Shell::escape(
			"--output=file:$filteredXmlFileName",
			"--filter=revlist:$revFileName",
			$xml
		), $exitStatus
	);

	if ( $exitStatus ) {
		echo "mwdumper died with exit status $exitStatus\n";

		return;
	}

	$file = fopen( $filteredXmlFileName, 'r' );
	if ( !$file ) {
		echo "Unable to open filtered XML file\n";

		return;
	}

	try {
		$dbr = $this->getReplicaDB();
		$dbw = $this->getPrimaryDB();
		$dbr->ping();
		$dbw->ping();

		$source = new ImportStreamSource( $file );
		$user = User::newSystemUser( User::MAINTENANCE_SCRIPT_USER, [ 'steal' => true ] );
		$importer = $this->getServiceContainer()
			->getWikiImporterFactory()
			->getWikiImporter( $source, new UltimateAuthority( $user ) );
		$importer->setRevisionCallback( $this->importRevision( ... ) );
		$importer->setNoticeCallback( static function ( $msg, $params ) {
			echo wfMessage( $msg, $params )->text() . "\n";
		} );
		$importer->doImport();
	} finally {
		// Release the dump handle even if the import throws. The
		// is_resource() guard avoids closing a handle that was already
		// closed elsewhere.
		if ( is_resource( $file ) ) {
			fclose( $file );
		}
	}
}
523
/**
 * Import callback: overwrite the text row of one revision with the content
 * found in the dump, and move the revision from the "restore text" error
 * list to the "fixed" list.
 *
 * Revisions without content, with blank content, without an ID, or without
 * a resolvable text row are reported and left untouched.
 *
 * @param mixed $revision Revision object handed over by the importer; must
 *   provide getID() and getContent()
 */
private function importRevision( $revision ) {
	$id = $revision->getID();
	$content = $revision->getContent();
	$id = $id ?: '';

	if ( $content === null ) {
		echo "Revision $id is broken, we have no content available\n";

		return;
	}

	$text = $content->serialize();
	if ( $text === '' ) {
		// This is what happens if the revision was broken at the time the
		// dump was made. Unfortunately, it also happens if the revision was
		// legitimately blank, so there's no way to tell the difference. To
		// be safe, we'll skip it and leave it broken

		echo "Revision $id is blank in the dump, may have been broken before export\n";

		return;
	}

	if ( !$id ) {
		// No ID, can't import
		echo "No id tag in revision, can't import\n";

		return;
	}

	// Find text row again
	$dbr = $this->getReplicaDB();
	$address = $dbr->newSelectQueryBuilder()
		->select( 'content_address' )
		->from( 'slots' )
		->join( 'content', null, 'content_id = slot_content_id' )
		->where( [ 'slot_revision_id' => $id ] )
		->caller( __METHOD__ )
		->fetchField();

	// fetchField() yields false when no slot row matches. Report that here
	// instead of handing a non-address to the blob store.
	if ( $address === false ) {
		echo "Missing revision row for rev_id $id\n";
		return;
	}

	$blobStore = $this->getServiceContainer()
		->getBlobStoreFactory()
		->newSqlBlobStore();
	$oldId = $blobStore->getTextIdFromAddress( $address );

	if ( !$oldId ) {
		echo "Missing revision row for rev_id $id\n";
		return;
	}

	// Compress the text
	$flags = $blobStore->compressData( $text );

	// Update the text row
	$dbw = $this->getPrimaryDB();
	$dbw->newUpdateQueryBuilder()
		->update( 'text' )
		->set( [ 'old_flags' => $flags, 'old_text' => $text ] )
		->where( [ 'old_id' => $oldId ] )
		->caller( __METHOD__ )
		->execute();

	// Remove it from the unfixed list and add it to the fixed list
	unset( $this->errors['restore text'][$id] );
	$this->errors['fixed'][$id] = true;
}
593
594}
595
596// @codeCoverageIgnoreStart
// Name the maintenance class defined in this file, then load the runner.
// NOTE(review): presumably the required file instantiates and runs $maintClass
// when this script is the entry point; confirm against Maintenance.php.
597$maintClass = CheckStorage::class;
598require_once RUN_MAINTENANCE_IF_MAIN;
599// @codeCoverageIgnoreEnd
wfTempDir()
Tries to get the system directory for temporary files.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
$maintClass
Maintenance script to do various checks on external storage.
check(bool $fix=false, string|false $xml='')
__construct()
Default constructor.
string[] $errorDescriptions
execute()
Do the actual work.
ExternalStoreDB $dbStore
External storage in a SQL database.
Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
Represents a revision, log entry or upload during the import process.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
getArg( $argId=0, $default=null)
Get an argument.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
hasOption( $name)
Checks to see if a particular option was set.
getReplicaDB(string|false $virtualDomain=false)
getServiceContainer()
Returns the main service container.
getPrimaryDB(string|false $virtualDomain=false)
Represents an authority that has all permissions.
Executes shell commands.
Definition Shell.php:32
User class for the MediaWiki software.
Definition User.php:130
$wgDBname
Config variable stub for the DBname setting, for use by phpdoc and IDEs.
$source