MediaWiki REL1_31
checkStorage.php
Go to the documentation of this file.
1<?php
25
if ( !defined( 'MEDIAWIKI' ) ) {
	// Not loaded from within MediaWiki: act as a standalone command-line script.
	// 'fix' is declared as a switch that takes no argument.
	$optionsWithoutArgs = [ 'fix' ];
	require_once __DIR__ . '/../commandLine.inc';

	// The first positional argument, when present, is handed to check() as
	// the XML source to restore from; false means "none given".
	$xml = isset( $args[0] ) ? $args[0] : false;
	$fix = isset( $options['fix'] );

	$cs = new CheckStorage;
	$cs->check( $fix, $xml );
}
39
40// ----------------------------------------------------------------------------------
41
/**
 * Maintenance script to do various checks on external storage.
 *
 * Walks the revision table, validates the flags and contents of the matching
 * text rows, and can optionally repair damaged text from an XML dump.
 */
class CheckStorage {
	/** Serialized-object prefix that every concatenated history blob must start with */
	const CONCAT_HEADER = 'O:27:"concatenatedgziphistoryblob"';

	/** @var array Map of rev_id => rev_text_id for the chunk currently being checked */
	public $oldIdMap;

	/** @var array Map of error type => [ rev_id => ... ] collected by addError() */
	public $errors;

	/** @var ExternalStoreDB|null Created on first use */
	public $dbStore = null;

	/** @var string[] Human-readable description for each error type in $errors */
	public $errorDescriptions = [
		'restore text' => 'Damaged text, need to be restored from a backup',
		'restore revision' => 'Damaged revision row, need to be restored from a backup',
		'unfixable' => 'Unexpected errors with no automated fixing method',
		'fixed' => 'Errors already fixed',
		'fixable' => 'Errors which would already be fixed if --fix was specified',
	];
60
/**
 * Scan the revision table in chunks and verify the storage of every revision's text.
 *
 * For each chunk this checks that the text row exists, that old_flags only
 * contains known flags, that external-storage URLs are well formed and point
 * at existing blobs, and that locally stored objects and the concat blobs
 * they refer to carry the expected serialized header. Problems are recorded
 * through addError(); a summary and flag/object statistics are printed at the end.
 *
 * @param bool $fix Repair what can be repaired automatically (old_flags = '0',
 *   and damaged text when $xml is given)
 * @param string|bool $xml XML dump to restore damaged text from; '' or false for none
 */
function check( $fix = false, $xml = '' ) {
	$dbr = wfGetDB( DB_REPLICA );
	if ( $fix ) {
		print "Checking, will fix errors if possible...\n";
	} else {
		print "Checking...\n";
	}
	$maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', '', __METHOD__ );
	$chunkSize = 1000;
	$flagStats = [];
	$objectStats = [];
	$knownFlags = [ 'external', 'gzip', 'object', 'utf-8' ];
	$this->errors = [
		'restore text' => [],
		'restore revision' => [],
		'unfixable' => [],
		'fixed' => [],
		'fixable' => [],
	];

	// "<=" rather than "<": otherwise the revision with the highest ID is skipped
	// whenever it happens to be the first ID of a chunk (1, 1001, 2001, ...).
	for ( $chunkStart = 1; $chunkStart <= $maxRevId; $chunkStart += $chunkSize ) {
		$chunkEnd = $chunkStart + $chunkSize - 1;
		// print "$chunkStart of $maxRevId\n";

		// Fetch revision rows
		$this->oldIdMap = [];
		$dbr->ping();
		$res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ],
			[ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ );
		foreach ( $res as $row ) {
			$this->oldIdMap[$row->rev_id] = $row->rev_text_id;
		}
		$dbr->freeResult( $res );

		if ( !count( $this->oldIdMap ) ) {
			continue;
		}

		// Fetch old_flags
		// Start by assuming every text row is missing (old_id => rev_id) and
		// strike out the ones the query below actually returns.
		$missingTextRows = array_flip( $this->oldIdMap );
		$externalRevs = [];
		$objectRevs = [];
		$res = $dbr->select(
			'text',
			[ 'old_id', 'old_flags' ],
			[ 'old_id' => $this->oldIdMap ],
			__METHOD__
		);
		foreach ( $res as $row ) {
			$flags = $row->old_flags;
			$id = $row->old_id;

			// Create flagStats row if it doesn't exist
			$flagStats = $flagStats + [ $flags => 0 ];
			// Increment counter
			$flagStats[$flags]++;

			// Not missing
			unset( $missingTextRows[$row->old_id] );

			// Check for external or object
			if ( $flags == '' ) {
				$flagArray = [];
			} else {
				$flagArray = explode( ',', $flags );
			}
			if ( in_array( 'external', $flagArray ) ) {
				$externalRevs[] = $id;
			} elseif ( in_array( 'object', $flagArray ) ) {
				$objectRevs[] = $id;
			}

			// Check for unrecognised flags
			if ( $flags == '0' ) {
				// This is a known bug from 2004
				// It's safe to just erase the old_flags field
				if ( $fix ) {
					$this->addError( 'fixed', "Warning: old_flags set to 0", $id );
					$dbw = wfGetDB( DB_MASTER );
					$dbw->ping();
					$dbw->update( 'text', [ 'old_flags' => '' ],
						[ 'old_id' => $id ], __METHOD__ );
					echo "Fixed\n";
				} else {
					$this->addError( 'fixable', "Warning: old_flags set to 0", $id );
				}
			} elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
				$this->addError( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
			}
		}
		$dbr->freeResult( $res );

		// Output errors for any missing text rows
		foreach ( $missingTextRows as $oldId => $revId ) {
			$this->addError( 'restore revision', "Error: missing text row", $oldId );
		}

		// Verify external revisions
		// Both maps are cluster => blob ID => list of old_ids pointing at that blob
		$externalConcatBlobs = [];
		$externalNormalBlobs = [];
		if ( count( $externalRevs ) ) {
			$res = $dbr->select(
				'text',
				[ 'old_id', 'old_flags', 'old_text' ],
				[ 'old_id' => $externalRevs ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				$urlParts = explode( '://', $row->old_text, 2 );
				if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
					$this->addError(
						'restore text',
						"Error: invalid URL \"{$row->old_text}\"",
						$row->old_id
					);
					continue;
				}
				list( $proto, ) = $urlParts;
				if ( $proto != 'DB' ) {
					$this->addError(
						'restore text',
						"Error: invalid external protocol \"$proto\"",
						$row->old_id );
					continue;
				}
				// "DB://cluster/id[/item]" splits into [ 'DB:', '', cluster, id, item? ]
				$path = explode( '/', $row->old_text );
				$cluster = $path[2];
				$id = $path[3];
				if ( isset( $path[4] ) ) {
					$externalConcatBlobs[$cluster][$id][] = $row->old_id;
				} else {
					$externalNormalBlobs[$cluster][$id][] = $row->old_id;
				}
			}
			$dbr->freeResult( $res );
		}

		// Check external concat blobs for the right header
		$this->checkExternalConcatBlobs( $externalConcatBlobs );

		// Check external normal blobs for existence
		if ( count( $externalNormalBlobs ) ) {
			if ( is_null( $this->dbStore ) ) {
				$this->dbStore = new ExternalStoreDB;
			}
			// Iterate the *normal* blobs here; the concat blobs were already
			// handled by checkExternalConcatBlobs() above.
			foreach ( $externalNormalBlobs as $cluster => $xBlobIds ) {
				$blobIds = array_keys( $xBlobIds );
				$extDb =& $this->dbStore->getSlave( $cluster );
				$blobsTable = $this->dbStore->getTable( $extDb );
				$res = $extDb->select( $blobsTable,
					[ 'blob_id' ],
					[ 'blob_id' => $blobIds ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					unset( $xBlobIds[$row->blob_id] );
				}
				$extDb->freeResult( $res );
				// Print errors for missing blobs rows
				foreach ( $xBlobIds as $blobId => $oldIds ) {
					$this->addError(
						'restore text',
						"Error: missing target $blobId for one-part ES URL",
						$oldIds );
				}
			}
		}

		// Check local objects
		$dbr->ping();
		$concatBlobs = [];
		// NOTE(review): $curIds is filled below but never examined afterwards.
		$curIds = [];
		if ( count( $objectRevs ) ) {
			$headerLength = 300;
			$res = $dbr->select(
				'text',
				[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
				[ 'old_id' => $objectRevs ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				$oldId = $row->old_id;
				$matches = [];
				if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
					$this->addError( 'restore text', "Error: invalid object header", $oldId );
					continue;
				}

				$className = strtolower( $matches[2] );
				if ( strlen( $className ) != $matches[1] ) {
					$this->addError(
						'restore text',
						"Error: invalid object header, wrong class name length",
						$oldId
					);
					continue;
				}

				$objectStats = $objectStats + [ $className => 0 ];
				$objectStats[$className]++;

				switch ( $className ) {
					case 'concatenatedgziphistoryblob':
						// Good
						break;
					case 'historyblobstub':
					case 'historyblobcurstub':
						// A header that fills the whole LEFT() window was probably
						// truncated, so it cannot be unserialized reliably.
						if ( strlen( $row->header ) == $headerLength ) {
							$this->addError( 'unfixable', "Error: overlong stub header", $oldId );
							break;
						}
						// NOTE(review): unserialize() runs on data read from the text
						// table; confirm that table is trusted in your deployment.
						$stubObj = unserialize( $row->header );
						if ( !is_object( $stubObj ) ) {
							$this->addError(
								'restore text',
								"Error: unable to unserialize stub object",
								$oldId
							);
							break;
						}
						if ( $className == 'historyblobstub' ) {
							$concatBlobs[$stubObj->mOldId][] = $oldId;
						} else {
							$curIds[$stubObj->mCurId][] = $oldId;
						}
						break;
					default:
						$this->addError(
							'unfixable',
							"Error: unrecognised object class \"$className\"",
							$oldId
						);
				}
			}
			$dbr->freeResult( $res );
		}

		// Check local concat blob validity
		$externalConcatBlobs = [];
		if ( count( $concatBlobs ) ) {
			$headerLength = 300;
			$res = $dbr->select(
				'text',
				[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
				[ 'old_id' => array_keys( $concatBlobs ) ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				$flags = explode( ',', $row->old_flags );
				if ( in_array( 'external', $flags ) ) {
					// Concat blob is in external storage?
					if ( in_array( 'object', $flags ) ) {
						$urlParts = explode( '/', $row->header );
						if ( $urlParts[0] != 'DB:' ) {
							// The closing quote was missing from this message
							$this->addError(
								'unfixable',
								"Error: unrecognised external storage type \"{$urlParts[0]}\"",
								$row->old_id
							);
						} else {
							$cluster = $urlParts[2];
							$id = $urlParts[3];
							if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
								$externalConcatBlobs[$cluster][$id] = [];
							}
							$externalConcatBlobs[$cluster][$id] = array_merge(
								$externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
							);
						}
					} else {
						$this->addError(
							'unfixable',
							"Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
							$concatBlobs[$row->old_id] );
					}
				} elseif ( strcasecmp(
					substr( $row->header, 0, strlen( self::CONCAT_HEADER ) ),
					self::CONCAT_HEADER
				) ) {
					$this->addError(
						'restore text',
						"Error: Incorrect object header for concat bulk row {$row->old_id}",
						$concatBlobs[$row->old_id]
					);
				} # else good

				unset( $concatBlobs[$row->old_id] );
			}
			$dbr->freeResult( $res );
		}

		// Check targets of unresolved stubs
		$this->checkExternalConcatBlobs( $externalConcatBlobs );
		// next chunk
	}

	print "\n\nErrors:\n";
	foreach ( $this->errors as $name => $errors ) {
		if ( count( $errors ) ) {
			$description = $this->errorDescriptions[$name];
			echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
		}
	}

	if ( count( $this->errors['restore text'] ) && $fix ) {
		if ( (string)$xml !== '' ) {
			$this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
		} else {
			echo "Can't fix text, no XML backup specified\n";
		}
	}

	print "\nFlag statistics:\n";
	$total = array_sum( $flagStats );
	foreach ( $flagStats as $flag => $count ) {
		printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
	}
	print "\nLocal object statistics:\n";
	$total = array_sum( $objectStats );
	foreach ( $objectStats as $className => $count ) {
		printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
	}
}
375
/**
 * Print an error message and record the affected revisions under an error type.
 *
 * The text row IDs are translated back to revision IDs through $this->oldIdMap,
 * and those revision IDs become keys of $this->errors[$type].
 *
 * @param string $type Key of $this->errors to file the revisions under
 * @param string $msg Message to print
 * @param int|string|array $ids One text row ID (old_id) or a list of them
 */
function addError( $type, $msg, $ids ) {
	// A one-element list is reported exactly like a single ID
	if ( is_array( $ids ) && count( $ids ) == 1 ) {
		$ids = reset( $ids );
	}

	if ( !is_array( $ids ) ) {
		$revIds = array_keys( $this->oldIdMap, $ids );
		if ( count( $revIds ) == 1 ) {
			$where = "rev_id {$revIds[0]}";
		} else {
			$where = "revisions " . implode( ', ', $revIds );
		}
		print "$msg in old_id $ids, $where\n";
	} else {
		// Gather the revisions of every listed text row, in list order
		$revIds = [];
		foreach ( $ids as $textId ) {
			$revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $textId ) );
		}
		$textRowList = implode( ', ', $ids );
		$revisionList = implode( ', ', $revIds );
		print "$msg in text rows $textRowList, revisions $revisionList\n";
	}

	// Array union: revisions already recorded for this type keep their entry
	$this->errors[$type] = $this->errors[$type] + array_flip( $revIds );
}
398
/**
 * Verify that the targets of two-part external storage URLs exist and start
 * with the concatenated-blob header.
 *
 * @param array $externalConcatBlobs Map of cluster => blob ID => text row ID(s)
 *   that reference the blob
 */
function checkExternalConcatBlobs( $externalConcatBlobs ) {
	if ( count( $externalConcatBlobs ) == 0 ) {
		// Nothing to verify
		return;
	}

	if ( $this->dbStore === null ) {
		$this->dbStore = new ExternalStoreDB;
	}

	// Only as many leading bytes as the expected header are fetched per blob
	$headerLength = strlen( self::CONCAT_HEADER );

	foreach ( $externalConcatBlobs as $cluster => $pending ) {
		$extDb =& $this->dbStore->getSlave( $cluster );
		$blobsTable = $this->dbStore->getTable( $extDb );
		$res = $extDb->select(
			$blobsTable,
			[ 'blob_id', "LEFT(blob_text, $headerLength) AS header" ],
			[ 'blob_id' => array_keys( $pending ) ],
			__METHOD__
		);
		foreach ( $res as $blobRow ) {
			$blobId = $blobRow->blob_id;
			if ( strcasecmp( $blobRow->header, self::CONCAT_HEADER ) != 0 ) {
				$this->addError(
					'restore text',
					"Error: invalid header on target $cluster/{$blobId} of two-part ES URL",
					$pending[$blobId]
				);
			}
			// The blob was found, so it is no longer pending
			unset( $pending[$blobId] );
		}
		$extDb->freeResult( $res );

		// Whatever is still pending had no row in the blobs table
		foreach ( $pending as $missingBlobId => $referrers ) {
			$this->addError(
				'restore text',
				"Error: missing target $cluster/$missingBlobId for two-part ES URL",
				$referrers
			);
		}
	}
}
440
/**
 * Restore the text of damaged revisions from an XML dump.
 *
 * Writes the revision IDs to a temporary list, runs the external "mwdumper"
 * program to filter the dump down to those revisions, then imports the
 * filtered dump with importRevision() as the per-revision callback.
 *
 * @param array $revIds Revision IDs whose text needs restoring
 * @param string $xml The XML dump, passed to mwdumper as its input argument
 */
function restoreText( $revIds, $xml ) {
	global $wgDBname;
	$tmpDir = wfTempDir();

	if ( !count( $revIds ) ) {
		return;
	}

	print "Restoring text from XML backup...\n";

	$revFileName = "$tmpDir/broken-revlist-$wgDBname";
	$filteredXmlFileName = "$tmpDir/filtered-$wgDBname.xml";

	// Write revision list
	// file_put_contents() returns false on failure; test for that explicitly
	if ( file_put_contents( $revFileName, implode( "\n", $revIds ) ) === false ) {
		echo "Error writing revision list, can't restore text\n";

		return;
	}

	// Run mwdumper
	echo "Filtering XML dump...\n";
	$exitStatus = 0;
	// Every argument is shell-escaped, since the file names embed $wgDBname
	// and $xml comes from the command line.
	passthru( 'mwdumper ' .
		wfEscapeShellArg(
			"--output=file:$filteredXmlFileName",
			"--filter=revlist:$revFileName",
			$xml
		), $exitStatus
	);

	if ( $exitStatus ) {
		echo "mwdumper died with exit status $exitStatus\n";

		return;
	}

	$file = fopen( $filteredXmlFileName, 'r' );
	if ( !$file ) {
		echo "Unable to open filtered XML file\n";

		return;
	}

	// Ping both connections before the import starts using them
	$dbr = wfGetDB( DB_REPLICA );
	$dbw = wfGetDB( DB_MASTER );
	$dbr->ping();
	$dbw->ping();

	$source = new ImportStreamSource( $file );
	$importer = new WikiImporter(
		$source,
		MediaWikiServices::getInstance()->getMainConfig()
	);
	$importer->setRevisionCallback( [ $this, 'importRevision' ] );
	$importer->setNoticeCallback( function ( $msg, $params ) {
		echo wfMessage( $msg, $params )->text() . "\n";
	} );
	$importer->doImport();
}
501
/**
 * Revision callback for the importer set up in restoreText(): overwrite the
 * text row of one damaged revision with the content found in the dump.
 *
 * Revisions with no content, blank text, or no ID in the dump are skipped
 * with a message and stay in the 'restore text' error list.
 *
 * @param object &$revision Imported revision; must provide getID() and getContent()
 * @param object &$importer The importer invoking the callback (unused)
 */
function importRevision( &$revision, &$importer ) {
	$id = $revision->getID();
	$content = $revision->getContent( Revision::RAW );
	// Normalise a missing ID to '' so the messages below read cleanly
	$id = $id ? $id : '';

	if ( $content === null ) {
		echo "Revision $id is broken, we have no content available\n";

		return;
	}

	$text = $content->serialize();
	if ( $text === '' ) {
		// This is what happens if the revision was broken at the time the
		// dump was made. Unfortunately, it also happens if the revision was
		// legitimately blank, so there's no way to tell the difference. To
		// be safe, we'll skip it and leave it broken

		echo "Revision $id is blank in the dump, may have been broken before export\n";

		return;
	}

	if ( !$id ) {
		// No ID, can't import
		echo "No id tag in revision, can't import\n";

		return;
	}

	// Find text row again
	$dbr = wfGetDB( DB_REPLICA );
	$oldId = $dbr->selectField( 'revision', 'rev_text_id', [ 'rev_id' => $id ], __METHOD__ );
	if ( !$oldId ) {
		echo "Missing revision row for rev_id $id\n";

		return;
	}

	// Compress the text
	// NOTE(review): presumably compressRevisionText() rewrites $text in place
	// and returns the matching flags - confirm against Revision.
	$flags = Revision::compressRevisionText( $text );

	// Update the text row
	$dbw = wfGetDB( DB_MASTER );
	$dbw->update( 'text',
		[ 'old_flags' => $flags, 'old_text' => $text ],
		[ 'old_id' => $oldId ],
		__METHOD__, [ 'LIMIT' => 1 ]
	);

	// Remove it from the unfixed list and add it to the fixed list
	unset( $this->errors['restore text'][$id] );
	$this->errors['fixed'][$id] = true;
}
556}
unserialize( $serialized)
wfTempDir()
Tries to get the system directory for temporary files.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfEscapeShellArg()
Version of escapeshellarg() that works better on Windows.
if( $line===false) $args
Definition cdb.php:64
Maintenance script to do various checks on external storage.
check( $fix=false, $xml='')
restoreText( $revIds, $xml)
importRevision(&$revision, &$importer)
addError( $type, $msg, $ids)
const CONCAT_HEADER
checkExternalConcatBlobs( $externalConcatBlobs)
DB accessible external objects.
Imports a XML dump from a file (either from file upload, files on disk, or HTTP)
MediaWikiServices is the service locator for the application scope of MediaWiki.
XML file reader for the page data importer.
global $optionsWithoutArgs
$res
Definition database.txt:21
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:2001
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:302
while(( $__line=Maintenance::readconsole()) !==false) print
Definition eval.php:64
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname
$source
const DB_REPLICA
Definition defines.php:25
const DB_MASTER
Definition defines.php:29
$params