MediaWiki REL1_33
checkStorage.php
Go to the documentation of this file.
1<?php
26
// Stand-alone entry point: the checker is only run from here when the
// MEDIAWIKI constant has not been defined yet.
if ( !defined( 'MEDIAWIKI' ) ) {
	// "fix" is a switch that takes no value; this must be set before
	// commandLine.inc is loaded.
	$optionsWithoutArgs = [ 'fix' ];
	require_once __DIR__ . '/../commandLine.inc';

	// NOTE(review): $args and $options are presumably populated by
	// commandLine.inc -- confirm against that file.
	// First positional argument: path to an XML dump, or false if absent.
	$xml = $args[0] ?? false;
	// True when --fix was given on the command line.
	$fix = isset( $options['fix'] );

	$cs = new CheckStorage();
	$cs->check( $fix, $xml );
}
36
37// ----------------------------------------------------------------------------------
38
/**
 * Maintenance class that checks the integrity of revision text storage:
 * text rows, their flags, local serialized objects and external storage
 * (DB://cluster/id[/hash]) references. Optionally repairs what it can.
 */
class CheckStorage {
	/** Serialized-object prefix expected at the start of a concatenated gzip history blob. */
	const CONCAT_HEADER = 'O:27:"concatenatedgziphistoryblob"';

	/** @var array Map of rev_id => rev_text_id for the chunk currently being checked */
	public $oldIdMap = [];

	/** @var array[] Map of error type => [ rev_id => mixed ]; only the keys are significant */
	public $errors = [];

	/** @var ExternalStoreDB|null Created on first use */
	public $dbStore = null;

	/** @var string[] Human-readable description for each error type */
	public $errorDescriptions = [
		'restore text' => 'Damaged text, need to be restored from a backup',
		'restore revision' => 'Damaged revision row, need to be restored from a backup',
		'unfixable' => 'Unexpected errors with no automated fixing method',
		'fixed' => 'Errors already fixed',
		'fixable' => 'Errors which would already be fixed if --fix was specified',
	];

	/**
	 * Walk the revision table in chunks and verify the text behind every revision.
	 *
	 * Prints every problem as it is found, followed by a per-type summary and
	 * flag/object statistics. Results are also collected in $this->errors.
	 *
	 * @param bool $fix Whether to repair fixable problems (old_flags = '0', and
	 *   damaged text when an XML backup is given)
	 * @param string|bool $xml Path of an XML dump to restore damaged text from;
	 *   '' or false means no backup is available
	 */
	public function check( $fix = false, $xml = '' ) {
		$dbr = wfGetDB( DB_REPLICA );
		if ( $fix ) {
			print "Checking, will fix errors if possible...\n";
		} else {
			print "Checking...\n";
		}
		$maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', '', __METHOD__ );
		$chunkSize = 1000;
		$flagStats = [];
		$objectStats = [];
		$knownFlags = [ 'external', 'gzip', 'object', 'utf-8' ];
		$this->errors = [
			'restore text' => [],
			'restore revision' => [],
			'unfixable' => [],
			'fixed' => [],
			'fixable' => [],
		];

		// The bound is inclusive: with "<" the final chunk was skipped whenever
		// MAX(rev_id) happened to be the first ID of a chunk (1, 1001, 2001, ...).
		for ( $chunkStart = 1; $chunkStart <= $maxRevId; $chunkStart += $chunkSize ) {
			$chunkEnd = $chunkStart + $chunkSize - 1;

			// Fetch revision rows
			$this->oldIdMap = [];
			$dbr->ping();
			$res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ],
				[ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ );
			foreach ( $res as $row ) {
				$this->oldIdMap[$row->rev_id] = $row->rev_text_id;
			}

			if ( !count( $this->oldIdMap ) ) {
				continue;
			}

			// Fetch old_flags. Every text ID starts out as "missing" and is
			// struck off once its row has been seen.
			$missingTextRows = array_flip( $this->oldIdMap );
			$externalRevs = [];
			$objectRevs = [];
			$res = $dbr->select(
				'text',
				[ 'old_id', 'old_flags' ],
				[ 'old_id' => $this->oldIdMap ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				$flags = $row->old_flags;
				$id = $row->old_id;

				// Count how often each distinct flags string occurs
				$flagStats[$flags] = ( $flagStats[$flags] ?? 0 ) + 1;

				// Not missing
				unset( $missingTextRows[$row->old_id] );

				// Check for external or object
				if ( $flags == '' ) {
					$flagArray = [];
				} else {
					$flagArray = explode( ',', $flags );
				}
				if ( in_array( 'external', $flagArray ) ) {
					$externalRevs[] = $id;
				} elseif ( in_array( 'object', $flagArray ) ) {
					$objectRevs[] = $id;
				}

				// Check for unrecognised flags
				if ( $flags == '0' ) {
					// This is a known bug from 2004
					// It's safe to just erase the old_flags field
					if ( $fix ) {
						$this->addError( 'fixed', "Warning: old_flags set to 0", $id );
						$dbw = wfGetDB( DB_MASTER );
						$dbw->ping();
						$dbw->update( 'text', [ 'old_flags' => '' ],
							[ 'old_id' => $id ], __METHOD__ );
						echo "Fixed\n";
					} else {
						$this->addError( 'fixable', "Warning: old_flags set to 0", $id );
					}
				} elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
					$this->addError( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
				}
			}

			// Output errors for any missing text rows
			foreach ( $missingTextRows as $oldId => $revId ) {
				$this->addError( 'restore revision', "Error: missing text row", $oldId );
			}

			// Verify external revisions. Both maps are
			// cluster => [ blob_id => [ old_id, ... ] ].
			$externalConcatBlobs = [];
			$externalNormalBlobs = [];
			if ( count( $externalRevs ) ) {
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', 'old_text' ],
					[ 'old_id' => $externalRevs ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					$urlParts = explode( '://', $row->old_text, 2 );
					if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
						$this->addError( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
						continue;
					}
					$proto = $urlParts[0];
					if ( $proto != 'DB' ) {
						$this->addError(
							'restore text',
							"Error: invalid external protocol \"$proto\"",
							$row->old_id );
						continue;
					}
					// "DB://cluster/id[/hash]" splits into [ 'DB:', '', cluster, id, hash? ]
					$path = explode( '/', $row->old_text );
					if ( !isset( $path[3] ) || $path[2] === '' || $path[3] === '' ) {
						// A URL without cluster or blob ID cannot be resolved
						$this->addError( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
						continue;
					}
					$cluster = $path[2];
					$id = $path[3];
					if ( isset( $path[4] ) ) {
						$externalConcatBlobs[$cluster][$id][] = $row->old_id;
					} else {
						$externalNormalBlobs[$cluster][$id][] = $row->old_id;
					}
				}
			}

			// Check external concat blobs for the right header
			$this->checkExternalConcatBlobs( $externalConcatBlobs );

			// Check external normal blobs for existence
			if ( count( $externalNormalBlobs ) ) {
				if ( $this->dbStore === null ) {
					$this->dbStore = new ExternalStoreDB;
				}
				// Iterate the *normal* blobs here; the concat blobs were already
				// handled by checkExternalConcatBlobs() above.
				foreach ( $externalNormalBlobs as $cluster => $xBlobIds ) {
					$blobIds = array_keys( $xBlobIds );
					$extDb = $this->dbStore->getSlave( $cluster );
					$blobsTable = $this->dbStore->getTable( $extDb );
					$res = $extDb->select( $blobsTable,
						[ 'blob_id' ],
						[ 'blob_id' => $blobIds ],
						__METHOD__
					);
					foreach ( $res as $row ) {
						unset( $xBlobIds[$row->blob_id] );
					}
					// Print errors for missing blobs rows
					foreach ( $xBlobIds as $blobId => $oldId ) {
						$this->addError(
							'restore text',
							"Error: missing target $blobId for one-part ES URL",
							$oldId );
					}
				}
			}

			// Check local objects
			$dbr->ping();
			$concatBlobs = [];
			// Collected for cur stubs; nothing in this method verifies them.
			$curIds = [];
			if ( count( $objectRevs ) ) {
				$headerLength = 300;
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
					[ 'old_id' => $objectRevs ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					$oldId = $row->old_id;
					$matches = [];
					if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
						$this->addError( 'restore text', "Error: invalid object header", $oldId );
						continue;
					}

					$className = strtolower( $matches[2] );
					if ( strlen( $className ) !== (int)$matches[1] ) {
						$this->addError(
							'restore text',
							"Error: invalid object header, wrong class name length",
							$oldId
						);
						continue;
					}

					$objectStats[$className] = ( $objectStats[$className] ?? 0 ) + 1;

					switch ( $className ) {
						case 'concatenatedgziphistoryblob':
							// Good
							break;
						case 'historyblobstub':
						case 'historyblobcurstub':
							// A header that fills the whole LEFT() window was
							// truncated and cannot be unserialized reliably.
							if ( strlen( $row->header ) == $headerLength ) {
								$this->addError( 'unfixable', "Error: overlong stub header", $oldId );
								break;
							}
							// NOTE(review): unserialize() on database content can
							// instantiate arbitrary classes if the text table is
							// attacker-controlled; consider restricting
							// 'allowed_classes' once confirmed safe for old stubs.
							$stubObj = unserialize( $row->header );
							if ( !is_object( $stubObj ) ) {
								$this->addError( 'restore text', "Error: unable to unserialize stub object", $oldId );
								break;
							}
							if ( $className == 'historyblobstub' ) {
								$concatBlobs[$stubObj->mOldId][] = $oldId;
							} else {
								$curIds[$stubObj->mCurId][] = $oldId;
							}
							break;
						default:
							$this->addError( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
					}
				}
			}

			// Check local concat blob validity
			$externalConcatBlobs = [];
			if ( count( $concatBlobs ) ) {
				$headerLength = 300;
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
					[ 'old_id' => array_keys( $concatBlobs ) ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					$flags = explode( ',', $row->old_flags );
					if ( in_array( 'external', $flags ) ) {
						// Concat blob is in external storage?
						if ( in_array( 'object', $flags ) ) {
							$urlParts = explode( '/', $row->header );
							if ( $urlParts[0] != 'DB:' ) {
								$this->addError(
									'unfixable',
									"Error: unrecognised external storage type \"{$urlParts[0]}\"",
									$row->old_id
								);
							} else {
								$cluster = $urlParts[2];
								$id = $urlParts[3];
								if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
									$externalConcatBlobs[$cluster][$id] = [];
								}
								$externalConcatBlobs[$cluster][$id] = array_merge(
									$externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
								);
							}
						} else {
							$this->addError(
								'unfixable',
								"Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
								$concatBlobs[$row->old_id] );
						}
					} elseif ( strcasecmp(
						substr( $row->header, 0, strlen( self::CONCAT_HEADER ) ),
						self::CONCAT_HEADER
					) ) {
						$this->addError(
							'restore text',
							"Error: Incorrect object header for concat bulk row {$row->old_id}",
							$concatBlobs[$row->old_id]
						);
					} # else good

					unset( $concatBlobs[$row->old_id] );
				}
			}

			// Check targets of unresolved stubs
			$this->checkExternalConcatBlobs( $externalConcatBlobs );
			// next chunk
		}

		print "\n\nErrors:\n";
		foreach ( $this->errors as $name => $errors ) {
			if ( count( $errors ) ) {
				$description = $this->errorDescriptions[$name];
				echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
			}
		}

		if ( count( $this->errors['restore text'] ) && $fix ) {
			if ( (string)$xml !== '' ) {
				$this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
			} else {
				echo "Can't fix text, no XML backup specified\n";
			}
		}

		// The divisions below only run when the respective array is non-empty,
		// so $total cannot be zero inside the loops.
		print "\nFlag statistics:\n";
		$total = array_sum( $flagStats );
		foreach ( $flagStats as $flag => $count ) {
			printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
		}
		print "\nLocal object statistics:\n";
		$total = array_sum( $objectStats );
		foreach ( $objectStats as $className => $count ) {
			printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
		}
	}

	/**
	 * Print an error message and record the affected revisions.
	 *
	 * Text row IDs are translated back to revision IDs through $this->oldIdMap,
	 * and those revision IDs become keys of $this->errors[$type].
	 *
	 * @param string $type One of the keys of $this->errors
	 * @param string $msg Message prefix to print
	 * @param int|string|array $ids A text row ID (old_id) or a list of them
	 */
	public function addError( $type, $msg, $ids ) {
		// A single-element list is reported like a scalar ID
		if ( is_array( $ids ) && count( $ids ) == 1 ) {
			$ids = reset( $ids );
		}
		if ( is_array( $ids ) ) {
			$revIds = [];
			foreach ( $ids as $id ) {
				$revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) );
			}
			print "$msg in text rows " . implode( ', ', $ids ) .
				", revisions " . implode( ', ', $revIds ) . "\n";
		} else {
			$id = $ids;
			$revIds = array_keys( $this->oldIdMap, $id );
			if ( count( $revIds ) == 1 ) {
				print "$msg in old_id $id, rev_id {$revIds[0]}\n";
			} else {
				print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
			}
		}
		// "+" keeps entries that are already recorded for a revision
		$this->errors[$type] = $this->errors[$type] + array_flip( $revIds );
	}

	/**
	 * Verify that the targets of two-part external storage URLs exist and start
	 * with the concatenated blob header.
	 *
	 * @param array $externalConcatBlobs Map of
	 *   cluster => [ blob_id => [ old_id, ... ] ]
	 */
	public function checkExternalConcatBlobs( $externalConcatBlobs ) {
		if ( !count( $externalConcatBlobs ) ) {
			return;
		}

		if ( $this->dbStore === null ) {
			$this->dbStore = new ExternalStoreDB;
		}

		$headerLength = strlen( self::CONCAT_HEADER );
		foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
			$blobIds = array_keys( $oldIds );
			// Plain assignment: binding a function result by reference raises a
			// notice unless the function returns by reference.
			$extDb = $this->dbStore->getSlave( $cluster );
			$blobsTable = $this->dbStore->getTable( $extDb );
			$res = $extDb->select( $blobsTable,
				[ 'blob_id', "LEFT(blob_text, $headerLength) AS header" ],
				[ 'blob_id' => $blobIds ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				if ( strcasecmp( $row->header, self::CONCAT_HEADER ) ) {
					$this->addError(
						'restore text',
						"Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
						$oldIds[$row->blob_id]
					);
				}
				// Found, so not missing
				unset( $oldIds[$row->blob_id] );
			}

			// Print errors for missing blobs rows
			foreach ( $oldIds as $blobId => $oldIds2 ) {
				$this->addError(
					'restore text',
					"Error: missing target $cluster/$blobId for two-part ES URL",
					$oldIds2
				);
			}
		}
	}

	/**
	 * Restore the text of damaged revisions from an XML dump.
	 *
	 * Writes the revision list to a temporary file, filters the dump down to
	 * those revisions with the external "mwdumper" program, then imports the
	 * filtered dump with importRevision() as the per-revision callback.
	 *
	 * @param int[] $revIds Revision IDs whose text should be restored
	 * @param string $xml Path of the XML dump
	 */
	public function restoreText( $revIds, $xml ) {
		global $wgDBname;
		$tmpDir = wfTempDir();

		if ( !count( $revIds ) ) {
			return;
		}

		print "Restoring text from XML backup...\n";

		$revFileName = "$tmpDir/broken-revlist-$wgDBname";
		$filteredXmlFileName = "$tmpDir/filtered-$wgDBname.xml";

		// Write revision list
		if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) {
			echo "Error writing revision list, can't restore text\n";

			return;
		}

		// Run mwdumper
		echo "Filtering XML dump...\n";
		$exitStatus = 0;
		passthru( 'mwdumper ' .
			\MediaWiki\Shell\Shell::escape(
				"--output=file:$filteredXmlFileName",
				"--filter=revlist:$revFileName",
				$xml
			), $exitStatus
		);

		if ( $exitStatus ) {
			echo "mwdumper died with exit status $exitStatus\n";

			return;
		}

		$file = fopen( $filteredXmlFileName, 'r' );
		if ( !$file ) {
			echo "Unable to open filtered XML file\n";

			return;
		}

		// Ping both connections before the import starts
		$dbr = wfGetDB( DB_REPLICA );
		$dbw = wfGetDB( DB_MASTER );
		$dbr->ping();
		$dbw->ping();

		$source = new ImportStreamSource( $file );
		$importer = new WikiImporter(
			$source,
			\MediaWiki\MediaWikiServices::getInstance()->getMainConfig()
		);
		$importer->setRevisionCallback( [ $this, 'importRevision' ] );
		$importer->setNoticeCallback( function ( $msg, $params ) {
			echo wfMessage( $msg, $params )->text() . "\n";
		} );
		$importer->doImport();
	}

	/**
	 * Import callback: overwrite the text row of one revision with the content
	 * found in the dump, and move the revision from 'restore text' to 'fixed'.
	 *
	 * Revisions without content, with blank text or without an ID are skipped
	 * with a message.
	 *
	 * @param object &$revision Revision read from the dump; must provide
	 *   getID() and getContent()
	 * @param object &$importer The running importer (unused)
	 */
	public function importRevision( &$revision, &$importer ) {
		$id = $revision->getID();
		$content = $revision->getContent( Revision::RAW );
		$id = $id ?: '';

		if ( $content === null ) {
			echo "Revision $id is broken, we have no content available\n";

			return;
		}

		$text = $content->serialize();
		if ( $text === '' ) {
			// This is what happens if the revision was broken at the time the
			// dump was made. Unfortunately, it also happens if the revision was
			// legitimately blank, so there's no way to tell the difference. To
			// be safe, we'll skip it and leave it broken

			echo "Revision $id is blank in the dump, may have been broken before export\n";

			return;
		}

		if ( !$id ) {
			// No ID, can't import
			echo "No id tag in revision, can't import\n";

			return;
		}

		// Find text row again
		$dbr = wfGetDB( DB_REPLICA );
		$oldId = $dbr->selectField( 'revision', 'rev_text_id', [ 'rev_id' => $id ], __METHOD__ );
		if ( !$oldId ) {
			echo "Missing revision row for rev_id $id\n";

			return;
		}

		// Compress the text; $text is passed by reference and the returned
		// flags are stored alongside it.
		$flags = Revision::compressRevisionText( $text );

		// Update the text row
		$dbw = wfGetDB( DB_MASTER );
		$dbw->update( 'text',
			[ 'old_flags' => $flags, 'old_text' => $text ],
			[ 'old_id' => $oldId ],
			__METHOD__, [ 'LIMIT' => 1 ]
		);

		// Remove it from the unfixed list and add it to the fixed list
		unset( $this->errors['restore text'][$id] );
		$this->errors['fixed'][$id] = true;
	}
}
unserialize( $serialized)
and that you know you can do these things To protect your we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights These restrictions translate to certain responsibilities for you if you distribute copies of the or if you modify it For if you distribute copies of such a whether gratis or for a you must give the recipients all the rights that you have You must make sure that receive or can get the source code And you must show them these terms so they know their rights We protect your rights with two and(2) offer you this license which gives you legal permission to copy
wfTempDir()
Tries to get the system directory for temporary files.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
if( $line===false) $args
Definition cdb.php:64
Maintenance script to do various checks on external storage.
check( $fix=false, $xml='')
restoreText( $revIds, $xml)
importRevision(&$revision, &$importer)
addError( $type, $msg, $ids)
const CONCAT_HEADER
checkExternalConcatBlobs( $externalConcatBlobs)
DB accessible external objects.
Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
MediaWikiServices is the service locator for the application scope of MediaWiki.
Executes shell commands.
Definition Shell.php:44
static compressRevisionText(&$text)
If $wgCompressRevisions is enabled, we will compress data.
const RAW
Definition Revision.php:56
XML file reader for the page data importer.
global $optionsWithoutArgs
$res
Definition database.txt:21
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:1999
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:271
while(( $__line=Maintenance::readconsole()) !==false) print
Definition eval.php:64
controlled by the following MediaWiki still creates a BagOStuff but calls it to it are no ops If the cache daemon can t be it should also disable itself fairly $wgDBname
$source
$content
const DB_REPLICA
Definition defines.php:25
const DB_MASTER
Definition defines.php:26
$params