MediaWiki REL1_30
checkStorage.php
Go to the documentation of this file.
1<?php
25
// Command-line entry point: only runs when this file is executed directly
// rather than being included from an already-initialised MediaWiki.
if ( !defined( 'MEDIAWIKI' ) ) {
	// --fix is a boolean switch; it takes no argument
	$optionsWithoutArgs = [ 'fix' ];
	require_once __DIR__ . '/../commandLine.inc';

	// The first positional argument, when given, is the XML dump to restore from
	$xml = isset( $args[0] ) ? $args[0] : false;
	$fix = isset( $options['fix'] );

	$cs = new CheckStorage;
	$cs->check( $fix, $xml );
}
39
40// ----------------------------------------------------------------------------------
41
/**
 * Maintenance script to do various checks on external storage.
 */
class CheckStorage {
	/** Serialized-object prefix expected at the start of a concatenated history blob */
	const CONCAT_HEADER = 'O:27:"concatenatedgziphistoryblob"';

	/** @var array Map of rev_id => rev_text_id for the chunk currently being checked */
	public $oldIdMap;

	/** @var array Map of error type => [ rev_id => ... ] collected by error() */
	public $errors;

	/** @var ExternalStoreDB|null Lazily created the first time an external blob is checked */
	public $dbStore = null;

	/** @var string[] Human-readable description for each error type, printed in the summary */
	public $errorDescriptions = [
		'restore text' => 'Damaged text, need to be restored from a backup',
		'restore revision' => 'Damaged revision row, need to be restored from a backup',
		'unfixable' => 'Unexpected errors with no automated fixing method',
		'fixed' => 'Errors already fixed',
		'fixable' => 'Errors which would already be fixed if --fix was specified',
	];
60
/**
 * Walk the revision table in chunks and check the text row behind every
 * revision: flags, external storage URLs, serialized object headers and the
 * targets of history blob stubs. Problems are printed as they are found and
 * collected in $this->errors, then summarised together with flag and object
 * statistics.
 *
 * @param bool $fix Repair what can be repaired automatically (old_flags set
 *   to "0"), and restore damaged text from $xml if one is given
 * @param string|bool $xml Path of an XML dump to restore damaged text from;
 *   '' or false means no dump is available
 */
function check( $fix = false, $xml = '' ) {
	$dbr = wfGetDB( DB_REPLICA );
	if ( $fix ) {
		print "Checking, will fix errors if possible...\n";
	} else {
		print "Checking...\n";
	}
	$maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
	$chunkSize = 1000;
	$flagStats = [];
	$objectStats = [];
	$knownFlags = [ 'external', 'gzip', 'object', 'utf-8' ];
	$this->errors = [
		'restore text' => [],
		'restore revision' => [],
		'unfixable' => [],
		'fixed' => [],
		'fixable' => [],
	];

	// "<=" rather than "<": otherwise a final chunk that starts exactly at
	// $maxRevId (e.g. rev_id 1 or 1001) would never be scanned.
	for ( $chunkStart = 1; $chunkStart <= $maxRevId; $chunkStart += $chunkSize ) {
		$chunkEnd = $chunkStart + $chunkSize - 1;
		// print "$chunkStart of $maxRevId\n";

		// Fetch revision rows
		$this->oldIdMap = [];
		$dbr->ping();
		$res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ],
			[ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ );
		foreach ( $res as $row ) {
			$this->oldIdMap[$row->rev_id] = $row->rev_text_id;
		}
		$dbr->freeResult( $res );

		if ( !count( $this->oldIdMap ) ) {
			continue;
		}

		// Fetch old_flags
		// Start by assuming every text row is missing; rows are struck off
		// this list as they are found.
		$missingTextRows = array_flip( $this->oldIdMap );
		$externalRevs = [];
		$objectRevs = [];
		$res = $dbr->select( 'text', [ 'old_id', 'old_flags' ],
			'old_id IN (' . implode( ',', $this->oldIdMap ) . ')', __METHOD__ );
		foreach ( $res as $row ) {
			$flags = $row->old_flags;
			$id = $row->old_id;

			// Create flagStats row if it doesn't exist
			$flagStats = $flagStats + [ $flags => 0 ];
			// Increment counter
			$flagStats[$flags]++;

			// Not missing
			unset( $missingTextRows[$row->old_id] );

			// Check for external or object
			if ( $flags == '' ) {
				$flagArray = [];
			} else {
				$flagArray = explode( ',', $flags );
			}
			if ( in_array( 'external', $flagArray ) ) {
				$externalRevs[] = $id;
			} elseif ( in_array( 'object', $flagArray ) ) {
				$objectRevs[] = $id;
			}

			// Check for unrecognised flags
			if ( $flags == '0' ) {
				// This is a known bug from 2004
				// It's safe to just erase the old_flags field
				if ( $fix ) {
					$this->error( 'fixed', "Warning: old_flags set to 0", $id );
					$dbw = wfGetDB( DB_MASTER );
					$dbw->ping();
					$dbw->update( 'text', [ 'old_flags' => '' ],
						[ 'old_id' => $id ], __METHOD__ );
					echo "Fixed\n";
				} else {
					$this->error( 'fixable', "Warning: old_flags set to 0", $id );
				}
			} elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
				$this->error( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
			}
		}
		$dbr->freeResult( $res );

		// Output errors for any missing text rows
		foreach ( $missingTextRows as $oldId => $revId ) {
			$this->error( 'restore revision', "Error: missing text row", $oldId );
		}

		// Verify external revisions
		$externalConcatBlobs = [];
		$externalNormalBlobs = [];
		if ( count( $externalRevs ) ) {
			$res = $dbr->select( 'text', [ 'old_id', 'old_flags', 'old_text' ],
				[ 'old_id IN (' . implode( ',', $externalRevs ) . ')' ], __METHOD__ );
			foreach ( $res as $row ) {
				$urlParts = explode( '://', $row->old_text, 2 );
				if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
					$this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
					continue;
				}
				list( $proto, ) = $urlParts;
				if ( $proto != 'DB' ) {
					$this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id );
					continue;
				}
				// "DB://cluster/id[/item]" splits into [ 'DB:', '', cluster, id, item ]
				$path = explode( '/', $row->old_text );
				if ( !isset( $path[3] ) || $path[3] === '' ) {
					// No blob id after the cluster name; nothing to look up
					$this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
					continue;
				}
				$cluster = $path[2];
				$blobId = $path[3];
				if ( isset( $path[4] ) ) {
					$externalConcatBlobs[$cluster][$blobId][] = $row->old_id;
				} else {
					$externalNormalBlobs[$cluster][$blobId][] = $row->old_id;
				}
			}
			$dbr->freeResult( $res );
		}

		// Check external concat blobs for the right header
		$this->checkExternalConcatBlobs( $externalConcatBlobs );

		// Check external normal blobs for existence
		if ( count( $externalNormalBlobs ) ) {
			if ( is_null( $this->dbStore ) ) {
				$this->dbStore = new ExternalStoreDB;
			}
			// Iterate the *normal* blobs here; the concat blobs were already
			// handled by checkExternalConcatBlobs() above.
			foreach ( $externalNormalBlobs as $cluster => $xBlobIds ) {
				$blobIds = array_keys( $xBlobIds );
				$extDb =& $this->dbStore->getSlave( $cluster );
				$blobsTable = $this->dbStore->getTable( $extDb );
				$res = $extDb->select( $blobsTable,
					[ 'blob_id' ],
					[ 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ], __METHOD__ );
				foreach ( $res as $row ) {
					unset( $xBlobIds[$row->blob_id] );
				}
				$extDb->freeResult( $res );
				// Print errors for missing blobs rows
				foreach ( $xBlobIds as $blobId => $oldId ) {
					$this->error( 'restore text', "Error: missing target $blobId for one-part ES URL", $oldId );
				}
			}
		}

		// Check local objects
		$dbr->ping();
		$concatBlobs = [];
		$curIds = [];
		if ( count( $objectRevs ) ) {
			$headerLength = 300;
			$res = $dbr->select(
				'text',
				[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
				[ 'old_id IN (' . implode( ',', $objectRevs ) . ')' ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				$oldId = $row->old_id;
				$matches = [];
				if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
					$this->error( 'restore text', "Error: invalid object header", $oldId );
					continue;
				}

				$className = strtolower( $matches[2] );
				if ( strlen( $className ) != $matches[1] ) {
					$this->error(
						'restore text',
						"Error: invalid object header, wrong class name length",
						$oldId
					);
					continue;
				}

				$objectStats = $objectStats + [ $className => 0 ];
				$objectStats[$className]++;

				switch ( $className ) {
					case 'concatenatedgziphistoryblob':
						// Good
						break;
					case 'historyblobstub':
					case 'historyblobcurstub':
						// A header that fills the whole LEFT() window was
						// probably truncated and cannot be unserialized.
						if ( strlen( $row->header ) == $headerLength ) {
							$this->error( 'unfixable', "Error: overlong stub header", $oldId );
							// "continue 2" targets the foreach; a bare
							// "continue" inside switch only acts like "break".
							continue 2;
						}
						$stubObj = unserialize( $row->header );
						if ( !is_object( $stubObj ) ) {
							$this->error( 'restore text', "Error: unable to unserialize stub object", $oldId );
							continue 2;
						}
						if ( $className == 'historyblobstub' ) {
							$concatBlobs[$stubObj->mOldId][] = $oldId;
						} else {
							$curIds[$stubObj->mCurId][] = $oldId;
						}
						break;
					default:
						$this->error( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
				}
			}
			$dbr->freeResult( $res );
		}

		// Check local concat blob validity
		$externalConcatBlobs = [];
		if ( count( $concatBlobs ) ) {
			$headerLength = 300;
			$res = $dbr->select(
				'text',
				[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
				[ 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				$flags = explode( ',', $row->old_flags );
				if ( in_array( 'external', $flags ) ) {
					// Concat blob is in external storage?
					if ( in_array( 'object', $flags ) ) {
						$urlParts = explode( '/', $row->header );
						if ( $urlParts[0] != 'DB:' ) {
							$this->error(
								'unfixable',
								"Error: unrecognised external storage type \"{$urlParts[0]}\"",
								$row->old_id
							);
						} else {
							$cluster = $urlParts[2];
							$blobId = $urlParts[3];
							if ( !isset( $externalConcatBlobs[$cluster][$blobId] ) ) {
								$externalConcatBlobs[$cluster][$blobId] = [];
							}
							$externalConcatBlobs[$cluster][$blobId] = array_merge(
								$externalConcatBlobs[$cluster][$blobId], $concatBlobs[$row->old_id]
							);
						}
					} else {
						$this->error(
							'unfixable',
							"Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
							$concatBlobs[$row->old_id] );
					}
				} elseif ( strcasecmp(
					substr( $row->header, 0, strlen( self::CONCAT_HEADER ) ),
					self::CONCAT_HEADER
				) ) {
					$this->error(
						'restore text',
						"Error: Incorrect object header for concat bulk row {$row->old_id}",
						$concatBlobs[$row->old_id]
					);
				} # else good

				unset( $concatBlobs[$row->old_id] );
			}
			$dbr->freeResult( $res );
		}

		// Check targets of unresolved stubs
		$this->checkExternalConcatBlobs( $externalConcatBlobs );
		// next chunk
	}

	print "\n\nErrors:\n";
	foreach ( $this->errors as $name => $errors ) {
		if ( count( $errors ) ) {
			$description = $this->errorDescriptions[$name];
			echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
		}
	}

	if ( count( $this->errors['restore text'] ) && $fix ) {
		if ( (string)$xml !== '' ) {
			$this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
		} else {
			echo "Can't fix text, no XML backup specified\n";
		}
	}

	print "\nFlag statistics:\n";
	$total = array_sum( $flagStats );
	foreach ( $flagStats as $flag => $count ) {
		printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
	}
	print "\nLocal object statistics:\n";
	$total = array_sum( $objectStats );
	foreach ( $objectStats as $className => $count ) {
		printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
	}
}
359
/**
 * Print an error message for one or more text rows and record every
 * revision that points at those rows under the given error type.
 *
 * @param string $type Key into $this->errors
 * @param string $msg Message prefix to print
 * @param int|string|array $ids A text row ID (old_id), or a list of them
 */
function error( $type, $msg, $ids ) {
	// A one-element list is reported the same way as a bare ID
	$textIds = ( is_array( $ids ) && count( $ids ) == 1 ) ? reset( $ids ) : $ids;

	if ( !is_array( $textIds ) ) {
		// Reverse lookup: every rev_id in the current chunk using this text row
		$affected = array_keys( $this->oldIdMap, $textIds );
		if ( count( $affected ) == 1 ) {
			print "$msg in old_id $textIds, rev_id {$affected[0]}\n";
		} else {
			print "$msg in old_id $textIds, revisions " . implode( ', ', $affected ) . "\n";
		}
	} else {
		$affected = [];
		foreach ( $textIds as $textId ) {
			foreach ( array_keys( $this->oldIdMap, $textId ) as $revId ) {
				$affected[] = $revId;
			}
		}
		print "$msg in text rows " . implode( ', ', $textIds ) .
			", revisions " . implode( ', ', $affected ) . "\n";
	}

	// Keyed by rev_id, so a revision reported twice is only recorded once
	$this->errors[$type] += array_flip( $affected );
}
382
/**
 * Verify that every external blob referenced by a two-part ES URL exists
 * and starts with the concatenated history blob header.
 *
 * @param array $externalConcatBlobs Nested map of
 *   cluster => blob_id => list of text row IDs that reference the blob
 */
function checkExternalConcatBlobs( $externalConcatBlobs ) {
	if ( count( $externalConcatBlobs ) === 0 ) {
		return;
	}

	if ( $this->dbStore === null ) {
		$this->dbStore = new ExternalStoreDB;
	}

	// Only the leading bytes of each blob are needed to compare headers
	$headerLength = strlen( self::CONCAT_HEADER );

	foreach ( $externalConcatBlobs as $cluster => $pending ) {
		$idList = implode( ',', array_keys( $pending ) );
		$extDb =& $this->dbStore->getSlave( $cluster );
		$blobsTable = $this->dbStore->getTable( $extDb );
		$res = $extDb->select(
			$blobsTable,
			[ 'blob_id', "LEFT(blob_text, $headerLength) AS header" ],
			[ 'blob_id IN( ' . $idList . ')' ],
			__METHOD__
		);
		foreach ( $res as $row ) {
			$headerMatches = strcasecmp( $row->header, self::CONCAT_HEADER ) === 0;
			if ( !$headerMatches ) {
				$this->error(
					'restore text',
					"Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
					$pending[$row->blob_id]
				);
			}
			// Found, so this blob is no longer a candidate for "missing"
			unset( $pending[$row->blob_id] );
		}
		$extDb->freeResult( $res );

		// Whatever is still pending had no row in the blobs table
		foreach ( $pending as $blobId => $oldIds2 ) {
			$this->error(
				'restore text',
				"Error: missing target $cluster/$blobId for two-part ES URL",
				$oldIds2
			);
		}
	}
}
422
/**
 * Restore the text of damaged revisions from an XML dump.
 *
 * The revision IDs are written to a temporary list file, mwdumper filters
 * the dump down to those revisions, and the filtered dump is then fed
 * through WikiImporter with importRevision() as the revision callback.
 *
 * @param array $revIds Revision IDs whose text needs restoring
 * @param string $xml Path of the XML dump to read from
 */
function restoreText( $revIds, $xml ) {
	global $wgDBname;
	$tmpDir = wfTempDir();

	if ( !count( $revIds ) ) {
		return;
	}

	print "Restoring text from XML backup...\n";

	$revFileName = "$tmpDir/broken-revlist-$wgDBname";
	$filteredXmlFileName = "$tmpDir/filtered-$wgDBname.xml";

	// Write revision list
	if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) {
		echo "Error writing revision list, can't restore text\n";

		return;
	}

	// Run mwdumper
	echo "Filtering XML dump...\n";
	$exitStatus = 0;
	// Every argument goes through wfEscapeShellArg() so that paths containing
	// spaces or shell metacharacters cannot break the command line.
	passthru( 'mwdumper ' .
		wfEscapeShellArg(
			"--output=file:$filteredXmlFileName",
			"--filter=revlist:$revFileName",
			$xml
		), $exitStatus
	);

	if ( $exitStatus ) {
		echo "mwdumper died with exit status $exitStatus\n";

		return;
	}

	$file = fopen( $filteredXmlFileName, 'r' );
	if ( !$file ) {
		echo "Unable to open filtered XML file\n";

		return;
	}

	// Ping both connections before the import starts
	$dbr = wfGetDB( DB_REPLICA );
	$dbw = wfGetDB( DB_MASTER );
	$dbr->ping();
	$dbw->ping();

	$source = new ImportStreamSource( $file );
	$importer = new WikiImporter(
		$source,
		MediaWikiServices::getInstance()->getMainConfig()
	);
	$importer->setRevisionCallback( [ $this, 'importRevision' ] );
	$importer->doImport();
}
480
/**
 * Revision callback for WikiImporter: overwrite the text row of one
 * revision with the text found in the dump, then move the revision from
 * the 'restore text' error list to the 'fixed' list.
 *
 * Revisions with no content, blank content, no ID, or no matching revision
 * row are reported and skipped.
 *
 * @param object &$revision Imported revision; getID() and getContent() are called on it
 * @param object &$importer The importer invoking this callback (unused)
 */
function importRevision( &$revision, &$importer ) {
	$id = $revision->getID();
	$content = $revision->getContent( Revision::RAW );
	$id = $id ? $id : '';

	if ( $content === null ) {
		echo "Revision $id is broken, we have no content available\n";

		return;
	}

	$text = $content->serialize();
	if ( $text === '' ) {
		// This is what happens if the revision was broken at the time the
		// dump was made. Unfortunately, it also happens if the revision was
		// legitimately blank, so there's no way to tell the difference. To
		// be safe, we'll skip it and leave it broken

		echo "Revision $id is blank in the dump, may have been broken before export\n";

		return;
	}

	if ( !$id ) {
		// No ID, can't import
		echo "No id tag in revision, can't import\n";

		return;
	}

	// Find text row again
	$dbr = wfGetDB( DB_REPLICA );
	$oldId = $dbr->selectField( 'revision', 'rev_text_id', [ 'rev_id' => $id ], __METHOD__ );
	if ( !$oldId ) {
		echo "Missing revision row for rev_id $id\n";

		return;
	}

	// Compress the text
	// $text is passed by reference; the returned flags are stored in old_flags
	$flags = Revision::compressRevisionText( $text );

	// Update the text row
	$dbw = wfGetDB( DB_MASTER );
	$dbw->update( 'text',
		[ 'old_flags' => $flags, 'old_text' => $text ],
		[ 'old_id' => $oldId ],
		__METHOD__, [ 'LIMIT' => 1 ]
	);

	// Remove it from the unfixed list and add it to the fixed list
	unset( $this->errors['restore text'][$id] );
	$this->errors['fixed'][$id] = true;
}
535}
unserialize( $serialized)
wfTempDir()
Tries to get the system directory for temporary files.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfEscapeShellArg()
Version of escapeshellarg() that works better on Windows.
if( $line===false) $args
Definition cdb.php:63
Maintenance script to do various checks on external storage.
check( $fix=false, $xml='')
restoreText( $revIds, $xml)
importRevision(&$revision, &$importer)
const CONCAT_HEADER
error( $type, $msg, $ids)
checkExternalConcatBlobs( $externalConcatBlobs)
DB accessible external objects.
Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
MediaWikiServices is the service locator for the application scope of MediaWiki.
static compressRevisionText(&$text)
If $wgCompressRevisions is enabled, we will compress data.
const RAW
Definition Revision.php:100
XML file reader for the page data importer.
if(! $regexes) $dbr
Definition cleanup.php:94
print
Definition cleanup.php:99
global $optionsWithoutArgs
$res
Definition database.txt:21
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
do that in ParserLimitReportFormat instead use this to modify the parameters of the image all existing parser cache entries will be invalid To avoid you ll need to handle that somehow(e.g. with the RejectParserCacheValue hook) because MediaWiki won 't do it for you. & $defaults error
Definition hooks.txt:2581
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:1971
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition hooks.txt:2805
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:302
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname
$source
const DB_REPLICA
Definition defines.php:25
const DB_MASTER
Definition defines.php:26