MediaWiki REL1_33
checkStorage.php
<?php

use MediaWiki\MediaWikiServices;
use MediaWiki\Shell\Shell;

if ( !defined( 'MEDIAWIKI' ) ) {
	$optionsWithoutArgs = [ 'fix' ];
	require_once __DIR__ . '/../commandLine.inc';

	$cs = new CheckStorage;
	$fix = isset( $options['fix'] );
	$xml = $args[0] ?? false;
	$cs->check( $fix, $xml );
}
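
// Typical invocation (the script is normally found under maintenance/storage/,
// though the exact path depends on the checkout; the dump file name below is
// only an example):
//
//   php checkStorage.php --fix /path/to/pages-backup.xml
//
// Without --fix the script only reports problems.  The XML dump argument is
// optional and is only consulted when --fix is given, to feed restoreText().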

// ----------------------------------------------------------------------------------

/**
 * Maintenance script to do various checks on external storage.
 */
class CheckStorage {
	// PHP serialization prefix of a ConcatenatedGzipHistoryBlob object;
	// 27 is the length of the lowercased class name.
	const CONCAT_HEADER = 'O:27:"concatenatedgziphistoryblob"';

	public $oldIdMap, $errors;

	/** @var ExternalStoreDB */
	public $dbStore = null;

	public $errorDescriptions = [
		'restore text' => 'Damaged text, needs to be restored from a backup',
		'restore revision' => 'Damaged revision row, needs to be restored from a backup',
		'unfixable' => 'Unexpected errors with no automated fixing method',
		'fixed' => 'Errors already fixed',
		'fixable' => 'Errors which would already be fixed if --fix was specified',
	];

	function check( $fix = false, $xml = '' ) {
		$dbr = wfGetDB( DB_REPLICA );
		if ( $fix ) {
			print "Checking, will fix errors if possible...\n";
		} else {
			print "Checking...\n";
		}
		$maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', '', __METHOD__ );
		$chunkSize = 1000;
		$flagStats = [];
		$objectStats = [];
		$knownFlags = [ 'external', 'gzip', 'object', 'utf-8' ];
		$this->errors = [
			'restore text' => [],
			'restore revision' => [],
			'unfixable' => [],
			'fixed' => [],
			'fixable' => [],
		];

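		// Scan the revision table in fixed-size rev_id windows so that at most
		// $chunkSize revision rows are examined per iteration.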
		for ( $chunkStart = 1; $chunkStart < $maxRevId; $chunkStart += $chunkSize ) {
			$chunkEnd = $chunkStart + $chunkSize - 1;
			// print "$chunkStart of $maxRevId\n";

			// Fetch revision rows
			$this->oldIdMap = [];
			$dbr->ping();
			$res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ],
				[ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ );
			foreach ( $res as $row ) {
				$this->oldIdMap[$row->rev_id] = $row->rev_text_id;
			}

			if ( !count( $this->oldIdMap ) ) {
				continue;
			}

			// Fetch old_flags
			$missingTextRows = array_flip( $this->oldIdMap );
			$externalRevs = [];
			$objectRevs = [];
			$res = $dbr->select(
				'text',
				[ 'old_id', 'old_flags' ],
				[ 'old_id' => $this->oldIdMap ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				$flags = $row->old_flags;
				$id = $row->old_id;

				// Create flagStats row if it doesn't exist
				$flagStats = $flagStats + [ $flags => 0 ];
				// Increment counter
				$flagStats[$flags]++;

				// Not missing
				unset( $missingTextRows[$row->old_id] );

				// Check for external or object
				if ( $flags == '' ) {
					$flagArray = [];
				} else {
					$flagArray = explode( ',', $flags );
				}
				if ( in_array( 'external', $flagArray ) ) {
					$externalRevs[] = $id;
				} elseif ( in_array( 'object', $flagArray ) ) {
					$objectRevs[] = $id;
				}

				// Check for unrecognised flags
				if ( $flags == '0' ) {
					// This is a known bug from 2004
					// It's safe to just erase the old_flags field
					if ( $fix ) {
						$this->addError( 'fixed', "Warning: old_flags set to 0", $id );
						$dbw = wfGetDB( DB_MASTER );
						$dbw->ping();
						$dbw->update( 'text', [ 'old_flags' => '' ],
							[ 'old_id' => $id ], __METHOD__ );
						echo "Fixed\n";
					} else {
						$this->addError( 'fixable', "Warning: old_flags set to 0", $id );
					}
				} elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
					$this->addError( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
				}
			}

			// Output errors for any missing text rows
			foreach ( $missingTextRows as $oldId => $revId ) {
				$this->addError( 'restore revision', "Error: missing text row", $oldId );
			}

			// Verify external revisions
			$externalConcatBlobs = [];
			$externalNormalBlobs = [];
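			// External rows store a URL in old_text rather than the text itself.
			// Two shapes are handled by the parsing below:
			//   DB://cluster/id          one-part URL, text is a single blob row
			//   DB://cluster/id/itemid   two-part URL, text is an item inside a
			//                            concatenated history blob
			// The presence of a fourth path segment decides whether the row is
			// filed under $externalConcatBlobs or $externalNormalBlobs.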
			if ( count( $externalRevs ) ) {
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', 'old_text' ],
					[ 'old_id' => $externalRevs ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					$urlParts = explode( '://', $row->old_text, 2 );
					if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
						$this->addError( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
						continue;
					}
					list( $proto, ) = $urlParts;
					if ( $proto != 'DB' ) {
						$this->addError(
							'restore text',
							"Error: invalid external protocol \"$proto\"",
							$row->old_id );
						continue;
					}
					$path = explode( '/', $row->old_text );
					$cluster = $path[2];
					$id = $path[3];
					if ( isset( $path[4] ) ) {
						$externalConcatBlobs[$cluster][$id][] = $row->old_id;
					} else {
						$externalNormalBlobs[$cluster][$id][] = $row->old_id;
					}
				}
			}

			// Check external concat blobs for the right header
			$this->checkExternalConcatBlobs( $externalConcatBlobs );

			// Check external normal blobs for existence
			if ( count( $externalNormalBlobs ) ) {
				if ( is_null( $this->dbStore ) ) {
					$this->dbStore = new ExternalStoreDB;
				}
				foreach ( $externalNormalBlobs as $cluster => $xBlobIds ) {
					$blobIds = array_keys( $xBlobIds );
					$extDb =& $this->dbStore->getSlave( $cluster );
					$blobsTable = $this->dbStore->getTable( $extDb );
					$res = $extDb->select( $blobsTable,
						[ 'blob_id' ],
						[ 'blob_id' => $blobIds ],
						__METHOD__
					);
					foreach ( $res as $row ) {
						unset( $xBlobIds[$row->blob_id] );
					}
					// Print errors for missing blobs rows
					foreach ( $xBlobIds as $blobId => $oldId ) {
						$this->addError(
							'restore text',
							"Error: missing target $blobId for one-part ES URL",
							$oldId );
					}
				}
			}

			// Check local objects
			$dbr->ping();
			$concatBlobs = [];
			$curIds = [];
			if ( count( $objectRevs ) ) {
				$headerLength = 300;
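				// Each local object row starts with a PHP serialization header
				// such as O:15:"historyblobstub" (15 being the class name length),
				// so only the first $headerLength bytes are fetched: enough to
				// identify the class and, for stubs, to unserialize the whole
				// (small) object.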
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
					[ 'old_id' => $objectRevs ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					$oldId = $row->old_id;
					$matches = [];
					if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
						$this->addError( 'restore text', "Error: invalid object header", $oldId );
						continue;
					}

					$className = strtolower( $matches[2] );
					if ( strlen( $className ) != $matches[1] ) {
						$this->addError(
							'restore text',
							"Error: invalid object header, wrong class name length",
							$oldId
						);
						continue;
					}

					$objectStats = $objectStats + [ $className => 0 ];
					$objectStats[$className]++;

					switch ( $className ) {
						case 'concatenatedgziphistoryblob':
							// Good
							break;
						case 'historyblobstub':
						case 'historyblobcurstub':
							if ( strlen( $row->header ) == $headerLength ) {
								$this->addError( 'unfixable', "Error: overlong stub header", $oldId );
								break;
							}
							$stubObj = unserialize( $row->header );
							if ( !is_object( $stubObj ) ) {
								$this->addError( 'restore text', "Error: unable to unserialize stub object", $oldId );
								break;
							}
							if ( $className == 'historyblobstub' ) {
								$concatBlobs[$stubObj->mOldId][] = $oldId;
							} else {
								$curIds[$stubObj->mCurId][] = $oldId;
							}
							break;
						default:
							$this->addError( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
					}
				}
			}

			// Check local concat blob validity
			$externalConcatBlobs = [];
			if ( count( $concatBlobs ) ) {
				$headerLength = 300;
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
					[ 'old_id' => array_keys( $concatBlobs ) ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					$flags = explode( ',', $row->old_flags );
					if ( in_array( 'external', $flags ) ) {
						// Concat blob is in external storage?
						if ( in_array( 'object', $flags ) ) {
							$urlParts = explode( '/', $row->header );
							if ( $urlParts[0] != 'DB:' ) {
								$this->addError(
									'unfixable',
									"Error: unrecognised external storage type \"{$urlParts[0]}\"",
									$row->old_id
								);
							} else {
								$cluster = $urlParts[2];
								$id = $urlParts[3];
								if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
									$externalConcatBlobs[$cluster][$id] = [];
								}
								$externalConcatBlobs[$cluster][$id] = array_merge(
									$externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
								);
							}
						} else {
							$this->addError(
								'unfixable',
								"Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
								$concatBlobs[$row->old_id] );
						}
					} elseif ( strcasecmp(
						substr( $row->header, 0, strlen( self::CONCAT_HEADER ) ),
						self::CONCAT_HEADER
					) ) {
						$this->addError(
							'restore text',
							"Error: Incorrect object header for concat bulk row {$row->old_id}",
							$concatBlobs[$row->old_id]
						);
					} # else good

					unset( $concatBlobs[$row->old_id] );
				}
			}

			// Check targets of unresolved stubs
			$this->checkExternalConcatBlobs( $externalConcatBlobs );
			// next chunk
		}

		print "\n\nErrors:\n";
		foreach ( $this->errors as $name => $errors ) {
			if ( count( $errors ) ) {
				$description = $this->errorDescriptions[$name];
				echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
			}
		}

		if ( count( $this->errors['restore text'] ) && $fix ) {
			if ( (string)$xml !== '' ) {
				$this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
			} else {
				echo "Can't fix text, no XML backup specified\n";
			}
		}

		print "\nFlag statistics:\n";
		$total = array_sum( $flagStats );
		foreach ( $flagStats as $flag => $count ) {
			printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
		}
		print "\nLocal object statistics:\n";
		$total = array_sum( $objectStats );
		foreach ( $objectStats as $className => $count ) {
			printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
		}
	}

	function addError( $type, $msg, $ids ) {
		if ( is_array( $ids ) && count( $ids ) == 1 ) {
			$ids = reset( $ids );
		}
		if ( is_array( $ids ) ) {
			$revIds = [];
			foreach ( $ids as $id ) {
				$revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) );
			}
			print "$msg in text rows " . implode( ', ', $ids ) .
				", revisions " . implode( ', ', $revIds ) . "\n";
		} else {
			$id = $ids;
			$revIds = array_keys( $this->oldIdMap, $id );
			if ( count( $revIds ) == 1 ) {
				print "$msg in old_id $id, rev_id {$revIds[0]}\n";
			} else {
				print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
			}
		}
		$this->errors[$type] = $this->errors[$type] + array_flip( $revIds );
	}

	function checkExternalConcatBlobs( $externalConcatBlobs ) {
		if ( !count( $externalConcatBlobs ) ) {
			return;
		}

		if ( is_null( $this->dbStore ) ) {
			$this->dbStore = new ExternalStoreDB;
		}

		foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
			$blobIds = array_keys( $oldIds );
			$extDb =& $this->dbStore->getSlave( $cluster );
			$blobsTable = $this->dbStore->getTable( $extDb );
			$headerLength = strlen( self::CONCAT_HEADER );
			$res = $extDb->select( $blobsTable,
				[ 'blob_id', "LEFT(blob_text, $headerLength) AS header" ],
				[ 'blob_id' => $blobIds ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				if ( strcasecmp( $row->header, self::CONCAT_HEADER ) ) {
					$this->addError(
						'restore text',
						"Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
						$oldIds[$row->blob_id]
					);
				}
				unset( $oldIds[$row->blob_id] );
			}

			// Print errors for missing blobs rows
			foreach ( $oldIds as $blobId => $oldIds2 ) {
				$this->addError(
					'restore text',
					"Error: missing target $cluster/$blobId for two-part ES URL",
					$oldIds2
				);
			}
		}
	}

	function restoreText( $revIds, $xml ) {
		global $wgDBname;
		$tmpDir = wfTempDir();

		if ( !count( $revIds ) ) {
			return;
		}

		print "Restoring text from XML backup...\n";

		$revFileName = "$tmpDir/broken-revlist-$wgDBname";
		$filteredXmlFileName = "$tmpDir/filtered-$wgDBname.xml";

		// Write revision list
		if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) {
			echo "Error writing revision list, can't restore text\n";

			return;
		}

		// Run mwdumper
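		// The command assembled below is roughly equivalent to (file names are
		// illustrative):
		//   mwdumper '--output=file:/tmp/filtered-mywiki.xml' \
		//       '--filter=revlist:/tmp/broken-revlist-mywiki' backup.xml
		// mwdumper has to be resolvable via PATH for passthru() to run it.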
		echo "Filtering XML dump...\n";
		$exitStatus = 0;
		passthru( 'mwdumper ' .
			Shell::escape(
				"--output=file:$filteredXmlFileName",
				"--filter=revlist:$revFileName",
				$xml
			), $exitStatus
		);

		if ( $exitStatus ) {
			echo "mwdumper died with exit status $exitStatus\n";

			return;
		}

		$file = fopen( $filteredXmlFileName, 'r' );
		if ( !$file ) {
			echo "Unable to open filtered XML file\n";

			return;
		}

		$dbr = wfGetDB( DB_REPLICA );
		$dbw = wfGetDB( DB_MASTER );
		$dbr->ping();
		$dbw->ping();

		$source = new ImportStreamSource( $file );
		$importer = new WikiImporter(
			$source,
			MediaWikiServices::getInstance()->getMainConfig()
		);
		$importer->setRevisionCallback( [ $this, 'importRevision' ] );
		$importer->setNoticeCallback( function ( $msg, $params ) {
			echo wfMessage( $msg, $params )->text() . "\n";
		} );
		$importer->doImport();
	}

	function importRevision( &$revision, &$importer ) {
		$id = $revision->getID();
		$content = $revision->getContent( Revision::RAW );
		$id = $id ?: '';

		if ( $content === null ) {
			echo "Revision $id is broken, we have no content available\n";

			return;
		}

		$text = $content->serialize();
		if ( $text === '' ) {
			// This is what happens if the revision was broken at the time the
			// dump was made. Unfortunately, it also happens if the revision was
			// legitimately blank, so there's no way to tell the difference. To
			// be safe, we'll skip it and leave it broken

			echo "Revision $id is blank in the dump, may have been broken before export\n";

			return;
		}

		if ( !$id ) {
			// No ID, can't import
			echo "No id tag in revision, can't import\n";

			return;
		}

		// Find text row again
		$dbr = wfGetDB( DB_REPLICA );
		$oldId = $dbr->selectField( 'revision', 'rev_text_id', [ 'rev_id' => $id ], __METHOD__ );
		if ( !$oldId ) {
			echo "Missing revision row for rev_id $id\n";

			return;
		}

		// Compress the text
		$flags = Revision::compressRevisionText( $text );

		// Update the text row
		$dbw = wfGetDB( DB_MASTER );
		$dbw->update( 'text',
			[ 'old_flags' => $flags, 'old_text' => $text ],
			[ 'old_id' => $oldId ],
			__METHOD__, [ 'LIMIT' => 1 ]
		);

		// Remove it from the unfixed list and add it to the fixed list
		unset( $this->errors['restore text'][$id] );
		$this->errors['fixed'][$id] = true;
	}
}