MediaWiki REL1_29
checkStorage.php
Go to the documentation of this file.
<?php

if ( !defined( 'MEDIAWIKI' ) ) {
	$optionsWithoutArgs = [ 'fix' ];
	require_once __DIR__ . '/../commandLine.inc';

	// Run the checker directly when invoked from the command line.
	$cs = new CheckStorage;
	$fix = isset( $options['fix'] );
	// Optional first argument: path to an XML dump used for text restoration.
	$xml = isset( $args[0] ) ? $args[0] : false;
	$cs->check( $fix, $xml );
}
39
40// ----------------------------------------------------------------------------------
41
49 const CONCAT_HEADER = 'O:27:"concatenatedgziphistoryblob"';
51 public $dbStore = null;
52
54 'restore text' => 'Damaged text, need to be restored from a backup',
55 'restore revision' => 'Damaged revision row, need to be restored from a backup',
56 'unfixable' => 'Unexpected errors with no automated fixing method',
57 'fixed' => 'Errors already fixed',
58 'fixable' => 'Errors which would already be fixed if --fix was specified',
59 ];
60
61 function check( $fix = false, $xml = '' ) {
63 if ( $fix ) {
64 print "Checking, will fix errors if possible...\n";
65 } else {
66 print "Checking...\n";
67 }
68 $maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
69 $chunkSize = 1000;
70 $flagStats = [];
71 $objectStats = [];
72 $knownFlags = [ 'external', 'gzip', 'object', 'utf-8' ];
73 $this->errors = [
74 'restore text' => [],
75 'restore revision' => [],
76 'unfixable' => [],
77 'fixed' => [],
78 'fixable' => [],
79 ];
80
81 for ( $chunkStart = 1; $chunkStart < $maxRevId; $chunkStart += $chunkSize ) {
82 $chunkEnd = $chunkStart + $chunkSize - 1;
83 // print "$chunkStart of $maxRevId\n";
84
85 // Fetch revision rows
86 $this->oldIdMap = [];
87 $dbr->ping();
88 $res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ],
89 [ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ );
90 foreach ( $res as $row ) {
91 $this->oldIdMap[$row->rev_id] = $row->rev_text_id;
92 }
93 $dbr->freeResult( $res );
94
95 if ( !count( $this->oldIdMap ) ) {
96 continue;
97 }
98
99 // Fetch old_flags
100 $missingTextRows = array_flip( $this->oldIdMap );
101 $externalRevs = [];
102 $objectRevs = [];
103 $res = $dbr->select( 'text', [ 'old_id', 'old_flags' ],
104 'old_id IN (' . implode( ',', $this->oldIdMap ) . ')', __METHOD__ );
105 foreach ( $res as $row ) {
109 $flags = $row->old_flags;
110 $id = $row->old_id;
111
112 // Create flagStats row if it doesn't exist
113 $flagStats = $flagStats + [ $flags => 0 ];
114 // Increment counter
115 $flagStats[$flags]++;
116
117 // Not missing
118 unset( $missingTextRows[$row->old_id] );
119
120 // Check for external or object
121 if ( $flags == '' ) {
122 $flagArray = [];
123 } else {
124 $flagArray = explode( ',', $flags );
125 }
126 if ( in_array( 'external', $flagArray ) ) {
127 $externalRevs[] = $id;
128 } elseif ( in_array( 'object', $flagArray ) ) {
129 $objectRevs[] = $id;
130 }
131
132 // Check for unrecognised flags
133 if ( $flags == '0' ) {
134 // This is a known bug from 2004
135 // It's safe to just erase the old_flags field
136 if ( $fix ) {
137 $this->error( 'fixed', "Warning: old_flags set to 0", $id );
138 $dbw = wfGetDB( DB_MASTER );
139 $dbw->ping();
140 $dbw->update( 'text', [ 'old_flags' => '' ],
141 [ 'old_id' => $id ], __METHOD__ );
142 echo "Fixed\n";
143 } else {
144 $this->error( 'fixable', "Warning: old_flags set to 0", $id );
145 }
146 } elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
147 $this->error( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
148 }
149 }
150 $dbr->freeResult( $res );
151
152 // Output errors for any missing text rows
153 foreach ( $missingTextRows as $oldId => $revId ) {
154 $this->error( 'restore revision', "Error: missing text row", $oldId );
155 }
156
157 // Verify external revisions
158 $externalConcatBlobs = [];
159 $externalNormalBlobs = [];
160 if ( count( $externalRevs ) ) {
161 $res = $dbr->select( 'text', [ 'old_id', 'old_flags', 'old_text' ],
162 [ 'old_id IN (' . implode( ',', $externalRevs ) . ')' ], __METHOD__ );
163 foreach ( $res as $row ) {
164 $urlParts = explode( '://', $row->old_text, 2 );
165 if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
166 $this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
167 continue;
168 }
169 list( $proto, ) = $urlParts;
170 if ( $proto != 'DB' ) {
171 $this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id );
172 continue;
173 }
174 $path = explode( '/', $row->old_text );
175 $cluster = $path[2];
176 $id = $path[3];
177 if ( isset( $path[4] ) ) {
178 $externalConcatBlobs[$cluster][$id][] = $row->old_id;
179 } else {
180 $externalNormalBlobs[$cluster][$id][] = $row->old_id;
181 }
182 }
183 $dbr->freeResult( $res );
184 }
185
186 // Check external concat blobs for the right header
187 $this->checkExternalConcatBlobs( $externalConcatBlobs );
188
189 // Check external normal blobs for existence
190 if ( count( $externalNormalBlobs ) ) {
191 if ( is_null( $this->dbStore ) ) {
192 $this->dbStore = new ExternalStoreDB;
193 }
194 foreach ( $externalConcatBlobs as $cluster => $xBlobIds ) {
195 $blobIds = array_keys( $xBlobIds );
196 $extDb =& $this->dbStore->getSlave( $cluster );
197 $blobsTable = $this->dbStore->getTable( $extDb );
198 $res = $extDb->select( $blobsTable,
199 [ 'blob_id' ],
200 [ 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ], __METHOD__ );
201 foreach ( $res as $row ) {
202 unset( $xBlobIds[$row->blob_id] );
203 }
204 $extDb->freeResult( $res );
205 // Print errors for missing blobs rows
206 foreach ( $xBlobIds as $blobId => $oldId ) {
207 $this->error( 'restore text', "Error: missing target $blobId for one-part ES URL", $oldId );
208 }
209 }
210 }
211
212 // Check local objects
213 $dbr->ping();
214 $concatBlobs = [];
215 $curIds = [];
216 if ( count( $objectRevs ) ) {
217 $headerLength = 300;
218 $res = $dbr->select(
219 'text',
220 [ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
221 [ 'old_id IN (' . implode( ',', $objectRevs ) . ')' ],
222 __METHOD__
223 );
224 foreach ( $res as $row ) {
225 $oldId = $row->old_id;
226 $matches = [];
227 if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
228 $this->error( 'restore text', "Error: invalid object header", $oldId );
229 continue;
230 }
231
232 $className = strtolower( $matches[2] );
233 if ( strlen( $className ) != $matches[1] ) {
234 $this->error(
235 'restore text',
236 "Error: invalid object header, wrong class name length",
237 $oldId
238 );
239 continue;
240 }
241
242 $objectStats = $objectStats + [ $className => 0 ];
243 $objectStats[$className]++;
244
245 switch ( $className ) {
246 case 'concatenatedgziphistoryblob':
247 // Good
248 break;
249 case 'historyblobstub':
250 case 'historyblobcurstub':
251 if ( strlen( $row->header ) == $headerLength ) {
252 $this->error( 'unfixable', "Error: overlong stub header", $oldId );
253 continue;
254 }
255 $stubObj = unserialize( $row->header );
256 if ( !is_object( $stubObj ) ) {
257 $this->error( 'restore text', "Error: unable to unserialize stub object", $oldId );
258 continue;
259 }
260 if ( $className == 'historyblobstub' ) {
261 $concatBlobs[$stubObj->mOldId][] = $oldId;
262 } else {
263 $curIds[$stubObj->mCurId][] = $oldId;
264 }
265 break;
266 default:
267 $this->error( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
268 }
269 }
270 $dbr->freeResult( $res );
271 }
272
273 // Check local concat blob validity
274 $externalConcatBlobs = [];
275 if ( count( $concatBlobs ) ) {
276 $headerLength = 300;
277 $res = $dbr->select(
278 'text',
279 [ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
280 [ 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ],
281 __METHOD__
282 );
283 foreach ( $res as $row ) {
284 $flags = explode( ',', $row->old_flags );
285 if ( in_array( 'external', $flags ) ) {
286 // Concat blob is in external storage?
287 if ( in_array( 'object', $flags ) ) {
288 $urlParts = explode( '/', $row->header );
289 if ( $urlParts[0] != 'DB:' ) {
290 $this->error(
291 'unfixable',
292 "Error: unrecognised external storage type \"{$urlParts[0]}",
293 $row->old_id
294 );
295 } else {
296 $cluster = $urlParts[2];
297 $id = $urlParts[3];
298 if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
299 $externalConcatBlobs[$cluster][$id] = [];
300 }
301 $externalConcatBlobs[$cluster][$id] = array_merge(
302 $externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
303 );
304 }
305 } else {
306 $this->error(
307 'unfixable',
308 "Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
309 $concatBlobs[$row->old_id] );
310 }
311 } elseif ( strcasecmp(
312 substr( $row->header, 0, strlen( self::CONCAT_HEADER ) ),
313 self::CONCAT_HEADER
314 ) ) {
315 $this->error(
316 'restore text',
317 "Error: Incorrect object header for concat bulk row {$row->old_id}",
318 $concatBlobs[$row->old_id]
319 );
320 } # else good
321
322 unset( $concatBlobs[$row->old_id] );
323 }
324 $dbr->freeResult( $res );
325 }
326
327 // Check targets of unresolved stubs
328 $this->checkExternalConcatBlobs( $externalConcatBlobs );
329 // next chunk
330 }
331
332 print "\n\nErrors:\n";
333 foreach ( $this->errors as $name => $errors ) {
334 if ( count( $errors ) ) {
335 $description = $this->errorDescriptions[$name];
336 echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
337 }
338 }
339
340 if ( count( $this->errors['restore text'] ) && $fix ) {
341 if ( (string)$xml !== '' ) {
342 $this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
343 } else {
344 echo "Can't fix text, no XML backup specified\n";
345 }
346 }
347
348 print "\nFlag statistics:\n";
349 $total = array_sum( $flagStats );
350 foreach ( $flagStats as $flag => $count ) {
351 printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
352 }
353 print "\nLocal object statistics:\n";
354 $total = array_sum( $objectStats );
355 foreach ( $objectStats as $className => $count ) {
356 printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
357 }
358 }
359
360 function error( $type, $msg, $ids ) {
361 if ( is_array( $ids ) && count( $ids ) == 1 ) {
362 $ids = reset( $ids );
363 }
364 if ( is_array( $ids ) ) {
365 $revIds = [];
366 foreach ( $ids as $id ) {
367 $revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) );
368 }
369 print "$msg in text rows " . implode( ', ', $ids ) .
370 ", revisions " . implode( ', ', $revIds ) . "\n";
371 } else {
372 $id = $ids;
373 $revIds = array_keys( $this->oldIdMap, $id );
374 if ( count( $revIds ) == 1 ) {
375 print "$msg in old_id $id, rev_id {$revIds[0]}\n";
376 } else {
377 print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
378 }
379 }
380 $this->errors[$type] = $this->errors[$type] + array_flip( $revIds );
381 }
382
383 function checkExternalConcatBlobs( $externalConcatBlobs ) {
384 if ( !count( $externalConcatBlobs ) ) {
385 return;
386 }
387
388 if ( is_null( $this->dbStore ) ) {
389 $this->dbStore = new ExternalStoreDB;
390 }
391
392 foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
393 $blobIds = array_keys( $oldIds );
394 $extDb =& $this->dbStore->getSlave( $cluster );
395 $blobsTable = $this->dbStore->getTable( $extDb );
396 $headerLength = strlen( self::CONCAT_HEADER );
397 $res = $extDb->select( $blobsTable,
398 [ 'blob_id', "LEFT(blob_text, $headerLength) AS header" ],
399 [ 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ], __METHOD__ );
400 foreach ( $res as $row ) {
401 if ( strcasecmp( $row->header, self::CONCAT_HEADER ) ) {
402 $this->error(
403 'restore text',
404 "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
405 $oldIds[$row->blob_id]
406 );
407 }
408 unset( $oldIds[$row->blob_id] );
409 }
410 $extDb->freeResult( $res );
411
412 // Print errors for missing blobs rows
413 foreach ( $oldIds as $blobId => $oldIds2 ) {
414 $this->error(
415 'restore text',
416 "Error: missing target $cluster/$blobId for two-part ES URL",
417 $oldIds2
418 );
419 }
420 }
421 }
422
423 function restoreText( $revIds, $xml ) {
425 $tmpDir = wfTempDir();
426
427 if ( !count( $revIds ) ) {
428 return;
429 }
430
431 print "Restoring text from XML backup...\n";
432
433 $revFileName = "$tmpDir/broken-revlist-$wgDBname";
434 $filteredXmlFileName = "$tmpDir/filtered-$wgDBname.xml";
435
436 // Write revision list
437 if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) {
438 echo "Error writing revision list, can't restore text\n";
439
440 return;
441 }
442
443 // Run mwdumper
444 echo "Filtering XML dump...\n";
445 $exitStatus = 0;
446 passthru( 'mwdumper ' .
448 "--output=file:$filteredXmlFileName",
449 "--filter=revlist:$revFileName",
450 $xml
451 ), $exitStatus
452 );
453
454 if ( $exitStatus ) {
455 echo "mwdumper died with exit status $exitStatus\n";
456
457 return;
458 }
459
460 $file = fopen( $filteredXmlFileName, 'r' );
461 if ( !$file ) {
462 echo "Unable to open filtered XML file\n";
463
464 return;
465 }
466
468 $dbw = wfGetDB( DB_MASTER );
469 $dbr->ping();
470 $dbw->ping();
471
472 $source = new ImportStreamSource( $file );
473 $importer = new WikiImporter(
474 $source,
475 MediaWikiServices::getInstance()->getMainConfig()
476 );
477 $importer->setRevisionCallback( [ $this, 'importRevision' ] );
478 $importer->doImport();
479 }
480
481 function importRevision( &$revision, &$importer ) {
482 $id = $revision->getID();
483 $content = $revision->getContent( Revision::RAW );
484 $id = $id ? $id : '';
485
486 if ( $content === null ) {
487 echo "Revision $id is broken, we have no content available\n";
488
489 return;
490 }
491
492 $text = $content->serialize();
493 if ( $text === '' ) {
494 // This is what happens if the revision was broken at the time the
495 // dump was made. Unfortunately, it also happens if the revision was
496 // legitimately blank, so there's no way to tell the difference. To
497 // be safe, we'll skip it and leave it broken
498
499 echo "Revision $id is blank in the dump, may have been broken before export\n";
500
501 return;
502 }
503
504 if ( !$id ) {
505 // No ID, can't import
506 echo "No id tag in revision, can't import\n";
507
508 return;
509 }
510
511 // Find text row again
513 $oldId = $dbr->selectField( 'revision', 'rev_text_id', [ 'rev_id' => $id ], __METHOD__ );
514 if ( !$oldId ) {
515 echo "Missing revision row for rev_id $id\n";
516
517 return;
518 }
519
520 // Compress the text
522
523 // Update the text row
524 $dbw = wfGetDB( DB_MASTER );
525 $dbw->update( 'text',
526 [ 'old_flags' => $flags, 'old_text' => $text ],
527 [ 'old_id' => $oldId ],
528 __METHOD__, [ 'LIMIT' => 1 ]
529 );
530
531 // Remove it from the unfixed list and add it to the fixed list
532 unset( $this->errors['restore text'][$id] );
533 $this->errors['fixed'][$id] = true;
534 }
535}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
unserialize( $serialized)
wfTempDir()
Tries to get the system directory for temporary files.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfEscapeShellArg()
Version of escapeshellarg() that works better on Windows.
if( $line===false) $args
Definition cdb.php:63
Maintenance script to do various checks on external storage.
check( $fix=false, $xml='')
restoreText( $revIds, $xml)
importRevision(&$revision, &$importer)
const CONCAT_HEADER
error( $type, $msg, $ids)
checkExternalConcatBlobs( $externalConcatBlobs)
DB accessable external objects.
Imports a XML dump from a file (either from file upload, files on disk, or HTTP)
MediaWikiServices is the service locator for the application scope of MediaWiki.
static compressRevisionText(&$text)
If $wgCompressRevisions is enabled, we will compress data.
const RAW
Definition Revision.php:100
XML file reader for the page data importer.
global $optionsWithoutArgs
$res
Definition database.txt:21
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
when a variable name is used in a it is silently declared as a new local masking the global
Definition design.txt:95
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
while(( $__line=Maintenance::readconsole()) !==false) print
Definition eval.php:63
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist Do not use this to implement individual filters if they are compatible with the ChangesListFilter and ChangesListFilterGroup structure use sub classes of those in conjunction with the ChangesListSpecialPageStructuredFilters hook This hook can be used to implement filters that do not implement that or custom behavior that is not an individual filter e g Watchlist and Watchlist you will want to construct new ChangesListBooleanFilter or ChangesListStringOptionsFilter objects When constructing you specify which group they belong to You can reuse existing or create your you must register them with $special registerFilterGroup removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition hooks.txt:1102
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist Do not use this to implement individual filters if they are compatible with the ChangesListFilter and ChangesListFilterGroup structure use sub classes of those in conjunction with the ChangesListSpecialPageStructuredFilters hook This hook can be used to implement filters that do not implement that or custom behavior that is not an individual filter e g Watchlist and Watchlist you will want to construct new ChangesListBooleanFilter or ChangesListStringOptionsFilter objects When constructing you specify which group they belong to You can reuse existing or create your you must register them with $special registerFilterGroup removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition hooks.txt:1100
if the prop value should be in the metadata multi language array can modify can modify indexed by page_id indexed by prefixed DB keys can modify can modify can modify this should be populated with an alert message to that effect to be fed to an HTMLForm object and populate $result with the reason in the form of error messages should be plain text with no special etc to show that they re errors
Definition hooks.txt:1722
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition hooks.txt:2753
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition hooks.txt:2604
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:304
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist Do not use this to implement individual filters if they are compatible with the ChangesListFilter and ChangesListFilterGroup structure use sub classes of those in conjunction with the ChangesListSpecialPageStructuredFilters hook This hook can be used to implement filters that do not implement that or custom behavior that is not an individual filter e g Watchlist and Watchlist you will want to construct new ChangesListBooleanFilter or ChangesListStringOptionsFilter objects When constructing you specify which group they belong to You can reuse existing or create your you must register them with $special registerFilterGroup removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context $revId
Definition hooks.txt:1101
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname
$source
const DB_REPLICA
Definition defines.php:25
const DB_MASTER
Definition defines.php:26