MediaWiki REL1_28
checkStorage.php
Go to the documentation of this file.
<?php
// Fsck for MediaWiki's text storage: checks the text table (and external
// storage) for damage, optionally repairing what it can.
//
// When this file is run directly from the command line (the MEDIAWIKI entry
// point is not defined), bootstrap the maintenance CLI environment and run
// the check immediately.
if ( !defined( 'MEDIAWIKI' ) ) {
	$optionsWithoutArgs = [ 'fix' ];
	require_once __DIR__ . '/../commandLine.inc';

	$cs = new CheckStorage;
	// --fix: attempt automated repairs instead of only reporting problems
	$fix = isset( $options['fix'] );
	// Optional first argument: path to an XML dump used to restore damaged text
	$xml = isset( $args[0] ) ? $args[0] : false;
	$cs->check( $fix, $xml );
}
37
38// ----------------------------------------------------------------------------------
39
47 const CONCAT_HEADER = 'O:27:"concatenatedgziphistoryblob"';
49 public $dbStore = null;
50
52 'restore text' => 'Damaged text, need to be restored from a backup',
53 'restore revision' => 'Damaged revision row, need to be restored from a backup',
54 'unfixable' => 'Unexpected errors with no automated fixing method',
55 'fixed' => 'Errors already fixed',
56 'fixable' => 'Errors which would already be fixed if --fix was specified',
57 ];
58
59 function check( $fix = false, $xml = '' ) {
61 if ( $fix ) {
62 print "Checking, will fix errors if possible...\n";
63 } else {
64 print "Checking...\n";
65 }
66 $maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
67 $chunkSize = 1000;
68 $flagStats = [];
69 $objectStats = [];
70 $knownFlags = [ 'external', 'gzip', 'object', 'utf-8' ];
71 $this->errors = [
72 'restore text' => [],
73 'restore revision' => [],
74 'unfixable' => [],
75 'fixed' => [],
76 'fixable' => [],
77 ];
78
79 for ( $chunkStart = 1; $chunkStart < $maxRevId; $chunkStart += $chunkSize ) {
80 $chunkEnd = $chunkStart + $chunkSize - 1;
81 // print "$chunkStart of $maxRevId\n";
82
83 // Fetch revision rows
84 $this->oldIdMap = [];
85 $dbr->ping();
86 $res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ],
87 [ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ );
88 foreach ( $res as $row ) {
89 $this->oldIdMap[$row->rev_id] = $row->rev_text_id;
90 }
91 $dbr->freeResult( $res );
92
93 if ( !count( $this->oldIdMap ) ) {
94 continue;
95 }
96
97 // Fetch old_flags
98 $missingTextRows = array_flip( $this->oldIdMap );
99 $externalRevs = [];
100 $objectRevs = [];
101 $res = $dbr->select( 'text', [ 'old_id', 'old_flags' ],
102 'old_id IN (' . implode( ',', $this->oldIdMap ) . ')', __METHOD__ );
103 foreach ( $res as $row ) {
107 $flags = $row->old_flags;
108 $id = $row->old_id;
109
110 // Create flagStats row if it doesn't exist
111 $flagStats = $flagStats + [ $flags => 0 ];
112 // Increment counter
113 $flagStats[$flags]++;
114
115 // Not missing
116 unset( $missingTextRows[$row->old_id] );
117
118 // Check for external or object
119 if ( $flags == '' ) {
120 $flagArray = [];
121 } else {
122 $flagArray = explode( ',', $flags );
123 }
124 if ( in_array( 'external', $flagArray ) ) {
125 $externalRevs[] = $id;
126 } elseif ( in_array( 'object', $flagArray ) ) {
127 $objectRevs[] = $id;
128 }
129
130 // Check for unrecognised flags
131 if ( $flags == '0' ) {
132 // This is a known bug from 2004
133 // It's safe to just erase the old_flags field
134 if ( $fix ) {
135 $this->error( 'fixed', "Warning: old_flags set to 0", $id );
136 $dbw = wfGetDB( DB_MASTER );
137 $dbw->ping();
138 $dbw->update( 'text', [ 'old_flags' => '' ],
139 [ 'old_id' => $id ], __METHOD__ );
140 echo "Fixed\n";
141 } else {
142 $this->error( 'fixable', "Warning: old_flags set to 0", $id );
143 }
144 } elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
145 $this->error( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
146 }
147 }
148 $dbr->freeResult( $res );
149
150 // Output errors for any missing text rows
151 foreach ( $missingTextRows as $oldId => $revId ) {
152 $this->error( 'restore revision', "Error: missing text row", $oldId );
153 }
154
155 // Verify external revisions
156 $externalConcatBlobs = [];
157 $externalNormalBlobs = [];
158 if ( count( $externalRevs ) ) {
159 $res = $dbr->select( 'text', [ 'old_id', 'old_flags', 'old_text' ],
160 [ 'old_id IN (' . implode( ',', $externalRevs ) . ')' ], __METHOD__ );
161 foreach ( $res as $row ) {
162 $urlParts = explode( '://', $row->old_text, 2 );
163 if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
164 $this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
165 continue;
166 }
167 list( $proto, ) = $urlParts;
168 if ( $proto != 'DB' ) {
169 $this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id );
170 continue;
171 }
172 $path = explode( '/', $row->old_text );
173 $cluster = $path[2];
174 $id = $path[3];
175 if ( isset( $path[4] ) ) {
176 $externalConcatBlobs[$cluster][$id][] = $row->old_id;
177 } else {
178 $externalNormalBlobs[$cluster][$id][] = $row->old_id;
179 }
180 }
181 $dbr->freeResult( $res );
182 }
183
184 // Check external concat blobs for the right header
185 $this->checkExternalConcatBlobs( $externalConcatBlobs );
186
187 // Check external normal blobs for existence
188 if ( count( $externalNormalBlobs ) ) {
189 if ( is_null( $this->dbStore ) ) {
190 $this->dbStore = new ExternalStoreDB;
191 }
192 foreach ( $externalConcatBlobs as $cluster => $xBlobIds ) {
193 $blobIds = array_keys( $xBlobIds );
194 $extDb =& $this->dbStore->getSlave( $cluster );
195 $blobsTable = $this->dbStore->getTable( $extDb );
196 $res = $extDb->select( $blobsTable,
197 [ 'blob_id' ],
198 [ 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ], __METHOD__ );
199 foreach ( $res as $row ) {
200 unset( $xBlobIds[$row->blob_id] );
201 }
202 $extDb->freeResult( $res );
203 // Print errors for missing blobs rows
204 foreach ( $xBlobIds as $blobId => $oldId ) {
205 $this->error( 'restore text', "Error: missing target $blobId for one-part ES URL", $oldId );
206 }
207 }
208 }
209
210 // Check local objects
211 $dbr->ping();
212 $concatBlobs = [];
213 $curIds = [];
214 if ( count( $objectRevs ) ) {
215 $headerLength = 300;
216 $res = $dbr->select(
217 'text',
218 [ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
219 [ 'old_id IN (' . implode( ',', $objectRevs ) . ')' ],
220 __METHOD__
221 );
222 foreach ( $res as $row ) {
223 $oldId = $row->old_id;
224 $matches = [];
225 if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
226 $this->error( 'restore text', "Error: invalid object header", $oldId );
227 continue;
228 }
229
230 $className = strtolower( $matches[2] );
231 if ( strlen( $className ) != $matches[1] ) {
232 $this->error(
233 'restore text',
234 "Error: invalid object header, wrong class name length",
235 $oldId
236 );
237 continue;
238 }
239
240 $objectStats = $objectStats + [ $className => 0 ];
241 $objectStats[$className]++;
242
243 switch ( $className ) {
244 case 'concatenatedgziphistoryblob':
245 // Good
246 break;
247 case 'historyblobstub':
248 case 'historyblobcurstub':
249 if ( strlen( $row->header ) == $headerLength ) {
250 $this->error( 'unfixable', "Error: overlong stub header", $oldId );
251 continue;
252 }
253 $stubObj = unserialize( $row->header );
254 if ( !is_object( $stubObj ) ) {
255 $this->error( 'restore text', "Error: unable to unserialize stub object", $oldId );
256 continue;
257 }
258 if ( $className == 'historyblobstub' ) {
259 $concatBlobs[$stubObj->mOldId][] = $oldId;
260 } else {
261 $curIds[$stubObj->mCurId][] = $oldId;
262 }
263 break;
264 default:
265 $this->error( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
266 }
267 }
268 $dbr->freeResult( $res );
269 }
270
271 // Check local concat blob validity
272 $externalConcatBlobs = [];
273 if ( count( $concatBlobs ) ) {
274 $headerLength = 300;
275 $res = $dbr->select(
276 'text',
277 [ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
278 [ 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ],
279 __METHOD__
280 );
281 foreach ( $res as $row ) {
282 $flags = explode( ',', $row->old_flags );
283 if ( in_array( 'external', $flags ) ) {
284 // Concat blob is in external storage?
285 if ( in_array( 'object', $flags ) ) {
286 $urlParts = explode( '/', $row->header );
287 if ( $urlParts[0] != 'DB:' ) {
288 $this->error(
289 'unfixable',
290 "Error: unrecognised external storage type \"{$urlParts[0]}",
291 $row->old_id
292 );
293 } else {
294 $cluster = $urlParts[2];
295 $id = $urlParts[3];
296 if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
297 $externalConcatBlobs[$cluster][$id] = [];
298 }
299 $externalConcatBlobs[$cluster][$id] = array_merge(
300 $externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
301 );
302 }
303 } else {
304 $this->error(
305 'unfixable',
306 "Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
307 $concatBlobs[$row->old_id] );
308 }
309 } elseif ( strcasecmp(
310 substr( $row->header, 0, strlen( self::CONCAT_HEADER ) ),
311 self::CONCAT_HEADER
312 ) ) {
313 $this->error(
314 'restore text',
315 "Error: Incorrect object header for concat bulk row {$row->old_id}",
316 $concatBlobs[$row->old_id]
317 );
318 } # else good
319
320 unset( $concatBlobs[$row->old_id] );
321 }
322 $dbr->freeResult( $res );
323 }
324
325 // Check targets of unresolved stubs
326 $this->checkExternalConcatBlobs( $externalConcatBlobs );
327 // next chunk
328 }
329
330 print "\n\nErrors:\n";
331 foreach ( $this->errors as $name => $errors ) {
332 if ( count( $errors ) ) {
333 $description = $this->errorDescriptions[$name];
334 echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
335 }
336 }
337
338 if ( count( $this->errors['restore text'] ) && $fix ) {
339 if ( (string)$xml !== '' ) {
340 $this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
341 } else {
342 echo "Can't fix text, no XML backup specified\n";
343 }
344 }
345
346 print "\nFlag statistics:\n";
347 $total = array_sum( $flagStats );
348 foreach ( $flagStats as $flag => $count ) {
349 printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
350 }
351 print "\nLocal object statistics:\n";
352 $total = array_sum( $objectStats );
353 foreach ( $objectStats as $className => $count ) {
354 printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
355 }
356 }
357
358 function error( $type, $msg, $ids ) {
359 if ( is_array( $ids ) && count( $ids ) == 1 ) {
360 $ids = reset( $ids );
361 }
362 if ( is_array( $ids ) ) {
363 $revIds = [];
364 foreach ( $ids as $id ) {
365 $revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) );
366 }
367 print "$msg in text rows " . implode( ', ', $ids ) .
368 ", revisions " . implode( ', ', $revIds ) . "\n";
369 } else {
370 $id = $ids;
371 $revIds = array_keys( $this->oldIdMap, $id );
372 if ( count( $revIds ) == 1 ) {
373 print "$msg in old_id $id, rev_id {$revIds[0]}\n";
374 } else {
375 print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
376 }
377 }
378 $this->errors[$type] = $this->errors[$type] + array_flip( $revIds );
379 }
380
381 function checkExternalConcatBlobs( $externalConcatBlobs ) {
382 if ( !count( $externalConcatBlobs ) ) {
383 return;
384 }
385
386 if ( is_null( $this->dbStore ) ) {
387 $this->dbStore = new ExternalStoreDB;
388 }
389
390 foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
391 $blobIds = array_keys( $oldIds );
392 $extDb =& $this->dbStore->getSlave( $cluster );
393 $blobsTable = $this->dbStore->getTable( $extDb );
394 $headerLength = strlen( self::CONCAT_HEADER );
395 $res = $extDb->select( $blobsTable,
396 [ 'blob_id', "LEFT(blob_text, $headerLength) AS header" ],
397 [ 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ], __METHOD__ );
398 foreach ( $res as $row ) {
399 if ( strcasecmp( $row->header, self::CONCAT_HEADER ) ) {
400 $this->error(
401 'restore text',
402 "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
403 $oldIds[$row->blob_id]
404 );
405 }
406 unset( $oldIds[$row->blob_id] );
407 }
408 $extDb->freeResult( $res );
409
410 // Print errors for missing blobs rows
411 foreach ( $oldIds as $blobId => $oldIds2 ) {
412 $this->error(
413 'restore text',
414 "Error: missing target $cluster/$blobId for two-part ES URL",
415 $oldIds2
416 );
417 }
418 }
419 }
420
421 function restoreText( $revIds, $xml ) {
423 $tmpDir = wfTempDir();
424
425 if ( !count( $revIds ) ) {
426 return;
427 }
428
429 print "Restoring text from XML backup...\n";
430
431 $revFileName = "$tmpDir/broken-revlist-$wgDBname";
432 $filteredXmlFileName = "$tmpDir/filtered-$wgDBname.xml";
433
434 // Write revision list
435 if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) {
436 echo "Error writing revision list, can't restore text\n";
437
438 return;
439 }
440
441 // Run mwdumper
442 echo "Filtering XML dump...\n";
443 $exitStatus = 0;
444 passthru( 'mwdumper ' .
446 "--output=file:$filteredXmlFileName",
447 "--filter=revlist:$revFileName",
448 $xml
449 ), $exitStatus
450 );
451
452 if ( $exitStatus ) {
453 echo "mwdumper died with exit status $exitStatus\n";
454
455 return;
456 }
457
458 $file = fopen( $filteredXmlFileName, 'r' );
459 if ( !$file ) {
460 echo "Unable to open filtered XML file\n";
461
462 return;
463 }
464
466 $dbw = wfGetDB( DB_MASTER );
467 $dbr->ping();
468 $dbw->ping();
469
470 $source = new ImportStreamSource( $file );
471 $importer = new WikiImporter(
472 $source,
473 ConfigFactory::getDefaultInstance()->makeConfig( 'main' )
474 );
475 $importer->setRevisionCallback( [ $this, 'importRevision' ] );
476 $importer->doImport();
477 }
478
479 function importRevision( &$revision, &$importer ) {
480 $id = $revision->getID();
481 $content = $revision->getContent( Revision::RAW );
482 $id = $id ? $id : '';
483
484 if ( $content === null ) {
485 echo "Revision $id is broken, we have no content available\n";
486
487 return;
488 }
489
490 $text = $content->serialize();
491 if ( $text === '' ) {
492 // This is what happens if the revision was broken at the time the
493 // dump was made. Unfortunately, it also happens if the revision was
494 // legitimately blank, so there's no way to tell the difference. To
495 // be safe, we'll skip it and leave it broken
496
497 echo "Revision $id is blank in the dump, may have been broken before export\n";
498
499 return;
500 }
501
502 if ( !$id ) {
503 // No ID, can't import
504 echo "No id tag in revision, can't import\n";
505
506 return;
507 }
508
509 // Find text row again
511 $oldId = $dbr->selectField( 'revision', 'rev_text_id', [ 'rev_id' => $id ], __METHOD__ );
512 if ( !$oldId ) {
513 echo "Missing revision row for rev_id $id\n";
514
515 return;
516 }
517
518 // Compress the text
520
521 // Update the text row
522 $dbw = wfGetDB( DB_MASTER );
523 $dbw->update( 'text',
524 [ 'old_flags' => $flags, 'old_text' => $text ],
525 [ 'old_id' => $oldId ],
526 __METHOD__, [ 'LIMIT' => 1 ]
527 );
528
529 // Remove it from the unfixed list and add it to the fixed list
530 unset( $this->errors['restore text'][$id] );
531 $this->errors['fixed'][$id] = true;
532 }
533}
unserialize( $serialized)
wfTempDir()
Tries to get the system directory for temporary files.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfEscapeShellArg()
Version of escapeshellarg() that works better on Windows.
if( $line===false) $args
Definition cdb.php:64
Maintenance script to do various checks on external storage.
check( $fix=false, $xml='')
restoreText( $revIds, $xml)
importRevision(&$revision, &$importer)
const CONCAT_HEADER
error( $type, $msg, $ids)
checkExternalConcatBlobs( $externalConcatBlobs)
DB accessable external objects.
Imports a XML dump from a file (either from file upload, files on disk, or HTTP)
static compressRevisionText(&$text)
If $wgCompressRevisions is enabled, we will compress data.
const RAW
Definition Revision.php:94
XML file reader for the page data importer.
global $optionsWithoutArgs
$res
Definition database.txt:21
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
when a variable name is used in a it is silently declared as a new local masking the global
Definition design.txt:95
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
while(( $__line=Maintenance::readconsole()) !==false) print
Definition eval.php:64
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context $revId
Definition hooks.txt:1095
namespace are movable Hooks may change this value to override the return value of MWNamespace::isMovable(). 'NewDifferenceEngine' do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition hooks.txt:2568
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition hooks.txt:1096
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content $content
Definition hooks.txt:1094
if the prop value should be in the metadata multi language array can modify can modify indexed by page_id indexed by prefixed DB keys can modify can modify can modify this should be populated with an alert message to that effect to be fed to an HTMLForm object and populate $result with the reason in the form of error messages should be plain text with no special etc to show that they re errors
Definition hooks.txt:1705
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition hooks.txt:2710
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:304
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname
$source
const DB_REPLICA
Definition defines.php:22
const DB_MASTER
Definition defines.php:23