MediaWiki  master
checkStorage.php
Go to the documentation of this file.
1 <?php
26 
if ( !defined( 'MEDIAWIKI' ) ) {
	// Not loaded from inside MediaWiki: behave as a stand-alone command-line script.
	// 'fix' is a switch that takes no value; it is registered before the
	// command-line bootstrap is included (presumably so the bootstrap can use it
	// while parsing arguments -- confirm against commandLine.inc).
	$optionsWithoutArgs = [ 'fix' ];
	require_once __DIR__ . '/../commandLine.inc';

	// --fix enables repairs; the first positional argument, when present, is
	// handed to CheckStorage::check() as the XML backup to restore text from.
	$fix = isset( $options['fix'] );
	$xml = isset( $args[0] ) ? $args[0] : false;

	$cs = new CheckStorage();
	$cs->check( $fix, $xml );
}
36 
37 // ----------------------------------------------------------------------------------
38 
/**
 * Maintenance script to do various checks on external storage.
 *
 * Walks the revision table in chunks, inspects the text rows those revisions
 * point at (flags, external-storage URLs, serialized object headers), prints
 * every problem found and optionally repairs what can be repaired.
 */
class CheckStorage {
	/** Serialized-object header expected at the start of a concatenated gzip history blob. */
	const CONCAT_HEADER = 'O:27:"concatenatedgziphistoryblob"';

	/**
	 * $oldIdMap: text row id => list of revision ids using it (rebuilt for every chunk).
	 * $errors: error type => [ revision id => arbitrary value ]; only the keys are reported.
	 */
	public $oldIdMap, $errors;

	/** @var ExternalStoreDB|null Created lazily the first time an external cluster is queried. */
	public $dbStore = null;

	/** Human-readable description for each error type collected in $errors. */
	public $errorDescriptions = [
		'restore text' => 'Damaged text, need to be restored from a backup',
		'restore revision' => 'Damaged revision row, need to be restored from a backup',
		'unfixable' => 'Unexpected errors with no automated fixing method',
		'fixed' => 'Errors already fixed',
		'fixable' => 'Errors which would already be fixed if --fix was specified',
	];

	/**
	 * Run all storage checks and print a report.
	 *
	 * @param bool $fix Whether to repair errors where an automated fix exists
	 * @param string|bool $xml XML dump handed to restoreText() when damaged text
	 *   was found and $fix is set; '' or false means no dump is available
	 */
	function check( $fix = false, $xml = '' ) {
		// Without this declaration the schema-stage test below would read an
		// undefined local variable and always select the new-schema branch.
		global $wgMultiContentRevisionSchemaMigrationStage;

		$dbr = wfGetDB( DB_REPLICA );
		if ( $fix ) {
			print "Checking, will fix errors if possible...\n";
		} else {
			print "Checking...\n";
		}
		$maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', '', __METHOD__ );
		$chunkSize = 1000;
		// Number of leading bytes of old_text fetched when inspecting object headers
		$headerLength = 300;
		$flagStats = [];
		$objectStats = [];
		$knownFlags = [ 'external', 'gzip', 'object', 'utf-8' ];
		$this->errors = [
			'restore text' => [],
			'restore revision' => [],
			'unfixable' => [],
			'fixed' => [],
			'fixable' => [],
		];

		// "<=" so that a maximum revision id sitting exactly on a chunk start
		// (1, 1001, 2001, ...) is still checked.
		for ( $chunkStart = 1; $chunkStart <= $maxRevId; $chunkStart += $chunkSize ) {
			$chunkEnd = $chunkStart + $chunkSize - 1;

			$this->oldIdMap = [];
			$dbr->ping();

			// Fetch revision rows and build the text id => revision ids map
			if ( $wgMultiContentRevisionSchemaMigrationStage & SCHEMA_COMPAT_READ_OLD ) {
				$res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ],
					[ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ );
				foreach ( $res as $row ) {
					if ( !isset( $this->oldIdMap[ $row->rev_text_id ] ) ) {
						$this->oldIdMap[ $row->rev_text_id ] = [ $row->rev_id ];
					} elseif ( !in_array( $row->rev_id, $this->oldIdMap[ $row->rev_text_id ] ) ) {
						$this->oldIdMap[ $row->rev_text_id ][] = $row->rev_id;
					}
				}
			} else {
				$res = $dbr->select(
					[ 'slots', 'content' ],
					[ 'slot_revision_id', 'content_address' ],
					[ "slot_revision_id BETWEEN $chunkStart AND $chunkEnd" ],
					__METHOD__,
					[],
					[ 'content' => [ 'INNER JOIN', [ 'content_id = slot_content_id' ] ] ]
				);
				$blobStore = MediaWikiServices::getInstance()->getBlobStore();
				foreach ( $res as $row ) {
					$textId = $blobStore->getTextIdFromAddress( $row->content_address );
					if ( $textId ) {
						if ( !isset( $this->oldIdMap[$textId] ) ) {
							$this->oldIdMap[ $textId ] = [ $row->slot_revision_id ];
						} elseif ( !in_array( $row->slot_revision_id, $this->oldIdMap[$textId] ) ) {
							$this->oldIdMap[ $textId ][] = $row->slot_revision_id;
						}
					}
				}
			}

			if ( !count( $this->oldIdMap ) ) {
				continue;
			}

			// Fetch old_flags
			$missingTextRows = $this->oldIdMap;
			$externalRevs = [];
			$objectRevs = [];
			$res = $dbr->select(
				'text',
				[ 'old_id', 'old_flags' ],
				[ 'old_id' => array_keys( $this->oldIdMap ) ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				$flags = $row->old_flags;
				$id = $row->old_id;

				// Count how often each distinct flags value occurs
				$flagStats[$flags] = ( $flagStats[$flags] ?? 0 ) + 1;

				// Not missing
				unset( $missingTextRows[$row->old_id] );

				// Check for external or object
				if ( $flags == '' ) {
					$flagArray = [];
				} else {
					$flagArray = explode( ',', $flags );
				}
				if ( in_array( 'external', $flagArray ) ) {
					$externalRevs[] = $id;
				} elseif ( in_array( 'object', $flagArray ) ) {
					$objectRevs[] = $id;
				}

				// Check for unrecognised flags
				if ( $flags == '0' ) {
					// This is a known bug from 2004
					// It's safe to just erase the old_flags field
					if ( $fix ) {
						$this->addError( 'fixed', "Warning: old_flags set to 0", $id );
						$dbw = wfGetDB( DB_MASTER );
						$dbw->ping();
						$dbw->update( 'text', [ 'old_flags' => '' ],
							[ 'old_id' => $id ], __METHOD__ );
						echo "Fixed\n";
					} else {
						$this->addError( 'fixable', "Warning: old_flags set to 0", $id );
					}
				} elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
					$this->addError( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
				}
			}

			// Output errors for any missing text rows
			foreach ( array_keys( $missingTextRows ) as $oldId ) {
				$this->addError( 'restore revision', "Error: missing text row", $oldId );
			}

			// Verify external revisions
			// Both maps are cluster => blob id => list of text row ids
			$externalConcatBlobs = [];
			$externalNormalBlobs = [];
			if ( count( $externalRevs ) ) {
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', 'old_text' ],
					[ 'old_id' => $externalRevs ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					$urlParts = explode( '://', $row->old_text, 2 );
					if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
						$this->addError( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
						continue;
					}
					list( $proto, ) = $urlParts;
					if ( $proto != 'DB' ) {
						$this->addError(
							'restore text',
							"Error: invalid external protocol \"$proto\"",
							$row->old_id );
						continue;
					}
					// "DB://cluster/id[/item]" splits into [ 'DB:', '', cluster, id, item? ]
					$path = explode( '/', $row->old_text );
					if ( !isset( $path[3] ) || $path[2] === '' || $path[3] === '' ) {
						// Cluster or blob id segment is missing
						$this->addError( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
						continue;
					}
					$cluster = $path[2];
					$id = $path[3];
					if ( isset( $path[4] ) ) {
						$externalConcatBlobs[$cluster][$id][] = $row->old_id;
					} else {
						$externalNormalBlobs[$cluster][$id][] = $row->old_id;
					}
				}
			}

			// Check external concat blobs for the right header
			$this->checkExternalConcatBlobs( $externalConcatBlobs );

			// Check external normal blobs for existence
			if ( count( $externalNormalBlobs ) ) {
				$dbStore = $this->getDbStore();
				// Iterate the *normal* blobs here; the concat blobs were already
				// handled by checkExternalConcatBlobs() above.
				foreach ( $externalNormalBlobs as $cluster => $xBlobIds ) {
					$blobIds = array_keys( $xBlobIds );
					$extDb = $dbStore->getSlave( $cluster );
					$blobsTable = $dbStore->getTable( $extDb );
					$res = $extDb->select( $blobsTable,
						[ 'blob_id' ],
						[ 'blob_id' => $blobIds ],
						__METHOD__
					);
					foreach ( $res as $row ) {
						unset( $xBlobIds[$row->blob_id] );
					}
					// Print errors for missing blobs rows
					foreach ( $xBlobIds as $blobId => $oldIds ) {
						$this->addError(
							'restore text',
							"Error: missing target $blobId for one-part ES URL",
							$oldIds );
					}
				}
			}

			// Check local objects
			$dbr->ping();
			// Target text row id => list of stub text row ids pointing at it
			$concatBlobs = [];
			$curIds = [];
			if ( count( $objectRevs ) ) {
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
					[ 'old_id' => $objectRevs ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					$oldId = $row->old_id;
					$matches = [];
					if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
						$this->addError( 'restore text', "Error: invalid object header", $oldId );
						continue;
					}

					$className = strtolower( $matches[2] );
					if ( strlen( $className ) != $matches[1] ) {
						$this->addError(
							'restore text',
							"Error: invalid object header, wrong class name length",
							$oldId
						);
						continue;
					}

					$objectStats[$className] = ( $objectStats[$className] ?? 0 ) + 1;

					switch ( $className ) {
						case 'concatenatedgziphistoryblob':
							// Good
							break;
						case 'historyblobstub':
						case 'historyblobcurstub':
							// A header that fills the whole fetched prefix may be truncated
							if ( strlen( $row->header ) == $headerLength ) {
								$this->addError( 'unfixable', "Error: overlong stub header", $oldId );
								break;
							}
							// NOTE(review): unserialize() on database content can instantiate
							// objects of any loadable class; only the class name in the header
							// has been checked at this point.
							$stubObj = unserialize( $row->header );
							if ( !is_object( $stubObj ) ) {
								$this->addError( 'restore text', "Error: unable to unserialize stub object", $oldId );
								break;
							}
							if ( $className == 'historyblobstub' ) {
								$concatBlobs[$stubObj->mOldId][] = $oldId;
							} else {
								$curIds[$stubObj->mCurId][] = $oldId;
							}
							break;
						default:
							$this->addError( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
					}
				}
			}

			// Check local concat blob validity
			$externalConcatBlobs = [];
			if ( count( $concatBlobs ) ) {
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
					[ 'old_id' => array_keys( $concatBlobs ) ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					// The bulk row itself is usually not part of the current chunk, so
					// errors are attributed to the stub rows that reference it.
					$stubIds = $concatBlobs[$row->old_id];
					$flags = explode( ',', $row->old_flags );
					if ( in_array( 'external', $flags ) ) {
						// Concat blob is in external storage?
						if ( in_array( 'object', $flags ) ) {
							$urlParts = explode( '/', $row->header );
							if ( $urlParts[0] != 'DB:' ) {
								$this->addError(
									'unfixable',
									"Error: unrecognised external storage type \"{$urlParts[0]}\"",
									$stubIds
								);
							} elseif ( !isset( $urlParts[3] ) ) {
								// "DB:" prefix without both a cluster and a blob id
								$this->addError(
									'unfixable',
									"Error: invalid external storage URL \"{$row->header}\" " .
										"on concat bulk row {$row->old_id}",
									$stubIds
								);
							} else {
								$cluster = $urlParts[2];
								$id = $urlParts[3];
								if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
									$externalConcatBlobs[$cluster][$id] = [];
								}
								$externalConcatBlobs[$cluster][$id] = array_merge(
									$externalConcatBlobs[$cluster][$id], $stubIds
								);
							}
						} else {
							$this->addError(
								'unfixable',
								"Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
								$stubIds );
						}
					} elseif ( strcasecmp(
						substr( $row->header, 0, strlen( self::CONCAT_HEADER ) ),
						self::CONCAT_HEADER
					) ) {
						$this->addError(
							'restore text',
							"Error: Incorrect object header for concat bulk row {$row->old_id}",
							$stubIds
						);
					} # else good

					unset( $concatBlobs[$row->old_id] );
				}
			}

			// Check targets of unresolved stubs
			$this->checkExternalConcatBlobs( $externalConcatBlobs );
			// next chunk
		}

		print "\n\nErrors:\n";
		foreach ( $this->errors as $name => $errors ) {
			if ( count( $errors ) ) {
				$description = $this->errorDescriptions[$name];
				echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
			}
		}

		if ( count( $this->errors['restore text'] ) && $fix ) {
			if ( (string)$xml !== '' ) {
				$this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
			} else {
				echo "Can't fix text, no XML backup specified\n";
			}
		}

		// The loops below only run when the totals are non-zero, so the
		// divisions are safe.
		print "\nFlag statistics:\n";
		$total = array_sum( $flagStats );
		foreach ( $flagStats as $flag => $count ) {
			printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
		}
		print "\nLocal object statistics:\n";
		$total = array_sum( $objectStats );
		foreach ( $objectStats as $className => $count ) {
			printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
		}
	}

	/**
	 * Print an error message and record the affected revisions under $type.
	 *
	 * @param string $type Key of $this->errors to record the revisions under
	 * @param string $msg Message prefix to print
	 * @param int|string|array $ids One text row id, or a list of them; each is
	 *   translated to revision ids through $this->oldIdMap
	 */
	function addError( $type, $msg, $ids ) {
		if ( is_array( $ids ) && count( $ids ) == 1 ) {
			$ids = reset( $ids );
		}
		if ( is_array( $ids ) ) {
			$revIds = [];
			foreach ( $ids as $id ) {
				// Text ids that are not in the current chunk's map contribute no revisions
				$revIds = array_unique( array_merge( $revIds, $this->oldIdMap[$id] ?? [] ) );
			}
			print "$msg in text rows " . implode( ', ', $ids ) .
				", revisions " . implode( ', ', $revIds ) . "\n";
		} else {
			$id = $ids;
			$revIds = $this->oldIdMap[$id] ?? [];
			if ( count( $revIds ) == 1 ) {
				print "$msg in old_id $id, rev_id {$revIds[0]}\n";
			} else {
				print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
			}
		}
		// Revision ids become keys, so a revision is listed once per error type
		$this->errors[$type] = $this->errors[$type] + array_flip( $revIds );
	}

	/**
	 * Verify that the external-storage targets of two-part ES URLs exist and
	 * start with the concatenated-blob header.
	 *
	 * @param array $externalConcatBlobs cluster => blob id => list of text row ids
	 */
	function checkExternalConcatBlobs( $externalConcatBlobs ) {
		if ( !count( $externalConcatBlobs ) ) {
			return;
		}

		$dbStore = $this->getDbStore();
		$headerLength = strlen( self::CONCAT_HEADER );

		foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
			$blobIds = array_keys( $oldIds );
			$extDb = $dbStore->getSlave( $cluster );
			$blobsTable = $dbStore->getTable( $extDb );
			$res = $extDb->select( $blobsTable,
				[ 'blob_id', "LEFT(blob_text, $headerLength) AS header" ],
				[ 'blob_id' => $blobIds ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				if ( strcasecmp( $row->header, self::CONCAT_HEADER ) ) {
					$this->addError(
						'restore text',
						"Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
						$oldIds[$row->blob_id]
					);
				}
				// Whatever remains afterwards has no blob row at all
				unset( $oldIds[$row->blob_id] );
			}

			// Print errors for missing blobs rows
			foreach ( $oldIds as $blobId => $oldIds2 ) {
				$this->addError(
					'restore text',
					"Error: missing target $cluster/$blobId for two-part ES URL",
					$oldIds2
				);
			}
		}
	}

	/**
	 * Restore damaged text rows from an XML dump.
	 *
	 * Writes the revision list to a temporary file, runs the external
	 * "mwdumper" program to filter the dump down to those revisions, then
	 * imports the filtered dump with importRevision() as the revision callback.
	 *
	 * @param array $revIds Revision ids whose text should be restored
	 * @param string $xml XML dump, passed to mwdumper as its input argument
	 */
	function restoreText( $revIds, $xml ) {
		global $wgDBname;

		if ( !count( $revIds ) ) {
			return;
		}

		$tmpDir = wfTempDir();

		print "Restoring text from XML backup...\n";

		$revFileName = "$tmpDir/broken-revlist-$wgDBname";
		$filteredXmlFileName = "$tmpDir/filtered-$wgDBname.xml";

		// Write revision list
		if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) {
			echo "Error writing revision list, can't restore text\n";

			return;
		}

		// Run mwdumper
		echo "Filtering XML dump...\n";
		$exitStatus = 0;
		passthru( 'mwdumper ' .
			Shell::escape(
				"--output=file:$filteredXmlFileName",
				"--filter=revlist:$revFileName",
				$xml
			), $exitStatus
		);

		if ( $exitStatus ) {
			echo "mwdumper died with exit status $exitStatus\n";

			return;
		}

		$file = fopen( $filteredXmlFileName, 'r' );
		if ( !$file ) {
			echo "Unable to open filtered XML file\n";

			return;
		}

		// Ping both connections before the import starts
		$dbr = wfGetDB( DB_REPLICA );
		$dbw = wfGetDB( DB_MASTER );
		$dbr->ping();
		$dbw->ping();

		try {
			// The importer reads the filtered dump through this stream source
			$source = new ImportStreamSource( $file );
			$importer = new WikiImporter(
				$source,
				MediaWikiServices::getInstance()->getMainConfig()
			);
			$importer->setRevisionCallback( [ $this, 'importRevision' ] );
			$importer->setNoticeCallback( function ( $msg, $params ) {
				echo wfMessage( $msg, $params )->text() . "\n";
			} );
			$importer->doImport();
		} finally {
			// Release the dump handle whether or not the import succeeded
			if ( is_resource( $file ) ) {
				fclose( $file );
			}
		}
	}

	/**
	 * Revision callback for the importer: overwrite the text row of one
	 * revision with the content found in the dump.
	 *
	 * Revisions with no content, blank text, no id, or no matching revision
	 * row are reported and skipped.
	 *
	 * @param object &$revision Revision from the dump; getID() and getContent() are used
	 * @param object &$importer The importer invoking the callback (unused)
	 */
	function importRevision( &$revision, &$importer ) {
		$id = $revision->getID();
		$content = $revision->getContent( Revision::RAW );
		$id = $id ?: '';

		if ( $content === null ) {
			echo "Revision $id is broken, we have no content available\n";

			return;
		}

		$text = $content->serialize();
		if ( $text === '' ) {
			// This is what happens if the revision was broken at the time the
			// dump was made. Unfortunately, it also happens if the revision was
			// legitimately blank, so there's no way to tell the difference. To
			// be safe, we'll skip it and leave it broken

			echo "Revision $id is blank in the dump, may have been broken before export\n";

			return;
		}

		if ( !$id ) {
			// No ID, can't import
			echo "No id tag in revision, can't import\n";

			return;
		}

		// Find text row again
		$dbr = wfGetDB( DB_REPLICA );
		$oldId = $dbr->selectField( 'revision', 'rev_text_id', [ 'rev_id' => $id ], __METHOD__ );
		if ( !$oldId ) {
			echo "Missing revision row for rev_id $id\n";

			return;
		}

		// Compress the text; $text is passed by reference and the flags are returned
		$flags = Revision::compressRevisionText( $text );

		// Update the text row
		$dbw = wfGetDB( DB_MASTER );
		$dbw->update( 'text',
			[ 'old_flags' => $flags, 'old_text' => $text ],
			[ 'old_id' => $oldId ],
			__METHOD__, [ 'LIMIT' => 1 ]
		);

		// Remove it from the unfixed list and add it to the fixed list
		unset( $this->errors['restore text'][$id] );
		$this->errors['fixed'][$id] = true;
	}

	/**
	 * Return the shared ExternalStoreDB instance, creating it on first use.
	 *
	 * @return ExternalStoreDB
	 */
	private function getDbStore() {
		if ( $this->dbStore === null ) {
			$this->dbStore = new ExternalStoreDB;
		}

		return $this->dbStore;
	}
}
const CONCAT_HEADER
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
int $wgMultiContentRevisionSchemaMigrationStage
RevisionStore table schema migration stage (content, slots, content_models & slot_roles tables)...
XML file reader for the page data importer.
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Definition: router.php:42
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
$source
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
const DB_MASTER
Definition: defines.php:26
addError( $type, $msg, $ids)
if( $line===false) $args
Definition: cdb.php:64
Maintenance script to do various checks on external storage.
wfTempDir()
Tries to get the system directory for temporary files.
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
$res
Definition: database.txt:21
static compressRevisionText(&$text)
If $wgCompressRevisions is enabled, we will compress data.
Definition: Revision.php:1122
$params
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1982
unserialize( $serialized)
Imports a XML dump from a file (either from file upload, files on disk, or HTTP)
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:780
the value of this variable comes from LanguageConverter indexed by page_id indexed by prefixed DB keys on which the links will be shown can modify can modify can modify this should be populated with an alert message to that effect to be fed to an HTMLForm object and populate $result with the reason in the form of [messagename, param1, param2,...] or a MessageSpecifier error messages should be plain text with no special etc to show that they re errors
Definition: hooks.txt:1746
const RAW
Definition: Revision.php:56
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
restoreText( $revIds, $xml)
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
check( $fix=false, $xml='')
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:271
const DB_REPLICA
Definition: defines.php:25
const SCHEMA_COMPAT_READ_OLD
Definition: Defines.php:281
importRevision(&$revision, &$importer)
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a controlled by the following MediaWiki still creates a BagOStuff but calls it to it are no ops If the cache daemon can t be it should also disable itself fairly $wgDBname
Definition: memcached.txt:93
$content
Definition: pageupdater.txt:72
DB accessible external objects.
global $optionsWithoutArgs
Definition: commandLine.inc:24
checkExternalConcatBlobs( $externalConcatBlobs)
$matches