MediaWiki  master
checkStorage.php
Go to the documentation of this file.
1 <?php
26 
// Only run the check when the MEDIAWIKI constant has not been defined yet
// (presumably: the file was invoked directly from the command line rather
// than included by already-initialised code -- confirm against callers).
if ( !defined( 'MEDIAWIKI' ) ) {
	// 'fix' is a boolean switch, it does not consume the following argument
	$optionsWithoutArgs = [ 'fix' ];
	require_once __DIR__ . '/../commandLine.inc';

	// First positional argument: XML dump to restore damaged text from, if any
	$xml = $args[0] ?? false;
	// --fix: repair what can be repaired instead of only reporting
	$fix = isset( $options['fix'] );

	$cs = new CheckStorage;
	$cs->check( $fix, $xml );
}
36 
37 // ----------------------------------------------------------------------------------
38 
/**
 * Maintenance class to do various checks on external storage.
 *
 * Walks the revision table in chunks, validates the flags and contents of the
 * associated text rows (including pointers into external storage and local
 * history blob stubs), prints a report and, when asked to, repairs what can be
 * repaired automatically or restores damaged text from an XML dump.
 */
class CheckStorage {
	/** Serialized-object prefix every concatenated gzip history blob must start with */
	const CONCAT_HEADER = 'O:27:"concatenatedgziphistoryblob"';

	/**
	 * $oldIdMap: rev_id => rev_text_id for the chunk currently being checked.
	 * $errors: error type => [ rev_id => (ignored value) ].
	 */
	public $oldIdMap, $errors;

	/** Lazily created ExternalStoreDB instance, null until first needed */
	public $dbStore = null;

	/** Human readable description for every key used in $errors */
	public $errorDescriptions = [
		'restore text' => 'Damaged text, need to be restored from a backup',
		'restore revision' => 'Damaged revision row, need to be restored from a backup',
		'unfixable' => 'Unexpected errors with no automated fixing method',
		'fixed' => 'Errors already fixed',
		'fixable' => 'Errors which would already be fixed if --fix was specified',
	];

	/**
	 * Run all checks over the whole revision table and print a report.
	 *
	 * @param bool $fix Whether to repair fixable errors (and restore text
	 *   from $xml when text rows are damaged)
	 * @param string|bool $xml Path of an XML dump used to restore damaged
	 *   text; empty/false when no dump is available
	 */
	public function check( $fix = false, $xml = '' ) {
		$dbr = wfGetDB( DB_REPLICA );
		if ( $fix ) {
			print "Checking, will fix errors if possible...\n";
		} else {
			print "Checking...\n";
		}
		// Cast so that a non-numeric result (presumably false/null on an
		// empty table -- confirm) simply means "nothing to check"
		$maxRevId = (int)$dbr->selectField( 'revision', 'MAX(rev_id)', '', __METHOD__ );
		$chunkSize = 1000;
		$flagStats = [];
		$objectStats = [];
		$knownFlags = [ 'external', 'gzip', 'object', 'utf-8' ];
		$this->errors = [
			'restore text' => [],
			'restore revision' => [],
			'unfixable' => [],
			'fixed' => [],
			'fixable' => [],
		];

		// "<=" rather than "<": with "<" the chunk that starts exactly at
		// $maxRevId (e.g. rev 1001 when MAX(rev_id) is 1001) was never checked
		for ( $chunkStart = 1; $chunkStart <= $maxRevId; $chunkStart += $chunkSize ) {
			$chunkEnd = $chunkStart + $chunkSize - 1;

			// Fetch revision rows
			$this->oldIdMap = [];
			$dbr->ping();
			$res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ],
				[ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ );
			foreach ( $res as $row ) {
				$this->oldIdMap[$row->rev_id] = $row->rev_text_id;
			}

			if ( !count( $this->oldIdMap ) ) {
				continue;
			}

			// Fetch old_flags. Every text id starts out as "missing" and is
			// struck off the list once its row has been seen.
			$missingTextRows = array_flip( $this->oldIdMap );
			$externalRevs = [];
			$objectRevs = [];
			$res = $dbr->select(
				'text',
				[ 'old_id', 'old_flags' ],
				[ 'old_id' => $this->oldIdMap ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				$flags = $row->old_flags;
				$id = $row->old_id;

				// Count how often each distinct flags string occurs
				$flagStats[$flags] = ( $flagStats[$flags] ?? 0 ) + 1;

				// Not missing
				unset( $missingTextRows[$id] );

				// Check for external or object
				$flagArray = ( $flags == '' ) ? [] : explode( ',', $flags );
				if ( in_array( 'external', $flagArray ) ) {
					$externalRevs[] = $id;
				} elseif ( in_array( 'object', $flagArray ) ) {
					$objectRevs[] = $id;
				}

				// Check for unrecognised flags
				if ( $flags == '0' ) {
					// This is a known bug from 2004
					// It's safe to just erase the old_flags field
					if ( $fix ) {
						$this->addError( 'fixed', "Warning: old_flags set to 0", $id );
						$dbw = wfGetDB( DB_MASTER );
						$dbw->ping();
						$dbw->update( 'text', [ 'old_flags' => '' ],
							[ 'old_id' => $id ], __METHOD__ );
						echo "Fixed\n";
					} else {
						$this->addError( 'fixable', "Warning: old_flags set to 0", $id );
					}
				} elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
					$this->addError( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
				}
			}

			// Output errors for any missing text rows
			foreach ( array_keys( $missingTextRows ) as $oldId ) {
				$this->addError( 'restore revision', "Error: missing text row", $oldId );
			}

			// Verify external revisions.
			// Both maps have the shape cluster => blob id => list of old_id.
			$externalConcatBlobs = [];
			$externalNormalBlobs = [];
			if ( count( $externalRevs ) ) {
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', 'old_text' ],
					[ 'old_id' => $externalRevs ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					$urlParts = explode( '://', $row->old_text, 2 );
					if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
						$this->addError( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
						continue;
					}
					$proto = $urlParts[0];
					if ( $proto != 'DB' ) {
						$this->addError(
							'restore text',
							"Error: invalid external protocol \"$proto\"",
							$row->old_id );
						continue;
					}
					// Split "DB://cluster/id[/item]" into [ 'DB:', '', cluster, id, item? ]
					$path = explode( '/', $row->old_text );
					if ( !isset( $path[3] ) || $path[2] === '' || $path[3] === '' ) {
						// Without this guard a URL lacking the cluster or blob id
						// would be filed under an empty key and queried as such
						$this->addError( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
						continue;
					}
					$cluster = $path[2];
					$id = $path[3];
					if ( isset( $path[4] ) ) {
						$externalConcatBlobs[$cluster][$id][] = $row->old_id;
					} else {
						$externalNormalBlobs[$cluster][$id][] = $row->old_id;
					}
				}
			}

			// Check external concat blobs for the right header
			$this->checkExternalConcatBlobs( $externalConcatBlobs );

			// Check external normal blobs for existence
			$this->checkExternalNormalBlobs( $externalNormalBlobs );

			// Check local objects
			$dbr->ping();
			// text id of a concat blob => list of stub old_ids pointing at it
			$concatBlobs = [];
			// NOTE(review): $curIds is filled below but never read afterwards
			$curIds = [];
			if ( count( $objectRevs ) ) {
				$headerLength = 300;
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
					[ 'old_id' => $objectRevs ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					$oldId = $row->old_id;
					$matches = [];
					if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
						$this->addError( 'restore text', "Error: invalid object header", $oldId );
						continue;
					}

					$className = strtolower( $matches[2] );
					if ( strlen( $className ) !== (int)$matches[1] ) {
						$this->addError(
							'restore text',
							"Error: invalid object header, wrong class name length",
							$oldId
						);
						continue;
					}

					$objectStats[$className] = ( $objectStats[$className] ?? 0 ) + 1;

					switch ( $className ) {
						case 'concatenatedgziphistoryblob':
							// Good
							break;
						case 'historyblobstub':
						case 'historyblobcurstub':
							// A header that fills the whole LEFT() window was
							// probably truncated, so it cannot be unserialized
							if ( strlen( $row->header ) == $headerLength ) {
								$this->addError( 'unfixable', "Error: overlong stub header", $oldId );
								break;
							}
							// NOTE(review): unserialize() runs on data read from the
							// text table; only safe while that table is trusted
							$stubObj = unserialize( $row->header );
							if ( !is_object( $stubObj ) ) {
								$this->addError( 'restore text', "Error: unable to unserialize stub object", $oldId );
								break;
							}
							if ( $className == 'historyblobstub' ) {
								$concatBlobs[$stubObj->mOldId][] = $oldId;
							} else {
								$curIds[$stubObj->mCurId][] = $oldId;
							}
							break;
						default:
							$this->addError( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
					}
				}
			}

			// Check local concat blob validity
			$externalConcatBlobs = [];
			if ( count( $concatBlobs ) ) {
				$headerLength = 300;
				$res = $dbr->select(
					'text',
					[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
					[ 'old_id' => array_keys( $concatBlobs ) ],
					__METHOD__
				);
				foreach ( $res as $row ) {
					$flags = explode( ',', $row->old_flags );
					if ( in_array( 'external', $flags ) ) {
						// Concat blob is in external storage?
						if ( in_array( 'object', $flags ) ) {
							$urlParts = explode( '/', $row->header );
							if ( $urlParts[0] != 'DB:' ) {
								$this->addError(
									'unfixable',
									"Error: unrecognised external storage type \"{$urlParts[0]}\"",
									$row->old_id
								);
							} elseif ( !isset( $urlParts[3] ) || $urlParts[2] === '' || $urlParts[3] === '' ) {
								// URL has no cluster/blob id part: report it instead
								// of looking up an empty cluster name
								$this->addError(
									'unfixable',
									"Error: invalid external storage URL on concat bulk row {$row->old_id}",
									$concatBlobs[$row->old_id]
								);
							} else {
								$cluster = $urlParts[2];
								$id = $urlParts[3];
								if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
									$externalConcatBlobs[$cluster][$id] = [];
								}
								$externalConcatBlobs[$cluster][$id] = array_merge(
									$externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
								);
							}
						} else {
							$this->addError(
								'unfixable',
								"Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
								$concatBlobs[$row->old_id] );
						}
					} elseif ( strcasecmp(
						substr( $row->header, 0, strlen( self::CONCAT_HEADER ) ),
						self::CONCAT_HEADER
					) ) {
						$this->addError(
							'restore text',
							"Error: Incorrect object header for concat bulk row {$row->old_id}",
							$concatBlobs[$row->old_id]
						);
					} # else good

					unset( $concatBlobs[$row->old_id] );
				}
			}

			// Check targets of unresolved stubs
			$this->checkExternalConcatBlobs( $externalConcatBlobs );
			// next chunk
		}

		print "\n\nErrors:\n";
		foreach ( $this->errors as $name => $errors ) {
			if ( count( $errors ) ) {
				$description = $this->errorDescriptions[$name];
				echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
			}
		}

		if ( count( $this->errors['restore text'] ) && $fix ) {
			if ( (string)$xml !== '' ) {
				$this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
			} else {
				echo "Can't fix text, no XML backup specified\n";
			}
		}

		print "\nFlag statistics:\n";
		$this->printStats( $flagStats );
		print "\nLocal object statistics:\n";
		$this->printStats( $objectStats );
	}

	/**
	 * Print one "label, count, percentage of total" line per entry.
	 *
	 * @param array $stats label => occurrence count
	 */
	private function printStats( array $stats ) {
		// Non-zero whenever the loop body runs: every recorded count is >= 1
		$total = array_sum( $stats );
		foreach ( $stats as $label => $count ) {
			printf( "%-30s %10d %5.2f%%\n", $label, $count, $count / $total * 100 );
		}
	}

	/**
	 * Return the shared ExternalStoreDB instance, creating it on first use.
	 *
	 * @return ExternalStoreDB
	 */
	private function getExternalStore() {
		if ( $this->dbStore === null ) {
			$this->dbStore = new ExternalStoreDB;
		}

		return $this->dbStore;
	}

	/**
	 * Print a problem and record it against every affected revision.
	 *
	 * @param string $type Key of $this->errors to record the revisions under
	 * @param string $msg Message prefix to print
	 * @param int|string|array $ids Text row id (old_id), or a list of them
	 */
	public function addError( $type, $msg, $ids ) {
		// A one-element list is reported exactly like a scalar id
		if ( is_array( $ids ) && count( $ids ) == 1 ) {
			$ids = reset( $ids );
		}
		if ( is_array( $ids ) ) {
			$revIds = [];
			foreach ( $ids as $id ) {
				// All revisions of the current chunk that use this text row
				$revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) );
			}
			print "$msg in text rows " . implode( ', ', $ids ) .
				", revisions " . implode( ', ', $revIds ) . "\n";
		} else {
			$id = $ids;
			$revIds = array_keys( $this->oldIdMap, $id );
			if ( count( $revIds ) == 1 ) {
				print "$msg in old_id $id, rev_id {$revIds[0]}\n";
			} else {
				print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
			}
		}
		// Only the keys (rev ids) matter; "+" keeps revisions recorded earlier
		$this->errors[$type] = $this->errors[$type] + array_flip( $revIds );
	}

	/**
	 * Verify that every blob referenced by a two-part external storage URL
	 * exists and starts with the concatenated history blob header.
	 *
	 * @param array $externalConcatBlobs cluster => blob id => list of old_id
	 */
	public function checkExternalConcatBlobs( $externalConcatBlobs ) {
		if ( !count( $externalConcatBlobs ) ) {
			return;
		}

		$store = $this->getExternalStore();
		$headerLength = strlen( self::CONCAT_HEADER );

		foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
			$blobIds = array_keys( $oldIds );
			// Plain assignment: taking a reference to a function result only
			// produces a notice and is not needed for an object handle
			$extDb = $store->getSlave( $cluster );
			$blobsTable = $store->getTable( $extDb );
			$res = $extDb->select( $blobsTable,
				[ 'blob_id', "LEFT(blob_text, $headerLength) AS header" ],
				[ 'blob_id' => $blobIds ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				if ( strcasecmp( $row->header, self::CONCAT_HEADER ) ) {
					$this->addError(
						'restore text',
						"Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
						$oldIds[$row->blob_id]
					);
				}
				unset( $oldIds[$row->blob_id] );
			}

			// Whatever is left was not returned by the query: missing blob rows
			foreach ( $oldIds as $blobId => $oldIds2 ) {
				$this->addError(
					'restore text',
					"Error: missing target $cluster/$blobId for two-part ES URL",
					$oldIds2
				);
			}
		}
	}

	/**
	 * Verify that every blob referenced by a one-part external storage URL
	 * exists in its cluster's blobs table.
	 *
	 * @param array $externalNormalBlobs cluster => blob id => list of old_id
	 */
	private function checkExternalNormalBlobs( array $externalNormalBlobs ) {
		if ( !count( $externalNormalBlobs ) ) {
			return;
		}

		$store = $this->getExternalStore();

		// Iterate the *normal* blobs; the previous inline code walked the
		// concat blob map here, so one-part URLs were never actually verified
		foreach ( $externalNormalBlobs as $cluster => $xBlobIds ) {
			$blobIds = array_keys( $xBlobIds );
			$extDb = $store->getSlave( $cluster );
			$blobsTable = $store->getTable( $extDb );
			$res = $extDb->select( $blobsTable,
				[ 'blob_id' ],
				[ 'blob_id' => $blobIds ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				unset( $xBlobIds[$row->blob_id] );
			}
			// Print errors for missing blobs rows
			foreach ( $xBlobIds as $blobId => $oldId ) {
				$this->addError(
					'restore text',
					"Error: missing target $blobId for one-part ES URL",
					$oldId );
			}
		}
	}

	/**
	 * Restore the text of the given revisions from an XML dump.
	 *
	 * The dump is first reduced to the wanted revisions with the external
	 * "mwdumper" program, then imported; importRevision() is called for
	 * every revision found.
	 *
	 * @param array $revIds Revision ids whose text has to be restored
	 * @param string $xml Path of the XML dump
	 */
	public function restoreText( $revIds, $xml ) {
		global $wgDBname;
		$tmpDir = wfTempDir();

		if ( !count( $revIds ) ) {
			return;
		}

		print "Restoring text from XML backup...\n";

		$revFileName = "$tmpDir/broken-revlist-$wgDBname";
		$filteredXmlFileName = "$tmpDir/filtered-$wgDBname.xml";

		// Write revision list
		if ( file_put_contents( $revFileName, implode( "\n", $revIds ) ) === false ) {
			echo "Error writing revision list, can't restore text\n";

			return;
		}

		// Run mwdumper
		echo "Filtering XML dump...\n";
		$exitStatus = 0;
		passthru( 'mwdumper ' .
			Shell::escape(
				"--output=file:$filteredXmlFileName",
				"--filter=revlist:$revFileName",
				$xml
			), $exitStatus
		);

		if ( $exitStatus ) {
			echo "mwdumper died with exit status $exitStatus\n";

			return;
		}

		$file = fopen( $filteredXmlFileName, 'r' );
		if ( !$file ) {
			echo "Unable to open filtered XML file\n";

			return;
		}

		// Ping both connections after the (possibly slow) external filter run
		$dbr = wfGetDB( DB_REPLICA );
		$dbw = wfGetDB( DB_MASTER );
		$dbr->ping();
		$dbw->ping();

		try {
			// The importer needs an import source; previously an undefined
			// $source was passed and the opened file handle was never used.
			// NOTE(review): ImportStreamSource is assumed to be the intended
			// wrapper for a file handle -- confirm against the importer API.
			$source = new ImportStreamSource( $file );
			$importer = new WikiImporter(
				$source,
				MediaWikiServices::getInstance()->getMainConfig()
			);
			$importer->setRevisionCallback( [ $this, 'importRevision' ] );
			$importer->setNoticeCallback( function ( $msg, $params ) {
				echo wfMessage( $msg, $params )->text() . "\n";
			} );
			$importer->doImport();
		} finally {
			fclose( $file );
		}
	}

	/**
	 * Revision callback for the importer: write the text of one revision
	 * from the dump back into its text row.
	 *
	 * @param object &$revision Revision read from the dump; must offer
	 *   getID() and getContent()
	 * @param object &$importer The importer invoking the callback (unused)
	 */
	public function importRevision( &$revision, &$importer ) {
		$id = $revision->getID();
		$content = $revision->getContent( Revision::RAW );
		// Normalise a missing id to '' so it interpolates cleanly below
		$id = $id ?: '';

		if ( $content === null ) {
			echo "Revision $id is broken, we have no content available\n";

			return;
		}

		$text = $content->serialize();
		if ( $text === '' ) {
			// This is what happens if the revision was broken at the time the
			// dump was made. Unfortunately, it also happens if the revision was
			// legitimately blank, so there's no way to tell the difference. To
			// be safe, we'll skip it and leave it broken

			echo "Revision $id is blank in the dump, may have been broken before export\n";

			return;
		}

		if ( !$id ) {
			// No ID, can't import
			echo "No id tag in revision, can't import\n";

			return;
		}

		// Find text row again
		$dbr = wfGetDB( DB_REPLICA );
		$oldId = $dbr->selectField( 'revision', 'rev_text_id', [ 'rev_id' => $id ], __METHOD__ );
		if ( !$oldId ) {
			echo "Missing revision row for rev_id $id\n";

			return;
		}

		// Compress the text ($text is passed by reference and rewritten in
		// place; the return value is the matching flags string)
		$flags = Revision::compressRevisionText( $text );

		// Update the text row
		$dbw = wfGetDB( DB_MASTER );
		$dbw->update( 'text',
			[ 'old_flags' => $flags, 'old_text' => $text ],
			[ 'old_id' => $oldId ],
			__METHOD__, [ 'LIMIT' => 1 ]
		);

		// Remove it from the unfixed list and add it to the fixed list
		unset( $this->errors['restore text'][$id] );
		$this->errors['fixed'][$id] = true;
	}
}
const CONCAT_HEADER
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
XML file reader for the page data importer.
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Definition: router.php:42
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
$source
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
const DB_MASTER
Definition: defines.php:26
addError( $type, $msg, $ids)
if( $line===false) $args
Definition: cdb.php:64
Maintenance script to do various checks on external storage.
wfTempDir()
Tries to get the system directory for temporary files.
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
$res
Definition: database.txt:21
static compressRevisionText(&$text)
If $wgCompressRevisions is enabled, we will compress data.
Definition: Revision.php:1124
$params
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1982
unserialize( $serialized)
Imports a XML dump from a file (either from file upload, files on disk, or HTTP)
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:780
the value of this variable comes from LanguageConverter indexed by page_id indexed by prefixed DB keys on which the links will be shown can modify can modify can modify this should be populated with an alert message to that effect to be fed to an HTMLForm object and populate $result with the reason in the form of [messagename, param1, param2,...] or a MessageSpecifier error messages should be plain text with no special etc to show that they re errors
Definition: hooks.txt:1746
const RAW
Definition: Revision.php:56
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
restoreText( $revIds, $xml)
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
check( $fix=false, $xml='')
controlled by the following MediaWiki still creates a BagOStuff but calls it to it are no ops If the cache daemon can t be it should also disable itself fairly $wgDBname
Definition: memcached.txt:93
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:271
const DB_REPLICA
Definition: defines.php:25
importRevision(&$revision, &$importer)
$content
Definition: pageupdater.txt:72
DB accessible external objects.
global $optionsWithoutArgs
Definition: commandLine.inc:24
checkExternalConcatBlobs( $externalConcatBlobs)
$matches