MediaWiki  1.30.0
checkStorage.php
Go to the documentation of this file.
1 <?php
25 
// Stand-alone entry point: when MEDIAWIKI is not yet defined, load the
// command-line environment and run the check immediately.
if ( !defined( 'MEDIAWIKI' ) ) {
	// Declare --fix as a switch that takes no value before commandLine.inc is loaded
	$optionsWithoutArgs = [ 'fix' ];
	require_once __DIR__ . '/../commandLine.inc';

	$cs = new CheckStorage;
	$fix = isset( $options['fix'] );
	// The first positional argument, if present, is handed to check() as the XML backup
	$xml = isset( $args[0] ) ? $args[0] : false;
	$cs->check( $fix, $xml );
}
39 
40 // ----------------------------------------------------------------------------------
41 
48 class CheckStorage {
/** Serialized-object prefix that concatenated blob text is compared against */
const CONCAT_HEADER = 'O:27:"concatenatedgziphistoryblob"';

/** @var array Map of rev_id => rev_text_id for the chunk currently being checked */
public $oldIdMap;

/** @var array Map of error type => array keyed by affected rev_id */
public $errors;

/** @var ExternalStoreDB|null Created the first time an external cluster is queried */
public $dbStore = null;

/** @var string[] Summary line printed for each error type that has entries */
public $errorDescriptions = [
	'restore text' => 'Damaged text, need to be restored from a backup',
	'restore revision' => 'Damaged revision row, need to be restored from a backup',
	'unfixable' => 'Unexpected errors with no automated fixing method',
	'fixed' => 'Errors already fixed',
	'fixable' => 'Errors which would already be fixed if --fix was specified',
];
60 
/**
 * Walk the revision table in chunks and report damaged or inconsistent text storage.
 *
 * For every chunk this verifies that each referenced text row exists, that
 * old_flags only holds known flags, that external-store URLs are well formed
 * and point at existing blobs, and that locally stored serialized objects
 * start with a recognised header. Problems are printed as they are found and
 * collected in $this->errors; a summary and flag/object statistics follow.
 *
 * @param bool $fix Repair what can be repaired automatically: old_flags of '0',
 *   and damaged text when $xml is given
 * @param string|bool $xml XML backup handed on to restoreText(); '' or false for none
 */
public function check( $fix = false, $xml = '' ) {
	$dbr = wfGetDB( DB_REPLICA );
	if ( $fix ) {
		print "Checking, will fix errors if possible...\n";
	} else {
		print "Checking...\n";
	}
	$maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
	$chunkSize = 1000;
	$flagStats = [];
	$objectStats = [];
	$knownFlags = [ 'external', 'gzip', 'object', 'utf-8' ];
	$this->errors = [
		'restore text' => [],
		'restore revision' => [],
		'unfixable' => [],
		'fixed' => [],
		'fixable' => [],
	];

	// "<=" so that the last chunk is still visited when the highest rev_id is
	// exactly a chunk start (1, 1001, 2001, ...)
	for ( $chunkStart = 1; $chunkStart <= $maxRevId; $chunkStart += $chunkSize ) {
		$chunkEnd = $chunkStart + $chunkSize - 1;

		// Fetch revision rows
		$this->oldIdMap = [];
		$dbr->ping();
		$res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ],
			[ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ );
		foreach ( $res as $row ) {
			$this->oldIdMap[$row->rev_id] = $row->rev_text_id;
		}
		$dbr->freeResult( $res );

		if ( !count( $this->oldIdMap ) ) {
			continue;
		}

		// Fetch old_flags
		// Start with every referenced text row marked missing, then tick off the ones found
		$missingTextRows = array_flip( $this->oldIdMap );
		$externalRevs = [];
		$objectRevs = [];
		$res = $dbr->select( 'text', [ 'old_id', 'old_flags' ],
			'old_id IN (' . implode( ',', $this->oldIdMap ) . ')', __METHOD__ );
		foreach ( $res as $row ) {
			$flags = $row->old_flags;
			$id = $row->old_id;

			// Create flagStats row if it doesn't exist
			$flagStats = $flagStats + [ $flags => 0 ];
			// Increment counter
			$flagStats[$flags]++;

			// Not missing
			unset( $missingTextRows[$row->old_id] );

			// Check for external or object
			if ( $flags == '' ) {
				$flagArray = [];
			} else {
				$flagArray = explode( ',', $flags );
			}
			if ( in_array( 'external', $flagArray ) ) {
				$externalRevs[] = $id;
			} elseif ( in_array( 'object', $flagArray ) ) {
				$objectRevs[] = $id;
			}

			// Check for unrecognised flags
			if ( $flags == '0' ) {
				// This is a known bug from 2004
				// It's safe to just erase the old_flags field
				if ( $fix ) {
					$this->error( 'fixed', "Warning: old_flags set to 0", $id );
					$dbw = wfGetDB( DB_MASTER );
					$dbw->ping();
					$dbw->update( 'text', [ 'old_flags' => '' ],
						[ 'old_id' => $id ], __METHOD__ );
					echo "Fixed\n";
				} else {
					$this->error( 'fixable', "Warning: old_flags set to 0", $id );
				}
			} elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
				$this->error( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
			}
		}
		$dbr->freeResult( $res );

		// Output errors for any missing text rows
		foreach ( $missingTextRows as $oldId => $revId ) {
			$this->error( 'restore revision', "Error: missing text row", $oldId );
		}

		// Verify external revisions
		$externalConcatBlobs = [];
		$externalNormalBlobs = [];
		if ( count( $externalRevs ) ) {
			$res = $dbr->select( 'text', [ 'old_id', 'old_flags', 'old_text' ],
				[ 'old_id IN (' . implode( ',', $externalRevs ) . ')' ], __METHOD__ );
			foreach ( $res as $row ) {
				$urlParts = explode( '://', $row->old_text, 2 );
				if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
					$this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
					continue;
				}
				list( $proto, ) = $urlParts;
				if ( $proto != 'DB' ) {
					$this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id );
					continue;
				}
				// "DB://cluster/id[/item]" splits into [ 'DB:', '', cluster, id, (item) ]
				$path = explode( '/', $row->old_text );
				if ( !isset( $path[3] ) || $path[2] === '' || $path[3] === '' ) {
					// No cluster or blob ID to look up; reading the offsets would be undefined
					$this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
					continue;
				}
				$cluster = $path[2];
				$id = $path[3];
				if ( isset( $path[4] ) ) {
					$externalConcatBlobs[$cluster][$id][] = $row->old_id;
				} else {
					$externalNormalBlobs[$cluster][$id][] = $row->old_id;
				}
			}
			$dbr->freeResult( $res );
		}

		// Check external concat blobs for the right header
		$this->checkExternalConcatBlobs( $externalConcatBlobs );

		// Check external normal blobs for existence
		if ( count( $externalNormalBlobs ) ) {
			if ( is_null( $this->dbStore ) ) {
				$this->dbStore = new ExternalStoreDB;
			}
			// Iterate the *normal* blob map; walking the concat map here meant
			// one-part URLs were never actually verified
			foreach ( $externalNormalBlobs as $cluster => $xBlobIds ) {
				$blobIds = array_keys( $xBlobIds );
				// Plain assignment: "=&" on a method return value raises a notice
				$extDb = $this->dbStore->getSlave( $cluster );
				$blobsTable = $this->dbStore->getTable( $extDb );
				// Blob IDs were parsed out of stored URLs, so pass them as an array
				// condition and let the database layer quote them
				$res = $extDb->select( $blobsTable,
					[ 'blob_id' ],
					[ 'blob_id' => $blobIds ], __METHOD__ );
				foreach ( $res as $row ) {
					unset( $xBlobIds[$row->blob_id] );
				}
				$extDb->freeResult( $res );
				// Print errors for missing blobs rows
				foreach ( $xBlobIds as $blobId => $oldId ) {
					$this->error( 'restore text', "Error: missing target $blobId for one-part ES URL", $oldId );
				}
			}
		}

		// Check local objects
		$dbr->ping();
		$concatBlobs = [];
		// NOTE(review): $curIds is filled below but never read afterwards in this method
		$curIds = [];
		if ( count( $objectRevs ) ) {
			$headerLength = 300;
			$res = $dbr->select(
				'text',
				[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
				[ 'old_id IN (' . implode( ',', $objectRevs ) . ')' ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				$oldId = $row->old_id;
				$matches = [];
				if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
					$this->error( 'restore text', "Error: invalid object header", $oldId );
					continue;
				}

				$className = strtolower( $matches[2] );
				if ( strlen( $className ) != $matches[1] ) {
					$this->error(
						'restore text',
						"Error: invalid object header, wrong class name length",
						$oldId
					);
					continue;
				}

				$objectStats = $objectStats + [ $className => 0 ];
				$objectStats[$className]++;

				switch ( $className ) {
					case 'concatenatedgziphistoryblob':
						// Good
						break;
					case 'historyblobstub':
					case 'historyblobcurstub':
						if ( strlen( $row->header ) == $headerLength ) {
							// The header filled the whole LEFT() window, so it may be truncated
							$this->error( 'unfixable', "Error: overlong stub header", $oldId );
							// "continue 2" targets the foreach; a bare continue inside
							// switch only acts like break and warns on PHP 7.3+
							continue 2;
						}
						// NOTE(review): unserialize() on stored row data; kept because the
						// stub's target ID can only be read from the object
						$stubObj = unserialize( $row->header );
						if ( !is_object( $stubObj ) ) {
							$this->error( 'restore text', "Error: unable to unserialize stub object", $oldId );
							continue 2;
						}
						if ( $className == 'historyblobstub' ) {
							$concatBlobs[$stubObj->mOldId][] = $oldId;
						} else {
							$curIds[$stubObj->mCurId][] = $oldId;
						}
						break;
					default:
						$this->error( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
				}
			}
			$dbr->freeResult( $res );
		}

		// Check local concat blob validity
		$externalConcatBlobs = [];
		if ( count( $concatBlobs ) ) {
			$headerLength = 300;
			// The keys come from unserialized stub objects, so use an array
			// condition and let the database layer quote them
			$res = $dbr->select(
				'text',
				[ 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ],
				[ 'old_id' => array_keys( $concatBlobs ) ],
				__METHOD__
			);
			foreach ( $res as $row ) {
				$flags = explode( ',', $row->old_flags );
				if ( in_array( 'external', $flags ) ) {
					// Concat blob is in external storage?
					if ( in_array( 'object', $flags ) ) {
						$urlParts = explode( '/', $row->header );
						if ( $urlParts[0] != 'DB:' ) {
							$this->error(
								'unfixable',
								"Error: unrecognised external storage type \"{$urlParts[0]}\"",
								$row->old_id
							);
						} elseif ( !isset( $urlParts[3] ) ) {
							// Too few path components to name a cluster and a blob
							$this->error(
								'unfixable',
								"Error: invalid external URL on concat bulk row {$row->old_id}",
								$concatBlobs[$row->old_id]
							);
						} else {
							$cluster = $urlParts[2];
							$id = $urlParts[3];
							if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
								$externalConcatBlobs[$cluster][$id] = [];
							}
							$externalConcatBlobs[$cluster][$id] = array_merge(
								$externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
							);
						}
					} else {
						$this->error(
							'unfixable',
							"Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
							$concatBlobs[$row->old_id] );
					}
				} elseif ( strcasecmp(
					substr( $row->header, 0, strlen( self::CONCAT_HEADER ) ),
					self::CONCAT_HEADER
				) ) {
					$this->error(
						'restore text',
						"Error: Incorrect object header for concat bulk row {$row->old_id}",
						$concatBlobs[$row->old_id]
					);
				} # else good

				unset( $concatBlobs[$row->old_id] );
			}
			$dbr->freeResult( $res );
			// NOTE(review): entries still left in $concatBlobs had no matching text
			// row; nothing reports them at present
		}

		// Check targets of unresolved stubs
		$this->checkExternalConcatBlobs( $externalConcatBlobs );
		// next chunk
	}

	print "\n\nErrors:\n";
	foreach ( $this->errors as $name => $errors ) {
		if ( count( $errors ) ) {
			$description = $this->errorDescriptions[$name];
			echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
		}
	}

	if ( count( $this->errors['restore text'] ) && $fix ) {
		if ( (string)$xml !== '' ) {
			$this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
		} else {
			echo "Can't fix text, no XML backup specified\n";
		}
	}

	print "\nFlag statistics:\n";
	$total = array_sum( $flagStats );
	foreach ( $flagStats as $flag => $count ) {
		printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
	}
	print "\nLocal object statistics:\n";
	$total = array_sum( $objectStats );
	foreach ( $objectStats as $className => $count ) {
		printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
	}
}
359 
/**
 * Print an error for one or more text rows and record the affected revisions.
 *
 * The revisions are looked up in $this->oldIdMap (rev_id => text row ID) and
 * added, keyed by rev_id, to $this->errors[$type].
 *
 * @param string $type Key into $this->errors
 * @param string $msg Message prefix to print
 * @param int|string|array $ids A text row ID, or a list of them
 */
function error( $type, $msg, $ids ) {
	// A one-element list is reported exactly like a single ID
	if ( is_array( $ids ) && count( $ids ) == 1 ) {
		$ids = reset( $ids );
	}

	if ( !is_array( $ids ) ) {
		$textId = $ids;
		$affected = array_keys( $this->oldIdMap, $textId );
		if ( count( $affected ) == 1 ) {
			print "$msg in old_id $textId, rev_id {$affected[0]}\n";
		} else {
			print "$msg in old_id $textId, revisions " . implode( ', ', $affected ) . "\n";
		}
	} else {
		// Gather the revisions of every listed text row, in list order
		$affected = [];
		foreach ( $ids as $textId ) {
			foreach ( array_keys( $this->oldIdMap, $textId ) as $revId ) {
				$affected[] = $revId;
			}
		}
		print "$msg in text rows " . implode( ', ', $ids ) .
			", revisions " . implode( ', ', $affected ) . "\n";
	}

	// Array union keeps the entries already recorded for this type
	$this->errors[$type] = $this->errors[$type] + array_flip( $affected );
}
382 
/**
 * Verify that the external blobs referenced by two-part ES URLs exist and
 * start with the concatenated-blob header.
 *
 * A blob whose header differs from CONCAT_HEADER, or which is not found at
 * all, is reported through error() as 'restore text'.
 *
 * @param array $externalConcatBlobs Map of cluster => ( blob ID => list of text row IDs )
 */
public function checkExternalConcatBlobs( $externalConcatBlobs ) {
	if ( !count( $externalConcatBlobs ) ) {
		return;
	}

	if ( is_null( $this->dbStore ) ) {
		$this->dbStore = new ExternalStoreDB;
	}

	// Loop-invariant: only this many leading bytes of each blob are fetched
	$headerLength = strlen( self::CONCAT_HEADER );

	foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
		$blobIds = array_keys( $oldIds );
		if ( !$blobIds ) {
			// Nothing to look up on this cluster
			continue;
		}
		// Plain assignment: "=&" on a method return value raises a notice
		$extDb = $this->dbStore->getSlave( $cluster );
		$blobsTable = $this->dbStore->getTable( $extDb );
		// Blob IDs were parsed out of stored URLs, so pass them as an array
		// condition and let the database layer quote them
		$res = $extDb->select( $blobsTable,
			[ 'blob_id', "LEFT(blob_text, $headerLength) AS header" ],
			[ 'blob_id' => $blobIds ], __METHOD__ );
		foreach ( $res as $row ) {
			if ( strcasecmp( $row->header, self::CONCAT_HEADER ) ) {
				$this->error(
					'restore text',
					"Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
					$oldIds[$row->blob_id]
				);
			}
			// Found, so not missing
			unset( $oldIds[$row->blob_id] );
		}
		$extDb->freeResult( $res );

		// Print errors for missing blobs rows
		foreach ( $oldIds as $blobId => $oldIds2 ) {
			$this->error(
				'restore text',
				"Error: missing target $cluster/$blobId for two-part ES URL",
				$oldIds2
			);
		}
	}
}
422 
/**
 * Restore damaged revision text from an XML backup.
 *
 * Writes the revision IDs to a temporary list file, runs mwdumper to filter
 * the backup down to those revisions, then imports the filtered dump with
 * importRevision() as the per-revision callback.
 *
 * @param array $revIds Revision IDs whose text should be restored
 * @param string $xml XML backup passed to mwdumper as its input argument
 */
public function restoreText( $revIds, $xml ) {
	// Needed for the per-wiki temporary file names below
	global $wgDBname;

	$tmpDir = wfTempDir();

	if ( !count( $revIds ) ) {
		return;
	}

	print "Restoring text from XML backup...\n";

	$revFileName = "$tmpDir/broken-revlist-$wgDBname";
	$filteredXmlFileName = "$tmpDir/filtered-$wgDBname.xml";

	// Write revision list
	// file_put_contents() returns false on failure; compare strictly so only that counts
	if ( file_put_contents( $revFileName, implode( "\n", $revIds ) ) === false ) {
		echo "Error writing revision list, can't restore text\n";

		return;
	}

	// Run mwdumper
	echo "Filtering XML dump...\n";
	$exitStatus = 0;
	// Every argument goes through wfEscapeShellArg() because file names and the
	// backup path end up on a shell command line
	passthru( 'mwdumper ' .
		wfEscapeShellArg(
			"--output=file:$filteredXmlFileName",
			"--filter=revlist:$revFileName",
			$xml
		), $exitStatus
	);

	if ( $exitStatus ) {
		echo "mwdumper died with exit status $exitStatus\n";

		return;
	}

	$file = fopen( $filteredXmlFileName, 'r' );
	if ( !$file ) {
		echo "Unable to open filtered XML file\n";

		return;
	}

	try {
		// Ping both connections before the import starts
		// NOTE(review): presumably guards against a timeout during the mwdumper run; confirm
		$dbr = wfGetDB( DB_REPLICA );
		$dbw = wfGetDB( DB_MASTER );
		$dbr->ping();
		$dbw->ping();

		$source = new ImportStreamSource( $file );
		$importer = new WikiImporter(
			$source,
			MediaWikiServices::getInstance()->getMainConfig()
		);
		$importer->setRevisionCallback( [ $this, 'importRevision' ] );
		$importer->doImport();
	} finally {
		// Release the dump handle even if the import throws
		fclose( $file );
	}
}
480 
/**
 * Revision callback for the importer: overwrite the text row of one revision
 * with the content found in the filtered dump.
 *
 * Revisions with no content, blank content, no ID, or no matching revision
 * row are skipped with a message. On success the revision moves from the
 * 'restore text' error list to the 'fixed' list.
 *
 * @param object &$revision Revision from the dump; getID() and getContent() are called on it
 * @param object &$importer The importer invoking the callback (unused)
 */
public function importRevision( &$revision, &$importer ) {
	$id = $revision->getID();
	$content = $revision->getContent( Revision::RAW );
	// Normalise a missing ID to '' so the messages below print nothing for it
	$id = $id ? $id : '';

	if ( $content === null ) {
		echo "Revision $id is broken, we have no content available\n";

		return;
	}

	$text = $content->serialize();
	if ( $text === '' ) {
		// This is what happens if the revision was broken at the time the
		// dump was made. Unfortunately, it also happens if the revision was
		// legitimately blank, so there's no way to tell the difference. To
		// be safe, we'll skip it and leave it broken

		echo "Revision $id is blank in the dump, may have been broken before export\n";

		return;
	}

	if ( !$id ) {
		// No ID, can't import
		echo "No id tag in revision, can't import\n";

		return;
	}

	// Find text row again
	$dbr = wfGetDB( DB_REPLICA );
	$oldId = $dbr->selectField( 'revision', 'rev_text_id', [ 'rev_id' => $id ], __METHOD__ );
	if ( !$oldId ) {
		echo "Missing revision row for rev_id $id\n";

		return;
	}

	// Compress the text
	// compressRevisionText() takes $text by reference and returns the old_flags
	// value to store with it; without this call $flags would be undefined
	$flags = Revision::compressRevisionText( $text );

	// Update the text row
	$dbw = wfGetDB( DB_MASTER );
	$dbw->update( 'text',
		[ 'old_flags' => $flags, 'old_text' => $text ],
		[ 'old_id' => $oldId ],
		__METHOD__, [ 'LIMIT' => 1 ]
	);

	// Remove it from the unfixed list and add it to the fixed list
	unset( $this->errors['restore text'][$id] );
	$this->errors['fixed'][$id] = true;
}
535 }
WikiImporter
XML file reader for the page data importer.
Definition: WikiImporter.php:34
CheckStorage
Maintenance script to do various checks on external storage.
Definition: checkStorage.php:48
errors
if the prop value should be in the metadata multi language array can modify can modify indexed by page_id indexed by prefixed DB keys can modify can modify can modify this should be populated with an alert message to that effect to be fed to an HTMLForm object and populate $result with the reason in the form of error messages should be plain text with no special etc to show that they re errors
Definition: hooks.txt:1730
ExternalStoreDB
DB accessible external objects.
Definition: ExternalStoreDB.php:36
CheckStorage\$dbStore
$dbStore
Definition: checkStorage.php:51
$optionsWithoutArgs
global $optionsWithoutArgs
Definition: commandLine.inc:28
captcha-old.count
count
Definition: captcha-old.py:249
CheckStorage\check
check( $fix=false, $xml='')
Definition: checkStorage.php:61
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
unserialize
unserialize( $serialized)
Definition: ApiMessage.php:185
$res
$res
Definition: database.txt:21
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:302
ImportStreamSource
Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
Definition: ImportStreamSource.php:32
CheckStorage\importRevision
importRevision(&$revision, &$importer)
Definition: checkStorage.php:481
CheckStorage\$errorDescriptions
$errorDescriptions
Definition: checkStorage.php:53
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
CheckStorage\$oldIdMap
$oldIdMap
Definition: checkStorage.php:50
$wgDBname
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname
Definition: memcached.txt:96
wfGetDB
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:2856
$matches
$matches
Definition: NoLocalSettings.php:24
Revision\compressRevisionText
static compressRevisionText(&$text)
If $wgCompressRevisions is enabled, we will compress data.
Definition: Revision.php:1323
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
DB_MASTER
const DB_MASTER
Definition: defines.php:26
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
CheckStorage\CONCAT_HEADER
const CONCAT_HEADER
Definition: checkStorage.php:49
wfEscapeShellArg
wfEscapeShellArg()
Version of escapeshellarg() that works better on Windows.
Definition: GlobalFunctions.php:2243
Revision\RAW
const RAW
Definition: Revision.php:100
CheckStorage\restoreText
restoreText( $revIds, $xml)
Definition: checkStorage.php:423
$args
if( $line===false) $args
Definition: cdb.php:63
wfTempDir
wfTempDir()
Tries to get the system directory for temporary files.
Definition: GlobalFunctions.php:2107
$dbr
if(! $regexes) $dbr
Definition: cleanup.php:94
$options
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1965
CheckStorage\error
error( $type, $msg, $ids)
Definition: checkStorage.php:360
CheckStorage\$errors
$errors
Definition: checkStorage.php:50
$path
$path
Definition: NoLocalSettings.php:26
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$source
$source
Definition: mwdoc-filter.php:46
MediaWikiServices
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
$flags
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2801
CheckStorage\checkExternalConcatBlobs
checkExternalConcatBlobs( $externalConcatBlobs)
Definition: checkStorage.php:383
$type
$type
Definition: testCompression.php:48