MediaWiki  1.23.5
checkStorage.php
Go to the documentation of this file.
1 <?php
// Standalone entry point: only bootstrap and run the check when this file is
// executed directly, i.e. when MEDIAWIKI has not already been defined.
if ( !defined( 'MEDIAWIKI' ) ) {
	require_once __DIR__ . '/../commandLine.inc';

	// $options and $args are not defined in this file; presumably they are
	// populated by commandLine.inc -- TODO confirm.
	$fix = isset( $options['fix'] );
	// First positional argument (if any) is handed to check() as $xml;
	// false means "not given".
	$xml = isset( $args[0] ) ? $args[0] : false;

	$cs = new CheckStorage;
	$cs->check( $fix, $xml );
}
36 
37 // ----------------------------------------------------------------------------------
38 
/**
 * Maintenance script to do various checks on external storage.
 *
 * check() walks the revision table in fixed-size chunks and, for every
 * revision, validates the associated text row: its flags, external-store
 * URL, serialized object header and the targets of history blob stubs.
 * Problems are printed as they are found and the affected revision IDs are
 * collected in $errors, keyed by error category.
 */
class CheckStorage {
	/** Serialized-object prefix expected at the start of a concatenated gzip history blob */
	const CONCAT_HEADER = 'O:27:"concatenatedgziphistoryblob"';

	/**
	 * $oldIdMap: rev_id => rev_text_id for the chunk currently being checked.
	 * $errors: error category => array keyed by affected rev_id.
	 */
	public $oldIdMap, $errors;

	/** ExternalStoreDB instance, created lazily on first use */
	public $dbStore = null;

	/** Human-readable description for each error category used in $errors */
	public $errorDescriptions = array(
		'restore text' => 'Damaged text, need to be restored from a backup',
		'restore revision' => 'Damaged revision row, need to be restored from a backup',
		'unfixable' => 'Unexpected errors with no automated fixing method',
		'fixed' => 'Errors already fixed',
		'fixable' => 'Errors which would already be fixed if --fix was specified',
	);

	/**
	 * Check every revision's text storage and print a report.
	 *
	 * @param bool $fix Whether to repair what can be repaired (old_flags set
	 *   to "0", and damaged text when an XML dump is supplied)
	 * @param string|bool $xml XML dump passed to restoreText(); an empty
	 *   string or false means no dump is available
	 */
	function check( $fix = false, $xml = '' ) {
		$dbr = wfGetDB( DB_SLAVE );
		if ( $fix ) {
			print "Checking, will fix errors if possible...\n";
		} else {
			print "Checking...\n";
		}
		$maxRevId = $dbr->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
		$chunkSize = 1000;
		$flagStats = array();
		$objectStats = array();
		$knownFlags = array( 'external', 'gzip', 'object', 'utf-8' );
		$this->errors = array(
			'restore text' => array(),
			'restore revision' => array(),
			'unfixable' => array(),
			'fixed' => array(),
			'fixable' => array(),
		);

		// "<=" rather than "<": a chunk that starts exactly at MAX(rev_id)
		// still contains that one revision and must not be skipped.
		for ( $chunkStart = 1; $chunkStart <= $maxRevId; $chunkStart += $chunkSize ) {
			$chunkEnd = $chunkStart + $chunkSize - 1;
			// print "$chunkStart of $maxRevId\n";

			// Fetch revision rows
			$this->oldIdMap = array();
			$dbr->ping();
			$res = $dbr->select( 'revision', array( 'rev_id', 'rev_text_id' ),
				array( "rev_id BETWEEN $chunkStart AND $chunkEnd" ), __METHOD__ );
			foreach ( $res as $row ) {
				$this->oldIdMap[$row->rev_id] = $row->rev_text_id;
			}
			$dbr->freeResult( $res );

			if ( !count( $this->oldIdMap ) ) {
				continue;
			}

			// Fetch old_flags
			// Start by assuming every text row is missing (old_id => rev_id);
			// rows that are actually found get removed below.
			$missingTextRows = array_flip( $this->oldIdMap );
			$externalRevs = array();
			$objectRevs = array();
			$res = $dbr->select( 'text', array( 'old_id', 'old_flags' ),
				'old_id IN (' . implode( ',', $this->oldIdMap ) . ')', __METHOD__ );
			foreach ( $res as $row ) {
				$flags = $row->old_flags;
				$id = $row->old_id;

				// Create flagStats row if it doesn't exist
				$flagStats = $flagStats + array( $flags => 0 );
				// Increment counter
				$flagStats[$flags]++;

				// Not missing
				unset( $missingTextRows[$row->old_id] );

				// Check for external or object
				if ( $flags == '' ) {
					$flagArray = array();
				} else {
					$flagArray = explode( ',', $flags );
				}
				if ( in_array( 'external', $flagArray ) ) {
					$externalRevs[] = $id;
				} elseif ( in_array( 'object', $flagArray ) ) {
					$objectRevs[] = $id;
				}

				// Check for unrecognised flags
				if ( $flags == '0' ) {
					// This is a known bug from 2004
					// It's safe to just erase the old_flags field
					if ( $fix ) {
						$this->error( 'fixed', "Warning: old_flags set to 0", $id );
						$dbw = wfGetDB( DB_MASTER );
						$dbw->ping();
						$dbw->update( 'text', array( 'old_flags' => '' ),
							array( 'old_id' => $id ), __METHOD__ );
						echo "Fixed\n";
					} else {
						$this->error( 'fixable', "Warning: old_flags set to 0", $id );
					}
				} elseif ( count( array_diff( $flagArray, $knownFlags ) ) ) {
					$this->error( 'unfixable', "Error: invalid flags field \"$flags\"", $id );
				}
			}
			$dbr->freeResult( $res );

			// Output errors for any missing text rows
			foreach ( $missingTextRows as $oldId => $revId ) {
				$this->error( 'restore revision', "Error: missing text row", $oldId );
			}

			// Verify external revisions
			// Both maps are cluster => blob id => list of old_ids pointing at it
			$externalConcatBlobs = array();
			$externalNormalBlobs = array();
			if ( count( $externalRevs ) ) {
				$res = $dbr->select( 'text', array( 'old_id', 'old_flags', 'old_text' ),
					array( 'old_id IN (' . implode( ',', $externalRevs ) . ')' ), __METHOD__ );
				foreach ( $res as $row ) {
					$urlParts = explode( '://', $row->old_text, 2 );
					if ( count( $urlParts ) !== 2 || $urlParts[1] == '' ) {
						$this->error( 'restore text', "Error: invalid URL \"{$row->old_text}\"", $row->old_id );
						continue;
					}
					list( $proto, ) = $urlParts;
					if ( $proto != 'DB' ) {
						$this->error( 'restore text', "Error: invalid external protocol \"$proto\"", $row->old_id );
						continue;
					}
					// "DB://cluster/id[/item]" splits into 'DB:', '', cluster, id[, item]
					$path = explode( '/', $row->old_text );
					$cluster = $path[2];
					$id = $path[3];
					if ( isset( $path[4] ) ) {
						$externalConcatBlobs[$cluster][$id][] = $row->old_id;
					} else {
						$externalNormalBlobs[$cluster][$id][] = $row->old_id;
					}
				}
				$dbr->freeResult( $res );
			}

			// Check external concat blobs for the right header
			$this->checkExternalConcatBlobs( $externalConcatBlobs );

			// Check external normal blobs for existence
			if ( count( $externalNormalBlobs ) ) {
				if ( is_null( $this->dbStore ) ) {
					$this->dbStore = new ExternalStoreDB;
				}
				// Iterate the one-part ("normal") URLs here; the two-part
				// (concat) URLs were already handled just above.
				foreach ( $externalNormalBlobs as $cluster => $xBlobIds ) {
					$blobIds = array_keys( $xBlobIds );
					$extDb =& $this->dbStore->getSlave( $cluster );
					$blobsTable = $this->dbStore->getTable( $extDb );
					$res = $extDb->select( $blobsTable,
						array( 'blob_id' ),
						array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), __METHOD__ );
					foreach ( $res as $row ) {
						unset( $xBlobIds[$row->blob_id] );
					}
					$extDb->freeResult( $res );
					// Print errors for missing blobs rows
					foreach ( $xBlobIds as $blobId => $oldIds ) {
						$this->error( 'restore text', "Error: missing target $blobId for one-part ES URL", $oldIds );
					}
				}
			}

			// Check local objects
			$dbr->ping();
			$concatBlobs = array();
			$curIds = array();
			if ( count( $objectRevs ) ) {
				$headerLength = 300;
				$res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
					array( 'old_id IN (' . implode( ',', $objectRevs ) . ')' ), __METHOD__ );
				foreach ( $res as $row ) {
					$oldId = $row->old_id;
					$matches = array();
					if ( !preg_match( '/^O:(\d+):"(\w+)"/', $row->header, $matches ) ) {
						$this->error( 'restore text', "Error: invalid object header", $oldId );
						continue;
					}

					$className = strtolower( $matches[2] );
					if ( strlen( $className ) != $matches[1] ) {
						$this->error( 'restore text', "Error: invalid object header, wrong class name length", $oldId );
						continue;
					}

					$objectStats = $objectStats + array( $className => 0 );
					$objectStats[$className]++;

					switch ( $className ) {
						case 'concatenatedgziphistoryblob':
							// Good
							break;
						case 'historyblobstub':
						case 'historyblobcurstub':
							// A header that fills the whole LEFT() window was
							// probably truncated, so it cannot be unserialized.
							if ( strlen( $row->header ) == $headerLength ) {
								$this->error( 'unfixable', "Error: overlong stub header", $oldId );
								// "continue 2" targets the foreach; a bare
								// "continue" inside switch only acts as "break".
								continue 2;
							}
							$stubObj = unserialize( $row->header );
							if ( !is_object( $stubObj ) ) {
								$this->error( 'restore text', "Error: unable to unserialize stub object", $oldId );
								continue 2;
							}
							if ( $className == 'historyblobstub' ) {
								$concatBlobs[$stubObj->mOldId][] = $oldId;
							} else {
								$curIds[$stubObj->mCurId][] = $oldId;
							}
							break;
						default:
							$this->error( 'unfixable', "Error: unrecognised object class \"$className\"", $oldId );
					}
				}
				$dbr->freeResult( $res );
			}

			// Check local concat blob validity
			$externalConcatBlobs = array();
			if ( count( $concatBlobs ) ) {
				$headerLength = 300;
				$res = $dbr->select( 'text', array( 'old_id', 'old_flags', "LEFT(old_text, $headerLength) AS header" ),
					array( 'old_id IN (' . implode( ',', array_keys( $concatBlobs ) ) . ')' ), __METHOD__ );
				foreach ( $res as $row ) {
					$flags = explode( ',', $row->old_flags );
					if ( in_array( 'external', $flags ) ) {
						// Concat blob is in external storage?
						if ( in_array( 'object', $flags ) ) {
							$urlParts = explode( '/', $row->header );
							if ( $urlParts[0] != 'DB:' ) {
								$this->error( 'unfixable', "Error: unrecognised external storage type \"{$urlParts[0]}\"", $row->old_id );
							} else {
								$cluster = $urlParts[2];
								$id = $urlParts[3];
								if ( !isset( $externalConcatBlobs[$cluster][$id] ) ) {
									$externalConcatBlobs[$cluster][$id] = array();
								}
								$externalConcatBlobs[$cluster][$id] = array_merge(
									$externalConcatBlobs[$cluster][$id], $concatBlobs[$row->old_id]
								);
							}
						} else {
							$this->error( 'unfixable', "Error: invalid flags \"{$row->old_flags}\" on concat bulk row {$row->old_id}",
								$concatBlobs[$row->old_id] );
						}
					} elseif ( strcasecmp( substr( $row->header, 0, strlen( self::CONCAT_HEADER ) ), self::CONCAT_HEADER ) ) {
						$this->error( 'restore text', "Error: Incorrect object header for concat bulk row {$row->old_id}",
							$concatBlobs[$row->old_id] );
					} # else good

					unset( $concatBlobs[$row->old_id] );
				}
				$dbr->freeResult( $res );
			}

			// Check targets of unresolved stubs
			$this->checkExternalConcatBlobs( $externalConcatBlobs );

			// next chunk
		}

		print "\n\nErrors:\n";
		foreach ( $this->errors as $name => $errors ) {
			if ( count( $errors ) ) {
				$description = $this->errorDescriptions[$name];
				echo "$description: " . implode( ',', array_keys( $errors ) ) . "\n";
			}
		}

		if ( count( $this->errors['restore text'] ) && $fix ) {
			if ( (string)$xml !== '' ) {
				$this->restoreText( array_keys( $this->errors['restore text'] ), $xml );
			} else {
				echo "Can't fix text, no XML backup specified\n";
			}
		}

		print "\nFlag statistics:\n";
		$total = array_sum( $flagStats );
		foreach ( $flagStats as $flag => $count ) {
			printf( "%-30s %10d %5.2f%%\n", $flag, $count, $count / $total * 100 );
		}
		print "\nLocal object statistics:\n";
		$total = array_sum( $objectStats );
		foreach ( $objectStats as $className => $count ) {
			printf( "%-30s %10d %5.2f%%\n", $className, $count, $count / $total * 100 );
		}
	}

	/**
	 * Print an error message and record the affected revisions.
	 *
	 * Text row IDs are translated to revision IDs through $this->oldIdMap,
	 * and those revision IDs become keys of $this->errors[$type].
	 *
	 * @param string $type Error category, a key of $this->errors
	 * @param string $msg Message prefix to print
	 * @param int|string|array $ids One text row ID (old_id) or a list of them
	 */
	function error( $type, $msg, $ids ) {
		// A single-element list is reported like a scalar ID
		if ( is_array( $ids ) && count( $ids ) == 1 ) {
			$ids = reset( $ids );
		}
		if ( is_array( $ids ) ) {
			$revIds = array();
			foreach ( $ids as $id ) {
				$revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) );
			}
			print "$msg in text rows " . implode( ', ', $ids ) .
				", revisions " . implode( ', ', $revIds ) . "\n";
		} else {
			$id = $ids;
			$revIds = array_keys( $this->oldIdMap, $id );
			if ( count( $revIds ) == 1 ) {
				print "$msg in old_id $id, rev_id {$revIds[0]}\n";
			} else {
				print "$msg in old_id $id, revisions " . implode( ', ', $revIds ) . "\n";
			}
		}
		// "+" keeps entries already recorded for this category
		$this->errors[$type] = $this->errors[$type] + array_flip( $revIds );
	}

	/**
	 * Verify that the targets of two-part external store URLs exist and start
	 * with the concatenated-blob header; report 'restore text' errors otherwise.
	 *
	 * @param array $externalConcatBlobs cluster => blob id => list of old_ids
	 */
	function checkExternalConcatBlobs( $externalConcatBlobs ) {
		if ( !count( $externalConcatBlobs ) ) {
			return;
		}

		if ( is_null( $this->dbStore ) ) {
			$this->dbStore = new ExternalStoreDB;
		}

		foreach ( $externalConcatBlobs as $cluster => $oldIds ) {
			$blobIds = array_keys( $oldIds );
			$extDb =& $this->dbStore->getSlave( $cluster );
			$blobsTable = $this->dbStore->getTable( $extDb );
			$headerLength = strlen( self::CONCAT_HEADER );
			$res = $extDb->select( $blobsTable,
				array( 'blob_id', "LEFT(blob_text, $headerLength) AS header" ),
				array( 'blob_id IN( ' . implode( ',', $blobIds ) . ')' ), __METHOD__ );
			foreach ( $res as $row ) {
				if ( strcasecmp( $row->header, self::CONCAT_HEADER ) ) {
					$this->error( 'restore text', "Error: invalid header on target $cluster/{$row->blob_id} of two-part ES URL",
						$oldIds[$row->blob_id] );
				}
				// Found, so not missing
				unset( $oldIds[$row->blob_id] );
			}
			$extDb->freeResult( $res );

			// Print errors for missing blobs rows
			foreach ( $oldIds as $blobId => $oldIds2 ) {
				$this->error( 'restore text', "Error: missing target $cluster/$blobId for two-part ES URL", $oldIds2 );
			}
		}
	}

	/**
	 * Restore damaged revision text from an XML dump.
	 *
	 * Writes the revision list to a temporary file, runs the external
	 * "mwdumper" command to filter the dump down to those revisions, then
	 * imports the filtered dump with importRevision() as revision callback.
	 *
	 * @param array $revIds Revision IDs whose text should be restored
	 * @param string $xml Dump argument passed through to mwdumper
	 */
	function restoreText( $revIds, $xml ) {
		global $wgDBname;
		$tmpDir = wfTempDir();

		if ( !count( $revIds ) ) {
			return;
		}

		print "Restoring text from XML backup...\n";

		$revFileName = "$tmpDir/broken-revlist-$wgDBname";
		$filteredXmlFileName = "$tmpDir/filtered-$wgDBname.xml";

		// Write revision list
		if ( !file_put_contents( $revFileName, implode( "\n", $revIds ) ) ) {
			echo "Error writing revision list, can't restore text\n";
			return;
		}

		// Run mwdumper
		echo "Filtering XML dump...\n";
		$exitStatus = 0;
		passthru( 'mwdumper ' .
			wfEscapeShellArg(
				"--output=file:$filteredXmlFileName",
				"--filter=revlist:$revFileName",
				$xml
			), $exitStatus
		);

		if ( $exitStatus ) {
			echo "mwdumper died with exit status $exitStatus\n";
			return;
		}

		$file = fopen( $filteredXmlFileName, 'r' );
		if ( !$file ) {
			echo "Unable to open filtered XML file\n";
			return;
		}

		// Filtering may have taken a while; ping both connections before use
		$dbr = wfGetDB( DB_SLAVE );
		$dbw = wfGetDB( DB_MASTER );
		$dbr->ping();
		$dbw->ping();

		$source = new ImportStreamSource( $file );
		$importer = new WikiImporter( $source );
		$importer->setRevisionCallback( array( &$this, 'importRevision' ) );
		$importer->doImport();
	}

	/**
	 * Revision callback for WikiImporter: overwrite the text row of one
	 * revision with the content found in the dump.
	 *
	 * @param object $revision Revision object supplied by the importer; only
	 *   getID() and getContent() are used here
	 * @param object $importer The importer invoking the callback (unused)
	 */
	function importRevision( &$revision, &$importer ) {
		$id = $revision->getID();
		$content = $revision->getContent( Revision::RAW );
		$id = $id ? $id : '';

		if ( $content === null ) {
			echo "Revision $id is broken, we have no content available\n";
			return;
		}

		$text = $content->serialize();
		if ( $text === '' ) {
			// This is what happens if the revision was broken at the time the
			// dump was made. Unfortunately, it also happens if the revision was
			// legitimately blank, so there's no way to tell the difference. To
			// be safe, we'll skip it and leave it broken

			echo "Revision $id is blank in the dump, may have been broken before export\n";
			return;
		}

		if ( !$id ) {
			// No ID, can't import
			echo "No id tag in revision, can't import\n";
			return;
		}

		// Find text row again
		$dbr = wfGetDB( DB_SLAVE );
		$oldId = $dbr->selectField( 'revision', 'rev_text_id', array( 'rev_id' => $id ), __METHOD__ );
		if ( !$oldId ) {
			echo "Missing revision row for rev_id $id\n";
			return;
		}

		// Compress the text
		// $text is passed by reference; the return value is stored as old_flags
		$flags = Revision::compressRevisionText( $text );

		// Update the text row
		$dbw = wfGetDB( DB_MASTER );
		$dbw->update( 'text',
			array( 'old_flags' => $flags, 'old_text' => $text ),
			array( 'old_id' => $oldId ),
			__METHOD__, array( 'LIMIT' => 1 )
		);

		// Remove it from the unfixed list and add it to the fixed list
		unset( $this->errors['restore text'][$id] );
		$this->errors['fixed'][$id] = true;
	}
}
WikiImporter
XML file reader for the page data importer.
Definition: Import.php:33
CheckStorage
Maintenance script to do various checks on external storage.
Definition: checkStorage.php:44
DB_MASTER
const DB_MASTER
Definition: Defines.php:56
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
ExternalStoreDB
DB accessible external objects.
Definition: ExternalStoreDB.php:31
CheckStorage\$dbStore
$dbStore
Definition: checkStorage.php:47
wfGetDB
& wfGetDB( $db, $groups=array(), $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:3659
CheckStorage\check
check( $fix=false, $xml='')
Definition: checkStorage.php:57
$flags
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2113
ImportStreamSource
Definition: Import.php:1630
CheckStorage\importRevision
importRevision(&$revision, &$importer)
Definition: checkStorage.php:442
CheckStorage\$errorDescriptions
$errorDescriptions
Definition: checkStorage.php:49
$dbr
$dbr
Definition: testCompression.php:48
CheckStorage\$oldIdMap
$oldIdMap
Definition: checkStorage.php:46
$total
$total
Definition: Utf8Test.php:92
$wgDBname
controlled by $wgMainCacheType controlled by $wgParserCacheType controlled by $wgMessageCacheType If you set CACHE_NONE to one of the three control default value for MediaWiki still create a but requests to it are no ops and we always fall through to the database If the cache daemon can t be it should also disable itself fairly smoothly By $wgMemc is used but when it is $parserMemc or $messageMemc this is mentioned $wgDBname
Definition: memcached.txt:96
Revision\compressRevisionText
static compressRevisionText(&$text)
If $wgCompressRevisions is enabled, we will compress data.
Definition: Revision.php:1261
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
errors
if the prop value should be in the metadata multi language array can modify can modify indexed by page_id indexed by prefixed DB keys can modify can modify can modify this should be populated with an alert message to that effect to be fed to an HTMLForm object and populate $result with the reason in the form of error messages should be plain text with no special etc to show that they re errors
Definition: hooks.txt:1318
$options
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1530
CheckStorage\CONCAT_HEADER
const CONCAT_HEADER
Definition: checkStorage.php:45
$name
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:336
$matches
if(!defined( 'MEDIAWIKI')) if(!isset( $wgVersion)) $matches
Definition: NoLocalSettings.php:33
wfEscapeShellArg
wfEscapeShellArg()
Windows-compatible version of escapeshellarg() Windows doesn't recognise single-quotes in the shell,...
Definition: GlobalFunctions.php:2705
Revision\RAW
const RAW
Definition: Revision.php:74
CheckStorage\restoreText
restoreText( $revIds, $xml)
Definition: checkStorage.php:390
$file
if(PHP_SAPI !='cli') $file
Definition: UtfNormalTest2.php:30
$count
$count
Definition: UtfNormalTest2.php:96
$args
if( $line===false) $args
Definition: cdb.php:62
DB_SLAVE
const DB_SLAVE
Definition: Defines.php:55
wfTempDir
wfTempDir()
Tries to get the system directory for temporary files.
Definition: GlobalFunctions.php:2564
CheckStorage\error
error( $type, $msg, $ids)
Definition: checkStorage.php:333
CheckStorage\$errors
$errors
Definition: checkStorage.php:46
$path
$path
Definition: NoLocalSettings.php:35
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$source
if(PHP_SAPI !='cli') $source
Definition: mwdoc-filter.php:18
$res
$res
Definition: database.txt:21
CheckStorage\checkExternalConcatBlobs
checkExternalConcatBlobs( $externalConcatBlobs)
Definition: checkStorage.php:356
$type
$type
Definition: testCompression.php:46