MediaWiki  1.23.0
fixBug20757.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/../Maintenance.php';
25 
/**
 * Maintenance script to fix bug 20757.
 *
 * Walks the text table looking for rows whose old_text is a serialized
 * HistoryBlobStub, looks up the stub's secondary text row in blob_tracking,
 * verifies that the referenced external blob really contains the text with the
 * stub's hash, and then rewrites the primary text row to point directly at that
 * external blob. The script assumes that blob_tracking is intact.
 */
class FixBug20757 extends Maintenance {
	/** Maximum number of text rows fetched per batch query in execute() */
	public $batchSize = 10000;

	/** Cache filled by getRevTextMap(): page ID => array( rev_text_id => rev_id ) */
	public $mapCache = array();

	/** Total number of rev_text_id entries currently stored in $mapCache */
	public $mapCacheSize = 0;

	/** Entry count above which getRevTextMap() starts evicting cached maps */
	public $maxMapCacheSize = 1000000;

	/**
	 * Register the script description and its command line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->mDescription = 'Script to fix bug 20757 assuming that blob_tracking is intact';
		$this->addOption( 'dry-run', 'Report only' );
		$this->addOption( 'start', 'old_id to start at', false, true );
	}

	/**
	 * Do the actual work.
	 *
	 * Processes the text table in batches of $batchSize rows ordered by old_id,
	 * classifying every stub row as good, fixed or unrecoverable, and prints a
	 * summary at the end. With --dry-run nothing is written to the database.
	 */
	public function execute() {
		$dbr = wfGetDB( DB_SLAVE );
		$dbw = wfGetDB( DB_MASTER );

		$dryRun = $this->getOption( 'dry-run' );
		if ( $dryRun ) {
			print "Dry run only.\n";
		}

		$startId = $this->getOption( 'start', 0 );
		$numGood = 0;
		$numFixed = 0;
		$numBad = 0;

		// Only used for the progress display
		$totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', false, __METHOD__ );

		// SQL expression yielding the lower-cased first 22 bytes of old_text.
		// It must be defined for every backend because it is always used in
		// the batch query below.
		if ( $dbr->getType() == 'mysql' ) {
			// In MySQL 4.1+, the binary field old_text has a non-working LOWER() function
			$lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))';
		} else {
			// CONVERT(... USING ...) is MySQL syntax; use the plain form elsewhere
			$lowerLeft = 'LOWER(LEFT(old_text,22))';
		}

		while ( true ) {
			print "ID: $startId / $totalRevs\r";

			$res = $dbr->select(
				'text',
				array( 'old_id', 'old_flags', 'old_text' ),
				array(
					'old_id > ' . intval( $startId ),
					'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
					"$lowerLeft = 'o:15:\"historyblobstub\"'",
				),
				__METHOD__,
				array(
					'ORDER BY' => 'old_id',
					'LIMIT' => $this->batchSize,
				)
			);

			if ( !$res->numRows() ) {
				break;
			}

			$secondaryIds = array();
			$stubs = array();

			foreach ( $res as $row ) {
				// Remember the highest ID seen so the next batch continues after it
				$startId = $row->old_id;

				// Basic sanity checks
				$obj = unserialize( $row->old_text );
				if ( $obj === false ) {
					print "{$row->old_id}: unrecoverable: cannot unserialize\n";
					++$numBad;
					continue;
				}

				if ( !is_object( $obj ) ) {
					print "{$row->old_id}: unrecoverable: unserialized to type " .
						gettype( $obj ) . ", possible double-serialization\n";
					++$numBad;
					continue;
				}

				if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) {
					print "{$row->old_id}: unrecoverable: unexpected object class " .
						get_class( $obj ) . "\n";
					++$numBad;
					continue;
				}

				// Process flags
				$flags = explode( ',', $row->old_flags );
				if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) {
					$legacyEncoding = false;
				} else {
					$legacyEncoding = true;
				}

				// Queue the stub for future batch processing
				$id = intval( $obj->mOldId );
				$secondaryIds[] = $id;
				$stubs[$row->old_id] = array(
					'legacyEncoding' => $legacyEncoding,
					'secondaryId' => $id,
					'hash' => $obj->mHash,
				);
			}

			$secondaryIds = array_unique( $secondaryIds );

			if ( !count( $secondaryIds ) ) {
				// Every row of this batch was rejected above; move on to the next batch
				continue;
			}

			// Run the batch query on blob_tracking
			$res = $dbr->select(
				'blob_tracking',
				'*',
				array(
					'bt_text_id' => $secondaryIds,
				),
				__METHOD__
			);
			$trackedBlobs = array();
			foreach ( $res as $row ) {
				$trackedBlobs[$row->bt_text_id] = $row;
			}

			// Process the stubs
			foreach ( $stubs as $primaryId => $stub ) {
				$secondaryId = $stub['secondaryId'];
				if ( !isset( $trackedBlobs[$secondaryId] ) ) {
					// No tracked blob. Work out what went wrong
					$secondaryRow = $dbr->selectRow(
						'text',
						array( 'old_flags', 'old_text' ),
						array( 'old_id' => $secondaryId ),
						__METHOD__
					);
					if ( !$secondaryRow ) {
						print "$primaryId: unrecoverable: secondary row is missing\n";
						++$numBad;
					} elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) {
						// Not broken yet, and not in the tracked clusters so it won't get
						// broken by the current RCT run.
						++$numGood;
					} elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) {
						print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
						++$numBad;
					} else {
						print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
						++$numBad;
					}
					unset( $stubs[$primaryId] );
					continue;
				}
				$trackRow = $trackedBlobs[$secondaryId];

				// Check that the specified text really is available in the tracked source row
				$url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}";
				$text = ExternalStore::fetchFromURL( $url );
				if ( $text === false ) {
					print "$primaryId: unrecoverable: source text missing\n";
					++$numBad;
					unset( $stubs[$primaryId] );
					continue;
				}
				if ( md5( $text ) !== $stub['hash'] ) {
					print "$primaryId: unrecoverable: content hashes do not match\n";
					++$numBad;
					unset( $stubs[$primaryId] );
					continue;
				}

				// Find the page_id and rev_id
				// The page is probably the same as the page of the secondary row
				$pageId = intval( $trackRow->bt_page );
				if ( !$pageId ) {
					$revId = $pageId = 0;
				} else {
					$revId = $this->findTextIdInPage( $pageId, $primaryId );
					if ( !$revId ) {
						// Actually an orphan
						$pageId = $revId = 0;
					}
				}

				$newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8';

				if ( !$dryRun ) {
					// Reset the text row to point to the original copy
					$dbw->begin( __METHOD__ );
					$dbw->update(
						'text',
						// SET
						array(
							'old_flags' => $newFlags,
							'old_text' => $url
						),
						// WHERE
						array( 'old_id' => $primaryId ),
						__METHOD__
					);

					// Add a blob_tracking row so that the new reference can be recompressed
					// without needing to run trackBlobs.php again
					$dbw->insert( 'blob_tracking',
						array(
							'bt_page' => $pageId,
							'bt_rev_id' => $revId,
							'bt_text_id' => $primaryId,
							'bt_cluster' => $trackRow->bt_cluster,
							'bt_blob_id' => $trackRow->bt_blob_id,
							'bt_cgz_hash' => $stub['hash'],
							'bt_new_url' => null,
							'bt_moved' => 0,
						),
						__METHOD__
					);
					$dbw->commit( __METHOD__ );
					$this->waitForSlaves();
				}

				print "$primaryId: resolved to $url\n";
				++$numFixed;
			}
		}

		print "\n";
		print "Fixed: $numFixed\n";
		print "Unrecoverable: $numBad\n";
		print "Good stubs: $numGood\n";
	}

	/**
	 * Throttled replication wait: calls wfWaitForSlaves() once per 50
	 * invocations instead of after every single write.
	 *
	 * The call counter is a function-static variable, so it is shared by all
	 * calls made during the script run.
	 */
	public function waitForSlaves() {
		static $iteration = 0;
		// Count each call exactly once and only wait when the threshold is reached
		if ( ++$iteration >= 50 ) {
			wfWaitForSlaves();
			$iteration = 0;
		}
	}

	/**
	 * Look up the revision of a page that uses the given text row.
	 *
	 * @param int $pageId Page to search
	 * @param int $textId Text row ID (rev_text_id) to look for
	 * @return int|string|null The rev_id as stored in the map, or null if no
	 *   revision of the page uses that text row
	 */
	public function findTextIdInPage( $pageId, $textId ) {
		$ids = $this->getRevTextMap( $pageId );

		return isset( $ids[$textId] ) ? $ids[$textId] : null;
	}

	/**
	 * Get the map of rev_text_id => rev_id for all revisions of a page.
	 *
	 * Results are cached in $mapCache. Before a new page is loaded, the oldest
	 * cached maps are dropped for as long as the cache holds more than
	 * $maxMapCacheSize entries.
	 *
	 * @param int $pageId
	 * @return array Map of rev_text_id => rev_id
	 */
	public function getRevTextMap( $pageId ) {
		if ( !isset( $this->mapCache[$pageId] ) ) {
			// Limit cache size. reset() makes sure key() refers to the first
			// remaining entry instead of relying on wherever the internal array
			// pointer happens to be; the emptiness check prevents spinning on a
			// null key if the size counter and the cache ever disagree.
			while ( $this->mapCacheSize > $this->maxMapCacheSize && count( $this->mapCache ) ) {
				reset( $this->mapCache );
				$key = key( $this->mapCache );
				$this->mapCacheSize -= count( $this->mapCache[$key] );
				unset( $this->mapCache[$key] );
			}

			$dbr = wfGetDB( DB_SLAVE );
			$map = array();
			$res = $dbr->select( 'revision',
				array( 'rev_id', 'rev_text_id' ),
				array( 'rev_page' => $pageId ),
				__METHOD__
			);
			foreach ( $res as $row ) {
				$map[$row->rev_text_id] = $row->rev_id;
			}
			$this->mapCache[$pageId] = $map;
			$this->mapCacheSize += count( $map );
		}

		return $this->mapCache[$pageId];
	}

	/**
	 * Check whether the secondary text row of a stub still contains the item
	 * with the stub's hash.
	 *
	 * This is based on part of HistoryBlobStub::getText().
	 *
	 * @param array $stub Stub description as built in execute(); only the
	 *   'hash' element is read here
	 * @param object $secondaryRow Row with old_flags and old_text fields
	 * @return bool True if the item could be retrieved from the secondary row
	 */
	public function isUnbrokenStub( $stub, $secondaryRow ) {
		$flags = explode( ',', $secondaryRow->old_flags );
		$text = $secondaryRow->old_text;
		if ( in_array( 'external', $flags ) ) {
			$url = $text;
			// Require a non-empty path after the protocol separator
			$parts = explode( '://', $url, 2 );
			if ( count( $parts ) < 2 || $parts[1] == "" ) {
				return false;
			}
			$text = ExternalStore::fetchFromURL( $url );
		}
		if ( !in_array( 'object', $flags ) ) {
			return false;
		}

		if ( !is_string( $text ) ) {
			// Nothing to unserialize, e.g. the external fetch returned false
			return false;
		}

		if ( in_array( 'gzip', $flags ) ) {
			$text = gzinflate( $text );
			if ( $text === false ) {
				// Corrupt compressed data
				return false;
			}
		}
		$obj = unserialize( $text );

		if ( is_string( $obj ) ) {
			// Correct for old double-serialization bug.
			$obj = unserialize( $obj );
		}

		if ( !is_object( $obj ) ) {
			return false;
		}

		// Objects of an unexpected class cannot provide the item; treat them as
		// broken instead of aborting the whole run with a fatal error.
		if ( !method_exists( $obj, 'uncompress' ) || !method_exists( $obj, 'getItem' ) ) {
			return false;
		}

		$obj->uncompress();
		$text = $obj->getItem( $stub['hash'] );

		return $text !== false;
	}
}
344 
// Script bootstrap: record the name of the class defined in this file, then
// include the file named by the RUN_MAINTENANCE_IF_MAIN constant.
// NOTE(review): presumably that file reads $maintClass to decide which class
// to instantiate and run — confirm against Maintenance.php.
345 $maintClass = 'FixBug20757';
346 require_once RUN_MAINTENANCE_IF_MAIN;
DB_MASTER
const DB_MASTER
Definition: Defines.php:56
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
wfGetDB
& wfGetDB( $db, $groups=array(), $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:3650
FixBug20757
Maintenance script to fix bug 20757.
Definition: fixBug20757.php:31
FixBug20757\findTextIdInPage
findTextIdInPage( $pageId, $textId)
Definition: fixBug20757.php:268
FixBug20757\execute
execute()
Do the actual work.
Definition: fixBug20757.php:44
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false)
Add a parameter to the script.
Definition: Maintenance.php:169
RUN_MAINTENANCE_IF_MAIN
require_once RUN_MAINTENANCE_IF_MAIN
Definition: maintenance.txt:50
Maintenance
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
Definition: maintenance.txt:39
$flags
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2113
$dbr
$dbr
Definition: testCompression.php:48
key
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add in any and then calling but I prefer the flexibility This should also do the output encoding The system allocates a global one in $wgOut Title Represents the title of an and does all the work of translating among various forms such as plain database key
Definition: design.txt:25
FixBug20757\$batchSize
$batchSize
Definition: fixBug20757.php:32
FixBug20757\__construct
__construct()
Default constructor.
Definition: fixBug20757.php:37
FixBug20757\waitForSlaves
waitForSlaves()
Definition: fixBug20757.php:259
FixBug20757\getRevTextMap
getRevTextMap( $pageId)
Definition: fixBug20757.php:277
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
wfWaitForSlaves
wfWaitForSlaves( $maxLag=false, $wiki=false, $cluster=false)
Modern version of wfWaitForSlaves().
Definition: GlobalFunctions.php:3795
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
FixBug20757\$maxMapCacheSize
$maxMapCacheSize
Definition: fixBug20757.php:35
FixBug20757\isUnbrokenStub
isUnbrokenStub( $stub, $secondaryRow)
This is based on part of HistoryBlobStub::getText().
Definition: fixBug20757.php:309
FixBug20757\$mapCache
$mapCache
Definition: fixBug20757.php:33
DB_SLAVE
const DB_SLAVE
Definition: Defines.php:55
FixBug20757\$mapCacheSize
$mapCacheSize
Definition: fixBug20757.php:34
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:191
ExternalStore\fetchFromURL
static fetchFromURL( $url, array $params=array())
Fetch data from given URL.
Definition: ExternalStore.php:75
$path
$path
Definition: NoLocalSettings.php:35
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$maintClass
$maintClass
Definition: fixBug20757.php:345
$res
$res
Definition: database.txt:21