24 require_once __DIR__ .
'/../Maintenance.php';
// Fragment of a constructor body (the function header is not visible in this
// excerpt). It calls the parent constructor, sets the description text and
// registers two command-line options.
38 parent::__construct();
39 $this->mDescription =
'Script to fix bug 20757 assuming that blob_tracking is intact';
// Option dry-run: registered with a name and a description only.
40 $this->
addOption(
'dry-run',
'Report only' );
// Option start: described as the old_id to start at. The trailing arguments
// (false, true) are positional -- presumably "not required" and "takes a
// value"; TODO confirm against the addOption signature of the parent class.
41 $this->
addOption(
'start',
'old_id to start at',
false,
true );
// Fragment of the main procedure, scan phase. The enclosing function header,
// the loop headers, the else branches and several statements are missing from
// this excerpt, so only the visible statements are described.
// Dry-run notice; the condition guarding it is not visible here.
50 print
"Dry run only.\n";
// Starting old_id comes from the start option and defaults to 0.
53 $startId = $this->
getOption(
'start', 0 );
// Highest old_id in the text table; in the visible lines it is used only for
// the progress display below.
58 $totalRevs =
$dbr->selectField(
'text',
'MAX(old_id)',
false, __METHOD__ );
// On MySQL the first 22 characters of old_text are converted to latin1 and
// lowercased before comparison. The alternative used for other database
// types is not visible in this excerpt.
60 if (
$dbr->getType() ==
'mysql' ) {
62 $lowerLeft =
'LOWER(CONVERT(LEFT(old_text,22) USING latin1))';
// Progress line; it ends with a carriage return (\r) rather than a newline.
66 print
"ID: $startId / $totalRevs\r";
// Pieces of a batch query (the call itself is not visible): the selected
// columns ...
70 array(
'old_id',
'old_flags',
'old_text' ),
// ... the conditions: rows after the current position, flagged as object but
// not as external, whose text starts with the 22-character serialized-object
// prefix for the class name historyblobstub ...
72 'old_id > ' . intval( $startId ),
73 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'',
74 "$lowerLeft = 'o:15:\"historyblobstub\"'",
// ... and the options: ascending old_id, at most batchSize rows.
78 'ORDER BY' =>
'old_id',
79 'LIMIT' => $this->batchSize,
// Empty batch; the body of this branch is not visible (presumably it ends
// the scan -- TODO confirm).
83 if ( !
$res->numRows() ) {
// Start a fresh list of referenced (secondary) text IDs.
87 $secondaryIds =
array();
// Remember the last row seen so the old_id condition above continues after it.
91 $startId = $row->old_id;
// Validate the candidate: it must unserialize ...
94 $obj = unserialize( $row->old_text );
95 if ( $obj ===
false ) {
96 print
"{$row->old_id}: unrecoverable: cannot unserialize\n";
// ... to an object ...
101 if ( !is_object( $obj ) ) {
102 print
"{$row->old_id}: unrecoverable: unserialized to type " .
103 gettype( $obj ) .
", possible double-serialization\n";
// ... whose class name, compared case-insensitively, is historyblobstub.
108 if ( strtolower( get_class( $obj ) ) !==
'historyblobstub' ) {
109 print
"{$row->old_id}: unrecoverable: unexpected object class " .
110 get_class( $obj ) .
"\n";
// A utf-8 or utf8 entry in the comma-separated flags means the row is not in
// the legacy encoding.
116 $flags = explode(
',', $row->old_flags );
117 if ( in_array(
'utf-8',
$flags ) || in_array(
'utf8',
$flags ) ) {
118 $legacyEncoding =
false;
// Assignment for the other case; the else keyword is not visible here.
120 $legacyEncoding =
true;
// Record the stub keyed by its own old_id: the text ID it points at (mOldId),
// its hash (mHash) and the encoding decision made above.
124 $id = intval( $obj->mOldId );
125 $secondaryIds[] = $id;
126 $stubs[$row->old_id] =
array(
127 'legacyEncoding' => $legacyEncoding,
128 'secondaryId' => $id,
129 'hash' => $obj->mHash,
// Several stubs may reference the same secondary text ID; keep each once.
133 $secondaryIds = array_unique( $secondaryIds );
// Fragment: lookup of tracking rows for the collected secondary text IDs.
// The query call and its table name are not visible in this excerpt.
// Nothing was collected; the body of this branch is not visible.
135 if ( !count( $secondaryIds ) ) {
// Query condition: bt_text_id matched against the list of secondary IDs
// (presumably rendered as an IN list by the database layer -- TODO confirm).
144 'bt_text_id' => $secondaryIds,
// Index the result rows by bt_text_id for direct lookup per stub.
148 $trackedBlobs =
array();
149 foreach (
$res as $row ) {
150 $trackedBlobs[$row->bt_text_id] = $row;
// Fragment: per-stub repair loop. Several statements (the assignment of $text,
// the lookup of $revId, closing braces, flow-control statements) are missing
// from this excerpt; comments cover only what is visible.
154 foreach ( $stubs
as $primaryId => $stub ) {
155 $secondaryId = $stub[
'secondaryId'];
// No tracking row exists for the referenced text: load that row to report
// why the stub cannot be repaired, then drop the stub from the work list.
156 if ( !isset( $trackedBlobs[$secondaryId] ) ) {
158 $secondaryRow =
$dbr->selectRow(
160 array(
'old_flags',
'old_text' ),
161 array(
'old_id' => $secondaryId ),
164 if ( !$secondaryRow ) {
165 print
"$primaryId: unrecoverable: secondary row is missing\n";
171 } elseif ( strpos( $secondaryRow->old_flags,
'external' ) !==
false ) {
// The referenced row carries the external flag; its old_text is printed as
// the place the content has gone to.
172 print
"$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n";
// Message for the remaining case; the else keyword is not visible here.
175 print
"$primaryId: unrecoverable: miscellaneous corruption of secondary row\n";
178 unset( $stubs[$primaryId] );
// A tracking row exists: build the URL from its cluster and blob ID plus the
// hash stored in the stub.
181 $trackRow = $trackedBlobs[$secondaryId];
184 $url =
"DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}";
// $text is assigned on a line missing from this excerpt (presumably fetched
// from $url -- TODO confirm). A false value means nothing could be fetched.
186 if ( $text ===
false ) {
187 print
"$primaryId: unrecoverable: source text missing\n";
189 unset( $stubs[$primaryId] );
// The MD5 of the fetched text must equal the hash recorded in the stub.
192 if ( md5( $text ) !== $stub[
'hash'] ) {
193 print
"$primaryId: unrecoverable: content hashes do not match\n";
195 unset( $stubs[$primaryId] );
// Page ID comes from the tracking row. The two assignments below both reset
// page and revision ID to 0; the conditions that select them are not visible.
201 $pageId = intval( $trackRow->bt_page );
203 $revId = $pageId = 0;
208 $pageId = $revId = 0;
// New flags: external, plus utf-8 unless the stub row was in legacy encoding.
212 $newFlags = $stub[
'legacyEncoding'] ?
'external' :
'external,utf-8';
// The flag update and the tracking insert happen between begin and commit.
// Whether these writes are skipped in dry-run mode is decided on lines that
// are not visible here.
216 $dbw->begin( __METHOD__ );
// Fragment of an update call: new flags for the row whose old_id is the
// primary ID. Other updated fields, if any, are not visible.
221 'old_flags' => $newFlags,
225 array(
'old_id' => $primaryId ),
// Add a blob_tracking row for the primary text ID that reuses the cluster,
// blob ID and hash of the secondary tracking row; bt_new_url is left null.
231 $dbw->insert(
'blob_tracking',
233 'bt_page' => $pageId,
234 'bt_rev_id' => $revId,
235 'bt_text_id' => $primaryId,
236 'bt_cluster' => $trackRow->bt_cluster,
237 'bt_blob_id' => $trackRow->bt_blob_id,
238 'bt_cgz_hash' => $stub[
'hash'],
239 'bt_new_url' =>
null,
244 $dbw->commit( __METHOD__ );
248 print
"$primaryId: resolved to $url\n";
// Final summary: prints the three counters. The lines that initialise and
// increment $numFixed, $numBad and $numGood are not visible in this excerpt.
254 print
"Fixed: $numFixed\n";
255 print
"Unrecoverable: $numBad\n";
256 print
"Good stubs: $numGood\n";
// Fragment of a throttling helper (header and guarded body are not visible).
// The counter is a static local, so it persists across calls.
260 static $iteration = 0;
// NOTE(review): the comparison operator > binds tighter than ==, so this
// condition is evaluated as ( ++$iteration > 50 ) == 0. That is true while the
// incremented counter is at most 50 and false afterwards, which is the
// opposite of a once-every-50-calls check. The modulo operator (% 50 == 0)
// was probably intended -- confirm against the guarded body before changing.
262 if ( ++$iteration > 50 == 0 ) {
// Fragment of a lookup helper (header not visible): checks whether $ids has
// an entry for $textId and returns that entry. The origin of $ids and the
// body of the not-found branch are not visible in this excerpt.
270 if ( !isset( $ids[$textId] ) ) {
273 return $ids[$textId];
// Fragment of a cached per-page lookup (header not visible). It returns the
// cache entry for $pageId, building it first when it is absent.
278 if ( !isset( $this->mapCache[$pageId] ) ) {
// Make room first: while the total number of cached entries exceeds the
// limit, drop the page found at the current array position (key() reads the
// internal array pointer -- presumably the oldest entry; TODO confirm) and
// subtract its entry count from the running size.
280 while ( $this->mapCacheSize > $this->maxMapCacheSize ) {
281 $key =
key( $this->mapCache );
282 $this->mapCacheSize -= count( $this->mapCache[$key] );
283 unset( $this->mapCache[$key] );
// Pieces of the query (call and table name not visible): rev_id and
// rev_text_id for every row whose rev_page is this page.
289 array(
'rev_id',
'rev_text_id' ),
290 array(
'rev_page' => $pageId ),
// Build the map from text ID to revision ID. The initialisation of $map is
// not visible in this excerpt.
293 foreach (
$res as $row ) {
294 $map[$row->rev_text_id] = $row->rev_id;
// Store the map and account for its size so the eviction loop stays accurate.
296 $this->mapCache[$pageId] = $map;
297 $this->mapCacheSize += count( $map );
299 return $this->mapCache[$pageId];
// Fragment of a check that a stub can still be resolved through its secondary
// row (header, else keywords and early returns are not visible). The visible
// final statement returns true when the item named by the stub hash can be
// read from the unserialized object.
310 $flags = explode(
',', $secondaryRow->old_flags );
311 $text = $secondaryRow->old_text;
// External rows: the URL is split at the scheme separator, with @ suppressing
// the notice raised when there is no separator, and the content is fetched
// through ExternalStore. $url is assigned on a line missing from this excerpt
// and $path is not used in any visible line.
312 if ( in_array(
'external',
$flags ) ) {
314 @
list( ,
$path ) = explode(
'://', $url, 2 );
318 $text = ExternalStore::fetchFromUrl( $url );
// Rows without the object flag: the body of this branch is not visible.
320 if ( !in_array(
'object',
$flags ) ) {
// Rows with the gzip flag are inflated before unserializing; the second
// unserialize call handles the other case (else keyword not visible).
324 if ( in_array(
'gzip',
$flags ) ) {
325 $obj = unserialize( gzinflate( $text ) );
327 $obj = unserialize( $text );
// If the result is not an object, unserialize it once more (presumably to
// undo double serialization -- TODO confirm).
330 if ( !is_object( $obj ) ) {
332 $obj = unserialize( $obj );
// Still not an object; the body of this branch is not visible.
335 if ( !is_object( $obj ) ) {
// Ask the object for the item stored under the stub hash; false means absent.
340 $text = $obj->getItem( $stub[
'hash'] );
341 return $text !==
false;