# Script bootstrap: loads commandLine.inc from the parent of this file's directory.
25 require __DIR__ .
'/../commandLine.inc';
# At least one cluster name is required in $args; with none, the usage text below is printed.
# NOTE(review): $args is presumably populated by commandLine.inc - confirm.
27 if ( count(
$args ) < 1 ) {
28 echo
"Usage: php trackBlobs.php <cluster> [... <cluster>]\n";
29 echo
"Adds blobs from a given ES cluster to the blob_tracking table\n";
30 echo
"Automatically deletes the tracking table and starts from the start again when restarted.\n";
# Orphan-blob detection is switched on only when the gmp extension is loaded.
48 if ( extension_loaded(
'gmp' ) ) {
49 $this->doBlobOrphans =
true;
# One GMP integer per cluster, starting at 0. Other code in this file sets bit number
# <blob id> in it via gmp_setbit(), so it serves as a bitfield of tracked blob IDs.
# NOTE(review): the loop that supplies $cluster is not visible in this excerpt.
51 $this->trackedBlobs[$cluster] = gmp_init( 0 );
# Warning printed when gmp is unavailable (presumably the else branch - not visible here).
54 echo
"Warning: the gmp extension is needed to find orphan blobs\n";
# Guard on the doBlobOrphans flag; the guarded statements are not visible in this excerpt.
63 if ( $this->doBlobOrphans ) {
# Integrity check: probes the text and archive tables through $dbr for rows that the
# messages below describe as unsafe to continue with.
69 echo
"Doing integrity check...\n";
# Text table probe: selects the constant 1 for a row flagged as object but not external
# whose first 22 characters of old_text, lower-cased, equal the serialized
# HistoryBlobStub class header.
74 $exists =
$dbr->selectField(
'text', 1,
75 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\' ' .
76 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
# Failure message for the text table probe (the condition guarding it is not visible here).
81 echo
"Integrity check failed: found HistoryBlobStub objects in your text table.\n" .
82 "This script could destroy these objects if it continued. Run resolveStubs.php\n" .
# Archive table probe: fetches ar_flags of a row that is either flagged external, or
# flagged object with a HistoryBlobStub header at the start of ar_text.
88 $flags =
$dbr->selectField(
'archive',
'ar_flags',
89 'ar_flags LIKE \'%external%\' OR (' .
90 'ar_flags LIKE \'%object%\' ' .
91 'AND LOWER(CONVERT(LEFT(ar_text,22) USING latin1)) = \'o:15:"historyblobstub"\' )',
# The fetched flags pick the message: flags containing the word external produce the
# external-pointer message; the HistoryBlobStub message is presumably the else branch.
95 if ( strpos(
$flags,
'external' ) !==
false ) {
96 echo
"Integrity check failed: found external storage pointers in your archive table.\n" .
97 "Run normaliseArchiveTable.php to fix this.\n";
100 echo
"Integrity check failed: found HistoryBlobStub objects in your archive table.\n" .
101 "These objects are probably already broken, continuing would make them\n" .
102 "unrecoverable. Run \"normaliseArchiveTable.php --fix-cgz-bug\" to fix this.\n";
# Printed when the check completes (the control flow reaching this line is not visible here).
106 echo
"Integrity check OK\n";
# Restart from scratch: when blob_tracking already exists, both tracking tables are dropped.
# NOTE(review): blob_orphans is dropped without an existence check of its own, so the
# second DROP presumably fails if only blob_tracking exists - confirm, and consider
# DROP TABLE IF EXISTS or a separate tableExists() test.
111 if ( $dbw->tableExists(
'blob_tracking' ) ) {
112 $dbw->query(
'DROP TABLE ' . $dbw->tableName(
'blob_tracking' ) );
113 $dbw->query(
'DROP TABLE ' . $dbw->tableName(
'blob_orphans' ) );
# Runs blob_tracking.sql from this file's directory against $dbw.
# NOTE(review): presumably this recreates both tables - the SQL file is not part of this excerpt.
115 $dbw->sourceFile( __DIR__ .
'/blob_tracking.sql' );
# Builds $this->textClause only while it is still empty/unset, so the result is reused.
119 if ( !$this->textClause ) {
121 $this->textClause =
'';
# One condition per configured cluster, joined with OR.
122 foreach ( $this->clusters
as $cluster ) {
123 if ( $this->textClause !=
'' ) {
124 $this->textClause .=
' OR ';
# Each condition matches old_text values that start with DB://<cluster>/
# NOTE(review): presumably buildLike() with anyString() yields a prefix LIKE pattern - confirm.
126 $this->textClause .=
'old_text' .
$dbr->buildLike(
"DB://$cluster/",
$dbr->anyString() );
# Parses a pointer of the form DB://<cluster>/<id> with an optional /<hex hash> suffix.
# Capture groups: 1 = cluster (word characters), 2 = decimal id, 3 = optional hex hash.
# The handling of a non-matching $text is not visible in this excerpt.
133 if ( !preg_match(
'!^DB://(\w+)/(\d+)(?:/([0-9a-fA-F]+)|)$!', $text, $m ) ) {
# The id is converted to an integer; hash is null when group 3 is not set in $m.
138 'id' => intval( $m[2] ),
139 'hash' => isset( $m[3] ) ? $m[3] :
null
# Revision scan: records one blob_tracking row per revision whose text is stored externally.
# The highest rev_id is fetched up front; in the visible code it is used in the progress line.
152 $endId =
$dbr->selectField(
'revision',
'MAX(rev_id)',
false, __METHOD__ );
156 echo
"Finding revisions...\n";
# Columns fetched for every row of a batch (the select call itself is not visible here).
160 array(
'rev_id',
'rev_page',
'old_id',
'old_flags',
'old_text' ),
# Conditions: resume after the last rev_id seen, join revision to text through
# rev_text_id=old_id, and keep only rows whose old_flags contain the word external.
162 'rev_id > ' .
$dbr->addQuotes( $startId ),
163 'rev_text_id=old_id',
165 'old_flags ' .
$dbr->buildLike(
$dbr->anyString(),
'external',
$dbr->anyString() ),
# Ordered by rev_id and capped at batchSize rows, so each query returns the next batch.
169 'ORDER BY' =>
'rev_id',
170 'LIMIT' => $this->batchSize
# An empty result presumably ends the scan (the branch body is not visible here).
173 if ( !
$res->numRows() ) {
177 $insertBatch =
array();
178 foreach (
$res as $row ) {
# Remember the last rev_id seen; it feeds the rev_id > condition of the next batch.
179 $startId = $row->rev_id;
# Reported for a pointer that could not be parsed (the check itself is not visible here).
182 echo
"Invalid DB:// URL in rev_id {$row->rev_id}\n";
# Rows pointing at a cluster that is not in $this->clusters are reported.
# NOTE(review): in_array() is called without the strict flag - confirm loose comparison is intended.
185 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
186 echo
"Invalid cluster returned in SQL query: {$info['cluster']}\n";
# Row queued for blob_tracking: page, revision and text ids plus the parsed pointer parts.
189 $insertBatch[] =
array(
190 'bt_page' => $row->rev_page,
191 'bt_rev_id' => $row->rev_id,
192 'bt_text_id' => $row->old_id,
193 'bt_cluster' => $info[
'cluster'],
194 'bt_blob_id' => $info[
'id'],
195 'bt_cgz_hash' => $info[
'hash']
# When orphan detection is enabled, set the bit for this blob id in the cluster's bitfield.
197 if ( $this->doBlobOrphans ) {
198 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
# Write the queued rows with one insert call and add their count to the running total.
201 $dbw->insert(
'blob_tracking', $insertBatch, __METHOD__ );
202 $rowsInserted += count( $insertBatch );
# Progress line once $batchesDone reaches reportingInterval (counter updates are not visible here).
205 if ( $batchesDone >= $this->reportingInterval ) {
207 echo
"$startId / $endId\n";
211 echo
"Found $rowsInserted revisions\n";
220 # Wait until the blob_tracking table is available in the slave
# The master position read from $dbw is passed to $dbr->masterPosWait() with the value
# 100000 as second argument. NOTE(review): presumably a timeout - unit not shown here.
223 $pos = $dbw->getMasterPos();
224 $dbr->masterPosWait( $pos, 100000 );
# The highest old_id is fetched up front; in the visible code it is used in the progress line.
228 $endId =
$dbr->selectField(
'text',
'MAX(old_id)',
false, __METHOD__ );
232 echo
"Finding orphan text...\n";
234 # Scan the text table for orphan text
# Columns fetched for every row of a batch (the select call itself is not visible here).
237 array(
'old_id',
'old_flags',
'old_text' ),
# Conditions: resume after the last old_id seen and keep only rows whose old_flags
# contain the word external. Further conditions may exist on lines missing from this excerpt.
239 'old_id>' .
$dbr->addQuotes( $startId ),
241 'old_flags ' .
$dbr->buildLike(
$dbr->anyString(),
'external',
$dbr->anyString() ),
# Ordered by old_id and capped at batchSize rows.
246 'ORDER BY' =>
'old_id',
247 'LIMIT' => $this->batchSize
# LEFT JOIN against blob_tracking on bt_text_id=old_id.
# NOTE(review): presumably paired with a bt_text_id IS NULL condition so only untracked
# text rows remain - that condition is not visible here, confirm.
249 array(
'blob_tracking' =>
array(
'LEFT JOIN',
'bt_text_id=old_id' ) )
# NOTE(review): $ids is filled here but no later use is visible in this excerpt - possibly dead code.
252 foreach (
$res as $row ) {
253 $ids[] = $row->old_id;
# An empty result presumably ends the scan (the branch body is not visible here).
256 if ( !
$res->numRows() ) {
260 $insertBatch =
array();
261 foreach (
$res as $row ) {
# Remember the last old_id seen; it feeds the old_id> condition of the next batch.
262 $startId = $row->old_id;
# Reported for a pointer that could not be parsed (the check itself is not visible here).
265 echo
"Invalid DB:// URL in old_id {$row->old_id}\n";
# Rows pointing at a cluster that is not in $this->clusters are reported.
# NOTE(review): unlike the matching message in the revision scan, this one omits the cluster name.
268 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
269 echo
"Invalid cluster returned in SQL query\n";
# Row queued for blob_tracking; only the text id and parsed pointer parts are visible here.
273 $insertBatch[] =
array(
276 'bt_text_id' => $row->old_id,
277 'bt_cluster' => $info[
'cluster'],
278 'bt_blob_id' => $info[
'id'],
279 'bt_cgz_hash' => $info[
'hash']
# When orphan detection is enabled, set the bit for this blob id in the cluster's bitfield.
281 if ( $this->doBlobOrphans ) {
282 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
# Write the queued rows with one insert call and add their count to the running total.
285 $dbw->insert(
'blob_tracking', $insertBatch, __METHOD__ );
287 $rowsInserted += count( $insertBatch );
# Progress line once $batchesDone reaches reportingInterval (counter updates are not visible here).
289 if ( $batchesDone >= $this->reportingInterval ) {
291 echo
"$startId / $endId\n";
295 echo
"Found $rowsInserted orphan text rows\n";
# Orphan blob scan: compares the blob ids present on each cluster with the ids recorded
# in $this->trackedBlobs. It needs GMP because both sets are held as GMP bitfields.
306 if ( !extension_loaded(
'gmp' ) ) {
307 echo
"Can't find orphan blobs, need bitfield support provided by GMP.\n";
313 foreach ( $this->clusters
as $cluster ) {
314 echo
"Searching for orphan blobs in $cluster...\n";
# Error reporting for the cluster connection (the try/catch around it is not visible here):
# an error text containing Unknown database gets the short message, anything else the
# exception message.
319 if ( strpos(
$e->error,
'Unknown database' ) !==
false ) {
320 echo
"No database on $cluster\n";
322 echo
"Error on $cluster: " .
$e->getMessage() .
"\n";
# Table name comes from the load balancer info key 'blobs table'; the handling of a null
# value is not visible here (presumably a default name - confirm).
326 $table = $extDB->getLBInfo(
'blobs table' );
327 if ( is_null( $table ) ) {
330 if ( !$extDB->tableExists( $table ) ) {
331 echo
"No blobs table on cluster $cluster\n";
# Bitfield of blob ids that exist on this cluster, starting at 0.
336 $actualBlobs = gmp_init( 0 );
# The highest blob_id is fetched up front; in the visible code it is used in the progress line.
337 $endId = $extDB->selectField( $table,
'MAX(blob_id)',
false, __METHOD__ );
# Batched scan: rows after the last blob_id seen, ordered by blob_id, batchSize rows at a time.
341 $res = $extDB->select( $table,
343 array(
'blob_id > ' . $extDB->addQuotes( $startId ) ),
345 array(
'LIMIT' => $this->batchSize,
'ORDER BY' =>
'blob_id' )
# An empty result presumably ends the scan (the branch body is not visible here).
348 if ( !
$res->numRows() ) {
352 foreach (
$res as $row ) {
# Set the bit for every blob id found on the cluster.
353 gmp_setbit( $actualBlobs, $row->blob_id );
# Last blob_id seen; it feeds the blob_id > condition of the next batch.
355 $startId = $row->blob_id;
# Progress line once $batchesDone reaches reportingInterval (counter updates are not visible here).
358 if ( $batchesDone >= $this->reportingInterval ) {
360 echo
"$startId / $endId\n";
# Orphans = existing AND NOT tracked: a bit stays set only for blob ids that exist on the
# cluster but were never marked in $this->trackedBlobs[$cluster].
366 $orphans = gmp_and( $actualBlobs, gmp_com( $this->trackedBlobs[$cluster] ) );
369 $insertBatch =
array();
# gmp_scan1() looks for the next set bit starting at position $id; the surrounding loop
# and its termination test are not visible in this excerpt.
373 $id = gmp_scan1( $orphans, $id );
377 $insertBatch[] =
array(
378 'bo_cluster' => $cluster,
# Flush once the batch holds more than batchSize rows.
# NOTE(review): with > rather than >=, each flushed batch contains batchSize + 1 rows - confirm intended.
381 if ( count( $insertBatch ) > $this->batchSize ) {
382 $dbw->insert(
'blob_orphans', $insertBatch, __METHOD__ );
383 $insertBatch =
array();
# Final flush of whatever is left in the batch.
389 if ( $insertBatch ) {
390 $dbw->insert(
'blob_orphans', $insertBatch, __METHOD__ );
392 echo
"Found $numOrphans orphan(s) in $cluster\n";