# Entry point of the script: pull in ../commandLine.inc (presumably the shared
# CLI bootstrap that provides $args -- confirm) and print usage help when no
# cluster name was supplied.
29 require __DIR__ .
'/../commandLine.inc';
# At least one <cluster> argument is required.
31 if ( count(
$args ) < 1 ) {
32 echo
"Usage: php trackBlobs.php <cluster> [... <cluster>]\n";
33 echo
"Adds blobs from a given ES cluster to the blob_tracking table\n";
34 echo
"Automatically deletes the tracking table and starts from the start again when restarted.\n";
# Orphan-blob detection depends on the GMP extension. When it is loaded the
# doBlobOrphans flag is switched on and $this->trackedBlobs[$cluster] is
# initialised to a GMP integer of 0; the gmp_setbit() calls further down use
# that value as a bitfield indexed by blob id.
52 if ( extension_loaded(
'gmp' ) ) {
53 $this->doBlobOrphans =
true;
# NOTE(review): $cluster is presumably the loop variable of an iteration over
# the configured clusters -- confirm.
55 $this->trackedBlobs[$cluster] = gmp_init( 0 );
# Warning for the case where GMP is unavailable (presumably the else branch
# of the check above -- confirm).
58 echo
"Warning: the gmp extension is needed to find orphan blobs\n";
# Work that needs the bitfields is guarded by the same flag.
67 if ( $this->doBlobOrphans ) {
# Integrity check: look in the text table for a row whose old_flags contain
# 'object' but not 'external' and whose old_text starts (first 22 characters,
# converted to latin1 and lower-cased) with the serialized header
# o:15:"historyblobstub".
73 echo
"Doing integrity check...\n";
78 $exists =
$dbr->selectField(
'text', 1,
79 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\' ' .
80 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
# Failure message: such rows could be destroyed by this script, so the
# operator is pointed at resolveStubs.php.
85 echo
"Integrity check failed: found HistoryBlobStub objects in your text table.\n" .
86 "This script could destroy these objects if it continued. Run resolveStubs.php\n" .
# Success message.
91 echo
"Integrity check OK\n";
# (Re)initialise the tracking schema. If blob_tracking already exists, both
# blob_tracking and blob_orphans are dropped; blob_tracking.sql from this
# script's directory is run through sourceFile().
# NOTE(review): only blob_tracking is checked for existence before
# blob_orphans is dropped as well -- confirm both tables are always created
# together.
96 if ( $dbw->tableExists(
'blob_tracking' ) ) {
97 $dbw->query(
'DROP TABLE ' . $dbw->tableName(
'blob_tracking' ) );
98 $dbw->query(
'DROP TABLE ' . $dbw->tableName(
'blob_orphans' ) );
100 $dbw->sourceFile( __DIR__ .
'/blob_tracking.sql' );
# Lazily build $this->textClause: one "old_text LIKE DB://<cluster>/..."
# condition per entry of $this->clusters (generated by buildLike() with a
# trailing anyString() wildcard), joined with ' OR '.
104 if ( !$this->textClause ) {
106 $this->textClause =
'';
107 foreach ( $this->clusters as $cluster ) {
# Separator is only added between conditions, never before the first one.
108 if ( $this->textClause !=
'' ) {
109 $this->textClause .=
' OR ';
111 $this->textClause .=
'old_text' .
$dbr->buildLike(
"DB://$cluster/",
$dbr->anyString() );
# Parse a pointer of the form DB://<cluster>/<id>[/<hash>]:
# $m[1] = cluster (\w+), $m[2] = decimal id, $m[3] = optional hex hash.
119 if ( !preg_match(
'!^DB://(\w+)/(\d+)(?:/([0-9a-fA-F]+)|)$!', $text, $m ) ) {
125 'id' => intval( $m[2] ),
# $m[3] is not set when the URL has no hash segment, hence the null fallback.
126 'hash' => $m[3] ?? null
# Revision pass: read revision rows joined to their text rows in rev_id order
# and record every external-store pointer found in blob_tracking.
# $endId (the highest rev_id) is only used for the progress output below.
141 $endId =
$dbr->selectField(
'revision',
'MAX(rev_id)',
'', __METHOD__ );
145 echo
"Finding revisions...\n";
147 $fields = [
'rev_id',
'rev_page',
'old_id',
'old_flags',
'old_text' ];
149 'ORDER BY' =>
'rev_id',
# Only text rows flagged as external are of interest.
154 'old_flags ' .
$dbr->buildLike(
$dbr->anyString(),
'external',
$dbr->anyString() ),
# Two alternative join shapes are set up. First: revision joined directly to
# text through rev_text_id.
# NOTE(review): the condition choosing between the two shapes is not visible
# here -- presumably a schema-migration setting; confirm.
157 $tables = [
'revision',
'text' ];
158 $conds = array_merge( [
159 'rev_text_id=old_id',
# Second: revision -> slots -> content -> text, restricted to the main slot
# role, where content_address has the form 'tt:<old_id>'.
162 $slotRoleStore = MediaWikiServices::getInstance()->getSlotRoleStore();
163 $tables = [
'revision',
'slots',
'content',
'text' ];
164 $conds = array_merge( [
165 'rev_id=slot_revision_id',
166 'slot_role_id=' . $slotRoleStore->getId( SlotRecord::MAIN ),
167 'content_id=slot_content_id',
168 'SUBSTRING(content_address, 1, 3)=' .
$dbr->addQuotes(
'tt:' ),
169 'SUBSTRING(content_address, 4)=old_id',
# Each query resumes after the last rev_id processed; $startId is advanced
# in the row loop below.
177 'rev_id > ' .
$dbr->addQuotes( $startId ),
# An empty result means there is nothing left to scan (presumably the loop is
# left here -- confirm).
182 if ( !
$res->numRows() ) {
187 foreach (
$res as $row ) {
188 $startId = $row->rev_id;
# Rows whose old_text cannot be parsed as a DB:// pointer are reported.
191 echo
"Invalid DB:// URL in rev_id {$row->rev_id}\n";
# Sanity check: the pointer's cluster must be one of the requested clusters.
194 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
195 echo
"Invalid cluster returned in SQL query: {$info['cluster']}\n";
# One blob_tracking row per revision: page, revision, text row and the parsed
# pointer components.
199 'bt_page' => $row->rev_page,
200 'bt_rev_id' => $row->rev_id,
201 'bt_text_id' => $row->old_id,
202 'bt_cluster' => $info[
'cluster'],
203 'bt_blob_id' => $info[
'id'],
204 'bt_cgz_hash' => $info[
'hash']
# Mark the blob id as referenced in the cluster's bitfield so the orphan-blob
# pass can tell which blobs are in use.
206 if ( $this->doBlobOrphans ) {
207 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
# Write the accumulated rows in a single insert and keep a running total.
210 $dbw->insert(
'blob_tracking', $insertBatch, __METHOD__ );
211 $rowsInserted += count( $insertBatch );
# Progress is printed once $batchesDone reaches $this->reportingInterval.
214 if ( $batchesDone >= $this->reportingInterval ) {
216 echo
"$startId / $endId\n";
220 echo
"Found $rowsInserted revisions\n";
# Orphan-text pass: scan external text rows in old_id order, LEFT JOINed to
# blob_tracking, and add the ones found to blob_tracking.
# NOTE(review): the LEFT JOIN suggests only rows without a blob_tracking match
# are selected, but the corresponding IS NULL condition is not visible here --
# confirm.
229 # Wait until the blob_tracking table is available in the replica DB
# Block until the replica has reached the master's current position; the
# second argument (100000) is the wait timeout.
232 $pos = $dbw->getMasterPos();
233 $dbr->masterPosWait( $pos, 100000 );
# $endId (the highest old_id) is only used for the progress output below.
237 $endId =
$dbr->selectField(
'text',
'MAX(old_id)',
'', __METHOD__ );
241 echo
"Finding orphan text...\n";
243 # Scan the text table for orphan text
245 $res =
$dbr->select( [
'text',
'blob_tracking' ],
246 [
'old_id',
'old_flags',
'old_text' ],
# Resume after the last old_id processed; $startId is advanced in the row
# loop below.
248 'old_id>' .
$dbr->addQuotes( $startId ),
# Only text rows flagged as external are of interest.
250 'old_flags ' .
$dbr->buildLike(
$dbr->anyString(),
'external',
$dbr->anyString() ),
# Batches are fetched in old_id order, at most batchSize rows at a time.
255 'ORDER BY' =>
'old_id',
256 'LIMIT' => $this->batchSize
258 [
'blob_tracking' => [
'LEFT JOIN',
'bt_text_id=old_id' ] ]
# Collect the ids of this batch (how $ids is used afterwards is not visible
# here).
261 foreach (
$res as $row ) {
262 $ids[] = $row->old_id;
# An empty result means there is nothing left to scan (presumably the loop is
# left here -- confirm).
265 if ( !
$res->numRows() ) {
270 foreach (
$res as $row ) {
271 $startId = $row->old_id;
# Rows whose old_text cannot be parsed as a DB:// pointer are reported.
274 echo
"Invalid DB:// URL in old_id {$row->old_id}\n";
# Sanity check: the pointer's cluster must be one of the requested clusters.
277 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
278 echo
"Invalid cluster returned in SQL query\n";
# blob_tracking row for an orphan text row: text id plus the parsed pointer
# components.
285 'bt_text_id' => $row->old_id,
286 'bt_cluster' => $info[
'cluster'],
287 'bt_blob_id' => $info[
'id'],
288 'bt_cgz_hash' => $info[
'hash']
# Mark the blob id as referenced in the cluster's bitfield so the orphan-blob
# pass can tell which blobs are in use.
290 if ( $this->doBlobOrphans ) {
291 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
# Write the accumulated rows in a single insert and keep a running total.
294 $dbw->insert(
'blob_tracking', $insertBatch, __METHOD__ );
296 $rowsInserted += count( $insertBatch );
# Progress is printed once $batchesDone reaches $this->reportingInterval.
298 if ( $batchesDone >= $this->reportingInterval ) {
300 echo
"$startId / $endId\n";
304 echo
"Found $rowsInserted orphan text rows\n";
# Orphan-blob pass: for every cluster, build a bitfield of the blob ids that
# exist in the cluster's blobs table, remove the ids marked as tracked by the
# earlier passes, and record what is left in blob_orphans.
# The bitfields require GMP, so nothing can be done without it.
315 if ( !extension_loaded(
'gmp' ) ) {
316 echo
"Can't find orphan blobs, need bitfield support provided by GMP.\n";
323 foreach ( $this->clusters as $cluster ) {
324 echo
"Searching for orphan blobs in $cluster...\n";
325 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
326 $lb = $lbFactory->getExternalLB( $cluster );
# Replica connection to the external-store cluster.
328 $extDB = $lb->getMaintenanceConnectionRef(
DB_REPLICA );
# Error reporting for a failed connection ($e presumably comes from an
# enclosing catch -- confirm): an 'Unknown database' message is reported as a
# missing database, anything else with the full message.
330 if ( strpos( $e->getMessage(),
'Unknown database' ) !==
false ) {
331 echo
"No database on $cluster\n";
333 echo
"Error on $cluster: " . $e->getMessage() .
"\n";
# The blobs table name is read from the 'blobs table' LB info entry; the
# handling of a null value is not visible here (presumably a default name is
# substituted -- confirm).
337 $table = $extDB->getLBInfo(
'blobs table' );
338 if ( is_null( $table ) ) {
341 if ( !$extDB->tableExists( $table ) ) {
342 echo
"No blobs table on cluster $cluster\n";
# Bitfield of blob ids actually present on this cluster, filled by the scan
# below; $endId is only used for the progress output.
347 $actualBlobs = gmp_init( 0 );
348 $endId = $extDB->selectField( $table,
'MAX(blob_id)',
'', __METHOD__ );
# Resume after the last blob_id processed; $startId is advanced in the row
# loop below.
352 $res = $extDB->select( $table,
354 [
'blob_id > ' . $extDB->addQuotes( $startId ) ],
# An empty result means there is nothing left to scan (presumably the loop is
# left here -- confirm).
359 if ( !
$res->numRows() ) {
363 foreach (
$res as $row ) {
364 gmp_setbit( $actualBlobs, $row->blob_id );
365 $startId = $row->blob_id;
# Progress is printed once $batchesDone reaches $this->reportingInterval.
369 if ( $batchesDone >= $this->reportingInterval ) {
371 echo
"$startId / $endId\n";
# orphans = actual AND NOT tracked: a bit stays set only for blob ids that
# exist on the cluster but were never marked as referenced.
377 $orphans = gmp_and( $actualBlobs, gmp_com( $this->trackedBlobs[$cluster] ) );
# gmp_scan1() returns the index of the first set bit at or after $id, i.e. the
# next orphan blob id.
384 $id = gmp_scan1( $orphans, $id );
389 'bo_cluster' => $cluster,
# Flush once the batch holds more than batchSize rows (strict '>', so a
# flushed batch contains batchSize + 1 rows).
392 if ( count( $insertBatch ) > $this->batchSize ) {
393 $dbw->insert(
'blob_orphans', $insertBatch, __METHOD__ );
# Flush whatever is left over for this cluster.
400 if ( $insertBatch ) {
401 $dbw->insert(
'blob_orphans', $insertBatch, __METHOD__ );
403 echo
"Found $numOrphans orphan(s) in $cluster\n";