41 parent::__construct();
43 $this->
addArg(
'cluster',
'cluster(s) to scan',
true,
true );
46 'Adds blobs from a given ES cluster to the blob_tracking table. ' .
47 'Automatically deletes the tracking table and starts from the start again when restarted.'
52 $this->clusters = $this->parameters->getArgs();
53 if ( extension_loaded(
'gmp' ) ) {
54 $this->doBlobOrphans =
true;
55 foreach ( $this->clusters as $cluster ) {
56 $this->trackedBlobs[$cluster] = gmp_init( 0 );
59 echo
"Warning: the gmp extension is needed to find orphan blobs\n";
62 $this->checkIntegrity();
63 $this->initTrackingTable();
64 $this->trackRevisions();
65 $this->trackOrphanText();
66 if ( $this->doBlobOrphans ) {
67 $this->findOrphanBlobs();
69 $this->
output(
"All done.\n" );
/**
 * Pre-flight safety check run before any tracking work.
 *
 * Probes for rows whose old_flags contain "object" but not "external" and
 * whose old_text begins with a serialized HistoryBlobStub header. When the
 * check fails, the operator is told that continuing could destroy those
 * objects and is pointed at resolveStubs.php.
 *
 * NOTE(review): this listing omits several lines of the method (the
 * connection setup, the table being queried, the branch around the failure
 * message and whatever follows it) - confirm those against the full file.
 */
72 private function checkIntegrity() {
73 echo
"Doing integrity check...\n";
# Existence probe: the fetched field is cast to bool. The 22-character prefix
# of old_text is lower-cased and compared against 'o:15:"historyblobstub"',
# which is itself exactly 22 characters long.
78 $exists = (bool)$dbr->newSelectQueryBuilder()
82 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\' ' .
83 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'' )
84 ->caller( __METHOD__ )->fetchField();
# Failure message; presumably guarded by a test on $exists - TODO confirm.
87 echo
"Integrity check failed: found HistoryBlobStub objects in your text table.\n" .
88 "This script could destroy these objects if it continued. Run resolveStubs.php\n" .
# Message emitted when the check passes.
93 echo
"Integrity check OK\n";
/**
 * Reset the tracking schema so that every run starts from scratch.
 *
 * If the blob_tracking table already exists, both blob_tracking and
 * blob_orphans are dropped; the schema is then loaded from the
 * blob_tracking.sql file that sits next to this script.
 *
 * NOTE(review): the line that obtains $dbw and the closing braces are not
 * part of this listing, so whether sourceFile() runs unconditionally cannot
 * be read off directly - the line numbering suggests it sits after the
 * "if" block; confirm against the full file.
 */
96 private function initTrackingTable() {
# Only the presence of blob_tracking is tested; blob_orphans is dropped along
# with it without a separate existence check.
98 if ( $dbw->tableExists(
'blob_tracking', __METHOD__ ) ) {
99 $dbw->query(
'DROP TABLE ' . $dbw->tableName(
'blob_tracking' ), __METHOD__ );
100 $dbw->query(
'DROP TABLE ' . $dbw->tableName(
'blob_orphans' ), __METHOD__ );
# Load the table definitions from the SQL file in this script's directory.
102 $dbw->sourceFile( __DIR__ .
'/blob_tracking.sql' );
/**
 * Lazily build the condition that selects text rows pointing at one of the
 * clusters being scanned.
 *
 * The work is only done while $this->textClause is still empty. One
 * expression is built per cluster, each using a LIKE pattern made of the
 * literal prefix "DB://<cluster>/" followed by any string.
 *
 * NOTE(review): the column being matched, the way the per-cluster
 * conditions are combined and the return statement are not part of this
 * listing - presumably the combined clause is cached in $this->textClause
 * and returned; confirm against the full file.
 */
105 private function getTextClause() {
# Build once, reuse on later calls.
106 if ( !$this->textClause ) {
109 foreach ( $this->clusters as $cluster ) {
110 $conds[] = $dbr->expr(
113 new LikeValue(
"DB://$cluster/", $dbr->anyString() )
/**
 * Parse an external-storage pointer of the form
 * "DB://<cluster>/<id>" or "DB://<cluster>/<id>/<hex hash>".
 *
 * The pattern captures the cluster name (word characters), a decimal blob
 * id and an optional hexadecimal hash. The visible part of the result has
 * an 'id' entry converted with intval() and a 'hash' entry that is null
 * when the pointer carries no hash.
 *
 * NOTE(review): the value produced when the pattern does not match and the
 * remaining result entries are not part of this listing; callers in this
 * file also read a 'cluster' entry - confirm against the full file.
 *
 * @param string $text Pointer text, as stored in old_text
 */
122 private function interpretPointer( $text ) {
# The trailing "|)" alternative makes the "/<hash>" suffix optional.
123 if ( !preg_match(
'!^DB://(\w+)/(\d+)(?:/([0-9a-fA-F]+)|)$!', $text, $m ) ) {
129 'id' => intval( $m[2] ),
130 'hash' => $m[3] ?? null
/**
 * Scan revisions in rev_id order and record, for each one whose main-slot
 * text row points into a scanned cluster, a row in blob_tracking.
 *
 * Revisions are read in batches of $this->batchSize, resuming after the
 * last rev_id seen. Each pointer is parsed with interpretPointer(); when
 * orphan-blob detection is enabled the blob id is also set in the
 * per-cluster GMP bitfield. Progress is printed as "<current> / <max>" and
 * the total number of inserted rows is reported at the end.
 *
 * NOTE(review): many lines of this method are missing from this listing
 * (connection setup, loop header, the source table of the main query, the
 * invalid-pointer checks and all closing braces) - comments below describe
 * only what the visible lines establish.
 */
137 private function trackRevisions() {
141 $textClause = $this->getTextClause();
# Highest rev_id; only used as the right-hand side of the progress output.
143 $endId = (int)$dbr->newSelectQueryBuilder()
144 ->select(
'MAX(rev_id)' )
146 ->caller( __METHOD__ )->fetchField();
150 echo
"Finding revisions...\n";
157 new LikeValue( $dbr->anyString(),
'external', $dbr->anyString() )
# Restrict to main-slot content whose address starts with "tt:".
# NOTE(review): the other operand of array_merge() is not visible here.
162 $conds = array_merge( [
163 'slot_role_id=' . $slotRoleStore->getId( SlotRecord::MAIN ),
164 'SUBSTRING(content_address, 1, 3)=' . $dbr->addQuotes(
'tt:' ),
# Next batch: revisions after $startId joined through slots and content to
# the text row whose old_id equals the content address without its first
# three characters.
168 $res = $dbr->newSelectQueryBuilder()
169 ->select( [
'rev_id',
'rev_page',
'old_id',
'old_flags',
'old_text' ] )
171 ->join(
'slots',
null,
'rev_id=slot_revision_id' )
172 ->join(
'content',
null,
'content_id=slot_content_id' )
173 ->join(
'text',
null,
'SUBSTRING(content_address, 4)=old_id' )
174 ->where( $dbr->expr(
'rev_id',
'>', $startId ) )
176 ->orderBy(
'rev_id' )
177 ->limit( $this->batchSize )
178 ->caller( __METHOD__ )->fetchResultSet();
# Empty batch; presumably this ends the scan - TODO confirm.
179 if ( !$res->numRows() ) {
184 foreach ( $res as $row ) {
# Advance the cursor so the next batch starts after this revision.
185 $startId = (int)$row->rev_id;
186 $info = $this->interpretPointer( $row->old_text );
188 echo
"Invalid DB:// URL in rev_id {$row->rev_id}\n";
# Defensive check: the parsed cluster must be one of the requested ones.
191 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
192 echo
"Invalid cluster returned in SQL query: {$info['cluster']}\n";
196 'bt_page' => $row->rev_page,
197 'bt_rev_id' => $row->rev_id,
198 'bt_text_id' => $row->old_id,
199 'bt_cluster' => $info[
'cluster'],
200 'bt_blob_id' => $info[
'id'],
201 'bt_cgz_hash' => $info[
'hash']
# Mark the blob as referenced; findOrphanBlobs() later masks these bits out
# of the set of blobs that actually exist on the cluster.
203 if ( $this->doBlobOrphans ) {
204 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
# Write the rows collected for this batch.
207 $dbw->newInsertQueryBuilder()
208 ->insertInto(
'blob_tracking' )
209 ->rows( $insertBatch )
210 ->caller( __METHOD__ )->execute();
211 $rowsInserted += count( $insertBatch );
# Periodic progress report, once enough batches have completed.
214 if ( $batchesDone >= $this->reportingInterval ) {
216 echo
"$startId / $endId\n";
220 echo
"Found $rowsInserted revisions\n";
/**
 * Scan for text rows that have no blob_tracking entry and record them.
 *
 * After waiting for the replica to reach the primary's position, rows are
 * read in old_id order in batches of $this->batchSize, left-joined against
 * blob_tracking and kept only where no tracking row matched. Each pointer
 * is parsed with interpretPointer() and inserted into blob_tracking; when
 * orphan-blob detection is enabled the blob id is also set in the
 * per-cluster GMP bitfield. Progress is printed as "<current> / <max>" and
 * the total number of inserted rows is reported at the end.
 *
 * NOTE(review): many lines of this method are missing from this listing
 * (connection setup, loop header, the source table of the queries, the
 * invalid-pointer check, part of the inserted row and all closing braces) -
 * comments below describe only what the visible lines establish.
 */
228 private function trackOrphanText() {
229 # Wait until the blob_tracking table is available in the replica DB
232 $pos = $dbw->getPrimaryPos();
233 $dbr->primaryPosWait( $pos, 100_000 );
# Highest old_id; only used as the right-hand side of the progress output.
237 $endId = (int)$dbr->newSelectQueryBuilder()
238 ->select(
'MAX(old_id)' )
240 ->caller( __METHOD__ )->fetchField();
244 echo
"Finding orphan text...\n";
246 # Scan the text table for orphan text
# The left join plus the "bt_text_id is null" condition keeps only rows that
# have no matching blob_tracking entry.
248 $res = $dbr->newSelectQueryBuilder()
249 ->select( [
'old_id',
'old_flags',
'old_text' ] )
251 ->leftJoin(
'blob_tracking',
null,
'bt_text_id=old_id' )
253 $dbr->expr(
'old_id',
'>', $startId ),
258 new LikeValue( $dbr->anyString(),
'external', $dbr->anyString() )
260 'bt_text_id' =>
null,
262 ->orderBy(
'old_id' )
263 ->limit( $this->batchSize )
264 ->caller( __METHOD__ )->fetchResultSet();
# Empty batch; presumably this ends the scan - TODO confirm.
266 if ( !$res->numRows() ) {
271 foreach ( $res as $row ) {
# Advance the cursor so the next batch starts after this text row.
272 $startId = (int)$row->old_id;
273 $info = $this->interpretPointer( $row->old_text );
275 echo
"Invalid DB:// URL in old_id {$row->old_id}\n";
# Defensive check: the parsed cluster must be one of the requested ones.
278 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
279 echo
"Invalid cluster returned in SQL query\n";
286 'bt_text_id' => $row->old_id,
287 'bt_cluster' => $info[
'cluster'],
288 'bt_blob_id' => $info[
'id'],
289 'bt_cgz_hash' => $info[
'hash']
# Mark the blob as referenced; findOrphanBlobs() later masks these bits out
# of the set of blobs that actually exist on the cluster.
291 if ( $this->doBlobOrphans ) {
292 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
# Write the rows collected for this batch.
295 $dbw->newInsertQueryBuilder()
296 ->insertInto(
'blob_tracking' )
297 ->rows( $insertBatch )
298 ->caller( __METHOD__ )->execute();
300 $rowsInserted += count( $insertBatch );
# Periodic progress report, once enough batches have completed.
302 if ( $batchesDone >= $this->reportingInterval ) {
304 echo
"$startId / $endId\n";
308 echo
"Found $rowsInserted orphan text rows\n";
/**
 * For each scanned cluster, find blob ids that exist on the cluster but
 * were never marked as tracked, and record them in blob_orphans.
 *
 * Requires the GMP extension: blob ids are held as bit positions in
 * arbitrary-size integers. For every cluster the blobs table is read in
 * blob_id order in batches of $this->batchSize to build the set of existing
 * ids; the orphan set is that set with the tracked bits masked out. Orphan
 * ids are inserted in batches and a per-cluster count is printed.
 *
 * NOTE(review): many lines of this method are missing from this listing
 * (factory and connection setup, the try/catch around the connection, loop
 * headers, the source table of the queries, part of the inserted row and
 * all closing braces) - comments below describe only what the visible
 * lines establish.
 */
318 private function findOrphanBlobs() {
# Guard: the bitfield operations below come from GMP.
319 if ( !extension_loaded(
'gmp' ) ) {
320 echo
"Can't find orphan blobs, need bitfield support provided by GMP.\n";
328 foreach ( $this->clusters as $cluster ) {
329 echo
"Searching for orphan blobs in $cluster...\n";
# Connect to a replica of this cluster's external store.
330 $lb = $lbFactory->getExternalLB( $cluster );
332 $extDB = $lb->getMaintenanceConnectionRef(
DB_REPLICA );
# An "Unknown database" error gets its own message; any other error is
# printed with its original text.
334 if ( strpos( $e->getMessage(),
'Unknown database' ) !==
false ) {
335 echo
"No database on $cluster\n";
337 echo
"Error on $cluster: " . $e->getMessage() .
"\n";
# Table name comes from the "blobs table" load balancer setting and falls
# back to "blobs" when that setting is null.
341 $table = $extDB->getLBInfo(
'blobs table' ) ??
'blobs';
342 if ( !$extDB->tableExists( $table, __METHOD__ ) ) {
343 echo
"No blobs table on cluster $cluster\n";
# Bitfield of the blob ids that actually exist on this cluster.
348 $actualBlobs = gmp_init( 0 );
# Highest blob_id; only used as the right-hand side of the progress output.
349 $endId = (int)$extDB->newSelectQueryBuilder()
350 ->select(
'MAX(blob_id)' )
352 ->caller( __METHOD__ )->fetchField();
# Next batch of blob ids after $startId.
356 $res = $extDB->newSelectQueryBuilder()
357 ->select( [
'blob_id' ] )
359 ->where( $extDB->expr(
'blob_id',
'>', $startId ) )
360 ->orderBy(
'blob_id' )
361 ->limit( $this->batchSize )
362 ->caller( __METHOD__ )->fetchResultSet();
# Empty batch; presumably this ends the scan of this cluster - TODO confirm.
364 if ( !$res->numRows() ) {
368 foreach ( $res as $row ) {
369 gmp_setbit( $actualBlobs, $row->blob_id );
370 $startId = (int)$row->blob_id;
# Periodic progress report, once enough batches have completed.
374 if ( $batchesDone >= $this->reportingInterval ) {
376 echo
"$startId / $endId\n";
# Orphans = existing AND NOT tracked: a bit survives only if the blob exists
# on the cluster and was never marked by the tracking passes.
382 $orphans = gmp_and( $actualBlobs, gmp_com( $this->trackedBlobs[$cluster] ) );
# gmp_scan1() returns the index of the next set bit at or after $id.
389 $id = gmp_scan1( $orphans, $id );
394 'bo_cluster' => $cluster,
# Flush once the pending batch has grown beyond the batch size.
397 if ( count( $insertBatch ) > $this->batchSize ) {
398 $dbw->newInsertQueryBuilder()
399 ->insertInto(
'blob_orphans' )
400 ->rows( $insertBatch )
401 ->caller( __METHOD__ )->execute();
# Flush whatever is still pending.
408 if ( $insertBatch ) {
409 $dbw->newInsertQueryBuilder()
410 ->insertInto(
'blob_orphans' )
411 ->rows( $insertBatch )
412 ->caller( __METHOD__ )->execute();
414 echo
"Found $numOrphans orphan(s) in $cluster\n";
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.