38 parent::__construct();
40 $this->
addArg(
'cluster',
'cluster(s) to scan',
true,
true );
43 'Adds blobs from a given ES cluster to the blob_tracking table. ' .
44 'Automatically deletes the tracking table and starts from the start again when restarted.'
// Body of the script's entry point (its method header is not visible in this excerpt).
// The clusters to scan are taken from the script's arguments.
49 $this->clusters = $this->parameters->getArgs();
// Orphan-blob detection keeps one GMP bitfield per cluster, so it is only switched on
// when the gmp extension is loaded.
50 if ( extension_loaded(
'gmp' ) ) {
51 $this->doBlobOrphans =
true;
52 foreach ( $this->clusters as $cluster ) {
53 $this->trackedBlobs[$cluster] = gmp_init( 0 );
// NOTE(review): presumably the else branch of the gmp check above; the lines in
// between are not visible in this excerpt.
56 echo
"Warning: the gmp extension is needed to find orphan blobs\n";
// Run the steps in order: integrity check, tracking-table setup, revision tracking,
// orphan-text tracking, and finally orphan-blob detection when it was enabled above.
59 $this->checkIntegrity();
60 $this->initTrackingTable();
61 $this->trackRevisions();
62 $this->trackOrphanText();
63 if ( $this->doBlobOrphans ) {
64 $this->findOrphanBlobs();
66 $this->
output(
"All done.\n" );
/**
 * Pre-flight check: look for HistoryBlobStub objects stored directly in the text table.
 *
 * Prints a progress line, runs one query, and prints either a failure message
 * (which points the operator at resolveStubs.php) or "Integrity check OK".
 * NOTE(review): how the failure path stops the script is not visible in this excerpt.
 */
69 private function checkIntegrity() {
70 echo
"Doing integrity check...\n";
// $exists is the fetched field cast to bool. The visible condition matches rows whose
// flags contain "object" but not "external" and whose first 22 characters, lower-cased,
// equal the serialized-object header o:15:"historyblobstub".
75 $exists = (bool)$dbr->newSelectQueryBuilder()
79 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\' ' .
80 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'' )
81 ->caller( __METHOD__ )->fetchField();
84 echo
"Integrity check failed: found HistoryBlobStub objects in your text table.\n" .
85 "This script could destroy these objects if it continued. Run resolveStubs.php\n" .
90 echo
"Integrity check OK\n";
/**
 * Reset the tracking tables so that every run starts from scratch.
 *
 * If blob_tracking already exists, both blob_tracking and blob_orphans are dropped;
 * afterwards blob_tracking.sql from this script's directory is run via sourceFile().
 * NOTE(review): blob_orphans is dropped without its own existence check, and the SQL
 * file presumably recreates both tables -- confirm against blob_tracking.sql.
 */
93 private function initTrackingTable() {
95 if ( $dbw->tableExists(
'blob_tracking', __METHOD__ ) ) {
96 $dbw->query(
'DROP TABLE ' . $dbw->tableName(
'blob_tracking' ), __METHOD__ );
97 $dbw->query(
'DROP TABLE ' . $dbw->tableName(
'blob_orphans' ), __METHOD__ );
99 $dbw->sourceFile( __DIR__ .
'/blob_tracking.sql' );
/**
 * Build, once, the SQL condition that selects text rows pointing into the scanned clusters.
 *
 * The condition is cached in $this->textClause and only built while that property is
 * falsy. It is one "old_text LIKE 'DB://<cluster>/%'" term per entry of $this->clusters,
 * joined with OR.
 * NOTE(review): the return statement is not visible in this excerpt; presumably the
 * cached clause is returned.
 */
102 private function getTextClause() {
103 if ( !$this->textClause ) {
105 $this->textClause =
'';
106 foreach ( $this->clusters as $cluster ) {
// Separate the terms with OR, but only once a first term has been written.
107 if ( $this->textClause !=
'' ) {
108 $this->textClause .=
' OR ';
110 $this->textClause .=
'old_text' . $dbr->buildLike(
"DB://$cluster/", $dbr->anyString() );
/**
 * Parse an external-store pointer of the form DB://<cluster>/<id> or DB://<cluster>/<id>/<hex hash>.
 *
 * The visible part of the result array holds 'id' (the numeric segment as an int) and
 * 'hash' (the hex segment, or null when the pointer has none). Callers also read a
 * 'cluster' key; the line that sets it, and the value returned when the pattern does
 * not match, are not visible in this excerpt.
 *
 * @param string $text Pointer text, as stored in old_text
 */
117 private function interpretPointer( $text ) {
118 if ( !preg_match(
'!^DB://(\w+)/(\d+)(?:/([0-9a-fA-F]+)|)$!', $text, $m ) ) {
124 'id' => intval( $m[2] ),
125 'hash' => $m[3] ?? null
/**
 * Scan revisions in batches and record, in blob_tracking, which external blob each one uses.
 *
 * Each batch joins revisions through slots and content to the text table, ordered by
 * rev_id and limited to $this->batchSize rows. Every row's old_text is decoded with
 * interpretPointer() and turned into a blob_tracking row (page, revision, text id,
 * cluster, blob id, hash). When $this->doBlobOrphans is set, the blob id is also marked
 * in the per-cluster bitfield $this->trackedBlobs. Progress is echoed, and replication
 * is waited for, every $this->reportingInterval batches.
 * NOTE(review): several lines (loop header, variable setup, closing braces) are missing
 * from this excerpt, so the exact control flow cannot be confirmed from here.
 */
132 private function trackRevisions() {
// Highest rev_id; shown as the upper bound in the progress output below.
138 $endId = (int)$dbr->newSelectQueryBuilder()
139 ->select(
'MAX(rev_id)' )
141 ->caller( __METHOD__ )->fetchField();
145 echo
"Finding revisions...\n";
149 'old_flags ' . $dbr->buildLike( $dbr->anyString(),
'external', $dbr->anyString() ),
// Extra conditions: main slot only, and a content address that starts with 'tt:'.
// The join further down compares the rest of that address (from character 4) to old_id.
153 $conds = array_merge( [
154 'slot_role_id=' . $slotRoleStore->getId( SlotRecord::MAIN ),
155 'SUBSTRING(content_address, 1, 3)=' . $dbr->addQuotes(
'tt:' ),
// Fetch the next batch: revisions with rev_id above the cursor $startId.
160 $res = $dbr->newSelectQueryBuilder()
161 ->select( [
'rev_id',
'rev_page',
'old_id',
'old_flags',
'old_text' ] )
163 ->join(
'slots',
null,
'rev_id=slot_revision_id' )
164 ->join(
'content',
null,
'content_id=slot_content_id' )
165 ->join(
'text',
null,
'SUBSTRING(content_address, 4)=old_id' )
166 ->where( [
'rev_id > ' . $dbr->addQuotes( $startId ) ] )
168 ->orderBy(
'rev_id' )
169 ->limit( $this->batchSize )
170 ->caller( __METHOD__ )->fetchResultSet();
// An empty batch presumably ends the scan (the body of this check is not visible here).
171 if ( !$res->numRows() ) {
176 foreach ( $res as $row ) {
// Advance the cursor so the next batch starts after this revision.
177 $startId = (int)$row->rev_id;
// Decode the DB:// pointer; bad pointers and unexpected clusters are reported below.
178 $info = $this->interpretPointer( $row->old_text );
180 echo
"Invalid DB:// URL in rev_id {$row->rev_id}\n";
183 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
184 echo
"Invalid cluster returned in SQL query: {$info['cluster']}\n";
188 'bt_page' => $row->rev_page,
189 'bt_rev_id' => $row->rev_id,
190 'bt_text_id' => $row->old_id,
191 'bt_cluster' => $info[
'cluster'],
192 'bt_blob_id' => $info[
'id'],
193 'bt_cgz_hash' => $info[
'hash']
// Mark this blob id as referenced in the cluster's bitfield.
195 if ( $this->doBlobOrphans ) {
196 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
// Write the batch to blob_tracking and add its size to the running total.
199 $dbw->insert(
'blob_tracking', $insertBatch, __METHOD__ );
200 $rowsInserted += count( $insertBatch );
// Periodically print "<cursor> / <max id>" and wait for replication.
203 if ( $batchesDone >= $this->reportingInterval ) {
205 echo
"$startId / $endId\n";
206 $lbFactory->waitForReplication();
209 echo
"Found $rowsInserted revisions\n";
/**
 * Scan the text table for external-store rows that no tracked revision refers to.
 *
 * After waiting for the replica to reach the primary's position, text rows are read in
 * batches of $this->batchSize ordered by old_id. Each row's old_text is decoded with
 * interpretPointer() and inserted into blob_tracking with its text id, cluster, blob id
 * and hash. When $this->doBlobOrphans is set, the blob id is also marked in
 * $this->trackedBlobs. Progress is echoed, and replication is waited for, every
 * $this->reportingInterval batches.
 * NOTE(review): several lines (loop header, variable setup, closing braces) are missing
 * from this excerpt, so the exact control flow cannot be confirmed from here.
 */
217 private function trackOrphanText() {
218 # Wait until the blob_tracking table is available in the replica DB
221 $pos = $dbw->getPrimaryPos();
222 $dbr->primaryPosWait( $pos, 100000 );
// Highest old_id; shown as the upper bound in the progress output below.
226 $endId = (int)$dbr->newSelectQueryBuilder()
227 ->select(
'MAX(old_id)' )
229 ->caller( __METHOD__ )->fetchField();
234 echo
"Finding orphan text...\n";
236 # Scan the text table for orphan text
// The left join together with 'bt_text_id' => null keeps only text rows that have no
// blob_tracking entry; the old_flags LIKE condition keeps only rows flagged external.
238 $res = $dbr->newSelectQueryBuilder()
239 ->select( [
'old_id',
'old_flags',
'old_text' ] )
241 ->leftJoin(
'blob_tracking',
null,
'bt_text_id=old_id' )
243 'old_id>' . $dbr->addQuotes( $startId ),
245 'old_flags ' . $dbr->buildLike( $dbr->anyString(),
'external', $dbr->anyString() ),
246 'bt_text_id' =>
null,
248 ->orderBy(
'old_id' )
249 ->limit( $this->batchSize )
250 ->caller( __METHOD__ )->fetchResultSet();
// An empty batch presumably ends the scan (the body of this check is not visible here).
252 if ( !$res->numRows() ) {
257 foreach ( $res as $row ) {
// Advance the cursor so the next batch starts after this text row.
258 $startId = (int)$row->old_id;
// Decode the DB:// pointer; bad pointers and unexpected clusters are reported below.
259 $info = $this->interpretPointer( $row->old_text );
261 echo
"Invalid DB:// URL in old_id {$row->old_id}\n";
264 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
265 echo
"Invalid cluster returned in SQL query\n";
272 'bt_text_id' => $row->old_id,
273 'bt_cluster' => $info[
'cluster'],
274 'bt_blob_id' => $info[
'id'],
275 'bt_cgz_hash' => $info[
'hash']
// Mark this blob id as referenced in the cluster's bitfield.
277 if ( $this->doBlobOrphans ) {
278 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
// Write the batch to blob_tracking and add its size to the running total.
281 $dbw->insert(
'blob_tracking', $insertBatch, __METHOD__ );
283 $rowsInserted += count( $insertBatch );
// Periodically print "<cursor> / <max id>" and wait for replication.
285 if ( $batchesDone >= $this->reportingInterval ) {
287 echo
"$startId / $endId\n";
288 $lbFactory->waitForReplication();
291 echo
"Found $rowsInserted orphan text rows\n";
/**
 * Find blobs that exist on each scanned cluster but were never marked as tracked.
 *
 * Requires the gmp extension; without it a message is printed instead. For every
 * cluster in $this->clusters the method connects to the cluster's replica, reads all
 * blob_id values in batches of $this->batchSize into a bitfield, computes
 * "actual AND NOT tracked" against $this->trackedBlobs[$cluster], and inserts the
 * resulting ids into blob_orphans in batches. The orphan count is echoed per cluster.
 * NOTE(review): several lines (try/catch, loop headers, closing braces) are missing
 * from this excerpt, so the exact control flow cannot be confirmed from here.
 */
301 private function findOrphanBlobs() {
302 if ( !extension_loaded(
'gmp' ) ) {
303 echo
"Can't find orphan blobs, need bitfield support provided by GMP.\n";
311 foreach ( $this->clusters as $cluster ) {
312 echo
"Searching for orphan blobs in $cluster...\n";
313 $lb = $lbFactory->getExternalLB( $cluster );
315 $extDB = $lb->getMaintenanceConnectionRef(
DB_REPLICA );
// A missing database is told apart from other connection errors by the exception text.
317 if ( strpos( $e->getMessage(),
'Unknown database' ) !==
false ) {
318 echo
"No database on $cluster\n";
320 echo
"Error on $cluster: " . $e->getMessage() .
"\n";
// Use the 'blobs table' LB info when it is set, otherwise the default name 'blobs'.
324 $table = $extDB->getLBInfo(
'blobs table' ) ??
'blobs';
325 if ( !$extDB->tableExists( $table, __METHOD__ ) ) {
326 echo
"No blobs table on cluster $cluster\n";
// Bitfield with one bit per blob_id that actually exists on this cluster.
331 $actualBlobs = gmp_init( 0 );
// Highest blob_id; shown as the upper bound in the progress output below.
332 $endId = (int)$extDB->newSelectQueryBuilder()
333 ->select(
'MAX(blob_id)' )
335 ->caller( __METHOD__ )->fetchField();
// Fetch the next batch of blob ids above the cursor $startId.
339 $res = $extDB->newSelectQueryBuilder()
340 ->select( [
'blob_id' ] )
342 ->where( [
'blob_id > ' . $extDB->addQuotes( $startId ) ] )
343 ->orderBy(
'blob_id' )
344 ->limit( $this->batchSize )
345 ->caller( __METHOD__ )->fetchResultSet();
// An empty batch presumably ends the scan (the body of this check is not visible here).
347 if ( !$res->numRows() ) {
351 foreach ( $res as $row ) {
352 gmp_setbit( $actualBlobs, $row->blob_id );
353 $startId = (int)$row->blob_id;
357 if ( $batchesDone >= $this->reportingInterval ) {
359 echo
"$startId / $endId\n";
// Orphans are the bits set in $actualBlobs but not in the tracked bitfield
// (gmp_com() yields the complement of the tracked set).
365 $orphans = gmp_and( $actualBlobs, gmp_com( $this->trackedBlobs[$cluster] ) );
// gmp_scan1() returns the position of the next set bit at or after $id.
372 $id = gmp_scan1( $orphans, $id );
377 'bo_cluster' => $cluster,
// Flush once the pending batch has grown past batchSize.
380 if ( count( $insertBatch ) > $this->batchSize ) {
381 $dbw->insert(
'blob_orphans', $insertBatch, __METHOD__ );
// Flush whatever is left over.
388 if ( $insertBatch ) {
389 $dbw->insert(
'blob_orphans', $insertBatch, __METHOD__ );
391 echo
"Found $numOrphans orphan(s) in $cluster\n";
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.