51 if ( extension_loaded(
'gmp' ) ) {
52 $this->doBlobOrphans =
true;
54 $this->trackedBlobs[$cluster] = gmp_init( 0 );
57 echo
"Warning: the gmp extension is needed to find orphan blobs\n";
/**
 * Run the tracking passes in a fixed order: integrity check, tracking
 * table setup, revision tracking, orphan text tracking and, only when
 * $this->doBlobOrphans is set, the orphan blob search.
 */
61 public function run() {
62 $this->checkIntegrity();
63 $this->initTrackingTable();
64 $this->trackRevisions();
65 $this->trackOrphanText();
# The orphan blob pass is optional; doBlobOrphans is switched on where the
# gmp extension is detected.
66 if ( $this->doBlobOrphans ) {
67 $this->findOrphanBlobs();
/**
 * Look for HistoryBlobStub objects in the text table before doing any work.
 *
 * The query asks for any text row whose old_flags contain "object" but not
 * "external" and whose first 22 characters, lower-cased, equal the
 * serialized-object prefix o:15:"historyblobstub".
 *
 * NOTE(review): the lines that obtain $dbr, branch on $exists and stop the
 * script are not visible in this excerpt. The failure message is presumably
 * printed when such a row exists and the OK message otherwise - confirm.
 */
71 private function checkIntegrity() {
72 echo
"Doing integrity check...\n";
# selectField() yields the constant 1 when a matching row exists; the (bool)
# cast turns the result into a plain found / not found flag.
77 $exists = (bool)
$dbr->selectField(
'text',
'1',
78 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\' ' .
# LEFT(old_text,22) is exactly the length of the prefix being compared.
79 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
84 echo
"Integrity check failed: found HistoryBlobStub objects in your text table.\n" .
85 "This script could destroy these objects if it continued. Run resolveStubs.php\n" .
90 echo
"Integrity check OK\n";
/**
 * Reset the tracking tables: drop any existing ones, then load the schema
 * from blob_tracking.sql in this script's directory.
 *
 * NOTE(review): the closing brace of the if is not visible in this excerpt;
 * sourceFile() presumably runs whether or not the tables existed - confirm.
 */
93 private function initTrackingTable() {
# Both tables are dropped based on the existence of blob_tracking alone;
# blob_orphans is not checked separately.
95 if ( $dbw->tableExists(
'blob_tracking', __METHOD__ ) ) {
96 $dbw->query(
'DROP TABLE ' . $dbw->tableName(
'blob_tracking' ), __METHOD__ );
97 $dbw->query(
'DROP TABLE ' . $dbw->tableName(
'blob_orphans' ), __METHOD__ );
# Load the table definitions shipped next to this script.
99 $dbw->sourceFile( __DIR__ .
'/blob_tracking.sql' );
/**
 * Build, and cache in $this->textClause, an SQL condition that matches text
 * rows whose old_text starts with DB://<cluster>/ for any cluster in
 * $this->clusters. The per-cluster conditions are joined with OR.
 *
 * NOTE(review): the return statement is not visible in this excerpt; the
 * method presumably returns $this->textClause - confirm.
 */
102 private function getTextClause() {
# Build only when nothing is cached. With an empty cluster list the clause
# stays '' (falsy), so it would be rebuilt on every call.
103 if ( !$this->textClause ) {
105 $this->textClause =
'';
106 foreach ( $this->clusters as $cluster ) {
# Separate every condition after the first with OR.
107 if ( $this->textClause !=
'' ) {
108 $this->textClause .=
' OR ';
# buildLike() supplies the LIKE fragment: the literal prefix followed by a
# match-anything wildcard.
110 $this->textClause .=
'old_text' .
$dbr->buildLike(
"DB://$cluster/",
$dbr->anyString() );
/**
 * Parse an external storage pointer of the form DB://cluster/id or
 * DB://cluster/id/hash, where id is decimal and hash is hexadecimal.
 *
 * @param string $text Pointer text, as stored in old_text
 *
 * NOTE(review): the failure return and the line that fills the 'cluster'
 * key are not visible in this excerpt. Callers read $info['cluster'],
 * $info['id'] and $info['hash'] - confirm the full return shape.
 */
117 private function interpretPointer( $text ) {
# Capture groups: 1 = cluster name, 2 = blob id, 3 = optional hex hash.
118 if ( !preg_match(
'!^DB://(\w+)/(\d+)(?:/([0-9a-fA-F]+)|)$!', $text, $m ) ) {
124 'id' => intval( $m[2] ),
# Group 3 is absent from $m when the pointer has no hash part, hence the
# null fallback.
125 'hash' => $m[3] ?? null
/**
 * Scan revisions in rev_id order and record, in blob_tracking, each
 * main-slot revision whose text row is flagged as external storage.
 *
 * Every row's old_text is parsed with interpretPointer(); the page id,
 * revision id, text id, cluster, blob id and hash are queued for insertion.
 * When $this->doBlobOrphans is set, the blob id is also marked in the
 * per-cluster GMP bitfield $this->trackedBlobs, which findOrphanBlobs()
 * reads later.
 *
 * NOTE(review): the lines that obtain $dbr and $dbw, initialise $startId,
 * $rowsInserted and $batchesDone, and form the outer batch loop are not
 * visible in this excerpt.
 */
132 private function trackRevisions() {
# Highest rev_id; used in the progress output further down.
138 $endId = (int)
$dbr->selectField(
'revision',
'MAX(rev_id)',
'', __METHOD__ );
142 echo
"Finding revisions...\n";
144 $fields = [
'rev_id',
'rev_page',
'old_id',
'old_flags',
'old_text' ];
# Rows are ordered by rev_id; the rev_id > $startId paging condition below
# relies on that ordering.
146 'ORDER BY' =>
'rev_id',
# Only text rows whose flags contain "external" are of interest.
151 'old_flags ' .
$dbr->buildLike(
$dbr->anyString(),
'external',
$dbr->anyString() ),
153 $slotRoleStore = MediaWikiServices::getInstance()->getSlotRoleStore();
# Join revision -> slots -> content -> text, restricted to the main slot.
# The content address must have the form tt:<old_id>.
154 $tables = [
'revision',
'slots',
'content',
'text' ];
155 $conds = array_merge( [
156 'rev_id=slot_revision_id',
157 'slot_role_id=' . $slotRoleStore->getId( SlotRecord::MAIN ),
158 'content_id=slot_content_id',
159 'SUBSTRING(content_address, 1, 3)=' .
$dbr->addQuotes(
'tt:' ),
160 'SUBSTRING(content_address, 4)=old_id',
162 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
# Resume after the last rev_id seen in the previous batch.
168 'rev_id > ' .
$dbr->addQuotes( $startId ),
# An empty batch presumably ends the scan (the statement inside this if is
# not visible in this excerpt).
173 if ( !
$res->numRows() ) {
178 foreach (
$res as $row ) {
# Remember the position for the next batch.
179 $startId = (int)$row->rev_id;
180 $info = $this->interpretPointer( $row->old_text );
# Presumably reached when the pointer could not be parsed; the guarding
# condition is not visible in this excerpt.
182 echo
"Invalid DB:// URL in rev_id {$row->rev_id}\n";
# Sanity check that the parsed cluster is one of the configured clusters.
# NOTE(review): in_array() is called without the strict flag.
185 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
186 echo
"Invalid cluster returned in SQL query: {$info['cluster']}\n";
# Column values for one blob_tracking row.
190 'bt_page' => $row->rev_page,
191 'bt_rev_id' => $row->rev_id,
192 'bt_text_id' => $row->old_id,
193 'bt_cluster' => $info[
'cluster'],
194 'bt_blob_id' => $info[
'id'],
195 'bt_cgz_hash' => $info[
'hash']
# Mark the blob id as referenced in this cluster's bitfield.
197 if ( $this->doBlobOrphans ) {
198 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
# Write the queued rows and add them to the running total.
201 $dbw->insert(
'blob_tracking', $insertBatch, __METHOD__ );
202 $rowsInserted += count( $insertBatch );
# Periodically print progress and let replication catch up.
205 if ( $batchesDone >= $this->reportingInterval ) {
207 echo
"$startId / $endId\n";
208 $lbFactory->waitForReplication();
211 echo
"Found $rowsInserted revisions\n";
/**
 * Scan the text table in old_id order for external-storage rows and record
 * them in blob_tracking as orphan text.
 *
 * The query LEFT JOINs blob_tracking on bt_text_id=old_id. Each selected
 * row's pointer is parsed with interpretPointer() and queued with its text
 * id, cluster, blob id and hash. When $this->doBlobOrphans is set, the blob
 * id is also marked in $this->trackedBlobs.
 *
 * NOTE(review): not every query condition is visible in this excerpt; the
 * select is presumably limited to rows with no blob_tracking match, i.e.
 * text that trackRevisions() did not record - confirm. The lines that obtain
 * $dbr and $dbw, initialise the counters and form the batch loop are also
 * elided.
 */
219 private function trackOrphanText() {
220 # Wait until the blob_tracking table is available in the replica DB
# Block on the replica until it reaches the primary's current position
# (100000 is presumably the timeout - confirm units against the DB API).
223 $pos = $dbw->getPrimaryPos();
224 $dbr->primaryPosWait( $pos, 100000 );
# Highest old_id; used in the progress output further down.
228 $endId = (int)
$dbr->selectField(
'text',
'MAX(old_id)',
'', __METHOD__ );
231 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
233 echo
"Finding orphan text...\n";
235 # Scan the text table for orphan text
237 $res =
$dbr->select( [
'text',
'blob_tracking' ],
238 [
'old_id',
'old_flags',
'old_text' ],
# Resume after the last old_id seen in the previous batch.
240 'old_id>' .
$dbr->addQuotes( $startId ),
# Only text rows whose flags contain "external" are of interest.
242 'old_flags ' .
$dbr->buildLike(
$dbr->anyString(),
'external',
$dbr->anyString() ),
# Fixed-size batches in old_id order, matching the paging condition above.
247 'ORDER BY' =>
'old_id',
248 'LIMIT' => $this->batchSize
# Join condition linking each text row to its tracking row, if any.
250 [
'blob_tracking' => [
'LEFT JOIN',
'bt_text_id=old_id' ] ]
# An empty batch presumably ends the scan (the statement inside this if is
# not visible in this excerpt).
253 if ( !
$res->numRows() ) {
258 foreach (
$res as $row ) {
# Remember the position for the next batch.
259 $startId = (int)$row->old_id;
260 $info = $this->interpretPointer( $row->old_text );
# Presumably reached when the pointer could not be parsed; the guarding
# condition is not visible in this excerpt.
262 echo
"Invalid DB:// URL in old_id {$row->old_id}\n";
# Sanity check that the parsed cluster is one of the configured clusters.
# NOTE(review): in_array() is called without the strict flag.
265 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
266 echo
"Invalid cluster returned in SQL query\n";
# Column values for one blob_tracking row; no page or revision id is set in
# the lines visible here.
273 'bt_text_id' => $row->old_id,
274 'bt_cluster' => $info[
'cluster'],
275 'bt_blob_id' => $info[
'id'],
276 'bt_cgz_hash' => $info[
'hash']
# Mark the blob id as referenced in this cluster's bitfield.
278 if ( $this->doBlobOrphans ) {
279 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
# Write the queued rows and add them to the running total.
282 $dbw->insert(
'blob_tracking', $insertBatch, __METHOD__ );
284 $rowsInserted += count( $insertBatch );
# Periodically print progress and let replication catch up.
286 if ( $batchesDone >= $this->reportingInterval ) {
288 echo
"$startId / $endId\n";
289 $lbFactory->waitForReplication();
292 echo
"Found $rowsInserted orphan text rows\n";
/**
 * For each configured cluster, find blob ids that exist in the cluster's
 * blobs table but were never marked in $this->trackedBlobs, and record them
 * in blob_orphans.
 *
 * Existing blob ids are collected into a GMP bitfield in batches. The
 * orphans are the bits set in that field and clear in the tracked field.
 *
 * NOTE(review): the try/catch around the connection, the early exits, the
 * batch loops and the initialisation of $startId, $id, $insertBatch,
 * $numOrphans and $dbw are not visible in this excerpt.
 */
302 private function findOrphanBlobs() {
# The bitfields need GMP; without it only a message is printed here
# (presumably followed by an early return - not visible).
303 if ( !extension_loaded(
'gmp' ) ) {
304 echo
"Can't find orphan blobs, need bitfield support provided by GMP.\n";
310 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
312 foreach ( $this->clusters as $cluster ) {
313 echo
"Searching for orphan blobs in $cluster...\n";
# Use a replica connection to the external storage cluster.
314 $lb = $lbFactory->getExternalLB( $cluster );
316 $extDB = $lb->getMaintenanceConnectionRef(
DB_REPLICA );
# Connection errors are told apart by message text: a missing database gets
# a short notice, anything else is reported with the full message.
318 if ( strpos( $e->getMessage(),
'Unknown database' ) !==
false ) {
319 echo
"No database on $cluster\n";
321 echo
"Error on $cluster: " . $e->getMessage() .
"\n";
# The table name can be overridden through the 'blobs table' LB info key;
# the default is 'blobs'.
325 $table = $extDB->getLBInfo(
'blobs table' ) ??
'blobs';
326 if ( !$extDB->tableExists( $table, __METHOD__ ) ) {
327 echo
"No blobs table on cluster $cluster\n";
# Bitfield of blob ids that actually exist on this cluster.
332 $actualBlobs = gmp_init( 0 );
# Highest blob_id; used in the progress output further down.
333 $endId = (int)$extDB->selectField( $table,
'MAX(blob_id)',
'', __METHOD__ );
# Resume after the last blob_id seen in the previous batch.
337 $res = $extDB->select( $table,
339 [
'blob_id > ' . $extDB->addQuotes( $startId ) ],
# An empty batch presumably ends the scan (the statement inside this if is
# not visible in this excerpt).
344 if ( !
$res->numRows() ) {
348 foreach (
$res as $row ) {
349 gmp_setbit( $actualBlobs, $row->blob_id );
350 $startId = (int)$row->blob_id;
354 if ( $batchesDone >= $this->reportingInterval ) {
356 echo
"$startId / $endId\n";
# Orphans = existing AND NOT tracked.
362 $orphans = gmp_and( $actualBlobs, gmp_com( $this->trackedBlobs[$cluster] ) );
# gmp_scan1() finds the next set bit at or after index $id, i.e. the next
# orphan blob id.
369 $id = gmp_scan1( $orphans, $id );
374 'bo_cluster' => $cluster,
# Flush once the queue exceeds batchSize; because the test is a strict >, a
# flushed batch can hold batchSize + 1 rows.
377 if ( count( $insertBatch ) > $this->batchSize ) {
378 $dbw->insert(
'blob_orphans', $insertBatch, __METHOD__ );
# Write whatever is still queued for this cluster.
385 if ( $insertBatch ) {
386 $dbw->insert(
'blob_orphans', $insertBatch, __METHOD__ );
388 echo
"Found $numOrphans orphan(s) in $cluster\n";