138 $endId =
$dbr->selectField(
'revision',
'MAX(rev_id)',
'', __METHOD__ );
142 echo
"Finding revisions...\n";
145 $res =
$dbr->select( [
'revision',
'text' ],
146 [
'rev_id',
'rev_page',
'old_id',
'old_flags',
'old_text' ],
148 'rev_id > ' .
$dbr->addQuotes( $startId ),
149 'rev_text_id=old_id',
151 'old_flags ' .
$dbr->buildLike(
$dbr->anyString(),
'external',
$dbr->anyString() ),
155 'ORDER BY' =>
'rev_id',
156 'LIMIT' => $this->batchSize
159 if ( !
$res->numRows() ) {
164 foreach (
$res as $row ) {
165 $startId = $row->rev_id;
168 echo
"Invalid DB:// URL in rev_id {$row->rev_id}\n";
171 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
172 echo
"Invalid cluster returned in SQL query: {$info['cluster']}\n";
176 'bt_page' => $row->rev_page,
177 'bt_rev_id' => $row->rev_id,
178 'bt_text_id' => $row->old_id,
179 'bt_cluster' => $info[
'cluster'],
180 'bt_blob_id' => $info[
'id'],
181 'bt_cgz_hash' => $info[
'hash']
183 if ( $this->doBlobOrphans ) {
184 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
187 $dbw->insert(
'blob_tracking', $insertBatch, __METHOD__ );
188 $rowsInserted += count( $insertBatch );
191 if ( $batchesDone >= $this->reportingInterval ) {
193 echo
"$startId / $endId\n";
197 echo
"Found $rowsInserted revisions\n";
206 # Wait until the blob_tracking table is available in the replica DB
209 $pos = $dbw->getMasterPos();
210 $dbr->masterPosWait( $pos, 100000 );
214 $endId =
$dbr->selectField(
'text',
'MAX(old_id)',
'', __METHOD__ );
218 echo
"Finding orphan text...\n";
220 # Scan the text table for orphan text
222 $res =
$dbr->select( [
'text',
'blob_tracking' ],
223 [
'old_id',
'old_flags',
'old_text' ],
225 'old_id>' .
$dbr->addQuotes( $startId ),
227 'old_flags ' .
$dbr->buildLike(
$dbr->anyString(),
'external',
$dbr->anyString() ),
232 'ORDER BY' =>
'old_id',
233 'LIMIT' => $this->batchSize
235 [
'blob_tracking' => [
'LEFT JOIN',
'bt_text_id=old_id' ] ]
238 foreach (
$res as $row ) {
239 $ids[] = $row->old_id;
242 if ( !
$res->numRows() ) {
247 foreach (
$res as $row ) {
248 $startId = $row->old_id;
251 echo
"Invalid DB:// URL in old_id {$row->old_id}\n";
254 if ( !in_array( $info[
'cluster'], $this->clusters ) ) {
255 echo
"Invalid cluster returned in SQL query\n";
262 'bt_text_id' => $row->old_id,
263 'bt_cluster' => $info[
'cluster'],
264 'bt_blob_id' => $info[
'id'],
265 'bt_cgz_hash' => $info[
'hash']
267 if ( $this->doBlobOrphans ) {
268 gmp_setbit( $this->trackedBlobs[$info[
'cluster']], $info[
'id'] );
271 $dbw->insert(
'blob_tracking', $insertBatch, __METHOD__ );
273 $rowsInserted += count( $insertBatch );
275 if ( $batchesDone >= $this->reportingInterval ) {
277 echo
"$startId / $endId\n";
281 echo
"Found $rowsInserted orphan text rows\n";
292 if ( !extension_loaded(
'gmp' ) ) {
293 echo
"Can't find orphan blobs, need bitfield support provided by GMP.\n";
300 foreach ( $this->clusters
as $cluster ) {
301 echo
"Searching for orphan blobs in $cluster...\n";
302 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
303 $lb = $lbFactory->getExternalLB( $cluster );
307 if ( strpos(
$e->error,
'Unknown database' ) !==
false ) {
308 echo
"No database on $cluster\n";
310 echo
"Error on $cluster: " .
$e->getMessage() .
"\n";
314 $table = $extDB->getLBInfo(
'blobs table' );
315 if ( is_null( $table ) ) {
318 if ( !$extDB->tableExists( $table ) ) {
319 echo
"No blobs table on cluster $cluster\n";
324 $actualBlobs = gmp_init( 0 );
325 $endId = $extDB->selectField( $table,
'MAX(blob_id)',
'', __METHOD__ );
329 $res = $extDB->select( $table,
331 [
'blob_id > ' . $extDB->addQuotes( $startId ) ],
333 [
'LIMIT' => $this->batchSize,
'ORDER BY' =>
'blob_id' ]
336 if ( !
$res->numRows() ) {
340 foreach (
$res as $row ) {
341 gmp_setbit( $actualBlobs, $row->blob_id );
343 $startId = $row->blob_id;
346 if ( $batchesDone >= $this->reportingInterval ) {
348 echo
"$startId / $endId\n";
354 $orphans = gmp_and( $actualBlobs, gmp_com( $this->trackedBlobs[$cluster] ) );
361 $id = gmp_scan1( $orphans, $id );
366 'bo_cluster' => $cluster,
369 if ( count( $insertBatch ) > $this->batchSize ) {
370 $dbw->insert(
'blob_orphans', $insertBatch, __METHOD__ );
377 if ( $insertBatch ) {
378 $dbw->insert(
'blob_orphans', $insertBatch, __METHOD__ );
380 echo
"Found $numOrphans orphan(s) in $cluster\n";