31 use Wikimedia\AtEase\AtEase;
34 require __DIR__ .
'/../CommandLineInc.php';
36 if ( count( $args ) < 1 ) {
37 echo
"Usage: php recompressTracked.php [options] <cluster> [... <cluster>...]
38 Moves blobs indexed by trackBlobs.php to a specified list of destination clusters,
39 and recompresses them in the process. Restartable.
42 --procs <procs> Set the number of child processes (default 1)
43 --copy-only Copy only, do not update the text table. Restart
44 without this option to complete.
45 --debug-log <file> Log debugging data to the specified file
46 --info-log <file> Log progress messages to the specified file
47 --critical-log <file> Log error messages to the specified file
80 private static $optionsWithArgs = [
88 private static $cmdLineOptionMap = [
89 'no-count' =>
'noCount',
90 'procs' =>
'numProcs',
91 'copy-only' =>
'copyOnly',
93 'child-id' =>
'childId',
94 'debug-log' =>
'debugLog',
95 'info-log' =>
'infoLog',
96 'critical-log' =>
'criticalLog',
104 $jobOptions = [
'destClusters' => $args ];
105 foreach ( self::$cmdLineOptionMap as $cmdOption => $classOption ) {
106 if ( isset( $options[$cmdOption] ) ) {
107 $jobOptions[$classOption] = $options[$cmdOption];
111 return new self( $jobOptions );
115 foreach ( $options as $name => $value ) {
116 $this->$name = $value;
118 $esFactory = MediaWikiServices::getInstance()->getExternalStoreFactory();
119 $this->store = $esFactory->getStore(
'DB' );
120 if ( !$this->isChild ) {
121 $GLOBALS[
'wgDebugLogPrefix'] =
"RCT M: ";
122 } elseif ( $this->childId !==
false ) {
123 $GLOBALS[
'wgDebugLogPrefix'] =
"RCT {$this->childId}: ";
125 $this->pageBlobClass = function_exists(
'xdiff_string_bdiff' ) ?
126 DiffHistoryBlob::class : ConcatenatedGzipHistoryBlob::class;
127 $this->orphanBlobClass = ConcatenatedGzipHistoryBlob::class;
129 $this->blobStore = MediaWikiServices::getInstance()
130 ->getBlobStoreFactory()
136 if ( $this->debugLog ) {
137 $this->logToFile( $msg, $this->debugLog );
143 if ( $this->infoLog ) {
144 $this->logToFile( $msg, $this->infoLog );
150 if ( $this->criticalLog ) {
151 $this->logToFile( $msg, $this->criticalLog );
155 private function logToFile( $msg,
$file ) {
157 if ( $this->childId !==
false ) {
158 $header .=
"({$this->childId})";
160 $header .=
' ' . WikiMap::getCurrentWikiDbDomain()->getId();
161 LegacyLogger::emit( sprintf(
"%-50s %s\n",
$header, $msg ),
$file );
169 private function syncDBs() {
172 $pos = $dbw->getPrimaryPos();
173 $dbr->primaryPosWait( $pos, 100000 );
180 if ( $this->isChild ) {
191 if ( !$this->checkTrackingTable() ) {
196 $this->startChildProcs();
198 $this->doAllOrphans();
199 $this->killChildProcs();
206 private function checkTrackingTable() {
208 if ( !$dbr->tableExists(
'blob_tracking', __METHOD__ ) ) {
209 $this->
critical(
"Error: blob_tracking table does not exist" );
213 $row = $dbr->newSelectQueryBuilder()
215 ->from(
'blob_tracking' )
216 ->caller( __METHOD__ )->fetchRow();
218 $this->
info(
"Warning: blob_tracking table contains no rows, skipping this wiki." );
232 private function startChildProcs() {
233 $wiki = WikiMap::getCurrentWikiId();
235 $cmd =
'php ' . Shell::escape( __FILE__ );
236 foreach ( self::$cmdLineOptionMap as $cmdOption => $classOption ) {
237 if ( $cmdOption ==
'child-id' ) {
242 $cmd .=
" --$cmdOption " . Shell::escape( $this->$classOption );
243 } elseif ( $this->$classOption ) {
244 $cmd .=
" --$cmdOption";
248 ' --wiki ' . Shell::escape( $wiki ) .
249 ' ' . Shell::escape( ...$this->destClusters );
251 $this->childPipes = $this->childProcs = [];
256 [
'file',
'php://stdout',
'w' ],
257 [
'file',
'php://stderr',
'w' ]
259 AtEase::suppressWarnings();
260 $proc = proc_open(
"$cmd --child-id $i", $spec, $pipes );
261 AtEase::restoreWarnings();
263 $this->
critical(
"Error opening child process: $cmd" );
266 $this->childProcs[$i] = $proc;
267 $this->childPipes[$i] = $pipes[0];
269 $this->prevChildId = -1;
275 private function killChildProcs() {
276 $this->
info(
"Waiting for child processes to finish..." );
278 $this->dispatchToChild( $i,
'quit' );
281 $status = proc_close( $this->childProcs[$i] );
283 $this->
critical(
"Warning: child #$i exited with status $status" );
286 $this->
info(
"Done." );
294 private function dispatch( ...$args ) {
298 $numPipes = stream_select( $x, $pipes, $y, 3600 );
300 $this->
critical(
"Error waiting to write to child process. Aborting" );
304 $childId = ( $i + $this->prevChildId + 1 ) % $this->numProcs;
307 $this->dispatchToChild(
$childId, $args );
321 private function dispatchToChild(
$childId, $args ) {
322 $args = (array)$args;
323 $cmd = implode(
' ', $args );
324 fwrite( $this->childPipes[
$childId],
"$cmd\n" );
330 private function doAllPages() {
334 if ( $this->noCount ) {
335 $numPages =
'[unknown]';
337 $numPages = $dbr->selectField(
'blob_tracking',
338 'COUNT(DISTINCT bt_page)',
339 # A condition is required so that this query uses the index
344 if ( $this->copyOnly ) {
345 $this->
info(
"Copying pages..." );
347 $this->
info(
"Moving pages..." );
350 $res = $dbr->newSelectQueryBuilder()
351 ->select( [
'bt_page' ] )
353 ->from(
'blob_tracking' )
354 ->where( [
'bt_moved' => 0,
'bt_page > ' . $dbr->addQuotes( $startId ) ] )
355 ->orderBy(
'bt_page' )
356 ->limit( $this->batchSize )
357 ->caller( __METHOD__ )->fetchResultSet();
358 if ( !$res->numRows() ) {
361 foreach ( $res as $row ) {
362 $startId = $row->bt_page;
363 $this->dispatch(
'doPage', $row->bt_page );
366 $this->report(
'pages', $i, $numPages );
368 $this->report(
'pages', $i, $numPages );
369 if ( $this->copyOnly ) {
370 $this->
info(
"All page copies queued." );
372 $this->
info(
"All page moves queued." );
382 private function report( $label, $current, $end ) {
384 if ( $current == $end || $this->numBatches >= $this->reportingInterval ) {
385 $this->numBatches = 0;
386 $this->
info(
"$label: $current / $end" );
387 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
394 private function doAllOrphans() {
398 if ( $this->noCount ) {
399 $numOrphans =
'[unknown]';
401 $numOrphans = $dbr->newSelectQueryBuilder()
402 ->select(
'COUNT(DISTINCT bt_text_id)' )
403 ->from(
'blob_tracking' )
404 ->where( [
'bt_moved' => 0,
'bt_page' => 0 ] )
405 ->caller( __METHOD__ )->fetchField();
406 if ( !$numOrphans ) {
410 if ( $this->copyOnly ) {
411 $this->
info(
"Copying orphans..." );
413 $this->
info(
"Moving orphans..." );
417 $res = $dbr->newSelectQueryBuilder()
418 ->select( [
'bt_text_id' ] )
420 ->from(
'blob_tracking' )
421 ->where( [
'bt_moved' => 0,
'bt_page' => 0,
'bt_text_id > ' . $dbr->addQuotes( $startId ) ] )
422 ->orderBy(
'bt_text_id' )
423 ->limit( $this->batchSize )
424 ->caller( __METHOD__ )->fetchResultSet();
425 if ( !$res->numRows() ) {
429 foreach ( $res as $row ) {
430 $startId = $row->bt_text_id;
431 $ids[] = $row->bt_text_id;
437 while ( count( $ids ) > $this->orphanBatchSize ) {
438 $args = array_slice( $ids, 0, $this->orphanBatchSize );
439 $ids = array_slice( $ids, $this->orphanBatchSize );
440 array_unshift( $args,
'doOrphanList' );
441 $this->dispatch( ...$args );
443 if ( count( $ids ) ) {
445 array_unshift( $args,
'doOrphanList' );
446 $this->dispatch( ...$args );
449 $this->report(
'orphans', $i, $numOrphans );
451 $this->report(
'orphans', $i, $numOrphans );
452 $this->
info(
"All orphans queued." );
459 $this->
debug(
'starting' );
462 while ( !feof( STDIN ) ) {
463 $line = rtrim( fgets( STDIN ) );
467 $this->
debug( $line );
468 $args = explode(
' ', $line );
469 $cmd = array_shift( $args );
472 $this->doPage( intval( $args[0] ) );
475 $this->doOrphanList( array_map(
'intval', $args ) );
480 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
489 private function doPage( $pageId ) {
490 $title = Title::newFromID( $pageId );
492 $titleText = $title->getPrefixedText();
494 $titleText =
'[deleted]';
499 if ( !$this->copyOnly ) {
500 $this->finishIncompleteMoves( [
'bt_page' => $pageId ] );
507 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
509 $res = $dbr->newSelectQueryBuilder()
511 ->from(
'blob_tracking' )
512 ->join(
'text',
null,
'bt_text_id=old_id' )
514 'bt_page' => $pageId,
515 'bt_text_id > ' . $dbr->addQuotes( $startId ),
517 'bt_new_url' =>
null,
519 ->orderBy(
'bt_text_id' )
520 ->limit( $this->batchSize )
521 ->caller( __METHOD__ )->fetchResultSet();
522 if ( !$res->numRows() ) {
527 foreach ( $res as $row ) {
528 $startId = $row->bt_text_id;
529 if ( $lastTextId == $row->bt_text_id ) {
533 $lastTextId = $row->bt_text_id;
535 $text = $this->blobStore->expandBlob( $row->old_text, $row->old_flags );
536 if ( $text ===
false ) {
537 $this->
critical(
"Error loading {$row->bt_rev_id}/{$row->bt_text_id}" );
542 if ( !$trx->addItem( $text, $row->bt_text_id ) ) {
543 $this->
debug(
"$titleText: committing blob with " . $trx->getSize() .
" items" );
546 $lbFactory->waitForReplication();
551 $this->
debug(
"$titleText: committing blob with " . $trx->getSize() .
" items" );
569 if ( $this->copyOnly ) {
570 $this->
critical(
"Internal error: can't call moveTextRow() in --copy-only mode" );
574 $dbw->begin( __METHOD__ );
575 $dbw->update(
'text',
578 'old_flags' =>
'external,utf-8',
585 $dbw->update(
'blob_tracking',
587 [
'bt_text_id' => $textId ],
590 $dbw->commit( __METHOD__ );
603 private function finishIncompleteMoves( $conds ) {
605 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
608 $conds = array_merge( $conds, [
610 'bt_new_url IS NOT NULL'
613 $res = $dbr->newSelectQueryBuilder()
615 ->from(
'blob_tracking' )
617 ->andWhere( [
'bt_text_id > ' . $dbr->addQuotes( $startId ) ] )
618 ->orderBy(
'bt_text_id' )
619 ->limit( $this->batchSize )
620 ->caller( __METHOD__ )->fetchResultSet();
621 if ( !$res->numRows() ) {
624 $this->
debug(
'Incomplete: ' . $res->numRows() .
' rows' );
625 foreach ( $res as $row ) {
626 $startId = $row->bt_text_id;
627 $this->
moveTextRow( $row->bt_text_id, $row->bt_new_url );
628 if ( $row->bt_text_id % 10 == 0 ) {
629 $lbFactory->waitForReplication();
640 $cluster = next( $this->destClusters );
641 if ( $cluster ===
false ) {
642 $cluster = reset( $this->destClusters );
653 private function doOrphanList( $textIds ) {
655 if ( !$this->copyOnly ) {
656 $this->finishIncompleteMoves( [
'bt_text_id' => $textIds ] );
662 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
664 ->select( [
'old_id',
'old_text',
'old_flags' ] )
667 ->join(
'blob_tracking',
null,
'bt_text_id=old_id' )
668 ->where( [
'old_id' => $textIds,
'bt_moved' => 0 ] )
669 ->caller( __METHOD__ )->fetchResultSet();
671 foreach ( $res as $row ) {
672 $text = $this->blobStore->expandBlob( $row->old_text, $row->old_flags );
673 if ( $text ===
false ) {
674 $this->
critical(
"Error: cannot load revision text for old_id={$row->old_id}" );
678 if ( !$trx->addItem( $text, $row->old_id ) ) {
679 $this->
debug(
"[orphan]: committing blob with " . $trx->getSize() .
" rows" );
682 $lbFactory->waitForReplication();
685 $this->
debug(
"[orphan]: committing blob with " . $trx->getSize() .
" rows" );
725 $this->cgz =
new $class;
727 $hash = $this->cgz->addItem( $text );
728 $this->referrers[$textId] = $hash;
729 $this->texts[$textId] = $text;
731 return $this->cgz->isHappy();
735 return count( $this->texts );
743 $this->cgz =
new $class;
744 $this->referrers = [];
745 foreach ( $this->texts as $textId => $text ) {
746 $hash = $this->cgz->addItem( $text );
747 $this->referrers[$textId] = $hash;
757 $originalCount = count( $this->texts );
758 if ( !$originalCount ) {
771 $dbw->begin( __METHOD__ );
772 $res = $dbw->newSelectQueryBuilder()
773 ->select( [
'bt_text_id',
'bt_moved' ] )
775 ->from(
'blob_tracking' )
776 ->where( [
'bt_text_id' => array_keys( $this->referrers ) ] )
777 ->caller( __METHOD__ )->fetchResultSet();
779 foreach ( $res as $row ) {
780 if ( $row->bt_moved ) {
781 # This row has already been moved, remove it
782 $this->parent->debug(
"TRX: conflict detected in old_id={$row->bt_text_id}" );
783 unset( $this->texts[$row->bt_text_id] );
790 if ( !count( $this->texts ) ) {
792 if ( $originalCount > 1 ) {
794 $this->parent->critical(
795 "Warning: concurrent operation detected, are there two conflicting " .
796 "processes running, doing the same job?" );
805 $targetCluster = $this->parent->getTargetCluster();
806 $store = $this->parent->
store;
807 $targetDB = $store->
getPrimary( $targetCluster );
808 $targetDB->clearFlag(
DBO_TRX );
809 $targetDB->begin( __METHOD__ );
810 $baseUrl = $this->parent->store->store( $targetCluster, serialize( $this->cgz ) );
813 foreach ( $this->referrers as $textId => $hash ) {
814 $url = $baseUrl .
'/' . $hash;
815 $dbw->update(
'blob_tracking',
816 [
'bt_new_url' => $url ],
818 'bt_text_id' => $textId,
819 'bt_moved' => 0, # Check for concurrent conflicting update
825 $targetDB->commit( __METHOD__ );
828 $dbw->commit( __METHOD__ );
831 if ( !$this->parent->copyOnly ) {
832 foreach ( $this->referrers as $textId => $hash ) {
833 $url = $baseUrl .
'/' . $hash;
834 $this->parent->moveTextRow( $textId, $url );
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfHostname()
Get host name of the current machine, for use in error reporting.
Class to represent a recompression operation for a single CGZ blob.
RecompressTracked $parent
ConcatenatedGzipHistoryBlob false $cgz
addItem( $text, $textId)
Add text.
recompress()
Recompress text after some aberrant modification.
__construct( $parent, $blobClass)
Create a transaction from a RecompressTracked object.
getPrimary( $cluster)
Get a primary database connection for the specified cluster.
store( $location, $data)
Insert a data item into a given location. Parameters: the location name; the data item. Returns string|bool: the URL of the s...
Maintenance script that moves blobs indexed by trackBlobs.php to a specified list of destination clusters, and recompresses them in the process.
executeChild()
Main entry point for worker processes.
moveTextRow( $textId, $url)
Atomic move operation.
execute()
Execute parent or child depending on the isChild option.
static newFromCommandLine( $args, $options)
static getOptionsWithArgs()
getTargetCluster()
Returns the name of the next target cluster.
executeParent()
Execute the parent process.
if(count( $args)< 1) $job
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.