31 use Wikimedia\AtEase\AtEase;
34 require __DIR__ .
'/../CommandLineInc.php';
36 if ( count( $args ) < 1 ) {
37 echo
"Usage: php recompressTracked.php [options] <cluster> [... <cluster>...]
38 Moves blobs indexed by trackBlobs.php to a specified list of destination clusters,
39 and recompresses them in the process. Restartable.
42 --procs <procs> Set the number of child processes (default 1)
43 --copy-only Copy only, do not update the text table. Restart
44 without this option to complete.
45 --debug-log <file> Log debugging data to the specified file
46 --info-log <file> Log progress messages to the specified file
47 --critical-log <file> Log error messages to the specified file
80 private static $optionsWithArgs = [
88 private static $cmdLineOptionMap = [
89 'no-count' =>
'noCount',
90 'procs' =>
'numProcs',
91 'copy-only' =>
'copyOnly',
93 'child-id' =>
'childId',
94 'debug-log' =>
'debugLog',
95 'info-log' =>
'infoLog',
96 'critical-log' =>
'criticalLog',
104 $jobOptions = [
'destClusters' => $args ];
105 foreach ( self::$cmdLineOptionMap as $cmdOption => $classOption ) {
106 if ( isset( $options[$cmdOption] ) ) {
107 $jobOptions[$classOption] = $options[$cmdOption];
111 return new self( $jobOptions );
115 foreach ( $options as $name => $value ) {
116 $this->$name = $value;
118 $esFactory = MediaWikiServices::getInstance()->getExternalStoreFactory();
119 $this->store = $esFactory->getStore(
'DB' );
120 if ( !$this->isChild ) {
121 $GLOBALS[
'wgDebugLogPrefix'] =
"RCT M: ";
122 } elseif ( $this->childId !==
false ) {
123 $GLOBALS[
'wgDebugLogPrefix'] =
"RCT {$this->childId}: ";
125 $this->pageBlobClass = function_exists(
'xdiff_string_bdiff' ) ?
126 DiffHistoryBlob::class : ConcatenatedGzipHistoryBlob::class;
127 $this->orphanBlobClass = ConcatenatedGzipHistoryBlob::class;
129 $this->blobStore = MediaWikiServices::getInstance()
130 ->getBlobStoreFactory()
136 if ( $this->debugLog ) {
137 $this->logToFile( $msg, $this->debugLog );
143 if ( $this->infoLog ) {
144 $this->logToFile( $msg, $this->infoLog );
150 if ( $this->criticalLog ) {
151 $this->logToFile( $msg, $this->criticalLog );
155 private function logToFile( $msg,
$file ) {
157 if ( $this->childId !==
false ) {
158 $header .=
"({$this->childId})";
160 $header .=
' ' . WikiMap::getCurrentWikiDbDomain()->getId();
161 LegacyLogger::emit( sprintf(
"%-50s %s\n",
$header, $msg ),
$file );
169 private function syncDBs() {
172 $pos = $dbw->getPrimaryPos();
173 $dbr->primaryPosWait( $pos, 100000 );
180 if ( $this->isChild ) {
191 if ( !$this->checkTrackingTable() ) {
196 $this->startChildProcs();
198 $this->doAllOrphans();
199 $this->killChildProcs();
206 private function checkTrackingTable() {
208 if ( !
$dbr->tableExists(
'blob_tracking', __METHOD__ ) ) {
209 $this->
critical(
"Error: blob_tracking table does not exist" );
213 $row =
$dbr->selectRow(
'blob_tracking',
'*',
'', __METHOD__ );
215 $this->
info(
"Warning: blob_tracking table contains no rows, skipping this wiki." );
229 private function startChildProcs() {
230 $wiki = WikiMap::getCurrentWikiId();
232 $cmd =
'php ' . Shell::escape( __FILE__ );
233 foreach ( self::$cmdLineOptionMap as $cmdOption => $classOption ) {
234 if ( $cmdOption ==
'child-id' ) {
239 $cmd .=
" --$cmdOption " . Shell::escape( $this->$classOption );
240 } elseif ( $this->$classOption ) {
241 $cmd .=
" --$cmdOption";
245 ' --wiki ' . Shell::escape( $wiki ) .
246 ' ' . Shell::escape( ...$this->destClusters );
248 $this->childPipes = $this->childProcs = [];
253 [
'file',
'php://stdout',
'w' ],
254 [
'file',
'php://stderr',
'w' ]
256 AtEase::suppressWarnings();
257 $proc = proc_open(
"$cmd --child-id $i", $spec, $pipes );
258 AtEase::restoreWarnings();
260 $this->
critical(
"Error opening child process: $cmd" );
263 $this->childProcs[$i] = $proc;
264 $this->childPipes[$i] = $pipes[0];
266 $this->prevChildId = -1;
272 private function killChildProcs() {
273 $this->
info(
"Waiting for child processes to finish..." );
275 $this->dispatchToChild( $i,
'quit' );
278 $status = proc_close( $this->childProcs[$i] );
280 $this->
critical(
"Warning: child #$i exited with status $status" );
283 $this->
info(
"Done." );
291 private function dispatch( ...$args ) {
295 $numPipes = stream_select( $x, $pipes, $y, 3600 );
297 $this->
critical(
"Error waiting to write to child process. Aborting" );
301 $childId = ( $i + $this->prevChildId + 1 ) % $this->numProcs;
304 $this->dispatchToChild(
$childId, $args );
318 private function dispatchToChild(
$childId, $args ) {
319 $args = (array)$args;
320 $cmd = implode(
' ', $args );
321 fwrite( $this->childPipes[
$childId],
"$cmd\n" );
327 private function doAllPages() {
331 if ( $this->noCount ) {
332 $numPages =
'[unknown]';
334 $numPages =
$dbr->selectField(
'blob_tracking',
335 'COUNT(DISTINCT bt_page)',
336 # A condition is required so that this query uses the index
341 if ( $this->copyOnly ) {
342 $this->
info(
"Copying pages..." );
344 $this->
info(
"Moving pages..." );
347 $res =
$dbr->select(
'blob_tracking',
351 'bt_page > ' .
$dbr->addQuotes( $startId )
356 'ORDER BY' =>
'bt_page',
357 'LIMIT' => $this->batchSize,
360 if ( !
$res->numRows() ) {
363 foreach (
$res as $row ) {
364 $startId = $row->bt_page;
365 $this->dispatch(
'doPage', $row->bt_page );
368 $this->report(
'pages', $i, $numPages );
370 $this->report(
'pages', $i, $numPages );
371 if ( $this->copyOnly ) {
372 $this->
info(
"All page copies queued." );
374 $this->
info(
"All page moves queued." );
384 private function report( $label, $current, $end ) {
386 if ( $current == $end || $this->numBatches >= $this->reportingInterval ) {
387 $this->numBatches = 0;
388 $this->
info(
"$label: $current / $end" );
389 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
396 private function doAllOrphans() {
400 if ( $this->noCount ) {
401 $numOrphans =
'[unknown]';
403 $numOrphans =
$dbr->selectField(
'blob_tracking',
404 'COUNT(DISTINCT bt_text_id)',
405 [
'bt_moved' => 0,
'bt_page' => 0 ],
407 if ( !$numOrphans ) {
411 if ( $this->copyOnly ) {
412 $this->
info(
"Copying orphans..." );
414 $this->
info(
"Moving orphans..." );
418 $res =
$dbr->select(
'blob_tracking',
423 'bt_text_id > ' .
$dbr->addQuotes( $startId )
428 'ORDER BY' =>
'bt_text_id',
429 'LIMIT' => $this->batchSize
432 if ( !
$res->numRows() ) {
436 foreach (
$res as $row ) {
437 $startId = $row->bt_text_id;
438 $ids[] = $row->bt_text_id;
444 while ( count( $ids ) > $this->orphanBatchSize ) {
445 $args = array_slice( $ids, 0, $this->orphanBatchSize );
446 $ids = array_slice( $ids, $this->orphanBatchSize );
447 array_unshift( $args,
'doOrphanList' );
448 $this->dispatch( ...$args );
450 if ( count( $ids ) ) {
452 array_unshift( $args,
'doOrphanList' );
453 $this->dispatch( ...$args );
456 $this->report(
'orphans', $i, $numOrphans );
458 $this->report(
'orphans', $i, $numOrphans );
459 $this->
info(
"All orphans queued." );
466 $this->
debug(
'starting' );
469 while ( !feof( STDIN ) ) {
470 $line = rtrim( fgets( STDIN ) );
474 $this->
debug( $line );
475 $args = explode(
' ', $line );
476 $cmd = array_shift( $args );
479 $this->doPage( intval( $args[0] ) );
482 $this->doOrphanList( array_map(
'intval', $args ) );
487 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
496 private function doPage( $pageId ) {
497 $title = Title::newFromID( $pageId );
499 $titleText =
$title->getPrefixedText();
501 $titleText =
'[deleted]';
506 if ( !$this->copyOnly ) {
507 $this->finishIncompleteMoves( [
'bt_page' => $pageId ] );
514 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
517 [
'blob_tracking',
'text' ],
520 'bt_page' => $pageId,
521 'bt_text_id > ' .
$dbr->addQuotes( $startId ),
523 'bt_new_url IS NULL',
528 'ORDER BY' =>
'bt_text_id',
529 'LIMIT' => $this->batchSize
532 if ( !
$res->numRows() ) {
537 foreach (
$res as $row ) {
538 $startId = $row->bt_text_id;
539 if ( $lastTextId == $row->bt_text_id ) {
543 $lastTextId = $row->bt_text_id;
545 $text = $this->blobStore->expandBlob( $row->old_text, $row->old_flags );
546 if ( $text ===
false ) {
547 $this->
critical(
"Error loading {$row->bt_rev_id}/{$row->bt_text_id}" );
552 if ( !$trx->addItem( $text, $row->bt_text_id ) ) {
553 $this->
debug(
"$titleText: committing blob with " . $trx->getSize() .
" items" );
556 $lbFactory->waitForReplication();
561 $this->
debug(
"$titleText: committing blob with " . $trx->getSize() .
" items" );
579 if ( $this->copyOnly ) {
580 $this->
critical(
"Internal error: can't call moveTextRow() in --copy-only mode" );
584 $dbw->begin( __METHOD__ );
585 $dbw->update(
'text',
588 'old_flags' =>
'external,utf-8',
595 $dbw->update(
'blob_tracking',
597 [
'bt_text_id' => $textId ],
600 $dbw->commit( __METHOD__ );
613 private function finishIncompleteMoves( $conds ) {
615 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
618 $conds = array_merge( $conds, [
620 'bt_new_url IS NOT NULL'
623 $res =
$dbr->select(
'blob_tracking',
625 array_merge( $conds, [
'bt_text_id > ' .
$dbr->addQuotes( $startId ) ] ),
628 'ORDER BY' =>
'bt_text_id',
629 'LIMIT' => $this->batchSize,
632 if ( !
$res->numRows() ) {
635 $this->
debug(
'Incomplete: ' .
$res->numRows() .
' rows' );
636 foreach (
$res as $row ) {
637 $startId = $row->bt_text_id;
638 $this->
moveTextRow( $row->bt_text_id, $row->bt_new_url );
639 if ( $row->bt_text_id % 10 == 0 ) {
640 $lbFactory->waitForReplication();
651 $cluster = next( $this->destClusters );
652 if ( $cluster ===
false ) {
653 $cluster = reset( $this->destClusters );
664 private function doOrphanList( $textIds ) {
666 if ( !$this->copyOnly ) {
667 $this->finishIncompleteMoves( [
'bt_text_id' => $textIds ] );
673 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
675 [
'text',
'blob_tracking' ],
676 [
'old_id',
'old_text',
'old_flags' ],
678 'old_id' => $textIds,
686 foreach (
$res as $row ) {
687 $text = $this->blobStore->expandBlob( $row->old_text, $row->old_flags );
688 if ( $text ===
false ) {
689 $this->
critical(
"Error: cannot load revision text for old_id={$row->old_id}" );
693 if ( !$trx->addItem( $text, $row->old_id ) ) {
694 $this->
debug(
"[orphan]: committing blob with " . $trx->getSize() .
" rows" );
697 $lbFactory->waitForReplication();
700 $this->
debug(
"[orphan]: committing blob with " . $trx->getSize() .
" rows" );
740 $this->cgz =
new $class;
742 $hash = $this->cgz->addItem( $text );
743 $this->referrers[$textId] = $hash;
744 $this->texts[$textId] = $text;
746 return $this->cgz->isHappy();
750 return count( $this->texts );
758 $this->cgz =
new $class;
759 $this->referrers = [];
760 foreach ( $this->texts as $textId => $text ) {
761 $hash = $this->cgz->addItem( $text );
762 $this->referrers[$textId] = $hash;
772 $originalCount = count( $this->texts );
773 if ( !$originalCount ) {
786 $dbw->begin( __METHOD__ );
787 $res = $dbw->select(
'blob_tracking',
788 [
'bt_text_id',
'bt_moved' ],
789 [
'bt_text_id' => array_keys( $this->referrers ) ],
790 __METHOD__, [
'FOR UPDATE' ] );
792 foreach (
$res as $row ) {
793 if ( $row->bt_moved ) {
794 # This row has already been moved, remove it
795 $this->parent->debug(
"TRX: conflict detected in old_id={$row->bt_text_id}" );
796 unset( $this->texts[$row->bt_text_id] );
803 if ( !count( $this->texts ) ) {
805 if ( $originalCount > 1 ) {
807 $this->parent->critical(
808 "Warning: concurrent operation detected, are there two conflicting " .
809 "processes running, doing the same job?" );
818 $targetCluster = $this->parent->getTargetCluster();
819 $store = $this->parent->
store;
820 $targetDB = $store->
getPrimary( $targetCluster );
821 $targetDB->clearFlag(
DBO_TRX );
822 $targetDB->begin( __METHOD__ );
823 $baseUrl = $this->parent->store->store( $targetCluster, serialize( $this->cgz ) );
826 foreach ( $this->referrers as $textId => $hash ) {
827 $url = $baseUrl .
'/' . $hash;
828 $dbw->update(
'blob_tracking',
829 [
'bt_new_url' => $url ],
831 'bt_text_id' => $textId,
832 'bt_moved' => 0, # Check for concurrent conflicting update
838 $targetDB->commit( __METHOD__ );
841 $dbw->commit( __METHOD__ );
844 if ( !$this->parent->copyOnly ) {
845 foreach ( $this->referrers as $textId => $hash ) {
846 $url = $baseUrl .
'/' . $hash;
847 $this->parent->moveTextRow( $textId, $url );
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfHostname()
Get host name of the current machine, for use in error reporting.
Class to represent a recompression operation for a single CGZ blob.
RecompressTracked $parent
ConcatenatedGzipHistoryBlob|false $cgz
addItem( $text, $textId)
Add text.
recompress()
Recompress text after some aberrant modification.
__construct( $parent, $blobClass)
Create a transaction from a RecompressTracked object.
getPrimary( $cluster)
Get a primary database connection for the specified cluster.
store( $location, $data)
Insert a data item into a given location. Parameters: the location name; the data item. Returns string|bool: the URL of the s...
Maintenance script that moves blobs indexed by trackBlobs.php to a specified list of destination clus...
executeChild()
Main entry point for worker processes.
moveTextRow( $textId, $url)
Atomic move operation.
execute()
Execute parent or child depending on the isChild option.
static newFromCommandLine( $args, $options)
static getOptionsWithArgs()
getTargetCluster()
Returns the name of the next target cluster.
executeParent()
Execute the parent process.
if(count( $args)< 1) $job
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.