// Names of the command-line options that are followed by a value (the list
// entries themselves are not visible in this excerpt).
85 private static $optionsWithArgs = [
// Maps each command-line option name (key) to the name of the object
// property (value) that stores its setting.
93 private static $cmdLineOptionMap = [
94 'no-count' =>
'noCount',
95 'procs' =>
'numProcs',
96 'copy-only' =>
'copyOnly',
98 'child-id' =>
'childId',
99 'debug-log' =>
'debugLog',
100 'info-log' =>
'infoLog',
101 'critical-log' =>
'criticalLog',
// Returns the list of options that take a value.
105 return self::$optionsWithArgs;
// Seed the option set with $args as the destination cluster list.
109 $jobOptions = [
'destClusters' => $args ];
// Copy every command-line option that was actually supplied, stored under
// the property name given by $cmdLineOptionMap.
110 foreach ( self::$cmdLineOptionMap as $cmdOption => $classOption ) {
111 if ( isset( $options[$cmdOption] ) ) {
112 $jobOptions[$classOption] = $options[$cmdOption];
// Build a new instance of this class from the collected options.
116 return new self( $jobOptions );
// Assign each supplied option to the property of the same name on this object.
120 foreach ( $options as $name => $value ) {
121 $this->$name = $value;
// Obtain the 'DB' store from the external store factory.
123 $esFactory = MediaWikiServices::getInstance()->getExternalStoreFactory();
124 $this->store = $esFactory->getStore(
'DB' );
// Set the global debug log prefix: "RCT M: " when this is not a child
// process, or "RCT <childId>: " when a child id has been set.
125 if ( !$this->isChild ) {
126 $GLOBALS[
'wgDebugLogPrefix'] =
"RCT M: ";
127 } elseif ( $this->childId !==
false ) {
128 $GLOBALS[
'wgDebugLogPrefix'] =
"RCT {$this->childId}: ";
// Page blobs use DiffHistoryBlob when xdiff_string_bdiff() exists, otherwise
// ConcatenatedGzipHistoryBlob.
130 $this->pageBlobClass = function_exists(
'xdiff_string_bdiff' ) ?
131 DiffHistoryBlob::class : ConcatenatedGzipHistoryBlob::class;
// Orphan blobs always use ConcatenatedGzipHistoryBlob.
132 $this->orphanBlobClass = ConcatenatedGzipHistoryBlob::class;
134 $this->blobStore = MediaWikiServices::getInstance()
135 ->getBlobStoreFactory()
// Each of the three checks below writes $msg to its own log file, and only
// when that file property is set.
// NOTE(review): presumably these are the bodies of debug(), info() and
// critical(); their signatures are not visible here - confirm.
141 if ( $this->debugLog ) {
142 $this->logToFile( $msg, $this->debugLog );
148 if ( $this->infoLog ) {
149 $this->logToFile( $msg, $this->infoLog );
155 if ( $this->criticalLog ) {
156 $this->logToFile( $msg, $this->criticalLog );
// Emit one formatted line (header followed by $msg) to $file through
// LegacyLogger::emit().
// $header is initialised on a line that is not visible in this excerpt.
160 private function logToFile( $msg, $file ) {
// When a child id is set, append it to the header in parentheses.
162 if ( $this->childId !==
false ) {
163 $header .=
"({$this->childId})";
// Append the current wiki's DB domain id to the header.
165 $header .=
' ' . WikiMap::getCurrentWikiDbDomain()->getId();
// "%-50s" left-justifies the header and pads it to 50 characters.
166 LegacyLogger::emit( sprintf(
"%-50s %s\n",
$header, $msg ), $file );
// Wait for database replication through the load balancer factory, passing a
// 'timeout' option of 100_000.
// NOTE(review): the unit of the timeout is not established by this code - confirm.
174 private function syncDBs() {
175 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication( [
'timeout' => 100_000 ] );
// Child processes branch off here; what the branch does is not visible in
// this excerpt.
182 if ( $this->isChild ) {
// The tracking table is checked before any work is started.
193 if ( !$this->checkTrackingTable() ) {
// Visible parent-side steps: start the children, queue the orphans, then
// shut the children down.
198 $this->startChildProcs();
200 $this->doAllOrphans();
201 $this->killChildProcs();
// Fetch a single row from blob_tracking on a replica database.
// The condition guarding the message below is not visible here; the message
// itself reports that the table has no rows and that this wiki is skipped.
208 private function checkTrackingTable() {
209 $row = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase()->newSelectQueryBuilder()
211 ->from(
'blob_tracking' )
212 ->caller( __METHOD__ )->fetchRow();
214 $this->
info(
"Warning: blob_tracking table contains no rows, skipping this wiki." );
// Build the command line for child processes of this same script and launch
// them with proc_open(), keeping each process handle and its $pipes[0] stream.
228 private function startChildProcs() {
229 $wiki = WikiMap::getCurrentWikiId();
// The child command re-runs this very file through the php binary.
231 $cmd =
'php ' . Shell::escape( __FILE__ );
// Forward this object's current settings to the children as command-line options.
232 foreach ( self::$cmdLineOptionMap as $cmdOption => $classOption ) {
// 'child-id' is special-cased; it is appended per child in the proc_open()
// call further down.
233 if ( $cmdOption ==
'child-id' ) {
// Options that take a value are passed as "--name <escaped value>";
// other options are passed as a bare "--name" when their property is truthy.
236 if ( in_array( $cmdOption, self::$optionsWithArgs ) && isset( $this->$classOption ) ) {
238 $cmd .=
" --$cmdOption " . Shell::escape( $this->$classOption );
239 } elseif ( $this->$classOption ) {
240 $cmd .=
" --$cmdOption";
244 ' --wiki ' . Shell::escape( $wiki ) .
245 ' ' . Shell::escape( ...$this->destClusters );
// Start with empty pipe and process lists.
247 $this->childPipes = $this->childProcs = [];
252 [
'file',
'php://stdout',
'w' ],
253 [
'file',
'php://stderr',
'w' ]
// Warnings from proc_open() are suppressed only for the duration of the call.
255 AtEase::suppressWarnings();
256 $proc = proc_open(
"$cmd --child-id $i", $spec, $pipes );
257 AtEase::restoreWarnings();
259 $this->
critical(
"Error opening child process: $cmd" );
// Remember the process handle and the pipe at descriptor index 0 for child $i.
262 $this->childProcs[$i] = $proc;
263 $this->childPipes[$i] = $pipes[0];
// -1 here makes the rotation formula in dispatch() yield offset 0 on first use.
265 $this->prevChildId = -1;
// Tell the children to quit, close their processes, and log the outcome.
271 private function killChildProcs() {
272 $this->
info(
"Waiting for child processes to finish..." );
// Send the 'quit' command to child $i.
274 $this->dispatchToChild( $i,
'quit' );
// proc_close() yields the exit status of child $i; the critical message
// below reports it (the guarding condition is not visible here).
277 $status = proc_close( $this->childProcs[$i] );
279 $this->
critical(
"Warning: child #$i exited with status $status" );
282 $this->
info(
"Done." );
// Send the command given by $args to one of the child processes.
290 private function dispatch( ...$args ) {
// $pipes is passed as the write set, so this waits (timeout 3600 seconds)
// for a pipe to become writable.
294 $numPipes = stream_select( $x, $pipes, $y, 3600 );
296 $this->
critical(
"Error waiting to write to child process. Aborting" );
// Candidate child index: offset $i from the child after the previously used
// one, wrapping around at numProcs.
300 $childId = ( $i + $this->prevChildId + 1 ) % $this->numProcs;
303 $this->dispatchToChild(
$childId, $args );
// Write one command line to the pipe of the child identified by $childId.
// $args may be a single value or an array; it is cast to an array, joined
// with single spaces, and terminated with a newline.
317 private function dispatchToChild(
$childId, $args ) {
318 $args = (array)$args;
319 $cmd = implode(
' ', $args );
320 fwrite( $this->childPipes[
$childId],
"$cmd\n" );
// Walk the blob_tracking rows with bt_moved = 0 in batches and dispatch a
// 'doPage' command for each page id found.
326 private function doAllPages() {
327 $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase();
// With noCount set the total is the placeholder '[unknown]'; otherwise it is
// the number of distinct bt_page values among unmoved rows.
330 if ( $this->noCount ) {
331 $numPages =
'[unknown]';
333 $numPages = $dbr->newSelectQueryBuilder()
334 ->select(
'COUNT(DISTINCT bt_page)' )
335 ->from(
'blob_tracking' )
336 ->where( [
'bt_moved' => 0 ] )
337 ->caller( __METHOD__ )->fetchField();
// Log which mode is running: copy or move.
339 if ( $this->copyOnly ) {
340 $this->
info(
"Copying pages..." );
342 $this->
info(
"Moving pages..." );
// Fetch the next batch: unmoved rows with bt_page greater than $startId,
// ordered by bt_page, limited to batchSize rows.
345 $res = $dbr->newSelectQueryBuilder()
346 ->select( [
'bt_page' ] )
348 ->from(
'blob_tracking' )
349 ->where( [
'bt_moved' => 0, $dbr->expr(
'bt_page',
'>', $startId ) ] )
350 ->orderBy(
'bt_page' )
351 ->limit( $this->batchSize )
352 ->caller( __METHOD__ )->fetchResultSet();
// An empty batch is tested for here.
353 if ( !$res->numRows() ) {
// Advance the paging cursor to each row's page id and dispatch that page.
356 foreach ( $res as $row ) {
357 $startId = $row->bt_page;
358 $this->dispatch(
'doPage', $row->bt_page );
// Progress reporting against the total computed above.
361 $this->report(
'pages', $i, $numPages );
363 $this->report(
'pages', $i, $numPages );
// Final message, again depending on the mode.
364 if ( $this->copyOnly ) {
365 $this->
info(
"All page copies queued." );
367 $this->
info(
"All page moves queued." );
// Log progress as "<label>: <current> / <end>" and wait for replication.
// $end can be the placeholder string '[unknown]' that callers pass when
// counting is disabled.
377 private function report( $label, $current, $end ) {
// Report when $current equals $end (loose comparison) or when the batch
// counter has reached the reporting interval; the counter is then reset.
379 if ( $current == $end || $this->numBatches >= $this->reportingInterval ) {
380 $this->numBatches = 0;
381 $this->
info(
"$label: $current / $end" );
382 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
// Walk the blob_tracking rows with bt_moved = 0 and bt_page = 0 in batches
// and dispatch their text ids to children in 'doOrphanList' commands.
389 private function doAllOrphans() {
390 $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase();
// With noCount set the total is the placeholder '[unknown]'; otherwise it is
// the number of distinct bt_text_id values among the matching rows.
393 if ( $this->noCount ) {
394 $numOrphans =
'[unknown]';
396 $numOrphans = $dbr->newSelectQueryBuilder()
397 ->select(
'COUNT(DISTINCT bt_text_id)' )
398 ->from(
'blob_tracking' )
399 ->where( [
'bt_moved' => 0,
'bt_page' => 0 ] )
400 ->caller( __METHOD__ )->fetchField();
// A falsy count is tested for here.
401 if ( !$numOrphans ) {
// Log which mode is running: copy or move.
405 if ( $this->copyOnly ) {
406 $this->
info(
"Copying orphans..." );
408 $this->
info(
"Moving orphans..." );
// Fetch the next batch: matching rows with bt_text_id greater than $startId,
// ordered by bt_text_id, limited to batchSize rows.
412 $res = $dbr->newSelectQueryBuilder()
413 ->select( [
'bt_text_id' ] )
415 ->from(
'blob_tracking' )
416 ->where( [
'bt_moved' => 0,
'bt_page' => 0, $dbr->expr(
'bt_text_id',
'>', $startId ) ] )
417 ->orderBy(
'bt_text_id' )
418 ->limit( $this->batchSize )
419 ->caller( __METHOD__ )->fetchResultSet();
// An empty batch is tested for here.
420 if ( !$res->numRows() ) {
// Advance the paging cursor and buffer each text id.
424 foreach ( $res as $row ) {
425 $startId = $row->bt_text_id;
426 $ids[] = $row->bt_text_id;
// While more than orphanBatchSize ids are buffered, slice off the first
// orphanBatchSize of them and dispatch those as one 'doOrphanList' command.
432 while ( count( $ids ) > $this->orphanBatchSize ) {
433 $args = array_slice( $ids, 0, $this->orphanBatchSize );
434 $ids = array_slice( $ids, $this->orphanBatchSize );
435 array_unshift( $args,
'doOrphanList' );
436 $this->dispatch( ...$args );
// When ids are still buffered, a further 'doOrphanList' command is dispatched.
438 if ( count( $ids ) ) {
440 array_unshift( $args,
'doOrphanList' );
441 $this->dispatch( ...$args );
// Progress reporting against the total computed above.
444 $this->report(
'orphans', $i, $numOrphans );
446 $this->report(
'orphans', $i, $numOrphans );
447 $this->
info(
"All orphans queued." );
// Record startup in the debug log.
454 $this->
debug(
'starting' );
// Read STDIN one line at a time until end of input, stripping trailing whitespace.
457 while ( !feof( STDIN ) ) {
458 $line = rtrim( fgets( STDIN ) );
// Each received line is written to the debug log.
462 $this->
debug( $line );
// The first space-separated word is the command name; the remaining words
// are its arguments.
463 $args = explode(
' ', $line );
464 $cmd = array_shift( $args );
// doPage() receives the first argument converted to an integer.
467 $this->doPage( intval( $args[0] ) );
// doOrphanList() receives all arguments converted to integers.
470 $this->doOrphanList( array_map(
'intval', $args ) );
475 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
// Process the tracked text rows that belong to the page with id $pageId.
484 private function doPage( $pageId ) {
485 $title = Title::newFromID( $pageId );
// $titleText is either the prefixed title text or the placeholder
// '[deleted]'; it labels the debug messages below.
487 $titleText = $title->getPrefixedText();
489 $titleText =
'[deleted]';
491 $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase();
// Outside copy-only mode, first run finishIncompleteMoves() for this page.
494 if ( !$this->copyOnly ) {
495 $this->finishIncompleteMoves( [
'bt_page' => $pageId ] );
502 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
// Batch query: blob_tracking joined to text on bt_text_id = old_id, for this
// page, text ids greater than $startId, bt_new_url null, ordered by
// bt_text_id and limited to batchSize rows.
504 $res = $dbr->newSelectQueryBuilder()
506 ->from(
'blob_tracking' )
507 ->join(
'text',
null,
'bt_text_id=old_id' )
509 'bt_page' => $pageId,
510 $dbr->expr(
'bt_text_id',
'>', $startId ),
512 'bt_new_url' =>
null,
514 ->orderBy(
'bt_text_id' )
515 ->limit( $this->batchSize )
516 ->caller( __METHOD__ )->fetchResultSet();
// An empty batch is tested for here.
517 if ( !$res->numRows() ) {
522 foreach ( $res as $row ) {
// Advance the paging cursor.
523 $startId = $row->bt_text_id;
// Detects a row whose text id equals that of the previously seen row.
524 if ( $lastTextId == $row->bt_text_id ) {
528 $lastTextId = $row->bt_text_id;
// Expand the stored blob into text; a false result is logged as critical
// with the revision id and text id.
530 $text = $this->blobStore->expandBlob( $row->old_text, $row->old_flags );
531 if ( $text ===
false ) {
532 $this->
critical(
"Error loading {$row->bt_rev_id}/{$row->bt_text_id}" );
// When addItem() returns a falsy value, a debug line announces the commit of
// the current blob together with its item count.
537 if ( !$trx->addItem( $text, $row->bt_text_id ) ) {
538 $this->
debug(
"$titleText: committing blob with " . $trx->getSize() .
" items" );
541 $lbFactory->waitForReplication();
546 $this->
debug(
"$titleText: committing blob with " . $trx->getSize() .
" items" );
// Copy-only mode is treated as an internal error here and logged as critical.
564 if ( $this->copyOnly ) {
565 $this->
critical(
"Internal error: can't call moveTextRow() in --copy-only mode" );
568 $dbw = MediaWikiServices::getInstance()->getConnectionProvider()->getPrimaryDatabase();
// Both updates below run inside one explicit transaction on the primary database.
570 $dbw->begin( __METHOD__ );
// First update: sets old_flags to 'external,utf-8' (its table and remaining
// fields are not visible in this excerpt).
571 $dbw->newUpdateQueryBuilder()
575 'old_flags' =>
'external,utf-8',
580 ->caller( __METHOD__ )
// Second update: mark the blob_tracking row for this text id as moved.
582 $dbw->newUpdateQueryBuilder()
583 ->update(
'blob_tracking' )
584 ->set( [
'bt_moved' => 1 ] )
585 ->where( [
'bt_text_id' => $textId ] )
586 ->caller( __METHOD__ )
588 $dbw->commit( __METHOD__ );
// Call moveTextRow() for blob_tracking rows that match $conds and already
// have a bt_new_url recorded.
601 private function finishIncompleteMoves( $conds ) {
602 $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase();
603 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
// Extend the caller's conditions; the visible addition requires bt_new_url
// to be non-null.
606 $conds = array_merge( $conds, [
608 'bt_new_url IS NOT NULL'
// Batch query: rows with bt_text_id greater than $startId, ordered by
// bt_text_id and limited to batchSize rows.
611 $res = $dbr->newSelectQueryBuilder()
613 ->from(
'blob_tracking' )
615 ->andWhere( $dbr->expr(
'bt_text_id',
'>', $startId ) )
616 ->orderBy(
'bt_text_id' )
617 ->limit( $this->batchSize )
618 ->caller( __METHOD__ )->fetchResultSet();
// An empty batch is tested for here.
619 if ( !$res->numRows() ) {
622 $this->
debug(
'Incomplete: ' . $res->numRows() .
' rows' );
// Advance the paging cursor and move each row to its recorded URL.
623 foreach ( $res as $row ) {
624 $startId = $row->bt_text_id;
625 $this->
moveTextRow( $row->bt_text_id, $row->bt_new_url );
// Wait for replication whenever the text id is a multiple of 10.
626 if ( $row->bt_text_id % 10 == 0 ) {
627 $lbFactory->waitForReplication();
// Advance the internal array pointer of destClusters. next() returns false
// past the last element, in which case reset() rewinds to the first entry.
638 $cluster = next( $this->destClusters );
639 if ( $cluster ===
false ) {
640 $cluster = reset( $this->destClusters );
// Process the text rows whose ids are listed in $textIds.
651 private function doOrphanList( $textIds ) {
// Outside copy-only mode, first run finishIncompleteMoves() for these text ids.
653 if ( !$this->copyOnly ) {
654 $this->finishIncompleteMoves( [
'bt_text_id' => $textIds ] );
660 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
// Select id, text and flags for the listed ids, joined to blob_tracking on
// bt_text_id = old_id and restricted to rows with bt_moved = 0.
661 $res = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase()->newSelectQueryBuilder()
662 ->select( [
'old_id',
'old_text',
'old_flags' ] )
665 ->join(
'blob_tracking',
null,
'bt_text_id=old_id' )
666 ->where( [
'old_id' => $textIds,
'bt_moved' => 0 ] )
667 ->caller( __METHOD__ )->fetchResultSet();
669 foreach ( $res as $row ) {
// Expand the stored blob into text; a false result is logged as critical
// with the old_id.
670 $text = $this->blobStore->expandBlob( $row->old_text, $row->old_flags );
671 if ( $text ===
false ) {
672 $this->
critical(
"Error: cannot load revision text for old_id={$row->old_id}" );
// When addItem() returns a falsy value, a debug line announces the commit of
// the current blob together with its row count.
676 if ( !$trx->addItem( $text, $row->old_id ) ) {
677 $this->
debug(
"[orphan]: committing blob with " . $trx->getSize() .
" rows" );
680 $lbFactory->waitForReplication();
683 $this->
debug(
"[orphan]: committing blob with " . $trx->getSize() .
" rows" );