80 private static $optionsWithArgs = [
88 private static $cmdLineOptionMap = [
89 'no-count' =>
'noCount',
90 'procs' =>
'numProcs',
91 'copy-only' =>
'copyOnly',
93 'child-id' =>
'childId',
94 'debug-log' =>
'debugLog',
95 'info-log' =>
'infoLog',
96 'critical-log' =>
'criticalLog',
100 return self::$optionsWithArgs;
104 $jobOptions = [
'destClusters' => $args ];
105 foreach ( self::$cmdLineOptionMap as $cmdOption => $classOption ) {
106 if ( isset( $options[$cmdOption] ) ) {
107 $jobOptions[$classOption] = $options[$cmdOption];
111 return new self( $jobOptions );
115 foreach ( $options as $name => $value ) {
116 $this->$name = $value;
118 $esFactory = MediaWikiServices::getInstance()->getExternalStoreFactory();
119 $this->store = $esFactory->getStore(
'DB' );
120 if ( !$this->isChild ) {
121 $GLOBALS[
'wgDebugLogPrefix'] =
"RCT M: ";
122 } elseif ( $this->childId !==
false ) {
123 $GLOBALS[
'wgDebugLogPrefix'] =
"RCT {$this->childId}: ";
125 $this->pageBlobClass = function_exists(
'xdiff_string_bdiff' ) ?
126 DiffHistoryBlob::class : ConcatenatedGzipHistoryBlob::class;
127 $this->orphanBlobClass = ConcatenatedGzipHistoryBlob::class;
129 $this->blobStore = MediaWikiServices::getInstance()
130 ->getBlobStoreFactory()
136 if ( $this->debugLog ) {
137 $this->logToFile( $msg, $this->debugLog );
143 if ( $this->infoLog ) {
144 $this->logToFile( $msg, $this->infoLog );
150 if ( $this->criticalLog ) {
151 $this->logToFile( $msg, $this->criticalLog );
/**
 * Emit one formatted log line through LegacyLogger::emit().
 *
 * The line is a header, left-justified and padded to 50 characters, followed
 * by a space, the message and a newline. The visible code appends to the
 * header "(<childId>)" when a child ID is set, then a space and the current
 * wiki's DB domain ID.
 *
 * NOTE(review): the statement that first assigns $header is not among the
 * lines shown here; confirm what the header starts with.
 *
 * @param string $msg Message text, interpolated with %s
 * @param string $file Passed through as the second argument of
 *   LegacyLogger::emit() (presumably the log file path; confirm)
 */
155 private function logToFile( $msg, $file ) {
// Append the child ID unless it is exactly false (strict comparison).
157 if ( $this->childId !==
false ) {
158 $header .=
"({$this->childId})";
// Always append the DB domain ID of the wiki currently being processed.
160 $header .=
' ' . WikiMap::getCurrentWikiDbDomain()->getId();
// "%-50s" left-justifies the header in a 50-character column.
161 LegacyLogger::emit( sprintf(
"%-50s %s\n",
$header, $msg ), $file );
/**
 * Make the replica connection wait for the primary's current position.
 *
 * Both handles come from the connection provider; the primary's position is
 * read and handed to the replica's primaryPosWait() together with a timeout
 * argument of 100_000.
 */
private function syncDBs() {
	$provider = MediaWikiServices::getInstance()->getConnectionProvider();
	$primary = $provider->getPrimaryDatabase();
	$replica = $provider->getReplicaDatabase();
	$replica->primaryPosWait( $primary->getPrimaryPos(), 100_000 );
}
182 if ( $this->isChild ) {
193 if ( !$this->checkTrackingTable() ) {
198 $this->startChildProcs();
200 $this->doAllOrphans();
201 $this->killChildProcs();
/**
 * Sanity-check the blob_tracking table on a replica connection.
 *
 * The visible code logs a critical error when the table does not exist,
 * fetches a single row from it, and logs an informational warning that the
 * wiki is being skipped because the table contains no rows.
 *
 * NOTE(review): the return statements and the condition guarding the
 * "no rows" message are not among the lines shown here. A caller in this
 * file negates the result in an `if`, so a boolean-like value is presumably
 * returned; confirm.
 *
 * NOTE(review): this is the only place in the visible code that obtains a
 * replica through getDBLoadBalancer()->getConnectionRef( DB_REPLICA ); the
 * other methods use getConnectionProvider()->getReplicaDatabase(). Consider
 * aligning.
 */
208 private function checkTrackingTable() {
210 $dbr = MediaWikiServices::getInstance()->getDBLoadBalancer()->getConnectionRef(
DB_REPLICA );
// The table must exist before anything else can be done.
211 if ( !$dbr->tableExists(
'blob_tracking', __METHOD__ ) ) {
212 $this->
critical(
"Error: blob_tracking table does not exist" );
// Fetch one row from the table to find out whether it has any content.
216 $row = $dbr->newSelectQueryBuilder()
218 ->from(
'blob_tracking' )
219 ->caller( __METHOD__ )->fetchRow();
221 $this->
info(
"Warning: blob_tracking table contains no rows, skipping this wiki." );
235 private function startChildProcs() {
236 $wiki = WikiMap::getCurrentWikiId();
238 $cmd =
'php ' . Shell::escape( __FILE__ );
239 foreach ( self::$cmdLineOptionMap as $cmdOption => $classOption ) {
240 if ( $cmdOption ==
'child-id' ) {
243 if ( in_array( $cmdOption, self::$optionsWithArgs ) && isset( $this->$classOption ) ) {
245 $cmd .=
" --$cmdOption " . Shell::escape( $this->$classOption );
246 } elseif ( $this->$classOption ) {
247 $cmd .=
" --$cmdOption";
251 ' --wiki ' . Shell::escape( $wiki ) .
252 ' ' . Shell::escape( ...$this->destClusters );
254 $this->childPipes = $this->childProcs = [];
259 [
'file',
'php://stdout',
'w' ],
260 [
'file',
'php://stderr',
'w' ]
262 AtEase::suppressWarnings();
263 $proc = proc_open(
"$cmd --child-id $i", $spec, $pipes );
264 AtEase::restoreWarnings();
266 $this->
critical(
"Error opening child process: $cmd" );
269 $this->childProcs[$i] = $proc;
270 $this->childPipes[$i] = $pipes[0];
272 $this->prevChildId = -1;
/**
 * Ask the child processes to quit and collect their exit statuses.
 *
 * The visible code logs a "waiting" message, sends the 'quit' command to
 * child $i through dispatchToChild(), closes the process handle stored in
 * $this->childProcs[$i] with proc_close(), logs a critical warning that
 * includes the returned status, and finally logs "Done.".
 *
 * NOTE(review): the loops that iterate $i and the condition under which the
 * status warning is logged are not among the lines shown here.
 */
278 private function killChildProcs() {
279 $this->
info(
"Waiting for child processes to finish..." );
// Tell the child to stop reading commands.
281 $this->dispatchToChild( $i,
'quit' );
// proc_close() returns the exit status that is reported below.
284 $status = proc_close( $this->childProcs[$i] );
286 $this->
critical(
"Warning: child #$i exited with status $status" );
289 $this->
info(
"Done." );
/**
 * Hand a command to one of the child processes.
 *
 * The visible code calls stream_select() with $pipes as the write set and a
 * timeout of 3600 seconds, logs a critical error about waiting to write to a
 * child, computes a candidate child ID by rotating from the previously used
 * one, and forwards the arguments to dispatchToChild().
 *
 * NOTE(review): the setup of $x, $pipes and $y, the loop over $i, the
 * condition guarding the error message and any update of
 * $this->prevChildId are not among the lines shown here.
 *
 * @param mixed ...$args Command name followed by its arguments; passed on as
 *   an array to dispatchToChild()
 */
297 private function dispatch( ...$args ) {
// $pipes is passed in the "write" position of stream_select().
301 $numPipes = stream_select( $x, $pipes, $y, 3600 );
303 $this->
critical(
"Error waiting to write to child process. Aborting" );
// Offset from the last used child, wrapping around at numProcs.
307 $childId = ( $i + $this->prevChildId + 1 ) % $this->numProcs;
310 $this->dispatchToChild(
$childId, $args );
/**
 * Write one command line to the pipe of a specific child process.
 *
 * The arguments are joined with single spaces and terminated with a
 * newline before being written to $this->childPipes[$childId].
 *
 * @param int $childId Key into $this->childPipes
 * @param array|string $args Command and its arguments; a non-array value
 *   is cast to an array first
 */
private function dispatchToChild( $childId, $args ) {
	$line = implode( ' ', (array)$args ) . "\n";
	fwrite( $this->childPipes[$childId], $line );
}
333 private function doAllPages() {
334 $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase();
337 if ( $this->noCount ) {
338 $numPages =
'[unknown]';
340 $numPages = $dbr->newSelectQueryBuilder()
341 ->select(
'COUNT(DISTINCT bt_page)' )
342 ->from(
'blob_tracking' )
343 ->where( [
'bt_moved' => 0 ] )
344 ->caller( __METHOD__ )->fetchField();
346 if ( $this->copyOnly ) {
347 $this->
info(
"Copying pages..." );
349 $this->
info(
"Moving pages..." );
352 $res = $dbr->newSelectQueryBuilder()
353 ->select( [
'bt_page' ] )
355 ->from(
'blob_tracking' )
356 ->where( [
'bt_moved' => 0, $dbr->expr(
'bt_page',
'>', $startId ) ] )
357 ->orderBy(
'bt_page' )
358 ->limit( $this->batchSize )
359 ->caller( __METHOD__ )->fetchResultSet();
360 if ( !$res->numRows() ) {
363 foreach ( $res as $row ) {
364 $startId = $row->bt_page;
365 $this->dispatch(
'doPage', $row->bt_page );
368 $this->report(
'pages', $i, $numPages );
370 $this->report(
'pages', $i, $numPages );
371 if ( $this->copyOnly ) {
372 $this->
info(
"All page copies queued." );
374 $this->
info(
"All page moves queued." );
/**
 * Log progress and wait for replication, at most once per reporting interval.
 *
 * When $current loosely equals $end, or the batch counter has reached
 * $this->reportingInterval, the counter is reset, "$label: $current / $end"
 * is written to the info log, and the load balancer factory is asked to
 * wait for replication.
 *
 * NOTE(review): the statement that advances $this->numBatches is not among
 * the lines shown here.
 *
 * @param string $label Prefix for the progress message
 * @param int $current Number of items handled so far
 * @param int|string $end Total number of items; callers in this file pass
 *   the string '[unknown]' when counting is disabled
 */
384 private function report( $label, $current, $end ) {
// Loose comparison: $end may be a string.
386 if ( $current == $end || $this->numBatches >= $this->reportingInterval ) {
387 $this->numBatches = 0;
388 $this->
info(
"$label: $current / $end" );
389 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
396 private function doAllOrphans() {
397 $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase();
400 if ( $this->noCount ) {
401 $numOrphans =
'[unknown]';
403 $numOrphans = $dbr->newSelectQueryBuilder()
404 ->select(
'COUNT(DISTINCT bt_text_id)' )
405 ->from(
'blob_tracking' )
406 ->where( [
'bt_moved' => 0,
'bt_page' => 0 ] )
407 ->caller( __METHOD__ )->fetchField();
408 if ( !$numOrphans ) {
412 if ( $this->copyOnly ) {
413 $this->
info(
"Copying orphans..." );
415 $this->
info(
"Moving orphans..." );
419 $res = $dbr->newSelectQueryBuilder()
420 ->select( [
'bt_text_id' ] )
422 ->from(
'blob_tracking' )
423 ->where( [
'bt_moved' => 0,
'bt_page' => 0, $dbr->expr(
'bt_text_id',
'>', $startId ) ] )
424 ->orderBy(
'bt_text_id' )
425 ->limit( $this->batchSize )
426 ->caller( __METHOD__ )->fetchResultSet();
427 if ( !$res->numRows() ) {
431 foreach ( $res as $row ) {
432 $startId = $row->bt_text_id;
433 $ids[] = $row->bt_text_id;
439 while ( count( $ids ) > $this->orphanBatchSize ) {
440 $args = array_slice( $ids, 0, $this->orphanBatchSize );
441 $ids = array_slice( $ids, $this->orphanBatchSize );
442 array_unshift( $args,
'doOrphanList' );
443 $this->dispatch( ...$args );
445 if ( count( $ids ) ) {
447 array_unshift( $args,
'doOrphanList' );
448 $this->dispatch( ...$args );
451 $this->report(
'orphans', $i, $numOrphans );
453 $this->report(
'orphans', $i, $numOrphans );
454 $this->
info(
"All orphans queued." );
461 $this->
debug(
'starting' );
464 while ( !feof( STDIN ) ) {
465 $line = rtrim( fgets( STDIN ) );
469 $this->
debug( $line );
470 $args = explode(
' ', $line );
471 $cmd = array_shift( $args );
474 $this->doPage( intval( $args[0] ) );
477 $this->doOrphanList( array_map(
'intval', $args ) );
482 MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
491 private function doPage( $pageId ) {
492 $title = Title::newFromID( $pageId );
494 $titleText = $title->getPrefixedText();
496 $titleText =
'[deleted]';
498 $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase();
501 if ( !$this->copyOnly ) {
502 $this->finishIncompleteMoves( [
'bt_page' => $pageId ] );
509 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
511 $res = $dbr->newSelectQueryBuilder()
513 ->from(
'blob_tracking' )
514 ->join(
'text',
null,
'bt_text_id=old_id' )
516 'bt_page' => $pageId,
517 $dbr->expr(
'bt_text_id',
'>', $startId ),
519 'bt_new_url' =>
null,
521 ->orderBy(
'bt_text_id' )
522 ->limit( $this->batchSize )
523 ->caller( __METHOD__ )->fetchResultSet();
524 if ( !$res->numRows() ) {
529 foreach ( $res as $row ) {
530 $startId = $row->bt_text_id;
531 if ( $lastTextId == $row->bt_text_id ) {
535 $lastTextId = $row->bt_text_id;
537 $text = $this->blobStore->expandBlob( $row->old_text, $row->old_flags );
538 if ( $text ===
false ) {
539 $this->
critical(
"Error loading {$row->bt_rev_id}/{$row->bt_text_id}" );
544 if ( !$trx->addItem( $text, $row->bt_text_id ) ) {
545 $this->
debug(
"$titleText: committing blob with " . $trx->getSize() .
" items" );
548 $lbFactory->waitForReplication();
553 $this->
debug(
"$titleText: committing blob with " . $trx->getSize() .
" items" );
571 if ( $this->copyOnly ) {
572 $this->
critical(
"Internal error: can't call moveTextRow() in --copy-only mode" );
575 $dbw = MediaWikiServices::getInstance()->getConnectionProvider()->getPrimaryDatabase();
577 $dbw->begin( __METHOD__ );
578 $dbw->update(
'text',
581 'old_flags' =>
'external,utf-8',
588 $dbw->update(
'blob_tracking',
590 [
'bt_text_id' => $textId ],
593 $dbw->commit( __METHOD__ );
606 private function finishIncompleteMoves( $conds ) {
607 $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase();
608 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
611 $conds = array_merge( $conds, [
613 'bt_new_url IS NOT NULL'
616 $res = $dbr->newSelectQueryBuilder()
618 ->from(
'blob_tracking' )
620 ->andWhere( $dbr->expr(
'bt_text_id',
'>', $startId ) )
621 ->orderBy(
'bt_text_id' )
622 ->limit( $this->batchSize )
623 ->caller( __METHOD__ )->fetchResultSet();
624 if ( !$res->numRows() ) {
627 $this->
debug(
'Incomplete: ' . $res->numRows() .
' rows' );
628 foreach ( $res as $row ) {
629 $startId = $row->bt_text_id;
630 $this->
moveTextRow( $row->bt_text_id, $row->bt_new_url );
631 if ( $row->bt_text_id % 10 == 0 ) {
632 $lbFactory->waitForReplication();
643 $cluster = next( $this->destClusters );
644 if ( $cluster ===
false ) {
645 $cluster = reset( $this->destClusters );
656 private function doOrphanList( $textIds ) {
658 if ( !$this->copyOnly ) {
659 $this->finishIncompleteMoves( [
'bt_text_id' => $textIds ] );
665 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
666 $res = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase()->newSelectQueryBuilder()
667 ->select( [
'old_id',
'old_text',
'old_flags' ] )
670 ->join(
'blob_tracking',
null,
'bt_text_id=old_id' )
671 ->where( [
'old_id' => $textIds,
'bt_moved' => 0 ] )
672 ->caller( __METHOD__ )->fetchResultSet();
674 foreach ( $res as $row ) {
675 $text = $this->blobStore->expandBlob( $row->old_text, $row->old_flags );
676 if ( $text ===
false ) {
677 $this->
critical(
"Error: cannot load revision text for old_id={$row->old_id}" );
681 if ( !$trx->addItem( $text, $row->old_id ) ) {
682 $this->
debug(
"[orphan]: committing blob with " . $trx->getSize() .
" rows" );
685 $lbFactory->waitForReplication();
688 $this->
debug(
"[orphan]: committing blob with " . $trx->getSize() .
" rows" );