118 private const NORMAL_MAX_LAG = 10;
120 private const LAG_WAIT_TIMEOUT = 15;
126 'The given PageIdentity {pageIdentity} does not represent a proper page',
127 [
'pageIdentity' => $page ]
131 parent::__construct(
'refreshLinks', $page,
$params );
133 $this->removeDuplicates = (
139 $this->params += [
'causeAction' =>
'RefreshLinksJob',
'causeAgent' =>
'unknown' ];
143 $this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
153 $job->command =
'refreshLinksPrioritized';
165 $job->command =
'refreshLinksDynamic';
173 if ( !empty( $this->params[
'recursive'] ) ) {
180 if ( !isset( $this->params[
'range'] ) ) {
181 $lbFactory = $services->getDBLoadBalancerFactory();
182 if ( !$lbFactory->waitForReplication( [
183 'timeout' => self::LAG_WAIT_TIMEOUT
186 $stats = $services->getStatsFactory();
187 $stats->getCounter(
'refreshlinks_warnings_total' )
188 ->setLabel(
'reason',
'lag_wait_failed' )
189 ->copyToStatsdAt(
'refreshlinks_warning.lag_wait_failed' )
195 $extraParams[
'triggeredRecursive'] =
true;
197 $extraParams[
'causeAction'] = $this->params[
'causeAction'];
198 $extraParams[
'causeAgent'] = $this->params[
'causeAgent'];
201 $jobs = BacklinkJobUtils::partitionBacklinkJob(
205 [
'params' => $extraParams ]
207 $services->getJobQueueGroup()->push( $jobs );
209 } elseif ( isset( $this->params[
'pages'] ) ) {
211 foreach ( $this->params[
'pages'] as [ $ns, $dbKey ] ) {
212 $title = Title::makeTitleSafe( $ns, $dbKey );
235 $stats = $services->getStatsFactory();
236 $renderer = $services->getRevisionRenderer();
237 $parserCache = $services->getParserCache();
238 $lbFactory = $services->getDBLoadBalancerFactory();
239 $ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
242 $page = $services->getWikiPageFactory()->newFromTitle( $pageIdentity );
243 $page->loadPageData( IDBAccessObject::READ_LATEST );
245 if ( !$page->exists() ) {
247 $logger = LoggerFactory::getInstance(
'RefreshLinksJob' );
249 'The page does not exist. Perhaps it was deleted?',
251 'page_title' => $this->title->getPrefixedDBkey(),
252 'job_params' => $this->getParams(),
253 'job_metadata' => $this->getMetadata()
256 $this->incrementFailureCounter( $stats,
'page_not_found' );
266 $dbw = $lbFactory->getPrimaryDatabase();
268 $scopedLock = LinksUpdate::acquirePageLock( $dbw, $page->getId(),
'job' );
269 if ( $scopedLock ===
null ) {
271 $this->
setLastError(
'LinksUpdate already running for this page, try again later.' );
272 $this->incrementFailureCounter( $stats,
'lock_failure' );
278 if ( $this->isAlreadyRefreshed( $page ) ) {
281 $stats->getCounter(
'refreshlinks_superseded_updates_total' )
282 ->copyToStatsdAt(
'refreshlinks_outcome.good_update_superseded' )
289 $lbFactory->beginPrimaryChanges( __METHOD__ );
290 $output = $this->getParserOutput( $renderer, $parserCache, $page, $stats );
291 $options = $this->getDataUpdateOptions();
292 $lbFactory->commitPrimaryChanges( __METHOD__ );
302 $options[
'known-revision-output'] = $output;
304 $page->doSecondaryDataUpdates( $options );
315 $lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
323 private function getLagAwareRootTimestamp() {
325 $rootTimestamp = $this->params[
'rootJobTimestamp'] ??
null;
326 if ( $rootTimestamp ===
null ) {
330 if ( !empty( $this->params[
'isOpportunistic'] ) ) {
333 $lagAwareTimestamp = $rootTimestamp;
338 (
int)
wfTimestamp( TS_UNIX, $rootTimestamp ) + self::NORMAL_MAX_LAG
342 return $lagAwareTimestamp;
349 private function isAlreadyRefreshed( WikiPage $page ) {
350 $lagAwareTimestamp = $this->getLagAwareRootTimestamp();
352 return ( $lagAwareTimestamp !==
null && $page->getLinksTimestamp() > $lagAwareTimestamp );
360 private function shouldGenerateHTMLOnEdit( RevisionRecord $revision ): bool {
361 $services = MediaWikiServices::getInstance();
362 foreach ( $revision->getSlots()->getSlotRoles() as $role ) {
363 $slot = $revision->getSlots()->getSlot( $role );
364 $contentHandler = $services->getContentHandlerFactory()->getContentHandler( $slot->getModel() );
365 if ( $contentHandler->generateHTMLOnEdit() ) {
381 private function getParserOutput(
382 RevisionRenderer $renderer,
383 ParserCache $parserCache,
387 $revision = $this->getCurrentRevisionIfUnchanged( $page, $stats );
393 $cachedOutput = $this->getParserOutputFromCache( $parserCache, $page, $revision, $stats );
394 $statsCounter = $stats->getCounter(
'refreshlinks_parsercache_operations_total' );
396 if ( $cachedOutput && $this->canUseParserOutputFromCache( $cachedOutput, $revision ) ) {
398 ->setLabel(
'status',
'cache_hit' )
399 ->setLabel(
'html_changed',
'n/a' )
400 ->copyToStatsdAt(
'refreshlinks.parser_cached' )
403 return $cachedOutput;
406 $causeAction = $this->params[
'causeAction'] ??
'RefreshLinksJob';
407 $parserOptions = $page->makeParserOptions(
'canonical' );
414 $doSample = $sampleRate && mt_rand( 1, $sampleRate ) === 1;
415 if ( $doSample && $cachedOutput ===
null ) {
419 $cachedOutput = $parserCache->getDirty( $page, $parserOptions ) ?:
null;
422 $renderedRevision = $renderer->getRenderedRevision(
427 'audience' => $revision::RAW,
428 'causeAction' => $causeAction,
431 'previous-output' => $cachedOutput,
436 $output = $renderedRevision->getRevisionParserOutput( [
438 'generate-html' => $this->shouldGenerateHTMLOnEdit( $revision )
440 $output->setCacheTime( $parseTimestamp );
444 $content = $revision->getContent( SlotRecord::MAIN );
446 'source' =>
'RefreshLinksJob',
447 'type' => $cachedOutput ===
null ?
'full' :
'selective',
448 'reason' => $causeAction,
449 'parser' => $parserOptions->getUseParsoid() ?
'parsoid' :
'legacy',
450 'opportunistic' => empty( $this->params[
'isOpportunistic'] ) ?
'false' :
'true',
451 'wiki' => WikiMap::getCurrentWikiId(),
452 'model' => $content ? $content->getModel() :
'unknown',
455 ->getCounter(
'ParserCache_selective_total' )
456 ->setLabels( $labels )
459 ->getCounter(
'ParserCache_selective_cpu_seconds' )
460 ->setLabels( $labels )
461 ->incrementBy( $output->getTimeProfile(
'cpu' ) );
467 if ( !$cachedOutput ) {
469 $htmlChanged =
'unknown';
470 } elseif ( $cachedOutput->getRawText() === $output->getRawText() ) {
477 $htmlChanged =
'yes';
481 ->setLabel(
'status',
'cache_miss' )
482 ->setLabel(
'html_changed', $htmlChanged )
483 ->copyToStatsdAt(
'refreshlinks.parser_uncached' )
496 private function getCurrentRevisionIfUnchanged(
500 $title = $page->getTitle();
504 $latest = $title->getLatestRevID( IDBAccessObject::READ_LATEST );
506 $triggeringRevisionId = $this->params[
'triggeringRevisionId'] ??
null;
507 if ( $triggeringRevisionId && $triggeringRevisionId !== $latest ) {
509 $this->incrementFailureCounter( $stats,
'rev_not_current' );
510 $this->
setLastError(
"Revision $triggeringRevisionId is not current" );
516 $revision = $page->getRevisionRecord();
519 $this->incrementFailureCounter( $stats,
'rev_not_found' );
520 $this->
setLastError(
"Revision not found for {$title->getPrefixedDBkey()}" );
523 } elseif ( $revision->getId() !== $latest || $revision->getPageId() !== $page->getId() ) {
528 $this->incrementFailureCounter( $stats,
'rev_not_current' );
529 $this->
setLastError(
"Revision {$revision->getId()} is not current" );
546 private function getParserOutputFromCache(
547 ParserCache $parserCache,
549 RevisionRecord $currentRevision,
554 $parserOptions = $page->makeParserOptions(
'canonical' );
555 if ( $parserOptions->getUseParsoid() ) {
556 return $parserCache->getDirty( $page, $parserOptions ) ?:
null;
561 $rootTimestamp = $this->params[
'rootJobTimestamp'] ??
null;
562 if ( $rootTimestamp !==
null ) {
563 $opportunistic = !empty( $this->params[
'isOpportunistic'] );
564 if ( $page->getTouched() >= $rootTimestamp || $opportunistic ) {
567 return $parserCache->getDirty( $page, $parserOptions ) ?:
null;
574 private function canUseParserOutputFromCache(
575 ParserOutput $cachedOutput,
576 RevisionRecord $currentRevision
580 return $cachedOutput->getCacheRevisionId() == $currentRevision->getId()
581 && $cachedOutput->getCacheTime() >= $this->getLagAwareRootTimestamp();
591 private function incrementFailureCounter( StatsFactory $stats, $reason ): void {
592 $stats->getCounter(
'refreshlinks_failures_total' )
601 private function getDataUpdateOptions() {
603 'recursive' => !empty( $this->params[
'useRecursiveLinksUpdate'] ),
605 'causeAction' => $this->params[
'causeAction'],
606 'causeAgent' => $this->params[
'causeAgent']
608 if ( !empty( $this->params[
'triggeringUser'] ) ) {
609 $userInfo = $this->params[
'triggeringUser'];
610 if ( $userInfo[
'userId'] ) {
611 $options[
'triggeringUser'] = User::newFromId( $userInfo[
'userId'] );
614 $options[
'triggeringUser'] = User::newFromName( $userInfo[
'userName'],
false );
622 $info = parent::getDeduplicationInfo();
623 unset( $info[
'causeAction'] );
624 unset( $info[
'causeAgent'] );
625 if ( is_array( $info[
'params'] ) ) {
628 if ( isset( $info[
'params'][
'pages'] ) ) {
629 unset( $info[
'namespace'] );
630 unset( $info[
'title'] );
638 if ( !empty( $this->params[
'recursive'] ) ) {
640 } elseif ( isset( $this->params[
'pages'] ) ) {
641 return count( $this->params[
'pages'] );