112 private const NORMAL_MAX_LAG = 10;
114 private const LAG_WAIT_TIMEOUT = 15;
120 'The given PageIdentity {pageIdentity} does not represent a proper page',
121 [
'pageIdentity' => $page ]
125 parent::__construct(
'refreshLinks', $page,
$params );
127 $this->removeDuplicates = (
133 $this->params += [
'causeAction' =>
'RefreshLinksJob',
'causeAgent' =>
'unknown' ];
137 $this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
147 $job->command =
'refreshLinksPrioritized';
159 $job->command =
'refreshLinksDynamic';
167 if ( !empty( $this->params[
'recursive'] ) ) {
173 $services = MediaWikiServices::getInstance();
174 if ( !isset( $this->params[
'range'] ) ) {
175 $lbFactory = $services->getDBLoadBalancerFactory();
176 if ( !$lbFactory->waitForReplication( [
177 'timeout' => self::LAG_WAIT_TIMEOUT
180 $stats = $services->getStatsFactory();
181 $stats->
getCounter(
'refreshlinks_warnings_total' )
182 ->setLabel(
'reason',
'lag_wait_failed' )
183 ->copyToStatsdAt(
'refreshlinks_warning.lag_wait_failed' )
189 $extraParams[
'triggeredRecursive'] =
true;
191 $extraParams[
'causeAction'] = $this->params[
'causeAction'];
192 $extraParams[
'causeAgent'] = $this->params[
'causeAgent'];
197 $services->getMainConfig()->get( MainConfigNames::UpdateRowsPerJob ),
199 [
'params' => $extraParams ]
201 $services->getJobQueueGroup()->push( $jobs );
203 } elseif ( isset( $this->params[
'pages'] ) ) {
205 foreach ( $this->params[
'pages'] as [ $ns, $dbKey ] ) {
206 $title = Title::makeTitleSafe( $ns, $dbKey );
228 $services = MediaWikiServices::getInstance();
229 $stats = $services->getStatsFactory();
230 $renderer = $services->getRevisionRenderer();
231 $parserCache = $services->getParserCache();
232 $lbFactory = $services->getDBLoadBalancerFactory();
233 $ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
236 $page = $services->getWikiPageFactory()->newFromTitle( $pageIdentity );
241 $logger = LoggerFactory::getInstance(
'RefreshLinksJob' );
243 'The page does not exist. Perhaps it was deleted?',
245 'page_title' => $this->title->getPrefixedDBkey(),
246 'job_params' => $this->getParams(),
247 'job_metadata' => $this->getMetadata()
250 $this->incrementFailureCounter( $stats,
'page_not_found' );
260 $dbw = $lbFactory->getPrimaryDatabase();
262 $scopedLock = LinksUpdate::acquirePageLock( $dbw, $page->
getId(),
'job' );
263 if ( $scopedLock ===
null ) {
265 $this->
setLastError(
'LinksUpdate already running for this page, try again later.' );
266 $this->incrementFailureCounter( $stats,
'lock_failure' );
272 if ( $this->isAlreadyRefreshed( $page ) ) {
275 $stats->
getCounter(
'refreshlinks_superseded_updates_total' )
276 ->copyToStatsdAt(
'refreshlinks_outcome.good_update_superseded' )
283 $lbFactory->beginPrimaryChanges( __METHOD__ );
284 $output = $this->getParserOutput( $renderer, $parserCache, $page, $stats );
285 $options = $this->getDataUpdateOptions();
286 $lbFactory->commitPrimaryChanges( __METHOD__ );
296 $options[
'known-revision-output'] = $output;
299 InfoAction::invalidateCache( $page );
309 $lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
317 private function getLagAwareRootTimestamp() {
319 $rootTimestamp = $this->params[
'rootJobTimestamp'] ??
null;
320 if ( $rootTimestamp ===
null ) {
324 if ( !empty( $this->params[
'isOpportunistic'] ) ) {
327 $lagAwareTimestamp = $rootTimestamp;
332 (
int)
wfTimestamp( TS_UNIX, $rootTimestamp ) + self::NORMAL_MAX_LAG
336 return $lagAwareTimestamp;
343 private function isAlreadyRefreshed(
WikiPage $page ) {
344 $lagAwareTimestamp = $this->getLagAwareRootTimestamp();
346 return ( $lagAwareTimestamp !==
null && $page->
getLinksTimestamp() > $lagAwareTimestamp );
354 private function shouldGenerateHTMLOnEdit(
RevisionRecord $revision ): bool {
356 foreach ( $revision->
getSlots()->getSlotRoles() as $role ) {
357 $slot = $revision->
getSlots()->getSlot( $role );
358 $contentHandler = $services->getContentHandlerFactory()->getContentHandler( $slot->getModel() );
359 if ( $contentHandler->generateHTMLOnEdit() ) {
375 private function getParserOutput(
381 $revision = $this->getCurrentRevisionIfUnchanged( $page, $stats );
387 $cachedOutput = $this->getParserOutputFromCache( $parserCache, $page, $revision, $stats );
388 $statsCounter = $stats->
getCounter(
'refreshlinks_parsercache_operations_total' );
390 if ( $cachedOutput && $this->canUseParserOutputFromCache( $cachedOutput, $revision ) ) {
392 ->setLabel(
'status',
'cache_hit' )
393 ->setLabel(
'html_changed',
'n/a' )
394 ->copyToStatsdAt(
'refreshlinks.parser_cached' )
397 return $cachedOutput;
400 $causeAction = $this->params[
'causeAction'] ??
'RefreshLinksJob';
405 $sampleRate = MediaWikiServices::getInstance()->getMainConfig()->get(
406 MainConfigNames::ParsoidSelectiveUpdateSampleRate
408 $doSample = $sampleRate && mt_rand( 1, $sampleRate ) === 1;
409 if ( $doSample && $cachedOutput ===
null ) {
413 $cachedOutput = $parserCache->
getDirty( $page, $parserOptions ) ?:
null;
421 'audience' => $revision::RAW,
422 'causeAction' => $causeAction,
425 'previous-output' => $cachedOutput,
430 $output = $renderedRevision->getRevisionParserOutput( [
432 'generate-html' => $this->shouldGenerateHTMLOnEdit( $revision )
434 $output->setCacheTime( $parseTimestamp );
438 $content = $revision->
getContent( SlotRecord::MAIN );
440 'source' =>
'RefreshLinksJob',
441 'type' => $cachedOutput ===
null ?
'full' :
'selective',
442 'reason' => $causeAction,
443 'parser' => $parserOptions->getUseParsoid() ?
'parsoid' :
'legacy',
444 'opportunistic' => empty( $this->params[
'isOpportunistic'] ) ?
'false' :
'true',
445 'wiki' => WikiMap::getCurrentWikiId(),
446 'model' => $content ? $content->getModel() :
'unknown',
450 ->setLabels( $labels )
453 ->
getCounter(
'ParserCache_selective_cpu_seconds' )
454 ->setLabels( $labels )
455 ->incrementBy( $output->getTimeProfile(
'cpu' ) );
461 if ( !$cachedOutput ) {
463 $htmlChanged =
'unknown';
464 } elseif ( $cachedOutput->
getRawText() === $output->getRawText() ) {
471 $htmlChanged =
'yes';
475 ->setLabel(
'status',
'cache_miss' )
476 ->setLabel(
'html_changed', $htmlChanged )
477 ->copyToStatsdAt(
'refreshlinks.parser_uncached' )
490 private function getCurrentRevisionIfUnchanged(
498 $latest = $title->getLatestRevID( IDBAccessObject::READ_LATEST );
500 $triggeringRevisionId = $this->params[
'triggeringRevisionId'] ??
null;
501 if ( $triggeringRevisionId && $triggeringRevisionId !== $latest ) {
503 $this->incrementFailureCounter( $stats,
'rev_not_current' );
504 $this->
setLastError(
"Revision $triggeringRevisionId is not current" );
513 $this->incrementFailureCounter( $stats,
'rev_not_found' );
514 $this->
setLastError(
"Revision not found for {$title->getPrefixedDBkey()}" );
522 $this->incrementFailureCounter( $stats,
'rev_not_current' );
523 $this->
setLastError(
"Revision {$revision->getId()} is not current" );
540 private function getParserOutputFromCache(
548 $parserOptions = $page->makeParserOptions(
'canonical' );
549 if ( $parserOptions->getUseParsoid() ) {
550 return $parserCache->
getDirty( $page, $parserOptions ) ?:
null;
555 $rootTimestamp = $this->params[
'rootJobTimestamp'] ??
null;
556 if ( $rootTimestamp !==
null ) {
557 $opportunistic = !empty( $this->params[
'isOpportunistic'] );
558 if ( $page->
getTouched() >= $rootTimestamp || $opportunistic ) {
561 return $parserCache->
getDirty( $page, $parserOptions ) ?:
null;
568 private function canUseParserOutputFromCache(
575 && $cachedOutput->
getCacheTime() >= $this->getLagAwareRootTimestamp();
585 private function incrementFailureCounter(
StatsFactory $stats, $reason ): void {
586 $stats->getCounter(
'refreshlinks_failures_total' )
595 private function getDataUpdateOptions() {
597 'recursive' => !empty( $this->params[
'useRecursiveLinksUpdate'] ),
599 'causeAction' => $this->params[
'causeAction'],
600 'causeAgent' => $this->params[
'causeAgent']
602 if ( !empty( $this->params[
'triggeringUser'] ) ) {
603 $userInfo = $this->params[
'triggeringUser'];
604 if ( $userInfo[
'userId'] ) {
605 $options[
'triggeringUser'] = User::newFromId( $userInfo[
'userId'] );
608 $options[
'triggeringUser'] = User::newFromName( $userInfo[
'userName'],
false );
616 $info = parent::getDeduplicationInfo();
617 unset( $info[
'causeAction'] );
618 unset( $info[
'causeAgent'] );
619 if ( is_array( $info[
'params'] ) ) {
622 if ( isset( $info[
'params'][
'pages'] ) ) {
623 unset( $info[
'namespace'] );
624 unset( $info[
'title'] );
632 if ( !empty( $this->params[
'recursive'] ) ) {
634 } elseif ( isset( $this->params[
'pages'] ) ) {
635 return count( $this->params[
'pages'] );
Base representation for an editable wiki page.
getLinksTimestamp()
Get the page_links_updated field.
makeParserOptions( $context)
Get parser options suitable for rendering the primary article wikitext.
getId( $wikiId=self::LOCAL)
getTitle()
Get the title object of the article.
doSecondaryDataUpdates(array $options=[])
Do secondary data updates (such as updating link tables).
loadPageData( $from='fromdb')
Load the object from a given source by title.
getTouched()
Get the page_touched field.