108 private const NORMAL_MAX_LAG = 10;
110 private const LAG_WAIT_TIMEOUT = 15;
116 'The given PageIdentity {pageIdentity} does not represent a proper page',
117 [
'pageIdentity' => $page ]
121 parent::__construct(
'refreshLinks', $page,
$params );
123 $this->removeDuplicates = (
129 $this->params += [
'causeAction' =>
'RefreshLinksJob',
'causeAgent' =>
'unknown' ];
133 $this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
143 $job->command =
'refreshLinksPrioritized';
155 $job->command =
'refreshLinksDynamic';
163 if ( !empty( $this->params[
'recursive'] ) ) {
169 $services = MediaWikiServices::getInstance();
170 if ( !isset( $this->params[
'range'] ) ) {
171 $lbFactory = $services->getDBLoadBalancerFactory();
172 if ( !$lbFactory->waitForReplication( [
173 'timeout' => self::LAG_WAIT_TIMEOUT
176 $stats = $services->getStatsFactory();
177 $stats->
getCounter(
'refreshlinks_warnings_total' )
178 ->setLabel(
'reason',
'lag_wait_failed' )
179 ->copyToStatsdAt(
'refreshlinks_warning.lag_wait_failed' )
185 $extraParams[
'triggeredRecursive'] =
true;
187 $extraParams[
'causeAction'] = $this->params[
'causeAction'];
188 $extraParams[
'causeAgent'] = $this->params[
'causeAgent'];
193 $services->getMainConfig()->get( MainConfigNames::UpdateRowsPerJob ),
195 [
'params' => $extraParams ]
197 $services->getJobQueueGroup()->push( $jobs );
199 } elseif ( isset( $this->params[
'pages'] ) ) {
201 foreach ( $this->params[
'pages'] as [ $ns, $dbKey ] ) {
202 $title = Title::makeTitleSafe( $ns, $dbKey );
224 $services = MediaWikiServices::getInstance();
225 $stats = $services->getStatsFactory();
226 $renderer = $services->getRevisionRenderer();
227 $parserCache = $services->getParserCache();
228 $lbFactory = $services->getDBLoadBalancerFactory();
229 $ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
232 $page = $services->getWikiPageFactory()->newFromTitle( $pageIdentity );
237 $logger = LoggerFactory::getInstance(
'RefreshLinksJob' );
239 'The page does not exist. Perhaps it was deleted?',
241 'page_title' => $this->title->getPrefixedDBkey(),
242 'job_params' => $this->getParams(),
243 'job_metadata' => $this->getMetadata()
246 $this->incrementFailureCounter( $stats,
'page_not_found' );
256 $dbw = $lbFactory->getPrimaryDatabase();
258 $scopedLock = LinksUpdate::acquirePageLock( $dbw, $page->
getId(),
'job' );
259 if ( $scopedLock ===
null ) {
261 $this->
setLastError(
'LinksUpdate already running for this page, try again later.' );
262 $this->incrementFailureCounter( $stats,
'lock_failure' );
268 if ( $this->isAlreadyRefreshed( $page ) ) {
271 $stats->
getCounter(
'refreshlinks_superseded_updates_total' )
272 ->copyToStatsdAt(
'refreshlinks_outcome.good_update_superseded' )
280 $lbFactory->flushReplicaSnapshots( __METHOD__ );
282 $lbFactory->beginPrimaryChanges( __METHOD__ );
283 $output = $this->getParserOutput( $renderer, $parserCache, $page, $stats );
284 $options = $this->getDataUpdateOptions();
285 $lbFactory->commitPrimaryChanges( __METHOD__ );
295 $options[
'known-revision-output'] = $output;
298 InfoAction::invalidateCache( $page );
308 $lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
316 private function getLagAwareRootTimestamp() {
318 $rootTimestamp = $this->params[
'rootJobTimestamp'] ??
null;
319 if ( $rootTimestamp ===
null ) {
323 if ( !empty( $this->params[
'isOpportunistic'] ) ) {
326 $lagAwareTimestamp = $rootTimestamp;
331 (
int)
wfTimestamp( TS_UNIX, $rootTimestamp ) + self::NORMAL_MAX_LAG
335 return $lagAwareTimestamp;
342 private function isAlreadyRefreshed(
WikiPage $page ) {
343 $lagAwareTimestamp = $this->getLagAwareRootTimestamp();
345 return ( $lagAwareTimestamp !==
null && $page->
getLinksTimestamp() > $lagAwareTimestamp );
353 private function shouldGenerateHTMLOnEdit(
RevisionRecord $revision ): bool {
355 foreach ( $revision->
getSlots()->getSlotRoles() as $role ) {
356 $slot = $revision->
getSlots()->getSlot( $role );
357 $contentHandler = $services->getContentHandlerFactory()->getContentHandler( $slot->getModel() );
358 if ( $contentHandler->generateHTMLOnEdit() ) {
374 private function getParserOutput(
380 $revision = $this->getCurrentRevisionIfUnchanged( $page, $stats );
386 $cachedOutput = $this->getParserOutputFromCache( $parserCache, $page, $revision, $stats );
388 if ( $cachedOutput && $this->canUseParserOutputFromCache( $cachedOutput, $revision ) ) {
389 $stats->
getCounter(
'refreshlinks_parsercache_operations_total' )
390 ->setLabel(
'status',
'cache_hit' )
391 ->copyToStatsdAt(
'refreshlinks.parser_cached' )
394 return $cachedOutput;
397 $statsCounter = $stats->
getCounter(
'refreshlinks_parsercache_operations_total' )
398 ->setLabel(
'status',
'cache_miss' )
399 ->copyToStatsdAt(
'refreshlinks.parser_uncached' );
401 $causeAction = $this->params[
'causeAction'] ??
'RefreshLinksJob';
406 [
'audience' => $revision::RAW,
'causeAction' => $causeAction ]
410 $output = $renderedRevision->getRevisionParserOutput( [
412 'generate-html' => $this->shouldGenerateHTMLOnEdit( $revision )
414 $output->setCacheTime( $parseTimestamp );
419 if ( !$cachedOutput ) {
421 $statsCounter->setLabel(
'html_changed',
'unknown' );
422 } elseif ( $cachedOutput->
getRawText() === $output->getRawText() ) {
426 $statsCounter->setLabel(
'html_changed',
'no' );
429 $statsCounter->setLabel(
'html_changed',
'yes' );
432 $statsCounter->increment();
444 private function getCurrentRevisionIfUnchanged(
452 $latest = $title->getLatestRevID( IDBAccessObject::READ_LATEST );
454 $triggeringRevisionId = $this->params[
'triggeringRevisionId'] ??
null;
455 if ( $triggeringRevisionId && $triggeringRevisionId !== $latest ) {
457 $this->incrementFailureCounter( $stats,
'rev_not_current' );
458 $this->
setLastError(
"Revision $triggeringRevisionId is not current" );
467 $this->incrementFailureCounter( $stats,
'rev_not_found' );
468 $this->
setLastError(
"Revision not found for {$title->getPrefixedDBkey()}" );
476 $this->incrementFailureCounter( $stats,
'rev_not_current' );
477 $this->
setLastError(
"Revision {$revision->getId()} is not current" );
494 private function getParserOutputFromCache(
503 $rootTimestamp = $this->params[
'rootJobTimestamp'] ??
null;
504 if ( $rootTimestamp !==
null ) {
505 $opportunistic = !empty( $this->params[
'isOpportunistic'] );
506 if ( $page->
getTouched() >= $rootTimestamp || $opportunistic ) {
510 $output = $parserCache->
getDirty( $page, $parserOptions );
513 $output->getCacheRevisionId() == $currentRevision->
getId()
523 private function canUseParserOutputFromCache(
530 && $cachedOutput->
getCacheTime() >= $this->getLagAwareRootTimestamp();
540 private function incrementFailureCounter(
StatsFactory $stats, $reason ): void {
541 $stats->getCounter(
'refreshlinks_failures_total' )
542 ->setLabel(
'reason', $reason )
543 ->copyToStatsdAt(
"refreshlinks_outcome.bad_$reason" )
550 private function getDataUpdateOptions() {
552 'recursive' => !empty( $this->params[
'useRecursiveLinksUpdate'] ),
554 'causeAction' => $this->params[
'causeAction'],
555 'causeAgent' => $this->params[
'causeAgent']
557 if ( !empty( $this->params[
'triggeringUser'] ) ) {
558 $userInfo = $this->params[
'triggeringUser'];
559 if ( $userInfo[
'userId'] ) {
560 $options[
'triggeringUser'] = User::newFromId( $userInfo[
'userId'] );
563 $options[
'triggeringUser'] = User::newFromName( $userInfo[
'userName'],
false );
571 $info = parent::getDeduplicationInfo();
572 unset( $info[
'causeAction'] );
573 unset( $info[
'causeAgent'] );
574 if ( is_array( $info[
'params'] ) ) {
577 if ( isset( $info[
'params'][
'pages'] ) ) {
578 unset( $info[
'namespace'] );
579 unset( $info[
'title'] );
587 if ( !empty( $this->params[
'recursive'] ) ) {
589 } elseif ( isset( $this->params[
'pages'] ) ) {
590 return count( $this->params[
'pages'] );
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
array $params
The job parameters.
setLastError( $error)
This is actually implemented in the Job class.
static partitionBacklinkJob(Job $job, $bSize, $cSize, $opts=[])
Break down $job into approximately ($bSize/$cSize) leaf jobs and a single partition job that covers the remaining backlink range (if needed).
Describe and execute a background job.
A class containing constants representing the names of configuration variables.
Exception if a PageIdentity is an invalid argument.
Cache for ParserOutput objects corresponding to the latest page revisions.
getDirty(PageRecord $page, $popts)
Retrieve the ParserOutput from ParserCache, even if it's outdated.
Job to update link tables for rerendered wiki pages.
getDeduplicationInfo()
Subclasses may need to override this to make duplication detection work.
runForTitle(PageIdentity $pageIdentity)
static newDynamic(PageIdentity $page, array $params)
static newPrioritized(PageIdentity $page, array $params)
__construct(PageIdentity $page, array $params)
Base representation for an editable wiki page.
getLinksTimestamp()
Get the page_links_updated field.
makeParserOptions( $context)
Get parser options suitable for rendering the primary article wikitext.
getId( $wikiId=self::LOCAL)
getTitle()
Get the title object of the article.
doSecondaryDataUpdates(array $options=[])
Do secondary data updates (such as updating link tables).
loadPageData( $from='fromdb')
Load the object from a given source by title.
getRevisionRecord()
Get the latest revision.
getTouched()
Get the page_touched field.
Interface for objects (potentially) representing an editable wiki page.
canExist()
Checks whether this PageIdentity represents a "proper" page, meaning that it could exist as an editable page on the wiki.
if(count( $args)< 1) $job