21 use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
60 private const NORMAL_MAX_LAG = 10;
62 private const LAG_WAIT_TIMEOUT = 15;
68 'The given PageIdentity {pageIdentity} does not represent a proper page',
69 [
'pageIdentity' => $page ]
73 parent::__construct(
'refreshLinks', $page,
$params );
75 $this->removeDuplicates = (
81 $this->params += [
'causeAction' =>
'RefreshLinksJob',
'causeAgent' =>
'unknown' ];
85 $this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
95 $job->command =
'refreshLinksPrioritized';
107 $job->command =
'refreshLinksDynamic';
115 if ( !empty( $this->params[
'recursive'] ) ) {
121 $services = MediaWikiServices::getInstance();
122 if ( !isset( $this->params[
'range'] ) ) {
123 $lbFactory = $services->getDBLoadBalancerFactory();
124 if ( !$lbFactory->waitForReplication( [
125 'timeout' => self::LAG_WAIT_TIMEOUT
128 $stats = $services->getStatsdDataFactory();
129 $stats->increment(
'refreshlinks_warning.lag_wait_failed' );
134 $extraParams[
'triggeredRecursive'] =
true;
136 $extraParams[
'causeAction'] = $this->params[
'causeAction'];
137 $extraParams[
'causeAgent'] = $this->params[
'causeAgent'];
142 $services->getMainConfig()->get( MainConfigNames::UpdateRowsPerJob ),
144 [
'params' => $extraParams ]
146 $services->getJobQueueGroup()->push( $jobs );
148 } elseif ( isset( $this->params[
'pages'] ) ) {
150 foreach ( $this->params[
'pages'] as [ $ns, $dbKey ] ) {
151 $title = Title::makeTitleSafe( $ns, $dbKey );
173 $services = MediaWikiServices::getInstance();
174 $stats = $services->getStatsdDataFactory();
175 $renderer = $services->getRevisionRenderer();
176 $parserCache = $services->getParserCache();
177 $lbFactory = $services->getDBLoadBalancerFactory();
178 $ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
181 $page = $services->getWikiPageFactory()->newFromTitle( $pageIdentity );
186 $logger = LoggerFactory::getInstance(
'RefreshLinksJob' );
188 'The page does not exist. Perhaps it was deleted?',
190 'page_title' => $this->title->getPrefixedDBkey(),
191 'job_params' => $this->getParams(),
192 'job_metadata' => $this->getMetadata()
195 $stats->increment(
'refreshlinks_outcome.bad_page_not_found' );
205 $dbw = $lbFactory->getPrimaryDatabase();
207 $scopedLock = LinksUpdate::acquirePageLock( $dbw, $page->
getId(),
'job' );
208 if ( $scopedLock ===
null ) {
210 $this->
setLastError(
'LinksUpdate already running for this page, try again later.' );
211 $stats->increment(
'refreshlinks_outcome.bad_lock_failure' );
217 if ( $this->isAlreadyRefreshed( $page ) ) {
220 $stats->increment(
'refreshlinks_outcome.good_update_superseded' );
227 $lbFactory->flushReplicaSnapshots( __METHOD__ );
229 $lbFactory->beginPrimaryChanges( __METHOD__ );
230 $output = $this->getParserOutput( $renderer, $parserCache, $page, $stats );
231 $options = $this->getDataUpdateOptions();
232 $lbFactory->commitPrimaryChanges( __METHOD__ );
242 $options[
'known-revision-output'] = $output;
255 $lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
263 private function getLagAwareRootTimestamp() {
265 $rootTimestamp = $this->params[
'rootJobTimestamp'] ??
null;
266 if ( $rootTimestamp ===
null ) {
270 if ( !empty( $this->params[
'isOpportunistic'] ) ) {
273 $lagAwareTimestamp = $rootTimestamp;
278 (
int)
wfTimestamp( TS_UNIX, $rootTimestamp ) + self::NORMAL_MAX_LAG
282 return $lagAwareTimestamp;
289 private function isAlreadyRefreshed(
WikiPage $page ) {
290 $lagAwareTimestamp = $this->getLagAwareRootTimestamp();
292 return ( $lagAwareTimestamp !==
null && $page->
getLinksTimestamp() > $lagAwareTimestamp );
300 private function shouldGenerateHTMLOnEdit(
RevisionRecord $revision ): bool {
302 foreach ( $revision->
getSlots()->getSlotRoles() as $role ) {
303 $slot = $revision->
getSlots()->getSlot( $role );
304 $contentHandler = $services->getContentHandlerFactory()->getContentHandler( $slot->getModel() );
305 if ( $contentHandler->generateHTMLOnEdit() ) {
321 private function getParserOutput(
325 StatsdDataFactoryInterface $stats
327 $revision = $this->getCurrentRevisionIfUnchanged( $page, $stats );
333 $cachedOutput = $this->getParserOutputFromCache( $parserCache, $page, $revision, $stats );
334 if ( $cachedOutput ) {
335 return $cachedOutput;
338 $causeAction = $this->params[
'causeAction'] ??
'RefreshLinksJob';
343 [
'audience' => $revision::RAW,
'causeAction' => $causeAction ]
347 $output = $renderedRevision->getRevisionParserOutput( [
349 'generate-html' => $this->shouldGenerateHTMLOnEdit( $revision )
351 $output->setCacheTime( $parseTimestamp );
363 private function getCurrentRevisionIfUnchanged(
365 StatsdDataFactoryInterface $stats
371 $latest = $title->getLatestRevID( Title::READ_LATEST );
373 $triggeringRevisionId = $this->params[
'triggeringRevisionId'] ??
null;
374 if ( $triggeringRevisionId && $triggeringRevisionId !== $latest ) {
376 $stats->increment(
'refreshlinks_outcome.bad_rev_not_current' );
377 $this->setLastError(
"Revision $triggeringRevisionId is not current" );
386 $stats->increment(
'refreshlinks_outcome.bad_rev_not_found' );
387 $this->setLastError(
"Revision not found for {$title->getPrefixedDBkey()}" );
395 $stats->increment(
'refreshlinks_outcome.bad_rev_not_current' );
396 $this->setLastError(
"Revision {$revision->getId()} is not current" );
413 private function getParserOutputFromCache(
417 StatsdDataFactoryInterface $stats
419 $cachedOutput =
null;
423 $rootTimestamp = $this->params[
'rootJobTimestamp'] ??
null;
424 if ( $rootTimestamp !==
null ) {
425 $opportunistic = !empty( $this->params[
'isOpportunistic'] );
426 if ( $page->
getTouched() >= $rootTimestamp || $opportunistic ) {
431 $output = $parserCache->
getDirty( $page, $parserOptions );
434 $output->getCacheRevisionId() == $currentRevision->
getId() &&
435 $output->getCacheTime() >= $this->getLagAwareRootTimestamp()
437 $cachedOutput = $output;
442 if ( $cachedOutput ) {
443 $stats->increment(
'refreshlinks.parser_cached' );
445 $stats->increment(
'refreshlinks.parser_uncached' );
448 return $cachedOutput;
454 private function getDataUpdateOptions() {
456 'recursive' => !empty( $this->params[
'useRecursiveLinksUpdate'] ),
458 'causeAction' => $this->params[
'causeAction'],
459 'causeAgent' => $this->params[
'causeAgent']
461 if ( !empty( $this->params[
'triggeringUser'] ) ) {
462 $userInfo = $this->params[
'triggeringUser'];
463 if ( $userInfo[
'userId'] ) {
464 $options[
'triggeringUser'] = User::newFromId( $userInfo[
'userId'] );
467 $options[
'triggeringUser'] = User::newFromName( $userInfo[
'userName'],
false );
475 $info = parent::getDeduplicationInfo();
476 unset( $info[
'causeAction'] );
477 unset( $info[
'causeAgent'] );
478 if ( is_array( $info[
'params'] ) ) {
481 if ( isset( $info[
'params'][
'pages'] ) ) {
482 unset( $info[
'namespace'] );
483 unset( $info[
'title'] );
491 if ( !empty( $this->params[
'recursive'] ) ) {
493 } elseif ( isset( $this->params[
'pages'] ) ) {
494 return count( $this->params[
'pages'] );
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
static partitionBacklinkJob(Job $job, $bSize, $cSize, $opts=[])
Break down $job into approximately ($bSize/$cSize) leaf jobs and a single partition job that covers t...
static invalidateCache(PageIdentity $page, $revid=null)
Clear the info cache for a given Title.
Class to both describe a background job and handle jobs.
array $params
Array of job parameters.
A class containing constants representing the names of configuration variables.
Exception if a PageIdentity is an invalid argument.
Cache for ParserOutput objects corresponding to the latest page revisions.
getDirty(PageRecord $page, $popts)
Retrieve the ParserOutput from ParserCache, even if it's outdated.
Job to update link tables for rerendered wiki pages.
getDeduplicationInfo()
Subclasses may need to override this to make duplication detection work.
runForTitle(PageIdentity $pageIdentity)
static newDynamic(PageIdentity $page, array $params)
static newPrioritized(PageIdentity $page, array $params)
__construct(PageIdentity $page, array $params)
Base representation for an editable wiki page.
getLinksTimestamp()
Get the page_links_updated field.
makeParserOptions( $context)
Get parser options suitable for rendering the primary article wikitext.
getId( $wikiId=self::LOCAL)
getTitle()
Get the title object of the article.
doSecondaryDataUpdates(array $options=[])
Do secondary data updates (such as updating link tables).
loadPageData( $from='fromdb')
Load the object from a given source by title.
getRevisionRecord()
Get the latest revision.
getTouched()
Get the page_touched field.
Interface for objects (potentially) representing an editable wiki page.
canExist()
Checks whether this PageIdentity represents a "proper" page, meaning that it could exist as an editab...
if(count( $args)< 1) $job