23use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
60 private const NORMAL_MAX_LAG = 10;
62 private const LAG_WAIT_TIMEOUT = 15;
65 if ( empty( $params[
'pages'] ) && !$page->
canExist() ) {
68 'The given PageIdentity {pageIdentity} does not represent a proper page',
69 [
'pageIdentity' => $page ]
73 parent::__construct(
'refreshLinks', $page, $params );
75 $this->removeDuplicates = (
77 !isset( $params[
'range'] ) &&
79 !( isset( $params[
'pages'] ) && count( $params[
'pages'] ) != 1 )
81 $this->params += [
'causeAction' =>
'unknown',
'causeAgent' =>
'unknown' ];
85 $this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
94 $job =
new self( $page, $params );
95 $job->command =
'refreshLinksPrioritized';
106 $job =
new self( $page, $params );
107 $job->command =
'refreshLinksDynamic';
115 if ( !empty( $this->params[
'recursive'] ) ) {
121 $services = MediaWikiServices::getInstance();
122 if ( !isset( $this->params[
'range'] ) ) {
123 $lbFactory = $services->getDBLoadBalancerFactory();
124 if ( !$lbFactory->waitForReplication( [
125 'domain' => $lbFactory->getLocalDomainID(),
126 'timeout' => self::LAG_WAIT_TIMEOUT
129 $stats = $services->getStatsdDataFactory();
130 $stats->increment(
'refreshlinks_warning.lag_wait_failed' );
135 $extraParams[
'triggeredRecursive'] =
true;
137 $extraParams[
'causeAction'] = $this->params[
'causeAction'];
138 $extraParams[
'causeAgent'] = $this->params[
'causeAgent'];
143 $services->getMainConfig()->get( MainConfigNames::UpdateRowsPerJob ),
145 [
'params' => $extraParams ]
147 $services->getJobQueueGroup()->push( $jobs );
149 } elseif ( isset( $this->params[
'pages'] ) ) {
151 foreach ( $this->params[
'pages'] as list( $ns, $dbKey ) ) {
152 $title = Title::makeTitleSafe( $ns, $dbKey );
174 $services = MediaWikiServices::getInstance();
175 $stats = $services->getStatsdDataFactory();
176 $renderer = $services->getRevisionRenderer();
177 $parserCache = $services->getParserCache();
178 $lbFactory = $services->getDBLoadBalancerFactory();
179 $ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
182 $page = $services->getWikiPageFactory()->newFromTitle( $pageIdentity );
187 $logger = LoggerFactory::getInstance(
'RefreshLinksJob' );
189 'The page does not exist. Perhaps it was deleted?',
191 'page_title' => $this->title->getPrefixedDBkey(),
192 'job_params' => $this->getParams(),
193 'job_metadata' => $this->getMetadata()
196 $stats->increment(
'refreshlinks_outcome.bad_page_not_found' );
206 $dbw = $lbFactory->getMainLB()->getConnectionRef(
DB_PRIMARY );
208 $scopedLock = LinksUpdate::acquirePageLock( $dbw, $page->
getId(),
'job' );
209 if ( $scopedLock ===
null ) {
211 $this->
setLastError(
'LinksUpdate already running for this page, try again later.' );
212 $stats->increment(
'refreshlinks_outcome.bad_lock_failure' );
218 if ( $this->isAlreadyRefreshed( $page ) ) {
221 $stats->increment(
'refreshlinks_outcome.good_update_superseded' );
228 $lbFactory->flushReplicaSnapshots( __METHOD__ );
230 $lbFactory->beginPrimaryChanges( __METHOD__ );
231 $output = $this->getParserOutput( $renderer, $parserCache, $page, $stats );
232 $options = $this->getDataUpdateOptions();
233 $lbFactory->commitPrimaryChanges( __METHOD__ );
243 $options[
'known-revision-output'] = $output;
246 InfoAction::invalidateCache( $page );
250 $lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
258 private function getLagAwareRootTimestamp() {
260 $rootTimestamp = $this->params[
'rootJobTimestamp'] ??
null;
261 if ( $rootTimestamp ===
null ) {
265 if ( !empty( $this->params[
'isOpportunistic'] ) ) {
268 $lagAwareTimestamp = $rootTimestamp;
273 (
int)
wfTimestamp( TS_UNIX, $rootTimestamp ) + self::NORMAL_MAX_LAG
277 return $lagAwareTimestamp;
284 private function isAlreadyRefreshed(
WikiPage $page ) {
285 $lagAwareTimestamp = $this->getLagAwareRootTimestamp();
287 return ( $lagAwareTimestamp !==
null && $page->
getLinksTimestamp() > $lagAwareTimestamp );
299 private function getParserOutput(
303 StatsdDataFactoryInterface $stats
305 $revision = $this->getCurrentRevisionIfUnchanged( $page, $stats );
311 $cachedOutput = $this->getParserOutputFromCache( $parserCache, $page, $revision, $stats );
312 if ( $cachedOutput ) {
313 return $cachedOutput;
320 [
'audience' => $revision::RAW ]
324 $output = $renderedRevision->getRevisionParserOutput( [
'generate-html' =>
false ] );
325 $output->setCacheTime( $parseTimestamp );
337 private function getCurrentRevisionIfUnchanged(
339 StatsdDataFactoryInterface $stats
345 $latest =
$title->getLatestRevID( Title::READ_LATEST );
347 $triggeringRevisionId = $this->params[
'triggeringRevisionId'] ??
null;
348 if ( $triggeringRevisionId && $triggeringRevisionId !== $latest ) {
350 $stats->increment(
'refreshlinks_outcome.bad_rev_not_current' );
351 $this->
setLastError(
"Revision $triggeringRevisionId is not current" );
360 $stats->increment(
'refreshlinks_outcome.bad_rev_not_found' );
361 $this->
setLastError(
"Revision not found for {$title->getPrefixedDBkey()}" );
364 } elseif ( $revision->getId() !== $latest || $revision->getPageId() !== $page->
getId() ) {
369 $stats->increment(
'refreshlinks_outcome.bad_rev_not_current' );
370 $this->
setLastError(
"Revision {$revision->getId()} is not current" );
387 private function getParserOutputFromCache(
391 StatsdDataFactoryInterface $stats
393 $cachedOutput =
null;
397 $rootTimestamp = $this->params[
'rootJobTimestamp'] ??
null;
398 if ( $rootTimestamp !==
null ) {
399 $opportunistic = !empty( $this->params[
'isOpportunistic'] );
400 if ( $page->
getTouched() >= $rootTimestamp || $opportunistic ) {
405 $output = $parserCache->
getDirty( $page, $parserOptions );
408 $output->getCacheRevisionId() == $currentRevision->
getId() &&
409 $output->getCacheTime() >= $this->getLagAwareRootTimestamp()
411 $cachedOutput = $output;
416 if ( $cachedOutput ) {
417 $stats->increment(
'refreshlinks.parser_cached' );
419 $stats->increment(
'refreshlinks.parser_uncached' );
422 return $cachedOutput;
428 private function getDataUpdateOptions() {
430 'recursive' => !empty( $this->params[
'useRecursiveLinksUpdate'] ),
432 'causeAction' => $this->params[
'causeAction'],
433 'causeAgent' => $this->params[
'causeAgent']
435 if ( !empty( $this->params[
'triggeringUser'] ) ) {
436 $userInfo = $this->params[
'triggeringUser'];
437 if ( $userInfo[
'userId'] ) {
449 $info = parent::getDeduplicationInfo();
450 unset( $info[
'causeAction'] );
451 unset( $info[
'causeAgent'] );
452 if ( is_array( $info[
'params'] ) ) {
455 if ( isset( $info[
'params'][
'pages'] ) ) {
456 unset( $info[
'namespace'] );
457 unset( $info[
'title'] );
465 if ( !empty( $this->params[
'recursive'] ) ) {
467 } elseif ( isset( $this->params[
'pages'] ) ) {
468 return count( $this->params[
'pages'] );
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
static partitionBacklinkJob(Job $job, $bSize, $cSize, $opts=[])
Break down $job into approximately ($bSize/$cSize) leaf jobs and a single partition job that covers the remaining backlink range (if needed).
Class to both describe a background job and handle jobs.
A class containing constants representing the names of configuration variables.
Exception if a PageIdentity is an invalid argument.
Cache for ParserOutput objects corresponding to the latest page revisions.
getDirty(PageRecord $page, $popts)
Retrieve the ParserOutput from ParserCache, even if it's outdated.
Job to update link tables for pages.
getDeduplicationInfo()
Subclasses may need to override this to make duplication detection work.
runForTitle(PageIdentity $pageIdentity)
static newDynamic(PageIdentity $page, array $params)
static newPrioritized(PageIdentity $page, array $params)
__construct(PageIdentity $page, array $params)
static newFromName( $name, $validate='valid')
static newFromId( $id)
Static factory method for creation from a given user ID.
Base representation for an editable wiki page.
getLinksTimestamp()
Get the page_links_updated field.
makeParserOptions( $context)
Get parser options suitable for rendering the primary article wikitext.
getId( $wikiId=self::LOCAL)
getTitle()
Get the title object of the article.
doSecondaryDataUpdates(array $options=[])
Do secondary data updates (such as updating link tables).
loadPageData( $from='fromdb')
Load the object from a given source by title.
getRevisionRecord()
Get the latest revision.
getTouched()
Get the page_touched field.
Interface for objects (potentially) representing an editable wiki page.
canExist()
Checks whether this PageIdentity represents a "proper" page, meaning that it could exist as an editable page on the wiki.
if(count( $args)< 1) $job