21use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
96 private const NORMAL_MAX_LAG = 10;
98 private const LAG_WAIT_TIMEOUT = 15;
104 'The given PageIdentity {pageIdentity} does not represent a proper page',
105 [
'pageIdentity' => $page ]
109 parent::__construct(
'refreshLinks', $page,
$params );
111 $this->removeDuplicates = (
117 $this->params += [
'causeAction' =>
'RefreshLinksJob',
'causeAgent' =>
'unknown' ];
121 $this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
131 $job->command =
'refreshLinksPrioritized';
143 $job->command =
'refreshLinksDynamic';
151 if ( !empty( $this->params[
'recursive'] ) ) {
157 $services = MediaWikiServices::getInstance();
158 if ( !isset( $this->params[
'range'] ) ) {
159 $lbFactory = $services->getDBLoadBalancerFactory();
160 if ( !$lbFactory->waitForReplication( [
161 'timeout' => self::LAG_WAIT_TIMEOUT
164 $stats = $services->getStatsdDataFactory();
165 $stats->increment(
'refreshlinks_warning.lag_wait_failed' );
170 $extraParams[
'triggeredRecursive'] =
true;
172 $extraParams[
'causeAction'] = $this->params[
'causeAction'];
173 $extraParams[
'causeAgent'] = $this->params[
'causeAgent'];
178 $services->getMainConfig()->get( MainConfigNames::UpdateRowsPerJob ),
180 [
'params' => $extraParams ]
182 $services->getJobQueueGroup()->push( $jobs );
184 } elseif ( isset( $this->params[
'pages'] ) ) {
186 foreach ( $this->params[
'pages'] as [ $ns, $dbKey ] ) {
187 $title = Title::makeTitleSafe( $ns, $dbKey );
209 $services = MediaWikiServices::getInstance();
210 $stats = $services->getStatsdDataFactory();
211 $renderer = $services->getRevisionRenderer();
212 $parserCache = $services->getParserCache();
213 $lbFactory = $services->getDBLoadBalancerFactory();
214 $ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
217 $page = $services->getWikiPageFactory()->newFromTitle( $pageIdentity );
222 $logger = LoggerFactory::getInstance(
'RefreshLinksJob' );
224 'The page does not exist. Perhaps it was deleted?',
226 'page_title' => $this->title->getPrefixedDBkey(),
227 'job_params' => $this->getParams(),
228 'job_metadata' => $this->getMetadata()
231 $stats->increment(
'refreshlinks_outcome.bad_page_not_found' );
241 $dbw = $lbFactory->getPrimaryDatabase();
243 $scopedLock = LinksUpdate::acquirePageLock( $dbw, $page->
getId(),
'job' );
244 if ( $scopedLock ===
null ) {
246 $this->
setLastError(
'LinksUpdate already running for this page, try again later.' );
247 $stats->increment(
'refreshlinks_outcome.bad_lock_failure' );
253 if ( $this->isAlreadyRefreshed( $page ) ) {
256 $stats->increment(
'refreshlinks_outcome.good_update_superseded' );
263 $lbFactory->flushReplicaSnapshots( __METHOD__ );
265 $lbFactory->beginPrimaryChanges( __METHOD__ );
266 $output = $this->getParserOutput( $renderer, $parserCache, $page, $stats );
267 $options = $this->getDataUpdateOptions();
268 $lbFactory->commitPrimaryChanges( __METHOD__ );
278 $options[
'known-revision-output'] = $output;
281 InfoAction::invalidateCache( $page );
291 $lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
299 private function getLagAwareRootTimestamp() {
301 $rootTimestamp = $this->params[
'rootJobTimestamp'] ??
null;
302 if ( $rootTimestamp ===
null ) {
306 if ( !empty( $this->params[
'isOpportunistic'] ) ) {
309 $lagAwareTimestamp = $rootTimestamp;
314 (
int)
wfTimestamp( TS_UNIX, $rootTimestamp ) + self::NORMAL_MAX_LAG
318 return $lagAwareTimestamp;
325 private function isAlreadyRefreshed(
WikiPage $page ) {
326 $lagAwareTimestamp = $this->getLagAwareRootTimestamp();
328 return ( $lagAwareTimestamp !==
null && $page->
getLinksTimestamp() > $lagAwareTimestamp );
336 private function shouldGenerateHTMLOnEdit(
RevisionRecord $revision ): bool {
338 foreach ( $revision->
getSlots()->getSlotRoles() as $role ) {
339 $slot = $revision->
getSlots()->getSlot( $role );
340 $contentHandler = $services->getContentHandlerFactory()->getContentHandler( $slot->getModel() );
341 if ( $contentHandler->generateHTMLOnEdit() ) {
357 private function getParserOutput(
361 StatsdDataFactoryInterface $stats
363 $revision = $this->getCurrentRevisionIfUnchanged( $page, $stats );
369 $cachedOutput = $this->getParserOutputFromCache( $parserCache, $page, $revision, $stats );
370 if ( $cachedOutput ) {
371 return $cachedOutput;
374 $causeAction = $this->params[
'causeAction'] ??
'RefreshLinksJob';
379 [
'audience' => $revision::RAW,
'causeAction' => $causeAction ]
383 $output = $renderedRevision->getRevisionParserOutput( [
385 'generate-html' => $this->shouldGenerateHTMLOnEdit( $revision )
387 $output->setCacheTime( $parseTimestamp );
399 private function getCurrentRevisionIfUnchanged(
401 StatsdDataFactoryInterface $stats
407 $latest = $title->getLatestRevID( IDBAccessObject::READ_LATEST );
409 $triggeringRevisionId = $this->params[
'triggeringRevisionId'] ??
null;
410 if ( $triggeringRevisionId && $triggeringRevisionId !== $latest ) {
412 $stats->increment(
'refreshlinks_outcome.bad_rev_not_current' );
413 $this->
setLastError(
"Revision $triggeringRevisionId is not current" );
422 $stats->increment(
'refreshlinks_outcome.bad_rev_not_found' );
423 $this->
setLastError(
"Revision not found for {$title->getPrefixedDBkey()}" );
431 $stats->increment(
'refreshlinks_outcome.bad_rev_not_current' );
432 $this->
setLastError(
"Revision {$revision->getId()} is not current" );
449 private function getParserOutputFromCache(
453 StatsdDataFactoryInterface $stats
455 $cachedOutput =
null;
459 $rootTimestamp = $this->params[
'rootJobTimestamp'] ??
null;
460 if ( $rootTimestamp !==
null ) {
461 $opportunistic = !empty( $this->params[
'isOpportunistic'] );
462 if ( $page->
getTouched() >= $rootTimestamp || $opportunistic ) {
467 $output = $parserCache->
getDirty( $page, $parserOptions );
470 $output->getCacheRevisionId() == $currentRevision->
getId() &&
471 $output->getCacheTime() >= $this->getLagAwareRootTimestamp()
473 $cachedOutput = $output;
478 if ( $cachedOutput ) {
479 $stats->increment(
'refreshlinks.parser_cached' );
481 $stats->increment(
'refreshlinks.parser_uncached' );
484 return $cachedOutput;
490 private function getDataUpdateOptions() {
492 'recursive' => !empty( $this->params[
'useRecursiveLinksUpdate'] ),
494 'causeAction' => $this->params[
'causeAction'],
495 'causeAgent' => $this->params[
'causeAgent']
497 if ( !empty( $this->params[
'triggeringUser'] ) ) {
498 $userInfo = $this->params[
'triggeringUser'];
499 if ( $userInfo[
'userId'] ) {
500 $options[
'triggeringUser'] = User::newFromId( $userInfo[
'userId'] );
503 $options[
'triggeringUser'] = User::newFromName( $userInfo[
'userName'],
false );
511 $info = parent::getDeduplicationInfo();
512 unset( $info[
'causeAction'] );
513 unset( $info[
'causeAgent'] );
514 if ( is_array( $info[
'params'] ) ) {
517 if ( isset( $info[
'params'][
'pages'] ) ) {
518 unset( $info[
'namespace'] );
519 unset( $info[
'title'] );
527 if ( !empty( $this->params[
'recursive'] ) ) {
529 } elseif ( isset( $this->params[
'pages'] ) ) {
530 return count( $this->params[
'pages'] );
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
array $params
The job parameters.
setLastError( $error)
This is actually implemented in the Job class.
static partitionBacklinkJob(Job $job, $bSize, $cSize, $opts=[])
Break down $job into approximately ($bSize/$cSize) leaf jobs and a single partition job that covers the remaining backlink range (if needed).
Describe and execute a background job.
A class containing constants representing the names of configuration variables.
Exception if a PageIdentity is an invalid argument.
Cache for ParserOutput objects corresponding to the latest page revisions.
getDirty(PageRecord $page, $popts)
Retrieve the ParserOutput from ParserCache, even if it's outdated.
Job to update link tables for rerendered wiki pages.
getDeduplicationInfo()
Subclasses may need to override this to make duplication detection work.
runForTitle(PageIdentity $pageIdentity)
static newDynamic(PageIdentity $page, array $params)
static newPrioritized(PageIdentity $page, array $params)
__construct(PageIdentity $page, array $params)
Base representation for an editable wiki page.
getLinksTimestamp()
Get the page_links_updated field.
makeParserOptions( $context)
Get parser options suitable for rendering the primary article wikitext.
getId( $wikiId=self::LOCAL)
getTitle()
Get the title object of the article.
doSecondaryDataUpdates(array $options=[])
Do secondary data updates (such as updating link tables).
loadPageData( $from='fromdb')
Load the object from a given source by title.
getRevisionRecord()
Get the latest revision.
getTouched()
Get the page_touched field.
Interface for objects (potentially) representing an editable wiki page.
canExist()
Checks whether this PageIdentity represents a "proper" page, meaning that it could exist as an editable page on the wiki.
if(count( $args)< 1) $job