MediaWiki master
HTMLCacheUpdateJob.php
Go to the documentation of this file.
1<?php
8
15use Wikimedia\Timestamp\TimestampFormat as TS;
16
/**
 * Job to purge the HTML/file cache for all pages that link to or use another
 * page or file.
 *
 * Variants (selected in run() based on job parameters):
 *  - Recursive jobs ('recursive' => true, 'table' => <backlink table>) that fan
 *    out over the backlinks of the job title, partitioning themselves into
 *    smaller per-title jobs.
 *  - Page-set jobs ('pages' => [ <page ID> => [ <namespace>, <dbkey> ], ... ])
 *    that bump page_touched and purge CDN URLs for an explicit list of pages.
 *  - Single-title jobs (neither parameter set) that purge just the job title.
 */
class HTMLCacheUpdateJob extends Job {
	/**
	 * Safety margin (seconds) added to the CAS timestamp when telling the CDN
	 * layer to skip purging entries already cached after the triggering event.
	 */
	private const NORMAL_MAX_LAG = 10;

	/**
	 * @param Title $title Page to purge, or root page for recursive backlink jobs
	 * @param array $params Job parameters ('table', 'recursive', 'pages', 'range',
	 *  cause information, root job parameters, ...)
	 */
	public function __construct( Title $title, array $params ) {
		parent::__construct( 'htmlCacheUpdate', $title, $params );
		// Avoid the overhead of de-duplication when it would be pointless.
		// Note that these jobs always set page_touched to the current time,
		// so letting the older existing job "win" is still correct.
		$this->removeDuplicates = (
			// Ranges rarely will line up
			!isset( $params['range'] ) &&
			// Multiple pages per job make matches unlikely
			!( isset( $params['pages'] ) && count( $params['pages'] ) != 1 )
		);
		$this->params += [ 'causeAction' => 'HTMLCacheUpdateJob', 'causeAgent' => 'unknown' ];
	}

	/**
	 * Create a recursive job that purges all pages linked to $page via $table.
	 *
	 * @param PageReference $page Page whose backlink pages should be purged
	 * @param string $table Backlink table name (e.g. "templatelinks")
	 * @param array $params Extra job parameters (e.g. cause information)
	 * @return self
	 */
	public static function newForBacklinks( PageReference $page, $table, $params = [] ) {
		$title = Title::newFromPageReference( $page );
		return new self(
			$title,
			[
				'table' => $table,
				'recursive' => true
			] + Job::newRootJobParams( // "overall" refresh links job info
				"htmlCacheUpdate:{$table}:{$title->getPrefixedText()}"
			) + $params
		);
	}

	/**
	 * Dispatch to the appropriate purge strategy based on the job parameters.
	 *
	 * @return bool Always true (failures are handled per-batch internally)
	 */
	public function run() {
		$updateRowsPerJob = MediaWikiServices::getInstance()->getMainConfig()->get(
			MainConfigNames::UpdateRowsPerJob );
		$updateRowsPerQuery = MediaWikiServices::getInstance()->getMainConfig()->get(
			MainConfigNames::UpdateRowsPerQuery );
		if ( isset( $this->params['table'] ) && !isset( $this->params['pages'] ) ) {
			$this->params['recursive'] = true; // b/c; base job
		}

		// Job to purge all (or a range of) backlink pages for a page
		if ( !empty( $this->params['recursive'] ) ) {
			// Carry over information for de-duplication
			$extraParams = $this->getRootJobParams();
			// Carry over cause information for logging
			$extraParams['causeAction'] = $this->params['causeAction'];
			$extraParams['causeAgent'] = $this->params['causeAgent'];
			// Convert this into no more than $wgUpdateRowsPerJob HTMLCacheUpdateJob per-title
			// jobs and possibly a recursive HTMLCacheUpdateJob job for the rest of the backlinks
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
				$this,
				$updateRowsPerJob,
				$updateRowsPerQuery, // jobs-per-title
				// Carry over information for de-duplication
				[ 'params' => $extraParams ]
			);
			MediaWikiServices::getInstance()->getJobQueueGroup()->push( $jobs );
			// Job to purge pages for a set of titles
		} elseif ( isset( $this->params['pages'] ) ) {
			$this->invalidateTitles( $this->params['pages'] );
			// Job to update a single title
		} else {
			$t = $this->title;
			$this->invalidateTitles( [
				$t->getArticleID() => [ $t->getNamespace(), $t->getDBkey() ]
			] );
		}

		return true;
	}

	/**
	 * Bump page_touched for the given pages and purge their CDN/file-cache URLs.
	 *
	 * @param array $pages Map of (page ID => (namespace, DB key))
	 */
	protected function invalidateTitles( array $pages ) {
		// Get all page IDs in this query into an array
		$pageIds = array_keys( $pages );
		if ( !$pageIds ) {
			return;
		}

		$rootTsUnix = wfTimestampOrNull( TS::UNIX, $this->params['rootJobTimestamp'] ?? null );
		// Bump page_touched to the current timestamp. This previously used the root job timestamp
		// (e.g. template/file edit time), which is a bit more efficient when template edits are
		// rare and don't affect the same pages much. However, this way better de-duplicates jobs,
		// which is much more useful for wikis with high edit rates. Note that RefreshLinksJob,
		// enqueued alongside HTMLCacheUpdateJob, saves the parser output since it has to parse
		// anyway. We assume that vast majority of the cache jobs finish before the link jobs,
		// so using the current timestamp instead of the root timestamp is not expected to
		// invalidate these cache entries too often.
		$newTouchedUnix = (int)wfTimestamp();
		// Timestamp used to bypass pages already invalidated since the triggering event
		$casTsUnix = $rootTsUnix ?? $newTouchedUnix;

		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();

		$dbProvider = $services->getConnectionProvider();
		$dbw = $dbProvider->getPrimaryDatabase();
		$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );
		// Update page_touched (skipping pages already touched since the root job).
		// Check $wgUpdateRowsPerQuery; batch jobs are sized by that already.
		$batches = array_chunk( $pageIds, $config->get( MainConfigNames::UpdateRowsPerQuery ) );
		foreach ( $batches as $batch ) {
			$dbw->newUpdateQueryBuilder()
				->update( 'page' )
				->set( [ 'page_touched' => $dbw->timestamp( $newTouchedUnix ) ] )
				->where( [ 'page_id' => $batch ] )
				->andWhere( $dbw->expr( 'page_touched', '<', $dbw->timestamp( $casTsUnix ) ) )
				->caller( __METHOD__ )->execute();
			if ( count( $batches ) > 1 ) {
				$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
			}
		}
		// Get the list of affected pages (races only mean something else did the purge)
		$queryBuilder = $dbw->newSelectQueryBuilder()
			->select( [ 'page_namespace', 'page_title' ] )
			->from( 'page' )
			->where( [ 'page_id' => $pageIds, 'page_touched' => $dbw->timestamp( $newTouchedUnix ) ] );
		if ( $config->get( MainConfigNames::PageLanguageUseDB ) ) {
			$queryBuilder->field( 'page_lang' );
		}
		$titleArray = $services->getTitleFactory()->newTitleArrayFromResult(
			$queryBuilder->caller( __METHOD__ )->fetchResultSet()
		);

		// Update CDN and file caches
		$htmlCache = $services->getHtmlCacheUpdater();
		$htmlCache->purgeTitleUrls(
			$titleArray,
			$htmlCache::PURGE_NAIVE | $htmlCache::PURGE_URLS_LINKSUPDATE_ONLY,
			[ $htmlCache::UNLESS_CACHE_MTIME_AFTER => $casTsUnix + self::NORMAL_MAX_LAG ]
		);
	}

	/**
	 * @return array Map used by the job queue to detect duplicate jobs
	 */
	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();
		if ( is_array( $info['params'] ) ) {
			// For per-pages jobs, the job title is that of the template that changed
			// (or similar), so remove that since it ruins duplicate detection
			if ( isset( $info['params']['pages'] ) ) {
				unset( $info['namespace'] );
				unset( $info['title'] );
			}
		}

		return $info;
	}

	/**
	 * @return int Number of pages this job will actually purge
	 */
	public function workItemCount() {
		if ( !empty( $this->params['recursive'] ) ) {
			return 0; // nothing actually purged
		} elseif ( isset( $this->params['pages'] ) ) {
			return count( $this->params['pages'] );
		}

		return 1; // one title
	}
}
198
// Keep the legacy un-namespaced class name resolvable for existing callers.
class_alias( HTMLCacheUpdateJob::class, 'HTMLCacheUpdateJob' );
wfTimestampOrNull( $outputtype=TS::UNIX, $ts=null)
Return a formatted timestamp, or null if input is null.
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
Describe and execute a background job.
Definition Job.php:28
array $params
Array of job parameters.
Definition Job.php:33
static newRootJobParams( $key)
Get "root job" parameters for a task.
Definition Job.php:297
Job to purge the HTML/file cache for all pages that link to or use another page or file.
getDeduplicationInfo()
Subclasses may need to override this to make duplication detection work. The resulting map conveys everything that makes the job unique.
static newForBacklinks(PageReference $page, $table, $params=[])
run()
Run the job. If this method returns false or completes exceptionally, the job runner will retry execution.
Helper for a Job that updates links to a given page title.
A class containing constants representing the names of configuration variables.
const UpdateRowsPerQuery
Name constant for the UpdateRowsPerQuery setting, for use with Config::get()
const UpdateRowsPerJob
Name constant for the UpdateRowsPerJob setting, for use with Config::get()
const PageLanguageUseDB
Name constant for the PageLanguageUseDB setting, for use with Config::get()
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Represents a title within MediaWiki.
Definition Title.php:69
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.