Code Coverage

|                      | Lines          | Functions and Methods | CRAP | Classes and Traits |
|----------------------|----------------|-----------------------|------|--------------------|
| Total                | 0.00% (0 / 88) | 0.00% (0 / 6)         |      | 0.00% (0 / 1)      |
| HTMLCacheUpdateJob   | 0.00% (0 / 87) | 0.00% (0 / 6)         | 420  | 0.00% (0 / 1)      |
| __construct          | 0.00% (0 / 6)  | 0.00% (0 / 1)         | 12   |                    |
| newForBacklinks      | 0.00% (0 / 10) | 0.00% (0 / 1)         | 2    |                    |
| run                  | 0.00% (0 / 24) | 0.00% (0 / 1)         | 30   |                    |
| invalidateTitles     | 0.00% (0 / 36) | 0.00% (0 / 1)         | 30   |                    |
| getDeduplicationInfo | 0.00% (0 / 6)  | 0.00% (0 / 1)         | 12   |                    |
| workItemCount        | 0.00% (0 / 5)  | 0.00% (0 / 1)         | 12   |                    |
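CRAP is PHPUnit's Change Risk Anti-Patterns index: for a method with cyclomatic complexity comp and coverage cov (as a fraction), CRAP = comp² × (1 − cov)³ + comp. At 0% coverage this reduces to comp² + comp, so the values above correspond to complexities of 3 (__construct, getDeduplicationInfo, workItemCount), 1 (newForBacklinks), and 5 (run, invalidateTitles); the class figure of 420 matches the summed complexity of 20 (20² + 20).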
<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

namespace MediaWiki\JobQueue\Jobs;

use MediaWiki\JobQueue\Job;
use MediaWiki\JobQueue\Utils\BacklinkJobUtils;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageReference;
use MediaWiki\Title\Title;

/**
 * Job to purge the HTML/file cache for all pages that link to or use another page or file
 *
 * This job comes in a few variants:
 *   - a) Recursive jobs to purge caches for backlink pages for a given title.
 *        These jobs have (recursive:true,table:<table>) set.
 *   - b) Jobs to purge caches for a set of titles (the job title is ignored).
 *        These jobs have (pages:(<page ID>:(<namespace>,<title>),...)) set.
 *
 * @ingroup JobQueue
 * @ingroup Cache
 */
class HTMLCacheUpdateJob extends Job {
	/** @var int Lag safety margin when comparing root job time age to CDN max-age */
	private const NORMAL_MAX_LAG = 10;

	public function __construct( Title $title, array $params ) {
		parent::__construct( 'htmlCacheUpdate', $title, $params );
		// Avoid the overhead of de-duplication when it would be pointless.
		// Note that these jobs always set page_touched to the current time,
		// so letting the older existing job "win" is still correct.
		$this->removeDuplicates = (
			// Ranges rarely will line up
			!isset( $params['range'] ) &&
			// Multiple pages per job make matches unlikely
			!( isset( $params['pages'] ) && count( $params['pages'] ) != 1 )
		);
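		// Fall back to generic cause metadata for logging when the enqueuing
		// code did not specify any ("+=" only fills in missing keys)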
		$this->params += [ 'causeAction' => 'HTMLCacheUpdateJob', 'causeAgent' => 'unknown' ];
	}

	/**
	 * @param PageReference $page Page to purge backlink pages from
	 * @param string $table Backlink table name
	 * @param array $params Additional job parameters
	 *
	 * @return HTMLCacheUpdateJob
	 */
	public static function newForBacklinks( PageReference $page, $table, $params = [] ) {
		$title = Title::newFromPageReference( $page );
		return new self(
			$title,
			[
				'table' => $table,
				'recursive' => true
			] + Job::newRootJobParams( // "overall" refresh links job info
				"htmlCacheUpdate:{$table}:{$title->getPrefixedText()}"
			) + $params
		);
	}

	public function run() {
		$updateRowsPerJob = MediaWikiServices::getInstance()->getMainConfig()->get(
			MainConfigNames::UpdateRowsPerJob );
		$updateRowsPerQuery = MediaWikiServices::getInstance()->getMainConfig()->get(
			MainConfigNames::UpdateRowsPerQuery );
		if ( isset( $this->params['table'] ) && !isset( $this->params['pages'] ) ) {
			$this->params['recursive'] = true; // b/c; base job
		}

		// Job to purge all (or a range of) backlink pages for a page
		if ( !empty( $this->params['recursive'] ) ) {
			// Carry over information for de-duplication
			$extraParams = $this->getRootJobParams();
			// Carry over cause information for logging
			$extraParams['causeAction'] = $this->params['causeAction'];
			$extraParams['causeAgent'] = $this->params['causeAgent'];
			// Convert this into no more than $wgUpdateRowsPerJob HTMLCacheUpdateJob per-title
			// jobs and possibly a recursive HTMLCacheUpdateJob job for the rest of the backlinks
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
				$this,
				$updateRowsPerJob,
				$updateRowsPerQuery, // max titles per leaf job
				// Carry over information for de-duplication
				[ 'params' => $extraParams ]
			);
			MediaWikiServices::getInstance()->getJobQueueGroup()->push( $jobs );
		// Job to purge pages for a set of titles
		} elseif ( isset( $this->params['pages'] ) ) {
			$this->invalidateTitles( $this->params['pages'] );
		// Job to update a single title
		} else {
			$t = $this->title;
			$this->invalidateTitles( [
				$t->getArticleID() => [ $t->getNamespace(), $t->getDBkey() ]
			] );
		}

		return true;
	}

	/**
	 * @param array $pages Map of (page ID => (namespace, DB key)) entries
	 */
	protected function invalidateTitles( array $pages ) {
		// Get all page IDs in this query into an array
		$pageIds = array_keys( $pages );
		if ( !$pageIds ) {
			return;
		}

		$rootTsUnix = wfTimestampOrNull( TS_UNIX, $this->params['rootJobTimestamp'] ?? null );
		// Bump page_touched to the current timestamp. This previously used the root job timestamp
		// (e.g. template/file edit time), which is a bit more efficient when template edits are
		// rare and don't affect the same pages much. However, this way better de-duplicates jobs,
		// which is much more useful for wikis with high edit rates. Note that RefreshLinksJob,
		// enqueued alongside HTMLCacheUpdateJob, saves the parser output since it has to parse
		// anyway. We assume that the vast majority of the cache jobs finish before the link jobs,
		// so using the current timestamp instead of the root timestamp is not expected to
		// invalidate these cache entries too often.
		$newTouchedUnix = time();
		// Timestamp used to bypass pages already invalidated since the triggering event
		$casTsUnix = $rootTsUnix ?? $newTouchedUnix;

		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();

		$dbProvider = $services->getConnectionProvider();
		$dbw = $dbProvider->getPrimaryDatabase();
		$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );
		// Update page_touched (skipping pages already touched since the root job).
		// Check $wgUpdateRowsPerQuery; batch jobs are sized by that already.
		$batches = array_chunk( $pageIds, $config->get( MainConfigNames::UpdateRowsPerQuery ) );
		foreach ( $batches as $batch ) {
			$dbw->newUpdateQueryBuilder()
				->update( 'page' )
				->set( [ 'page_touched' => $dbw->timestamp( $newTouchedUnix ) ] )
				->where( [ 'page_id' => $batch ] )
				->andWhere( $dbw->expr( 'page_touched', '<', $dbw->timestamp( $casTsUnix ) ) )
				->caller( __METHOD__ )->execute();
			if ( count( $batches ) > 1 ) {
				$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
			}
		}
		// Get the list of affected pages (races only mean something else did the purge)
		$queryBuilder = $dbw->newSelectQueryBuilder()
			->select( [ 'page_namespace', 'page_title' ] )
			->from( 'page' )
			->where( [ 'page_id' => $pageIds, 'page_touched' => $dbw->timestamp( $newTouchedUnix ) ] );
		if ( $config->get( MainConfigNames::PageLanguageUseDB ) ) {
			$queryBuilder->field( 'page_lang' );
		}
		$titleArray = $services->getTitleFactory()->newTitleArrayFromResult(
			$queryBuilder->caller( __METHOD__ )->fetchResultSet()
		);

		// Update CDN and file caches
		$htmlCache = $services->getHtmlCacheUpdater();
		$htmlCache->purgeTitleUrls(
			$titleArray,
			$htmlCache::PURGE_NAIVE | $htmlCache::PURGE_URLS_LINKSUPDATE_ONLY,
			[ $htmlCache::UNLESS_CACHE_MTIME_AFTER => $casTsUnix + self::NORMAL_MAX_LAG ]
		);
	}

	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();
		if ( is_array( $info['params'] ) ) {
			// For per-pages jobs, the job title is that of the template that changed
			// (or similar), so remove that since it ruins duplicate detection
			if ( isset( $info['params']['pages'] ) ) {
				unset( $info['namespace'] );
				unset( $info['title'] );
			}
		}

		return $info;
	}

	public function workItemCount() {
		if ( !empty( $this->params['recursive'] ) ) {
			return 0; // nothing actually purged
		} elseif ( isset( $this->params['pages'] ) ) {
			return count( $this->params['pages'] );
		}

		return 1; // one title
	}
}

/** @deprecated class alias since 1.44 */
class_alias( HTMLCacheUpdateJob::class, 'HTMLCacheUpdateJob' );
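For orientation, a short usage sketch follows. It is not part of the file above: the template name, page IDs, and cause values are hypothetical, while newForBacklinks(), the pages parameter format, and the JobQueueGroup push mirror what the source itself defines and uses.

<?php
// A minimal enqueuing sketch; assumes a normal MediaWiki runtime.
use MediaWiki\JobQueue\Jobs\HTMLCacheUpdateJob;
use MediaWiki\MediaWikiServices;
use MediaWiki\Title\Title;

// Variant (a): recursively purge the cache of every page that transcludes a
// template; 'templatelinks' is the backlink table that tracks transclusions.
$template = Title::makeTitle( NS_TEMPLATE, 'Example' ); // hypothetical title
$recursiveJob = HTMLCacheUpdateJob::newForBacklinks(
	$template,
	'templatelinks',
	[ 'causeAction' => 'template-edit', 'causeAgent' => 'ExampleExtension' ]
);

// Variant (b): purge a fixed set of pages directly; the job title is ignored,
// so any valid title may be passed. The page IDs and DB keys are made up.
$directJob = new HTMLCacheUpdateJob( $template, [
	'pages' => [
		100 => [ 0, 'Some_page' ], // page ID => [ namespace, DB key ]
		101 => [ 0, 'Another_page' ],
	],
] );

MediaWikiServices::getInstance()->getJobQueueGroup()->push( [ $recursiveJob, $directJob ] );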