Code Coverage

                       | Lines          | Functions and Methods | CRAP | Classes and Traits
-----------------------+----------------+-----------------------+------+-------------------
Total                  | 0.00% (0 / 87) | 0.00% (0 / 6)         |      | 0.00% (0 / 1)
HTMLCacheUpdateJob     | 0.00% (0 / 87) | 0.00% (0 / 6)         | 420  | 0.00% (0 / 1)
  __construct          | 0.00% (0 / 6)  | 0.00% (0 / 1)         | 12   |
  newForBacklinks      | 0.00% (0 / 10) | 0.00% (0 / 1)         | 2    |
  run                  | 0.00% (0 / 24) | 0.00% (0 / 1)         | 30   |
  invalidateTitles     | 0.00% (0 / 36) | 0.00% (0 / 1)         | 30   |
  getDeduplicationInfo | 0.00% (0 / 6)  | 0.00% (0 / 1)         | 12   |
  workItemCount        | 0.00% (0 / 5)  | 0.00% (0 / 1)         | 12   |
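Note on the CRAP column (added context; the report itself does not define the metric): the values are consistent with the standard Change Risk Anti-Patterns score,

	CRAP(m) = comp(m)^2 * (1 - cov(m))^3 + comp(m)

where comp is cyclomatic complexity and cov is coverage as a fraction. At 0% coverage this reduces to comp^2 + comp, so run() with CRAP 30 implies complexity 5 (25 + 5), __construct with CRAP 12 implies complexity 3 (9 + 3), and the class total of 420 matches an aggregate complexity of 20 (400 + 20).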
<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageReference;
use MediaWiki\Title\Title;

/**
 * Job to purge the HTML/file cache for all pages that link to or use another page or file
 *
 * This job comes in a few variants:
 * - a) Recursive jobs to purge caches for backlink pages for a given title.
 *      These jobs have (recursive:true,table:<table>) set.
 * - b) Jobs to purge caches for a set of titles (the job title is ignored).
 *      These jobs have (pages:(<page ID>:(<namespace>,<title>),...) set.
 *
 * @ingroup JobQueue
 * @ingroup Cache
 */
class HTMLCacheUpdateJob extends Job {
	/** @var int Lag safety margin when comparing root job time age to CDN max-age */
	private const NORMAL_MAX_LAG = 10;

	public function __construct( Title $title, array $params ) {
		parent::__construct( 'htmlCacheUpdate', $title, $params );
		// Avoid the overhead of de-duplication when it would be pointless.
		// Note that these jobs always set page_touched to the current time,
		// so letting the older existing job "win" is still correct.
		$this->removeDuplicates = (
			// Ranges will rarely line up
			!isset( $params['range'] ) &&
			// Multiple pages per job make matches unlikely
			!( isset( $params['pages'] ) && count( $params['pages'] ) != 1 )
		);
		$this->params += [ 'causeAction' => 'HTMLCacheUpdateJob', 'causeAgent' => 'unknown' ];
	}

	/**
	 * @param PageReference $page Page to purge backlink pages from
	 * @param string $table Backlink table name
	 * @param array $params Additional job parameters
	 *
	 * @return HTMLCacheUpdateJob
	 */
	public static function newForBacklinks( PageReference $page, $table, $params = [] ) {
		$title = Title::newFromPageReference( $page );
		return new self(
			$title,
			[
				'table' => $table,
				'recursive' => true
			] + Job::newRootJobParams( // "overall" refresh links job info
				"htmlCacheUpdate:{$table}:{$title->getPrefixedText()}"
			) + $params
		);
	}

	public function run() {
		$updateRowsPerJob = MediaWikiServices::getInstance()->getMainConfig()->get(
			MainConfigNames::UpdateRowsPerJob );
		$updateRowsPerQuery = MediaWikiServices::getInstance()->getMainConfig()->get(
			MainConfigNames::UpdateRowsPerQuery );
		if ( isset( $this->params['table'] ) && !isset( $this->params['pages'] ) ) {
			$this->params['recursive'] = true; // b/c; base job
		}

		// Job to purge all (or a range of) backlink pages for a page
		if ( !empty( $this->params['recursive'] ) ) {
			// Carry over information for de-duplication
			$extraParams = $this->getRootJobParams();
			// Carry over cause information for logging
			$extraParams['causeAction'] = $this->params['causeAction'];
			$extraParams['causeAgent'] = $this->params['causeAgent'];
			// Convert this into no more than $wgUpdateRowsPerJob HTMLCacheUpdateJob per-title
			// jobs and possibly a recursive HTMLCacheUpdateJob job for the rest of the backlinks
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
				$this,
				$updateRowsPerJob,
				$updateRowsPerQuery, // jobs-per-title
				// Carry over information for de-duplication
				[ 'params' => $extraParams ]
			);
			MediaWikiServices::getInstance()->getJobQueueGroup()->push( $jobs );
		// Job to purge pages for a set of titles
		} elseif ( isset( $this->params['pages'] ) ) {
			$this->invalidateTitles( $this->params['pages'] );
		// Job to update a single title
		} else {
			$t = $this->title;
			$this->invalidateTitles( [
				$t->getArticleID() => [ $t->getNamespace(), $t->getDBkey() ]
			] );
		}

		return true;
	}

	/**
	 * @param array $pages Map of (page ID => (namespace, DB key)) entries
	 */
	protected function invalidateTitles( array $pages ) {
		// Get all page IDs in this query into an array
		$pageIds = array_keys( $pages );
		if ( !$pageIds ) {
			return;
		}

		$rootTsUnix = wfTimestampOrNull( TS_UNIX, $this->params['rootJobTimestamp'] ?? null );
		// Bump page_touched to the current timestamp. This previously used the root job timestamp
		// (e.g. template/file edit time), which is a bit more efficient when template edits are
		// rare and don't affect the same pages much. However, this way better de-duplicates jobs,
		// which is much more useful for wikis with high edit rates. Note that RefreshLinksJob,
		// enqueued alongside HTMLCacheUpdateJob, saves the parser output since it has to parse
		// anyway. We assume that the vast majority of the cache jobs finish before the link jobs,
		// so using the current timestamp instead of the root timestamp is not expected to
		// invalidate these cache entries too often.
		$newTouchedUnix = time();
		// Timestamp used to bypass pages already invalidated since the triggering event
		$casTsUnix = $rootTsUnix ?? $newTouchedUnix;

		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();

		$dbProvider = $services->getConnectionProvider();
		$dbw = $dbProvider->getPrimaryDatabase();
		$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );
		// Update page_touched (skipping pages already touched since the root job).
		// Check $wgUpdateRowsPerQuery; batch jobs are sized by that already.
		$batches = array_chunk( $pageIds, $config->get( MainConfigNames::UpdateRowsPerQuery ) );
		foreach ( $batches as $batch ) {
			$dbw->newUpdateQueryBuilder()
				->update( 'page' )
				->set( [ 'page_touched' => $dbw->timestamp( $newTouchedUnix ) ] )
				->where( [ 'page_id' => $batch ] )
				->andWhere( $dbw->expr( 'page_touched', '<', $dbw->timestamp( $casTsUnix ) ) )
				->caller( __METHOD__ )->execute();
			if ( count( $batches ) > 1 ) {
				$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
			}
		}
		// Get the list of affected pages (races only mean something else did the purge)
		$queryBuilder = $dbw->newSelectQueryBuilder()
			->select( [ 'page_namespace', 'page_title' ] )
			->from( 'page' )
			->where( [ 'page_id' => $pageIds, 'page_touched' => $dbw->timestamp( $newTouchedUnix ) ] );
		if ( $config->get( MainConfigNames::PageLanguageUseDB ) ) {
			$queryBuilder->field( 'page_lang' );
		}
		$titleArray = $services->getTitleFactory()->newTitleArrayFromResult(
			$queryBuilder->caller( __METHOD__ )->fetchResultSet()
		);

		// Update CDN and file caches
		$htmlCache = $services->getHtmlCacheUpdater();
		$htmlCache->purgeTitleUrls(
			$titleArray,
			$htmlCache::PURGE_NAIVE | $htmlCache::PURGE_URLS_LINKSUPDATE_ONLY,
			[ $htmlCache::UNLESS_CACHE_MTIME_AFTER => $casTsUnix + self::NORMAL_MAX_LAG ]
		);
	}

	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();
		if ( is_array( $info['params'] ) ) {
			// For per-pages jobs, the job title is that of the template that changed
			// (or similar), so remove that since it ruins duplicate detection
			if ( isset( $info['params']['pages'] ) ) {
				unset( $info['namespace'] );
				unset( $info['title'] );
			}
		}

		return $info;
	}

	public function workItemCount() {
		if ( !empty( $this->params['recursive'] ) ) {
			return 0; // nothing actually purged
		} elseif ( isset( $this->params['pages'] ) ) {
			return count( $this->params['pages'] );
		}

		return 1; // one title
	}
}
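
For context (not part of the file above): a minimal sketch of how a recursive purge job like this is typically enqueued from extension or core code. The template name and cause parameters are illustrative values, not taken from this file; newForBacklinks() matches the signature shown above, and JobQueueGroup::lazyPush() is the standard MediaWiki queue entry point.

<?php
use MediaWiki\MediaWikiServices;
use MediaWiki\Title\Title;

// Purge the HTML/CDN caches of every page that transcludes a template.
// 'Template:Example' and the cause parameters are illustrative.
$title = Title::newFromText( 'Template:Example' );
if ( $title ) {
	$job = HTMLCacheUpdateJob::newForBacklinks(
		$title,
		'templatelinks', // backlink table to traverse recursively
		[ 'causeAction' => 'edit', 'causeAgent' => 'example' ]
	);
	// lazyPush() defers the actual queue insert to the end of the request
	MediaWikiServices::getInstance()->getJobQueueGroup()->lazyPush( $job );
}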