Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
64.14% |
127 / 198 |
|
20.00% |
3 / 15 |
CRAP | |
0.00% |
0 / 1 |
RefreshLinksJob | |
64.14% |
127 / 198 |
|
20.00% |
3 / 15 |
200.60 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
5 | |||
newPrioritized | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
newDynamic | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
run | |
27.27% |
9 / 33 |
|
0.00% |
0 / 1 |
40.16 | |||
runForTitle | |
93.02% |
40 / 43 |
|
0.00% |
0 / 1 |
5.01 | |||
getLagAwareRootTimestamp | |
90.00% |
9 / 10 |
|
0.00% |
0 / 1 |
3.01 | |||
isAlreadyRefreshed | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
shouldGenerateHTMLOnEdit | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
getParserOutput | |
94.74% |
18 / 19 |
|
0.00% |
0 / 1 |
3.00 | |||
getCurrentRevisionIfUnchanged | |
64.71% |
11 / 17 |
|
0.00% |
0 / 1 |
7.58 | |||
getParserOutputFromCache | |
42.86% |
9 / 21 |
|
0.00% |
0 / 1 |
19.94 | |||
incrementFailureCounter | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
getDataUpdateOptions | |
63.64% |
7 / 11 |
|
0.00% |
0 / 1 |
3.43 | |||
getDeduplicationInfo | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
12 | |||
workItemCount | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | |
21 | use MediaWiki\Deferred\LinksUpdate\LinksUpdate; |
22 | use MediaWiki\Logger\LoggerFactory; |
23 | use MediaWiki\MainConfigNames; |
24 | use MediaWiki\MediaWikiServices; |
25 | use MediaWiki\Page\PageAssertionException; |
26 | use MediaWiki\Page\PageIdentity; |
27 | use MediaWiki\Parser\ParserOutput; |
28 | use MediaWiki\Revision\RevisionRecord; |
29 | use MediaWiki\Revision\RevisionRenderer; |
30 | use MediaWiki\Title\Title; |
31 | use MediaWiki\User\User; |
32 | use Wikimedia\Stats\StatsFactory; |
33 | |
34 | /** |
35 | * Job to update link tables for rerendered wiki pages. |
36 | * |
37 | * This job comes in a few variants: |
38 | * |
39 | * - a) Recursive jobs to update links for backlink pages for a given title. |
40 | * Scheduled by {@see LinksUpdate::queueRecursiveJobsForTable()}; used to |
41 | * refresh pages which link/transclude a given title. |
42 | * These jobs have (recursive:true,table:<table>) set. They just look up |
43 | * which pages link to the job title and schedule them as a set of non-recursive |
 * RefreshLinksJob jobs (and possibly one new recursive job as a way of
45 | * continuation). |
46 | * - b) Jobs to update links for a set of pages (the job title is ignored). |
47 | * These jobs have (pages:(<page ID>:(<namespace>,<title>),...) set. |
48 | * - c) Jobs to update links for a single page (the job title). |
49 | * These jobs need no extra fields set. |
50 | * |
51 | * Job parameters for all jobs: |
52 | * - recursive (bool): When false, updates the current page. When true, updates |
53 | * the pages which link/transclude the current page. |
54 | * - triggeringRevisionId (int): The revision of the edit which caused the link |
55 | * refresh. For manually triggered updates, the last revision of the page (at the |
56 | * time of scheduling). |
57 | * - triggeringUser (array): The user who triggered the refresh, in the form of a |
58 | * [ 'userId' => int, 'userName' => string ] array. This is not necessarily the user |
59 | * who created the revision. |
60 | * - triggeredRecursive (bool): Set on all jobs which were partitioned from another, |
61 | * recursive job. For debugging. |
62 | * - Standard deduplication params (see {@see JobQueue::deduplicateRootJob()}). |
63 | * For recursive jobs: |
64 | * - table (string): Which table to use (imagelinks or templatelinks) when searching for |
65 | * affected pages. |
66 | * - range (array): Used for recursive jobs when some pages have already been partitioned |
67 | * into separate jobs. Contains the list of ranges that still need to be partitioned. |
68 | * See {@see BacklinkJobUtils::partitionBacklinkJob()}. |
69 | * - division: Number of times the job was partitioned already (for debugging). |
70 | * For non-recursive jobs: |
71 | * - pages (array): Associative array of [ <page ID> => [ <namespace>, <dbkey> ] ]. |
72 | * Might be omitted, then the job title will be used. |
73 | * - isOpportunistic (bool): Set for opportunistic single-page updates. These are "free" |
74 | * updates that are queued when most of the work needed to be performed anyway for |
75 | * non-linkrefresh-related reasons, and can be more easily discarded if they don't seem |
76 | * useful. See {@see WikiPage::triggerOpportunisticLinksUpdate()}. |
77 | * - useRecursiveLinksUpdate (bool): When true, triggers recursive jobs for each page. |
78 | * |
79 | * Metrics: |
80 | * - `refreshlinks_superseded_updates_total`: The number of times the job was cancelled |
81 | * because the target page had already been refreshed by a different edit or job. |
82 | * The job is considered to have succeeded in this case. |
83 | * |
84 | * - `refreshlinks_warnings_total`: The number of times the job failed due to a recoverable issue. |
85 | * Possible `reason` label values include: |
86 | * - `lag_wait_failed`: The job timed out while waiting for replication. |
87 | * |
88 | * - `refreshlinks_failures_total`: The number of times the job failed. |
89 | * The `reason` label may be: |
90 | * - `page_not_found`: The target page did not exist. |
91 | * - `rev_not_current`: The target revision was no longer the latest revision for the target page. |
92 | * - `rev_not_found`: The target revision was not found. |
93 | * - `lock_failure`: The job failed to acquire an exclusive lock to refresh the target page. |
94 | * |
95 | * - `refreshlinks_parsercache_operations_total`: The number of times the job attempted |
96 | * to fetch parser output from the parser cache. |
97 | * Possible `status` label values include: |
98 | * - `cache_hit`: The parser output was found in the cache. |
99 | * - `cache_miss`: The parser output was not found in the cache. |
100 | * |
101 | * @ingroup JobQueue |
102 | * @see RefreshSecondaryDataUpdate |
103 | * @see WikiPage::doSecondaryDataUpdates() |
104 | */ |
class RefreshLinksJob extends Job {
	/** @var int Lag safety margin when comparing root job times to last-refresh times */
	private const NORMAL_MAX_LAG = 10;
	/** @var int How many seconds to wait for replica DBs to catch up */
	private const LAG_WAIT_TIMEOUT = 15;

	/**
	 * @param PageIdentity $page The page to refresh. Ignored as a target when the
	 *   'pages' parameter is set (variant b in the class documentation), but still
	 *   used as the job title.
	 * @param array $params Job parameters; see the class documentation for the
	 *   supported keys.
	 * @throws PageAssertionException If no 'pages' parameter is given and $page
	 *   cannot represent a proper page (BC with the Title class).
	 */
	public function __construct( PageIdentity $page, array $params ) {
		if ( empty( $params['pages'] ) && !$page->canExist() ) {
			// BC with the Title class
			throw new PageAssertionException(
				'The given PageIdentity {pageIdentity} does not represent a proper page',
				[ 'pageIdentity' => $page ]
			);
		}

		parent::__construct( 'refreshLinks', $page, $params );
		// Avoid the overhead of de-duplication when it would be pointless
		$this->removeDuplicates = (
			// Ranges rarely will line up
			!isset( $params['range'] ) &&
			// Multiple pages per job make matches unlikely
			!( isset( $params['pages'] ) && count( $params['pages'] ) != 1 )
		);
		$this->params += [ 'causeAction' => 'RefreshLinksJob', 'causeAgent' => 'unknown' ];
		// Tell JobRunner to not automatically wrap run() in a transaction round.
		// Each runForTitle() call will manage its own rounds in order to run DataUpdates
		// and to avoid contention as well.
		$this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
	}

	/**
	 * Create a job that goes into the high-priority 'refreshLinksPrioritized' queue.
	 *
	 * @param PageIdentity $page
	 * @param array $params
	 * @return RefreshLinksJob
	 */
	public static function newPrioritized( PageIdentity $page, array $params ) {
		$job = new self( $page, $params );
		$job->command = 'refreshLinksPrioritized';

		return $job;
	}

	/**
	 * Create a job that goes into the low-priority 'refreshLinksDynamic' queue.
	 *
	 * @param PageIdentity $page
	 * @param array $params
	 * @return RefreshLinksJob
	 */
	public static function newDynamic( PageIdentity $page, array $params ) {
		$job = new self( $page, $params );
		$job->command = 'refreshLinksDynamic';

		return $job;
	}

	/**
	 * Execute the job: either partition a recursive job into per-title leaf jobs,
	 * or refresh the link tables for an explicit set of pages, or for the job title.
	 *
	 * @return bool Success (false triggers a retry via the job queue)
	 */
	public function run() {
		$ok = true;

		if ( !empty( $this->params['recursive'] ) ) {
			// Job to update all (or a range of) backlink pages for a page

			// When the base job branches, wait for the replica DBs to catch up to the primary.
			// From then on, we know that any template changes at the time the base job was
			// enqueued will be reflected in backlink page parses when the leaf jobs run.
			$services = MediaWikiServices::getInstance();
			if ( !isset( $this->params['range'] ) ) {
				$lbFactory = $services->getDBLoadBalancerFactory();
				if ( !$lbFactory->waitForReplication( [
					'timeout' => self::LAG_WAIT_TIMEOUT
				] ) ) {
					// only try so hard, keep going with what we have
					$stats = $services->getStatsFactory();
					$stats->getCounter( 'refreshlinks_warnings_total' )
						->setLabel( 'reason', 'lag_wait_failed' )
						->copyToStatsdAt( 'refreshlinks_warning.lag_wait_failed' )
						->increment();
				}
			}
			// Carry over information for de-duplication
			$extraParams = $this->getRootJobParams();
			$extraParams['triggeredRecursive'] = true;
			// Carry over cause information for logging
			$extraParams['causeAction'] = $this->params['causeAction'];
			$extraParams['causeAgent'] = $this->params['causeAgent'];
			// Convert this into no more than $wgUpdateRowsPerJob RefreshLinks per-title
			// jobs and possibly a recursive RefreshLinks job for the rest of the backlinks
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
				$this,
				$services->getMainConfig()->get( MainConfigNames::UpdateRowsPerJob ),
				1, // job-per-title
				[ 'params' => $extraParams ]
			);
			$services->getJobQueueGroup()->push( $jobs );

		} elseif ( isset( $this->params['pages'] ) ) {
			// Job to update link tables for a set of titles
			foreach ( $this->params['pages'] as [ $ns, $dbKey ] ) {
				$title = Title::makeTitleSafe( $ns, $dbKey );
				if ( $title && $title->canExist() ) {
					$ok = $this->runForTitle( $title ) && $ok;
				} else {
					$ok = false;
					$this->setLastError( "Invalid title ($ns,$dbKey)." );
				}
			}

		} else {
			// Job to update link tables for a given title
			$ok = $this->runForTitle( $this->title );
		}

		return $ok;
	}

	/**
	 * Refresh the link tables for a single page: acquire the per-page lock,
	 * bail out if superseded, obtain (or render) the current revision's parser
	 * output, and run the secondary data updates with it.
	 *
	 * @param PageIdentity $pageIdentity
	 * @return bool Success (false triggers a retry via the job queue)
	 */
	protected function runForTitle( PageIdentity $pageIdentity ) {
		$services = MediaWikiServices::getInstance();
		$stats = $services->getStatsFactory();
		$renderer = $services->getRevisionRenderer();
		$parserCache = $services->getParserCache();
		$lbFactory = $services->getDBLoadBalancerFactory();
		$ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );

		// Load the page from the primary DB
		$page = $services->getWikiPageFactory()->newFromTitle( $pageIdentity );
		$page->loadPageData( IDBAccessObject::READ_LATEST );

		if ( !$page->exists() ) {
			// Probably due to concurrent deletion or renaming of the page
			$logger = LoggerFactory::getInstance( 'RefreshLinksJob' );
			$logger->warning(
				'The page does not exist. Perhaps it was deleted?',
				[
					'page_title' => $this->title->getPrefixedDBkey(),
					'job_params' => $this->getParams(),
					'job_metadata' => $this->getMetadata()
				]
			);
			$this->incrementFailureCounter( $stats, 'page_not_found' );

			// retry later to handle unlucky race condition
			return false;
		}

		// Serialize link update job by page ID so they see each others' changes.
		// The page ID and latest revision ID will be queried again after the lock
		// is acquired to bail if they are changed from that of loadPageData() above.
		$dbw = $lbFactory->getPrimaryDatabase();
		/** @noinspection PhpUnusedLocalVariableInspection */
		$scopedLock = LinksUpdate::acquirePageLock( $dbw, $page->getId(), 'job' );
		if ( $scopedLock === null ) {
			// Another job is already updating the page, likely for a prior revision (T170596)
			$this->setLastError( 'LinksUpdate already running for this page, try again later.' );
			$this->incrementFailureCounter( $stats, 'lock_failure' );

			// retry later when overlapping job for previous rev is done
			return false;
		}

		if ( $this->isAlreadyRefreshed( $page ) ) {
			// this job has been superseded, e.g. by overlapping recursive job
			// for a different template edit, or by direct edit or purge.
			$stats->getCounter( 'refreshlinks_superseded_updates_total' )
				->copyToStatsdAt( 'refreshlinks_outcome.good_update_superseded' )
				->increment();
			// treat as success
			return true;
		}

		// These can be fairly long-running jobs, while commitAndWaitForReplication
		// releases primary snapshots, let the replica release their snapshot as well
		$lbFactory->flushReplicaSnapshots( __METHOD__ );
		// Parse during a fresh transaction round for better read consistency
		$lbFactory->beginPrimaryChanges( __METHOD__ );
		$output = $this->getParserOutput( $renderer, $parserCache, $page, $stats );
		$options = $this->getDataUpdateOptions();
		$lbFactory->commitPrimaryChanges( __METHOD__ );

		if ( !$output ) {
			// probably raced out.
			// Specific refreshlinks_outcome metric sent by getCurrentRevisionIfUnchanged().
			// FIXME: Why do we retry this? Can this be a cancellation?
			return false;
		}

		// Tell DerivedPageDataUpdater to use this parser output
		$options['known-revision-output'] = $output;
		// Execute corresponding DataUpdates immediately
		$page->doSecondaryDataUpdates( $options );
		InfoAction::invalidateCache( $page );

		// NOTE: Since 2019 (f588586e) this no longer saves the new ParserOutput to the ParserCache!
		// This means the page will have to be rendered on-the-fly when it is next viewed.
		// This is to avoid spending limited ParserCache capacity on rarely visited pages.
		// TODO: Save the ParserOutput to ParserCache by calling WikiPage::updateParserCache()
		// for pages that are likely to benefit (T327162).

		// Commit any writes here in case this method is called in a loop.
		// In that case, the scoped lock will fail to be acquired.
		$lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );

		return true;
	}

	/**
	 * @return string|null Minimum lag-safe TS_MW timestamp with regard to root job creation
	 */
	private function getLagAwareRootTimestamp() {
		// Get the timestamp of the change that triggered this job
		$rootTimestamp = $this->params['rootJobTimestamp'] ?? null;
		if ( $rootTimestamp === null ) {
			return null;
		}

		if ( !empty( $this->params['isOpportunistic'] ) ) {
			// Neither clock skew nor DB snapshot/replica DB lag matter much for
			// such updates; focus on reusing the (often recently updated) cache
			$lagAwareTimestamp = $rootTimestamp;
		} else {
			// For transclusion updates, the template changes must be reflected
			$lagAwareTimestamp = wfTimestamp(
				TS_MW,
				(int)wfTimestamp( TS_UNIX, $rootTimestamp ) + self::NORMAL_MAX_LAG
			);
		}

		return $lagAwareTimestamp;
	}

	/**
	 * @param WikiPage $page
	 * @return bool Whether something updated the backlinks with data newer than this job
	 */
	private function isAlreadyRefreshed( WikiPage $page ) {
		$lagAwareTimestamp = $this->getLagAwareRootTimestamp();

		return ( $lagAwareTimestamp !== null && $page->getLinksTimestamp() > $lagAwareTimestamp );
	}

	/**
	 * @see DerivedPageDataUpdater::shouldGenerateHTMLOnEdit
	 * @param RevisionRecord $revision
	 * @return bool true if at least one of slots require rendering HTML on edit, false otherwise.
	 * This is needed for example in populating ParserCache.
	 */
	private function shouldGenerateHTMLOnEdit( RevisionRecord $revision ): bool {
		$services = MediaWikiServices::getInstance();
		foreach ( $revision->getSlots()->getSlotRoles() as $role ) {
			$slot = $revision->getSlots()->getSlot( $role );
			$contentHandler = $services->getContentHandlerFactory()->getContentHandler( $slot->getModel() );
			if ( $contentHandler->generateHTMLOnEdit() ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Get the parser output if the page is unchanged from what was loaded in $page
	 *
	 * @param RevisionRenderer $renderer
	 * @param ParserCache $parserCache
	 * @param WikiPage $page Page already loaded with READ_LATEST
	 * @param StatsFactory $stats
	 * @return ParserOutput|null Combined output for all slots; might only contain metadata
	 */
	private function getParserOutput(
		RevisionRenderer $renderer,
		ParserCache $parserCache,
		WikiPage $page,
		StatsFactory $stats
	) {
		$revision = $this->getCurrentRevisionIfUnchanged( $page, $stats );
		if ( !$revision ) {
			// race condition?
			return null;
		}

		// Reuse a recent parser cache entry when possible to avoid an expensive parse
		$cachedOutput = $this->getParserOutputFromCache( $parserCache, $page, $revision, $stats );
		if ( $cachedOutput ) {
			return $cachedOutput;
		}

		$causeAction = $this->params['causeAction'] ?? 'RefreshLinksJob';
		$renderedRevision = $renderer->getRenderedRevision(
			$revision,
			$page->makeParserOptions( 'canonical' ),
			null,
			[ 'audience' => $revision::RAW, 'causeAction' => $causeAction ]
		);

		$parseTimestamp = wfTimestampNow(); // timestamp that parsing started
		$output = $renderedRevision->getRevisionParserOutput( [
			// To avoid duplicate parses, this must match DerivedPageDataUpdater::shouldGenerateHTMLOnEdit() (T301309)
			'generate-html' => $this->shouldGenerateHTMLOnEdit( $revision )
		] );
		$output->setCacheTime( $parseTimestamp ); // notify LinksUpdate::doUpdate()

		return $output;
	}

	/**
	 * Get the current revision record if it is unchanged from what was loaded in $page
	 *
	 * @param WikiPage $page Page already loaded with READ_LATEST
	 * @param StatsFactory $stats
	 * @return RevisionRecord|null The same instance that $page->getRevisionRecord() uses
	 */
	private function getCurrentRevisionIfUnchanged(
		WikiPage $page,
		StatsFactory $stats
	) {
		$title = $page->getTitle();
		// Get the latest ID since acquirePageLock() in runForTitle() flushed the transaction.
		// This is used to detect edits/moves after loadPageData() but before the scope lock.
		// This works around the chicken/egg problem of determining the scope lock key name
		$latest = $title->getLatestRevID( IDBAccessObject::READ_LATEST );

		$triggeringRevisionId = $this->params['triggeringRevisionId'] ?? null;
		if ( $triggeringRevisionId && $triggeringRevisionId !== $latest ) {
			// This job is obsolete and one for the latest revision will handle updates
			$this->incrementFailureCounter( $stats, 'rev_not_current' );
			$this->setLastError( "Revision $triggeringRevisionId is not current" );
			return null;
		}

		// Load the current revision. Note that $page should have loaded with READ_LATEST.
		// This instance will be reused in WikiPage::doSecondaryDataUpdates() later on.
		$revision = $page->getRevisionRecord();
		if ( !$revision ) {
			// revision just got deleted?
			$this->incrementFailureCounter( $stats, 'rev_not_found' );
			$this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" );
			return null;

		} elseif ( $revision->getId() !== $latest || $revision->getPageId() !== $page->getId() ) {
			// Do not clobber over newer updates with older ones. If all jobs where FIFO and
			// serialized, it would be OK to update links based on older revisions since it
			// would eventually get to the latest. Since that is not the case (by design),
			// only update the link tables to a state matching the current revision's output.
			$this->incrementFailureCounter( $stats, 'rev_not_current' );
			$this->setLastError( "Revision {$revision->getId()} is not current" );

			return null;
		}

		return $revision;
	}

	/**
	 * Get the parser output from cache if it reflects the change that triggered this job
	 *
	 * @param ParserCache $parserCache
	 * @param WikiPage $page
	 * @param RevisionRecord $currentRevision
	 * @param StatsFactory $stats
	 * @return ParserOutput|null Cached output, or null on a cache miss (a
	 *   cache_hit/cache_miss metric is emitted either way)
	 */
	private function getParserOutputFromCache(
		ParserCache $parserCache,
		WikiPage $page,
		RevisionRecord $currentRevision,
		StatsFactory $stats
	) {
		$cachedOutput = null;
		// If page_touched changed after this root job, then it is likely that
		// any views of the pages already resulted in re-parses which are now in
		// cache. The cache can be reused to avoid expensive parsing in some cases.
		$rootTimestamp = $this->params['rootJobTimestamp'] ?? null;
		if ( $rootTimestamp !== null ) {
			$opportunistic = !empty( $this->params['isOpportunistic'] );
			if ( $page->getTouched() >= $rootTimestamp || $opportunistic ) {
				// Cache is suspected to be up-to-date so it's worth the I/O of checking.
				// As long as the cache rev ID matches the current rev ID and it reflects
				// the job's triggering change, then it is usable.
				$parserOptions = $page->makeParserOptions( 'canonical' );
				$output = $parserCache->getDirty( $page, $parserOptions );
				if (
					$output &&
					$output->getCacheRevisionId() == $currentRevision->getId() &&
					$output->getCacheTime() >= $this->getLagAwareRootTimestamp()
				) {
					$cachedOutput = $output;
				}
			}
		}

		if ( $cachedOutput ) {
			$stats->getCounter( 'refreshlinks_parsercache_operations_total' )
				->setLabel( 'status', 'cache_hit' )
				->copyToStatsdAt( 'refreshlinks.parser_cached' )
				->increment();
		} else {
			$stats->getCounter( 'refreshlinks_parsercache_operations_total' )
				->setLabel( 'status', 'cache_miss' )
				->copyToStatsdAt( 'refreshlinks.parser_uncached' )
				->increment();
		}

		return $cachedOutput;
	}

	/**
	 * Increment the RefreshLinks failure counter metric with the given reason.
	 *
	 * @param StatsFactory $stats
	 * @param string $reason Well-known failure reason string
	 * @return void
	 */
	private function incrementFailureCounter( StatsFactory $stats, $reason ): void {
		$stats->getCounter( 'refreshlinks_failures_total' )
			->setLabel( 'reason', $reason )
			->copyToStatsdAt( "refreshlinks_outcome.bad_$reason" )
			->increment();
	}

	/**
	 * Build the options for WikiPage::doSecondaryDataUpdates(), carrying over the
	 * cause information and, when available, the user who triggered the refresh.
	 *
	 * @return array
	 */
	private function getDataUpdateOptions() {
		$options = [
			'recursive' => !empty( $this->params['useRecursiveLinksUpdate'] ),
			// Carry over cause so the update can do extra logging
			'causeAction' => $this->params['causeAction'],
			'causeAgent' => $this->params['causeAgent']
		];
		if ( !empty( $this->params['triggeringUser'] ) ) {
			$userInfo = $this->params['triggeringUser'];
			if ( $userInfo['userId'] ) {
				$options['triggeringUser'] = User::newFromId( $userInfo['userId'] );
			} else {
				// Anonymous, use the username
				$options['triggeringUser'] = User::newFromName( $userInfo['userName'], false );
			}
		}

		return $options;
	}

	/**
	 * @inheritDoc
	 * Cause fields are excluded so that otherwise-identical jobs scheduled for
	 * different reasons still de-duplicate against each other.
	 */
	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();
		unset( $info['causeAction'] );
		unset( $info['causeAgent'] );
		if ( is_array( $info['params'] ) ) {
			// For per-pages jobs, the job title is that of the template that changed
			// (or similar), so remove that since it ruins duplicate detection
			if ( isset( $info['params']['pages'] ) ) {
				unset( $info['namespace'] );
				unset( $info['title'] );
			}
		}

		return $info;
	}

	/**
	 * @return int Number of pages this job will actually refresh (0 for the
	 *   recursive variant, which only schedules other jobs)
	 */
	public function workItemCount() {
		if ( !empty( $this->params['recursive'] ) ) {
			return 0; // nothing actually refreshed
		} elseif ( isset( $this->params['pages'] ) ) {
			return count( $this->params['pages'] );
		}

		return 1; // one title
	}
}
571 | } |