MediaWiki master
ParserOutputAccess.php
Go to the documentation of this file.
1<?php
20namespace MediaWiki\Page;
21
22use InvalidArgumentException;
23use MediaWiki\Logger\Spi as LoggerSpi;
44use Wikimedia\Assert\Assert;
46use Wikimedia\Parsoid\Parsoid;
51
62
/** Name of the primary parser cache used when ParserOptions::getUseParsoid() is true. */
64 public const PARSOID_PCACHE_NAME = 'parsoid-' . ParserCacheFactory::DEFAULT_NAME;
65
/** Name of the revision output cache used when ParserOptions::getUseParsoid() is true. */
67 public const PARSOID_RCACHE_NAME = 'parsoid-' . ParserCacheFactory::DEFAULT_RCACHE_NAME;
68
/**
 * Option bit: skip the cache lookup in getParserOutput(); renderRevision() also
 * skips fetching a previous (dirty) output from the primary cache when it is set.
 */
72 public const OPT_NO_CHECK_CACHE = 1;
73
/** Alias of OPT_NO_CHECK_CACHE. */
75 public const OPT_FORCE_PARSE = self::OPT_NO_CHECK_CACHE;
76
/**
 * Option bit: do not save freshly rendered output to the primary or secondary cache.
 * checkPreconditions() requires it for revisions that have no ID.
 */
80 public const OPT_NO_UPDATE_CACHE = 2;
81
/** Option bit: skip the public-audience check on the revision in checkPreconditions(). */
87 public const OPT_NO_AUDIENCE_CHECK = 4;
88
/** Combination of OPT_NO_UPDATE_CACHE and OPT_NO_CHECK_CACHE: neither read nor write caches. */
93 public const OPT_NO_CACHE = self::OPT_NO_UPDATE_CACHE | self::OPT_NO_CHECK_CACHE;
94
/** Option bit: trigger an opportunistic links update with the rendered output. */
99 public const OPT_LINKS_UPDATE = 8;
100
/** Option bit: render through a PoolCounter work item (see newPoolWorkArticleView()). */
113 public const OPT_FOR_ARTICLE_VIEW = 16;
114
/**
 * Option bit: in getCachedParserOutput(), keep cached Parsoid output even if its
 * page bundle version differs from Parsoid::defaultHTMLVersion().
 */
119 public const OPT_IGNORE_PROFILE_VERSION = 128;
120
/** shouldUseCache() result: the output must not be cached. */
122 private const CACHE_NONE = 'none';
123
/** shouldUseCache() result: use the primary parser cache (non-old revisions). */
125 private const CACHE_PRIMARY = 'primary';
126
/** shouldUseCache() result: use the secondary revision output cache (old revisions). */
128 private const CACHE_SECONDARY = 'secondary';
129
/**
 * In-process cache of ParserOutput objects, with fields keyed by parser output key
 * and the page's latest revision ID. Constructed with a size argument of 10.
 */
135 private MapCacheLRU $localCache;
136
// Collaborating services, all injected through the constructor.
137 private ParserCacheFactory $parserCacheFactory;
138 private RevisionLookup $revisionLookup;
139 private RevisionRenderer $revisionRenderer;
140 private StatsFactory $statsFactory;
141 private ChronologyProtector $chronologyProtector;
142 private LoggerSpi $loggerSpi;
143 private WikiPageFactory $wikiPageFactory;
144 private TitleFormatter $titleFormatter;
145 private TracerInterface $tracer;
146 private PoolCounterFactory $poolCounterFactory;
147
/**
 * Wire up the collaborating services and create the in-process output cache.
 *
 * @param ParserCacheFactory $parserCacheFactory Source of the primary and secondary caches
 * @param RevisionLookup $revisionLookup Used to load the latest revision when none is given
 * @param RevisionRenderer $revisionRenderer Performs the actual rendering
 * @param StatsFactory $statsFactory Metrics sink
 * @param ChronologyProtector $chronologyProtector Passed on to the current-revision pool work
 * @param LoggerSpi $loggerSpi Passed on to the pool work objects
 * @param WikiPageFactory $wikiPageFactory Used to build WikiPage objects from page records
 * @param TitleFormatter $titleFormatter Used to format page titles in error statuses
 * @param TracerInterface $tracer Used to create tracing spans
 * @param PoolCounterFactory $poolCounterFactory Creates the pool counters for article views
 */
public function __construct(
	ParserCacheFactory $parserCacheFactory,
	RevisionLookup $revisionLookup,
	RevisionRenderer $revisionRenderer,
	StatsFactory $statsFactory,
	ChronologyProtector $chronologyProtector,
	LoggerSpi $loggerSpi,
	WikiPageFactory $wikiPageFactory,
	TitleFormatter $titleFormatter,
	TracerInterface $tracer,
	PoolCounterFactory $poolCounterFactory
) {
	// In-process cache, created with a size argument of 10.
	$this->localCache = new MapCacheLRU( 10 );

	// Caching and rendering.
	$this->parserCacheFactory = $parserCacheFactory;
	$this->revisionRenderer = $revisionRenderer;
	$this->poolCounterFactory = $poolCounterFactory;
	$this->chronologyProtector = $chronologyProtector;

	// Page and revision access.
	$this->revisionLookup = $revisionLookup;
	$this->wikiPageFactory = $wikiPageFactory;
	$this->titleFormatter = $titleFormatter;

	// Observability.
	$this->statsFactory = $statsFactory;
	$this->loggerSpi = $loggerSpi;
	$this->tracer = $tracer;
}
173
/**
 * Decide which cache tier, if any, applies to output for the given page and revision.
 *
 * @param PageRecord $page
 * @param RevisionRecord|null $rev Revision to be rendered, or null for the page's latest
 * @return string One of the self::CACHE_* constants
 */
private function shouldUseCache(
	PageRecord $page,
	?RevisionRecord $rev
) {
	// A revision without an ID is not from the database, so its output
	// can't safely be cached.
	if ( $rev && !$rev->getId() ) {
		return self::CACHE_NONE;
	}

	// NOTE: Keep in sync with ParserWikiPage::shouldCheckParserCache().
	// NOTE: when we allow caching of old revisions in the future,
	//       we must not allow caching of deleted revisions.

	$wikiPage = $this->wikiPageFactory->newFromTitle( $page );
	$cacheSupported = $page->exists()
		&& $wikiPage->getContentHandler()->isParserCacheSupported();
	if ( !$cacheSupported ) {
		return self::CACHE_NONE;
	}

	// No explicit revision, or the page's latest one: primary cache.
	$isCurrent = !$rev || $rev->getId() === $page->getLatest();
	if ( $isCurrent ) {
		return self::CACHE_PRIMARY;
	}

	// Old revision: cacheable only if its text is visible to the public
	// (i.e. not deleted/suppressed).
	$publiclyVisible = $rev->audienceCan(
		RevisionRecord::DELETED_TEXT,
		RevisionRecord::FOR_PUBLIC
	);

	return $publiclyVisible ? self::CACHE_SECONDARY : self::CACHE_NONE;
}
212
/**
 * Get the rendered output for the given page if it is present in a cache.
 *
 * Looks in the in-process cache first (primary tier only), then in the primary
 * or secondary cache as selected by shouldUseCache(). Never renders anything.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord|null $revision Revision to look up, or null for the latest
 * @param int $options Bitfield of self::OPT_* flags; only OPT_IGNORE_PROFILE_VERSION
 *   is inspected here
 * @return ParserOutput|null The cached output, or null on a miss
 */
public function getCachedParserOutput(
	PageRecord $page,
	ParserOptions $parserOptions,
	?RevisionRecord $revision = null,
	int $options = 0
): ?ParserOutput {
	// Not read again below; presumably held so the span covers this call — TODO confirm.
	$span = $this->startOperationSpan( __FUNCTION__, $page, $revision );
	$isOld = $revision && $revision->getId() !== $page->getLatest();
	$cacheTier = $this->shouldUseCache( $page, $revision );
	$primary = $this->getPrimaryCache( $parserOptions );
	$localKey = $primary->makeParserOutputKey( $page, $parserOptions );

	$output = null;
	if ( $cacheTier === self::CACHE_PRIMARY ) {
		// A hit in the in-process cache returns immediately, without any stats.
		if ( $this->localCache->hasField( $localKey, $page->getLatest() ) && !$isOld ) {
			return $this->localCache->getField( $localKey, $page->getLatest() );
		}
		$output = $primary->get( $page, $parserOptions );
	} elseif ( $cacheTier === self::CACHE_SECONDARY && $revision ) {
		$output = $this->getSecondaryCache( $parserOptions )
			->get( $revision, $parserOptions );
	}

	$statReason = $output ? 'hit' : 'miss';
	$statType = $statReason;

	$mustCheckVersion = $output
		&& !( $options & self::OPT_IGNORE_PROFILE_VERSION )
		&& $parserOptions->getUseParsoid();
	if ( $mustCheckVersion ) {
		$bundle = $output->getExtensionData(
			PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY
		);
		// T333606: Force a reparse if the version coming from cache is not the default
		$cachedVersion = $bundle['version'] ?? null;
		// T325137: a null version means BadContentModel, no sense in reparsing
		$isObsolete = $cachedVersion !== null
			&& $cachedVersion !== Parsoid::defaultHTMLVersion();
		if ( $isObsolete ) {
			$statType = 'miss';
			$statReason = 'obsolete';
			$output = null;
		}
	}

	// Remember hits for the current revision in the in-process cache.
	if ( $output && !$isOld ) {
		$this->localCache->setField( $localKey, $page->getLatest(), $output );
	}

	$this->statsFactory
		->getCounter( 'parseroutputaccess_cache_total' )
		->setLabel( 'cache', $cacheTier )
		->setLabel( 'reason', $statReason )
		->setLabel( 'type', $statType )
		->copyToStatsdAt( "ParserOutputAccess.Cache.$cacheTier.$statReason" )
		->increment();

	// Normalise any falsy cache result (e.g. false) to null.
	return $output ?: null;
}
281
/**
 * Returns the rendered output for the given page, from cache if possible,
 * rendering (and by default caching) it otherwise.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord|null $revision Revision to render, or null for the page's latest
 * @param int $options Bitfield of self::OPT_* flags
 * @return Status A Status whose value is the ParserOutput on success. Fatal when
 *   the preconditions fail or the page's latest revision cannot be loaded.
 * @throws InvalidArgumentException From checkPreconditions(), if the revision has no
 *   ID while cache updates are enabled, or does not belong to the page.
 */
public function getParserOutput(
	PageRecord $page,
	ParserOptions $parserOptions,
	?RevisionRecord $revision = null,
	int $options = 0
): Status {
	// Not read again below; presumably held so the span covers this call — TODO confirm.
	$span = $this->startOperationSpan( __FUNCTION__, $page, $revision );
	$error = $this->checkPreconditions( $page, $revision, $options );
	if ( $error ) {
		$this->statsFactory
			->getCounter( 'parseroutputaccess_case' )
			->setLabel( 'case', 'error' )
			->copyToStatsdAt( 'ParserOutputAccess.Case.error' )
			->increment();
		return $error;
	}

	$isOld = $revision && $revision->getId() !== $page->getLatest();
	$case = $isOld ? 'old' : 'current';
	$this->statsFactory
		->getCounter( 'parseroutputaccess_case' )
		->setLabel( 'case', $case )
		->copyToStatsdAt( "ParserOutputAccess.Case.$case" )
		->increment();

	if ( !( $options & self::OPT_NO_CHECK_CACHE ) ) {
		// Forward $options so that flags inspected by the cache lookup
		// (OPT_IGNORE_PROFILE_VERSION) are honoured on this path as well.
		$output = $this->getCachedParserOutput( $page, $parserOptions, $revision, $options );
		if ( $output ) {
			return Status::newGood( $output );
		}
	}

	if ( !$revision ) {
		// No explicit revision given: fall back to the page's latest one.
		$revId = $page->getLatest();
		$revision = $revId ? $this->revisionLookup->getRevisionById( $revId ) : null;

		if ( !$revision ) {
			$this->statsFactory
				->getCounter( 'parseroutputaccess_status' )
				->setLabel( 'status', 'norev' )
				->copyToStatsdAt( "ParserOutputAccess.Status.norev" )
				->increment();
			return Status::newFatal( 'missing-revision', $revId );
		}
	}

	if ( $options & self::OPT_FOR_ARTICLE_VIEW ) {
		$work = $this->newPoolWorkArticleView( $page, $parserOptions, $revision, $options );
		/** @var Status $status */
		$status = $work->execute();
	} else {
		// XXX: we could try harder to reuse a cache lookup above to
		// provide the $previous argument here
		$status = $this->renderRevision( $page, $parserOptions, $revision, $options, null );
	}

	$output = $status->getValue();
	Assert::postcondition( $output || !$status->isOK(), 'Inconsistent status' );

	// Remember output for the current revision in the in-process cache.
	if ( $output && !$isOld ) {
		$primaryCache = $this->getPrimaryCache( $parserOptions );
		$classCacheKey = $primaryCache->makeParserOutputKey( $page, $parserOptions );
		$this->localCache->setField( $classCacheKey, $page->getLatest(), $output );
	}

	if ( $status->isGood() ) {
		$statusLabel = 'good';
	} elseif ( $status->isOK() ) {
		$statusLabel = 'ok';
	} else {
		$statusLabel = 'error';
	}
	$this->statsFactory->getCounter( 'parseroutputaccess_status' )
		->setLabel( 'status', $statusLabel )
		->copyToStatsdAt( "ParserOutputAccess.Status.$statusLabel" )
		->increment();

	return $status;
}
395
/**
 * Render the given revision directly (without PoolCounter), saving the result
 * to the appropriate cache unless that is disabled or the output is not cacheable.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord $revision
 * @param int $options Bitfield of self::OPT_* flags
 * @param ParserOutput|null $previousOutput Earlier output handed to the renderer as
 *   'previous-output'; when null it may be fetched from the primary cache
 * @return Status Always a good Status wrapping the ParserOutput
 */
private function renderRevision(
	PageRecord $page,
	ParserOptions $parserOptions,
	RevisionRecord $revision,
	int $options,
	?ParserOutput $previousOutput = null
): Status {
	// Not read again below; presumably held so the span covers this call — TODO confirm.
	$span = $this->startOperationSpan( __FUNCTION__, $page, $revision );
	$this->statsFactory->getCounter( 'parseroutputaccess_render_total' )
		->setLabel( 'pool', 'none' )
		->setLabel( 'cache', self::CACHE_NONE )
		->copyToStatsdAt( 'ParserOutputAccess.PoolWork.None' )
		->increment();

	$cacheTier = $this->shouldUseCache( $page, $revision );

	// T371713: Temporary statistics collection code to determine
	// feasibility of Parsoid selective update
	$config = MediaWikiServices::getInstance()->getMainConfig();
	$sampleRate = $config->get( MainConfigNames::ParsoidSelectiveUpdateSampleRate );
	$doSample = ( $sampleRate && mt_rand( 1, $sampleRate ) === 1 );

	// If $cacheTier === self::CACHE_SECONDARY we could potentially
	// try to reuse the parse of $revision-1 from the secondary cache,
	// but it is likely those template transclusions are out of date.
	// Try to reuse the template transclusions from the most recent
	// parse, which are more likely to reflect the current template.
	$wantsPrevious = $previousOutput === null
		&& ( $doSample || $parserOptions->getUseParsoid() );
	if ( $wantsPrevious && !( $options & self::OPT_NO_CHECK_CACHE ) ) {
		$dirty = $this->getPrimaryCache( $parserOptions )->getDirty( $page, $parserOptions );
		$previousOutput = $dirty ?: null;
	}

	$renderHints = [
		'audience' => RevisionRecord::RAW,
		'previous-output' => $previousOutput,
	];
	$rendered = $this->revisionRenderer->getRenderedRevision(
		$revision,
		$parserOptions,
		null,
		$renderHints
	);
	$output = $rendered->getRevisionParserOutput();

	if ( $doSample ) {
		$content = $revision->getContent( SlotRecord::MAIN );
		$labels = [
			'source' => 'ParserOutputAccess',
			'type' => $previousOutput === null ? 'full' : 'selective',
			'reason' => $parserOptions->getRenderReason(),
			'parser' => $parserOptions->getUseParsoid() ? 'parsoid' : 'legacy',
			'opportunistic' => 'false',
			'wiki' => WikiMap::getCurrentWikiId(),
			'model' => $content ? $content->getModel() : 'unknown',
		];
		$this->statsFactory
			->getCounter( 'ParserCache_selective_total' )
			->setLabels( $labels )
			->increment();
		$this->statsFactory
			->getCounter( 'ParserCache_selective_cpu_seconds' )
			->setLabels( $labels )
			->incrementBy( $output->getTimeProfile( 'cpu' ) );
	}

	$mayUpdateCache = !( $options & self::OPT_NO_UPDATE_CACHE ) && $output->isCacheable();
	if ( $mayUpdateCache ) {
		if ( $cacheTier === self::CACHE_PRIMARY ) {
			$this->getPrimaryCache( $parserOptions )
				->save( $output, $page, $parserOptions );
		} elseif ( $cacheTier === self::CACHE_SECONDARY ) {
			$this->getSecondaryCache( $parserOptions )
				->save( $output, $revision, $parserOptions );
		}
	}

	if ( $options & self::OPT_LINKS_UPDATE ) {
		$wikiPage = $this->wikiPageFactory->newFromTitle( $page );
		$wikiPage->triggerOpportunisticLinksUpdate( $output );
	}

	return Status::newGood( $output );
}
494
/**
 * Validate the page/revision/options combination before any lookup or rendering.
 *
 * @param PageRecord $page
 * @param RevisionRecord|null $revision
 * @param int $options Bitfield of self::OPT_* flags
 * @return Status|null A fatal Status describing the problem, or null if all is well
 * @throws InvalidArgumentException If the revision has no ID while cache updates are
 *   enabled, or if the revision does not belong to the given page
 */
private function checkPreconditions(
	PageRecord $page,
	?RevisionRecord $revision = null,
	int $options = 0
): ?Status {
	if ( !$page->exists() ) {
		return Status::newFatal( 'nopagetext' );
	}

	// Every remaining check concerns an explicitly given revision.
	if ( !$revision ) {
		return null;
	}

	$cacheUpdatesEnabled = !( $options & self::OPT_NO_UPDATE_CACHE );
	if ( $cacheUpdatesEnabled && !$revision->getId() ) {
		throw new InvalidArgumentException(
			'The revision does not have a known ID. Use OPT_NO_CACHE.'
		);
	}

	if ( $revision->getPageId() !== $page->getId() ) {
		throw new InvalidArgumentException(
			'The revision does not belong to the given page.'
		);
	}

	if ( $options & self::OPT_NO_AUDIENCE_CHECK ) {
		return null;
	}

	// NOTE: If per-user checks are desired, the caller should perform them and
	//       then set OPT_NO_AUDIENCE_CHECK if they passed.
	$publiclyVisible = $revision->audienceCan(
		RevisionRecord::DELETED_TEXT,
		RevisionRecord::FOR_PUBLIC
	);
	if ( $publiclyVisible ) {
		return null;
	}

	return Status::newFatal(
		'missing-revision-permission',
		$revision->getId(),
		$revision->getTimestamp(),
		$this->titleFormatter->getPrefixedDBkey( $page )
	);
}
538
/**
 * Build the PoolCounter work item that renders the given revision for an
 * article view, picking the implementation that matches the applicable cache tier.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord $revision
 * @param int $options Bitfield of self::OPT_* flags; OPT_NO_UPDATE_CACHE and
 *   OPT_LINKS_UPDATE are passed on to the current-revision work item
 * @return PoolCounterWork
 */
protected function newPoolWorkArticleView(
	PageRecord $page,
	ParserOptions $parserOptions,
	RevisionRecord $revision,
	int $options
): PoolCounterWork {
	$useCache = $this->shouldUseCache( $page, $revision );

	// Legacy statsd key suffix for each cache tier.
	$statCacheLabelLegacy = [
		self::CACHE_PRIMARY => 'Current',
		self::CACHE_SECONDARY => 'Old',
	][$useCache] ?? 'Uncached';

	$this->statsFactory->getCounter( 'parseroutputaccess_render_total' )
		->setLabel( 'pool', 'articleview' )
		->setLabel( 'cache', $useCache )
		->copyToStatsdAt( "ParserOutputAccess.PoolWork.$statCacheLabelLegacy" )
		->increment();

	switch ( $useCache ) {
		case self::CACHE_PRIMARY:
			$primaryCache = $this->getPrimaryCache( $parserOptions );
			$parserCacheMetadata = $primaryCache->getMetadata( $page );
			$cacheKey = $primaryCache->makeParserOutputKey( $page, $parserOptions,
				$parserCacheMetadata ? $parserCacheMetadata->getUsedOptions() : null
			);

			// The pool key includes the revision ID as well as the cache key.
			$workKey = $cacheKey . ':revid:' . $revision->getId();

			$pool = $this->poolCounterFactory->create( 'ArticleView', $workKey );
			// This branch must return its work item, like the other two branches;
			// without the return statement the argument list below is dangling.
			return new PoolWorkArticleViewCurrent(
				$pool,
				$page,
				$revision,
				$parserOptions,
				$this->revisionRenderer,
				$primaryCache,
				$this->chronologyProtector,
				$this->loggerSpi,
				$this->wikiPageFactory,
				!( $options & self::OPT_NO_UPDATE_CACHE ),
				(bool)( $options & self::OPT_LINKS_UPDATE )
			);

		case self::CACHE_SECONDARY:
			$secondaryCache = $this->getSecondaryCache( $parserOptions );
			$workKey = $secondaryCache->makeParserOutputKey( $revision, $parserOptions );
			$pool = $this->poolCounterFactory->create( 'ArticleView', $workKey );
			return new PoolWorkArticleViewOld(
				$pool,
				$secondaryCache,
				$revision,
				$parserOptions,
				$this->revisionRenderer,
				$this->loggerSpi
			);

		default:
			// Without caching, using poolcounter is pointless
			// The name of the metric is a bit confusing now
			$secondaryCache = $this->getSecondaryCache( $parserOptions );
			$workKey = $secondaryCache->makeParserOutputKeyOptionalRevId( $revision, $parserOptions );
			$pool = $this->poolCounterFactory->create( 'ArticleView', $workKey );
			return new PoolWorkArticleView(
				$pool,
				$revision,
				$parserOptions,
				$this->revisionRenderer,
				$this->loggerSpi
			);
	}

	// unreachable
}
621
/**
 * Pick the primary parser cache matching the parser selected in the options.
 *
 * @param ParserOptions $pOpts
 * @return ParserCache The Parsoid cache if Parsoid is in use, else the default cache
 */
private function getPrimaryCache( ParserOptions $pOpts ): ParserCache {
	$cacheName = $pOpts->getUseParsoid()
		? self::PARSOID_PCACHE_NAME
		: ParserCacheFactory::DEFAULT_NAME;

	return $this->parserCacheFactory->getParserCache( $cacheName );
}
633
/**
 * Pick the revision output cache matching the parser selected in the options.
 *
 * @param ParserOptions $pOpts
 * @return RevisionOutputCache The Parsoid cache if Parsoid is in use, else the default cache
 */
private function getSecondaryCache( ParserOptions $pOpts ): RevisionOutputCache {
	$cacheName = $pOpts->getUseParsoid()
		? self::PARSOID_RCACHE_NAME
		: ParserCacheFactory::DEFAULT_RCACHE_NAME;

	return $this->parserCacheFactory->getRevisionOutputCache( $cacheName );
}
645
/**
 * Create, start and activate a tracing span for one operation of this class.
 * Page and revision attributes are attached only when the span is sampled.
 *
 * @param string $opName Operation name, appended to "ParserOutputAccess::"
 * @param PageRecord $page
 * @param RevisionRecord|null $revision
 * @return SpanInterface The started and activated span
 */
private function startOperationSpan(
	string $opName,
	PageRecord $page,
	?RevisionRecord $revision = null
): SpanInterface {
	$span = $this->tracer->createSpan( "ParserOutputAccess::$opName" );
	$isSampled = $span->getContext()->isSampled();

	if ( $isSampled ) {
		$pageAttributes = [
			'org.wikimedia.parser.page' => $page->__toString(),
			'org.wikimedia.parser.page.id' => $page->getId(),
			'org.wikimedia.parser.page.wiki' => $page->getWikiId(),
		];
		$span->setAttributes( $pageAttributes );
	}

	if ( $isSampled && $revision ) {
		$revisionAttributes = [
			'org.wikimedia.parser.revision.id' => $revision->getId(),
			'org.wikimedia.parser.revision.parent_id' => $revision->getParentId(),
		];
		$span->setAttributes( $revisionAttributes );
	}

	$started = $span->start();
	return $started->activate();
}
667}
const CACHE_NONE
Definition Defines.php:87
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:82
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Service for getting rendered output of a given page.
const OPT_FOR_ARTICLE_VIEW
Apply page view semantics.
__construct(ParserCacheFactory $parserCacheFactory, RevisionLookup $revisionLookup, RevisionRenderer $revisionRenderer, StatsFactory $statsFactory, ChronologyProtector $chronologyProtector, LoggerSpi $loggerSpi, WikiPageFactory $wikiPageFactory, TitleFormatter $titleFormatter, TracerInterface $tracer, PoolCounterFactory $poolCounterFactory)
getCachedParserOutput(PageRecord $page, ParserOptions $parserOptions, ?RevisionRecord $revision=null, int $options=0)
Get the rendered output for the given page if it is present in the cache.
newPoolWorkArticleView(PageRecord $page, ParserOptions $parserOptions, RevisionRecord $revision, int $options)
getParserOutput(PageRecord $page, ParserOptions $parserOptions, ?RevisionRecord $revision=null, int $options=0)
Returns the rendered output for the given page.
Service for creating WikiPage objects.
Cache for ParserOutput objects corresponding to the latest page revisions.
Set options of the Parser.
getUseParsoid()
Parsoid-format HTML output, or legacy wikitext parser HTML?
ParserOutput is a rendering of a Content object or a message.
Provides methods for conversion between PageBundle and ParserOutput TODO: Convert to a trait once we ...
Cache for ParserOutput objects.
Class for dealing with PoolCounters using class members.
PoolWorkArticleView for the current revision of a page, using ParserCache.
PoolWorkArticleView for an old revision of a page, using a simple cache.
PoolCounter protected work wrapping RenderedRevision->getRevisionParserOutput.
Page revision base class.
getParentId( $wikiId=self::LOCAL)
Get parent revision ID (the original previous page revision).
audienceCan( $field, $audience, ?Authority $performer=null)
Check that the given audience has access to the given field.
getId( $wikiId=self::LOCAL)
Get revision ID.
The RevisionRenderer service provides access to rendered output for revisions.
Value object representing a content slot associated with a page revision.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition Status.php:54
A title formatter service for MediaWiki.
Tools for dealing with other locally-hosted wikis.
Definition WikiMap.php:31
Store key-value entries in a size-limited in-memory LRU cache.
Provide a given client with protection against visible database lag.
This is the primary interface for validating metrics definitions, caching defined metrics,...
Service provider interface to create \Psr\Log\LoggerInterface objects.
Definition Spi.php:64
exists()
Checks if the page currently exists.
Data record representing a page that is (or used to be, or could be) an editable page on a wiki.
getLatest( $wikiId=self::LOCAL)
The ID of the page's latest revision.
Service for looking up page revisions.
Represents an OpenTelemetry span, i.e.
Base interface for an OpenTelemetry tracer responsible for creating spans.