MediaWiki master
ParserOutputAccess.php
Go to the documentation of this file.
1<?php
20namespace MediaWiki\Page;
21
22use InvalidArgumentException;
23use MapCacheLRU;
24use MediaWiki\Logger\Spi as LoggerSpi;
44use Wikimedia\Assert\Assert;
45use Wikimedia\Parsoid\Parsoid;
51
62
/** Name of the ParserCache instance used when the parser options request Parsoid output (see getPrimaryCache()). */
64 public const PARSOID_PCACHE_NAME = 'parsoid-' . ParserCacheFactory::DEFAULT_NAME;
65
/** Name of the RevisionOutputCache instance used when the parser options request Parsoid output (see getSecondaryCache()). */
67 public const PARSOID_RCACHE_NAME = 'parsoid-' . ParserCacheFactory::DEFAULT_RCACHE_NAME;
68
/** Option bit: skip the cache lookup before rendering. */
72 public const OPT_NO_CHECK_CACHE = 1;
73
/** Alias of OPT_NO_CHECK_CACHE. */
75 public const OPT_FORCE_PARSE = self::OPT_NO_CHECK_CACHE;
76
/** Option bit: do not save freshly rendered output to the cache. */
80 public const OPT_NO_UPDATE_CACHE = 2;
81
/**
 * Option bit: skip the FOR_PUBLIC audience check on the revision's text in checkPreconditions().
 * Callers that perform their own per-user checks are expected to set this.
 */
87 public const OPT_NO_AUDIENCE_CHECK = 4;
88
/** Combination of OPT_NO_UPDATE_CACHE and OPT_NO_CHECK_CACHE: neither read from nor write to the cache. */
93 public const OPT_NO_CACHE = self::OPT_NO_UPDATE_CACHE | self::OPT_NO_CHECK_CACHE;
94
/** Option bit: trigger an opportunistic links update with the rendered output. */
99 public const OPT_LINKS_UPDATE = 8;
100
/**
 * Option bit: apply page view semantics. getParserOutput() then renders through the
 * PoolCounterWork object created by newPoolWorkArticleView() instead of renderRevision().
 */
113 public const OPT_FOR_ARTICLE_VIEW = 16;
114
/**
 * Option bit: accept cached Parsoid output even if its recorded HTML version differs from
 * Parsoid::defaultHTMLVersion(). Without it such output is treated as obsolete.
 */
119 public const OPT_IGNORE_PROFILE_VERSION = 128;
120
/** shouldUseCache() result: the output must not be cached. */
122 private const CACHE_NONE = 'none';
123
/** shouldUseCache() result: use the primary cache (ParserCache), which is used for the page's latest revision. */
125 private const CACHE_PRIMARY = 'primary';
126
/** shouldUseCache() result: use the secondary cache (RevisionOutputCache), which is used for old revisions. */
128 private const CACHE_SECONDARY = 'secondary';
129
/**
 * In-process LRU cache (created with a capacity of 10 in the constructor) of ParserOutput
 * objects for current revisions. Entries are stored under the parser output key, with the
 * page's latest revision ID as the field name.
 */
135 private MapCacheLRU $localCache;
136
// Injected services; each one is assigned from the constructor argument of the same name.
137 private ParserCacheFactory $parserCacheFactory;
138 private RevisionLookup $revisionLookup;
139 private RevisionRenderer $revisionRenderer;
140 private StatsFactory $statsFactory;
141 private ILBFactory $lbFactory;
142 private ChronologyProtector $chronologyProtector;
143 private LoggerSpi $loggerSpi;
144 private WikiPageFactory $wikiPageFactory;
145 private TitleFormatter $titleFormatter;
146 private TracerInterface $tracer;
147
/**
 * Store the injected services and create the in-process output cache.
 *
 * @param ParserCacheFactory $parserCacheFactory
 * @param RevisionLookup $revisionLookup
 * @param RevisionRenderer $revisionRenderer
 * @param StatsFactory $statsFactory
 * @param ILBFactory $lbFactory
 * @param ChronologyProtector $chronologyProtector
 * @param LoggerSpi $loggerSpi
 * @param WikiPageFactory $wikiPageFactory
 * @param TitleFormatter $titleFormatter
 * @param TracerInterface $tracer
 */
public function __construct(
	ParserCacheFactory $parserCacheFactory,
	RevisionLookup $revisionLookup,
	RevisionRenderer $revisionRenderer,
	StatsFactory $statsFactory,
	ILBFactory $lbFactory,
	ChronologyProtector $chronologyProtector,
	LoggerSpi $loggerSpi,
	WikiPageFactory $wikiPageFactory,
	TitleFormatter $titleFormatter,
	TracerInterface $tracer
) {
	// Small per-instance cache of recently produced ParserOutput objects.
	$this->localCache = new MapCacheLRU( 10 );

	// Caches and rendering.
	$this->parserCacheFactory = $parserCacheFactory;
	$this->revisionRenderer = $revisionRenderer;

	// Page and revision access.
	$this->revisionLookup = $revisionLookup;
	$this->wikiPageFactory = $wikiPageFactory;
	$this->titleFormatter = $titleFormatter;

	// Database-related services handed on to the article view pool work.
	$this->lbFactory = $lbFactory;
	$this->chronologyProtector = $chronologyProtector;

	// Observability.
	$this->statsFactory = $statsFactory;
	$this->loggerSpi = $loggerSpi;
	$this->tracer = $tracer;
}
173
/**
 * Decide which cache, if any, may be used for the output of the given page and revision.
 *
 * @param PageRecord $page
 * @param RevisionRecord|null $rev The revision to render; null means the page's latest revision
 * @return string One of the CACHE_XXX constants
 */
private function shouldUseCache(
	PageRecord $page,
	?RevisionRecord $rev
): string {
	if ( $rev && !$rev->getId() ) {
		// The revision isn't from the database, so the output can't safely be cached.
		return self::CACHE_NONE;
	}

	// NOTE: Keep in sync with ParserWikiPage::shouldCheckParserCache().
	// NOTE: when we allow caching of old revisions in the future,
	//       we must not allow caching of deleted revisions.

	// Check existence first: it is cheap, and it avoids constructing a WikiPage
	// for a page that can never be cached anyway.
	if ( !$page->exists() ) {
		return self::CACHE_NONE;
	}

	$contentHandler = $this->wikiPageFactory->newFromTitle( $page )->getContentHandler();
	if ( !$contentHandler->isParserCacheSupported() ) {
		return self::CACHE_NONE;
	}

	// No explicit revision, or the latest one: this is what the primary cache holds.
	if ( !$rev || $rev->getId() === $page->getLatest() ) {
		return self::CACHE_PRIMARY;
	}

	if ( !$rev->audienceCan( RevisionRecord::DELETED_TEXT, RevisionRecord::FOR_PUBLIC ) ) {
		// deleted/suppressed revision
		return self::CACHE_NONE;
	}

	return self::CACHE_SECONDARY;
}
212
/**
 * Returns the rendered output for the given page if it is present in the cache.
 *
 * Nothing is parsed here. The in-process cache is consulted first for current
 * revisions, then the primary or secondary cache as selected by shouldUseCache().
 * A hit/miss counter is incremented for every lookup that reaches the shared caches.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord|null $revision The revision to look up; null means the latest revision
 * @param int $options Bitfield using the OPT_XXX constants; only
 *   OPT_IGNORE_PROFILE_VERSION is inspected by this method
 * @return ParserOutput|null The cached output, or null if there is none or if the
 *   cached Parsoid output was produced with a non-default HTML version
 */
public function getCachedParserOutput(
	PageRecord $page,
	ParserOptions $parserOptions,
	?RevisionRecord $revision = null,
	int $options = 0
): ?ParserOutput {
	// Not read again below; the span is only held for the duration of the call.
	$span = $this->startOperationSpan( __FUNCTION__, $page, $revision );
	$isOld = $revision && $revision->getId() !== $page->getLatest();
	$cacheType = $this->shouldUseCache( $page, $revision );
	$parserCache = $this->getPrimaryCache( $parserOptions );
	$localKey = $parserCache->makeParserOutputKey( $page, $parserOptions );

	$output = null;
	if ( $cacheType === self::CACHE_PRIMARY ) {
		if ( $this->localCache->hasField( $localKey, $page->getLatest() ) && !$isOld ) {
			// Served from the in-process cache; no stats are recorded for this path.
			return $this->localCache->getField( $localKey, $page->getLatest() );
		}
		$output = $parserCache->get( $page, $parserOptions );
	} elseif ( $cacheType === self::CACHE_SECONDARY && $revision ) {
		$output = $this->getSecondaryCache( $parserOptions )->get( $revision, $parserOptions );
	}

	$missReason = 'miss';
	$mustCheckVersion = $output
		&& !( $options & self::OPT_IGNORE_PROFILE_VERSION )
		&& $parserOptions->getUseParsoid();
	if ( $mustCheckVersion ) {
		$bundle = $output->getExtensionData(
			PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY
		);
		// T333606: Force a reparse if the version coming from cache is not the default
		$cachedVersion = $bundle['version'] ?? null;
		// T325137: a null version means BadContentModel, no sense in reparsing
		if ( $cachedVersion !== null && $cachedVersion !== Parsoid::defaultHTMLVersion() ) {
			$missReason = 'obsolete';
			$output = null;
		}
	}

	if ( !$output ) {
		$this->statsFactory
			->getCounter( 'parseroutputaccess_cache' )
			->setLabel( 'reason', $missReason )
			->setLabel( 'cache', $cacheType )
			->setLabel( 'type', 'miss' )
			->copyToStatsdAt( "ParserOutputAccess.Cache.{$cacheType}.{$missReason}" )
			->increment();

		// The caches may report a miss as false; callers always get null.
		return null;
	}

	if ( !$isOld ) {
		// Remember output for the current revision for later calls on this instance.
		$this->localCache->setField( $localKey, $page->getLatest(), $output );
	}

	$this->statsFactory
		->getCounter( 'parseroutputaccess_cache' )
		->setLabel( 'cache', $cacheType )
		->setLabel( 'reason', 'hit' )
		->setLabel( 'type', 'hit' )
		->copyToStatsdAt( "ParserOutputAccess.Cache.{$cacheType}.hit" )
		->increment();

	return $output;
}
290
/**
 * Returns the rendered output for the given page.
 *
 * The cache is consulted first unless OPT_NO_CHECK_CACHE is set. On a miss the
 * revision is rendered, either through PoolCounter work (OPT_FOR_ARTICLE_VIEW)
 * or directly via renderRevision(). Counters are incremented for the kind of
 * request (error/old/current) and for the final status (good/ok/error/norev).
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord|null $revision The revision to render; null means the page's latest revision
 * @param int $options Bitfield using the OPT_XXX constants
 * @return Status A Status whose value is the ParserOutput when rendering succeeded;
 *   fatal if the preconditions fail or the latest revision cannot be loaded
 * @throws InvalidArgumentException From checkPreconditions(), if the revision has no
 *   ID while cache updates are enabled, or does not belong to the page
 */
public function getParserOutput(
	PageRecord $page,
	ParserOptions $parserOptions,
	?RevisionRecord $revision = null,
	int $options = 0
): Status {
	// Not read again below; the span is only held for the duration of the call.
	$span = $this->startOperationSpan( __FUNCTION__, $page, $revision );

	$error = $this->checkPreconditions( $page, $revision, $options );
	if ( $error ) {
		$this->statsFactory
			->getCounter( 'parseroutputaccess_case' )
			->setLabel( 'case', 'error' )
			->copyToStatsdAt( 'ParserOutputAccess.Case.error' )
			->increment();
		return $error;
	}

	$isOld = $revision && $revision->getId() !== $page->getLatest();
	$case = $isOld ? 'old' : 'current';
	$this->statsFactory
		->getCounter( 'parseroutputaccess_case' )
		->setLabel( 'case', $case )
		->copyToStatsdAt( "ParserOutputAccess.Case.$case" )
		->increment();

	if ( !( $options & self::OPT_NO_CHECK_CACHE ) ) {
		// Forward $options so that OPT_IGNORE_PROFILE_VERSION is honoured by the
		// cache lookup; it is the only flag getCachedParserOutput() inspects.
		$output = $this->getCachedParserOutput( $page, $parserOptions, $revision, $options );
		if ( $output ) {
			return Status::newGood( $output );
		}
	}

	if ( !$revision ) {
		// No explicit revision: fall back to the page's latest one.
		$revId = $page->getLatest();
		$revision = $revId ? $this->revisionLookup->getRevisionById( $revId ) : null;

		if ( !$revision ) {
			$this->statsFactory
				->getCounter( 'parseroutputaccess_status' )
				->setLabel( 'status', 'norev' )
				->copyToStatsdAt( "ParserOutputAccess.Status.norev" )
				->increment();
			return Status::newFatal( 'missing-revision', $revId );
		}
	}

	if ( $options & self::OPT_FOR_ARTICLE_VIEW ) {
		$work = $this->newPoolWorkArticleView( $page, $parserOptions, $revision, $options );
		/** @var Status $status */
		$status = $work->execute();
	} else {
		// XXX: we could try harder to reuse a cache lookup above to
		// provide the $previous argument here
		$status = $this->renderRevision( $page, $parserOptions, $revision, $options, null );
	}

	$output = $status->getValue();
	Assert::postcondition( $output || !$status->isOK(), 'Inconsistent status' );

	if ( $output && !$isOld ) {
		// Remember output for the current revision for later calls on this instance.
		$primaryCache = $this->getPrimaryCache( $parserOptions );
		$classCacheKey = $primaryCache->makeParserOutputKey( $page, $parserOptions );
		$this->localCache->setField( $classCacheKey, $page->getLatest(), $output );
	}

	if ( $status->isGood() ) {
		$statusLabel = 'good';
	} elseif ( $status->isOK() ) {
		$statusLabel = 'ok';
	} else {
		$statusLabel = 'error';
	}
	$this->statsFactory->getCounter( 'parseroutputaccess_status' )
		->setLabel( 'status', $statusLabel )
		->copyToStatsdAt( "ParserOutputAccess.Status.$statusLabel" )
		->increment();

	return $status;
}
404
/**
 * Render the given revision directly, without PoolCounter protection, and save
 * the result to the primary or secondary cache when that is allowed.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord $revision
 * @param int $options Bitfield using the OPT_XXX constants
 * @param ParserOutput|null $previousOutput Earlier output passed to the renderer as
 *   'previous-output'. If null, a possibly stale entry may be fetched from the
 *   primary cache for this purpose.
 * @return Status A good Status wrapping the rendered ParserOutput
 */
private function renderRevision(
	PageRecord $page,
	ParserOptions $parserOptions,
	RevisionRecord $revision,
	int $options,
	?ParserOutput $previousOutput = null
): Status {
	// Not read again below; the span is only held for the duration of the call.
	$span = $this->startOperationSpan( __FUNCTION__, $page, $revision );
	$this->statsFactory->getCounter( 'parseroutputaccess_poolwork' )
		->copyToStatsdAt( 'ParserOutputAccess.PoolWork.None' )
		->setLabel( 'cache', self::CACHE_NONE )
		->increment();

	$cacheType = $this->shouldUseCache( $page, $revision );

	// T371713: Temporary statistics collection code to determine
	// feasibility of Parsoid selective update
	$mainConfig = MediaWikiServices::getInstance()->getMainConfig();
	$sampleRate = $mainConfig->get( MainConfigNames::ParsoidSelectiveUpdateSampleRate );
	// With a non-zero rate N, this is true for one call in N on average.
	$isSampled = $sampleRate && mt_rand( 1, $sampleRate ) === 1;

	// If $cacheType === self::CACHE_SECONDARY we could potentially
	// try to reuse the parse of $revision-1 from the secondary cache,
	// but it is likely those template transclusions are out of date.
	// Try to reuse the template transclusions from the most recent
	// parse, which are more likely to reflect the current template.
	$wantsPrevious = $previousOutput === null
		&& ( $isSampled || $parserOptions->getUseParsoid() )
		&& !( $options & self::OPT_NO_CHECK_CACHE );
	if ( $wantsPrevious ) {
		$dirtyOutput = $this->getPrimaryCache( $parserOptions )->getDirty( $page, $parserOptions );
		$previousOutput = $dirtyOutput ?: null;
	}

	$renderHints = [
		'audience' => RevisionRecord::RAW,
		'previous-output' => $previousOutput,
	];
	$output = $this->revisionRenderer
		->getRenderedRevision( $revision, $parserOptions, null, $renderHints )
		->getRevisionParserOutput();

	if ( $isSampled ) {
		$mainContent = $revision->getContent( SlotRecord::MAIN );
		$labels = [
			'source' => 'ParserOutputAccess',
			'type' => $previousOutput === null ? 'full' : 'selective',
			'reason' => $parserOptions->getRenderReason(),
			'parser' => $parserOptions->getUseParsoid() ? 'parsoid' : 'legacy',
			'opportunistic' => 'false',
			'wiki' => WikiMap::getCurrentWikiId(),
			'model' => $mainContent ? $mainContent->getModel() : 'unknown',
		];
		$this->statsFactory
			->getCounter( 'ParserCache_selective_total' )
			->setLabels( $labels )
			->increment();
		$this->statsFactory
			->getCounter( 'ParserCache_selective_cpu_seconds' )
			->setLabels( $labels )
			->incrementBy( $output->getTimeProfile( 'cpu' ) );
	}

	// Only cacheable output is saved, and only when the caller allows cache updates.
	$maySave = !( $options & self::OPT_NO_UPDATE_CACHE ) && $output->isCacheable();
	if ( $maySave && $cacheType === self::CACHE_PRIMARY ) {
		$this->getPrimaryCache( $parserOptions )->save( $output, $page, $parserOptions );
	} elseif ( $maySave && $cacheType === self::CACHE_SECONDARY ) {
		$this->getSecondaryCache( $parserOptions )->save( $output, $revision, $parserOptions );
	}

	if ( $options & self::OPT_LINKS_UPDATE ) {
		$wikiPage = $this->wikiPageFactory->newFromTitle( $page );
		$wikiPage->triggerOpportunisticLinksUpdate( $output );
	}

	return Status::newGood( $output );
}
502
/**
 * Validate the page/revision/options combination before any rendering is attempted.
 *
 * @param PageRecord $page
 * @param RevisionRecord|null $revision
 * @param int $options Bitfield using the OPT_XXX constants
 * @return Status|null A fatal Status if the page does not exist or the revision's
 *   text is not publicly visible; null if all checks pass
 * @throws InvalidArgumentException If the revision has no ID while cache updates
 *   are enabled, or if the revision does not belong to the given page
 */
private function checkPreconditions(
	PageRecord $page,
	?RevisionRecord $revision = null,
	int $options = 0
): ?Status {
	if ( !$page->exists() ) {
		return Status::newFatal( 'nopagetext' );
	}

	// All remaining checks concern an explicitly given revision.
	if ( !$revision ) {
		return null;
	}

	$mayUpdateCache = !( $options & self::OPT_NO_UPDATE_CACHE );
	if ( $mayUpdateCache && !$revision->getId() ) {
		throw new InvalidArgumentException(
			'The revision does not have a known ID. Use OPT_NO_CACHE.'
		);
	}

	if ( $revision->getPageId() !== $page->getId() ) {
		throw new InvalidArgumentException(
			'The revision does not belong to the given page.'
		);
	}

	if ( $options & self::OPT_NO_AUDIENCE_CHECK ) {
		// The caller has taken responsibility for access checks.
		return null;
	}

	// NOTE: If per-user checks are desired, the caller should perform them and
	//       then set OPT_NO_AUDIENCE_CHECK if they passed.
	if ( $revision->audienceCan( RevisionRecord::DELETED_TEXT, RevisionRecord::FOR_PUBLIC ) ) {
		return null;
	}

	return Status::newFatal(
		'missing-revision-permission',
		$revision->getId(),
		$revision->getTimestamp(),
		$this->titleFormatter->getPrefixedDBkey( $page )
	);
}
546
/**
 * Create the PoolCounterWork object that renders the given revision for a page view.
 *
 * Which work class is used depends on shouldUseCache(): one backed by the primary
 * cache for the current revision, one backed by the secondary cache for old
 * revisions, and an uncached one otherwise. A counter is incremented per case.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOptions
 * @param RevisionRecord $revision
 * @param int $options Bitfield using the OPT_XXX constants; OPT_NO_UPDATE_CACHE and
 *   OPT_LINKS_UPDATE are passed on to the work object for the current revision
 * @return PoolCounterWork
 */
protected function newPoolWorkArticleView(
	PageRecord $page,
	ParserOptions $parserOptions,
	RevisionRecord $revision,
	int $options
): PoolCounterWork {
	$useCache = $this->shouldUseCache( $page, $revision );

	switch ( $useCache ) {
		case self::CACHE_PRIMARY:
			$this->statsFactory->getCounter( 'parseroutputaccess_poolwork' )
				->setLabel( 'cache', self::CACHE_PRIMARY )
				->copyToStatsdAt( 'ParserOutputAccess.PoolWork.Current' )
				->increment();
			$primaryCache = $this->getPrimaryCache( $parserOptions );
			$parserCacheMetadata = $primaryCache->getMetadata( $page );
			$cacheKey = $primaryCache->makeParserOutputKey( $page, $parserOptions,
				$parserCacheMetadata ? $parserCacheMetadata->getUsedOptions() : null
			);

			// The revision ID is part of the work key, so work on different
			// revisions of the same page gets distinct keys.
			$workKey = $cacheKey . ':revid:' . $revision->getId();

			// NOTE(review): the line opening this call was missing from the listing.
			// PoolWorkArticleViewCurrent is the counterpart of PoolWorkArticleViewOld
			// for the current revision; confirm against the upstream file.
			return new PoolWorkArticleViewCurrent(
				$workKey,
				$page,
				$revision,
				$parserOptions,
				$this->revisionRenderer,
				$primaryCache,
				$this->lbFactory,
				$this->chronologyProtector,
				$this->loggerSpi,
				$this->wikiPageFactory,
				!( $options & self::OPT_NO_UPDATE_CACHE ),
				(bool)( $options & self::OPT_LINKS_UPDATE )
			);

		case self::CACHE_SECONDARY:
			$this->statsFactory->getCounter( 'parseroutputaccess_poolwork' )
				->setLabel( 'cache', self::CACHE_SECONDARY )
				->copyToStatsdAt( 'ParserOutputAccess.PoolWork.Old' )
				->increment();
			$secondaryCache = $this->getSecondaryCache( $parserOptions );
			$workKey = $secondaryCache->makeParserOutputKey( $revision, $parserOptions );
			return new PoolWorkArticleViewOld(
				$workKey,
				$secondaryCache,
				$revision,
				$parserOptions,
				$this->revisionRenderer,
				$this->loggerSpi
			);

		default:
			$this->statsFactory->getCounter( 'parseroutputaccess_poolwork' )
				->setLabel( 'cache', self::CACHE_NONE )
				->copyToStatsdAt( 'ParserOutputAccess.PoolWork.Uncached' )
				->increment();
			// The secondary cache is only used to derive a work key here;
			// nothing is read from or written to it in this branch.
			$secondaryCache = $this->getSecondaryCache( $parserOptions );
			$workKey = $secondaryCache->makeParserOutputKeyOptionalRevId( $revision, $parserOptions );
			return new PoolWorkArticleView(
				$workKey,
				$revision,
				$parserOptions,
				$this->revisionRenderer,
				$this->loggerSpi
			);
	}
}
626
/**
 * Get the ParserCache instance matching the given parser options.
 *
 * @param ParserOptions $pOpts
 * @return ParserCache The Parsoid-specific cache if Parsoid output is requested,
 *   the default parser cache otherwise
 */
private function getPrimaryCache( ParserOptions $pOpts ): ParserCache {
	$cacheName = $pOpts->getUseParsoid()
		? self::PARSOID_PCACHE_NAME
		: ParserCacheFactory::DEFAULT_NAME;

	return $this->parserCacheFactory->getParserCache( $cacheName );
}
638
/**
 * Get the RevisionOutputCache instance matching the given parser options.
 *
 * @param ParserOptions $pOpts
 * @return RevisionOutputCache The Parsoid-specific cache if Parsoid output is
 *   requested, the default revision output cache otherwise
 */
private function getSecondaryCache( ParserOptions $pOpts ): RevisionOutputCache {
	$cacheName = $pOpts->getUseParsoid()
		? self::PARSOID_RCACHE_NAME
		: ParserCacheFactory::DEFAULT_RCACHE_NAME;

	return $this->parserCacheFactory->getRevisionOutputCache( $cacheName );
}
650
/**
 * Create, start and activate a tracing span for one of this class's operations.
 *
 * Page and revision attributes are attached only when the span's context
 * reports that it is sampled.
 *
 * @param string $opName Operation name, used as the "ParserOutputAccess::" span name suffix
 * @param PageRecord $page
 * @param RevisionRecord|null $revision
 * @return SpanInterface The started and activated span
 */
private function startOperationSpan(
	string $opName,
	PageRecord $page,
	?RevisionRecord $revision = null
): SpanInterface {
	$span = $this->tracer->createSpan( "ParserOutputAccess::$opName" );
	$isSampled = $span->getContext()->isSampled();

	if ( $isSampled ) {
		$pageAttributes = [
			'org.wikimedia.parser.page' => $page->__toString(),
			'org.wikimedia.parser.page.id' => $page->getId(),
			'org.wikimedia.parser.page.wiki' => $page->getWikiId(),
		];
		$span->setAttributes( $pageAttributes );
	}

	if ( $isSampled && $revision ) {
		$revisionAttributes = [
			'org.wikimedia.parser.revision.id' => $revision->getId(),
			'org.wikimedia.parser.revision.parent_id' => $revision->getParentId(),
		];
		$span->setAttributes( $revisionAttributes );
	}

	$started = $span->start();
	return $started->activate();
}
672}
const CACHE_NONE
Definition Defines.php:87
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Store key-value entries in a size-limited in-memory LRU cache.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Service for getting rendered output of a given page.
const OPT_FOR_ARTICLE_VIEW
Apply page view semantics.
getCachedParserOutput(PageRecord $page, ParserOptions $parserOptions, ?RevisionRecord $revision=null, int $options=0)
Returns the rendered output for the given page if it is present in the cache.
newPoolWorkArticleView(PageRecord $page, ParserOptions $parserOptions, RevisionRecord $revision, int $options)
__construct(ParserCacheFactory $parserCacheFactory, RevisionLookup $revisionLookup, RevisionRenderer $revisionRenderer, StatsFactory $statsFactory, ILBFactory $lbFactory, ChronologyProtector $chronologyProtector, LoggerSpi $loggerSpi, WikiPageFactory $wikiPageFactory, TitleFormatter $titleFormatter, TracerInterface $tracer)
getParserOutput(PageRecord $page, ParserOptions $parserOptions, ?RevisionRecord $revision=null, int $options=0)
Returns the rendered output for the given page.
Service for creating WikiPage objects.
Cache for ParserOutput objects corresponding to the latest page revisions.
Set options of the Parser.
getUseParsoid()
Parsoid-format HTML output, or legacy wikitext parser HTML?
ParserOutput is a rendering of a Content object or a message.
Provides methods for conversion between PageBundle and ParserOutput. TODO: Convert to a trait once we ...
Cache for ParserOutput objects.
Class for dealing with PoolCounters using class members.
PoolWorkArticleView for the current revision of a page, using ParserCache.
PoolWorkArticleView for an old revision of a page, using a simple cache.
PoolCounter protected work wrapping RenderedRevision->getRevisionParserOutput.
Page revision base class.
getParentId( $wikiId=self::LOCAL)
Get parent revision ID (the original previous page revision).
audienceCan( $field, $audience, ?Authority $performer=null)
Check that the given audience has access to the given field.
getId( $wikiId=self::LOCAL)
Get revision ID.
The RevisionRenderer service provides access to rendered output for revisions.
Value object representing a content slot associated with a page revision.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition Status.php:54
Tools for dealing with other locally-hosted wikis.
Definition WikiMap.php:31
Provide a given client with protection against visible database lag.
This is the primary interface for validating metrics definitions, caching defined metrics,...
Service provider interface to create \Psr\Log\LoggerInterface objects.
Definition Spi.php:64
exists()
Checks if the page currently exists.
Data record representing a page that is (or used to be, or could be) an editable page on a wiki.
getLatest( $wikiId=self::LOCAL)
The ID of the page's latest revision.
Service for looking up page revisions.
A title formatter service for MediaWiki.
Manager of ILoadBalancer objects and, indirectly, IDatabase connections.
Represents an OpenTelemetry span, i.e. a single operation within a trace.
Base interface for an OpenTelemetry tracer responsible for creating spans.