MediaWiki REL1_39
ParsoidOutputAccess.php
Go to the documentation of this file.
1<?php
21
22use Config;
23use HashConfig;
25use InvalidArgumentException;
26use Language;
27use Liuggio\StatsdClient\Factory\StatsdDataFactory;
40use ParserCache;
42use ParserOutput;
43use Status;
44use UnexpectedValueException;
45use Wikimedia\Parsoid\Config\SiteConfig;
46use Wikimedia\Parsoid\Core\ClientError;
47use Wikimedia\Parsoid\Core\PageBundle;
48use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
49use Wikimedia\Parsoid\Parsoid;
51
/**
 * Name passed to ParserCacheFactory to obtain the caches used for Parsoid output.
 */
public const PARSOID_PARSER_CACHE_NAME = 'parsoid';

/**
 * ParserOutput extension-data key under which the render ID key string is stored.
 */
private const RENDER_ID_KEY = 'parsoid-render-id';

/**
 * ParserOutput extension-data key under which the page bundle's
 * 'parsoid' and 'mw' data are stored.
 */
private const PARSOID_PAGE_BUNDLE_KEY = 'parsoid-page-bundle';

/**
 * Flag for getParserOutput(): skip the cache lookup and always parse.
 */
public const OPT_FORCE_PARSE = 1;

/**
 * Option keys the ServiceOptions object given to the constructor must provide.
 * The constructor reads MainConfigNames::ParsoidCacheConfig, so it has to be
 * listed here for the assertRequiredOptions() check to be meaningful.
 */
public const CONSTRUCTOR_OPTIONS = [
	MainConfigNames::ParsoidCacheConfig,
];

/** @var RevisionOutputCache Cache used for renderings of non-latest revisions */
private $revisionOutputCache;

/** @var ParserCache Cache used for renderings of the latest revision of a page */
private $parserCache;

/** @var GlobalIdGenerator Source of the UUIDs used in render IDs */
private $globalIdGenerator;

/** @var \IBufferingStatsdDataFactory Counter sink for cache hit/miss/save metrics */
private $stats;

/** @var Config Wraps the ParsoidCacheConfig setting (e.g. 'CacheThresholdTime') */
private $parsoidCacheConfig;

/** @var Parsoid */
private $parsoid;

/** @var PageConfigFactory */
private $parsoidPageConfigFactory;

/** @var RevisionLookup */
private $revisionLookup;

/** @var SiteConfig */
private $siteConfig;
108
/**
 * @param ServiceOptions $options Must provide the keys in self::CONSTRUCTOR_OPTIONS
 * @param ParserCacheFactory $parserCacheFactory Used to obtain both the ParserCache
 *   and the RevisionOutputCache named self::PARSOID_PARSER_CACHE_NAME
 * @param RevisionLookup $revisionLookup
 * @param GlobalIdGenerator $globalIdGenerator
 * @param \IBufferingStatsdDataFactory $stats
 * @param Parsoid $parsoid
 * @param SiteConfig $siteConfig
 * @param PageConfigFactory $parsoidPageConfigFactory
 */
public function __construct(
	ServiceOptions $options,
	ParserCacheFactory $parserCacheFactory,
	RevisionLookup $revisionLookup,
	GlobalIdGenerator $globalIdGenerator,
	// Fully qualified so that no additional `use` statement is required.
	\IBufferingStatsdDataFactory $stats,
	Parsoid $parsoid,
	SiteConfig $siteConfig,
	PageConfigFactory $parsoidPageConfigFactory
) {
	$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
	$this->parsoidCacheConfig = new HashConfig( $options->get( MainConfigNames::ParsoidCacheConfig ) );
	$this->revisionOutputCache = $parserCacheFactory
		->getRevisionOutputCache( self::PARSOID_PARSER_CACHE_NAME );
	$this->parserCache = $parserCacheFactory->getParserCache( self::PARSOID_PARSER_CACHE_NAME );
	$this->revisionLookup = $revisionLookup;
	$this->globalIdGenerator = $globalIdGenerator;
	$this->stats = $stats;
	$this->parsoid = $parsoid;
	$this->siteConfig = $siteConfig;
	$this->parsoidPageConfigFactory = $parsoidPageConfigFactory;
}
141
/**
 * Tell whether Parsoid can render content of the given model.
 *
 * Wikitext is always accepted; any other model is accepted only when the
 * site configuration returns a content model handler for it.
 *
 * @param string $model Content model ID
 * @return bool
 */
public function supportsContentModel( string $model ): bool {
	return $model === CONTENT_MODEL_WIKITEXT
		|| $this->siteConfig->getContentModelHandler( $model ) !== null;
}
154
/**
 * Get the Parsoid rendering of a page revision, from cache when possible.
 *
 * The latest revision of a page is looked up in (and saved to) the parser
 * cache; any other revision uses the revision output cache. A fresh
 * rendering is saved only when producing it took longer than the
 * 'CacheThresholdTime' value of the Parsoid cache configuration.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOpts
 * @param RevisionRecord|null $revision Defaults to the page's latest revision
 * @param int $options Bit field; see the OPT_XXX constants
 * @return Status Wraps a ParserOutput on success
 * @throws RevisionAccessException If the latest revision cannot be loaded
 * @throws UnexpectedValueException If the main slot's content model is unsupported
 */
public function getParserOutput(
	PageRecord $page,
	ParserOptions $parserOpts,
	?RevisionRecord $revision = null,
	int $options = 0
): Status {
	if ( $revision ) {
		$revId = $revision->getId();
	} else {
		// No revision supplied: fall back to the page's latest revision.
		$revId = $page->getLatest();
		$revision = $this->revisionLookup->getRevisionById( $revId );

		if ( !$revision ) {
			throw new RevisionAccessException(
				'Revision {revId} not found',
				[ 'revId' => $revId ]
			);
		}
	}

	// Old revisions and the latest revision use different caches and stats keys.
	$isOld = $revId !== $page->getLatest();
	if ( $isOld ) {
		$statsKey = 'ParsoidOutputAccess.Cache.revision';
	} else {
		$statsKey = 'ParsoidOutputAccess.Cache.parser';
	}

	$slot = $revision->getSlot( SlotRecord::MAIN );
	if ( !$this->supportsContentModel( $slot->getModel() ) ) {
		throw new UnexpectedValueException( 'Parsoid does not support content model ' . $slot->getModel() );
	}

	$forceParse = (bool)( $options & self::OPT_FORCE_PARSE );
	if ( !$forceParse ) {
		$cached = $this->getCachedParserOutput( $page, $parserOpts, $revision, $isOld, $statsKey );

		if ( $cached ) {
			return Status::newGood( $cached );
		}
	}

	$parseStart = microtime( true );
	$status = $this->parse( $page, $parserOpts, $revision );
	$elapsed = microtime( true ) - $parseStart;

	if ( !$status->isOK() ) {
		$this->stats->increment( $statsKey . '.save.notok' );
		return $status;
	}

	// Fast renderings are not worth caching.
	if ( !( $elapsed > $this->parsoidCacheConfig->get( 'CacheThresholdTime' ) ) ) {
		$this->stats->increment( $statsKey . '.save.skipfast' );
		return $status;
	}

	$output = $status->getValue();
	$cacheTime = $output->getCacheTime();

	if ( $isOld ) {
		$this->revisionOutputCache->save( $output, $revision, $parserOpts, $cacheTime );
	} else {
		$this->parserCache->save( $output, $page, $parserOpts, $cacheTime );
	}
	$this->stats->increment( $statsKey . '.save.ok' );

	return $status;
}
227
/**
 * Run Parsoid's wikitext-to-HTML conversion for a page and wrap the result.
 *
 * Renderings that take more than three seconds are reported on the
 * 'slow-parsoid' log channel.
 *
 * @param PageIdentity $page
 * @param RevisionRecord|null $revision
 * @param Language|null $languageOverride When given, its language code is
 *   passed on to the page config factory
 * @return Status Wraps a ParserOutput on success; fatal when Parsoid throws a
 *   ClientError or ResourceLimitExceededException
 */
private function parseInternal(
	PageIdentity $page,
	?RevisionRecord $revision = null,
	?Language $languageOverride = null
): Status {
	try {
		$langCode = $languageOverride ? $languageOverride->getCode() : null;
		$pageConfig = $this->parsoidPageConfigFactory->create(
			$page,
			null,
			$revision,
			null,
			$langCode
		);
		$startTime = microtime( true );
		$pageBundle = $this->parsoid->wikitext2html(
			$pageConfig,
			[ 'pageBundle' => true ]
		);
		$parserOutput = $this->createParserOutputFromPageBundle( $pageBundle );
		// The measured time covers both the Parsoid call and the ParserOutput wrapping.
		$time = microtime( true ) - $startTime;
		if ( $time > 3 ) {
			LoggerFactory::getInstance( 'slow-parsoid' )
				->info( 'Parsing {title} was slow, took {time} seconds', [
					'time' => number_format( $time, 2 ),
					'title' => (string)$page,
				] );
		}
		return Status::newGood( $parserOutput );
	} catch ( ClientError $e ) {
		return Status::newFatal( 'parsoid-client-error', $e->getMessage() );
	} catch ( ResourceLimitExceededException $e ) {
		return Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
	}
}
270
/**
 * Build a ParserOutput from a Parsoid PageBundle.
 *
 * The bundle's HTML becomes the output text; its 'parsoid' and 'mw' data are
 * attached as extension data under self::PARSOID_PAGE_BUNDLE_KEY.
 *
 * @param PageBundle $pageBundle
 * @return ParserOutput
 */
private function createParserOutputFromPageBundle( PageBundle $pageBundle ): ParserOutput {
	$output = new ParserOutput( $pageBundle->html );

	$bundleData = [
		'parsoid' => $pageBundle->parsoid,
		'mw' => $pageBundle->mw
	];
	$output->setExtensionData( self::PARSOID_PAGE_BUNDLE_KEY, $bundleData );

	return $output;
}
295
/**
 * Read the render ID stored in a ParserOutput's extension data.
 *
 * @param ParserOutput $parserOutput
 * @return ParsoidRenderID
 * @throws InvalidArgumentException If no render ID is stored on the output
 */
public function getParsoidRenderID( ParserOutput $parserOutput ): ParsoidRenderID {
	// XXX: The ParserOutput may come from the parser cache, so take care when
	// changing how the render key is stored in the ParserOutput object.
	$key = $parserOutput->getExtensionData( self::RENDER_ID_KEY );

	if ( $key ) {
		return ParsoidRenderID::newFromKey( $key );
	}

	throw new InvalidArgumentException( 'ParserOutput does not have a render ID' );
}
314
/**
 * Rebuild a Parsoid PageBundle from a ParserOutput.
 *
 * The bundle's HTML is the output's raw text; the 'parsoid' and 'mw' parts
 * come from the stored extension data and default to empty arrays when absent.
 *
 * @param ParserOutput $parserOutput
 * @return PageBundle
 */
public function getParsoidPageBundle( ParserOutput $parserOutput ): PageBundle {
	$stored = $parserOutput->getExtensionData( self::PARSOID_PAGE_BUNDLE_KEY );

	$html = $parserOutput->getRawText();
	$parsoidData = $stored['parsoid'] ?? [];
	$mwData = $stored['mw'] ?? [];

	return new PageBundle( $html, $parsoidData, $mwData );
}
330
/**
 * Look up a cached rendering and record a hit or miss in the stats.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOpts
 * @param RevisionRecord|null $revision
 * @param bool $isOld Whether to use the revision output cache instead of the parser cache
 * @param string $statsKey Prefix for the '.get.hit' / '.get.miss' counters
 * @return ParserOutput|null Null when nothing usable is cached
 */
protected function getCachedParserOutput(
	PageRecord $page,
	ParserOptions $parserOpts,
	?RevisionRecord $revision,
	bool $isOld,
	string $statsKey
): ?ParserOutput {
	$cached = $isOld
		? $this->revisionOutputCache->get( $revision, $parserOpts )
		: $this->parserCache->get( $page, $parserOpts );

	// A cached ParserOutput lacking the page bundle or the render ID is
	// incomplete (stored by an old version of the code) and is ignored.
	$usable = $cached
		&& $cached->getExtensionData( self::PARSOID_PAGE_BUNDLE_KEY )
		&& $cached->getExtensionData( self::RENDER_ID_KEY );

	if ( !$usable ) {
		$this->stats->increment( $statsKey . '.get.miss' );
		return null;
	}

	$this->stats->increment( $statsKey . '.get.hit' );
	return $cached;
}
371
/**
 * Parse a revision with Parsoid, bypassing the caches, and stamp the result
 * with a render ID, the revision ID and the current time.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOpts Supplies the target language for the parse
 * @param RevisionRecord|null $revision When null, the page's latest revision ID
 *   is recorded on the output
 * @return Status Wraps the ParserOutput on success; returned unchanged on failure
 */
public function parse( PageRecord $page, ParserOptions $parserOpts, ?RevisionRecord $revision ): Status {
	if ( $revision ) {
		$revId = $revision->getId();
	} else {
		$revId = $page->getLatest();
	}

	$status = $this->parseInternal( $page, $revision, $parserOpts->getTargetLanguage() );
	if ( !$status->isOK() ) {
		return $status;
	}

	$output = $status->getValue();

	// TODO: with tighter Parsoid integration the render ID should become a
	// standard ParserOutput property. Nothing else needs it now, so it is not
	// generated in ParserCache just yet.
	$renderId = new ParsoidRenderID( $revId, $this->globalIdGenerator->newUUIDv1() );
	$output->setExtensionData( self::RENDER_ID_KEY, $renderId->getKey() );

	// XXX: ParserOutput should just always record the revision ID and timestamp
	$timestamp = wfTimestampNow();
	$output->setCacheRevisionId( $revId );
	$output->setCacheTime( $timestamp );

	return $status;
}
402}
const CONTENT_MODEL_WIKITEXT
Definition Defines.php:211
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
getExtensionData()
Get the extension data as: augmentor name => data.
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
Definition WebStart.php:82
setCacheRevisionId( $id)
setCacheTime( $t)
setCacheTime() sets the timestamp expressing when the page has been rendered.
Definition CacheTime.php:81
A Config instance which stores all settings as a member variable.
Base class for language-specific code.
Definition Language.php:53
A class for passing options to services.
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
PSR-3 logger instance factory.
A class containing constants representing the names of configuration variables.
const ParsoidCacheConfig
Name constant for the ParsoidCacheConfig setting, for use with Config::get()
getParserCache(string $name)
Get a ParserCache instance by $name.
getRevisionOutputCache(string $name)
Get a RevisionOutputCache instance by $name.
Helper class used by MediaWiki to create Parsoid PageConfig objects.
Site-level configuration for Parsoid.
MediaWiki service for getting Parsoid Output objects.
getParsoidRenderID(ParserOutput $parserOutput)
NOTE: This needs to be ParserOutput returned by ->getParserOutput() in this class.
parse(PageRecord $page, ParserOptions $parserOpts, ?RevisionRecord $revision)
getCachedParserOutput(PageRecord $page, ParserOptions $parserOpts, ?RevisionRecord $revision, bool $isOld, string $statsKey)
getParserOutput(PageRecord $page, ParserOptions $parserOpts, ?RevisionRecord $revision=null, int $options=0)
__construct(ServiceOptions $options, ParserCacheFactory $parserCacheFactory, RevisionLookup $revisionLookup, GlobalIdGenerator $globalIdGenerator, IBufferingStatsdDataFactory $stats, Parsoid $parsoid, SiteConfig $siteConfig, PageConfigFactory $parsoidPageConfigFactory)
getParsoidPageBundle(ParserOutput $parserOutput)
Returns a Parsoid PageBundle equivalent to the given ParserOutput.
Represents the identity of a specific rendering of a specific revision at some point in time.
Cache for ParserOutput objects.
Exception representing a failure to look up a revision.
Page revision base class.
getSlot( $role, $audience=self::FOR_PUBLIC, Authority $performer=null)
Returns meta-data for the given slot.
Value object representing a content slot associated with a page revision.
Cache for ParserOutput objects corresponding to the latest page revisions.
Set options of the Parser.
getTargetLanguage()
Target language for the parse.
getExtensionData( $key)
Gets extensions data previously attached to this ParserOutput using setExtensionData().
getRawText()
Get the cacheable text with <mw:editsection> markers still in it.
setExtensionData( $key, $value)
Attaches arbitrary data to this ParserOutput.
static newFatal( $message,... $parameters)
Factory function for fatal errors.
static newGood( $value=null)
Factory function for good results.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:44
Class for getting statistically unique IDs without a central coordinator.
Interface for configuration instances.
Definition Config.php:30
MediaWiki adaptation of StatsdDataFactory that provides buffering functionality.
Interface for objects (potentially) representing an editable wiki page.
Data record representing a page that is (or used to be, or could be) an editable page on a wiki.
getLatest( $wikiId=self::LOCAL)
The ID of the page's latest revision.
Service for looking up page revisions.
Copyright (C) 2011-2022 Wikimedia Foundation and others.