MediaWiki REL1_40
ParsoidOutputAccess.php
Go to the documentation of this file.
1<?php
21
22use Config;
23use HashConfig;
use IBufferingStatsdDataFactory;
25use InvalidArgumentException;
26use Liuggio\StatsdClient\Factory\StatsdDataFactory;
42use ParserCache;
44use ParserOutput;
45use Status;
46use Wikimedia\Parsoid\Config\PageConfig;
47use Wikimedia\Parsoid\Config\SiteConfig;
48use Wikimedia\Parsoid\Core\ClientError;
49use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
50use Wikimedia\Parsoid\Parsoid;
52
/**
 * Cache name handed to ParserCacheFactory for both the parser cache and the
 * revision output cache that hold Parsoid output.
 */
public const PARSOID_PARSER_CACHE_NAME = 'parsoid';

/**
 * Extension-data key under which the render ID is stored on a ParserOutput.
 */
private const RENDER_ID_KEY = 'parsoid-render-id';

/**
 * Bit flag for getParserOutput(): skip the cache lookup and always parse.
 */
public const OPT_FORCE_PARSE = 1;

/**
 * Bit flag for getParserOutput(): never write the parse result to a cache.
 */
public const OPT_NO_UPDATE_CACHE = 2;

/**
 * Bit flag for getParserOutput(): pass 'logLinterData' => true to Parsoid.
 */
public const OPT_LOG_LINT_DATA = 64;

/**
 * Option keys this service expects in its ServiceOptions.
 *
 * The constructor asserts this list and the class reads each of these keys through
 * $options->get(), so every key that is read has to be declared here.
 */
public const CONSTRUCTOR_OPTIONS = [
	MainConfigNames::ParsoidCacheConfig,
	MainConfigNames::ParsoidSettings,
	'ParsoidWikiID'
];
88
/** @var RevisionOutputCache Cache consulted/written for non-latest revisions; obtained from ParserCacheFactory */
90 private $revisionOutputCache;
91
/** @var ParserCache Cache consulted/written for the latest revision of a page; obtained from ParserCacheFactory */
93 private $parserCache;
94
/** @var GlobalIdGenerator Source of the UUIDv1 used when building a render ID in parse() */
96 private $globalIdGenerator;
97
/** @var IBufferingStatsdDataFactory Stats sink; increment() is called on it for cache get/save counters */
99 private $stats;
100
/** @var HashConfig Wraps the ParsoidCacheConfig option; 'CacheThresholdTime' is read from it */
102 private $parsoidCacheConfig;
103
/** @var Parsoid Parsoid instance whose wikitext2html() performs the actual parse */
105 private $parsoid;
106
/** @var PageConfigFactory Builds the Parsoid PageConfig for a page/revision in parse() */
108 private $parsoidPageConfigFactory;
109
/** @var PageLookup Used to resolve a PageIdentity into a PageRecord */
111 private $pageLookup;
112
/** @var RevisionLookup Used to resolve an integer revision ID into a revision object */
114 private $revisionLookup;
115
/** @var SiteConfig Parsoid site configuration; asked for content model handlers */
117 private $siteConfig;
118
/** @var ServiceOptions Options passed to the constructor; ParsoidSettings is read from it in parse() */
120 private $options;
121
/**
 * Value of the 'ParsoidWikiID' option, passed to Parsoid as the "prefix" option.
 * NOTE(review): presumably a string; the type is not visible here.
 */
123 private $parsoidWikiId;
124
/** @var IContentHandlerFactory Used to look up the content handler for a content model */
126 private $contentHandlerFactory;
127
/**
 * Wire up the service from its collaborators.
 *
 * @param ServiceOptions $options Must provide exactly the keys in self::CONSTRUCTOR_OPTIONS
 * @param ParserCacheFactory $parserCacheFactory Source of the parser cache and the
 *   revision output cache, both named self::PARSOID_PARSER_CACHE_NAME
 * @param PageLookup $pageLookup
 * @param RevisionLookup $revisionLookup
 * @param GlobalIdGenerator $globalIdGenerator
 * @param IBufferingStatsdDataFactory $stats Receives the cache get/save counters
 * @param Parsoid $parsoid
 * @param SiteConfig $siteConfig
 * @param PageConfigFactory $parsoidPageConfigFactory
 * @param IContentHandlerFactory $contentHandlerFactory
 */
public function __construct(
	ServiceOptions $options,
	ParserCacheFactory $parserCacheFactory,
	PageLookup $pageLookup,
	RevisionLookup $revisionLookup,
	GlobalIdGenerator $globalIdGenerator,
	IBufferingStatsdDataFactory $stats,
	Parsoid $parsoid,
	SiteConfig $siteConfig,
	PageConfigFactory $parsoidPageConfigFactory,
	IContentHandlerFactory $contentHandlerFactory
) {
	$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
	$this->options = $options;
	$this->parsoidCacheConfig = new HashConfig( $options->get( MainConfigNames::ParsoidCacheConfig ) );
	$this->revisionOutputCache = $parserCacheFactory
		->getRevisionOutputCache( self::PARSOID_PARSER_CACHE_NAME );
	$this->parserCache = $parserCacheFactory->getParserCache( self::PARSOID_PARSER_CACHE_NAME );
	$this->pageLookup = $pageLookup;
	$this->revisionLookup = $revisionLookup;
	$this->globalIdGenerator = $globalIdGenerator;
	// $stats must be a real parameter: the get/save counters call increment() on it.
	$this->stats = $stats;
	$this->parsoid = $parsoid;
	$this->siteConfig = $siteConfig;
	$this->parsoidPageConfigFactory = $parsoidPageConfigFactory;
	$this->contentHandlerFactory = $contentHandlerFactory;

	// NOTE: This is passed as the "prefix" option to parsoid, which it uses
	//       to locate wiki specific configuration in the baseconfig directory.
	//       This should probably be managed by SiteConfig instead, so
	//       we hopefully will not need it here in the future.
	$this->parsoidWikiId = $options->get( 'ParsoidWikiID' );
}
173
/**
 * Tell whether Parsoid output can be produced for the given content model.
 *
 * Wikitext is always supported. Any other model is supported when its content handler's
 * default format is wikitext, or when the Parsoid SiteConfig has a handler for it.
 * A model unknown to the content handler factory is never supported.
 *
 * @param string $model Content model ID
 * @return bool
 */
public function supportsContentModel( string $model ): bool {
	if ( $model !== CONTENT_MODEL_WIKITEXT ) {
		// Does the model serialize to wikitext by default?
		// NOTE: isSupportedFormat( CONTENT_FORMAT_WIKITEXT ) could be used instead if
		//       PageContent::getContent() specified the format when calling serialize().
		try {
			$defaultFormat = $this->contentHandlerFactory
				->getContentHandler( $model )
				->getDefaultFormat();
		} catch ( MWUnknownContentModelException $ex ) {
			// An unregistered content model cannot be supported.
			return false;
		}

		if ( $defaultFormat !== CONTENT_FORMAT_WIKITEXT ) {
			// Last resort: Parsoid itself may have a handler for this model.
			return $this->siteConfig->getContentModelHandler( $model ) !== null;
		}
	}

	return true;
}
199
/**
 * Get Parsoid output for a page revision, from cache when possible, otherwise by parsing.
 *
 * A fresh parse is written back to the matching cache unless the parse failed,
 * OPT_NO_UPDATE_CACHE is set, the main slot's content model is unsupported, or the parse
 * took no longer than the configured 'CacheThresholdTime'. Each outcome bumps a stats counter.
 *
 * @param PageIdentity $page
 * @param ParserOptions $parserOpts
 * @param mixed $revision Null for the page's latest revision, otherwise a revision ID or
 *   revision object (see resolveRevision())
 * @param int $options Bit field of the OPT_* constants
 * @return Status Good status wrapping a ParserOutput, or the failed status from parse()
 */
public function getParserOutput(
	PageIdentity $page,
	ParserOptions $parserOpts,
	$revision = null,
	int $options = 0
): Status {
	[ $page, $revision ] = $this->resolveRevision( $page, $revision );
	$isOld = $revision->getId() !== $page->getLatest();

	if ( $isOld ) {
		$statsKey = 'ParsoidOutputAccess.Cache.revision';
	} else {
		$statsKey = 'ParsoidOutputAccess.Cache.parser';
	}

	$forceParse = (bool)( $options & self::OPT_FORCE_PARSE );
	if ( !$forceParse ) {
		$cached = $this->getCachedParserOutputInternal( $page, $parserOpts, $revision, $isOld, $statsKey );
		if ( $cached ) {
			return Status::newGood( $cached );
		}
	}

	$parsoidOptions = ( $options & self::OPT_LOG_LINT_DATA )
		? [ 'logLinterData' => true ]
		: [];

	// Fetched before parsing, exactly as the slot lookup has always been ordered.
	$mainSlot = $revision->getSlot( SlotRecord::MAIN );

	$parseStart = microtime( true );
	$status = $this->parse( $page, $parserOpts, $parsoidOptions, $revision );
	$elapsed = microtime( true ) - $parseStart;

	if ( !$status->isOK() ) {
		$outcome = '.save.notok';
	} elseif ( $options & self::OPT_NO_UPDATE_CACHE ) {
		$outcome = '.save.disabled';
	} elseif ( !$this->supportsContentModel( $mainSlot->getModel() ) ) {
		// TODO: We really want to cache for all supported content models.
		// But supportsContentModels() lies, because of T324711.
		// This causes us to render garbage output for all content models, which we shouldn't cache.
		// NOTE: this will become irrelevant when we implement T311648.
		$outcome = '.save.badmodel';
	} elseif ( $elapsed > $this->parsoidCacheConfig->get( 'CacheThresholdTime' ) ) {
		$output = $status->getValue();
		$cacheTime = $output->getCacheTime();

		// Old revisions go to the revision output cache, the latest one to the parser cache.
		if ( $isOld ) {
			$this->revisionOutputCache->save( $output, $revision, $parserOpts, $cacheTime );
		} else {
			$this->parserCache->save( $output, $page, $parserOpts, $cacheTime );
		}
		$outcome = '.save.ok';
	} else {
		// Cheap parses are not worth a cache entry.
		$outcome = '.save.skipfast';
	}
	$this->stats->increment( $statsKey . $outcome );

	return $status;
}
275
/**
 * Run Parsoid's wikitext2html() for the given page config and wrap the result in a Status.
 *
 * Parses taking more than three seconds are reported to the 'slow-parsoid' log channel.
 *
 * @param PageConfig $pageConfig
 * @param array $parsoidOptions Options for Parsoid; these take precedence over the defaults
 *   built here
 * @return Status Good status wrapping a ParserOutput, or a fatal status for a ClientError
 *   or ResourceLimitExceededException
 */
private function parseInternal(
	PageConfig $pageConfig,
	array $parsoidOptions
): Status {
	// Array union keeps the left-hand value, so caller-supplied options win.
	$effectiveOptions = $parsoidOptions + [
		'pageBundle' => true,
		'prefix' => $this->parsoidWikiId,
		'pageName' => $pageConfig->getTitle(),
		'htmlVariantLanguage' => $pageConfig->getPageLanguage(),
		'outputContentVersion' => Parsoid::defaultHTMLVersion(),
	];

	try {
		$begin = microtime( true );
		$output = new ParserOutput();
		$bundle = $this->parsoid->wikitext2html( $pageConfig, $effectiveOptions, $headers, $output );
		$output = PageBundleParserOutputConverter::parserOutputFromPageBundle( $bundle, $output );

		$elapsed = microtime( true ) - $begin;
		if ( $elapsed > 3 ) {
			$logger = LoggerFactory::getInstance( 'slow-parsoid' );
			$logger->info( 'Parsing {title} was slow, took {time} seconds', [
				'time' => number_format( $elapsed, 2 ),
				'title' => $pageConfig->getTitle(),
			] );
		}

		return Status::newGood( $output );
	} catch ( ClientError $e ) {
		return Status::newFatal( 'parsoid-client-error', $e->getMessage() );
	} catch ( ResourceLimitExceededException $e ) {
		return Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
	}
}
320
/**
 * Read the render ID stored on a ParserOutput under self::RENDER_ID_KEY.
 *
 * @param ParserOutput $parserOutput
 * @return ParsoidRenderID
 * @throws InvalidArgumentException If no render ID is stored on the ParserOutput
 */
public function getParsoidRenderID( ParserOutput $parserOutput ): ParsoidRenderID {
	// XXX: The ParserOutput may come from the parser cache, so be careful when changing
	//      how the render key is stored in the ParserOutput object.
	$key = $parserOutput->getExtensionData( self::RENDER_ID_KEY );
	if ( $key ) {
		return ParsoidRenderID::newFromKey( $key );
	}

	throw new InvalidArgumentException( 'ParserOutput does not have a render ID' );
}
339
/**
 * Look up cached Parsoid output for a page revision without ever parsing.
 *
 * @param PageIdentity $page
 * @param ParserOptions $parserOpts
 * @param mixed $revision Null for the page's latest revision, otherwise a revision ID or
 *   revision object (see resolveRevision())
 * @return ParserOutput|null The cached output, or null on a cache miss
 */
public function getCachedParserOutput(
	PageIdentity $page,
	ParserOptions $parserOpts,
	$revision = null
): ?ParserOutput {
	[ $pageRecord, $revisionRecord ] = $this->resolveRevision( $page, $revision );

	// Anything but the page's latest revision lives in the revision output cache.
	$isOld = $revisionRecord->getId() !== $pageRecord->getLatest();
	if ( $isOld ) {
		$statsKey = 'ParsoidOutputAccess.Cache.revision';
	} else {
		$statsKey = 'ParsoidOutputAccess.Cache.parser';
	}

	return $this->getCachedParserOutputInternal( $pageRecord, $parserOpts, $revisionRecord, $isOld, $statsKey );
}
365
/**
 * Fetch Parsoid output from the appropriate cache and record a hit or miss.
 *
 * Old revisions are looked up in the revision output cache, the latest revision in the
 * parser cache. A cached entry lacking the page bundle data or the render ID is treated
 * as a miss.
 *
 * NOTE(review): the visibility keyword is not visible in the listing this was restored
 * from; `protected` is used because it is compatible with either private or protected
 * callers. Confirm against the upstream file.
 *
 * @param PageRecord $page
 * @param ParserOptions $parserOpts
 * @param RevisionRecord|null $revision Used only when $isOld is true
 * @param bool $isOld Whether $revision is something other than the page's latest revision
 * @param string $statsKey Prefix for the '.get.hit' / '.get.miss' counters
 * @return ParserOutput|null
 */
protected function getCachedParserOutputInternal(
	PageRecord $page,
	ParserOptions $parserOpts,
	?RevisionRecord $revision,
	bool $isOld,
	string $statsKey
): ?ParserOutput {
	if ( $isOld ) {
		$parserOutput = $this->revisionOutputCache->get( $revision, $parserOpts );
	} else {
		$parserOutput = $this->parserCache->get( $page, $parserOpts );
	}

	if ( $parserOutput ) {
		// Ignore cached ParserOutput if it is incomplete,
		// because it was stored by an old version of the code.
		if ( !$parserOutput->getExtensionData( PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY )
			|| !$parserOutput->getExtensionData( self::RENDER_ID_KEY )
		) {
			$parserOutput = null;
		}
	}

	if ( $parserOutput ) {
		$this->stats->increment( $statsKey . '.get.hit' );
		return $parserOutput;
	}

	$this->stats->increment( $statsKey . '.get.miss' );
	return null;
}
406
/**
 * Build placeholder output for a content model Parsoid cannot render (T324711).
 *
 * @param string $contentModel The unsupported content model, named in the placeholder text
 * @return Status Good status wrapping the placeholder ParserOutput
 */
private function makeDummyParserOutput( string $contentModel ): Status {
	$dummy = new ParserOutput(
		"Dummy output. Parsoid does not support content model $contentModel. See T324711."
	);

	// Cheap to regenerate, so there is no need to keep it in the parser cache.
	$dummy->updateCacheExpiry( 0 );

	// Rendering dummy output still requires a render ID: T311728.
	$dummy->setExtensionData( self::RENDER_ID_KEY, '0/dummy-output' );

	return Status::newGood( $dummy );
}
418
/**
 * Parse a revision with Parsoid, bypassing all caches.
 *
 * On success the resulting ParserOutput is stamped with a new render ID, the revision ID
 * and the current time. Unsupported content models yield dummy output instead of an error.
 *
 * @param PageIdentity $page
 * @param ParserOptions $parserOpts Supplies the target language
 * @param array $parsoidOptions Options passed through to Parsoid
 * @param mixed $revision A RevisionRecord is used as is; anything else goes through
 *   resolveRevision()
 * @return Status Good status wrapping a ParserOutput, or the failed status from parseInternal()
 */
public function parse(
	PageIdentity $page,
	ParserOptions $parserOpts,
	array $parsoidOptions,
	$revision
): Status {
	// NOTE: A RevisionRecord can be used directly; resolving $page to a PageRecord is
	//       unnecessary then (and may be impossible if the page doesn't exist).
	if ( !( $revision instanceof RevisionRecord ) ) {
		[ $page, $revision ] = $this->resolveRevision( $page, $revision );
	}

	$contentModel = $revision->getSlot( SlotRecord::MAIN )->getModel();
	if ( !$this->supportsContentModel( $contentModel ) ) {
		// Messy fix for T324711; the real solution is T311648.
		// TODO: go back to throwing (an internal exception, converted to an HttpError in
		//       HtmlOutputRendererHelper) once RESTbase no longer expects to get a parsoid
		//       rendering for any kind of content (T324711).
		return $this->makeDummyParserOutput( $contentModel );
	}

	$pageConfig = $this->parsoidPageConfigFactory->create(
		$page,
		null,
		$revision,
		null,
		$parserOpts->getTargetLanguage(),
		$this->options->get( MainConfigNames::ParsoidSettings )
	);

	$status = $this->parseInternal( $pageConfig, $parsoidOptions );
	if ( $status->isOK() ) {
		$output = $status->getValue();
		$revId = $revision->getId();

		// TODO: with tighter Parsoid integration the render ID should become a standard
		//       ParserOutput property. Nothing else needs it now, so it is not generated
		//       in ParserCache just yet.
		$renderId = new ParsoidRenderID( $revId, $this->globalIdGenerator->newUUIDv1() );
		$output->setExtensionData( self::RENDER_ID_KEY, $renderId->getKey() );

		// XXX: ParserOutput should just always record the revision ID and timestamp
		$timestamp = wfTimestampNow();
		$output->setCacheRevisionId( $revId );
		$output->setCacheTime( $timestamp );
	}

	return $status;
}
487
/**
 * Normalise a page/revision pair.
 *
 * The page is looked up as a PageRecord unless it already is one. A null revision is
 * replaced by the page's latest revision ID, and an integer revision ID is loaded through
 * the revision lookup. Any other revision value is returned unchanged.
 *
 * @param PageIdentity $page
 * @param mixed $revision Null, an integer revision ID, or a revision object
 * @return array Two elements: the page record and the revision
 * @throws RevisionAccessException If the page or the revision cannot be found
 */
private function resolveRevision( PageIdentity $page, $revision ): array {
	if ( !( $page instanceof PageRecord ) ) {
		// Capture the name first; $page is overwritten by the lookup result.
		$name = (string)$page;
		$page = $this->pageLookup->getPageByReference( $page );
		if ( !$page ) {
			throw new RevisionAccessException( 'Page {name} not found', [ 'name' => $name ] );
		}
	}

	$revision = $revision ?? $page->getLatest();

	if ( is_int( $revision ) ) {
		$revId = $revision;
		$loaded = $this->revisionLookup->getRevisionById( $revId );
		if ( !$loaded ) {
			throw new RevisionAccessException( 'Revision {revId} not found', [ 'revId' => $revId ] );
		}
		$revision = $loaded;
	}

	return [ $page, $revision ];
}
524}
const CONTENT_MODEL_WIKITEXT
Definition Defines.php:211
const CONTENT_FORMAT_WIKITEXT
Wikitext.
Definition Defines.php:227
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
getExtensionData()
Get the extension data as: augmentor name => data.
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
Definition WebStart.php:88
setCacheRevisionId( $id)
setCacheTime( $t)
setCacheTime() sets the timestamp expressing when the page has been rendered.
Definition CacheTime.php:81
A Config instance which stores all settings as a member variable.
Exception thrown when an unregistered content model is requested.
A class for passing options to services.
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly matches $expectedKeys,...
PSR-3 logger instance factory.
A class containing constants representing the names of configuration variables.
const ParsoidCacheConfig
Name constant for the ParsoidCacheConfig setting, for use with Config::get()
const ParsoidSettings
Name constant for the ParsoidSettings setting, for use with Config::get()
getParserCache(string $name)
Get a ParserCache instance by $name.
getRevisionOutputCache(string $name)
Get a RevisionOutputCache instance by $name.
Helper class used by MediaWiki to create Parsoid PageConfig objects.
Site-level configuration for Parsoid.
MediaWiki service for getting Parsoid Output objects.
getParsoidRenderID(ParserOutput $parserOutput)
NOTE: This needs to be ParserOutput returned by ->getParserOutput() in this class.
getCachedParserOutputInternal(PageRecord $page, ParserOptions $parserOpts, ?RevisionRecord $revision, bool $isOld, string $statsKey)
__construct(ServiceOptions $options, ParserCacheFactory $parserCacheFactory, PageLookup $pageLookup, RevisionLookup $revisionLookup, GlobalIdGenerator $globalIdGenerator, IBufferingStatsdDataFactory $stats, Parsoid $parsoid, SiteConfig $siteConfig, PageConfigFactory $parsoidPageConfigFactory, IContentHandlerFactory $contentHandlerFactory)
parse(PageIdentity $page, ParserOptions $parserOpts, array $parsoidOptions, $revision)
getParserOutput(PageIdentity $page, ParserOptions $parserOpts, $revision=null, int $options=0)
getCachedParserOutput(PageIdentity $page, ParserOptions $parserOpts, $revision=null)
Represents the identity of a specific rendering of a specific revision at some point in time.
Cache for ParserOutput objects.
Exception representing a failure to look up a revision.
Page revision base class.
Value object representing a content slot associated with a page revision.
Cache for ParserOutput objects corresponding to the latest page revisions.
Set options of the Parser.
getTargetLanguage()
Target language for the parse.
getExtensionData( $key)
Gets extensions data previously attached to this ParserOutput using setExtensionData().
setExtensionData( $key, $value)
Attaches arbitrary data to this ParserOutput object.
static newFatal( $message,... $parameters)
Factory function for fatal errors.
static newGood( $value=null)
Factory function for good results.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:46
Class for getting statistically unique IDs without a central coordinator.
Interface for configuration instances.
Definition Config.php:30
MediaWiki adaptation of StatsdDataFactory that provides buffering functionality.
Interface for objects (potentially) representing an editable wiki page.
Service for looking up information about wiki pages.
Data record representing a page that is (or used to be, or could be) an editable page on a wiki.
Service for looking up page revisions.
Copyright (C) 2011-2022 Wikimedia Foundation and others.
return true
Definition router.php:92