25use InvalidArgumentException;
26use Liuggio\StatsdClient\Factory\StatsdDataFactory;
46use Wikimedia\Parsoid\Config\PageConfig;
47use Wikimedia\Parsoid\Config\SiteConfig;
48use Wikimedia\Parsoid\Core\ClientError;
49use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
50use Wikimedia\Parsoid\Parsoid;
63 public const PARSOID_PARSER_CACHE_NAME =
'parsoid';
68 private const RENDER_ID_KEY =
'parsoid-render-id';
71 public const OPT_FORCE_PARSE = 1;
76 public const OPT_NO_UPDATE_CACHE = 2;
81 public const OPT_LOG_LINT_DATA = 64;
90 private $revisionOutputCache;
96 private $globalIdGenerator;
102 private $parsoidCacheConfig;
108 private $parsoidPageConfigFactory;
114 private $revisionLookup;
123 private $parsoidWikiId;
126 private $contentHandlerFactory;
153 $this->options = $options;
155 $this->revisionOutputCache = $parserCacheFactory
157 $this->parserCache = $parserCacheFactory->
getParserCache( self::PARSOID_PARSER_CACHE_NAME );
158 $this->pageLookup = $pageLookup;
159 $this->revisionLookup = $revisionLookup;
160 $this->globalIdGenerator = $globalIdGenerator;
161 $this->stats = $stats;
162 $this->parsoid = $parsoid;
163 $this->siteConfig = $siteConfig;
164 $this->parsoidPageConfigFactory = $parsoidPageConfigFactory;
165 $this->contentHandlerFactory = $contentHandlerFactory;
171 $this->parsoidWikiId = $options->
get(
'ParsoidWikiID' );
188 $handler = $this->contentHandlerFactory->getContentHandler( $model );
197 return $this->siteConfig->getContentModelHandler( $model ) !==
null;
214 [ $page, $revision ] = $this->resolveRevision( $page, $revision );
215 $isOld = $revision->getId() !== $page->getLatest();
217 $statsKey = $isOld ?
'ParsoidOutputAccess.Cache.revision' :
'ParsoidOutputAccess.Cache.parser';
219 if ( !( $options & self::OPT_FORCE_PARSE ) ) {
220 $parserOutput = $this->getCachedParserOutputInternal(
228 if ( $parserOutput ) {
229 return Status::newGood( $parserOutput );
233 $parsoidOptions = [];
235 if ( $options & self::OPT_LOG_LINT_DATA ) {
237 'logLinterData' =>
true
241 $mainSlot = $revision->getSlot( SlotRecord::MAIN );
243 $startTime = microtime(
true );
244 $status = $this->parse( $page, $parserOpts, $parsoidOptions, $revision );
245 $time = microtime(
true ) - $startTime;
247 if ( !$status->isOK() ) {
248 $this->stats->increment( $statsKey .
'.save.notok' );
249 } elseif ( $options & self::OPT_NO_UPDATE_CACHE ) {
250 $this->stats->increment( $statsKey .
'.save.disabled' );
251 } elseif ( !$this->supportsContentModel( $mainSlot->getModel() ) ) {
256 $this->stats->increment( $statsKey .
'.save.badmodel' );
258 if ( $time > $this->parsoidCacheConfig->get(
'CacheThresholdTime' ) ) {
259 $parserOutput = $status->getValue();
263 $this->revisionOutputCache->save( $parserOutput, $revision, $parserOpts, $now );
265 $this->parserCache->save( $parserOutput, $page, $parserOpts, $now );
267 $this->stats->increment( $statsKey .
'.save.ok' );
269 $this->stats->increment( $statsKey .
'.save.skipfast' );
282 private function parseInternal(
283 PageConfig $pageConfig,
284 array $parsoidOptions
287 'pageBundle' =>
true,
288 'prefix' => $this->parsoidWikiId,
289 'pageName' => $pageConfig->
getTitle(),
290 'htmlVariantLanguage' => $pageConfig->getPageLanguage(),
291 'outputContentVersion' => Parsoid::defaultHTMLVersion(),
295 $startTime = microtime(
true );
297 $pageBundle = $this->parsoid->wikitext2html(
299 $parsoidOptions + $defaultOptions,
304 $parserOutput = PageBundleParserOutputConverter::parserOutputFromPageBundle( $pageBundle, $parserOutput );
305 $time = microtime(
true ) - $startTime;
307 LoggerFactory::getInstance(
'slow-parsoid' )
308 ->info(
'Parsing {title} was slow, took {time} seconds', [
309 'time' => number_format( $time, 2 ),
310 'title' => $pageConfig->getTitle(),
314 }
catch ( ClientError $e ) {
316 }
catch ( ResourceLimitExceededException $e ) {
317 return Status::newFatal(
'parsoid-resource-limit-exceeded', $e->getMessage() );
334 throw new InvalidArgumentException(
'ParserOutput does not have a render ID' );
337 return ParsoidRenderID::newFromKey( $renderId );
352 [ $page, $revision ] = $this->resolveRevision( $page, $revision );
353 $isOld = $revision->getId() !== $page->getLatest();
355 $statsKey = $isOld ?
'ParsoidOutputAccess.Cache.revision' :
'ParsoidOutputAccess.Cache.parser';
357 return $this->getCachedParserOutputInternal(
383 $parserOutput = $this->revisionOutputCache->get( $revision, $parserOpts );
385 $parserOutput = $this->parserCache->get( $page, $parserOpts );
388 if ( $parserOutput ) {
391 if ( !$parserOutput->
getExtensionData( PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY )
394 $parserOutput =
null;
398 if ( $parserOutput ) {
399 $this->stats->increment( $statsKey .
'.get.hit' );
400 return $parserOutput;
402 $this->stats->increment( $statsKey .
'.get.miss' );
407 private function makeDummyParserOutput(
string $contentModel ):
Status {
408 $msg =
"Dummy output. Parsoid does not support content model $contentModel. See T324711.";
412 $output->updateCacheExpiry( 0 );
414 $output->setExtensionData( self::RENDER_ID_KEY,
'0/dummy-output' );
430 array $parsoidOptions,
436 [ $page, $revision ] = $this->resolveRevision( $page, $revision );
439 $mainSlot = $revision->getSlot( SlotRecord::MAIN );
440 $contentModel = $mainSlot->getModel();
441 if ( !$this->supportsContentModel( $contentModel ) ) {
444 return $this->makeDummyParserOutput( $contentModel );
456 $pageConfig = $this->parsoidPageConfigFactory->create(
462 $this->options->get( MainConfigNames::ParsoidSettings )
465 $status = $this->parseInternal( $pageConfig, $parsoidOptions );
467 if ( !$status->isOK() ) {
471 $parserOutput = $status->getValue();
476 $revId = $revision->getId();
477 $parsoidRenderId =
new ParsoidRenderID( $revId, $this->globalIdGenerator->newUUIDv1() );
478 $parserOutput->
setExtensionData( self::RENDER_ID_KEY, $parsoidRenderId->getKey() );
494 private function resolveRevision( PageIdentity $page, $revision ): array {
495 if ( !$page instanceof PageRecord ) {
497 $page = $this->pageLookup->getPageByReference( $page );
499 throw new RevisionAccessException(
500 'Page {name} not found',
506 if ( $revision ===
null ) {
507 $revision = $page->getLatest();
510 if ( is_int( $revision ) ) {
512 $revision = $this->revisionLookup->getRevisionById( $revId );
515 throw new RevisionAccessException(
516 'Revision {revId} not found',
517 [
'revId' => $revId ]
522 return [ $page, $revision ];
const CONTENT_MODEL_WIKITEXT
const CONTENT_FORMAT_WIKITEXT
Wikitext.
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
getExtensionData()
Get the extension data as: augmentor name => data.
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
setCacheTime( $t)
setCacheTime() sets the timestamp expressing when the page has been rendered.
A Config instance which stores all settings as a member variable.
Exception thrown when an unregistered content model is requested.
A class containing constants representing the names of configuration variables.
const ParsoidCacheConfig
Name constant for the ParsoidCacheConfig setting, for use with Config::get()
const ParsoidSettings
Name constant for the ParsoidSettings setting, for use with Config::get()
Helper class used by MediaWiki to create Parsoid PageConfig objects.
Cache for ParserOutput objects corresponding to the latest page revisions.
Set options of the Parser.
getTargetLanguage()
Target language for the parse.
getExtensionData( $key)
Gets extensions data previously attached to this ParserOutput using setExtensionData().
setExtensionData( $key, $value)
Attaches arbitrary data to this ParserOutput.
static newFatal( $message,... $parameters)
Factory function for fatal errors.
static newGood( $value=null)
Factory function for good results.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Interface for configuration instances.
MediaWiki adaptation of StatsdDataFactory that provides buffering functionality.
Interface for objects (potentially) representing an editable wiki page.
Service for looking up information about wiki pages.