MediaWiki 1.41.2
ParsoidParser.php
Go to the documentation of this file.
1<?php
2
4
15use ParserOutput;
16use Wikimedia\Assert\Assert;
17use Wikimedia\Parsoid\Config\PageConfig;
18use Wikimedia\Parsoid\Parsoid;
21
/**
 * Parser implementation which uses Parsoid.
 *
 * The public entry points are parse() (wikitext string, optionally tied to a
 * revision id) and parseFakeRevision() (an in-memory revision record). Both
 * build a Parsoid PageConfig and delegate to genParserOutput().
 */
class ParsoidParser /* eventually this will extend \Parser */ {
	/** @var Parsoid */
	private $parsoid;

	/** @var PageConfigFactory */
	private $pageConfigFactory;

	/** @var LanguageConverterFactory */
	private $languageConverterFactory;

	/** @var ParserFactory Stored by the constructor; not read by any method in this class. */
	private $legacyParserFactory;

	/** @var GlobalIdGenerator */
	private $globalIdGenerator;

	/**
	 * @param Parsoid $parsoid
	 * @param PageConfigFactory $pageConfigFactory
	 * @param LanguageConverterFactory $languageConverterFactory
	 * @param ParserFactory $legacyParserFactory
	 * @param GlobalIdGenerator $globalIdGenerator
	 */
	public function __construct(
		Parsoid $parsoid,
		PageConfigFactory $pageConfigFactory,
		LanguageConverterFactory $languageConverterFactory,
		ParserFactory $legacyParserFactory,
		GlobalIdGenerator $globalIdGenerator
	) {
		$this->parsoid = $parsoid;
		$this->pageConfigFactory = $pageConfigFactory;
		$this->languageConverterFactory = $languageConverterFactory;
		$this->legacyParserFactory = $legacyParserFactory;
		$this->globalIdGenerator = $globalIdGenerator;
	}

	/**
	 * Stamp the output with a fresh Parsoid render ID (revision id plus a
	 * newly generated UUIDv1) and record the revision id and the current
	 * time as cache metadata.
	 *
	 * @param int $revId
	 * @param ParserOutput $parserOutput
	 */
	private function setParsoidRenderID( int $revId, ParserOutput $parserOutput ): void {
		$parserOutput->setParsoidRenderId(
			new ParsoidRenderID( $revId, $this->globalIdGenerator->newUUIDv1() )
		);

		$now = wfTimestampNow();
		$parserOutput->setCacheRevisionId( $revId );
		$parserOutput->setCacheTime( $now );
	}

	/**
	 * Resolve the language to hand to the PageConfigFactory.
	 *
	 * Uses the target language from the options; when none is set and an
	 * interface message is being parsed, falls back to the user language.
	 * May return null, in which case the factory falls back to the title's
	 * page language.
	 *
	 * @param ParserOptions $options
	 * @return mixed Language object, or null if nothing was selected
	 */
	private function getParseLanguage( ParserOptions $options ) {
		$lang = $options->getTargetLanguage();
		if ( $lang === null && $options->getInterfaceMessage() ) {
			$lang = $options->getUserLangObj();
		}
		return $lang;
	}

	/**
	 * Run Parsoid's wikitext-to-HTML conversion for the given page
	 * configuration and package the result as a ParserOutput.
	 *
	 * Exceptions thrown by Parsoid are not caught here; callers are
	 * responsible for handling them.
	 *
	 * @param PageConfig $pageConfig
	 * @param ParserOptions $options
	 * @return ParserOutput
	 */
	private function genParserOutput(
		PageConfig $pageConfig, ParserOptions $options
	): ParserOutput {
		$parserOutput = new ParserOutput();

		// The enable/disable logic here matches that in Parser::internalParseHalfParsed(),
		// although __NOCONTENTCONVERT__ is handled internal to Parsoid.
		//
		// TODO: It might be preferable to handle __NOCONTENTCONVERT__ here rather than
		// by inspecting the DOM inside Parsoid. That will come in a separate patch.
		$htmlVariantLanguage = null;
		if ( !( $options->getDisableContentConversion() || $options->getInterfaceMessage() ) ) {
			// NOTES (some of these are TODOs for read views integration)
			// 1. This html variant conversion is a pre-cache transform. HtmlOutputRendererHelper
			//    has another variant conversion that is a post-cache transform based on the
			//    'Accept-Language' header. If that header is set, there is really no reason to
			//    do this conversion here. So, eventually, we are likely to either not pass in
			//    the htmlVariantLanguage option below OR disable language conversion from the
			//    wt2html path in Parsoid and this and the Accept-Language variant conversion
			//    both would have to be handled as post-cache transforms.
			//
			// 2. Parser.php calls convert() which computes a preferred variant from the
			//    target language. But, we cannot do that unconditionally here because REST API
			//    requests specify the exact variant via the 'Content-Language' header.
			//
			//    For Parsoid page views, either the callers will have to compute the
			//    preferred variant and set it in ParserOptions OR the REST API will have
			//    to set some other flag indicating that the preferred variant should not
			//    be computed. For now, I am adding a temporary hack, but this should be
			//    replaced with something more sensible.
			//
			// 3. Additionally, Parsoid's callers will have to set targetLanguage in ParserOptions
			//    to mimic the logic in Parser.php (missing right now).
			$langCode = $pageConfig->getPageLanguageBcp47();
			if ( $options->getRenderReason() === 'page-view' ) { // TEMPORARY HACK
				$langFactory = MediaWikiServices::getInstance()->getLanguageFactory();
				$lang = $langFactory->getLanguage( $langCode );
				$langConv = $this->languageConverterFactory->getLanguageConverter( $lang );
				$htmlVariantLanguage = $langFactory->getLanguage( $langConv->getPreferredVariant() );
			} else {
				$htmlVariantLanguage = $langCode;
			}
		}

		// NOTE: This is useless until the time Parsoid uses the
		// $options ParserOptions object. But if/when it does, this
		// will ensure that we track used options correctly.
		$options->registerWatcher( [ $parserOutput, 'recordOption' ] );

		$defaultOptions = [
			'pageBundle' => true,
			'wrapSections' => true,
			'logLinterData' => true,
			'body_only' => false,
			'htmlVariantLanguage' => $htmlVariantLanguage,
			'offsetType' => 'byte',
			'outputContentVersion' => Parsoid::defaultHTMLVersion()
		];

		// Defined explicitly so that an undefined variable is never handed to
		// Parsoid. NOTE(review): presumably wikitext2html() fills this in as an
		// out-parameter; the value is not read afterwards in this method.
		$headers = null;

		// This can throw ClientError or ResourceLimitExceededException.
		// Callers are responsible for figuring out how to handle them.
		$pageBundle = $this->parsoid->wikitext2html(
			$pageConfig,
			$defaultOptions,
			$headers,
			$parserOutput );

		$parserOutput = PageBundleParserOutputConverter::parserOutputFromPageBundle( $pageBundle, $parserOutput );

		// Register a watcher again because the $parserOutput arg
		// and $parserOutput return value above are different objects!
		$options->registerWatcher( [ $parserOutput, 'recordOption' ] );

		$revId = $pageConfig->getRevisionId();
		if ( $revId !== null ) {
			$this->setParsoidRenderID( $revId, $parserOutput );
		}

		// Copied from Parser.php::parse and should probably be abstracted
		// into the parent base class (probably as part of T236809)
		// Wrap non-interface parser output in a <div> so it can be targeted
		// with CSS (T37247)
		$class = $options->getWrapOutputClass();
		if ( $class !== false && !$options->getInterfaceMessage() ) {
			$parserOutput->addWrapperDivClass( $class );
		}

		$this->makeLimitReport( $options, $parserOutput );

		// Record Parsoid version in extension data; this allows
		// us to use the onRejectParserCacheValue hook to selectively
		// expire "bad" generated content in the event of a rollback.
		$parserOutput->setExtensionData(
			'core:parsoid-version', Parsoid::version()
		);

		return $parserOutput;
	}

	/**
	 * Convert wikitext to HTML.
	 * Do not call this function recursively.
	 *
	 * When $revId is given and the main content of that revision is exactly
	 * $text, the stored revision is parsed. Otherwise an unsaved revision
	 * record wrapping $text is built (carrying $revId, if any) and parsed
	 * instead.
	 *
	 * @param string $text Wikitext to parse
	 * @param PageReference $page
	 * @param ParserOptions $options
	 * @param bool $linestart Must be true; false is not yet supported
	 * @param bool $clearState Must be true; false is not yet supported
	 * @param int|null $revId Revision id to associate with the parse, if any
	 * @return ParserOutput
	 */
	public function parse(
		string $text, PageReference $page, ParserOptions $options,
		bool $linestart = true, bool $clearState = true, ?int $revId = null
	): ParserOutput {
		Assert::invariant( $linestart, '$linestart=false is not yet supported' );
		Assert::invariant( $clearState, '$clearState=false is not yet supported' );
		$title = Title::newFromPageReference( $page );
		$lang = $this->getParseLanguage( $options );
		$pageConfig = $revId === null ? null : $this->pageConfigFactory->create(
			$title,
			$options->getUserIdentity(),
			$revId,
			null, // unused
			$lang // defaults to title page language if null
		);
		if ( !( $pageConfig && $pageConfig->getPageMainContent() === $text ) ) {
			// This is a bit awkward!  But we really need to parse $text, which
			// may or may not correspond to the $revId provided!
			// T332928 suggests one solution: splitting the "have revid"
			// callers from the "bare text, no associated revision" callers.
			$revisionRecord = new MutableRevisionRecord( $title );
			if ( $revId !== null ) {
				$revisionRecord->setId( $revId );
			}
			$revisionRecord->setSlot(
				SlotRecord::newUnsaved(
					SlotRecord::MAIN,
					new WikitextContent( $text )
				)
			);
			$pageConfig = $this->pageConfigFactory->create(
				$title,
				$options->getUserIdentity(),
				$revisionRecord,
				null, // unused
				$lang // defaults to title page language if null
			);
		}

		return $this->genParserOutput( $pageConfig, $options );
	}

	/**
	 * Parse the content of a revision record supplied directly by the
	 * caller, rather than one looked up by revision id.
	 *
	 * @param RevisionRecord $fakeRev Revision record to parse
	 * @param PageReference $page
	 * @param ParserOptions $options
	 * @return ParserOutput
	 */
	public function parseFakeRevision(
		RevisionRecord $fakeRev, PageReference $page, ParserOptions $options
	): ParserOutput {
		$title = Title::newFromPageReference( $page );
		$lang = $this->getParseLanguage( $options );
		$pageConfig = $this->pageConfigFactory->create(
			$title,
			$options->getUserIdentity(),
			$fakeRev,
			null, // unused
			$lang // defaults to title page language if null
		);

		return $this->genParserOutput( $pageConfig, $options );
	}

	/**
	 * Set the limit report data in the current ParserOutput.
	 *
	 * Timing entries are only written when the corresponding clock value is
	 * available; the cache-related entries are always written.
	 *
	 * @param ParserOptions $parserOptions
	 * @param ParserOutput $parserOutput
	 */
	protected function makeLimitReport(
		ParserOptions $parserOptions, ParserOutput $parserOutput
	) {
		// The value is not used below. NOTE(review): the read is kept on
		// purpose because reading an option may notify the watcher registered
		// through registerWatcher(); confirm that before removing it.
		$maxIncludeSize = $parserOptions->getMaxIncludeSize();

		$cpuTime = $parserOutput->getTimeSinceStart( 'cpu' );
		if ( $cpuTime !== null ) {
			$parserOutput->setLimitReportData( 'limitreport-cputime',
				sprintf( "%.3f", $cpuTime )
			);
		}

		// Guarded like the cpu clock above: formatting a null value would
		// report a misleading "0.000" instead of "unknown".
		$wallTime = $parserOutput->getTimeSinceStart( 'wall' );
		if ( $wallTime !== null ) {
			$parserOutput->setLimitReportData( 'limitreport-walltime',
				sprintf( "%.3f", $wallTime )
			);
		}

		$parserOutput->setLimitReportData( 'limitreport-timingprofile', [ 'not yet supported' ] );

		// Add other cache related metadata
		$parserOutput->setLimitReportData( 'cachereport-timestamp',
			$parserOutput->getCacheTime() );
		$parserOutput->setLimitReportData( 'cachereport-ttl',
			$parserOutput->getCacheExpiry() );
		$parserOutput->setLimitReportData( 'cachereport-transientcontent',
			$parserOutput->hasReducedExpiry() );
	}

}
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
setCacheRevisionId( $id)
setCacheTime( $t)
setCacheTime() sets the timestamp expressing when the page has been rendered.
Definition CacheTime.php:82
getCacheExpiry()
Returns the number of seconds after which this object should expire.
An interface for creating language converters.
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Helper class used by MediaWiki to create Parsoid PageConfig objects.
static parserOutputFromPageBundle(PageBundle $pageBundle, ?ParserOutput $originalParserOutput=null)
Creates a ParserOutput object containing the relevant data from the given PageBundle object.
Parser implementation which uses Parsoid.
__construct(Parsoid $parsoid, PageConfigFactory $pageConfigFactory, LanguageConverterFactory $languageConverterFactory, ParserFactory $legacyParserFactory, GlobalIdGenerator $globalIdGenerator)
parseFakeRevision(RevisionRecord $fakeRev, PageReference $page, ParserOptions $options)
makeLimitReport(ParserOptions $parserOptions, ParserOutput $parserOutput)
Set the limit report data in the current ParserOutput.
parse(string $text, PageReference $page, ParserOptions $options, bool $linestart=true, bool $clearState=true, ?int $revId=null)
Convert wikitext to HTML. Do not call this function recursively.
Represents the identity of a specific rendering of a specific revision at some point in time.
Page revision base class.
Value object representing a content slot associated with a page revision.
Represents a title within MediaWiki.
Definition Title.php:76
Set options of the Parser.
getRenderReason()
Returns reason for rendering the content.
getMaxIncludeSize()
Maximum size of template expansions, in bytes.
getUserLangObj()
Get the user language used by the parser for this page and split the parser cache.
getTargetLanguage()
Target language for the parse.
getUserIdentity()
Get the identity of the user for whom the parse is made.
getDisableContentConversion()
Whether content conversion should be disabled.
getWrapOutputClass()
Class to use to wrap output from Parser::parse()
registerWatcher( $callback)
Registers a callback for tracking which ParserOptions are used.
getInterfaceMessage()
Parsing an interface message?
Rendered output of a wiki page, as parsed from wikitext.
addWrapperDivClass( $class)
Add a CSS class to use for the wrapping div.
setLimitReportData( $key, $value)
Sets parser limit report data for a key.
hasReducedExpiry()
Check whether the cache TTL was lowered from the site default.
setExtensionData( $key, $value)
Attaches arbitrary data to this ParserObject.
getTimeSinceStart( $clock)
Returns the time since resetParseStartTime() was last called.
Class for getting statistically unique IDs without a central coordinator.
Content object for wiki text pages.
Interface for objects (potentially) representing a page that can be viewable and linked to on a wiki.
Copyright (C) 2011-2022 Wikimedia Foundation and others.