Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
25.89% |
29 / 112 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
ParsoidParser | |
25.89% |
29 / 112 |
|
0.00% |
0 / 6 |
165.92 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
2 | |||
setParsoidRenderID | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
genParserOutput | |
0.00% |
0 / 43 |
|
0.00% |
0 / 1 |
30 | |||
parse | |
93.55% |
29 / 31 |
|
0.00% |
0 / 1 |
7.01 | |||
parseFakeRevision | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
makeLimitReport | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Parser\Parsoid; |
4 | |
5 | use MediaWiki\Languages\LanguageConverterFactory; |
6 | use MediaWiki\MediaWikiServices; |
7 | use MediaWiki\Page\PageReference; |
8 | use MediaWiki\Parser\ParserOutput; |
9 | use MediaWiki\Parser\Parsoid\Config\PageConfigFactory; |
10 | use MediaWiki\Revision\MutableRevisionRecord; |
11 | use MediaWiki\Revision\RevisionRecord; |
12 | use MediaWiki\Revision\SlotRecord; |
13 | use MediaWiki\Title\Title; |
14 | use ParserFactory; |
15 | use ParserOptions; |
16 | use Wikimedia\Assert\Assert; |
17 | use Wikimedia\Parsoid\Config\PageConfig; |
18 | use Wikimedia\Parsoid\Parsoid; |
19 | use Wikimedia\UUID\GlobalIdGenerator; |
20 | use WikitextContent; |
21 | |
/**
 * Parser implementation which uses Parsoid.
 *
 * Currently incomplete; see T236809 for the long-term plan.
 *
 * @since 1.41
 * @unstable since 1.41; see T236809 for plan.
 */
class ParsoidParser /* eventually this will extend \Parser */ {
	/**
	 * Extension-data key under which the page title (prefixed dbkey form)
	 * is stored in the generated ParserOutput.
	 *
	 * @unstable
	 * This should not be used widely right now since this may go away.
	 * This is being added to support DiscussionTools with Parsoid HTML
	 * and after initial exploration, this may be implemented differently.
	 */
	public const PARSOID_TITLE_KEY = "parsoid:title-dbkey";
	private Parsoid $parsoid;
	private PageConfigFactory $pageConfigFactory;
	private LanguageConverterFactory $languageConverterFactory;
	private ParserFactory $legacyParserFactory;
	private GlobalIdGenerator $globalIdGenerator;

	/**
	 * @param Parsoid $parsoid
	 * @param PageConfigFactory $pageConfigFactory
	 * @param LanguageConverterFactory $languageConverterFactory
	 * @param ParserFactory $legacyParserFactory
	 * @param GlobalIdGenerator $globalIdGenerator
	 */
	public function __construct(
		Parsoid $parsoid,
		PageConfigFactory $pageConfigFactory,
		LanguageConverterFactory $languageConverterFactory,
		ParserFactory $legacyParserFactory,
		GlobalIdGenerator $globalIdGenerator
	) {
		$this->parsoid = $parsoid;
		$this->pageConfigFactory = $pageConfigFactory;
		$this->languageConverterFactory = $languageConverterFactory;
		$this->legacyParserFactory = $legacyParserFactory;
		$this->globalIdGenerator = $globalIdGenerator;
	}

	/**
	 * API users expect a ParsoidRenderID value set in the parser output's extension data.
	 *
	 * Stamps the output with a fresh UUIDv1 render id, the revision id and
	 * revision timestamp taken from the page config, and the current time
	 * as cache time.
	 *
	 * @param PageConfig $pageConfig
	 * @param ParserOutput $parserOutput
	 */
	private function setParsoidRenderID( PageConfig $pageConfig, ParserOutput $parserOutput ): void {
		$parserOutput->setRenderId( $this->globalIdGenerator->newUUIDv1() );
		$parserOutput->setCacheRevisionId( $pageConfig->getRevisionId() );
		$parserOutput->setRevisionTimestamp( $pageConfig->getRevisionTimestamp() );
		$parserOutput->setCacheTime( wfTimestampNow() );
	}

	/**
	 * Determine the language to hand to the PageConfigFactory.
	 *
	 * Uses the target language from the parser options; when none is set
	 * and the options describe an interface message, falls back to the
	 * user language object.
	 *
	 * @param ParserOptions $options
	 * @return mixed The language taken from $options, or null when neither
	 *   source applies (callers pass null through so that the factory can
	 *   fall back to the title's page language).
	 */
	private function resolveTargetLanguage( ParserOptions $options ) {
		$lang = $options->getTargetLanguage();
		if ( $lang === null && $options->getInterfaceMessage() ) {
			$lang = $options->getUserLangObj();
		}
		return $lang;
	}

	/**
	 * Internal helper to avoid code duplication across two methods
	 * (::parse() and ::parseFakeRevision()).
	 *
	 * Runs Parsoid's wikitext-to-HTML conversion for the given page config
	 * and turns the resulting page bundle into a ParserOutput decorated
	 * with title, render id, timing, limit report, style module and
	 * Parsoid version metadata.
	 *
	 * @param PageConfig $pageConfig
	 * @param ParserOptions $options
	 * @return ParserOutput
	 */
	private function genParserOutput(
		PageConfig $pageConfig, ParserOptions $options
	): ParserOutput {
		$parserOutput = new ParserOutput();

		// Parsoid itself does not vary output by parser options right now.
		// But, ensure that any option use by extensions, parser functions,
		// recursive parses, or (in the unlikely future scenario) Parsoid itself
		// are recorded as used.
		$options->registerWatcher( [ $parserOutput, 'recordOption' ] );

		// The enable/disable logic here matches that in Parser::internalParseHalfParsed(),
		// although __NOCONTENTCONVERT__ is handled internal to Parsoid.
		//
		// T349137: It might be preferable to handle __NOCONTENTCONVERT__ here rather than
		// by inspecting the DOM inside Parsoid. That will come in a separate patch.
		$htmlVariantLanguage = null;
		if ( !( $options->getDisableContentConversion() || $options->getInterfaceMessage() ) ) {
			// NOTES (some of these are TODOs for read views integration)
			// 1. This html variant conversion is a pre-cache transform. HtmlOutputRendererHelper
			//    has another variant conversion that is a post-cache transform based on the
			//    'Accept-Language' header. If that header is set, there is really no reason to
			//    do this conversion here. So, eventually, we are likely to either not pass in
			//    the htmlVariantLanguage option below OR disable language conversion from the
			//    wt2html path in Parsoid and this and the Accept-Language variant conversion
			//    both would have to be handled as post-cache transforms.
			//
			// 2. Parser.php calls convert() which computes a preferred variant from the
			//    target language. But, we cannot do that unconditionally here because REST API
			//    requests specify the exact variant via the 'Content-Language' header.
			//
			//    For Parsoid page views, either the callers will have to compute the
			//    preferred variant and set it in ParserOptions OR the REST API will have
			//    to set some other flag indicating that the preferred variant should not
			//    be computed. For now, I am adding a temporary hack, but this should be
			//    replaced with something more sensible (T267067).
			//
			// 3. Additionally, Parsoid's callers will have to set targetLanguage in ParserOptions
			//    to mimic the logic in Parser.php (missing right now).
			$langCode = $pageConfig->getPageLanguageBcp47();
			if ( $options->getRenderReason() === 'page-view' ) { // TEMPORARY HACK
				$langFactory = MediaWikiServices::getInstance()->getLanguageFactory();
				$lang = $langFactory->getLanguage( $langCode );
				$langConv = $this->languageConverterFactory->getLanguageConverter( $lang );
				$htmlVariantLanguage = $langFactory->getLanguage( $langConv->getPreferredVariant() );
			} else {
				$htmlVariantLanguage = $langCode;
			}
		}

		$defaultOptions = [
			'pageBundle' => true,
			'wrapSections' => true,
			'logLinterData' => true,
			'body_only' => false,
			'htmlVariantLanguage' => $htmlVariantLanguage,
			'offsetType' => 'byte',
			'outputContentVersion' => Parsoid::defaultHTMLVersion()
		];

		$parserOutput->resetParseStartTime();

		// Define $headers explicitly instead of handing an undefined variable
		// to Parsoid. NOTE(review): presumably this is an out-parameter that
		// Parsoid fills in; its value is not read here afterwards.
		$headers = null;

		// This can throw ClientError or ResourceLimitExceededException.
		// Callers are responsible for figuring out how to handle them.
		$pageBundle = $this->parsoid->wikitext2html(
			$pageConfig,
			$defaultOptions,
			$headers,
			$parserOutput );

		$parserOutput = PageBundleParserOutputConverter::parserOutputFromPageBundle( $pageBundle, $parserOutput );

		// Record the page title in dbkey form so that post-cache transforms
		// have access to the title.
		$parserOutput->setExtensionData(
			self::PARSOID_TITLE_KEY,
			Title::newFromLinkTarget( $pageConfig->getLinkTarget() )->getPrefixedDBkey()
		);

		// Register a watcher again because the $parserOutput arg
		// and $parserOutput return value above are different objects!
		$options->registerWatcher( [ $parserOutput, 'recordOption' ] );

		$revId = $pageConfig->getRevisionId();
		if ( $revId !== null ) {
			// T350538: This shouldn't be necessary so long as ContentRenderer
			// is involved in the call chain somewhere, and should be turned
			// into an assertion (and ::setParsoidRenderID() removed).
			$this->setParsoidRenderID( $pageConfig, $parserOutput );
		}

		$parserOutput->setFromParserOptions( $options );

		$parserOutput->recordTimeProfile();
		$this->makeLimitReport( $options, $parserOutput );

		// Add Parsoid skinning module
		$parserOutput->addModuleStyles( [ 'mediawiki.skinning.content.parsoid' ] );

		// Record Parsoid version in extension data; this allows
		// us to use the onRejectParserCacheValue hook to selectively
		// expire "bad" generated content in the event of a rollback.
		$parserOutput->setExtensionData(
			'core:parsoid-version', Parsoid::version()
		);

		return $parserOutput;
	}

	/**
	 * Convert wikitext to HTML
	 * Do not call this function recursively.
	 *
	 * @param string $text Text we want to parse
	 * @param-taint $text escapes_htmlnoent
	 * @param PageReference $page
	 * @param ParserOptions $options
	 * @param bool $linestart Must be true; false is not yet supported.
	 * @param bool $clearState Must be true; false is not yet supported.
	 * @param int|null $revId ID of the revision being rendered. This is used to render
	 *  REVISION* magic words. 0 means that any current revision will be used. Null means
	 *  that {{REVISIONID}}/{{REVISIONUSER}} will be empty and {{REVISIONTIMESTAMP}} will
	 *  use the current timestamp.
	 * @return ParserOutput
	 * @return-taint escaped
	 * @unstable since 1.41
	 */
	public function parse(
		string $text, PageReference $page, ParserOptions $options,
		bool $linestart = true, bool $clearState = true, ?int $revId = null
	): ParserOutput {
		Assert::invariant( $linestart, '$linestart=false is not yet supported' );
		Assert::invariant( $clearState, '$clearState=false is not yet supported' );
		$title = Title::newFromPageReference( $page );
		$lang = $this->resolveTargetLanguage( $options );

		// With a revision id, first try the page config of that stored
		// revision; it is only usable if its main content equals $text.
		$pageConfig = $revId === null ? null : $this->pageConfigFactory->create(
			$title,
			$options->getUserIdentity(),
			$revId,
			null, // unused
			$lang // defaults to title page language if null
		);
		if ( !( $pageConfig && $pageConfig->getPageMainContent() === $text ) ) {
			// This is a bit awkward!  But we really need to parse $text, which
			// may or may not correspond to the $revId provided!
			// T332928 suggests one solution: splitting the "have revid"
			// callers from the "bare text, no associated revision" callers.
			$revisionRecord = new MutableRevisionRecord( $title );
			if ( $revId !== null ) {
				$revisionRecord->setId( $revId );
			}
			$revisionRecord->setSlot(
				SlotRecord::newUnsaved(
					SlotRecord::MAIN,
					new WikitextContent( $text )
				)
			);
			$pageConfig = $this->pageConfigFactory->create(
				$title,
				$options->getUserIdentity(),
				$revisionRecord,
				null, // unused
				$lang // defaults to title page language if null
			);
		}

		return $this->genParserOutput( $pageConfig, $options );
	}

	/**
	 * @internal
	 *
	 * Convert custom wikitext (stored in main slot of the $fakeRev arg) to HTML.
	 * Callers are expected NOT to stuff the result into ParserCache.
	 *
	 * @param RevisionRecord $fakeRev Revision to parse
	 * @param PageReference $page
	 * @param ParserOptions $options
	 * @return ParserOutput
	 * @unstable since 1.41
	 */
	public function parseFakeRevision(
		RevisionRecord $fakeRev, PageReference $page, ParserOptions $options
	): ParserOutput {
		$title = Title::newFromPageReference( $page );
		$lang = $this->resolveTargetLanguage( $options );
		$pageConfig = $this->pageConfigFactory->create(
			$title,
			$options->getUserIdentity(),
			$fakeRev,
			null, // unused
			$lang // defaults to title page language if null
		);

		return $this->genParserOutput( $pageConfig, $options );
	}

	/**
	 * Set the limit report data in the current ParserOutput.
	 * This is ported from Parser::makeLimitReport() and should eventually
	 * use the method from the superclass directly.
	 *
	 * Records CPU time (only when available), wall time, a placeholder
	 * timing profile, and cache timestamp / TTL / transient-content flags.
	 *
	 * @param ParserOptions $parserOptions
	 * @param ParserOutput $parserOutput Output object that receives the report data
	 */
	protected function makeLimitReport(
		ParserOptions $parserOptions, ParserOutput $parserOutput
	) {
		// NOTE(review): the value is not used below. The getter call is kept
		// because option reads may be observed by the watcher registered in
		// ::genParserOutput(); confirm before removing it.
		$maxIncludeSize = $parserOptions->getMaxIncludeSize();

		$cpuTime = $parserOutput->getTimeProfile( 'cpu' );
		if ( $cpuTime !== null ) {
			$parserOutput->setLimitReportData( 'limitreport-cputime',
				sprintf( "%.3f", $cpuTime )
			);
		}

		$wallTime = $parserOutput->getTimeProfile( 'wall' );
		$parserOutput->setLimitReportData( 'limitreport-walltime',
			sprintf( "%.3f", $wallTime )
		);

		$parserOutput->setLimitReportData( 'limitreport-timingprofile', [ 'not yet supported' ] );

		// Add other cache related metadata
		$parserOutput->setLimitReportData( 'cachereport-timestamp',
			$parserOutput->getCacheTime() );
		$parserOutput->setLimitReportData( 'cachereport-ttl',
			$parserOutput->getCacheExpiry() );
		$parserOutput->setLimitReportData( 'cachereport-transientcontent',
			$parserOutput->hasReducedExpiry() );
	}

}