Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
71.03% |
228 / 321 |
|
42.86% |
12 / 28 |
CRAP | |
0.00% |
0 / 1 |
HtmlOutputRendererHelper | |
71.03% |
228 / 321 |
|
42.86% |
12 / 28 |
373.11 | |
0.00% |
0 / 1 |
__construct | |
93.33% |
14 / 15 |
|
0.00% |
0 / 1 |
3.00 | |||
setFlavor | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
getFlavor | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setOutputProfileVersion | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
setUseParserCache | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
3 | |||
setStashingEnabled | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
setRevision | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
4.07 | |||
setContent | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
setContentSource | |
60.00% |
3 / 5 |
|
0.00% |
0 / 1 |
2.26 | |||
setPageLanguage | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
init | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
initInternal | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
setVariantConversionLanguage | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
3 | |||
getAcceptedTargetLanguage | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
getHtml | |
69.57% |
32 / 46 |
|
0.00% |
0 / 1 |
8.38 | |||
getETag | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
getLastModified | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getParamSettings | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
2 | |||
getDefaultPageLanguage | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
getParserOutput | |
93.10% |
27 / 29 |
|
0.00% |
0 / 1 |
10.03 | |||
getHtmlOutputContentLanguage | |
23.08% |
3 / 13 |
|
0.00% |
0 / 1 |
7.10 | |||
putHeaders | |
80.00% |
8 / 10 |
|
0.00% |
0 / 1 |
5.20 | |||
getPageBundle | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
getRevisionId | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
4 | |||
stripParsoidSectionTags | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
4 | |||
getParserOutputInternal | |
67.69% |
44 / 65 |
|
0.00% |
0 / 1 |
38.32 | |||
parseUncacheable | |
63.16% |
12 / 19 |
|
0.00% |
0 / 1 |
7.80 | |||
isParsoidContent | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | namespace MediaWiki\Rest\Handler\Helper; |
21 | |
22 | use HttpError; |
23 | use InvalidArgumentException; |
24 | use MediaWiki\Content\Content; |
25 | use MediaWiki\Content\IContentHandlerFactory; |
26 | use MediaWiki\Edit\ParsoidOutputStash; |
27 | use MediaWiki\Edit\ParsoidRenderID; |
28 | use MediaWiki\Edit\SelserContext; |
29 | use MediaWiki\Language\LanguageCode; |
30 | use MediaWiki\Languages\LanguageFactory; |
31 | use MediaWiki\Logger\LoggerFactory; |
32 | use MediaWiki\MainConfigNames; |
33 | use MediaWiki\Page\PageIdentity; |
34 | use MediaWiki\Page\PageLookup; |
35 | use MediaWiki\Page\PageRecord; |
36 | use MediaWiki\Page\ParserOutputAccess; |
37 | use MediaWiki\Parser\ParserOptions; |
38 | use MediaWiki\Parser\ParserOutput; |
39 | use MediaWiki\Parser\Parsoid\Config\SiteConfig as ParsoidSiteConfig; |
40 | use MediaWiki\Parser\Parsoid\HtmlTransformFactory; |
41 | use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter; |
42 | use MediaWiki\Permissions\Authority; |
43 | use MediaWiki\Rest\Handler; |
44 | use MediaWiki\Rest\HttpException; |
45 | use MediaWiki\Rest\LocalizedHttpException; |
46 | use MediaWiki\Rest\ResponseInterface; |
47 | use MediaWiki\Revision\MutableRevisionRecord; |
48 | use MediaWiki\Revision\RevisionAccessException; |
49 | use MediaWiki\Revision\RevisionLookup; |
50 | use MediaWiki\Revision\RevisionRecord; |
51 | use MediaWiki\Revision\RevisionRenderer; |
52 | use MediaWiki\Revision\SlotRecord; |
53 | use MediaWiki\Status\Status; |
54 | use MediaWiki\Title\Title; |
55 | use MWUnknownContentModelException; |
56 | use Wikimedia\Assert\Assert; |
57 | use Wikimedia\Bcp47Code\Bcp47Code; |
58 | use Wikimedia\Bcp47Code\Bcp47CodeValue; |
59 | use Wikimedia\Message\MessageValue; |
60 | use Wikimedia\ParamValidator\ParamValidator; |
61 | use Wikimedia\Parsoid\Core\ClientError; |
62 | use Wikimedia\Parsoid\Core\PageBundle; |
63 | use Wikimedia\Parsoid\Core\ResourceLimitExceededException; |
64 | use Wikimedia\Parsoid\DOM\Element; |
65 | use Wikimedia\Parsoid\Parsoid; |
66 | use Wikimedia\Parsoid\Utils\DOMCompat; |
67 | use Wikimedia\Parsoid\Utils\DOMUtils; |
68 | use Wikimedia\Parsoid\Utils\WTUtils; |
69 | use Wikimedia\Stats\StatsFactory; |
70 | |
71 | /** |
72 | * Helper for getting output of a given wikitext page rendered by parsoid. |
73 | * |
74 | * @since 1.36 |
75 | * |
76 | * @unstable Pending consolidation of the Parsoid extension with core code. |
77 | */ |
78 | class HtmlOutputRendererHelper implements HtmlOutputHelper { |
79 | use RestAuthorizeTrait; |
80 | use RestStatusTrait; |
81 | |
82 | /** |
83 | * @internal |
84 | */ |
85 | public const CONSTRUCTOR_OPTIONS = [ |
86 | MainConfigNames::ParsoidCacheConfig |
87 | ]; |
88 | |
89 | private const OUTPUT_FLAVORS = [ 'view', 'stash', 'fragment', 'edit' ]; |
90 | |
91 | /** @var PageIdentity|null */ |
92 | private $page = null; |
93 | |
94 | /** @var RevisionRecord|int|null */ |
95 | private $revisionOrId = null; |
96 | |
97 | /** @var Bcp47Code|null */ |
98 | private $pageLanguage = null; |
99 | |
100 | /** @var ?string One of the flavors from OUTPUT_FLAVORS */ |
101 | private $flavor = null; |
102 | |
103 | /** @var bool */ |
104 | private $stash = false; |
105 | |
106 | /** @var Authority */ |
107 | private $authority; |
108 | |
109 | /** @var ParserOutput */ |
110 | private $parserOutput; |
111 | |
112 | /** @var ParserOutput */ |
113 | private $processedParserOutput; |
114 | |
115 | /** @var ?Bcp47Code */ |
116 | private $sourceLanguage = null; |
117 | |
118 | /** @var ?Bcp47Code */ |
119 | private $targetLanguage = null; |
120 | |
121 | /** |
122 | * Should we ignore mismatches between $page and the page that $revision belongs to? |
123 | * Usually happens because of page moves. This should be set to true only for internal API calls. |
124 | */ |
125 | private bool $lenientRevHandling = false; |
126 | |
127 | /** |
128 | * Flags to be passed as $options to ParserOutputAccess::getParserOutput, |
129 | * to control parser cache access. |
130 | * |
131 | * @var int Use ParserOutputAccess::OPT_* |
132 | */ |
133 | private $parserOutputAccessOptions = 0; |
134 | |
135 | /** |
136 | * @see the $options parameter on Parsoid::wikitext2html |
137 | * @var array |
138 | */ |
139 | private $parsoidOptions = []; |
140 | |
141 | private ?ParserOptions $parserOptions = null; |
142 | |
143 | /** |
144 | * Whether the result can be cached in the parser cache and the web cache. |
145 | * Set to false when bespoke options are set. |
146 | * |
147 | * @var bool |
148 | */ |
149 | private $isCacheable = true; |
150 | |
151 | private ParsoidOutputStash $parsoidOutputStash; |
152 | private StatsFactory $statsFactory; |
153 | private ParserOutputAccess $parserOutputAccess; |
154 | private PageLookup $pageLookup; |
155 | private RevisionLookup $revisionLookup; |
156 | private RevisionRenderer $revisionRenderer; |
157 | private ParsoidSiteConfig $parsoidSiteConfig; |
158 | private HtmlTransformFactory $htmlTransformFactory; |
159 | private IContentHandlerFactory $contentHandlerFactory; |
160 | private LanguageFactory $languageFactory; |
161 | |
162 | /** |
163 | * @param ParsoidOutputStash $parsoidOutputStash |
164 | * @param StatsFactory $statsFactory |
165 | * @param ParserOutputAccess $parserOutputAccess |
166 | * @param PageLookup $pageLookup |
167 | * @param RevisionLookup $revisionLookup |
168 | * @param RevisionRenderer $revisionRenderer |
169 | * @param ParsoidSiteConfig $parsoidSiteConfig |
170 | * @param HtmlTransformFactory $htmlTransformFactory |
171 | * @param IContentHandlerFactory $contentHandlerFactory |
172 | * @param LanguageFactory $languageFactory |
173 | * @param PageIdentity|null $page |
174 | * @param array $parameters |
175 | * @param Authority|null $authority |
176 | * @param RevisionRecord|int|null $revision |
177 | * @param bool $lenientRevHandling Should we ignore mismatches between |
178 | * $page and the page that $revision belongs to? Usually happens |
179 | * because of page moves. This should be set to true only for |
180 | * internal API calls. |
181 | * @param ParserOptions|null $parserOptions |
182 | * @note Since 1.43, setting $page and $authority arguments to null |
183 | * has been deprecated. |
184 | */ |
public function __construct(
	ParsoidOutputStash $parsoidOutputStash,
	StatsFactory $statsFactory,
	ParserOutputAccess $parserOutputAccess,
	PageLookup $pageLookup,
	RevisionLookup $revisionLookup,
	RevisionRenderer $revisionRenderer,
	ParsoidSiteConfig $parsoidSiteConfig,
	HtmlTransformFactory $htmlTransformFactory,
	IContentHandlerFactory $contentHandlerFactory,
	LanguageFactory $languageFactory,
	?PageIdentity $page = null,
	array $parameters = [],
	?Authority $authority = null,
	$revision = null,
	bool $lenientRevHandling = false,
	?ParserOptions $parserOptions = null
) {
	// Caller-supplied rendering settings. These are stored before any
	// initialization happens, because initInternal() only creates default
	// parser options when none have been provided.
	$this->parserOptions = $parserOptions;
	$this->lenientRevHandling = $lenientRevHandling;

	// Injected services.
	$this->languageFactory = $languageFactory;
	$this->contentHandlerFactory = $contentHandlerFactory;
	$this->htmlTransformFactory = $htmlTransformFactory;
	$this->parsoidSiteConfig = $parsoidSiteConfig;
	$this->revisionRenderer = $revisionRenderer;
	$this->revisionLookup = $revisionLookup;
	$this->pageLookup = $pageLookup;
	$this->parserOutputAccess = $parserOutputAccess;
	$this->statsFactory = $statsFactory;
	$this->parsoidOutputStash = $parsoidOutputStash;

	if ( $page !== null && $authority !== null ) {
		// Normal path: the helper is fully initialized right away.
		$this->initInternal( $page, $parameters, $authority, $revision );
		return;
	}

	// Constructing without $page and $authority parameters
	// is deprecated since 1.43.
	wfDeprecated( __METHOD__ . ' without $page or $authority', '1.43' );
}
223 | |
224 | /** |
225 | * Sets the given flavor to use for Wikitext -> HTML transformations. |
226 | * |
227 | * Flavors may influence parser options, parsoid options, and DOM transformations. |
228 | * They will be reflected by the ETag returned by getETag(). |
229 | * |
230 | * @note This method should not be called if stashing mode is enabled. |
231 | * @see setStashingEnabled |
232 | * @see getFlavor() |
233 | * |
234 | * @param string $flavor |
235 | * |
236 | * @return void |
237 | */ |
public function setFlavor( string $flavor ): void {
	// Only flavors listed in OUTPUT_FLAVORS are accepted. The comparison is
	// strict so that membership never depends on PHP's loose comparison rules.
	if ( !in_array( $flavor, self::OUTPUT_FLAVORS, true ) ) {
		throw new InvalidArgumentException( 'Invalid flavor supplied' );
	}

	// XXX: throw?
	// While stashing is enabled, the requested flavor is replaced by 'stash'.
	$this->flavor = $this->stash ? 'stash' : $flavor;
}
250 | |
251 | /** |
252 | * Returns the flavor of HTML that will be generated. |
253 | * @see setFlavor() |
254 | * @return string |
255 | */ |
public function getFlavor(): string {
	// The flavor is assigned through setFlavor(), which initInternal() calls
	// during initialization.
	$currentFlavor = $this->flavor;

	return $currentFlavor;
}
259 | |
260 | /** |
261 | * Set the desired Parsoid profile version for the output. |
262 | * The actual output version is selected to be compatible with the one given here, |
263 | * per the rules of semantic versioning. |
264 | * |
265 | * @note Will disable caching if the effective output version is different from the default. |
266 | * |
267 | * @param string $version |
268 | * |
269 | * @throws HttpException If the given version is not supported (status 406) |
270 | */ |
public function setOutputProfileVersion( $version ) {
	$resolvedVersion = Parsoid::resolveContentVersion( $version );

	// A version is currently accepted only if it resolves to something AND
	// that something is the default HTML version. The default version is
	// only looked up when the requested version could be resolved at all.
	$isAccepted = $resolvedVersion
		&& $resolvedVersion === Parsoid::defaultHTMLVersion();

	if ( $isAccepted ) {
		// Nothing to record: the default needs no explicit option.
		return;
	}

	// TODO: (T347426) At some later point, we may reintroduce support for
	//  non-default content versions as part of work on the content
	//  negotiation protocol.
	//
	//  // See Parsoid::wikitext2html
	//  $this->parsoidOptions['outputContentVersion'] = $outputContentVersion;
	//  $this->isCacheable = false;
	throw new LocalizedHttpException(
		new MessageValue( "rest-unsupported-profile-version", [ $version ] ), 406
	);
}
295 | |
296 | /** |
297 | * Controls how the parser cache is used. |
298 | * |
299 | * @param bool $read Whether we should look for cached output before parsing |
300 | * @param bool $write Whether we should cache output after parsing |
301 | */ |
public function setUseParserCache( bool $read, bool $write ) {
	// Start with no flags and add one for each cache operation to suppress.
	$accessOptions = 0;

	if ( !$read ) {
		// Skip the cache lookup and parse.
		$accessOptions |= ParserOutputAccess::OPT_FORCE_PARSE;
	}

	if ( !$write ) {
		// Do not store the result of the parse.
		$accessOptions |= ParserOutputAccess::OPT_NO_UPDATE_CACHE;
	}

	$this->parserOutputAccessOptions = $accessOptions;
}
307 | |
308 | /** |
309 | * Determine whether stashing should be applied. |
310 | * |
311 | * @param bool $stash |
312 | * |
313 | * @return void |
314 | */ |
public function setStashingEnabled( bool $stash ): void {
	$this->stash = $stash;

	if ( $stash ) {
		// Stashing implies the 'stash' flavor.
		$this->setFlavor( 'stash' );
		return;
	}

	// Stashing was switched off: a left-over 'stash' flavor falls back to 'view'.
	if ( $this->flavor === 'stash' ) {
		$this->setFlavor( 'view' );
	}
}
324 | |
325 | /** |
326 | * Set the revision to render. |
327 | * |
328 | * This can take a fake RevisionRecord when rendering for previews |
329 | * or when switching the editor from source mode to visual mode. |
330 | * |
331 | * In that case, $revisionOrId->getId() must return 0 to indicate |
332 | * that the ParserCache should be bypassed. Stashing may still apply. |
333 | * |
334 | * @param RevisionRecord|int $revisionOrId |
335 | */ |
public function setRevision( $revisionOrId ): void {
	Assert::parameterType( [ RevisionRecord::class, 'integer' ], $revisionOrId, '$revision' );

	// Integer IDs must be strictly positive.
	$isBadId = is_int( $revisionOrId ) && $revisionOrId <= 0;
	if ( $isBadId ) {
		throw new HttpError( 400, "Bad revision ID: $revisionOrId" );
	}

	$this->revisionOrId = $revisionOrId;

	if ( $this->getRevisionId() !== null ) {
		return;
	}

	// If we have a RevisionRecord but no revision ID, we are dealing with a fake
	// revision used for editor previews or mode switches. The wikitext is coming
	// from the request, not the database, so the result is not cacheable for re-use
	// by others (though it can be stashed for use by the same client).
	$this->isCacheable = false;
}
353 | |
354 | /** |
355 | * Set the content to render. Useful when rendering for previews |
356 | * or when switching the editor from source mode to visual mode. |
357 | * |
358 | * This will create a fake revision for rendering, the revision ID will be 0. |
359 | * |
360 | * @see setRevision |
361 | * @see setContentSource |
362 | * |
363 | * @param Content $content |
364 | */ |
public function setContent( Content $content ): void {
	$page = $this->page;

	// Build a fake revision with ID 0 that carries the given content in
	// its main slot, then render that revision.
	$fakeRevision = new MutableRevisionRecord( $page );
	$fakeRevision->setId( 0 );
	$fakeRevision->setPageId( $page->getId() );
	$fakeRevision->setContent( SlotRecord::MAIN, $content );

	$this->setRevision( $fakeRevision );
}
372 | |
373 | /** |
374 | * Set the content to render. Useful when rendering for previews |
375 | * or when switching the editor from source mode to visual mode. |
376 | * |
377 | * This will create a fake revision for rendering. The revision ID will be 0. |
378 | * |
379 | * @param string $source The source data, e.g. wikitext |
380 | * @param string $model The content model indicating how to interpret $source, e.g. CONTENT_MODEL_WIKITEXT |
381 | * |
382 | * @see setRevision |
383 | * @see setContent |
384 | */ |
public function setContentSource( string $source, string $model ): void {
	try {
		// Look up the handler for the model, turn the source text into a
		// Content object, and hand it over to setContent().
		$this->setContent(
			$this->contentHandlerFactory
				->getContentHandler( $model )
				->unserializeContent( $source )
		);
	} catch ( MWUnknownContentModelException $unknownModel ) {
		// An unknown content model is reported as a client error.
		throw new LocalizedHttpException( new MessageValue( "rest-bad-content-model", [ $model ] ), 400 );
	}
}
394 | |
395 | /** |
396 | * This is equivalent to 'pageLanguageOverride' in PageConfigFactory |
397 | * For example, when clients call the REST API with the 'content-language' |
398 | * header to affect language variant conversion. |
399 | * |
400 | * @param Bcp47Code|string $pageLanguage the page language, as a Bcp47Code |
401 | * or a BCP-47 string. |
402 | */ |
public function setPageLanguage( $pageLanguage ): void {
	// Strings are wrapped in a Bcp47CodeValue; anything else is stored as given.
	$this->pageLanguage = is_string( $pageLanguage )
		? new Bcp47CodeValue( $pageLanguage )
		: $pageLanguage;
}
409 | |
410 | /** |
411 | * Initializes the helper with the given parameters like the page |
412 | * we're dealing with, parameters gotten from the request inputs, |
413 | * and the revision if any is available. |
414 | * |
415 | * @param PageIdentity $page |
416 | * @param array $parameters |
417 | * @param Authority $authority |
418 | * @param RevisionRecord|int|null $revision |
419 | * @deprecated since 1.43, use parameters in constructor instead |
420 | */ |
public function init( PageIdentity $page, array $parameters, Authority $authority, $revision = null ) {
	// Emit the deprecation notice, then delegate to the shared
	// initialization routine that the constructor also uses.
	wfDeprecated( __METHOD__, '1.43' );

	$this->initInternal( $page, $parameters, $authority, $revision );
}
430 | |
private function initInternal(
	PageIdentity $page,
	array $parameters,
	Authority $authority,
	$revision = null
) {
	$this->page = $page;
	$this->authority = $authority;
	$this->stash = $parameters['stash'] ?? false;

	if ( $revision !== null ) {
		$this->setRevision( $revision );
	}

	// Stashing forces the 'stash' flavor; otherwise use the requested
	// flavor, defaulting to 'view'.
	$initialFlavor = $this->stash
		? 'stash'
		: ( $parameters['flavor'] ?? 'view' );
	$this->setFlavor( $initialFlavor );

	// Fall back to anonymous parser options if the constructor got none.
	$this->parserOptions ??= ParserOptions::newFromAnon();
}
452 | |
453 | /** |
454 | * @inheritDoc |
455 | */ |
public function setVariantConversionLanguage(
	$targetLanguage,
	$sourceLanguage = null
): void {
	if ( is_string( $targetLanguage ) ) {
		// A string target is treated like an Accept-Language value: pick
		// one language code from it, then normalize that code.
		$acceptedCode = $this->getAcceptedTargetLanguage( $targetLanguage );
		$targetLanguage = LanguageCode::normalizeNonstandardCodeAndWarn( $acceptedCode );
	}

	if ( is_string( $sourceLanguage ) ) {
		// A string source is normalized directly.
		$sourceLanguage = LanguageCode::normalizeNonstandardCodeAndWarn( $sourceLanguage );
	}

	$this->sourceLanguage = $sourceLanguage;
	$this->targetLanguage = $targetLanguage;
}
474 | |
475 | /** |
476 | * Get a target language from an accept header |
477 | */ |
private function getAcceptedTargetLanguage( string $targetLanguage ): string {
	// We could try to identify the most desirable language here,
	// following the rules for Accept-Language headers in RFC 9110.
	// For now, just take the first language code: the leading run of
	// word characters and hyphens, ignoring any whitespace before it.
	$hasLeadingCode = preg_match( '/^\s*([-\w]+)/', $targetLanguage, $matches );

	// "und" means "undetermined" per RFC5646.
	return $hasLeadingCode ? $matches[1] : 'und';
}
490 | |
491 | /** |
492 | * @inheritDoc |
493 | */ |
public function getHtml(): ParserOutput {
	// Reuse the result of an earlier call, if there is one.
	if ( $this->processedParserOutput ) {
		return $this->processedParserOutput;
	}

	$output = $this->getParserOutput();

	if ( $this->stash ) {
		$this->authorizeWriteOrThrow( $this->authority, 'stashbasehtml', $this->page );

		// A null revision ID indicates a fake revision (see getRevisionId());
		// in that case the main slot content is stored along with the HTML.
		$isFakeRevision = $this->getRevisionId() === null;
		$stashKey = ParsoidRenderID::newFromParserOutput( $output );

		$stashedBundle = PageBundleParserOutputConverter::pageBundleFromParserOutput( $output );
		$stashedRevisionId = $stashKey->getRevisionID();
		$stashedContent = $isFakeRevision
			? $this->revisionOrId->getContent( SlotRecord::MAIN )
			: null;
		$selserContext = new SelserContext( $stashedBundle, $stashedRevisionId, $stashedContent );

		if ( $this->parsoidOutputStash->set( $stashKey, $selserContext ) ) {
			$this->statsFactory->getCounter( 'htmloutputrendererhelper_stash_total' )
				->setLabel( 'status', 'save' )
				->copyToStatsdAt( 'htmloutputrendererhelper.stash.save' )
				->increment();
		} else {
			// Count, log and report the stash failure.
			$this->statsFactory->getCounter( 'htmloutputrendererhelper_stash_total' )
				->setLabel( 'status', 'fail' )
				->copyToStatsdAt( 'htmloutputrendererhelper.stash.fail' )
				->increment();

			$errorData = [ 'parsoid-stash-key' => $stashKey ];
			LoggerFactory::getInstance( 'HtmlOutputRendererHelper' )->error(
				"Parsoid stash failure",
				$errorData
			);
			throw new LocalizedHttpException(
				MessageValue::new( 'rest-html-stash-failure' ),
				500,
				$errorData
			);
		}
	}

	if ( $this->flavor === 'edit' ) {
		// Inject data-parsoid and data-mw attributes.
		$inlinedHtml = $this->getPageBundle()->toInlineAttributeHtml();
		$output->setRawText( $inlinedHtml );
	}

	// Check if variant conversion has to be performed
	// NOTE: Variant conversion is performed on the fly, and kept outside the stash.
	if ( $this->targetLanguage ) {
		$variantConverter = $this->htmlTransformFactory->getLanguageVariantConverter( $this->page );
		$output = $variantConverter->convertParserOutputVariant(
			$output,
			$this->targetLanguage,
			$this->sourceLanguage
		);
	}

	// Remember the result for subsequent calls.
	$this->processedParserOutput = $output;

	return $output;
}
558 | |
559 | /** |
560 | * @inheritDoc |
561 | */ |
public function getETag( string $suffix = '' ): ?string {
	$output = $this->getParserOutput();
	$renderKey = ParsoidRenderID::newFromParserOutput( $output )->getKey();

	// Layout: <render key>/<flavor>[/<suffix>][+lang:<target language>]
	$tag = "$renderKey/{$this->flavor}";

	if ( $suffix !== '' ) {
		$tag .= "/$suffix";
	}

	if ( $this->targetLanguage ) {
		$tag .= "+lang:{$this->targetLanguage->toBcp47Code()}";
	}

	// The value is returned wrapped in double quotes.
	return "\"{$tag}\"";
}
579 | |
580 | /** |
581 | * @inheritDoc |
582 | */ |
public function getLastModified(): ?string {
	// The cache time recorded on the parser output serves as the
	// last-modified time.
	$output = $this->getParserOutput();

	return $output->getCacheTime();
}
586 | |
587 | /** |
588 | * @inheritDoc |
589 | */ |
public static function getParamSettings(): array {
	// Both parameters are optional and are read from the query string.

	// 'stash': boolean switch, off by default.
	$stashSettings = [
		Handler::PARAM_SOURCE => 'query',
		ParamValidator::PARAM_TYPE => 'boolean',
		ParamValidator::PARAM_DEFAULT => false,
		ParamValidator::PARAM_REQUIRED => false,
		Handler::PARAM_DESCRIPTION => new MessageValue( 'rest-param-desc-html-output-stash' )
	];

	// 'flavor': one of OUTPUT_FLAVORS, 'view' by default.
	$flavorSettings = [
		Handler::PARAM_SOURCE => 'query',
		ParamValidator::PARAM_TYPE => self::OUTPUT_FLAVORS,
		ParamValidator::PARAM_DEFAULT => 'view',
		ParamValidator::PARAM_REQUIRED => false,
		Handler::PARAM_DESCRIPTION => new MessageValue( 'rest-param-desc-html-output-flavor' )
	];

	return [
		'stash' => $stashSettings,
		'flavor' => $flavorSettings,
	];
}
608 | |
private function getDefaultPageLanguage(): Bcp47Code {
	// NOTE: keep in sync with Parser::getTargetLanguage!

	// XXX: Inject a TitleFactory just for this?! We need a better way to determine the page language...
	$title = Title::castFromPageIdentity( $this->page );

	// Interface messages use the user language from the parser options;
	// everything else uses the language of the page.
	return $this->parserOptions->getInterfaceMessage()
		? $this->parserOptions->getUserLangObj()
		: $title->getPageLanguage();
}
621 | |
622 | /** |
623 | * @return ParserOutput |
624 | */ |
private function getParserOutput(): ParserOutput {
	if ( !$this->parserOutput ) {
		$options = $this->parserOptions;
		$options->setRenderReason( __METHOD__ );

		$defaultLanguage = $this->getDefaultPageLanguage();
		$languageOverride = $this->pageLanguage;

		if ( $languageOverride
			&& $languageOverride->toBcp47Code() !== $defaultLanguage->toBcp47Code()
		) {
			$targetLanguageObj = $this->languageFactory->getLanguage( $languageOverride );
			$options->setTargetLanguage( $targetLanguageObj );
			// Ensure target language splits the parser cache, when
			// non-default; targetLanguage is not in
			// ParserOptions::$cacheVaryingOptionsHash for the legacy
			// parser.
			$options->addExtraKey( 'target=' . $targetLanguageObj->getCode() );
		}

		try {
			$status = $this->getParserOutputInternal();
		} catch ( RevisionAccessException $e ) {
			throw new LocalizedHttpException(
				MessageValue::new( 'rest-nonexistent-title' ),
				404,
				[ 'reason' => $e->getMessage() ]
			);
		}

		if ( !$status->isOK() ) {
			// Known failure messages, checked in this order, mapped to the
			// REST error message and HTTP status code to report.
			$knownFailures = [
				'parsoid-client-error' => [ 'rest-html-backend-error', 400 ],
				'parsoid-resource-limit-exceeded' => [ 'rest-resource-limit-exceeded', 413 ],
				'missing-revision-permission' => [ 'rest-permission-denied-revision', 403 ],
				'parsoid-revision-access' => [ 'rest-specified-revision-unavailable', 404 ],
			];

			// Anything not listed above is reported as a backend error.
			$restError = [ 'rest-html-backend-error', 500 ];
			$isKnownFailure = false;

			foreach ( $knownFailures as $statusMessage => $mappedError ) {
				if ( $status->hasMessage( $statusMessage ) ) {
					$restError = $mappedError;
					$isKnownFailure = true;
					break;
				}
			}

			if ( !$isKnownFailure ) {
				// Only unrecognized failures are logged.
				$this->logStatusError( $status, 'Parsoid backend error', 'HtmlOutputRendererHelper' );
			}

			[ $restMessage, $httpCode ] = $restError;
			$this->throwExceptionForStatus( $status, $restMessage, $httpCode );
		}

		$this->parserOutput = $status->getValue();
	}

	Assert::invariant( $this->parserOutput->getRenderId() !== null, "no render id" );

	return $this->parserOutput;
}
674 | |
675 | /** |
676 | * The content language of the HTML output after parsing. |
677 | * |
678 | * @return Bcp47Code The language, as a BCP-47 code |
679 | */ |
public function getHtmlOutputContentLanguage(): Bcp47Code {
	$language = $this->getHtml()->getLanguage();

	if ( $language ) {
		return $language;
	}

	// This shouldn't happen, but don't crash if it does:
	$logger = LoggerFactory::getInstance( 'HtmlOutputRendererHelper' );

	if ( $this->pageLanguage ) {
		// Fall back to the page language that was set on this helper.
		$logger->warning(
			"ParserOutput does not specify a language"
		);

		return $this->pageLanguage;
	}

	// Last resort: ask the title for the page language.
	$logger->warning(
		"ParserOutput does not specify a language and no page language set in helper."
	);

	return Title::newFromPageIdentity( $this->page )->getPageLanguage();
}
703 | |
704 | /** |
705 | * @inheritDoc |
706 | */ |
public function putHeaders( ResponseInterface $response, bool $forHtml = true ): void {
	if ( $forHtml ) {
		// For HTML, we want to set the Content-Language. For JSON, we probably don't.
		$contentLanguage = $this->getHtmlOutputContentLanguage()->toBcp47Code();
		$response->setHeader( 'Content-Language', $contentLanguage );

		// The content type is set using the version of the page bundle.
		$bundle = $this->getPageBundle();
		ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_HTML, $bundle->version );
	}

	if ( $this->targetLanguage ) {
		$response->addHeader( 'Vary', 'Accept-Language' );
	}

	// XXX: if Parsoid returns Vary headers, set them here?!

	if ( !$this->isCacheable ) {
		$response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
	}

	// TODO: cache control for stable HTML? See ContentHelper::setCacheControl

	// Only a non-zero, non-null revision ID is exposed as a header.
	$revisionId = $this->getRevisionId();
	if ( $revisionId ) {
		$response->setHeader( 'Content-Revision-Id', (string)$revisionId );
	}
}
732 | |
733 | /** |
734 | * Returns the rendered HTML as a PageBundle object. |
735 | * |
736 | * @return PageBundle |
737 | */ |
public function getPageBundle(): PageBundle {
	// XXX: converting between PageBundle and ParserOutput is inefficient!
	$bundle = PageBundleParserOutputConverter::pageBundleFromParserOutput(
		$this->getParserOutput()
	);

	if ( !$this->targetLanguage ) {
		// No variant conversion requested.
		return $bundle;
	}

	// NOTE: Variant conversion is performed on the fly, and kept outside the stash.
	return $this->htmlTransformFactory
		->getLanguageVariantConverter( $this->page )
		->convertPageBundleVariant(
			$bundle,
			$this->targetLanguage,
			$this->sourceLanguage
		);
}
756 | |
757 | /** |
758 | * Returns the ID of the revision that is being rendered. |
759 | * |
760 | * This will return 0 if no revision has been specified, so the current revision |
761 | * will be rendered. |
762 | * |
763 | * This will return null if RevisionRecord has been set but that RevisionRecord |
764 | * does not have a revision ID, e.g. when rendering a preview. |
765 | * |
766 | * @return ?int |
767 | */ |
public function getRevisionId(): ?int {
	$revision = $this->revisionOrId;

	if ( !$revision ) {
		// If we don't have a revision set, or it's 0, we are rendering the current revision.
		return 0;
	}

	if ( !is_object( $revision ) ) {
		// It's a revision ID, just return it
		return (int)$revision;
	}

	// NOTE: return null even if getId() gave us 0
	return $revision->getId() ?: null;
}
782 | |
783 | /** |
784 | * Strip Parsoid's section wrappers |
785 | * |
786 | * TODO: Should we move this to Parsoid's ContentUtils class? |
787 | * There already is a stripUnnecessaryWrappersAndSyntheticNodes but |
788 | * it targets html2wt and does a lot more than just section unwrapping. |
789 | * |
790 | * @param Element $elt |
791 | */ |
private function stripParsoidSectionTags( Element $elt ): void {
	// The next sibling is captured before a child is touched, because the
	// child may be removed from the tree below.
	for ( $child = $elt->firstChild; $child; $child = $following ) {
		$following = $child->nextSibling;

		if ( !( $child instanceof Element ) ) {
			continue;
		}

		// Recurse into subtree before stripping this
		$this->stripParsoidSectionTags( $child );

		if ( !WTUtils::isParsoidSectionTag( $child ) ) {
			continue;
		}

		// Unwrap: move the wrapper's children in front of the wrapper,
		// then remove the now-empty wrapper from its parent.
		$parent = $child->parentNode;
		// Help out phan
		'@phan-var Element $parent';
		DOMUtils::migrateChildren( $child, $parent, $child );
		$parent->removeChild( $child );
	}
}
811 | |
/**
 * Resolve the page and revision to render and produce the parser output,
 * either through ParserOutputAccess (cacheable case) or by rendering
 * directly via parseUncacheable() (fragment flavor or ID-less revision).
 *
 * Side effects: may set $this->isCacheable to false, and enables Parsoid on
 * $this->parserOptions if the main slot's content model is supported.
 *
 * @return Status On success, the status value is the ParserOutput. Client
 *   errors and resource limit errors from the cacheable path are reported
 *   as fatal statuses rather than thrown.
 * @throws RevisionAccessException If no page is set, the page or revision
 *   cannot be found, or the revision does not belong to the page (unless
 *   lenient revision handling is enabled).
 * @throws \RuntimeException If, under lenient revision handling, the page
 *   the revision claims to belong to cannot be loaded.
 */
private function getParserOutputInternal(): Status {
	// NOTE: ParserOutputAccess::getParserOutput() should be used for revisions
	//       that come from the database. Either this revision is null to indicate
	//       the current revision or the revision must have an ID.
	//       If we have a revision and the ID is 0 or null, then it's a fake revision
	//       representing a preview.
	// NOTE: VisualEditor would set the 'fragment' flavor when transforming from
	//       Wikitext to HTML for the purpose of editing when doing parsefragment
	//       (in body only mode).
	if ( $this->flavor === 'fragment' || $this->getRevisionId() === null ) {
		$this->isCacheable = false;
	}

	// TODO: Decide whether we want to allow stale content for speed for the
	// 'view' flavor. In that case, we would want to use PoolCounterWork,
	// either directly or through ParserOutputAccess.

	$flags = $this->parserOutputAccessOptions;

	// Resolve revision
	$page = $this->page;
	$revision = $this->revisionOrId;
	if ( $page === null ) {
		throw new RevisionAccessException( "No page" );
	}

	// NOTE: If we have a RevisionRecord already and this is
	//       not cacheable, just use it, there is no need to
	//       resolve $page to a PageRecord (and it may not be
	//       possible if the page doesn't exist).
	if ( $this->isCacheable || !$revision instanceof RevisionRecord ) {
		if ( !$page instanceof PageRecord ) {
			// Capture the printable name before $page is overwritten.
			$name = "$page";
			$page = $this->pageLookup->getPageByReference( $page );
			if ( !$page ) {
				throw new RevisionAccessException(
					'Page {name} not found',
					[ 'name' => $name ]
				);
			}
		}

		// No revision requested: fall back to the page's latest revision.
		$revision ??= $page->getLatest();

		if ( is_int( $revision ) ) {
			$revId = $revision;
			$revision = $this->revisionLookup->getRevisionById( $revId );

			if ( !$revision ) {
				throw new RevisionAccessException(
					'Revision {revId} not found',
					[ 'revId' => $revId ]
				);
			}
		}

		if ( $page->getId() !== $revision->getPageId() ) {
			if ( !$this->lenientRevHandling ) {
				throw new RevisionAccessException(
					'Revision {revId} does not belong to page {name}',
					[ 'name' => $page->getDBkey(), 'revId' => $revision->getId() ]
				);
			}

			// Lenient mode: switch to the page the revision actually belongs to.
			$page = $this->pageLookup->getPageById( $revision->getPageId() );
			if ( !$page ) {
				// This should ideally never trigger!
				throw new \RuntimeException(
					"Unexpected NULL page for pageid " . $revision->getPageId() .
					" from revision " . $revision->getId()
				);
			}
			// Don't cache this!
			$flags |= ParserOutputAccess::OPT_NO_UPDATE_CACHE;
		}
	}

	$mainSlot = $revision->getSlot( SlotRecord::MAIN );
	$contentModel = $mainSlot->getModel();
	if ( $this->parsoidSiteConfig->supportsContentModel( $contentModel ) ) {
		$this->parserOptions->setUseParsoid();
	}

	if ( $this->isCacheable ) {
		// phan can't tell that we must have used the block above to
		// resolve $page to a PageRecord if we've made it to this block.
		'@phan-var PageRecord $page';
		try {
			$status = $this->parserOutputAccess->getParserOutput(
				$page, $this->parserOptions, $revision, $flags
			);
		} catch ( ClientError $e ) {
			$status = Status::newFatal( 'parsoid-client-error', $e->getMessage() );
		} catch ( ResourceLimitExceededException $e ) {
			$status = Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
		}
	} else {
		$status = $this->parseUncacheable(
			$page,
			$revision,
			$this->lenientRevHandling
		);

		// @phan-suppress-next-line PhanSuspiciousValueComparison
		if ( $status->isOK() && $this->flavor === 'fragment' ) {
			// Unwrap sections and return body_only content
			// NOTE: This introduces an extra html -> dom -> html roundtrip
			// This will get addressed once HtmlHolder work is complete
			$parserOutput = $status->getValue();
			$body = DOMCompat::getBody( DOMUtils::parseHTML( $parserOutput->getRawText() ) );
			if ( $body ) {
				$this->stripParsoidSectionTags( $body );
				$parserOutput->setText( DOMCompat::getInnerHTML( $body ) );
			}
		}
	}

	// Whichever path produced it, a successful result must carry a render ID.
	Assert::invariant(
		!$status->isOK() || $status->getValue()->getRenderId() !== null,
		"no render id"
	);

	return $status;
}
933 | |
/**
 * Render a revision directly through the RevisionRenderer, bypassing any
 * caching. See ParserOutputAccess::renderRevision() for the cached
 * counterpart.
 *
 * Only revisions without a real ID (0 or null) are accepted; anything else
 * yields a fatal status.
 *
 * @param PageIdentity $page Not used by this method's body
 * @param RevisionRecord $revision The ID-less revision to render
 * @param bool $lenientRevHandling Not used by this method's body
 * @return Status Good status wrapping the ParserOutput, or a fatal status
 */
private function parseUncacheable(
	PageIdentity $page,
	RevisionRecord $revision,
	bool $lenientRevHandling = false
): Status {
	// Enforce caller expectation
	$revId = $revision->getId();
	$hasRealId = !( $revId === 0 || $revId === null );
	if ( $hasRealId ) {
		return Status::newFatal( 'parsoid-revision-access',
			"parseUncacheable should not be called for a real revision" );
	}

	try {
		// ParserOutputAccess uses 'null' for the authority and
		// 'audience' => RevisionRecord::RAW, presumably because
		// the access checks are already handled by the
		// RestAuthorizeTrait
		$rendered = $this->revisionRenderer->getRenderedRevision(
			$revision,
			$this->parserOptions,
			$this->authority,
			[ 'audience' => RevisionRecord::RAW ]
		);

		if ( $rendered === null ) {
			$result = Status::newFatal( 'parsoid-revision-access' );
		} else {
			$output = $rendered->getRevisionParserOutput();
			// Ensure this isn't accidentally cached
			$output->updateCacheExpiry( 0 );
			$result = Status::newGood( $output );
		}
	} catch ( ClientError $e ) {
		$result = Status::newFatal( 'parsoid-client-error', $e->getMessage() );
	} catch ( ResourceLimitExceededException $e ) {
		$result = Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
	}

	return $result;
}
971 | |
/**
 * Whether the parser output obtained from getParserOutput() has a page
 * bundle, as reported by PageBundleParserOutputConverter::hasPageBundle().
 *
 * @return bool
 */
public function isParsoidContent(): bool {
	$parserOutput = $this->getParserOutput();

	return PageBundleParserOutputConverter::hasPageBundle( $parserOutput );
}
977 | |
978 | } |