Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
76.35% |
184 / 241 |
|
52.00% |
13 / 25 |
CRAP | |
0.00% |
0 / 1 |
HtmlOutputRendererHelper | |
76.35% |
184 / 241 |
|
52.00% |
13 / 25 |
164.67 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
setFlavor | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
getFlavor | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setOutputProfileVersion | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
setUseParserCache | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
3 | |||
setStashingEnabled | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
setRevision | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
4.07 | |||
setContent | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
setContentSource | |
60.00% |
3 / 5 |
|
0.00% |
0 / 1 |
2.26 | |||
setPageLanguage | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
init | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
setVariantConversionLanguage | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
3 | |||
getAcceptedTargetLanguage | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
getHtml | |
73.81% |
31 / 42 |
|
0.00% |
0 / 1 |
7.88 | |||
getETag | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
getLastModified | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getParamSettings | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
2 | |||
getDefaultPageLanguage | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
getParserOutput | |
92.86% |
26 / 28 |
|
0.00% |
0 / 1 |
10.04 | |||
getHtmlOutputContentLanguage | |
23.08% |
3 / 13 |
|
0.00% |
0 / 1 |
7.10 | |||
putHeaders | |
80.00% |
8 / 10 |
|
0.00% |
0 / 1 |
5.20 | |||
getPageBundle | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
getRevisionId | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
4 | |||
stripParsoidSectionTags | |
70.00% |
7 / 10 |
|
0.00% |
0 / 1 |
4.43 | |||
getParserOutputInternal | |
100.00% |
25 / 25 |
|
100.00% |
1 / 1 |
7 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | namespace MediaWiki\Rest\Handler\Helper; |
21 | |
22 | use Content; |
23 | use HttpError; |
24 | use IBufferingStatsdDataFactory; |
25 | use InvalidArgumentException; |
26 | use LanguageCode; |
27 | use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface; |
28 | use MediaWiki\Content\IContentHandlerFactory; |
29 | use MediaWiki\Edit\ParsoidOutputStash; |
30 | use MediaWiki\Edit\SelserContext; |
31 | use MediaWiki\Languages\LanguageFactory; |
32 | use MediaWiki\Logger\LoggerFactory; |
33 | use MediaWiki\MainConfigNames; |
34 | use MediaWiki\Page\PageIdentity; |
35 | use MediaWiki\Page\ParserOutputAccess; |
36 | use MediaWiki\Parser\ParserOutput; |
37 | use MediaWiki\Parser\Parsoid\HtmlTransformFactory; |
38 | use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter; |
39 | use MediaWiki\Parser\Parsoid\ParsoidOutputAccess; |
40 | use MediaWiki\Parser\Parsoid\ParsoidRenderID; |
41 | use MediaWiki\Permissions\Authority; |
42 | use MediaWiki\Rest\Handler; |
43 | use MediaWiki\Rest\HttpException; |
44 | use MediaWiki\Rest\LocalizedHttpException; |
45 | use MediaWiki\Rest\ResponseInterface; |
46 | use MediaWiki\Revision\MutableRevisionRecord; |
47 | use MediaWiki\Revision\RevisionAccessException; |
48 | use MediaWiki\Revision\RevisionRecord; |
49 | use MediaWiki\Revision\SlotRecord; |
50 | use MediaWiki\Status\Status; |
51 | use MediaWiki\Title\Title; |
52 | use MWUnknownContentModelException; |
53 | use ParserOptions; |
54 | use Wikimedia\Assert\Assert; |
55 | use Wikimedia\Bcp47Code\Bcp47Code; |
56 | use Wikimedia\Bcp47Code\Bcp47CodeValue; |
57 | use Wikimedia\Message\MessageValue; |
58 | use Wikimedia\ParamValidator\ParamValidator; |
59 | use Wikimedia\Parsoid\Core\PageBundle; |
60 | use Wikimedia\Parsoid\DOM\Element; |
61 | use Wikimedia\Parsoid\Parsoid; |
62 | use Wikimedia\Parsoid\Utils\ContentUtils; |
63 | use Wikimedia\Parsoid\Utils\DOMCompat; |
64 | use Wikimedia\Parsoid\Utils\DOMUtils; |
65 | use Wikimedia\Parsoid\Utils\WTUtils; |
66 | |
67 | /** |
68 | * Helper for getting output of a given wikitext page rendered by parsoid. |
69 | * |
70 | * @since 1.36 |
71 | * |
72 | * @unstable Pending consolidation of the Parsoid extension with core code. |
73 | */ |
74 | class HtmlOutputRendererHelper implements HtmlOutputHelper { |
75 | use RestAuthorizeTrait; |
76 | use RestStatusTrait; |
77 | |
78 | /** |
79 | * @internal |
80 | * @var string[] |
81 | */ |
82 | public const CONSTRUCTOR_OPTIONS = [ |
83 | MainConfigNames::ParsoidCacheConfig |
84 | ]; |
85 | |
86 | /** @var string[] */ |
87 | private const OUTPUT_FLAVORS = [ 'view', 'stash', 'fragment', 'edit' ]; |
88 | |
89 | /** @var ParsoidOutputStash */ |
90 | private $parsoidOutputStash; |
91 | |
92 | /** @var PageIdentity|null */ |
93 | private $page = null; |
94 | |
95 | /** @var RevisionRecord|int|null */ |
96 | private $revisionOrId = null; |
97 | |
98 | /** @var Bcp47Code|null */ |
99 | private $pageLanguage = null; |
100 | |
101 | /** @var ?string One of the flavors from OUTPUT_FLAVORS */ |
102 | private $flavor = null; |
103 | |
104 | /** @var bool */ |
105 | private $stash = false; |
106 | |
107 | /** @var IBufferingStatsdDataFactory */ |
108 | private $stats; |
109 | |
110 | /** @var Authority */ |
111 | private $authority; |
112 | |
113 | /** @var ParsoidOutputAccess */ |
114 | private $parsoidOutputAccess; |
115 | |
116 | /** @var ParserOutput */ |
117 | private $parserOutput; |
118 | |
119 | /** @var ParserOutput */ |
120 | private $processedParserOutput; |
121 | |
122 | /** @var HtmlTransformFactory */ |
123 | private $htmlTransformFactory; |
124 | |
125 | /** @var IContentHandlerFactory */ |
126 | private $contentHandlerFactory; |
127 | |
128 | /** @var LanguageFactory */ |
129 | private $languageFactory; |
130 | |
131 | /** @var ?Bcp47Code */ |
132 | private $sourceLanguage = null; |
133 | |
134 | /** @var ?Bcp47Code */ |
135 | private $targetLanguage = null; |
136 | |
137 | /** Should we ignore mismatched $page and $revisionOrId values? */ |
138 | private bool $lenientRevHandling = false; |
139 | |
140 | /** |
141 | * Flags to be passed as $options to ParsoidOutputAccess::getParserOutput, |
142 | * to control parser cache access. |
143 | * |
144 | * @var int Use ParsoidOutputAccess::OPT_* |
145 | */ |
146 | private $parsoidOutputAccessOptions = 0; |
147 | |
148 | /** |
149 | * @see the $options parameter on Parsoid::wikitext2html |
150 | * @var array |
151 | */ |
152 | private $parsoidOptions = []; |
153 | |
154 | /** |
155 | * Whether the result can be cached in the parser cache and the web cache. |
156 | * Set to false when bespoke options are set. |
157 | * |
158 | * @var bool |
159 | */ |
160 | private $isCacheable = true; |
161 | |
162 | /** |
163 | * @param ParsoidOutputStash $parsoidOutputStash |
164 | * @param StatsdDataFactoryInterface $statsDataFactory |
165 | * @param ParsoidOutputAccess $parsoidOutputAccess |
166 | * @param HtmlTransformFactory $htmlTransformFactory |
167 | * @param IContentHandlerFactory $contentHandlerFactory |
168 | * @param LanguageFactory $languageFactory |
169 | * @param bool $lenientRevHandling Should we ignore mismatches between |
170 | * $page and the page that $revision belongs to? Usually happens |
171 | * because of page moves. This should be set to true only for |
172 | * internal API calls. |
173 | */ |
174 | public function __construct( |
175 | ParsoidOutputStash $parsoidOutputStash, |
176 | StatsdDataFactoryInterface $statsDataFactory, |
177 | ParsoidOutputAccess $parsoidOutputAccess, |
178 | HtmlTransformFactory $htmlTransformFactory, |
179 | IContentHandlerFactory $contentHandlerFactory, |
180 | LanguageFactory $languageFactory, |
181 | bool $lenientRevHandling = false |
182 | ) { |
183 | $this->parsoidOutputStash = $parsoidOutputStash; |
184 | $this->stats = $statsDataFactory; |
185 | $this->parsoidOutputAccess = $parsoidOutputAccess; |
186 | $this->htmlTransformFactory = $htmlTransformFactory; |
187 | $this->contentHandlerFactory = $contentHandlerFactory; |
188 | $this->languageFactory = $languageFactory; |
189 | $this->lenientRevHandling = $lenientRevHandling; |
190 | } |
191 | |
192 | /** |
193 | * Sets the given flavor to use for Wikitext -> HTML transformations. |
194 | * |
195 | * Flavors may influence parser options, parsoid options, and DOM transformations. |
196 | * They will be reflected by the ETag returned by getETag(). |
197 | * |
198 | * @note This method should not be called if stashing mode is enabled. |
199 | * @see setStashingEnabled |
200 | * @see getFlavor() |
201 | * |
202 | * @param string $flavor |
203 | * |
204 | * @return void |
205 | */ |
206 | public function setFlavor( string $flavor ): void { |
207 | if ( !in_array( $flavor, self::OUTPUT_FLAVORS, true ) ) { |
208 | throw new InvalidArgumentException( 'Invalid flavor supplied' ); |
209 | } |
210 | |
211 | if ( $this->stash ) { |
212 | // XXX: throw? For now, stashing mode silently overrides the requested flavor. |
213 | $flavor = 'stash'; |
214 | } |
215 | |
216 | $this->flavor = $flavor; |
217 | } |
218 | |
219 | /** |
220 | * Returns the flavor of HTML that will be generated. |
221 | * @see setFlavor() |
222 | * @return string |
223 | */ |
224 | public function getFlavor(): string { |
225 | return $this->flavor ?? 'view'; // null until init()/setFlavor() runs; 'view' is the default flavor |
226 | } |
227 | |
228 | /** |
229 | * Set the desired Parsoid profile version for the output. |
230 | * The actual output version is selected to be compatible with the one given here, |
231 | * per the rules of semantic versioning. |
232 | * |
233 | * @note Will disable caching if the effective output version is different from the default. |
234 | * |
235 | * @param string $version |
236 | * |
237 | * @throws HttpException If the given version is not supported (status 406) |
238 | */ |
239 | public function setOutputProfileVersion( $version ) { |
240 | $outputContentVersion = Parsoid::resolveContentVersion( $version ); |
241 | |
242 | if ( !$outputContentVersion ) { |
243 | throw new LocalizedHttpException( |
244 | new MessageValue( "rest-unsupported-profile-version", [ $version ] ), 406 |
245 | ); |
246 | } |
247 | |
248 | // Only set the option if the value isn't the default! |
249 | if ( $outputContentVersion !== Parsoid::defaultHTMLVersion() ) { |
250 | throw new LocalizedHttpException( |
251 | new MessageValue( "rest-unsupported-profile-version", [ $version ] ), 406 |
252 | ); |
253 | |
254 | // TODO: (T347426) At some later point, we may reintroduce support for |
255 | // non-default content versions as part of work on the content |
256 | // negotiation protocol. |
257 | // |
258 | // // See Parsoid::wikitext2html |
259 | // $this->parsoidOptions['outputContentVersion'] = $outputContentVersion; |
260 | // $this->isCacheable = false; |
261 | } |
262 | } |
263 | |
264 | /** |
265 | * Controls how the parser cache is used. |
266 | * |
267 | * @param bool $read Whether we should look for cached output before parsing |
268 | * @param bool $write Whether we should cache output after parsing |
269 | */ |
270 | public function setUseParserCache( bool $read, bool $write ) { |
271 | $this->parsoidOutputAccessOptions = |
272 | ( $read ? 0 : ParserOutputAccess::OPT_FORCE_PARSE ) | |
273 | ( $write ? 0 : ParserOutputAccess::OPT_NO_UPDATE_CACHE ); |
274 | } |
275 | |
276 | /** |
277 | * Determine whether stashing should be applied. |
278 | * |
279 | * @param bool $stash |
280 | * |
281 | * @return void |
282 | */ |
283 | public function setStashingEnabled( bool $stash ): void { |
284 | $this->stash = $stash; |
285 | |
286 | if ( $stash ) { |
287 | $this->setFlavor( 'stash' ); |
288 | } elseif ( $this->flavor === 'stash' ) { |
289 | $this->setFlavor( 'view' ); |
290 | } |
291 | } |
292 | |
293 | /** |
294 | * Set the revision to render. |
295 | * |
296 | * This can take a fake RevisionRecord when rendering for previews |
297 | * or when switching the editor from source mode to visual mode. |
298 | * |
299 | * In that case, $revisionOrId->getId() must return 0 to indicate |
300 | * that the ParserCache should be bypassed. Stashing may still apply. |
301 | * |
302 | * @param RevisionRecord|int $revisionOrId |
303 | */ |
304 | public function setRevision( $revisionOrId ): void { |
305 | Assert::parameterType( [ RevisionRecord::class, 'integer' ], $revisionOrId, '$revision' ); |
306 | |
307 | if ( is_int( $revisionOrId ) && $revisionOrId <= 0 ) { |
308 | throw new HttpError( 400, "Bad revision ID: $revisionOrId" ); |
309 | } |
310 | |
311 | $this->revisionOrId = $revisionOrId; |
312 | |
313 | if ( $this->getRevisionId() === null ) { |
314 | // If we have a RevisionRecord but no revision ID, we are dealing with a fake |
315 | // revision used for editor previews or mode switches. The wikitext is coming |
316 | // from the request, not the database, so the result is not cacheable for re-use |
317 | // by others (though it can be stashed for use by the same client). |
318 | $this->isCacheable = false; |
319 | } |
320 | } |
321 | |
322 | /** |
323 | * Set the content to render. Useful when rendering for previews |
324 | * or when switching the editor from source mode to visual mode. |
325 | * |
326 | * This will create a fake revision for rendering, the revision ID will be 0. |
327 | * |
328 | * @see setRevision |
329 | * @see setContentSource |
330 | * |
331 | * @param Content $content |
332 | */ |
333 | public function setContent( Content $content ): void { |
334 | $rev = new MutableRevisionRecord( $this->page ); |
335 | $rev->setId( 0 ); |
336 | $rev->setPageId( $this->page->getId() ); |
337 | $rev->setContent( SlotRecord::MAIN, $content ); |
338 | $this->setRevision( $rev ); |
339 | } |
340 | |
341 | /** |
342 | * Set the content to render. Useful when rendering for previews |
343 | * or when switching the editor from source mode to visual mode. |
344 | * |
345 | * This will create a fake revision for rendering, the revision ID will be 0. |
346 | * |
347 | * @param string $source The source data, e.g. wikitext |
348 | * @param string $model The content model indicating how to interpret $source, e.g. CONTENT_MODEL_WIKITEXT |
349 | * |
350 | * @see setRevision |
351 | * @see setContent |
352 | */ |
353 | public function setContentSource( string $source, string $model ): void { |
354 | try { |
355 | $handler = $this->contentHandlerFactory->getContentHandler( $model ); |
356 | $content = $handler->unserializeContent( $source ); |
357 | $this->setContent( $content ); |
358 | } catch ( MWUnknownContentModelException $ex ) { |
359 | throw new LocalizedHttpException( new MessageValue( "rest-bad-content-model", [ $model ] ), 400 ); |
360 | } |
361 | } |
362 | |
363 | /** |
364 | * This is the equivalent of 'pageLanguageOverride' in PageConfigFactory. |
365 | * For example, when clients call the REST API with the 'content-language' |
366 | * header to effect language variant conversion. |
367 | * |
368 | * @param Bcp47Code|string $pageLanguage the page language, as a Bcp47Code |
369 | * or a BCP-47 string. |
370 | */ |
371 | public function setPageLanguage( $pageLanguage ): void { |
372 | if ( is_string( $pageLanguage ) ) { |
373 | $pageLanguage = new Bcp47CodeValue( $pageLanguage ); |
374 | } |
375 | $this->pageLanguage = $pageLanguage; |
376 | } |
377 | |
378 | /** |
379 | * Initializes the helper with the given parameters like the page |
380 | * we're dealing with, parameters gotten from the request inputs, |
381 | * and the revision if any is available. |
382 | * |
383 | * @param PageIdentity $page |
384 | * @param array $parameters |
385 | * @param Authority $authority |
386 | * @param RevisionRecord|int|null $revision |
387 | */ |
388 | public function init( |
389 | PageIdentity $page, |
390 | array $parameters, |
391 | Authority $authority, |
392 | $revision = null |
393 | ) { |
394 | $this->page = $page; |
395 | $this->authority = $authority; |
396 | $this->stash = $parameters['stash'] ?? false; |
397 | |
398 | if ( $revision !== null ) { |
399 | $this->setRevision( $revision ); |
400 | } |
401 | |
402 | if ( $this->stash ) { |
403 | $this->setFlavor( 'stash' ); |
404 | } else { |
405 | $this->setFlavor( $parameters['flavor'] ?? 'view' ); |
406 | } |
407 | } |
408 | |
409 | /** |
410 | * @inheritDoc |
411 | */ |
412 | public function setVariantConversionLanguage( |
413 | $targetLanguage, |
414 | $sourceLanguage = null |
415 | ): void { |
416 | if ( is_string( $targetLanguage ) ) { |
417 | $targetLanguage = $this->getAcceptedTargetLanguage( $targetLanguage ); |
418 | $targetLanguage = LanguageCode::normalizeNonstandardCodeAndWarn( |
419 | $targetLanguage |
420 | ); |
421 | } |
422 | if ( is_string( $sourceLanguage ) ) { |
423 | $sourceLanguage = LanguageCode::normalizeNonstandardCodeAndWarn( |
424 | $sourceLanguage |
425 | ); |
426 | } |
427 | $this->targetLanguage = $targetLanguage; |
428 | $this->sourceLanguage = $sourceLanguage; |
429 | } |
430 | |
431 | /** |
432 | * Get a target language from an accept header |
433 | */ |
434 | private function getAcceptedTargetLanguage( string $targetLanguage ): string { |
435 | // We could try to identify the most desirable language here, |
436 | // following the rules for Accept-Language headers in RFC 9110. |
437 | // For now, just take the first language code. |
438 | |
439 | if ( preg_match( '/^\s*([-\w]+)/', $targetLanguage, $m ) ) { |
440 | return $m[1]; |
441 | } else { |
442 | // "undetermined" per RFC5646 |
443 | return 'und'; |
444 | } |
445 | } |
446 | |
447 | /** |
448 | * @inheritDoc |
449 | */ |
450 | public function getHtml(): ParserOutput { |
451 | if ( $this->processedParserOutput ) { |
452 | return $this->processedParserOutput; |
453 | } |
454 | |
455 | $parserOutput = $this->getParserOutput(); |
456 | |
457 | if ( $this->stash ) { |
458 | $this->authorizeWriteOrThrow( $this->authority, 'stashbasehtml', $this->page ); |
459 | |
460 | $isFakeRevision = $this->getRevisionId() === null; |
461 | $parsoidStashKey = ParsoidRenderID::newFromParserOutput( $parserOutput ); |
462 | $stashSuccess = $this->parsoidOutputStash->set( |
463 | $parsoidStashKey, |
464 | new SelserContext( |
465 | PageBundleParserOutputConverter::pageBundleFromParserOutput( $parserOutput ), |
466 | $parsoidStashKey->getRevisionID(), |
467 | $isFakeRevision ? $this->revisionOrId->getContent( SlotRecord::MAIN ) : null |
468 | ) |
469 | ); |
470 | if ( !$stashSuccess ) { |
471 | $this->stats->increment( 'htmloutputrendererhelper.stash.fail' ); |
472 | |
473 | $errorData = [ 'parsoid-stash-key' => $parsoidStashKey ]; |
474 | LoggerFactory::getInstance( 'HtmlOutputRendererHelper' )->error( |
475 | "Parsoid stash failure", |
476 | $errorData |
477 | ); |
478 | throw new LocalizedHttpException( |
479 | MessageValue::new( 'rest-html-stash-failure' ), |
480 | 500, |
481 | $errorData |
482 | ); |
483 | } |
484 | $this->stats->increment( 'htmloutputrendererhelper.stash.save' ); |
485 | } |
486 | |
487 | if ( $this->flavor === 'edit' ) { |
488 | $pb = $this->getPageBundle(); |
489 | |
490 | // Inject data-parsoid and data-mw attributes. |
491 | // XXX: Would be nice if we had a DOM handy. |
492 | $doc = DOMUtils::parseHTML( $parserOutput->getRawText() ); |
493 | PageBundle::apply( $doc, $pb ); |
494 | $parserOutput->setRawText( ContentUtils::toXML( $doc ) ); |
495 | } |
496 | |
497 | // Check if variant conversion has to be performed |
498 | // NOTE: Variant conversion is performed on the fly, and kept outside the stash. |
499 | if ( $this->targetLanguage ) { |
500 | $languageVariantConverter = $this->htmlTransformFactory->getLanguageVariantConverter( $this->page ); |
501 | $parserOutput = $languageVariantConverter->convertParserOutputVariant( |
502 | $parserOutput, |
503 | $this->targetLanguage, |
504 | $this->sourceLanguage |
505 | ); |
506 | } |
507 | |
508 | $this->processedParserOutput = $parserOutput; |
509 | return $parserOutput; |
510 | } |
511 | |
512 | /** |
513 | * @inheritDoc |
514 | */ |
515 | public function getETag( string $suffix = '' ): ?string { |
516 | $parserOutput = $this->getParserOutput(); |
517 | |
518 | $renderID = ParsoidRenderID::newFromParserOutput( $parserOutput )->getKey(); |
519 | |
520 | if ( $suffix !== '' ) { |
521 | $eTag = "$renderID/{$this->flavor}/$suffix"; |
522 | } else { |
523 | $eTag = "$renderID/{$this->flavor}"; |
524 | } |
525 | |
526 | if ( $this->targetLanguage ) { |
527 | $eTag .= "+lang:{$this->targetLanguage->toBcp47Code()}"; |
528 | } |
529 | |
530 | return "\"{$eTag}\""; |
531 | } |
532 | |
533 | /** |
534 | * @inheritDoc |
535 | */ |
536 | public function getLastModified(): ?string { |
537 | return $this->getParserOutput()->getCacheTime(); |
538 | } |
539 | |
540 | /** |
541 | * @inheritDoc |
542 | */ |
543 | public function getParamSettings(): array { |
544 | return [ |
545 | 'stash' => [ |
546 | Handler::PARAM_SOURCE => 'query', |
547 | ParamValidator::PARAM_TYPE => 'boolean', |
548 | ParamValidator::PARAM_DEFAULT => false, |
549 | ParamValidator::PARAM_REQUIRED => false, |
550 | ], |
551 | 'flavor' => [ |
552 | Handler::PARAM_SOURCE => 'query', |
553 | ParamValidator::PARAM_TYPE => self::OUTPUT_FLAVORS, |
554 | ParamValidator::PARAM_DEFAULT => 'view', |
555 | ParamValidator::PARAM_REQUIRED => false, |
556 | ], |
557 | ]; |
558 | } |
559 | |
560 | private function getDefaultPageLanguage( ParserOptions $options ): Bcp47Code { |
561 | // NOTE: keep in sync with Parser::getTargetLanguage! |
562 | |
563 | // XXX: Inject a TitleFactory just for this?! We need a better way to determine the page language... |
564 | $title = Title::castFromPageIdentity( $this->page ); |
565 | |
566 | if ( $options->getInterfaceMessage() ) { |
567 | return $options->getUserLangObj(); |
568 | } |
569 | |
570 | return $title->getPageLanguage(); |
571 | } |
572 | |
573 | /** |
574 | * @return ParserOutput |
575 | */ |
576 | private function getParserOutput(): ParserOutput { |
577 | if ( !$this->parserOutput ) { |
578 | $parserOptions = ParserOptions::newFromAnon(); |
579 | $parserOptions->setRenderReason( __METHOD__ ); |
580 | |
581 | $defaultLanguage = $this->getDefaultPageLanguage( $parserOptions ); |
582 | |
583 | if ( $this->pageLanguage |
584 | && $this->pageLanguage->toBcp47Code() !== $defaultLanguage->toBcp47Code() |
585 | ) { |
586 | $languageObj = $this->languageFactory->getLanguage( $this->pageLanguage ); |
587 | $parserOptions->setTargetLanguage( $languageObj ); |
588 | } |
589 | |
590 | try { |
591 | $status = $this->getParserOutputInternal( $parserOptions ); |
592 | } catch ( RevisionAccessException $e ) { |
593 | throw new LocalizedHttpException( |
594 | MessageValue::new( 'rest-nonexistent-title' ), |
595 | 404, |
596 | [ 'reason' => $e->getMessage() ] |
597 | ); |
598 | } |
599 | |
600 | if ( !$status->isOK() ) { |
601 | if ( $status->hasMessage( 'parsoid-client-error' ) ) { |
602 | $this->throwExceptionForStatus( $status, 'rest-html-backend-error', 400 ); |
603 | } elseif ( $status->hasMessage( 'parsoid-resource-limit-exceeded' ) ) { |
604 | $this->throwExceptionForStatus( $status, 'rest-resource-limit-exceeded', 413 ); |
605 | } elseif ( $status->hasMessage( 'missing-revision-permission' ) ) { |
606 | $this->throwExceptionForStatus( $status, 'rest-permission-denied-revision', 403 ); |
607 | } elseif ( $status->hasMessage( 'parsoid-revision-access' ) ) { |
608 | $this->throwExceptionForStatus( $status, 'rest-specified-revision-unavailable', 404 ); |
609 | } else { |
610 | $this->logStatusError( $status, 'Parsoid backend error', 'HtmlOutputRendererHelper' ); |
611 | $this->throwExceptionForStatus( $status, 'rest-html-backend-error', 500 ); |
612 | } |
613 | } |
614 | |
615 | $this->parserOutput = $status->getValue(); |
616 | } |
617 | |
618 | return $this->parserOutput; |
619 | } |
620 | |
621 | /** |
622 | * The content language of the HTML output after parsing. |
623 | * |
624 | * @return Bcp47Code The language, as a BCP-47 code |
625 | */ |
626 | public function getHtmlOutputContentLanguage(): Bcp47Code { |
627 | $contentLanguage = $this->getHtml()->getLanguage(); |
628 | |
629 | // This shouldn't happen, but don't crash if it does: |
630 | if ( !$contentLanguage ) { |
631 | if ( $this->pageLanguage ) { |
632 | LoggerFactory::getInstance( 'HtmlOutputRendererHelper' )->warning( |
633 | "ParserOutput does not specify a language" |
634 | ); |
635 | |
636 | $contentLanguage = $this->pageLanguage; |
637 | } else { |
638 | LoggerFactory::getInstance( 'HtmlOutputRendererHelper' )->warning( |
639 | "ParserOutput does not specify a language and no page language set in helper." |
640 | ); |
641 | |
642 | $title = Title::newFromPageIdentity( $this->page ); |
643 | $contentLanguage = $title->getPageLanguage(); |
644 | } |
645 | } |
646 | |
647 | return $contentLanguage; |
648 | } |
649 | |
650 | /** |
651 | * @inheritDoc |
652 | */ |
653 | public function putHeaders( ResponseInterface $response, bool $forHtml = true ): void { |
654 | if ( $forHtml ) { |
655 | // For HTML we want to set the Content-Language. For JSON, we probably don't. |
656 | $response->setHeader( 'Content-Language', $this->getHtmlOutputContentLanguage()->toBcp47Code() ); |
657 | |
658 | $pb = $this->getPageBundle(); |
659 | ParsoidFormatHelper::setContentType( $response, ParsoidFormatHelper::FORMAT_HTML, $pb->version ); |
660 | } |
661 | |
662 | if ( $this->targetLanguage ) { |
663 | $response->addHeader( 'Vary', 'Accept-Language' ); |
664 | } |
665 | |
666 | // XXX: if Parsoid returns Vary headers, set them here?! |
667 | |
668 | if ( !$this->isCacheable ) { |
669 | $response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' ); |
670 | } |
671 | |
672 | // TODO: cache control for stable HTML? See ContentHelper::setCacheControl |
673 | |
674 | if ( $this->getRevisionId() ) { |
675 | $response->setHeader( 'Content-Revision-Id', (string)$this->getRevisionId() ); |
676 | } |
677 | } |
678 | |
679 | /** |
680 | * Returns the rendered HTML as a PageBundle object. |
681 | * |
682 | * @return PageBundle |
683 | */ |
684 | public function getPageBundle(): PageBundle { |
685 | // XXX: converting between PageBundle and ParserOutput is inefficient! |
686 | $parserOutput = $this->getParserOutput(); |
687 | $pb = PageBundleParserOutputConverter::pageBundleFromParserOutput( $parserOutput ); |
688 | |
689 | // Check if variant conversion has to be performed |
690 | // NOTE: Variant conversion is performed on the fly, and kept outside the stash. |
691 | if ( $this->targetLanguage ) { |
692 | $languageVariantConverter = $this->htmlTransformFactory->getLanguageVariantConverter( $this->page ); |
693 | $pb = $languageVariantConverter->convertPageBundleVariant( |
694 | $pb, |
695 | $this->targetLanguage, |
696 | $this->sourceLanguage |
697 | ); |
698 | } |
699 | |
700 | return $pb; |
701 | } |
702 | |
703 | /** |
704 | * Returns the ID of the revision that is being rendered. |
705 | * |
706 | * This will return 0 if no revision has been specified, so the current revision |
707 | * will be rendered. |
708 | * |
709 | * This will return null if RevisionRecord has been set but that RevisionRecord |
710 | * does not have a revision ID, e.g. when rendering a preview. |
711 | * |
712 | * @return ?int |
713 | */ |
714 | public function getRevisionId(): ?int { |
715 | if ( !$this->revisionOrId ) { |
716 | // If we don't have a revision set or it's 0, we are rendering the current revision. |
717 | return 0; |
718 | } |
719 | |
720 | if ( is_object( $this->revisionOrId ) ) { |
721 | // NOTE: return null even if getId() gave us 0 |
722 | return $this->revisionOrId->getId() ?: null; |
723 | } |
724 | |
725 | // It's a revision ID, just return it |
726 | return (int)$this->revisionOrId; |
727 | } |
728 | |
729 | /** |
730 | * Strip Parsoid's section wrappers |
731 | * |
732 | * TODO: Should we move this to Parsoid's ContentUtils class? |
733 | * There already is a stripUnnecessaryWrappersAndSyntheticNodes but |
734 | * it targets html2wt and does a lot more than just section unwrapping. |
735 | * |
736 | * @param Element $elt |
737 | */ |
738 | private function stripParsoidSectionTags( Element $elt ): void { |
739 | $n = $elt->firstChild; |
740 | while ( $n ) { |
741 | $next = $n->nextSibling; |
742 | if ( $n instanceof Element ) { |
743 | // Recurse into subtree before stripping this |
744 | $this->stripParsoidSectionTags( $n ); |
745 | // Strip <section> tags and synthetic extended-annotation-region wrappers |
746 | if ( WTUtils::isParsoidSectionTag( $n ) ) { |
747 | $parent = $n->parentNode; |
748 | // Help out phan |
749 | '@phan-var Element $parent'; |
750 | DOMUtils::migrateChildren( $n, $parent, $n ); |
751 | $parent->removeChild( $n ); |
752 | } |
753 | } |
754 | $n = $next; |
755 | } |
756 | } |
757 | |
758 | /** |
759 | * @param ParserOptions $parserOptions |
760 | * |
761 | * @return Status |
762 | */ |
763 | private function getParserOutputInternal( ParserOptions $parserOptions ): Status { |
764 | // NOTE: ParsoidOutputAccess::getParserOutput() should be used for revisions |
765 | // that come from the database. Either this revision is null to indicate |
766 | // the current revision or the revision must have an ID. |
767 | // If we have a revision and the ID is 0 or null, then it's a fake revision |
768 | // representing a preview. |
769 | |
770 | // NOTE: VisualEditor would set this flavor when transforming from Wikitext to HTML |
771 | // for the purpose of editing when doing parsefragment (in body only mode). |
772 | if ( $this->flavor === 'fragment' || $this->getRevisionId() === null ) { |
773 | $this->isCacheable = false; |
774 | } |
775 | |
776 | // TODO: Decide whether we want to allow stale content for speed for the |
777 | // 'view' flavor. In that case, we would want to use PoolCounterWork, |
778 | // either directly or through ParserOutputAccess. |
779 | |
780 | if ( $this->isCacheable ) { |
781 | $flags = $this->parsoidOutputAccessOptions; |
782 | $status = $this->parsoidOutputAccess->getParserOutput( |
783 | $this->page, |
784 | $parserOptions, |
785 | $this->revisionOrId, |
786 | $flags, |
787 | $this->lenientRevHandling |
788 | ); |
789 | } else { |
790 | $status = $this->parsoidOutputAccess->parseUncacheable( |
791 | $this->page, |
792 | $parserOptions, |
793 | $this->revisionOrId, |
794 | $this->lenientRevHandling |
795 | ); |
796 | |
797 | // @phan-suppress-next-line PhanSuspiciousValueComparison |
798 | if ( $status->isOK() && $this->flavor === 'fragment' ) { |
799 | // Unwrap sections and return body_only content |
800 | // NOTE: This introduces an extra html -> dom -> html roundtrip |
801 | // This will get addressed once HtmlHolder work is complete |
802 | $parserOutput = $status->getValue(); |
803 | $body = DOMCompat::getBody( DOMUtils::parseHTML( $parserOutput->getRawText() ) ); |
804 | if ( $body ) { |
805 | $this->stripParsoidSectionTags( $body ); |
806 | $parserOutput->setRawText( DOMCompat::getInnerHTML( $body ) ); |
807 | } |
808 | } |
809 | } |
810 | |
811 | return $status; |
812 | } |
813 | |
814 | } |