Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
71.04% |
238 / 335 |
|
37.93% |
11 / 29 |
CRAP | |
0.00% |
0 / 1 |
HtmlOutputRendererHelper | |
71.04% |
238 / 335 |
|
37.93% |
11 / 29 |
403.74 | |
0.00% |
0 / 1 |
__construct | |
93.33% |
14 / 15 |
|
0.00% |
0 / 1 |
3.00 | |||
setFlavor | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
getFlavor | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setOutputProfileVersion | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
12 | |||
setStashingEnabled | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
setRevision | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
4.07 | |||
setContent | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
setContentSource | |
60.00% |
3 / 5 |
|
0.00% |
0 / 1 |
2.26 | |||
setPageLanguage | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
init | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
initInternal | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
setVariantConversionLanguage | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
3 | |||
getAcceptedTargetLanguage | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
getHtml | |
69.57% |
32 / 46 |
|
0.00% |
0 / 1 |
8.38 | |||
getETag | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
isLatest | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
4.02 | |||
getLastModified | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
getParamSettings | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
2 | |||
getDefaultPageLanguage | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
getParserOutput | |
93.10% |
27 / 29 |
|
0.00% |
0 / 1 |
10.03 | |||
getHtmlOutputContentLanguage | |
23.08% |
3 / 13 |
|
0.00% |
0 / 1 |
7.10 | |||
putHeaders | |
80.00% |
8 / 10 |
|
0.00% |
0 / 1 |
5.20 | |||
getPageBundle | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
getRevisionId | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
4 | |||
stripParsoidSectionTags | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
4 | |||
getPageRecord | |
75.00% |
6 / 8 |
|
0.00% |
0 / 1 |
4.25 | |||
getParserOutputInternal | |
63.93% |
39 / 61 |
|
0.00% |
0 / 1 |
38.76 | |||
parseUncacheable | |
63.16% |
12 / 19 |
|
0.00% |
0 / 1 |
7.80 | |||
isParsoidContent | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | namespace MediaWiki\Rest\Handler\Helper; |
21 | |
22 | use InvalidArgumentException; |
23 | use MediaWiki\Content\Content; |
24 | use MediaWiki\Content\IContentHandlerFactory; |
25 | use MediaWiki\Edit\ParsoidOutputStash; |
26 | use MediaWiki\Edit\ParsoidRenderID; |
27 | use MediaWiki\Edit\SelserContext; |
28 | use MediaWiki\Exception\HttpError; |
29 | use MediaWiki\Exception\MWUnknownContentModelException; |
30 | use MediaWiki\Language\LanguageCode; |
31 | use MediaWiki\Languages\LanguageFactory; |
32 | use MediaWiki\Logger\LoggerFactory; |
33 | use MediaWiki\MainConfigNames; |
34 | use MediaWiki\Page\PageIdentity; |
35 | use MediaWiki\Page\PageLookup; |
36 | use MediaWiki\Page\PageRecord; |
37 | use MediaWiki\Page\ParserOutputAccess; |
38 | use MediaWiki\Parser\ParserOptions; |
39 | use MediaWiki\Parser\ParserOutput; |
40 | use MediaWiki\Parser\Parsoid\Config\SiteConfig as ParsoidSiteConfig; |
41 | use MediaWiki\Parser\Parsoid\HtmlTransformFactory; |
42 | use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter; |
43 | use MediaWiki\Permissions\Authority; |
44 | use MediaWiki\Rest\Handler; |
45 | use MediaWiki\Rest\HttpException; |
46 | use MediaWiki\Rest\LocalizedHttpException; |
47 | use MediaWiki\Rest\ResponseInterface; |
48 | use MediaWiki\Revision\MutableRevisionRecord; |
49 | use MediaWiki\Revision\RevisionAccessException; |
50 | use MediaWiki\Revision\RevisionLookup; |
51 | use MediaWiki\Revision\RevisionRecord; |
52 | use MediaWiki\Revision\RevisionRenderer; |
53 | use MediaWiki\Revision\SlotRecord; |
54 | use MediaWiki\Status\Status; |
55 | use MediaWiki\Title\Title; |
56 | use Wikimedia\Assert\Assert; |
57 | use Wikimedia\Bcp47Code\Bcp47Code; |
58 | use Wikimedia\Bcp47Code\Bcp47CodeValue; |
59 | use Wikimedia\Message\MessageValue; |
60 | use Wikimedia\ParamValidator\ParamValidator; |
61 | use Wikimedia\Parsoid\Core\ClientError; |
62 | use Wikimedia\Parsoid\Core\PageBundle; |
63 | use Wikimedia\Parsoid\Core\ResourceLimitExceededException; |
64 | use Wikimedia\Parsoid\DOM\Element; |
65 | use Wikimedia\Parsoid\Parsoid; |
66 | use Wikimedia\Parsoid\Utils\DOMCompat; |
67 | use Wikimedia\Parsoid\Utils\DOMUtils; |
68 | use Wikimedia\Parsoid\Utils\WTUtils; |
69 | use Wikimedia\Stats\StatsFactory; |
70 | |
71 | /** |
72 | * Helper for getting output of a given wikitext page rendered by parsoid. |
73 | * |
74 | * @since 1.36 |
75 | * |
76 | * @unstable Pending consolidation of the Parsoid extension with core code. |
77 | */ |
78 | class HtmlOutputRendererHelper implements HtmlOutputHelper { |
79 | use RestAuthorizeTrait; |
80 | use RestStatusTrait; |
81 | |
82 | /** |
83 | * @internal |
84 | */ |
85 | public const CONSTRUCTOR_OPTIONS = [ |
86 | MainConfigNames::ParsoidCacheConfig |
87 | ]; |
88 | |
89 | private const OUTPUT_FLAVORS = [ 'view', 'stash', 'fragment', 'edit' ]; |
90 | |
91 | /** @var PageIdentity|null */ |
92 | private $page = null; |
93 | |
94 | /** @var RevisionRecord|int|null */ |
95 | private $revisionOrId = null; |
96 | |
97 | /** @var Bcp47Code|null */ |
98 | private $pageLanguage = null; |
99 | |
100 | /** @var ?string One of the flavors from OUTPUT_FLAVORS */ |
101 | private $flavor = null; |
102 | |
103 | /** @var bool */ |
104 | private $stash = false; |
105 | |
106 | /** @var Authority */ |
107 | private $authority; |
108 | |
109 | /** @var ParserOutput */ |
110 | private $parserOutput; |
111 | |
112 | /** @var ParserOutput */ |
113 | private $processedParserOutput; |
114 | |
115 | /** @var ?Bcp47Code */ |
116 | private $sourceLanguage = null; |
117 | |
118 | /** @var ?Bcp47Code */ |
119 | private $targetLanguage = null; |
120 | |
121 | /** |
122 | * Should we ignore mismatches between $page and the page that $revision belongs to? |
123 | * Usually happens because of page moves. This should be set to true only for internal API calls. |
124 | */ |
125 | private bool $lenientRevHandling = false; |
126 | |
127 | /** |
128 | * Flags to be passed as $options to ParserOutputAccess::getParserOutput, |
129 | * to control parser cache access. |
130 | * |
131 | * @var int Use ParserOutputAccess::OPT_* |
132 | */ |
133 | private $parserOutputAccessOptions = 0; |
134 | |
135 | /** |
136 | * @see the $options parameter on Parsoid::wikitext2html |
137 | * @var array |
138 | */ |
139 | private $parsoidOptions = []; |
140 | |
141 | private ?ParserOptions $parserOptions = null; |
142 | |
143 | /** |
144 | * Whether the result can be cached in the parser cache and the web cache. |
145 | * Set to false when bespoke options are set. |
146 | * |
147 | * @var bool |
148 | */ |
149 | private $isCacheable = true; |
150 | |
151 | private ParsoidOutputStash $parsoidOutputStash; |
152 | private StatsFactory $statsFactory; |
153 | private ParserOutputAccess $parserOutputAccess; |
154 | private PageLookup $pageLookup; |
155 | private RevisionLookup $revisionLookup; |
156 | private RevisionRenderer $revisionRenderer; |
157 | private ParsoidSiteConfig $parsoidSiteConfig; |
158 | private HtmlTransformFactory $htmlTransformFactory; |
159 | private IContentHandlerFactory $contentHandlerFactory; |
160 | private LanguageFactory $languageFactory; |
161 | |
/**
 * Stores the injected services and options. When both a page and an
 * authority are supplied, the helper is initialized for that page right
 * away; otherwise a deprecation warning is emitted and no initialization
 * happens here.
 *
 * @param ParsoidOutputStash $parsoidOutputStash
 * @param StatsFactory $statsFactory
 * @param ParserOutputAccess $parserOutputAccess
 * @param PageLookup $pageLookup
 * @param RevisionLookup $revisionLookup
 * @param RevisionRenderer $revisionRenderer
 * @param ParsoidSiteConfig $parsoidSiteConfig
 * @param HtmlTransformFactory $htmlTransformFactory
 * @param IContentHandlerFactory $contentHandlerFactory
 * @param LanguageFactory $languageFactory
 * @param PageIdentity|null $page
 * @param array $parameters Request parameters; the keys 'stash' and
 *   'flavor' are read during initialization.
 * @param Authority|null $authority
 * @param RevisionRecord|int|null $revision
 * @param bool $lenientRevHandling Should we ignore mismatches between
 *   $page and the page that $revision belongs to? Usually happens
 *   because of page moves. This should be set to true only for
 *   internal API calls.
 * @param ParserOptions|null $parserOptions
 * @note Since 1.43, setting $page and $authority arguments to null
 *   has been deprecated.
 */
public function __construct(
	ParsoidOutputStash $parsoidOutputStash,
	StatsFactory $statsFactory,
	ParserOutputAccess $parserOutputAccess,
	PageLookup $pageLookup,
	RevisionLookup $revisionLookup,
	RevisionRenderer $revisionRenderer,
	ParsoidSiteConfig $parsoidSiteConfig,
	HtmlTransformFactory $htmlTransformFactory,
	IContentHandlerFactory $contentHandlerFactory,
	LanguageFactory $languageFactory,
	?PageIdentity $page = null,
	array $parameters = [],
	?Authority $authority = null,
	$revision = null,
	bool $lenientRevHandling = false,
	?ParserOptions $parserOptions = null
) {
	// Injected services.
	$this->parsoidOutputStash = $parsoidOutputStash;
	$this->statsFactory = $statsFactory;
	$this->parserOutputAccess = $parserOutputAccess;
	$this->pageLookup = $pageLookup;
	$this->revisionLookup = $revisionLookup;
	$this->revisionRenderer = $revisionRenderer;
	$this->parsoidSiteConfig = $parsoidSiteConfig;
	$this->htmlTransformFactory = $htmlTransformFactory;
	$this->contentHandlerFactory = $contentHandlerFactory;
	$this->languageFactory = $languageFactory;

	// Per-instance options.
	$this->lenientRevHandling = $lenientRevHandling;
	$this->parserOptions = $parserOptions;

	if ( $page !== null && $authority !== null ) {
		$this->initInternal( $page, $parameters, $authority, $revision );
		return;
	}

	// Constructing without $page and $authority parameters
	// is deprecated since 1.43.
	wfDeprecated( __METHOD__ . ' without $page or $authority', '1.43' );
}
223 | |
/**
 * Sets the given flavor to use for Wikitext -> HTML transformations.
 *
 * Flavors may influence parser options, parsoid options, and DOM transformations.
 * They will be reflected by the ETag returned by getETag().
 *
 * @note This method should not be called if stashing mode is enabled.
 *   If it is called anyway, the requested flavor is ignored and 'stash'
 *   is used instead.
 * @see setStashingEnabled
 * @see getFlavor()
 *
 * @param string $flavor One of the values in self::OUTPUT_FLAVORS
 *
 * @throws InvalidArgumentException If $flavor is not a known flavor
 * @return void
 */
public function setFlavor( string $flavor ): void {
	// Strict comparison: only an exact string match against the known
	// flavors is accepted, with no loose type juggling.
	if ( !in_array( $flavor, self::OUTPUT_FLAVORS, true ) ) {
		throw new InvalidArgumentException( 'Invalid flavor supplied' );
	}

	// While stashing is enabled the flavor is always forced to 'stash'.
	// XXX: throw?
	$this->flavor = $this->stash ? 'stash' : $flavor;
}
250 | |
/**
 * Returns the flavor of HTML that will be generated, as last stored by
 * setFlavor().
 *
 * @see setFlavor()
 * @return string One of the flavors from OUTPUT_FLAVORS
 */
public function getFlavor(): string {
	$currentFlavor = $this->flavor;

	return $currentFlavor;
}
259 | |
/**
 * Set the desired Parsoid profile version for the output.
 * The requested version is resolved through Parsoid::resolveContentVersion().
 *
 * @note At present only a version that resolves to Parsoid's default HTML
 *   version is accepted; everything else is rejected.
 *
 * @param string $version
 *
 * @throws HttpException If the given version is not supported (status 406)
 */
public function setOutputProfileVersion( $version ) {
	$resolvedVersion = Parsoid::resolveContentVersion( $version );

	// Both an unresolvable version and a resolvable non-default version
	// are rejected with the same error.
	$isSupported = $resolvedVersion && $resolvedVersion === Parsoid::defaultHTMLVersion();

	if ( !$isSupported ) {
		throw new LocalizedHttpException(
			new MessageValue( "rest-unsupported-profile-version", [ $version ] ), 406
		);
	}

	// TODO: (T347426) At some later point, we may reintroduce support for
	// non-default content versions as part of work on the content
	// negotiation protocol.
	//
	// // See Parsoid::wikitext2html
	// $this->parsoidOptions['outputContentVersion'] = $outputContentVersion;
	// $this->isCacheable = false;
}
295 | |
/**
 * Turn stashing on or off, keeping the flavor in step with it.
 *
 * Enabling stashing switches the flavor to 'stash'. Disabling it switches
 * the flavor back to 'view', but only if the flavor currently is 'stash'.
 *
 * @param bool $stash
 *
 * @return void
 */
public function setStashingEnabled( bool $stash ): void {
	$this->stash = $stash;

	if ( !$stash ) {
		// Leave any non-stash flavor untouched.
		if ( $this->flavor === 'stash' ) {
			$this->setFlavor( 'view' );
		}
		return;
	}

	$this->setFlavor( 'stash' );
}
312 | |
/**
 * Set the revision to render.
 *
 * Accepts either a revision ID or a RevisionRecord. The record may be a
 * fake one, used when rendering for previews or when switching the editor
 * from source mode to visual mode.
 *
 * In that case, $revisionOrId->getId() must return 0 to indicate
 * that the ParserCache should be bypassed. Stashing may still apply.
 *
 * @param RevisionRecord|int $revisionOrId
 *
 * @throws HttpError With status 400 if an integer ID is zero or negative
 */
public function setRevision( $revisionOrId ): void {
	Assert::parameterType( [ RevisionRecord::class, 'integer' ], $revisionOrId, '$revision' );

	$isInvalidId = is_int( $revisionOrId ) && $revisionOrId <= 0;
	if ( $isInvalidId ) {
		throw new HttpError( 400, "Bad revision ID: $revisionOrId" );
	}

	$this->revisionOrId = $revisionOrId;

	if ( $this->getRevisionId() !== null ) {
		return;
	}

	// A RevisionRecord without a revision ID is a fake revision used for
	// editor previews or mode switches. The wikitext is coming from the
	// request, not the database, so the result is not cacheable for re-use
	// by others (though it can be stashed for use by the same client).
	$this->isCacheable = false;
}
341 | |
/**
 * Set the content to render. Useful when rendering for previews
 * or when switching the editor from source mode to visual mode.
 *
 * Wraps the content in a fake revision with revision ID 0 for the
 * helper's page and passes it to setRevision().
 *
 * @see setRevision
 * @see setContentSource
 *
 * @param Content $content Stored in the main slot of the fake revision
 */
public function setContent( Content $content ): void {
	$page = $this->page;

	$fakeRevision = new MutableRevisionRecord( $page );
	$fakeRevision->setId( 0 );
	$fakeRevision->setPageId( $page->getId() );
	$fakeRevision->setContent( SlotRecord::MAIN, $content );

	$this->setRevision( $fakeRevision );
}
360 | |
/**
 * Set the content to render from its serialized form. Useful when
 * rendering for previews or when switching the editor from source mode
 * to visual mode.
 *
 * The source is unserialized by the content handler for $model and then
 * passed to setContent(), which creates a fake revision with ID 0.
 *
 * @param string $source The source data, e.g. wikitext
 * @param string $model The content model indicating how to interpret $source, e.g. CONTENT_MODEL_WIKITEXT
 *
 * @throws LocalizedHttpException With status 400 if the content model is unknown
 *
 * @see setRevision
 * @see setContent
 */
public function setContentSource( string $source, string $model ): void {
	try {
		$this->setContent(
			$this->contentHandlerFactory
				->getContentHandler( $model )
				->unserializeContent( $source )
		);
	} catch ( MWUnknownContentModelException $unknownModel ) {
		$message = new MessageValue( "rest-bad-content-model", [ $model ] );
		throw new LocalizedHttpException( $message, 400 );
	}
}
382 | |
/**
 * Override the page language used for rendering.
 *
 * This is equivalent to 'pageLanguageOverride' in PageConfigFactory.
 * For example, when clients call the REST API with the 'content-language'
 * header to affect language variant conversion.
 *
 * @param Bcp47Code|string $pageLanguage the page language, as a Bcp47Code
 *   or a BCP-47 string. A string is wrapped in a Bcp47CodeValue.
 */
public function setPageLanguage( $pageLanguage ): void {
	$this->pageLanguage = is_string( $pageLanguage )
		? new Bcp47CodeValue( $pageLanguage )
		: $pageLanguage;
}
397 | |
/**
 * Deprecated public entry point for initializing the helper. Emits a
 * deprecation warning and then delegates to initInternal() with the page
 * being handled, the parameters taken from the request inputs, and the
 * revision, if one is available.
 *
 * @param PageIdentity $page
 * @param array $parameters
 * @param Authority $authority
 * @param RevisionRecord|int|null $revision
 * @deprecated since 1.43, use parameters in constructor instead
 */
public function init(
	PageIdentity $page,
	array $parameters,
	Authority $authority,
	$revision = null
) {
	// Warn first, then run the shared initialization.
	wfDeprecated( __METHOD__, '1.43' );

	$this->initInternal( $page, $parameters, $authority, $revision );
}
418 | |
/**
 * Shared initialization used by the constructor and by init().
 *
 * Records the page and authority, reads the 'stash' and 'flavor' entries
 * from $parameters, applies the revision if one is given, and makes sure
 * parser options are available.
 *
 * @param PageIdentity $page
 * @param array $parameters
 * @param Authority $authority
 * @param int|RevisionRecord|null $revision
 */
private function initInternal(
	PageIdentity $page,
	array $parameters,
	Authority $authority,
	$revision = null
) {
	$this->page = $page;
	$this->authority = $authority;
	$this->stash = $parameters['stash'] ?? false;

	if ( $revision !== null ) {
		$this->setRevision( $revision );
	}

	// Stashing dictates the flavor; otherwise use the requested one, defaulting to 'view'.
	$initialFlavor = $this->stash ? 'stash' : ( $parameters['flavor'] ?? 'view' );
	$this->setFlavor( $initialFlavor );

	// Fall back to anonymous parser options if none were injected.
	if ( $this->parserOptions === null ) {
		$this->parserOptions = ParserOptions::newFromAnon();
	}
}
446 | |
/**
 * @inheritDoc
 *
 * String arguments are passed through
 * LanguageCode::normalizeNonstandardCodeAndWarn() before being stored.
 * For a string target language, only the first language code found by
 * getAcceptedTargetLanguage() is used.
 */
public function setVariantConversionLanguage(
	$targetLanguage,
	$sourceLanguage = null
): void {
	if ( is_string( $targetLanguage ) ) {
		$firstAcceptedCode = $this->getAcceptedTargetLanguage( $targetLanguage );
		$targetLanguage = LanguageCode::normalizeNonstandardCodeAndWarn( $firstAcceptedCode );
	}

	if ( is_string( $sourceLanguage ) ) {
		$sourceLanguage = LanguageCode::normalizeNonstandardCodeAndWarn( $sourceLanguage );
	}

	$this->targetLanguage = $targetLanguage;
	$this->sourceLanguage = $sourceLanguage;
}
468 | |
/**
 * Get a target language from an accept header.
 *
 * @param string $targetLanguage The header value to inspect
 * @return string The first run of word characters and hyphens found after
 *   optional leading whitespace, or 'und' if the value does not start with one
 */
private function getAcceptedTargetLanguage( string $targetLanguage ): string {
	// We could try to identify the most desirable language here,
	// following the rules for Accept-Language headers in RFC 9110.
	// For now, just take the first language code.
	$found = preg_match( '/^\s*([-\w]+)/', $targetLanguage, $m );

	// Fall back to "undetermined" per RFC5646
	return $found ? $m[1] : 'und';
}
484 | |
/**
 * @inheritDoc
 *
 * The processed output is memoized, so stashing, attribute injection and
 * variant conversion are performed on the first call only.
 */
public function getHtml(): ParserOutput {
	if ( $this->processedParserOutput ) {
		return $this->processedParserOutput;
	}

	$output = $this->getParserOutput();

	if ( $this->stash ) {
		$this->authorizeWriteOrThrow( $this->authority, 'stashbasehtml', $this->page );

		// A null revision ID means we are rendering a fake revision; its
		// content is stashed along with the rendering.
		$isFakeRevision = $this->getRevisionId() === null;
		$stashKey = ParsoidRenderID::newFromParserOutput( $output );
		$selserContext = new SelserContext(
			PageBundleParserOutputConverter::pageBundleFromParserOutput( $output ),
			$stashKey->getRevisionID(),
			$isFakeRevision ? $this->revisionOrId->getContent( SlotRecord::MAIN ) : null
		);
		$stashSuccess = $this->parsoidOutputStash->set( $stashKey, $selserContext );

		// Count the attempt, labelled with its outcome.
		$outcome = $stashSuccess ? 'save' : 'fail';
		$statsdKey = $stashSuccess
			? 'htmloutputrendererhelper.stash.save'
			: 'htmloutputrendererhelper.stash.fail';
		$this->statsFactory->getCounter( 'htmloutputrendererhelper_stash_total' )
			->setLabel( 'status', $outcome )
			->copyToStatsdAt( $statsdKey )
			->increment();

		if ( !$stashSuccess ) {
			$errorData = [ 'parsoid-stash-key' => $stashKey ];
			LoggerFactory::getInstance( 'HtmlOutputRendererHelper' )->error(
				"Parsoid stash failure",
				$errorData
			);
			throw new LocalizedHttpException(
				MessageValue::new( 'rest-html-stash-failure' ),
				500,
				$errorData
			);
		}
	}

	if ( $this->flavor === 'edit' ) {
		// Inject data-parsoid and data-mw attributes.
		$inlinedHtml = $this->getPageBundle()->toInlineAttributeHtml();
		$output->setRawText( $inlinedHtml );
	}

	// Check if variant conversion has to be performed
	// NOTE: Variant conversion is performed on the fly, and kept outside the stash.
	if ( $this->targetLanguage ) {
		$output = $this->htmlTransformFactory
			->getLanguageVariantConverter( $this->page )
			->convertParserOutputVariant(
				$output,
				$this->targetLanguage,
				$this->sourceLanguage
			);
	}

	$this->processedParserOutput = $output;

	return $output;
}
552 | |
/**
 * @inheritDoc
 *
 * The tag is the double-quoted string "<render key>/<flavor>", followed by
 * "/<suffix>" when a suffix is given and by "+lang:<code>" when a target
 * language is set.
 */
public function getETag( string $suffix = '' ): ?string {
	$renderKey = ParsoidRenderID::newFromParserOutput( $this->getParserOutput() )->getKey();

	$tag = "$renderKey/{$this->flavor}";

	if ( $suffix !== '' ) {
		$tag .= "/$suffix";
	}

	if ( $this->targetLanguage ) {
		$tag .= "+lang:{$this->targetLanguage->toBcp47Code()}";
	}

	return '"' . $tag . '"';
}
573 | |
/**
 * Whether the revision being rendered is the page's latest revision.
 *
 * @return bool False for an un-saved revision or when no page record can
 *   be found; true when no specific revision was requested; otherwise
 *   whether the revision ID equals the page's latest revision ID.
 */
private function isLatest(): bool {
	$revId = $this->getRevisionId();

	if ( $revId === null ) {
		// un-saved revision
		return false;
	}

	if ( $revId === 0 ) {
		// latest revision
		return true;
	}

	$pageRecord = $this->getPageRecord();

	// A missing page record shouldn't happen; treat it as "not latest".
	return $pageRecord !== null && $revId === $pageRecord->getLatest();
}
593 | |
/**
 * @inheritDoc
 *
 * For the latest revision of a known page this is the page's touch
 * timestamp; in all other cases it is the cache time of the parser output.
 */
public function getLastModified(): ?string {
	// The page record should never be null for the latest revision.
	// If it is, getParserOutput() will fail nicely below.
	$pageRecord = $this->isLatest() ? $this->getPageRecord() : null;

	if ( $pageRecord ) {
		// Using the touch timestamp for this purpose is in line with
		// the behavior of ViewAction::show(). However,
		// OutputPage::checkLastModified() applies a lot of additional
		// limitations.
		return $pageRecord->getTouched();
	}

	return $this->getParserOutput()->getCacheTime();
}
614 | |
/**
 * @inheritDoc
 *
 * Declares two optional query parameters: the boolean 'stash' (default
 * false) and 'flavor' (one of OUTPUT_FLAVORS, default 'view').
 */
public static function getParamSettings(): array {
	$stashSettings = [
		Handler::PARAM_SOURCE => 'query',
		ParamValidator::PARAM_TYPE => 'boolean',
		ParamValidator::PARAM_DEFAULT => false,
		ParamValidator::PARAM_REQUIRED => false,
		Handler::PARAM_DESCRIPTION => new MessageValue( 'rest-param-desc-html-output-stash' )
	];

	$flavorSettings = [
		Handler::PARAM_SOURCE => 'query',
		ParamValidator::PARAM_TYPE => self::OUTPUT_FLAVORS,
		ParamValidator::PARAM_DEFAULT => 'view',
		ParamValidator::PARAM_REQUIRED => false,
		Handler::PARAM_DESCRIPTION => new MessageValue( 'rest-param-desc-html-output-flavor' )
	];

	return [
		'stash' => $stashSettings,
		'flavor' => $flavorSettings,
	];
}
636 | |
/**
 * Determine the language the page is rendered in when no override is set.
 *
 * @return Bcp47Code The user language from the parser options when they
 *   are flagged as an interface message, otherwise the page language
 *   reported by the page's Title.
 */
private function getDefaultPageLanguage(): Bcp47Code {
	// NOTE: keep in sync with Parser::getTargetLanguage!

	// XXX: Inject a TitleFactory just for this?! We need a better way to determine the page language...
	$pageTitle = Title::castFromPageIdentity( $this->page );

	return $this->parserOptions->getInterfaceMessage()
		? $this->parserOptions->getUserLangObj()
		: $pageTitle->getPageLanguage();
}
649 | |
/**
 * Returns the parser output for the page, rendering it on first use and
 * reusing the stored result afterwards.
 *
 * A failed rendering status is turned into an HTTP error: known failure
 * messages map to 400, 413, 403 or 404, anything else is logged and
 * reported as 500.
 *
 * @throws LocalizedHttpException With status 404 if the revision cannot be accessed
 * @return ParserOutput
 */
private function getParserOutput(): ParserOutput {
	if ( !$this->parserOutput ) {
		$this->parserOptions->setRenderReason( __METHOD__ );

		$defaultLanguage = $this->getDefaultPageLanguage();
		$overrideLanguage = $this->pageLanguage;

		if ( $overrideLanguage
			&& $overrideLanguage->toBcp47Code() !== $defaultLanguage->toBcp47Code()
		) {
			$targetLanguageObj = $this->languageFactory->getLanguage( $overrideLanguage );
			$this->parserOptions->setTargetLanguage( $targetLanguageObj );
			// Ensure target language splits the parser cache, when
			// non-default; targetLanguage is not in
			// ParserOptions::$cacheVaryingOptionsHash for the legacy
			// parser.
			$this->parserOptions->addExtraKey( 'target=' . $targetLanguageObj->getCode() );
		}

		try {
			$status = $this->getParserOutputInternal();
		} catch ( RevisionAccessException $accessError ) {
			throw new LocalizedHttpException(
				MessageValue::new( 'rest-nonexistent-title' ),
				404,
				[ 'reason' => $accessError->getMessage() ]
			);
		}

		if ( !$status->isOK() ) {
			// Known failure messages, checked in this order, with the REST
			// message key and HTTP status used to report each of them.
			$knownFailures = [
				'parsoid-client-error' => [ 'rest-html-backend-error', 400 ],
				'parsoid-resource-limit-exceeded' => [ 'rest-resource-limit-exceeded', 413 ],
				'missing-revision-permission' => [ 'rest-permission-denied-revision', 403 ],
				'parsoid-revision-access' => [ 'rest-specified-revision-unavailable', 404 ],
			];

			$restMessage = 'rest-html-backend-error';
			$httpCode = 500;
			$isKnownFailure = false;

			foreach ( $knownFailures as $statusMessage => [ $mappedMessage, $mappedCode ] ) {
				if ( $status->hasMessage( $statusMessage ) ) {
					$restMessage = $mappedMessage;
					$httpCode = $mappedCode;
					$isKnownFailure = true;
					break;
				}
			}

			// Only unexpected failures are logged.
			if ( !$isKnownFailure ) {
				$this->logStatusError( $status, 'Parsoid backend error', 'HtmlOutputRendererHelper' );
			}

			$this->throwExceptionForStatus( $status, $restMessage, $httpCode );
		}

		$this->parserOutput = $status->getValue();
	}

	Assert::invariant( $this->parserOutput->getRenderId() !== null, "no render id" );

	return $this->parserOutput;
}
699 | |
/**
 * The content language of the HTML output after parsing.
 *
 * If the output does not specify a language, a warning is logged and the
 * page language set on this helper is used, or, failing that, the page
 * language reported by the page's Title.
 *
 * @return Bcp47Code The language, as a BCP-47 code
 */
public function getHtmlOutputContentLanguage(): Bcp47Code {
	$outputLanguage = $this->getHtml()->getLanguage();

	if ( $outputLanguage ) {
		return $outputLanguage;
	}

	// This shouldn't happen, but don't crash if it does:
	$logger = LoggerFactory::getInstance( 'HtmlOutputRendererHelper' );

	if ( $this->pageLanguage ) {
		$logger->warning(
			"ParserOutput does not specify a language"
		);

		return $this->pageLanguage;
	}

	$logger->warning(
		"ParserOutput does not specify a language and no page language set in helper."
	);

	return Title::newFromPageIdentity( $this->page )->getPageLanguage();
}
728 | |
/**
 * @inheritDoc
 *
 * Depending on the helper's state this sets Content-Language and the
 * content type (HTML responses only), adds a Vary header, sets
 * Cache-Control for uncacheable output, and sets Content-Revision-Id.
 */
public function putHeaders( ResponseInterface $response, bool $forHtml = true ): void {
	if ( $forHtml ) {
		// For HTML, we want to set the Content-Language. For JSON, we probably don't.
		$languageCode = $this->getHtmlOutputContentLanguage()->toBcp47Code();
		$response->setHeader( 'Content-Language', $languageCode );

		ParsoidFormatHelper::setContentType(
			$response,
			ParsoidFormatHelper::FORMAT_HTML,
			$this->getPageBundle()->version
		);
	}

	if ( $this->targetLanguage ) {
		$response->addHeader( 'Vary', 'Accept-Language' );
	}

	// XXX: if Parsoid returns Vary headers, set them here?!

	if ( !$this->isCacheable ) {
		$response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
	}

	// TODO: cache control for stable HTML? See ContentHelper::setCacheControl

	// Neither 0 (current revision) nor null (fake revision) is reported.
	$revId = $this->getRevisionId();
	if ( $revId ) {
		$response->setHeader( 'Content-Revision-Id', (string)$revId );
	}
}
757 | |
/**
 * Returns the rendered HTML as a PageBundle object, converted to the
 * target language variant if one has been set.
 */
public function getPageBundle(): PageBundle {
	// XXX: converting between PageBundle and ParserOutput is inefficient!
	$bundle = PageBundleParserOutputConverter::pageBundleFromParserOutput(
		$this->getParserOutput()
	);

	if ( !$this->targetLanguage ) {
		return $bundle;
	}

	// NOTE: Variant conversion is performed on the fly, and kept outside the stash.
	return $this->htmlTransformFactory
		->getLanguageVariantConverter( $this->page )
		->convertPageBundleVariant(
			$bundle,
			$this->targetLanguage,
			$this->sourceLanguage
		);
}
779 | |
/**
 * Returns the ID of the revision that is being rendered.
 *
 * This will return 0 if no revision has been specified, so the current revision
 * will be rendered.
 *
 * This will return null if a RevisionRecord has been set but that RevisionRecord
 * does not have a revision ID, e.g. when rendering a preview.
 */
public function getRevisionId(): ?int {
	$revision = $this->revisionOrId;

	if ( $revision instanceof RevisionRecord ) {
		// NOTE: return null even if getId() gave us 0
		return $revision->getId() ?: null;
	}

	// Not a RevisionRecord: either nothing (or 0) was set, meaning the
	// current revision is rendered, or it is a revision ID.
	return $revision ? (int)$revision : 0;
}
803 | |
/**
 * Strip Parsoid's section wrappers
 *
 * Walks the children of $elt depth-first and replaces every element that
 * WTUtils::isParsoidSectionTag() matches with that element's own children.
 *
 * TODO: Should we move this to Parsoid's ContentUtils class?
 * There already is a stripUnnecessaryWrappersAndSyntheticNodes but
 * it targets html2wt and does a lot more than just section unwrapping.
 */
private function stripParsoidSectionTags( Element $elt ): void {
	for ( $child = $elt->firstChild; $child; $child = $following ) {
		// Remember the sibling up front: $child may be detached below.
		$following = $child->nextSibling;

		if ( !( $child instanceof Element ) ) {
			continue;
		}

		// Recurse into subtree before stripping this
		$this->stripParsoidSectionTags( $child );

		// Strip <section> tags and synthetic extended-annotation-region wrappers
		if ( !WTUtils::isParsoidSectionTag( $child ) ) {
			continue;
		}

		$container = $child->parentNode;
		// Help out phan
		'@phan-var Element $container';
		// Move the wrapper's children in front of the wrapper, then drop the wrapper.
		DOMUtils::migrateChildren( $child, $container, $child );
		$container->removeChild( $child );
	}
}
830 | |
/**
 * Returns the page record, or null if no page is known or the page does not exist.
 *
 * When $this->page is not yet a PageRecord it is looked up through
 * $this->pageLookup; a successful lookup replaces $this->page with the
 * record, so later calls return it without another lookup.
 *
 * @return PageRecord|null
 */
private function getPageRecord(): ?PageRecord {
	if ( $this->page instanceof PageRecord ) {
		// Already resolved
		return $this->page;
	}

	if ( $this->page === null ) {
		return null;
	}

	$record = $this->pageLookup->getPageByReference( $this->page );
	if ( !$record ) {
		return null;
	}

	// Keep the resolved record for subsequent calls
	$this->page = $record;

	return $record;
}
852 | |
/**
 * Produces the parser output for the configured page and revision, wrapped in a Status.
 *
 * Cacheable requests are served through ParserOutputAccess. Requests for the
 * 'fragment' flavor, or for a RevisionRecord that has no revision ID, are
 * marked uncacheable and rendered via parseUncacheable().
 *
 * @return Status On success, the value is the parser output object. Fatal with
 *   'parsoid-client-error' or 'parsoid-resource-limit-exceeded' if the parse
 *   raised ClientError or ResourceLimitExceededException.
 * @throws RevisionAccessException For cacheable requests: if the page or the
 *   revision cannot be found, or if the revision does not belong to the page
 *   and lenient revision handling is off.
 * @throws \RuntimeException If lenient revision handling is on and the page
 *   the revision claims to belong to cannot be loaded.
 */
private function getParserOutputInternal(): Status {
	// NOTE: ParserOutputAccess::getParserOutput() should be used for revisions
	// that come from the database. Either this revision is null to indicate
	// the current revision or the revision must have an ID.
	// If we have a revision and the ID is 0 or null, then it's a fake revision
	// representing a preview.
	// NOTE: VisualEditor would set this flavor when transforming from Wikitext to HTML
	// for the purpose of editing when doing parsefragment (in body only mode).
	if ( $this->flavor === 'fragment' || $this->getRevisionId() === null ) {
		$this->isCacheable = false;
	}

	// TODO: Decide whether we want to allow stale content for speed for the
	// 'view' flavor. In that case, we would want to use PoolCounterWork,
	// either directly or through ParserOutputAccess.

	$flags = $this->parserOutputAccessOptions;

	// Find page
	$pageRecord = $this->getPageRecord();
	$revision = $this->revisionOrId;

	// NOTE: If we have a RevisionRecord already and this is
	// not cacheable, just use it, there is no need to
	// resolve $page to a PageRecord (and it may not be
	// possible if the page doesn't exist).
	if ( $this->isCacheable ) {
		if ( !$pageRecord ) {
			if ( $this->page ) {
				throw new RevisionAccessException(
					'Page {name} not found',
					[ 'name' => "{$this->page}" ]
				);
			} else {
				throw new RevisionAccessException( "No page" );
			}
		}

		// No revision specified: render the page's latest revision.
		$revision ??= $pageRecord->getLatest();

		// Resolve a plain revision ID to a RevisionRecord.
		if ( is_int( $revision ) ) {
			$revId = $revision;
			$revision = $this->revisionLookup->getRevisionById( $revId );

			if ( !$revision ) {
				throw new RevisionAccessException(
					'Revision {revId} not found',
					[ 'revId' => $revId ]
				);
			}
		}

		if ( $pageRecord->getId() !== $revision->getPageId() ) {
			if ( $this->lenientRevHandling ) {
				// Follow the revision to the page it actually belongs to.
				$pageRecord = $this->pageLookup->getPageById( $revision->getPageId() );
				if ( !$pageRecord ) {
					// This should ideally never trigger!
					throw new \RuntimeException(
						"Unexpected NULL page for pageid " . $revision->getPageId() .
						" from revision " . $revision->getId()
					);
				}
				// Don't cache this!
				$flags |= ParserOutputAccess::OPT_NO_UPDATE_CACHE;
			} else {
				throw new RevisionAccessException(
					'Revision {revId} does not belong to page {name}',
					[ 'name' => $pageRecord->getDBkey(), 'revId' => $revision->getId() ]
				);
			}
		}
	}

	// NOTE(review): on the uncacheable path $revision is used exactly as stored
	// in $this->revisionOrId, so this call presumes a RevisionRecord was set
	// (e.g. 'fragment' flavor without one would fail here) - confirm with callers.
	$contentModel = $revision->getMainContentModel();
	if ( $this->parsoidSiteConfig->supportsContentModel( $contentModel ) ) {
		$this->parserOptions->setUseParsoid();
	}

	if ( $this->isCacheable ) {
		// phan can't tell that we must have used the block above to
		// resolve $pageRecord to a PageRecord if we've made it to this block.
		'@phan-var PageRecord $pageRecord';
		try {
			$status = $this->parserOutputAccess->getParserOutput(
				$pageRecord, $this->parserOptions, $revision, $flags
			);
		} catch ( ClientError $e ) {
			$status = Status::newFatal( 'parsoid-client-error', $e->getMessage() );
		} catch ( ResourceLimitExceededException $e ) {
			$status = Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
		}
	} else {
		'@phan-var RevisionRecord $revision';
		$status = $this->parseUncacheable(
			$this->page,
			$revision,
			$this->lenientRevHandling
		);

		// @phan-suppress-next-line PhanSuspiciousValueComparison
		if ( $status->isOK() && $this->flavor === 'fragment' ) {
			// Unwrap sections and return body_only content
			// NOTE: This introduces an extra html -> dom -> html roundtrip
			// This will get addressed once HtmlHolder work is complete
			$parserOutput = $status->getValue();
			$body = DOMCompat::getBody( DOMUtils::parseHTML( $parserOutput->getRawText() ) );
			if ( $body ) {
				$this->stripParsoidSectionTags( $body );
				$parserOutput->setText( DOMCompat::getInnerHTML( $body ) );
			}
		}
	}

	// Whichever path produced it, a successful result must carry a render ID.
	Assert::invariant(
		!$status->isOK() || $status->getValue()->getRenderId() !== null,
		"no render id"
	);

	return $status;
}
970 | |
/**
 * Renders a revision directly through the RevisionRenderer.
 *
 * See ParserOutputAccess::renderRevision() -- but of course this method
 * bypasses any caching.
 *
 * @param PageIdentity $page Not referenced in the method body.
 * @param RevisionRecord $revision Revision to render; its ID must be 0 or null,
 *   otherwise a fatal Status is returned without rendering.
 * @param bool $lenientRevHandling Not referenced in the method body.
 * @return Status Good status holding the ParserOutput, or a fatal status.
 */
private function parseUncacheable(
	PageIdentity $page,
	RevisionRecord $revision,
	bool $lenientRevHandling = false
): Status {
	// Enforce caller expectation
	$id = $revision->getId();
	$hasNoRealId = ( $id === 0 || $id === null );
	if ( !$hasNoRealId ) {
		return Status::newFatal(
			'parsoid-revision-access',
			"parseUncacheable should not be called for a real revision"
		);
	}

	try {
		// ParserOutputAccess uses 'null' for the authority and
		// 'audience' => RevisionRecord::RAW, presumably because
		// the access checks are already handled by the
		// RestAuthorizeTrait
		$rendered = $this->revisionRenderer->getRenderedRevision(
			$revision,
			$this->parserOptions,
			$this->authority,
			[ 'audience' => RevisionRecord::RAW ]
		);

		if ( $rendered === null ) {
			$result = Status::newFatal( 'parsoid-revision-access' );
		} else {
			$output = $rendered->getRevisionParserOutput();
			// Ensure this isn't accidentally cached
			$output->updateCacheExpiry( 0 );
			$result = Status::newGood( $output );
		}
	} catch ( ClientError $e ) {
		$result = Status::newFatal( 'parsoid-client-error', $e->getMessage() );
	} catch ( ResourceLimitExceededException $e ) {
		$result = Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
	}

	return $result;
}
1008 | |
/**
 * Reports whether the ParserOutput returned by getParserOutput() carries a
 * page bundle, as determined by PageBundleParserOutputConverter::hasPageBundle().
 */
public function isParsoidContent(): bool {
	$output = $this->getParserOutput();

	return PageBundleParserOutputConverter::hasPageBundle( $output );
}
1014 | |
1015 | } |