Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
67.70% |
174 / 257 |
|
25.00% |
3 / 12 |
CRAP | |
0.00% |
0 / 1 |
HtmlInputTransformHelper | |
67.70% |
174 / 257 |
|
25.00% |
3 / 12 |
308.50 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
getParamSettings | |
0.00% |
0 / 38 |
|
0.00% |
0 / 1 |
2 | |||
normalizeParameters | |
72.73% |
16 / 22 |
|
0.00% |
0 / 1 |
21.19 | |||
init | |
84.31% |
43 / 51 |
|
0.00% |
0 / 1 |
21.54 | |||
getTransform | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setMetrics | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
setOriginal | |
81.13% |
43 / 53 |
|
0.00% |
0 / 1 |
27.87 | |||
getContent | |
72.22% |
13 / 18 |
|
0.00% |
0 / 1 |
4.34 | |||
putContent | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
fetchParserOutputFromParsoid | |
95.24% |
20 / 21 |
|
0.00% |
0 / 1 |
4 | |||
fetchSelserContextFromStash | |
86.96% |
20 / 23 |
|
0.00% |
0 / 1 |
5.06 | |||
throwHttpExceptionForStatus | |
0.00% |
0 / 9 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | namespace MediaWiki\Rest\Handler\Helper; |
21 | |
22 | use Content; |
23 | use InvalidArgumentException; |
24 | use LanguageCode; |
25 | use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface; |
26 | use MediaWiki\Edit\ParsoidOutputStash; |
27 | use MediaWiki\Edit\ParsoidRenderID; |
28 | use MediaWiki\Edit\SelserContext; |
29 | use MediaWiki\MainConfigNames; |
30 | use MediaWiki\Page\PageIdentity; |
31 | use MediaWiki\Parser\ParserOutput; |
32 | use MediaWiki\Parser\Parsoid\HtmlToContentTransform; |
33 | use MediaWiki\Parser\Parsoid\HtmlTransformFactory; |
34 | use MediaWiki\Parser\Parsoid\PageBundleParserOutputConverter; |
35 | use MediaWiki\Parser\Parsoid\ParsoidOutputAccess; |
36 | use MediaWiki\Rest\Handler; |
37 | use MediaWiki\Rest\HttpException; |
38 | use MediaWiki\Rest\LocalizedHttpException; |
39 | use MediaWiki\Rest\ResponseInterface; |
40 | use MediaWiki\Revision\RevisionAccessException; |
41 | use MediaWiki\Revision\RevisionRecord; |
42 | use MediaWiki\Status\Status; |
43 | use MWUnknownContentModelException; |
44 | use ParserOptions; |
45 | use Wikimedia\Bcp47Code\Bcp47Code; |
46 | use Wikimedia\Message\MessageValue; |
47 | use Wikimedia\ParamValidator\ParamValidator; |
48 | use Wikimedia\Parsoid\Core\ClientError; |
49 | use Wikimedia\Parsoid\Core\PageBundle; |
50 | use Wikimedia\Parsoid\Core\ResourceLimitExceededException; |
51 | use Wikimedia\Parsoid\Parsoid; |
52 | |
53 | /** |
54 | * REST helper for converting HTML to page content source (e.g. wikitext). |
55 | * |
56 | * @since 1.40 |
57 | * |
58 | * @unstable Pending consolidation of the Parsoid extension with core code. |
59 | */ |
60 | class HtmlInputTransformHelper { |
61 | /** |
62 | * @internal |
63 | * @var string[] |
64 | */ |
65 | public const CONSTRUCTOR_OPTIONS = [ |
66 | MainConfigNames::ParsoidCacheConfig |
67 | ]; |
68 | |
69 | /** @var HtmlTransformFactory */ |
70 | private $htmlTransformFactory; |
71 | |
72 | /** @var PageIdentity|null */ |
73 | private $page = null; |
74 | |
75 | /** @var StatsdDataFactoryInterface */ |
76 | private $stats; |
77 | |
78 | /** |
79 | * @var HtmlToContentTransform |
80 | */ |
81 | private $transform; |
82 | |
83 | /** |
84 | * @var ParsoidOutputStash |
85 | */ |
86 | private $parsoidOutputStash; |
87 | |
88 | /** |
89 | * @var ParsoidOutputAccess |
90 | */ |
91 | private $parsoidOutputAccess; |
92 | |
93 | /** |
94 | * @var array |
95 | */ |
96 | private $envOptions; |
97 | |
/**
 * Store the collaborators and complete the environment options with defaults.
 *
 * @param StatsdDataFactoryInterface $statsDataFactory Sink for usage metrics
 * @param HtmlTransformFactory $htmlTransformFactory Creates the HTML-to-content transform
 * @param ParsoidOutputStash $parsoidOutputStash Stash consulted for renderings given by render ID
 * @param ParsoidOutputAccess $parsoidOutputAccess Access to Parsoid output for a revision
 * @param array $envOptions Optional overrides for 'outputContentVersion' and 'offsetType'
 */
public function __construct(
	StatsdDataFactoryInterface $statsDataFactory,
	HtmlTransformFactory $htmlTransformFactory,
	ParsoidOutputStash $parsoidOutputStash,
	ParsoidOutputAccess $parsoidOutputAccess,
	array $envOptions = []
) {
	// Values supplied by the caller win; the defaults only fill in missing keys.
	$defaultEnvOptions = [
		'outputContentVersion' => Parsoid::defaultHTMLVersion(),
		'offsetType' => 'byte',
	];

	$this->stats = $statsDataFactory;
	$this->htmlTransformFactory = $htmlTransformFactory;
	$this->parsoidOutputStash = $parsoidOutputStash;
	$this->parsoidOutputAccess = $parsoidOutputAccess;
	$this->envOptions = $envOptions + $defaultEnvOptions;
}
121 | |
/**
 * Describe the path and query parameters this helper understands.
 *
 * All parameters are optional. The JSON request body is not validated here;
 * its expected shape is sketched in the comment inside this method.
 *
 * @return array Parameter settings keyed by parameter name
 */
public function getParamSettings(): array {
	// JSON body schema:
	/*
	doc:
		properties:
			headers:
				type: array
				items:
					type: string
			body:
				type: [ string, object ]
		required: [ body ]

	body:
		properties:
			offsetType:
				type: string
			revid:
				type: integer
			renderid:
				type: string
			etag:
				type: string
			html:
				type: [ doc, string ]
			data-mw:
				type: doc
			original:
				properties:
					html:
						type: doc
					source:
						type: doc
					data-mw:
						type: doc
					data-parsoid:
						type: doc
		required: [ html ]
	*/

	// FUTURE: more params
	// - slot (for loading the base content)

	// Most parameters are optional strings defaulting to ''; only the source differs.
	$optionalString = static function ( string $source ): array {
		return [
			Handler::PARAM_SOURCE => $source,
			ParamValidator::PARAM_TYPE => 'string',
			ParamValidator::PARAM_DEFAULT => '',
			ParamValidator::PARAM_REQUIRED => false,
		];
	};

	$settings = [];

	// XXX: should we really declare this here? Or should the endpoint do this?
	//      This class does not read this property.
	$settings['title'] = $optionalString( 'path' );

	// XXX: Needed for compatibility with the parsoid transform endpoint.
	//      The revision ID should really be part of the information about
	//      the original data in the body.
	$settings['oldid'] = [
		Handler::PARAM_SOURCE => 'path',
		ParamValidator::PARAM_TYPE => 'int',
		ParamValidator::PARAM_DEFAULT => 0,
		ParamValidator::PARAM_REQUIRED => false,
	];

	// XXX: Supported for compatibility with the parsoid transform endpoint.
	//      If given, it should be 'html' or 'pagebundle'.
	$settings['from'] = $optionalString( 'path' );

	// XXX: Supported for compatibility with the parsoid transform endpoint.
	//      normalizeParameters() uses it as a fallback for 'contentmodel'
	//      when the body does not specify a content model.
	$settings['format'] = $optionalString( 'path' );

	// XXX: get this from the Accept header?
	$settings['contentmodel'] = $optionalString( 'query' );

	// TODO: get this from the Accept-Language header?!
	$settings['language'] = $optionalString( 'query' );

	return $settings;
}
216 | |
/**
 * Rewrite body and parameters in place so that requests written for the
 * legacy endpoints look like native ones.
 *
 * @see ParsoidHandler::getRequestAttributes
 *
 * @param array<string,mixed> &$body Request body structure; modified in place
 * @param array<string,mixed> &$parameters Path and query parameters; modified in place
 *
 * @throws HttpException With status 400 if 'from' names an unsupported input format
 *
 * @return void
 */
private static function normalizeParameters( array &$body, array &$parameters ) {
	// A revision ID given in the path is treated as if it had been given in the body.
	$pathRevId = (int)( $parameters['oldid'] ?? 0 );
	if ( $pathRevId > 0 ) {
		$body['original']['revid'] = $pathRevId;
	}

	// An etag in the body doubles as the render ID
	// (the renderid field accepts ETag syntax as well).
	if ( !empty( $body['original']['etag'] ) ) {
		// @phan-suppress-next-line PhanTypeInvalidDimOffset false positive
		$body['original']['renderid'] = $body['original']['etag'];
	}

	// 'wikitext' is accepted as an alias for 'source'.
	if ( isset( $body['original']['wikitext'] ) ) {
		// @phan-suppress-next-line PhanTypeInvalidDimOffset false positive
		$body['original']['source'] = $body['original']['wikitext'];
		unset( $body['original']['wikitext'] );
	}

	$from = $parameters['from'] ?? '';
	$fromGiven = $from !== '';
	$fromIsPageBundle = $from === ParsoidFormatHelper::FORMAT_PAGEBUNDLE;

	// Without 'from', page bundle style input is accepted alongside plain HTML.
	// With 'from', page bundle parts are only honoured for FORMAT_PAGEBUNDLE
	// and are dropped otherwise.
	if ( $fromGiven && !$fromIsPageBundle ) {
		unset(
			$body['original']['data-parsoid']['body'],
			$body['original']['data-mw']['body'],
			$body['data-mw']['body']
		);
	}

	// When given, 'from' has to be either html or pagebundle.
	if ( $fromGiven && !$fromIsPageBundle && $from !== ParsoidFormatHelper::FORMAT_HTML ) {
		throw new LocalizedHttpException(
			new MessageValue( "rest-unsupported-transform-input", [ $from ] ), 400
		);
	}

	// The content model comes from the body if present there,
	// otherwise from the legacy 'format' path parameter.
	$bodyModel = $body['contentmodel'] ?? '';
	$pathFormat = $parameters['format'] ?? '';
	if ( $bodyModel !== '' ) {
		$parameters['contentmodel'] = $bodyModel;
	} elseif ( $pathFormat !== '' ) {
		$parameters['contentmodel'] = $pathFormat;
	}
}
277 | |
/**
 * Prepare the HTML-to-content transform for the given page and request data.
 *
 * The body is normalized for compatibility with legacy endpoints, the transform
 * is created from the submitted HTML, and any information about the original
 * revision or rendering (given inline, by render ID / ETag, or by revision ID)
 * is passed on so selective serialization can be applied.
 *
 * @param PageIdentity $page
 * @param array|string $body Body structure, or an HTML string
 * @param array $parameters
 * @param RevisionRecord|null $originalRevision
 * @param Bcp47Code|null $pageLanguage
 *
 * @throws HttpException With status 400 if the HTML is missing from the body,
 *   the 'from' parameter is unsupported, or the given ETag cannot be parsed.
 *   Errors raised while loading the original rendering are propagated as well.
 */
public function init(
	PageIdentity $page,
	$body,
	array $parameters,
	?RevisionRecord $originalRevision = null,
	?Bcp47Code $pageLanguage = null
) {
	if ( is_string( $body ) ) {
		$body = [ 'html' => $body ];
	}

	self::normalizeParameters( $body, $parameters );

	$this->page = $page;

	// A missing 'html' field is a client error, so report it as a 400
	// rather than letting the exception default to a server error.
	if ( !isset( $body['html'] ) ) {
		throw new LocalizedHttpException(
			new MessageValue( "rest-missing-body-field", [ 'html' ] ), 400
		);
	}

	if ( is_array( $body['html'] ) ) {
		// The structured form must carry the markup in its 'body' member.
		if ( !isset( $body['html']['body'] ) ) {
			throw new LocalizedHttpException(
				new MessageValue( "rest-missing-body-field", [ 'html' ] ), 400
			);
		}
		$html = $body['html']['body'];
	} else {
		$html = $body['html'];
	}

	// TODO: validate $body against a proper schema.
	$this->transform = $this->htmlTransformFactory->getHtmlToContentTransform(
		$html,
		$this->page
	);

	$this->transform->setMetrics( $this->stats );

	// NOTE: Env::getContentModel will fall back to the page's recorded content model
	//       if none is set here.
	$this->transform->setOptions( [
		'contentmodel' => $parameters['contentmodel'] ?? null,
		'offsetType' => $body['offsetType'] ?? $this->envOptions['offsetType'],
	] );

	$original = $body['original'] ?? [];
	$originalRendering = null;

	if ( !isset( $original['html'] ) && !empty( $original['renderid'] ) ) {
		// The original rendering is referenced by key, in ETag or plain form.
		$key = $original['renderid'];
		if ( preg_match( '!^(W/)?".*"$!', $key ) ) {
			$originalRendering = ParsoidRenderID::newFromETag( $key );

			if ( !$originalRendering ) {
				throw new LocalizedHttpException( new MessageValue( "rest-bad-etag", [ $key ] ), 400 );
			}
		} else {
			$originalRendering = ParsoidRenderID::newFromKey( $key );
		}
	} elseif ( !empty( $original['html'] ) || !empty( $original['data-parsoid'] ) ) {
		// NOTE: We might have an incomplete PageBundle here, with no HTML but with data-parsoid!
		// XXX: Do we need to support that, or can that just be a 400?
		$originalRendering = new PageBundle(
			$original['html']['body'] ?? '',
			$original['data-parsoid']['body'] ?? null,
			$original['data-mw']['body'] ?? null,
			null, // will be derived from $original['html']['headers']['content-type']
			$original['html']['headers'] ?? []
		);
	}

	// An explicitly passed revision takes precedence over the ID found in the body.
	if ( !$originalRevision && !empty( $original['revid'] ) ) {
		$originalRevision = (int)$original['revid'];
	}

	if ( $originalRevision || $originalRendering ) {
		$this->setOriginal( $originalRevision, $originalRendering );
	} else {
		// Nothing is known about the original; just record whether the page exists.
		if ( $this->page->exists() ) {
			$this->stats->increment( 'html_input_transform.original_html.not_given.page_exists' );
		} else {
			$this->stats->increment( 'html_input_transform.original_html.not_given.page_not_exist' );
		}
	}

	if ( isset( $body['data-mw']['body'] ) ) {
		$this->transform->setModifiedDataMW( $body['data-mw']['body'] );
	}

	// A language passed by the caller wins over the 'language' request parameter.
	if ( $pageLanguage ) {
		$this->transform->setContentLanguage( $pageLanguage );
	} elseif ( isset( $parameters['language'] ) && $parameters['language'] !== '' ) {
		$pageLanguage = LanguageCode::normalizeNonstandardCodeAndWarn(
			$parameters['language']
		);
		$this->transform->setContentLanguage( $pageLanguage );
	}

	if ( isset( $original['source']['body'] ) ) {
		// XXX: do we really have to support wikitext overrides?
		$this->transform->setOriginalText( $original['source']['body'] );
	}
}
381 | |
/**
 * Expose the underlying transform so callers can supply further context
 * through its setters.
 *
 * The transform is created by init().
 *
 * @return HtmlToContentTransform
 */
public function getTransform(): HtmlToContentTransform {
	$transform = $this->transform;

	return $transform;
}
389 | |
/**
 * Replace the metrics sink, and forward it to the transform if one
 * has already been created.
 *
 * @param StatsdDataFactoryInterface $stats
 */
public function setMetrics( StatsdDataFactoryInterface $stats ) {
	$this->stats = $stats;

	// No transform yet: nothing to forward the sink to.
	if ( !$this->transform ) {
		return;
	}

	$this->transform->setMetrics( $stats );
}
402 | |
/**
 * Tell the transform which revision and rendering the input HTML was originally
 * based on. This enables selective serialization (selser) where possible.
 *
 * @param RevisionRecord|int|null $rev
 * @param ParsoidRenderID|PageBundle|ParserOutput|null $originalRendering
 *
 * @throws HttpException With status 400 for a bad stash key and 412 if the
 *   requested rendering is no longer available. Errors raised while fetching
 *   a rendering for the revision are propagated as well.
 */
public function setOriginal( $rev, $originalRendering ) {
	if ( $originalRendering instanceof ParsoidRenderID ) {
		$renderId = $originalRendering;

		// The client named a specific rendering: load the original data for it.
		try {
			$selserContext = $this->fetchSelserContextFromStash( $renderId );
		} catch ( InvalidArgumentException $ex ) {
			$this->stats->increment( 'html_input_transform.original_html.given.as_renderid.bad' );
			$errorData = [
				'reason' => $ex->getMessage(),
				'key' => "$renderId"
			];
			throw new LocalizedHttpException( new MessageValue( "rest-bad-stash-key" ), 400, $errorData );
		}

		if ( !$selserContext ) {
			// NOTE: When the client asked for a specific stash key (resp. etag),
			//       we should fail with a 412 if we don't have the specific rendering.
			//       On the other hand, if the client only provided a base revision ID,
			//       we can re-parse and hope for the best.
			// TODO: This class should provide getETag and getLastModified methods for use by
			//       the REST endpoint, to provide proper support for conditionals.
			//       However, that requires some refactoring of how HTTP conditional checks
			//       work in the Handler base class.
			throw new LocalizedHttpException(
				new MessageValue( "rest-no-stashed-content", [ $renderId->getKey() ] ), 412
			);
		}

		// Fall back to the revision recorded in the render ID.
		$rev = $rev ?: $renderId->getRevisionID();

		$originalRendering = $selserContext->getPageBundle();
		$originalContent = $selserContext->getContent();

		if ( $originalContent ) {
			$this->transform->setOriginalContent( $originalContent );
		}
	} elseif ( !$originalRendering && $rev ) {
		// Only a revision was given, no stash key. Obtain a rendering of that revision
		// and use it as the basis for selser; the resulting diff is likely to be
		// reasonably clean.
		// NOTE: If we don't have a revision ID, we should not attempt selser!
		$originalRendering = $this->fetchParserOutputFromParsoid( $rev, true );

		$this->stats->increment(
			$originalRendering
				? 'html_input_transform.original_html.given.as_revid.found'
				: 'html_input_transform.original_html.given.as_revid.not_found'
		);
	} elseif ( $originalRendering ) {
		$this->stats->increment( 'html_input_transform.original_html.given.verbatim' );
	}

	if ( $originalRendering instanceof ParserOutput ) {
		$originalRendering = PageBundleParserOutputConverter::pageBundleFromParserOutput( $originalRendering );

		// NOTE: The default version is applied only when we started from a ParserOutput.
		//       A PageBundle passed in directly must state its version explicitly.
		$hasContentType = isset( $originalRendering->headers['content-type'] );
		if ( $originalRendering->version === null && !$hasContentType ) {
			$originalRendering->version = Parsoid::defaultHTMLVersion();
		}
	}

	if ( !$originalRendering instanceof PageBundle ) {
		return;
	}

	$pageBundle = $originalRendering;

	// Prefer the explicit version; otherwise try to derive it from the content-type header.
	if ( $pageBundle->version !== null ) {
		$this->transform->setOriginalSchemaVersion( $pageBundle->version );
	} elseif ( !empty( $pageBundle->headers['content-type'] ) ) {
		$vOriginal = ParsoidFormatHelper::parseContentTypeHeader(
			// @phan-suppress-next-line PhanTypeArraySuspiciousNullable Silly Phan, we just checked.
			$pageBundle->headers['content-type']
		);

		if ( $vOriginal ) {
			$this->transform->setOriginalSchemaVersion( $vOriginal );
		}
	}

	if ( $rev instanceof RevisionRecord ) {
		$this->transform->setOriginalRevision( $rev );
	} elseif ( $rev && is_int( $rev ) ) {
		$this->transform->setOriginalRevisionId( $rev );
	}

	// NOTE: The PageBundle may be incomplete, with no HTML. PageBundle::$html is declared
	//       non-nullable and would be the empty string if not given, but as a public field
	//       it might still be null.
	$originalHtml = $pageBundle->html;
	if ( $originalHtml !== null && $originalHtml !== '' ) {
		$this->transform->setOriginalHtml( $originalHtml );
	}

	if ( $pageBundle->parsoid !== null ) {
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable Silly Phan, we just checked.
		$this->transform->setOriginalDataParsoid( $pageBundle->parsoid );
	}

	if ( $pageBundle->mw !== null ) {
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable Silly Phan, we just checked.
		$this->transform->setOriginalDataMW( $pageBundle->mw );
	}
}
521 | |
/**
 * Run the transform and return the resulting content.
 *
 * Failures of the transform are translated into HTTP errors: client errors
 * and unknown content models become a 400, exceeded resource limits a 413.
 *
 * @return Content the content derived from the input HTML.
 * @throws HttpException
 */
public function getContent(): Content {
	try {
		$content = $this->transform->htmlToContent();
	} catch ( ClientError $e ) {
		$reason = $e->getMessage();
		throw new LocalizedHttpException(
			new MessageValue( 'rest-html-backend-error', [ $reason ] ),
			400,
			[ 'reason' => $reason ]
		);
	} catch ( ResourceLimitExceededException $e ) {
		throw new LocalizedHttpException(
			new MessageValue( 'rest-resource-limit-exceeded' ),
			413,
			[ 'reason' => $e->getMessage() ]
		);
	} catch ( MWUnknownContentModelException $e ) {
		throw new LocalizedHttpException(
			new MessageValue( "rest-unknown-content-model", [ $e->getModelId() ] ),
			400
		);
	}

	return $content;
}
548 | |
/**
 * Write the content derived from the input HTML into the given response,
 * together with a matching Content-Type header.
 *
 * @param ResponseInterface $response
 *
 * @throws HttpException If the transform fails, see getContent()
 */
public function putContent( ResponseInterface $response ) {
	$derivedContent = $this->getContent();
	$serialized = $derivedContent->serialize();

	try {
		$mimeType = ParsoidFormatHelper::getContentType(
			$derivedContent->getModel(),
			$this->envOptions['outputContentVersion']
		);
	} catch ( InvalidArgumentException $e ) {
		// Parsoid has no content type for this model;
		// fall back to the content's own default format.
		$mimeType = $derivedContent->getDefaultFormat();
	}

	$response->setHeader( 'Content-Type', $mimeType );
	$response->getBody()->write( $serialized );
}
573 | |
/**
 * Obtain the Parsoid rendering of the given revision of the current page.
 *
 * @param RevisionRecord|int $rev
 * @param bool $mayParse If false, only getCachedParserOutput() is consulted;
 *   if true, getParserOutput() is used and a non-OK status is turned into an HTTP error.
 *
 * @return ParserOutput|null
 * @throws HttpException With status 404 if the revision cannot be accessed,
 *   or as raised for a failed parse status
 */
private function fetchParserOutputFromParsoid( $rev, bool $mayParse ): ?ParserOutput {
	$anonOptions = ParserOptions::newFromAnon();

	try {
		if ( !$mayParse ) {
			// Cache-only lookup.
			return $this->parsoidOutputAccess->getCachedParserOutput(
				$this->page,
				$anonOptions,
				$rev
			);
		}

		$status = $this->parsoidOutputAccess->getParserOutput(
			$this->page,
			$anonOptions,
			$rev
		);

		if ( !$status->isOK() ) {
			$this->throwHttpExceptionForStatus( $status );
		}

		return $status->getValue();
	} catch ( RevisionAccessException $e ) {
		// The client supplied a bad revision ID, or the revision was deleted or suppressed.
		throw new LocalizedHttpException( new MessageValue( "rest-specified-revision-unavailable" ),
			404,
			[ 'reason' => $e->getMessage() ]
		);
	}
}
614 | |
/**
 * Look up the selser context for a render ID, first in the stash and then,
 * as a fallback, via a cached parser output for the revision.
 *
 * @param ParsoidRenderID $renderID
 *
 * @return SelserContext|null Null if no matching rendering could be found
 */
private function fetchSelserContextFromStash( $renderID ): ?SelserContext {
	$statsPrefix = 'html_input_transform.original_html.given.as_renderid.';

	$stashed = $this->parsoidOutputStash->get( $renderID );

	if ( $stashed ) {
		$this->stats->increment( $statsPrefix . 'stash_hit.found.hit' );

		return $stashed;
	}

	// The rendering is no longer stashed (or the client sent us a bogus key).
	// Try the parser cache instead: on a wiki with low edit frequency,
	// there is a good chance that it is still there.
	try {
		$revisionId = $renderID->getRevisionID();
		$cachedOutput = $this->fetchParserOutputFromParsoid( $revisionId, false );

		if ( !$cachedOutput ) {
			$this->stats->increment( $statsPrefix . 'stash_miss_pc_fallback.not_found.miss' );
			return null;
		}

		$cachedRenderID = ParsoidRenderID::newFromParserOutput( $cachedOutput );
		if ( $cachedRenderID->getKey() !== $renderID->getKey() ) {
			$this->stats->increment( $statsPrefix . 'stash_miss_pc_fallback.not_found.mismatch' );

			// The cached rendering is not the one the client asked for.
			return null;
		}

		$this->stats->increment( $statsPrefix . 'stash_miss_pc_fallback.found.hit' );

		$bundle = PageBundleParserOutputConverter::pageBundleFromParserOutput( $cachedOutput );
		return new SelserContext( $bundle, $renderID->getRevisionID() );
	} catch ( HttpException $e ) {
		$this->stats->increment( $statsPrefix . 'stash_miss_pc_fallback.not_found.failed' );

		// If the revision isn't found, don't trigger a 404. Return null to trigger a 412.
		return null;
	}
}
664 | |
/**
 * Convert a failed status into an HTTP error: 413 if the status carries the
 * 'parsoid-resource-limit-exceeded' message, 400 otherwise.
 *
 * @param Status $status
 *
 * @return never
 * @throws HttpException Always
 */
private function throwHttpExceptionForStatus( Status $status ) {
	// TODO: make this nicer.
	$limitExceeded = $status->hasMessage( 'parsoid-resource-limit-exceeded' );

	$messageKey = $limitExceeded ? "rest-parsoid-resource-exceeded" : "rest-parsoid-error";
	$httpCode = $limitExceeded ? 413 : 400;

	throw new LocalizedHttpException( new MessageValue( $messageKey ),
		$httpCode,
		[ 'reason' => $status->getHTML() ]
	);
}
685 | |
686 | } |