Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
39.57% |
186 / 470 |
|
12.50% |
3 / 24 |
CRAP | |
0.00% |
0 / 1 |
ParsoidHandler | |
39.57% |
186 / 470 |
|
12.50% |
3 / 24 |
3516.38 | |
0.00% |
0 / 1 |
factory | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
__construct | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
getSupportedRequestTypes | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
assertDomainIsCorrect | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
42 | |||
getParsedBody | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
42 | |||
getRequestAttributes | |
0.00% |
0 / 82 |
|
0.00% |
0 / 1 |
462 | |||
getHtmlOutputRendererHelper | |
95.00% |
19 / 20 |
|
0.00% |
0 / 1 |
6 | |||
getHtmlInputTransformHelper | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
1 | |||
acceptable | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
272 | |||
tryToCreatePageConfig | |
87.50% |
28 / 32 |
|
0.00% |
0 / 1 |
7.10 | |||
tryToCreatePageIdentity | |
55.56% |
5 / 9 |
|
0.00% |
0 / 1 |
3.79 | |||
getTransformEndpoint | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getPageContentEndpoint | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getRevisionContentEndpoint | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
wtLint | |
53.85% |
7 / 13 |
|
0.00% |
0 / 1 |
3.88 | |||
wt2html | |
78.16% |
68 / 87 |
|
0.00% |
0 / 1 |
30.00 | |||
newParsoid | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
parseHTML | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
html2wt | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
3.02 | |||
pb2pb | |
0.00% |
0 / 45 |
|
0.00% |
0 / 1 |
90 | |||
updateRedLinks | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
2 | |||
languageConversion | |
79.41% |
27 / 34 |
|
0.00% |
0 / 1 |
4.14 | |||
execute | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
validatePb | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
pageConfigToPageIdentity | |
44.44% |
4 / 9 |
|
0.00% |
0 / 1 |
2.69 |
1 | <?php |
2 | /** |
3 | * Copyright (C) 2011-2020 Wikimedia Foundation and others. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | */ |
19 | |
20 | namespace MediaWiki\Rest\Handler; |
21 | |
22 | use Composer\Semver\Semver; |
23 | use InvalidArgumentException; |
24 | use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface; |
25 | use LogicException; |
26 | use MediaWiki\Content\WikitextContent; |
27 | use MediaWiki\Context\RequestContext; |
28 | use MediaWiki\Language\LanguageCode; |
29 | use MediaWiki\Logger\LoggerFactory; |
30 | use MediaWiki\MainConfigNames; |
31 | use MediaWiki\MediaWikiServices; |
32 | use MediaWiki\Page\PageIdentity; |
33 | use MediaWiki\Page\ProperPageIdentity; |
34 | use MediaWiki\Parser\ParserOutput; |
35 | use MediaWiki\Parser\Parsoid\Config\SiteConfig; |
36 | use MediaWiki\Registration\ExtensionRegistry; |
37 | use MediaWiki\Rest\Handler; |
38 | use MediaWiki\Rest\Handler\Helper\HtmlInputTransformHelper; |
39 | use MediaWiki\Rest\Handler\Helper\HtmlOutputRendererHelper; |
40 | use MediaWiki\Rest\Handler\Helper\ParsoidFormatHelper; |
41 | use MediaWiki\Rest\HttpException; |
42 | use MediaWiki\Rest\LocalizedHttpException; |
43 | use MediaWiki\Rest\Response; |
44 | use MediaWiki\Revision\MutableRevisionRecord; |
45 | use MediaWiki\Revision\RevisionAccessException; |
46 | use MediaWiki\Revision\RevisionLookup; |
47 | use MediaWiki\Revision\SlotRecord; |
48 | use MediaWiki\Revision\SuppressedDataException; |
49 | use MediaWiki\Title\MalformedTitleException; |
50 | use MediaWiki\Title\Title; |
51 | use MediaWiki\WikiMap\WikiMap; |
52 | use MobileContext; |
53 | use Wikimedia\Http\HttpAcceptParser; |
54 | use Wikimedia\Message\DataMessageValue; |
55 | use Wikimedia\Message\MessageValue; |
56 | use Wikimedia\Parsoid\Config\DataAccess; |
57 | use Wikimedia\Parsoid\Config\PageConfig; |
58 | use Wikimedia\Parsoid\Config\PageConfigFactory; |
59 | use Wikimedia\Parsoid\Core\ClientError; |
60 | use Wikimedia\Parsoid\Core\PageBundle; |
61 | use Wikimedia\Parsoid\Core\ResourceLimitExceededException; |
62 | use Wikimedia\Parsoid\DOM\Document; |
63 | use Wikimedia\Parsoid\Parsoid; |
64 | use Wikimedia\Parsoid\Utils\ContentUtils; |
65 | use Wikimedia\Parsoid\Utils\DOMCompat; |
66 | use Wikimedia\Parsoid\Utils\DOMUtils; |
67 | use Wikimedia\Parsoid\Utils\Timing; |
68 | |
69 | // TODO logging, timeouts(?), CORS |
70 | // TODO content negotiation (routes.js routes.acceptable) |
71 | // TODO handle MaxConcurrentCallsError (pool counter?) |
72 | |
73 | /** |
74 | * Base class for Parsoid handlers. |
75 | * @internal For use by the Parsoid extension |
76 | */ |
77 | abstract class ParsoidHandler extends Handler { |
78 | |
/** Revision lookup service; used by getRequestAttributes() to derive a page name from a revision id. */
private RevisionLookup $revisionLookup;
/** Parsoid site configuration; the constructor also reads the metrics object from it. */
protected SiteConfig $siteConfig;
/** Factory used by tryToCreatePageConfig() to build PageConfig objects. */
protected PageConfigFactory $pageConfigFactory;
/** Parsoid data access service, injected through the constructor. */
protected DataAccess $dataAccess;

/** @var ExtensionRegistry Queried to detect optional extensions (MobileFrontend, Linter) */
protected $extensionRegistry;

/** @var ?StatsdDataFactoryInterface A statistics aggregator */
protected $metrics;

/** @var array Cached result of getRequestAttributes(); not set until that method has run once */
private $requestAttributes;
92 | |
/**
 * Create a handler instance, pulling the services the constructor needs
 * from the global MediaWikiServices container.
 *
 * @return static
 */
public static function factory(): ParsoidHandler {
	$container = MediaWikiServices::getInstance();

	$revisionLookup = $container->getRevisionLookup();
	$siteConfig = $container->getParsoidSiteConfig();
	$pageConfigFactory = $container->getParsoidPageConfigFactory();
	$dataAccess = $container->getParsoidDataAccess();

	// @phan-suppress-next-line PhanTypeInstantiateAbstractStatic
	return new static( $revisionLookup, $siteConfig, $pageConfigFactory, $dataAccess );
}
106 | |
/**
 * @param RevisionLookup $revisionLookup
 * @param SiteConfig $siteConfig Also supplies the metrics object stored in $this->metrics
 * @param PageConfigFactory $pageConfigFactory
 * @param DataAccess $dataAccess
 */
public function __construct(
	RevisionLookup $revisionLookup,
	SiteConfig $siteConfig,
	PageConfigFactory $pageConfigFactory,
	DataAccess $dataAccess
) {
	// Collaborators that are looked up rather than injected.
	$this->extensionRegistry = ExtensionRegistry::getInstance();
	$this->metrics = $siteConfig->metrics();

	// Injected services.
	$this->dataAccess = $dataAccess;
	$this->pageConfigFactory = $pageConfigFactory;
	$this->siteConfig = $siteConfig;
	$this->revisionLookup = $revisionLookup;
}
120 | |
/**
 * Extend the request content types accepted by the parent handler with
 * the two HTML form encodings.
 *
 * @return array
 */
public function getSupportedRequestTypes(): array {
	$formEncodings = [
		'application/x-www-form-urlencoded',
		'multipart/form-data'
	];
	$inherited = parent::getSupportedRequestTypes();

	return array_merge( $inherited, $formEncodings );
}
127 | |
/**
 * Check the legacy {domain} path parameter against this wiki's own domain.
 *
 * The value is accepted when it equals, ignoring ASCII case, the host part
 * of $wgServer or, if MobileFrontend is loaded, the host part of the mobile
 * URL derived from $wgServer.
 *
 * @todo Remove this when we no longer need to support the {domain}
 *       parameter with backwards compatibility with the parsoid
 *       extension.
 * @param string $domain Domain name parameter to validate
 * @throws LogicException If no host can be extracted from $wgServer
 * @throws LocalizedHttpException With status 400 if the domain does not match
 */
protected function assertDomainIsCorrect( $domain ): void {
	// Corners are being cut here (IDN, non-ASCII casing) because domain
	// name support is provisional.
	// TODO use a proper validator instead
	$wgServer = RequestContext::getMain()->getConfig()->get( MainConfigNames::Server );
	$canonicalHost = parse_url( $wgServer, PHP_URL_HOST );
	if ( !$canonicalHost ) {
		throw new LogicException( 'Cannot parse $wgServer' );
	}

	// True when $host is usable and names the same domain as the request.
	$sameDomain = static fn ( $host ): bool => $host && strcasecmp( $host, $domain ) === 0;

	if ( $sameDomain( $canonicalHost ) ) {
		return;
	}

	// TODO: This should really go away! It's only acceptable because
	// this entire method is going to be removed once we no longer
	// need the parsoid extension endpoints with the {domain} parameter.
	if ( $this->extensionRegistry->isLoaded( 'MobileFrontend' ) ) {
		// @phan-suppress-next-line PhanUndeclaredClassMethod
		$mobileUrl = MobileContext::singleton()->getMobileUrl( $wgServer );
		if ( $sameDomain( parse_url( $mobileUrl, PHP_URL_HOST ) ) ) {
			return;
		}
	}

	$failure = new DataMessageValue(
		'mwparsoid-invalid-domain',
		[],
		'invalid-domain',
		[ 'expected' => $canonicalHost, 'actual' => $domain, ]
	);
	$errorData = [
		'error' => 'parameter-validation-failed',
		'name' => 'domain',
		'value' => $domain,
		'failureCode' => $failure->getCode(),
		'failureData' => $failure->getData(),
	];

	throw new LocalizedHttpException( $failure, 400, $errorData );
}
175 | |
/**
 * Get the parsed body by content-type.
 *
 * Only the media type of the Content-Type header is considered; any
 * parameters after the first ';' (e.g. charset or boundary) are ignored.
 * The media type is matched case-insensitively and with surrounding
 * whitespace removed.
 *
 * @return array Post parameters for form-encoded bodies, or the decoded
 *   JSON value for 'application/json' bodies.
 * @throws LocalizedHttpException With status 400 if a JSON body does not
 *   decode to an array, or 415 if the content type is not supported.
 */
protected function getParsedBody(): array {
	$request = $this->getRequest();
	[ $rawType ] = explode( ';', $request->getHeader( 'Content-Type' )[0] ?? '', 2 );
	// Media types are case-insensitive and may be followed by optional
	// whitespace before the ';', so normalise before comparing.
	$contentType = strtolower( trim( $rawType ) );

	switch ( $contentType ) {
		case 'application/x-www-form-urlencoded':
		case 'multipart/form-data':
			return $request->getPostParams();
		case 'application/json':
			$json = json_decode( $request->getBody()->getContents(), true );
			if ( !is_array( $json ) ) {
				throw new LocalizedHttpException(
					new MessageValue( "rest-json-body-parse-error", [ 'not a valid JSON object' ] ), 400 );
			}
			return $json;
		default:
			// explode() always yields a string, so a missing header shows up
			// as '' rather than null; report that case with a placeholder.
			throw new LocalizedHttpException(
				new MessageValue(
					"rest-unsupported-content-type",
					[ $contentType !== '' ? $contentType : '(null)' ]
				),
				415
			);
	}
}
202 | |
/**
 * Rough equivalent of req.local from Parsoid-JS.
 * FIXME most of these should be replaced with more native ways of handling the request.
 *
 * The attributes are computed on the first call and cached on the handler;
 * later calls return a reference to the cached array.
 *
 * Keys of the returned array: 'pageName', 'oldid', 'body_only', 'errorEnc',
 * 'iwp', 'offsetType', 'pagelanguage', 'envOptions' and 'opts'.
 *
 * @return array
 * @throws LocalizedHttpException If the POST body cannot be parsed or the
 *   legacy {domain} path parameter does not match this wiki.
 */
protected function &getRequestAttributes(): array {
	if ( $this->requestAttributes ) {
		return $this->requestAttributes;
	}

	$request = $this->getRequest();
	$isPost = ( $request->getMethod() === 'POST' );
	$body = $isPost ? $this->getParsedBody() : [];
	$queryParams = $request->getQueryParams();

	// Path parameters take precedence over same-named body fields.
	$opts = array_merge( $body, array_intersect_key( $request->getPathParams(),
		[ 'from' => true, 'format' => true ] ) );
	'@phan-var array<string,array|bool|string> $opts'; // @var array<string,array|bool|string> $opts

	// 'format' is only present when the path or the body supplies it, so
	// read it once with a null fallback instead of risking an
	// "undefined array key" warning further down.
	$format = $opts['format'] ?? null;

	$contentLanguage = $request->getHeaderLine( 'Content-Language' ) ?: null;
	if ( $contentLanguage ) {
		$contentLanguage = LanguageCode::normalizeNonstandardCodeAndWarn(
			$contentLanguage
		);
	}

	$attribs = [
		'pageName' => $request->getPathParam( 'title' ) ?? '',
		'oldid' => $request->getPathParam( 'revision' ),
		// "body_only" flag to return just the body (instead of the entire HTML doc)
		// We would like to deprecate use of this flag: T181657
		'body_only' => $queryParams['body_only'] ?? $body['body_only'] ?? null,
		'errorEnc' => ParsoidFormatHelper::ERROR_ENCODING[$format] ?? 'plain',
		'iwp' => WikiMap::getCurrentWikiId(), // PORT-FIXME verify
		'offsetType' => $body['offsetType']
			?? $queryParams['offsetType']
			// Lint requests should return UCS2 offsets by default
			?? ( $format === ParsoidFormatHelper::FORMAT_LINT ? 'ucs2' : 'byte' ),
		'pagelanguage' => $contentLanguage,
	];

	// For use in getHtmlOutputRendererHelper
	$opts['stash'] = $queryParams['stash'] ?? false;

	// For POST requests, the "original" section of the body overrides
	// the revision and title taken from the path.
	if ( $isPost ) {
		if ( isset( $opts['original']['revid'] ) ) {
			$attribs['oldid'] = $opts['original']['revid'];
		}
		if ( isset( $opts['original']['title'] ) ) {
			$attribs['pageName'] = $opts['original']['title'];
		}
	}

	// An empty revision id means "no revision"; anything else is cast to int.
	if ( $attribs['oldid'] !== null ) {
		$attribs['oldid'] = ( $attribs['oldid'] === '' ) ? null : (int)$attribs['oldid'];
	}

	// For use in getHtmlOutputRendererHelper
	$opts['accept-language'] = $request->getHeaderLine( 'Accept-Language' ) ?: null;

	$acceptLanguage = null;
	if ( $opts['accept-language'] !== null ) {
		$acceptLanguage = LanguageCode::normalizeNonstandardCodeAndWarn(
			$opts['accept-language']
		);
	}

	// Init pageName if oldid is provided and is a valid revision
	if ( ( $attribs['pageName'] === '' ) && $attribs['oldid'] ) {
		$rev = $this->revisionLookup->getRevisionById( $attribs['oldid'] );
		if ( $rev ) {
			$attribs['pageName'] = $rev->getPage()->getDBkey();
		}
	}

	$attribs['envOptions'] = [
		// We use `prefix` but ought to use `domain` (T206764)
		'prefix' => $attribs['iwp'],
		// For the legacy "domain" path parameter used by the endpoints exposed
		// by the parsoid extension. Will be null for core endpoints.
		'domain' => $request->getPathParam( 'domain' ),
		'pageName' => $attribs['pageName'],
		'cookie' => $request->getHeaderLine( 'Cookie' ),
		'reqId' => $request->getHeaderLine( 'X-Request-Id' ),
		'userAgent' => $request->getHeaderLine( 'User-Agent' ),
		'htmlVariantLanguage' => $acceptLanguage,
		// Semver::satisfies checks below expect a valid outputContentVersion value.
		// Better to set it here instead of adding the default value at every check.
		'outputContentVersion' => Parsoid::defaultHTMLVersion(),
	];

	# Convert language codes in $opts['updates']['variant'] if present
	foreach ( [ 'source', 'target' ] as $side ) {
		$variant = $opts['updates']['variant'][$side] ?? null;
		if ( $variant ) {
			$opts['updates']['variant'][$side] =
				LanguageCode::normalizeNonstandardCodeAndWarn( $variant );
		}
	}

	// Normalise the content-language header of the supplied wikitext and
	// of the original wikitext, when present.
	if ( isset( $opts['wikitext']['headers']['content-language'] ) ) {
		$opts['wikitext']['headers']['content-language'] =
			LanguageCode::normalizeNonstandardCodeAndWarn(
				$opts['wikitext']['headers']['content-language']
			);
	}
	if ( isset( $opts['original']['wikitext']['headers']['content-language'] ) ) {
		$opts['original']['wikitext']['headers']['content-language'] =
			LanguageCode::normalizeNonstandardCodeAndWarn(
				$opts['original']['wikitext']['headers']['content-language']
			);
	}

	$attribs['opts'] = $opts;

	// TODO: Remove assertDomainIsCorrect() once we no longer need to support the {domain}
	//       parameter for the endpoints exposed by the parsoid extension.
	if ( $attribs['envOptions']['domain'] !== null ) {
		$this->assertDomainIsCorrect( $attribs['envOptions']['domain'] );
	}

	$this->requestAttributes = $attribs;
	return $this->requestAttributes;
}
333 | |
/**
 * Create an HtmlOutputRendererHelper and configure it from the request
 * attributes.
 *
 * @param array $attribs Request attributes from getRequestAttributes()
 * @param ?string $source Content to render; when null no content source is set on the helper
 * @param PageIdentity $page
 * @param ?int $revId
 *
 * @return HtmlOutputRendererHelper
 */
private function getHtmlOutputRendererHelper(
	array $attribs,
	?string $source,
	PageIdentity $page,
	?int $revId
): HtmlOutputRendererHelper {
	$opts = $attribs['opts'] ?? [];
	$envOptions = $attribs['envOptions'] ?? [];

	// Lenient revision handling is always requested (last argument).
	$renderer = MediaWikiServices::getInstance()
		->getPageRestHelperFactory()
		->newHtmlOutputRendererHelper( $page, [], $this->getAuthority(), $revId, true );

	if ( $source !== null ) {
		// XXX: should default to the page's content model?
		$contentModel = $opts['contentmodel']
			?? $envOptions['contentmodel']
			?? CONTENT_MODEL_WIKITEXT;
		$renderer->setContentSource( $source, $contentModel );
	}

	// Each remaining setting is applied only when the attribute is present and non-null.
	if ( isset( $opts['stash'] ) ) {
		$renderer->setStashingEnabled( $opts['stash'] );
	}
	if ( isset( $envOptions['outputContentVersion'] ) ) {
		$renderer->setOutputProfileVersion( $envOptions['outputContentVersion'] );
	}
	if ( isset( $attribs['pagelanguage'] ) ) {
		$renderer->setPageLanguage( $attribs['pagelanguage'] );
	}
	if ( isset( $opts['accept-language'] ) ) {
		$renderer->setVariantConversionLanguage( $opts['accept-language'] );
	}

	return $renderer;
}
386 | |
/**
 * Create an HtmlInputTransformHelper for the given HTML and attach the
 * site config's prefixed stats factory to it.
 *
 * @param array $attribs Request attributes from getRequestAttributes()
 * @param string $html HTML to transform; stored under the 'html' key of the body passed to the helper
 * @param PageIdentity $page
 *
 * @return HtmlInputTransformHelper
 */
protected function getHtmlInputTransformHelper(
	array $attribs,
	string $html,
	PageIdentity $page
): HtmlInputTransformHelper {
	$requestOpts = $attribs['opts'];

	// Entries in 'opts' win over same-named top-level attributes.
	$parameters = $requestOpts + $attribs;

	$body = $requestOpts;
	$body['html'] = $html;

	// 'offsetType' is only added if envOptions does not already define it.
	$envOptions = $attribs['envOptions'] + [
		'offsetType' => $attribs['offsetType'],
	];

	$helperFactory = MediaWikiServices::getInstance()->getPageRestHelperFactory();
	$transformHelper = $helperFactory->newHtmlInputTransformHelper(
		$envOptions,
		$page,
		$body,
		$parameters
	);
	$transformHelper->setMetrics( $this->siteConfig->prefixedStatsFactory() );

	return $transformHelper;
}
419 | |
/**
 * Pattern for the "profile" parameter of an Accept header entry.
 * Capture group 1 is the format (HTML or pagebundle), group 2 is the
 * requested semantic version.
 *
 * The dots in the host name are escaped so that they only match a
 * literal '.' rather than any character.
 *
 * FIXME: Combine with ParsoidFormatHelper::parseContentTypeHeader
 */
private const NEW_SPEC =
	'#^https://www\.mediawiki\.org/wiki/Specs/(HTML|pagebundle)/(\d+\.\d+\.\d+)$#D';
425 | |
/**
 * Decide whether the requested output format is compatible with the
 * request's Accept header.
 *
 * Side effect: when an acceptable entry carries a profile naming a content
 * version that Parsoid can resolve, $attribs['envOptions']['outputContentVersion']
 * is set to the resolved version.
 *
 * @param array &$attribs Request attributes from getRequestAttributes()
 * @return bool
 */
protected function acceptable( array &$attribs ): bool {
	$format = $attribs['opts']['format'];
	$wantsHtml = ( $format === ParsoidFormatHelper::FORMAT_HTML );
	$wantsPageBundle = ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE );

	// Wikitext output is never negotiated.
	if ( $format === ParsoidFormatHelper::FORMAT_WIKITEXT ) {
		return true;
	}

	// A missing or unparseable Accept header accepts everything.
	$acceptHeader = $this->getRequest()->getHeader( 'Accept' );
	if ( !$acceptHeader ) {
		return true;
	}
	$parser = new HttpAcceptParser();
	$acceptableTypes = $parser->parseAccept( $acceptHeader[0] ); // FIXME: Multiple headers valid?
	if ( !$acceptableTypes ) {
		return true;
	}

	// `acceptableTypes` is already sorted by quality.
	foreach ( $acceptableTypes as $entry ) {
		$mediaType = "{$entry['type']}/{$entry['subtype']}";
		$profile = $entry['params']['profile'] ?? null;

		$exactMatch = ( $wantsHtml && $mediaType === 'text/html' )
			|| ( $wantsPageBundle && $mediaType === 'application/json' );

		if ( !$exactMatch ) {
			// Wildcards are accepted without looking at any profile.
			if ( $mediaType === '*/*' || ( $wantsHtml && $mediaType === 'text/*' ) ) {
				return true;
			}
			continue;
		}

		if ( !$profile ) {
			return true;
		}

		// The profile must follow the spec URL pattern and name the requested format ...
		preg_match( self::NEW_SPEC, $profile, $matches );
		if ( !$matches || strtolower( $matches[1] ) !== $format ) {
			continue;
		}

		// ... and a version that can be resolved.
		$contentVersion = Parsoid::resolveContentVersion( $matches[2] );
		if ( !$contentVersion ) {
			continue;
		}

		// $attribs mutated here!
		$attribs['envOptions']['outputContentVersion'] = $contentVersion;
		return true;
	}

	return false;
}
488 | |
/**
 * Build a PageConfig for the page and revision named in the request
 * attributes. Revision access failures reported by the factory are
 * converted into HTTP errors (403 for suppressed data, 404 for other
 * access problems).
 *
 * @param array $attribs Request attributes; 'oldid', 'pagelanguage' and 'pageName' are read
 * @param ?string $wikitextOverride
 *   Custom wikitext to use instead of the real content of the page.
 * @param bool $html2WtMode When true and no revision id is given, the
 *   factory is not asked to ensure that content is accessible.
 * @return PageConfig
 * @throws HttpException
 * @throws LogicException If the page name cannot be turned into a Title
 */
protected function tryToCreatePageConfig(
	array $attribs, ?string $wikitextOverride = null, bool $html2WtMode = false
): PageConfig {
	$revId = $attribs['oldid'];
	$pagelanguageOverride = $attribs['pagelanguage'];
	$pageName = $attribs['pageName'];

	// An empty page name falls back to the wiki's main page.
	$title = ( $pageName === '' ) ? Title::newMainPage() : Title::newFromText( $pageName );
	if ( !$title ) {
		// TODO use proper validation
		throw new LogicException( 'Title not found!' );
	}
	$user = RequestContext::getMain()->getUser();

	// Either a fake revision carrying the override wikitext, or the plain revision id.
	$revision = $revId;
	if ( $wikitextOverride !== null ) {
		// The id is deliberately 0 rather than $revId: a revision stands
		// for specific wikitext, which the override might not be.
		$overrideRecord = new MutableRevisionRecord( $title );
		$overrideRecord->setId( 0 );
		$overrideRecord->setSlot(
			SlotRecord::newUnsaved(
				SlotRecord::MAIN,
				new WikitextContent( $wikitextOverride )
			)
		);
		$revision = $overrideRecord;
	}

	// Content accessibility is only skipped for html2wt without an explicit revision.
	$ensureAccessibleContent = !( $html2WtMode && $revId === null );

	try {
		// Note: Parsoid by design isn't supposed to use the user
		// context right now, and all user state is expected to be
		// introduced as a post-parse transform. So although we pass a
		// User here, it only currently affects the output in obscure
		// corner cases; see PageConfigFactory::create() for more.
		// @phan-suppress-next-line PhanUndeclaredMethod method defined in subtype
		return $this->pageConfigFactory->create(
			$title, $user, $revision, null, $pagelanguageOverride,
			$ensureAccessibleContent
		);
	} catch ( SuppressedDataException $e ) {
		throw new LocalizedHttpException(
			new MessageValue( "rest-permission-denied-revision", [ $e->getMessage() ] ), 403
		);
	} catch ( RevisionAccessException $e ) {
		throw new LocalizedHttpException(
			new MessageValue( "rest-specified-revision-unavailable", [ $e->getMessage() ] ), 404
		);
	}
}
560 | |
/**
 * Resolve the page named in the request attributes to a PageIdentity.
 * An empty page name yields the wiki's main page; a name the page store
 * cannot resolve results in a 400 error.
 *
 * @param array $attribs Request attributes; only 'pageName' is read
 * @return PageIdentity
 * @throws HttpException
 */
protected function tryToCreatePageIdentity( array $attribs ): PageIdentity {
	$pageName = $attribs['pageName'];
	if ( $pageName === '' ) {
		return Title::newMainPage();
	}

	// XXX: Should be injected, but the Parsoid extension relies on the
	//      constructor signature. Also, ParsoidHandler should go away soon anyway.
	$page = MediaWikiServices::getInstance()->getPageStore()->getPageByText( $pageName );
	if ( $page ) {
		return $page;
	}

	// NOTE(review): the message parameter is the literal string 'pageName',
	// not the requested title - confirm this is what the message expects.
	throw new LocalizedHttpException(
		new MessageValue( "rest-invalid-title", [ 'pageName' ] ), 400
	);
}
589 | |
/**
 * Path template of the transform endpoint. Subclasses may override this
 * to change the path.
 *
 * The parsoid extension does so for backwards compatibility with the
 * old endpoint URLs.
 *
 * @stable to override
 *
 * @param string $format The format the endpoint is expected to return.
 *   This implementation returns the same template for every format.
 *
 * @return string
 */
protected function getTransformEndpoint( string $format = ParsoidFormatHelper::FORMAT_HTML ): string {
	$routeTemplate = '/coredev/v0/transform/{from}/to/{format}/{title}/{revision}';
	return $routeTemplate;
}
605 | |
/**
 * Path template of the page content endpoint. Subclasses may override
 * this to change the path.
 *
 * The parsoid extension does so for backwards compatibility with the
 * old endpoint URLs.
 *
 * @stable to override
 *
 * @param string $format The format the endpoint is expected to return.
 *   Only the HTML format is supported here.
 *
 * @return string
 * @throws InvalidArgumentException For any format other than HTML
 */
protected function getPageContentEndpoint( string $format = ParsoidFormatHelper::FORMAT_HTML ): string {
	if ( $format === ParsoidFormatHelper::FORMAT_HTML ) {
		return '/v1/page/{title}/html';
	}

	throw new InvalidArgumentException( 'Unsupported page content format: ' . $format );
}
624 | |
/**
 * Path template of the revision content endpoint. Subclasses may
 * override this to change the path.
 *
 * The parsoid extension does so for backwards compatibility with the
 * old endpoint URLs.
 *
 * @stable to override
 *
 * @param string $format The format the endpoint is expected to return.
 *   Only the HTML format is supported here.
 *
 * @return string
 * @throws InvalidArgumentException For any format other than HTML
 */
protected function getRevisionContentEndpoint( string $format = ParsoidFormatHelper::FORMAT_HTML ): string {
	if ( $format === ParsoidFormatHelper::FORMAT_HTML ) {
		return '/v1/revision/{revision}/html';
	}

	throw new InvalidArgumentException( 'Unsupported revision content format: ' . $format );
}
643 | |
/**
 * Run Parsoid's wikitext linter on the given page.
 *
 * @param PageConfig $pageConfig
 * @param array $attribs Request attributes; 'envOptions' and 'offsetType' are read
 * @param ?array $linterOverrides Passed to Parsoid as the 'linterOverrides'
 *   env option, unless envOptions already defines that key.
 * @return mixed Whatever Parsoid::wikitext2lint() returns
 * @throws LocalizedHttpException With status 400 on a Parsoid client error,
 *   or 413 when a Parsoid resource limit is exceeded.
 */
private function wtLint(
	PageConfig $pageConfig, array $attribs, ?array $linterOverrides = []
) {
	// Array union: values already present in envOptions take precedence.
	$lintEnvOptions = $attribs['envOptions'] + [
		'linterOverrides' => $linterOverrides,
		'offsetType' => $attribs['offsetType'],
	];

	try {
		return $this->newParsoid()->wikitext2lint(
			$pageConfig,
			$lintEnvOptions,
			new ParserOutput()
		);
	} catch ( ClientError $clientError ) {
		throw new LocalizedHttpException(
			new MessageValue( "rest-parsoid-error", [ $clientError->getMessage() ] ), 400
		);
	} catch ( ResourceLimitExceededException $limitError ) {
		throw new LocalizedHttpException(
			new MessageValue( "rest-parsoid-resource-exceeded", [ $limitError->getMessage() ] ), 413
		);
	}
}
663 | |
664 | /** |
665 | * Wikitext -> HTML helper. |
666 | * Spec'd in https://phabricator.wikimedia.org/T75955 and the API tests. |
667 | * |
668 | * @param PageConfig $pageConfig |
669 | * @param array $attribs Request attributes from getRequestAttributes() |
670 | * @param ?string $wikitext Wikitext to transform (or null to use the |
671 | * page specified in the request attributes). |
672 | * |
673 | * @return Response |
674 | */ |
675 | protected function wt2html( |
676 | PageConfig $pageConfig, array $attribs, ?string $wikitext = null |
677 | ) { |
678 | $request = $this->getRequest(); |
679 | $opts = $attribs['opts']; |
680 | $format = $opts['format']; |
681 | $oldid = $attribs['oldid']; |
682 | $stash = $opts['stash'] ?? false; |
683 | |
684 | if ( $format === ParsoidFormatHelper::FORMAT_LINT ) { |
685 | $linterOverrides = []; |
686 | if ( $this->extensionRegistry->isLoaded( 'Linter' ) ) { // T360809 |
687 | $disabled = []; |
688 | $services = MediaWikiServices::getInstance(); |
689 | $linterCategories = $services->getMainConfig()->get( 'LinterCategories' ); |
690 | foreach ( $linterCategories as $name => $cat ) { |
691 | if ( $cat['priority'] === 'none' ) { |
692 | $disabled[] = $name; |
693 | } |
694 | } |
695 | $linterOverrides['disabled'] = $disabled; |
696 | } |
697 | $lints = $this->wtLint( $pageConfig, $attribs, $linterOverrides ); |
698 | $response = $this->getResponseFactory()->createJson( $lints ); |
699 | return $response; |
700 | } |
701 | |
702 | // TODO: This method should take a PageIdentity + revId, |
703 | // to reduce the usage of PageConfig in MW core. |
704 | $helper = $this->getHtmlOutputRendererHelper( |
705 | $attribs, |
706 | $wikitext, |
707 | $this->pageConfigToPageIdentity( $pageConfig ), |
708 | // Id will be 0 if we have $wikitext but that isn't valid |
709 | // to call $helper->setRevision with. In any case, the revision |
710 | // will be reset when $helper->setContent is called with $wikitext. |
711 | // Ideally, the revision would be passed through here instead of |
712 | // the id and wikitext. |
713 | $pageConfig->getRevisionId() ?: null |
714 | ); |
715 | |
716 | $needsPageBundle = ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE ); |
717 | |
718 | if ( $attribs['body_only'] ) { |
719 | $helper->setFlavor( 'fragment' ); |
720 | } elseif ( !$needsPageBundle ) { |
721 | // Inline data-parsoid. This will happen when no special params are set. |
722 | $helper->setFlavor( 'edit' ); |
723 | } |
724 | |
725 | if ( $wikitext === null && $oldid !== null ) { |
726 | $mstr = 'pageWithOldid'; |
727 | } else { |
728 | $mstr = 'wt'; |
729 | } |
730 | |
731 | $parseTiming = Timing::start(); |
732 | |
733 | if ( $needsPageBundle ) { |
734 | $pb = $helper->getPageBundle(); |
735 | |
736 | // Handle custom offset requests as a pb2pb transform |
737 | if ( |
738 | $helper->isParsoidContent() && |
739 | ( $attribs['offsetType'] !== 'byte' ) |
740 | ) { |
741 | $parsoid = $this->newParsoid(); |
742 | $pb = $parsoid->pb2pb( |
743 | $pageConfig, |
744 | 'convertoffsets', |
745 | $pb, |
746 | [ |
747 | 'inputOffsetType' => 'byte', |
748 | 'outputOffsetType' => $attribs['offsetType'] |
749 | ] |
750 | ); |
751 | } |
752 | |
753 | $response = $this->getResponseFactory()->createJson( $pb->responseData() ); |
754 | $helper->putHeaders( $response, false ); |
755 | |
756 | ParsoidFormatHelper::setContentType( |
757 | $response, |
758 | ParsoidFormatHelper::FORMAT_PAGEBUNDLE, |
759 | $pb->version |
760 | ); |
761 | } else { |
762 | $out = $helper->getHtml(); |
763 | |
764 | // TODO: offsetType conversion isn't supported right now for non-pagebundle endpoints |
765 | // Once the OutputTransform framework lands, we might revisit this. |
766 | |
767 | $response = $this->getResponseFactory()->create(); |
768 | $response->getBody()->write( $out->getRawText() ); |
769 | |
770 | $helper->putHeaders( $response, true ); |
771 | |
772 | // Emit an ETag only if stashing is enabled. It's not reliably useful otherwise. |
773 | if ( $stash ) { |
774 | $eTag = $helper->getETag(); |
775 | if ( $eTag ) { |
776 | $response->setHeader( 'ETag', $eTag ); |
777 | } |
778 | } |
779 | } |
780 | |
781 | // XXX: For pagebundle requests, this can be somewhat inflated |
782 | // because of pagebundle json-encoding overheads |
783 | $outSize = $response->getBody()->getSize(); |
784 | $parseTime = $parseTiming->end(); |
785 | |
786 | // Ignore slow parse metrics for non-oldid parses |
787 | if ( $mstr === 'pageWithOldid' ) { |
788 | if ( $parseTime > 3000 ) { |
789 | LoggerFactory::getInstance( 'slow-parsoid' ) |
790 | ->info( 'Parsing {title} was slow, took {time} seconds', [ |
791 | 'time' => number_format( $parseTime / 1000, 2 ), |
792 | 'title' => Title::newFromLinkTarget( $pageConfig->getLinkTarget() )->getPrefixedText(), |
793 | ] ); |
794 | } |
795 | |
796 | if ( $parseTime > 10 && $outSize > 100 ) { |
797 | // * Don't bother with this metric for really small parse times |
798 | // p99 for initialization time is ~7ms according to grafana. |
799 | // So, 10ms ensures that startup overheads don't skew the metrics |
800 | // * For body_only=false requests, <head> section isn't generated |
801 | // and if the output is small, per-request overheads can skew |
802 | // the timePerKB metrics. |
803 | |
804 | // NOTE: This is slightly misleading since there are fixed costs |
805 | // for generating output like the <head> section and should be factored in, |
806 | // but this is good enough for now as a useful first degree of approximation. |
807 | $timePerKB = $parseTime * 1024 / $outSize; |
808 | if ( $timePerKB > 500 ) { |
809 | // At 100ms/KB, even a 100KB page which isn't that large will take 10s. |
810 | // So, we probably want to shoot for a threshold under 100ms. |
811 | // But, let's start with 500ms+ outliers first and see what we uncover. |
812 | LoggerFactory::getInstance( 'slow-parsoid' ) |
813 | ->info( 'Parsing {title} was slow, timePerKB took {timePerKB} ms, total: {time} seconds', [ |
814 | 'time' => number_format( $parseTime / 1000, 2 ), |
815 | 'timePerKB' => number_format( $timePerKB, 1 ), |
816 | 'title' => Title::newFromLinkTarget( $pageConfig->getLinkTarget() )->getPrefixedText(), |
817 | ] ); |
818 | } |
819 | } |
820 | } |
821 | |
822 | if ( $wikitext !== null ) { |
823 | // Don't cache requests when wt is set in case somebody uses |
824 | // GET for wikitext parsing |
825 | // XXX: can we just refuse to do wikitext parsing in a GET request? |
826 | $response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' ); |
827 | } elseif ( $oldid !== null ) { |
828 | // XXX: can this go away? Parsoid's PageContent class doesn't expose suppressed revision content. |
829 | if ( $request->getHeaderLine( 'Cookie' ) || |
830 | $request->getHeaderLine( 'Authorization' ) ) { |
831 | // Don't cache requests with a session. |
832 | $response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' ); |
833 | } |
834 | } |
835 | return $response; |
836 | } |
837 | |
/**
 * Construct a Parsoid instance from this handler's site configuration
 * and data access objects. A new instance is built on every call.
 *
 * @return Parsoid
 */
protected function newParsoid(): Parsoid {
	$parsoid = new Parsoid( $this->siteConfig, $this->dataAccess );

	return $parsoid;
}
841 | |
/**
 * Parse an HTML string into a DOM document by delegating to DOMUtils::parseHTML().
 *
 * @param string $html HTML source to parse
 * @param bool $validateXMLNames Forwarded unchanged to DOMUtils::parseHTML()
 * @return Document
 */
protected function parseHTML( string $html, bool $validateXMLNames = false ): Document {
	$document = DOMUtils::parseHTML( $html, $validateXMLNames );

	return $document;
}
845 | |
/**
 * Run the HTML input transform for the given page and write the result
 * into a freshly created response.
 *
 * @param PageConfig|PageIdentity $page Page the HTML belongs to. A PageConfig
 *   is converted to a Title (built from its link target) before use.
 * @param array $attribs Attributes gotten from requests
 * @param string $html Original HTML
 *
 * @return Response
 * @throws HttpException A ClientError raised while transforming is reported
 *   as a 400 "rest-parsoid-error" carrying the original error message.
 */
protected function html2wt(
	$page, array $attribs, string $html
) {
	// TODO: Deprecate passing a PageConfig.
	// Ideally, callers would use HtmlToContentTransform directly.
	$target = ( $page instanceof PageConfig )
		? Title::newFromLinkTarget( $page->getLinkTarget() )
		: $page;

	try {
		$inputHelper = $this->getHtmlInputTransformHelper( $attribs, $html, $target );

		$result = $this->getResponseFactory()->create();
		$inputHelper->putContent( $result );
	} catch ( ClientError $clientError ) {
		$message = new MessageValue( "rest-parsoid-error", [ $clientError->getMessage() ] );
		throw new LocalizedHttpException( $message, 400 );
	}

	return $result;
}
874 | |
/**
 * Pagebundle -> pagebundle helper.
 *
 * Dispatches, in this order, to:
 *  - an "updates" transform (red links or language variant) when
 *    $attribs['opts']['updates'] is non-empty;
 *  - a content-version downgrade when Parsoid::findDowngrade() reports one;
 *  - a wt2html run when the output version satisfies the caret range of
 *    the input version;
 *  - otherwise a 415 error.
 *
 * @param array<string,array|string> $attribs
 * @return Response
 * @throws HttpException
 */
protected function pb2pb( array $attribs ) {
	$opts = $attribs['opts'];

	// The revision to work from: "previous" wins over "original".
	$revision = $opts['previous'] ?? $opts['original'] ?? null;
	if ( !isset( $revision['html'] ) ) {
		throw new LocalizedHttpException( new MessageValue( "rest-missing-revision-html" ), 400 );
	}

	// The input content version is taken from the content-type header of the supplied HTML.
	$contentType = $revision['html']['headers']['content-type'] ?? '';
	$inputVersion = ParsoidFormatHelper::parseContentTypeHeader( $contentType );
	if ( $inputVersion === null ) {
		throw new LocalizedHttpException( new MessageValue( "rest-missing-revision-html-content-type" ), 400 );
	}
	$attribs['envOptions']['inputContentVersion'] = $inputVersion;
	'@phan-var array<string,array|string> $attribs'; // @var array<string,array|string> $attribs

	$this->metrics->increment( 'pb2pb.original.version.' . $inputVersion );

	if ( !empty( $opts['updates'] ) ) {
		// FIXME: Handling missing revisions uniformly for all update types
		// is probably not the right thing to do, but probably okay for now.
		// This might need revisiting as we add newer types.
		$pageConfig = $this->tryToCreatePageConfig( $attribs, null, true );

		// If we're only updating parts of the original version, it should
		// satisfy the requested content version, since we'll be returning
		// that same one.
		// FIXME: Since this endpoint applies the acceptable middleware,
		// `getOutputContentVersion` is not what's been passed in, but what
		// can be produced. Maybe that should be selectively applied so
		// that we can update older versions where it makes sense?
		// Uncommenting below implies that we can only update the latest
		// version, since caret semantics is applied in both directions.
		// if ( !Semver::satisfies(
		// 	$attribs['envOptions']['inputContentVersion'],
		// 	"^{$attribs['envOptions']['outputContentVersion']}"
		// ) ) {
		// 	throw new HttpException(
		// 		'We do not know how to do this conversion.', 415
		// 	);
		// }
		$updates = $opts['updates'];

		if ( !empty( $updates['redlinks'] ) ) {
			// Q(arlolra): Should redlinks be more complex than a bool?
			// See gwicke's proposal at T114413#2240381
			return $this->updateRedLinks( $pageConfig, $attribs, $revision );
		}

		if ( isset( $updates['variant'] ) ) {
			return $this->languageConversion( $pageConfig, $attribs, $revision );
		}

		throw new LocalizedHttpException( new MessageValue( "rest-unknown-parsoid-transformation" ), 400 );
	}

	// TODO(arlolra): subbu has some sage advice in T114413#2365456 that
	// we should probably be more explicit about the pb2pb conversion
	// requested rather than this increasingly complex fallback logic.
	$outputVersion = $attribs['envOptions']['outputContentVersion'];
	$downgrade = Parsoid::findDowngrade( $inputVersion, $outputVersion );

	if ( $downgrade ) {
		$pb = new PageBundle(
			$revision['html']['body'],
			$revision['data-parsoid']['body'] ?? null,
			$revision['data-mw']['body'] ?? null
		);
		// The bundle is checked against the input version before it is downgraded.
		$this->validatePb( $pb, $inputVersion );
		Parsoid::downgrade( $downgrade, $pb );

		if ( !empty( $attribs['body_only'] ) ) {
			// Replace the HTML with the serialized contents of its <body>.
			$body = DOMCompat::getBody( $this->parseHTML( $pb->html ) );
			$pb->html = ContentUtils::toXML( $body, [ 'innerXML' => true ] );
		}

		$response = $this->getResponseFactory()->createJson( $pb->responseData() );
		ParsoidFormatHelper::setContentType(
			$response, ParsoidFormatHelper::FORMAT_PAGEBUNDLE, $pb->version
		);

		return $response;
	}

	// Ensure we only reuse from semantically similar content versions.
	if ( !Semver::satisfies( $outputVersion, '^' . $inputVersion ) ) {
		throw new LocalizedHttpException( new MessageValue( "rest-unsupported-profile-conversion" ), 415 );
	}

	$pageConfig = $this->tryToCreatePageConfig( $attribs );

	return $this->wt2html( $pageConfig, $attribs );
}
971 | |
/**
 * Update red links on a document.
 *
 * Wraps the supplied revision in a PageBundle, runs Parsoid's 'redlinks'
 * pb2pb transform over it, validates the result against the input content
 * version and returns it as a pagebundle JSON response.
 *
 * @param PageConfig $pageConfig
 * @param array $attribs Request attributes; envOptions.inputContentVersion is read
 * @param array $revision Revision data holding 'html' and, optionally,
 *   'data-parsoid', 'data-mw' and 'contentmodel'
 * @return Response
 */
protected function updateRedLinks(
	PageConfig $pageConfig, array $attribs, array $revision
) {
	$parsoid = $this->newParsoid();

	$html = $revision['html']['body'];
	$dataParsoid = $revision['data-parsoid']['body'] ?? null;
	$dataMw = $revision['data-mw']['body'] ?? null;
	$inputVersion = $attribs['envOptions']['inputContentVersion'];
	$headers = $revision['html']['headers'] ?? null;
	$contentModel = $revision['contentmodel'] ?? null;

	$original = new PageBundle(
		$html, $dataParsoid, $dataMw, $inputVersion, $headers, $contentModel
	);

	$updated = $parsoid->pb2pb( $pageConfig, 'redlinks', $original, [] );

	// Validation is applied to the transformed bundle, not to the input.
	$this->validatePb( $updated, $inputVersion );

	$response = $this->getResponseFactory()->createJson( $updated->responseData() );
	ParsoidFormatHelper::setContentType(
		$response,
		ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
		$updated->version
	);

	return $response;
}
1004 | |
/**
 * Do variant conversion on a document.
 *
 * The target variant comes from opts.updates.variant.target, falling back
 * to envOptions.htmlVariantLanguage; the source variant is optional.
 *
 * @param PageConfig $pageConfig
 * @param array $attribs Request attributes ('opts', 'envOptions' and,
 *   optionally, 'pagelanguage' are read)
 * @param array $revision Revision data holding 'html' and, optionally,
 *   'data-parsoid', 'data-mw' and 'contentmodel'
 * @return Response
 * @throws HttpException 400 when no target variant is available, or when
 *   the converter rejects the request with an InvalidArgumentException
 */
protected function languageConversion(
	PageConfig $pageConfig, array $attribs, array $revision
) {
	$opts = $attribs['opts'];

	$target = $opts['updates']['variant']['target'] ?? null;
	if ( $target === null ) {
		$target = $attribs['envOptions']['htmlVariantLanguage'];
	}
	$source = $opts['updates']['variant']['source'] ?? null;

	if ( !$target ) {
		throw new LocalizedHttpException( new MessageValue( "rest-target-variant-required" ), 400 );
	}

	$pageIdentity = $this->tryToCreatePageIdentity( $attribs );

	$bundle = new PageBundle(
		$revision['html']['body'],
		$revision['data-parsoid']['body'] ?? null,
		$revision['data-mw']['body'] ?? null,
		$attribs['envOptions']['inputContentVersion'],
		$revision['html']['headers'] ?? null,
		$revision['contentmodel'] ?? null
	);

	// XXX: DI should inject HtmlTransformFactory
	$transformFactory = MediaWikiServices::getInstance()->getHtmlTransformFactory();
	$converter = $transformFactory->getLanguageVariantConverter( $pageIdentity );
	$converter->setPageConfig( $pageConfig );

	// Only a non-empty 'pagelanguage' attribute overrides the page language.
	if ( !empty( $attribs['pagelanguage'] ) ) {
		$converter->setPageLanguageOverride( $attribs['pagelanguage'] );
	}

	try {
		$converted = $converter->convertPageBundleVariant( $bundle, $target, $source );
	} catch ( InvalidArgumentException $invalid ) {
		$message = new MessageValue(
			"rest-unsupported-language-conversion",
			[ $source ?? '(unspecified)', $target ]
		);
		throw new LocalizedHttpException( $message, 400, [ 'reason' => $invalid->getMessage() ] );
	}

	$response = $this->getResponseFactory()->createJson( $converted->responseData() );
	ParsoidFormatHelper::setContentType(
		$response,
		ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
		$converted->version
	);

	return $response;
}
1063 | |
1064 | /** @inheritDoc Declared abstract here, so each concrete subclass must supply the implementation that returns the Response. */ |
1065 | abstract public function execute(): Response; |
1066 | |
/**
 * Check a PageBundle against the given content version and turn a failed
 * check into a 400 "rest-page-bundle-validation-error" response.
 *
 * @param PageBundle $pb Bundle to check
 * @param string $contentVersion Content version the bundle must validate against
 * @throws HttpException When PageBundle::validate() reports failure
 */
private function validatePb( PageBundle $pb, string $contentVersion ): void {
	// Handed to validate(); presumably filled in by reference with the
	// failure reason -- confirm against PageBundle::validate().
	$reason = '';

	$isValid = $pb->validate( $contentVersion, $reason );
	if ( $isValid ) {
		return;
	}

	$message = new MessageValue( "rest-page-bundle-validation-error", [ $reason ] );
	throw new LocalizedHttpException( $message, 400 );
}
1083 | |
/**
 * Look up the page identity for the link target of a PageConfig using
 * the PageStore service.
 *
 * @param PageConfig $page
 *
 * @return ProperPageIdentity
 * @throws HttpException 400 "Bad title" when the page store rejects the link target
 */
private function pageConfigToPageIdentity( PageConfig $page ): ProperPageIdentity {
	$services = MediaWikiServices::getInstance();
	$linkTarget = $page->getLinkTarget();

	try {
		return $services->getPageStore()->getPageForLink( $linkTarget );
	} catch ( MalformedTitleException | InvalidArgumentException $e ) {
		// Note that even some well-formed links are still invalid
		// parameters for getPageForLink(), e.g. interwiki links or special pages.
		throw new HttpException(
			"Bad title: $linkTarget", # uses LinkTarget::__toString()
			400
		);
	}
}
1107 | |
1108 | } |