Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
40.43% |
186 / 460 |
|
13.04% |
3 / 23 |
CRAP | |
0.00% |
0 / 1 |
ParsoidHandler | |
40.43% |
186 / 460 |
|
13.04% |
3 / 23 |
3320.34 | |
0.00% |
0 / 1 |
factory | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
__construct | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
assertDomainIsCorrect | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
42 | |||
getParsedBody | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
42 | |||
getRequestAttributes | |
0.00% |
0 / 82 |
|
0.00% |
0 / 1 |
462 | |||
getHtmlOutputRendererHelper | |
94.74% |
18 / 19 |
|
0.00% |
0 / 1 |
6.01 | |||
getHtmlInputTransformHelper | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
2 | |||
acceptable | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
272 | |||
tryToCreatePageConfig | |
87.50% |
28 / 32 |
|
0.00% |
0 / 1 |
7.10 | |||
tryToCreatePageIdentity | |
55.56% |
5 / 9 |
|
0.00% |
0 / 1 |
3.79 | |||
getTransformEndpoint | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getPageContentEndpoint | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getRevisionContentEndpoint | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
wtLint | |
45.45% |
5 / 11 |
|
0.00% |
0 / 1 |
4.46 | |||
wt2html | |
84.88% |
73 / 86 |
|
0.00% |
0 / 1 |
24.83 | |||
newParsoid | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
parseHTML | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
html2wt | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
3.02 | |||
pb2pb | |
0.00% |
0 / 45 |
|
0.00% |
0 / 1 |
90 | |||
updateRedLinks | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
2 | |||
languageConversion | |
79.41% |
27 / 34 |
|
0.00% |
0 / 1 |
4.14 | |||
execute | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
validatePb | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
pageConfigToPageIdentity | |
44.44% |
4 / 9 |
|
0.00% |
0 / 1 |
2.69 |
1 | <?php |
2 | /** |
3 | * Copyright (C) 2011-2020 Wikimedia Foundation and others. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | */ |
19 | |
20 | namespace MediaWiki\Rest\Handler; |
21 | |
22 | use Composer\Semver\Semver; |
23 | use ExtensionRegistry; |
24 | use InvalidArgumentException; |
25 | use LanguageCode; |
26 | use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface; |
27 | use LogicException; |
28 | use MediaWiki\Context\RequestContext; |
29 | use MediaWiki\Logger\LoggerFactory; |
30 | use MediaWiki\MainConfigNames; |
31 | use MediaWiki\MediaWikiServices; |
32 | use MediaWiki\Page\PageIdentity; |
33 | use MediaWiki\Page\ProperPageIdentity; |
34 | use MediaWiki\Rest\Handler; |
35 | use MediaWiki\Rest\Handler\Helper\HtmlInputTransformHelper; |
36 | use MediaWiki\Rest\Handler\Helper\HtmlOutputRendererHelper; |
37 | use MediaWiki\Rest\Handler\Helper\ParsoidFormatHelper; |
38 | use MediaWiki\Rest\HttpException; |
39 | use MediaWiki\Rest\LocalizedHttpException; |
40 | use MediaWiki\Rest\Response; |
41 | use MediaWiki\Revision\MutableRevisionRecord; |
42 | use MediaWiki\Revision\RevisionAccessException; |
43 | use MediaWiki\Revision\RevisionLookup; |
44 | use MediaWiki\Revision\SlotRecord; |
45 | use MediaWiki\Revision\SuppressedDataException; |
46 | use MediaWiki\Title\MalformedTitleException; |
47 | use MediaWiki\Title\Title; |
48 | use MediaWiki\WikiMap\WikiMap; |
49 | use MobileContext; |
50 | use Wikimedia\Http\HttpAcceptParser; |
51 | use Wikimedia\Message\DataMessageValue; |
52 | use Wikimedia\Message\MessageValue; |
53 | use Wikimedia\Parsoid\Config\DataAccess; |
54 | use Wikimedia\Parsoid\Config\PageConfig; |
55 | use Wikimedia\Parsoid\Config\PageConfigFactory; |
56 | use Wikimedia\Parsoid\Config\SiteConfig; |
57 | use Wikimedia\Parsoid\Core\ClientError; |
58 | use Wikimedia\Parsoid\Core\PageBundle; |
59 | use Wikimedia\Parsoid\Core\ResourceLimitExceededException; |
60 | use Wikimedia\Parsoid\DOM\Document; |
61 | use Wikimedia\Parsoid\Parsoid; |
62 | use Wikimedia\Parsoid\Utils\ContentUtils; |
63 | use Wikimedia\Parsoid\Utils\DOMCompat; |
64 | use Wikimedia\Parsoid\Utils\DOMUtils; |
65 | use Wikimedia\Parsoid\Utils\Timing; |
66 | use WikitextContent; |
67 | |
68 | /** |
69 | * Base class for Parsoid handlers. |
70 | * @internal For use by the Parsoid extension |
71 | */ |
72 | abstract class ParsoidHandler extends Handler { |
73 | |
74 | // TODO logging, timeouts(?), CORS |
75 | // TODO content negotiation (routes.js routes.acceptable) |
76 | // TODO handle MaxConcurrentCallsError (pool counter?) |
77 | |
78 | /** @var SiteConfig */ |
79 | protected $siteConfig; |
80 | |
81 | /** @var PageConfigFactory */ |
82 | protected $pageConfigFactory; |
83 | |
84 | /** @var DataAccess */ |
85 | protected $dataAccess; |
86 | |
87 | /** @var ExtensionRegistry */ |
88 | protected $extensionRegistry; |
89 | |
90 | /** @var ?StatsdDataFactoryInterface A statistics aggregator */ |
91 | protected $metrics; |
92 | |
93 | /** @var array */ |
94 | private $requestAttributes; |
95 | |
96 | private RevisionLookup $revisionLookup; |
97 | |
/**
 * Build a handler wired with the Parsoid-related services taken from the
 * global MediaWikiServices container.
 *
 * The instance is created through late static binding, so invoking this
 * on a subclass yields an instance of that subclass.
 *
 * @return static
 */
public static function factory(): ParsoidHandler {
	$container = MediaWikiServices::getInstance();

	$revisionLookup = $container->getRevisionLookup();
	$siteConfig = $container->getParsoidSiteConfig();
	$pageConfigFactory = $container->getParsoidPageConfigFactory();
	$dataAccess = $container->getParsoidDataAccess();

	// @phan-suppress-next-line PhanTypeInstantiateAbstractStatic
	return new static( $revisionLookup, $siteConfig, $pageConfigFactory, $dataAccess );
}
111 | |
/**
 * Keep the injected services and derive the remaining collaborators.
 *
 * Besides storing its arguments, the constructor fetches the global
 * ExtensionRegistry singleton and asks the site configuration for its
 * metrics collector.
 *
 * @param RevisionLookup $revisionLookup
 * @param SiteConfig $siteConfig
 * @param PageConfigFactory $pageConfigFactory
 * @param DataAccess $dataAccess
 */
public function __construct(
	RevisionLookup $revisionLookup,
	SiteConfig $siteConfig,
	PageConfigFactory $pageConfigFactory,
	DataAccess $dataAccess
) {
	// Collaborators derived from the environment / the site configuration.
	$this->metrics = $siteConfig->metrics();
	$this->extensionRegistry = ExtensionRegistry::getInstance();

	// Services handed in by the caller.
	$this->dataAccess = $dataAccess;
	$this->pageConfigFactory = $pageConfigFactory;
	$this->siteConfig = $siteConfig;
	$this->revisionLookup = $revisionLookup;
}
131 | |
/**
 * Check the {domain} path parameter against the wiki's configured server.
 *
 * The host of $wgServer is accepted (compared case-insensitively). When the
 * MobileFrontend extension is loaded, the host of the corresponding mobile
 * URL is accepted as well. Any other value results in an HTTP 400 error.
 *
 * @todo Remove this when we no longer need to support the {domain}
 *   parameter with backwards compatibility with the parsoid
 *   extension.
 * @param string $domain Domain name parameter to validate
 * @throws LogicException If no host can be extracted from $wgServer
 * @throws LocalizedHttpException If $domain matches none of the accepted hosts
 */
protected function assertDomainIsCorrect( $domain ): void {
	// Corners are being cut here (IDN, non-ASCII casing) because
	// domain name support is only provisional.
	// TODO use a proper validator instead
	$serverUrl = RequestContext::getMain()->getConfig()->get( MainConfigNames::Server );
	$canonicalHost = parse_url( $serverUrl, PHP_URL_HOST );
	if ( !$canonicalHost ) {
		throw new LogicException( 'Cannot parse $wgServer' );
	}

	$isCanonicalHost = ( strcasecmp( $canonicalHost, $domain ) === 0 );
	if ( $isCanonicalHost ) {
		return;
	}

	// TODO: This should really go away! It is tolerable only because the
	// whole method disappears once the parsoid extension endpoints with
	// the {domain} parameter are no longer needed.
	if ( $this->extensionRegistry->isLoaded( 'MobileFrontend' ) ) {
		// @phan-suppress-next-line PhanUndeclaredClassMethod
		$mobileUrl = MobileContext::singleton()->getMobileUrl( $serverUrl );
		$mobileHost = parse_url( $mobileUrl, PHP_URL_HOST );
		$isMobileHost = $mobileHost && strcasecmp( $mobileHost, $domain ) === 0;
		if ( $isMobileHost ) {
			return;
		}
	}

	// Neither host matched: report a parameter validation failure.
	$failure = new DataMessageValue(
		'mwparsoid-invalid-domain',
		[],
		'invalid-domain',
		[ 'expected' => $canonicalHost, 'actual' => $domain, ]
	);
	$errorData = [
		'error' => 'parameter-validation-failed',
		'name' => 'domain',
		'value' => $domain,
		'failureCode' => $failure->getCode(),
		'failureData' => $failure->getData(),
	];

	throw new LocalizedHttpException( $failure, 400, $errorData );
}
179 | |
/**
 * Get the parsed request body, decoded according to its Content-Type.
 *
 * Form encoded bodies are returned as the request's POST parameters,
 * JSON bodies are decoded into an associative array. Parameters following
 * the media type (e.g. "; charset=utf-8") are ignored, and the media type
 * itself is matched case-insensitively with surrounding whitespace removed.
 *
 * @return array
 * @throws LocalizedHttpException With status 400 if a JSON body does not
 *   decode to an array, or 415 if the content type is missing or unsupported.
 */
protected function getParsedBody(): array {
	$request = $this->getRequest();
	[ $mediaType ] = explode( ';', $request->getHeader( 'Content-Type' )[0] ?? '', 2 );
	// Media types are case-insensitive and may be followed by optional
	// whitespace before the parameter separator, so normalize first.
	$mediaType = trim( $mediaType );
	$contentType = strtolower( $mediaType );

	switch ( $contentType ) {
		case 'application/x-www-form-urlencoded':
		case 'multipart/form-data':
			return $request->getPostParams();
		case 'application/json':
			$json = json_decode( $request->getBody()->getContents(), true );
			if ( !is_array( $json ) ) {
				throw new LocalizedHttpException(
					new MessageValue( "rest-json-body-parse-error", [ 'not a valid JSON object' ] ), 400 );
			}
			return $json;
		default:
			// explode() never yields null, so an absent header shows up
			// as an empty string; report that case with a placeholder.
			$reported = ( $mediaType !== '' ) ? $mediaType : '(null)';
			throw new LocalizedHttpException(
				new MessageValue( "rest-unsupported-content-type", [ $reported ] ),
				415
			);
	}
}
206 | |
/**
 * Collect the per-request attributes used by the Parsoid handlers.
 * Rough equivalent of req.local from Parsoid-JS.
 *
 * The attributes are assembled from the path parameters, query parameters,
 * selected headers and (for POST requests) the parsed body. The result is
 * stored on the handler and handed out again on later calls; it is
 * returned by reference.
 *
 * FIXME most of these should be replaced with more native ways of handling the request.
 * @return array
 */
protected function &getRequestAttributes(): array {
	if ( $this->requestAttributes ) {
		return $this->requestAttributes;
	}

	$request = $this->getRequest();
	$isPost = ( $request->getMethod() === 'POST' );
	$body = $isPost ? $this->getParsedBody() : [];
	$pathOpts = array_intersect_key(
		$request->getPathParams(),
		[ 'from' => true, 'format' => true ]
	);
	$opts = array_merge( $body, $pathOpts );
	'@phan-var array<string,array|bool|string> $opts'; // @var array<string,array|bool|string> $opts

	$contentLanguage = null;
	$contentLanguageHeader = $request->getHeaderLine( 'Content-Language' );
	if ( $contentLanguageHeader ) {
		$contentLanguage = LanguageCode::normalizeNonstandardCodeAndWarn(
			$contentLanguageHeader
		);
	}

	$queryParams = $request->getQueryParams();
	$attribs = [
		'pageName' => $request->getPathParam( 'title' ) ?? '',
		'oldid' => $request->getPathParam( 'revision' ),
		// "body_only" flag to return just the body (instead of the entire HTML doc)
		// We would like to deprecate use of this flag: T181657
		'body_only' => $queryParams['body_only'] ?? $body['body_only'] ?? null,
		'errorEnc' => ParsoidFormatHelper::ERROR_ENCODING[$opts['format']] ?? 'plain',
		'iwp' => WikiMap::getCurrentWikiId(), // PORT-FIXME verify
		'offsetType' => $body['offsetType']
			?? $queryParams['offsetType']
			// Lint requests should return UCS2 offsets by default
			?? ( $opts['format'] === ParsoidFormatHelper::FORMAT_LINT ? 'ucs2' : 'byte' ),
		'pagelanguage' => $contentLanguage,
	];

	// For use in getHtmlOutputRendererHelper
	$opts['stash'] = $queryParams['stash'] ?? false;

	// A POST body may override the revision and title taken from the path.
	if ( $isPost ) {
		$attribs['oldid'] = $opts['original']['revid'] ?? $attribs['oldid'];
		$attribs['pageName'] = $opts['original']['title'] ?? $attribs['pageName'];
	}

	// An empty revision means "no revision"; anything else is cast to int.
	if ( $attribs['oldid'] !== null ) {
		$attribs['oldid'] = ( $attribs['oldid'] === '' ) ? null : (int)$attribs['oldid'];
	}

	// For use in getHtmlOutputRendererHelper
	$opts['accept-language'] = $request->getHeaderLine( 'Accept-Language' ) ?: null;

	$acceptLanguage = ( $opts['accept-language'] !== null )
		? LanguageCode::normalizeNonstandardCodeAndWarn( $opts['accept-language'] )
		: null;

	// Init pageName if oldid is provided and is a valid revision
	if ( $attribs['pageName'] === '' && $attribs['oldid'] ) {
		$revision = $this->revisionLookup->getRevisionById( $attribs['oldid'] );
		if ( $revision ) {
			$attribs['pageName'] = $revision->getPage()->getDBkey();
		}
	}

	$attribs['envOptions'] = [
		// We use `prefix` but ought to use `domain` (T206764)
		'prefix' => $attribs['iwp'],
		// For the legacy "domain" path parameter used by the endpoints exposed
		// by the parsoid extension. Will be null for core endpoints.
		'domain' => $request->getPathParam( 'domain' ),
		'pageName' => $attribs['pageName'],
		'cookie' => $request->getHeaderLine( 'Cookie' ),
		'reqId' => $request->getHeaderLine( 'X-Request-Id' ),
		'userAgent' => $request->getHeaderLine( 'User-Agent' ),
		'htmlVariantLanguage' => $acceptLanguage,
		// Semver::satisfies checks below expect a valid outputContentVersion value.
		// Better to set it here instead of adding the default value at every check.
		'outputContentVersion' => Parsoid::defaultHTMLVersion(),
	];

	# Convert language codes in $opts['updates']['variant'] if present
	foreach ( [ 'source', 'target' ] as $direction ) {
		$variant = $opts['updates']['variant'][$direction] ?? null;
		if ( $variant ) {
			$opts['updates']['variant'][$direction] =
				LanguageCode::normalizeNonstandardCodeAndWarn( $variant );
		}
	}

	// Normalize the content-language headers embedded in the request body.
	if ( isset( $opts['wikitext']['headers']['content-language'] ) ) {
		$opts['wikitext']['headers']['content-language'] =
			LanguageCode::normalizeNonstandardCodeAndWarn(
				$opts['wikitext']['headers']['content-language']
			);
	}
	if ( isset( $opts['original']['wikitext']['headers']['content-language'] ) ) {
		$opts['original']['wikitext']['headers']['content-language'] =
			LanguageCode::normalizeNonstandardCodeAndWarn(
				$opts['original']['wikitext']['headers']['content-language']
			);
	}

	$attribs['opts'] = $opts;

	// TODO: Remove assertDomainIsCorrect() once we no longer need to support the {domain}
	// parameter for the endpoints exposed by the parsoid extension.
	$legacyDomain = $attribs['envOptions']['domain'];
	if ( $legacyDomain !== null ) {
		$this->assertDomainIsCorrect( $legacyDomain );
	}

	$this->requestAttributes = $attribs;
	return $this->requestAttributes;
}
337 | |
/**
 * Create an HtmlOutputRendererHelper for the given page and configure it
 * from the request attributes.
 *
 * @param array $attribs Request attributes; the 'opts', 'envOptions' and
 *   'pagelanguage' entries are consulted.
 * @param ?string $source Content to render instead of the stored content,
 *   or null to leave the helper's content source untouched.
 * @param PageIdentity $page
 * @param ?int $revId
 *
 * @return HtmlOutputRendererHelper
 */
private function getHtmlOutputRendererHelper(
	array $attribs,
	?string $source,
	PageIdentity $page,
	?int $revId
): HtmlOutputRendererHelper {
	$helperFactory = MediaWikiServices::getInstance()->getPageRestHelperFactory();

	// Request lenient rev handling
	$renderer = $helperFactory->newHtmlOutputRendererHelper( true );

	$authority = $this->getAuthority();
	$initParams = [];
	$renderer->init( $page, $initParams, $authority, $revId );

	$opts = $attribs['opts'] ?? [];
	$envOptions = $attribs['envOptions'] ?? [];

	// XXX: should default to the page's content model?
	$model = $opts['contentmodel'] ?? $envOptions['contentmodel'] ?? CONTENT_MODEL_WIKITEXT;
	if ( $source !== null ) {
		$renderer->setContentSource( $source, $model );
	}

	// Each remaining option is applied only when the request supplied it.
	if ( isset( $opts['stash'] ) ) {
		$renderer->setStashingEnabled( $opts['stash'] );
	}
	if ( isset( $envOptions['outputContentVersion'] ) ) {
		$renderer->setOutputProfileVersion( $envOptions['outputContentVersion'] );
	}
	if ( isset( $attribs['pagelanguage'] ) ) {
		$renderer->setPageLanguage( $attribs['pagelanguage'] );
	}
	if ( isset( $opts['accept-language'] ) ) {
		$renderer->setVariantConversionLanguage( $opts['accept-language'] );
	}

	return $renderer;
}
389 | |
/**
 * Create an HtmlInputTransformHelper initialized with the given HTML.
 *
 * The helper receives the request options with the 'html' entry set to
 * $html as its body, and the request options merged over the request
 * attributes as its parameters.
 *
 * @param array $attribs Request attributes from getRequestAttributes()
 * @param string $html
 * @param PageIdentity $page
 *
 * @return HtmlInputTransformHelper
 */
protected function getHtmlInputTransformHelper(
	array $attribs,
	string $html,
	PageIdentity $page
): HtmlInputTransformHelper {
	$helperFactory = MediaWikiServices::getInstance()->getPageRestHelperFactory();
	$transformHelper = $helperFactory->newHtmlInputTransformHelper(
		$attribs['envOptions']
	);

	// Only attach a metrics collector when the site configuration has one.
	$statsCollector = $this->siteConfig->metrics();
	if ( $statsCollector ) {
		$transformHelper->setMetrics( $statsCollector );
	}

	$requestBody = $attribs['opts'];
	$requestBody['html'] = $html;

	// Entries in 'opts' take precedence over same-named request attributes.
	$transformHelper->init( $page, $requestBody, $attribs['opts'] + $attribs );

	return $transformHelper;
}
423 | |
/**
 * Pattern for the "profile" parameter of an Accept header entry.
 *
 * Capture group 1 is the spec name (HTML or pagebundle) and capture
 * group 2 the dotted three-part version number. The dots of the host name
 * are escaped so that only the literal mediawiki.org URL matches.
 *
 * FIXME: Combine with ParsoidFormatHelper::parseContentTypeHeader
 */
private const NEW_SPEC =
	'#^https://www\.mediawiki\.org/wiki/Specs/(HTML|pagebundle)/(\d+\.\d+\.\d+)$#D';
429 | |
/**
 * Decide whether the requested output format is compatible with the
 * request's Accept header.
 *
 * Wikitext output, a missing Accept header and an Accept header that
 * yields no parsed entries are always acceptable. As a side-effect,
 * $attribs['envOptions']['outputContentVersion'] is set when an entry
 * carries a profile naming a content version that Parsoid can resolve.
 *
 * @param array &$attribs Request attributes from getRequestAttributes()
 * @return bool
 */
protected function acceptable( array &$attribs ): bool {
	$request = $this->getRequest();
	$format = $attribs['opts']['format'];

	if ( $format === ParsoidFormatHelper::FORMAT_WIKITEXT ) {
		return true;
	}

	$acceptHeader = $request->getHeader( 'Accept' );
	if ( !$acceptHeader ) {
		return true;
	}

	// FIXME: Multiple headers valid?
	$acceptableTypes = ( new HttpAcceptParser() )->parseAccept( $acceptHeader[0] );
	if ( !$acceptableTypes ) {
		return true;
	}

	$wantsHtml = ( $format === ParsoidFormatHelper::FORMAT_HTML );
	$wantsPageBundle = ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE );

	// `acceptableTypes` is already sorted by quality.
	foreach ( $acceptableTypes as $entry ) {
		$mediaType = "{$entry['type']}/{$entry['subtype']}";

		$isExactType = ( $wantsHtml && $mediaType === 'text/html' )
			|| ( $wantsPageBundle && $mediaType === 'application/json' );

		if ( !$isExactType ) {
			// Wildcards are acceptable regardless of any profile.
			if ( $mediaType === '*/*' || ( $wantsHtml && $mediaType === 'text/*' ) ) {
				return true;
			}
			continue;
		}

		$profile = $entry['params']['profile'] ?? null;
		if ( !$profile ) {
			return true;
		}

		preg_match( self::NEW_SPEC, $profile, $matches );
		if ( !$matches || strtolower( $matches[1] ) !== $format ) {
			continue;
		}

		$contentVersion = Parsoid::resolveContentVersion( $matches[2] );
		if ( $contentVersion ) {
			// $attribs mutated here!
			$attribs['envOptions']['outputContentVersion'] = $contentVersion;
			return true;
		}
	}

	return false;
}
492 | |
/**
 * Try to create a PageConfig object for the page named in the request
 * attributes (or the main page when no page name is given).
 *
 * Suppressed revisions are reported as HTTP 403 and other revision access
 * failures as HTTP 404, so that callers can pass the error on.
 *
 * @param array $attribs Request attributes; 'oldid', 'pagelanguage' and
 *   'pageName' are read.
 * @param ?string $wikitextOverride
 *   Custom wikitext to use instead of the real content of the page.
 * @param bool $html2WtMode
 * @return PageConfig
 * @throws HttpException
 */
protected function tryToCreatePageConfig(
	array $attribs, ?string $wikitextOverride = null, bool $html2WtMode = false
): PageConfig {
	$revId = $attribs['oldid'];
	$pagelanguageOverride = $attribs['pagelanguage'];
	$pageName = $attribs['pageName'];

	$title = ( $pageName === '' ) ? Title::newMainPage() : Title::newFromText( $pageName );
	if ( !$title ) {
		// TODO use proper validation
		throw new LogicException( 'Title not found!' );
	}
	$user = RequestContext::getMain()->getUser();

	// By default the revision is identified by its id (possibly null).
	$revision = $revId;
	if ( $wikitextOverride !== null ) {
		// Use a mutable revision record carrying the desired wikitext.
		// Its id is deliberately 0 rather than $revId: a revision
		// corresponds to specific wikitext, which $wikitextOverride
		// might not be.
		$overrideRecord = new MutableRevisionRecord( $title );
		$overrideRecord->setId( 0 );
		$overrideRecord->setSlot(
			SlotRecord::newUnsaved(
				SlotRecord::MAIN,
				new WikitextContent( $wikitextOverride )
			)
		);
		$revision = $overrideRecord;
	}

	// Accessible content is required unless this is an html2wt request
	// without an explicit revision.
	$ensureAccessibleContent = ( $revId !== null ) || !$html2WtMode;

	try {
		// Note: Parsoid by design isn't supposed to use the user
		// context right now, and all user state is expected to be
		// introduced as a post-parse transform. So although we pass a
		// User here, it only currently affects the output in obscure
		// corner cases; see PageConfigFactory::create() for more.
		// @phan-suppress-next-line PhanUndeclaredMethod method defined in subtype
		return $this->pageConfigFactory->create(
			$title, $user, $revision, null, $pagelanguageOverride,
			$ensureAccessibleContent
		);
	} catch ( SuppressedDataException $e ) {
		throw new LocalizedHttpException(
			new MessageValue( "rest-permission-denied-revision", [ $e->getMessage() ] ), 403
		);
	} catch ( RevisionAccessException $e ) {
		throw new LocalizedHttpException(
			new MessageValue( "rest-specified-revision-unavailable", [ $e->getMessage() ] ), 404
		);
	}
}
564 | |
/**
 * Try to create a PageIdentity object.
 * If no page is specified in the request, this will return the wiki's main page.
 * If an invalid page is requested, this throws an appropriate HTTPException.
 *
 * @param array $attribs Request attributes; only 'pageName' is read.
 * @return PageIdentity
 * @throws HttpException With status 400 if the page store cannot resolve
 *   the page name.
 */
protected function tryToCreatePageIdentity( array $attribs ): PageIdentity {
	$pageName = $attribs['pageName'];
	if ( $pageName === '' ) {
		return Title::newMainPage();
	}

	// XXX: Should be injected, but the Parsoid extension relies on the
	//      constructor signature. Also, ParsoidHandler should go away soon anyway.
	$pageStore = MediaWikiServices::getInstance()->getPageStore();

	$page = $pageStore->getPageByText( $pageName );

	if ( !$page ) {
		// Report the rejected title itself rather than the name of the
		// attribute it came from.
		// NOTE(review): assumes "rest-invalid-title" takes the title as $1 - confirm.
		throw new LocalizedHttpException(
			new MessageValue( "rest-invalid-title", [ $pageName ] ), 400
		);
	}

	return $page;
}
593 | |
/**
 * Path template of the transform endpoint. Subclasses may override it.
 *
 * The parsoid extension does so in order to stay backwards compatible
 * with the old endpoint URLs.
 *
 * @stable to override
 *
 * @param string $format The format the endpoint is expected to return.
 *   This implementation returns the same template for every format.
 *
 * @return string
 */
protected function getTransformEndpoint( string $format = ParsoidFormatHelper::FORMAT_HTML ): string {
	$routeTemplate = '/coredev/v0/transform/{from}/to/{format}/{title}/{revision}';

	return $routeTemplate;
}
609 | |
/**
 * Path template of the page content endpoint. Subclasses may override it.
 *
 * The parsoid extension does so in order to stay backwards compatible
 * with the old endpoint URLs.
 *
 * @stable to override
 *
 * @param string $format The format the endpoint is expected to return.
 *   Only the HTML format is supported here.
 *
 * @return string
 * @throws InvalidArgumentException For any format other than HTML
 */
protected function getPageContentEndpoint( string $format = ParsoidFormatHelper::FORMAT_HTML ): string {
	if ( $format === ParsoidFormatHelper::FORMAT_HTML ) {
		return '/v1/page/{title}/html';
	}

	throw new InvalidArgumentException( 'Unsupported page content format: ' . $format );
}
628 | |
/**
 * Path template of the revision content endpoint. Subclasses may override it.
 *
 * The parsoid extension does so in order to stay backwards compatible
 * with the old endpoint URLs.
 *
 * @stable to override
 *
 * @param string $format The format the endpoint is expected to return.
 *   Only the HTML format is supported here.
 *
 * @return string
 * @throws InvalidArgumentException For any format other than HTML
 */
protected function getRevisionContentEndpoint( string $format = ParsoidFormatHelper::FORMAT_HTML ): string {
	if ( $format === ParsoidFormatHelper::FORMAT_HTML ) {
		return '/v1/revision/{revision}/html';
	}

	throw new InvalidArgumentException( 'Unsupported revision content format: ' . $format );
}
647 | |
/**
 * Run Parsoid's wikitext linter for the given page configuration.
 *
 * Parsoid client errors are converted into HTTP 400 errors and exceeded
 * resource limits into HTTP 413 errors.
 *
 * @param PageConfig $pageConfig
 * @param array $attribs Request attributes; 'envOptions' is passed on to Parsoid.
 * @param ?array $linterOverrides Used as the 'linterOverrides' env option
 *   unless $attribs['envOptions'] already contains that key.
 * @return mixed The result of Parsoid's wikitext2lint()
 *   (presumably an array of lint records - confirm against Parsoid).
 * @throws LocalizedHttpException
 */
public function wtLint(
	PageConfig $pageConfig, array $attribs, ?array $linterOverrides = []
) {
	// Array union: an existing 'linterOverrides' entry wins over the argument.
	$lintEnvOptions = $attribs['envOptions'];
	$lintEnvOptions += [ 'linterOverrides' => $linterOverrides ];

	try {
		return $this->newParsoid()->wikitext2lint( $pageConfig, $lintEnvOptions );
	} catch ( ClientError $clientError ) {
		$message = new MessageValue( "rest-parsoid-error", [ $clientError->getMessage() ] );
		throw new LocalizedHttpException( $message, 400 );
	} catch ( ResourceLimitExceededException $limitError ) {
		$message = new MessageValue(
			"rest-parsoid-resource-exceeded", [ $limitError->getMessage() ]
		);
		throw new LocalizedHttpException( $message, 413 );
	}
}
665 | |
666 | /** |
667 | * Wikitext -> HTML helper. |
668 | * Spec'd in https://phabricator.wikimedia.org/T75955 and the API tests. |
669 | * |
670 | * @param PageConfig $pageConfig |
671 | * @param array $attribs Request attributes from getRequestAttributes() |
672 | * @param ?string $wikitext Wikitext to transform (or null to use the |
673 | * page specified in the request attributes). |
674 | * |
675 | * @return Response |
676 | */ |
677 | protected function wt2html( |
678 | PageConfig $pageConfig, array $attribs, ?string $wikitext = null |
679 | ) { |
680 | $request = $this->getRequest(); |
681 | $opts = $attribs['opts']; |
682 | $format = $opts['format']; |
683 | $oldid = $attribs['oldid']; |
684 | $stash = $opts['stash'] ?? false; |
685 | |
686 | if ( $format === ParsoidFormatHelper::FORMAT_LINT ) { |
687 | $linterOverrides = []; |
688 | if ( $this->extensionRegistry->isLoaded( 'Linter' ) ) { // T360809 |
689 | $disabled = []; |
690 | $services = MediaWikiServices::getInstance(); |
691 | $linterCategories = $services->getMainConfig()->get( 'LinterCategories' ); |
692 | foreach ( $linterCategories as $name => $cat ) { |
693 | if ( $cat['priority'] === 'none' ) { |
694 | $disabled[] = $name; |
695 | } |
696 | } |
697 | $linterOverrides['disabled'] = $disabled; |
698 | } |
699 | $lints = $this->wtLint( $pageConfig, $attribs, $linterOverrides ); |
700 | $response = $this->getResponseFactory()->createJson( $lints ); |
701 | return $response; |
702 | } |
703 | |
704 | // TODO: This method should take a PageIdentity + revId, |
705 | // to reduce the usage of PageConfig in MW core. |
706 | $helper = $this->getHtmlOutputRendererHelper( |
707 | $attribs, |
708 | $wikitext, |
709 | $this->pageConfigToPageIdentity( $pageConfig ), |
710 | // Id will be 0 if we have $wikitext but that isn't valid |
711 | // to call $helper->setRevision with. In any case, the revision |
712 | // will be reset when $helper->setContent is called with $wikitext. |
713 | // Ideally, the revision would be pass through here instead of |
714 | // the id and wikitext. |
715 | $pageConfig->getRevisionId() ?: null |
716 | ); |
717 | |
718 | $needsPageBundle = ( $format === ParsoidFormatHelper::FORMAT_PAGEBUNDLE ); |
719 | |
720 | if ( $attribs['body_only'] ) { |
721 | $helper->setFlavor( 'fragment' ); |
722 | } elseif ( !$needsPageBundle ) { |
723 | // Inline data-parsoid. This will happen when no special params are set. |
724 | $helper->setFlavor( 'edit' ); |
725 | } |
726 | |
727 | if ( $wikitext === null && $oldid !== null ) { |
728 | $mstr = 'pageWithOldid'; |
729 | } else { |
730 | $mstr = 'wt'; |
731 | } |
732 | |
733 | $parseTiming = Timing::start(); |
734 | |
735 | if ( $needsPageBundle ) { |
736 | $pb = $helper->getPageBundle(); |
737 | |
738 | // Handle custom offset requests as a pb2pb transform |
739 | if ( $attribs['offsetType'] !== 'byte' ) { |
740 | $parsoid = $this->newParsoid(); |
741 | $pb = $parsoid->pb2pb( |
742 | $pageConfig, |
743 | 'convertoffsets', |
744 | $pb, |
745 | [ |
746 | 'inputOffsetType' => 'byte', |
747 | 'outputOffsetType' => $attribs['offsetType'] |
748 | ] |
749 | ); |
750 | } |
751 | |
752 | $response = $this->getResponseFactory()->createJson( $pb->responseData() ); |
753 | $helper->putHeaders( $response, false ); |
754 | |
755 | ParsoidFormatHelper::setContentType( |
756 | $response, |
757 | ParsoidFormatHelper::FORMAT_PAGEBUNDLE, |
758 | $pb->version |
759 | ); |
760 | } else { |
761 | $out = $helper->getHtml(); |
762 | |
763 | // TODO: offsetType conversion isn't supported right now for non-pagebundle endpoints |
764 | // Once the OutputTransform framework lands, we might revisit this. |
765 | |
766 | $response = $this->getResponseFactory()->create(); |
767 | $response->getBody()->write( $out->getRawText() ); |
768 | |
769 | $helper->putHeaders( $response, true ); |
770 | |
771 | // Emit an ETag only if stashing is enabled. It's not reliably useful otherwise. |
772 | if ( $stash ) { |
773 | $eTag = $helper->getETag(); |
774 | if ( $eTag ) { |
775 | $response->setHeader( 'ETag', $eTag ); |
776 | } |
777 | } |
778 | } |
779 | |
780 | // XXX: For pagebundle requests, this can be somewhat inflated |
781 | // because of pagebundle json-encoding overheads |
782 | $outSize = $response->getBody()->getSize(); |
783 | $parseTime = $parseTiming->end(); |
784 | |
785 | // Ignore slow parse metrics for non-oldid parses |
786 | if ( $mstr === 'pageWithOldid' ) { |
787 | if ( $parseTime > 3000 ) { |
788 | LoggerFactory::getInstance( 'slow-parsoid' ) |
789 | ->info( 'Parsing {title} was slow, took {time} seconds', [ |
790 | 'time' => number_format( $parseTime / 1000, 2 ), |
791 | 'title' => Title::newFromLinkTarget( $pageConfig->getLinkTarget() )->getPrefixedText(), |
792 | ] ); |
793 | } |
794 | |
795 | if ( $parseTime > 10 && $outSize > 100 ) { |
796 | // * Don't bother with this metric for really small parse times |
797 | // p99 for initialization time is ~7ms according to grafana. |
798 | // So, 10ms ensures that startup overheads don't skew the metrics |
799 | // * For body_only=false requests, <head> section isn't generated |
800 | // and if the output is small, per-request overheads can skew |
801 | // the timePerKB metrics. |
802 | |
803 | // NOTE: This is slightly misleading since there are fixed costs |
804 | // for generating output like the <head> section and should be factored in, |
805 | // but this is good enough for now as a useful first degree of approximation.
806 | $timePerKB = $parseTime * 1024 / $outSize; |
807 | if ( $timePerKB > 500 ) { |
808 | // At 100ms/KB, even a 100KB page which isn't that large will take 10s. |
809 | // So, we probably want to shoot for a threshold under 100ms. |
810 | // But, let's start with 500ms+ outliers first and see what we uncover. |
811 | LoggerFactory::getInstance( 'slow-parsoid' ) |
812 | ->info( 'Parsing {title} was slow, timePerKB took {timePerKB} ms, total: {time} seconds', [ |
813 | 'time' => number_format( $parseTime / 1000, 2 ), |
814 | 'timePerKB' => number_format( $timePerKB, 1 ), |
815 | 'title' => Title::newFromLinkTarget( $pageConfig->getLinkTarget() )->getPrefixedText(), |
816 | ] ); |
817 | } |
818 | } |
819 | } |
820 | |
821 | if ( $wikitext !== null ) { |
822 | // Don't cache requests when wt is set in case somebody uses |
823 | // GET for wikitext parsing |
824 | // XXX: can we just refuse to do wikitext parsing in a GET request? |
825 | $response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' ); |
826 | } elseif ( $oldid !== null ) { |
827 | // XXX: can this go away? Parsoid's PageContent class doesn't expose suppressed revision content.
828 | if ( $request->getHeaderLine( 'Cookie' ) || |
829 | $request->getHeaderLine( 'Authorization' ) ) { |
830 | // Don't cache requests with a session. |
831 | $response->setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' ); |
832 | } |
833 | } |
834 | return $response; |
835 | } |
836 | |
/**
 * Create a Parsoid instance from this handler's site configuration
 * and data access objects.
 *
 * @return Parsoid
 */
protected function newParsoid(): Parsoid {
	$parsoid = new Parsoid( $this->siteConfig, $this->dataAccess );

	return $parsoid;
}
840 | |
/**
 * Parse an HTML string into a DOM document by delegating to
 * DOMUtils::parseHTML().
 *
 * @param string $html HTML source to parse
 * @param bool $validateXMLNames Passed through unchanged to DOMUtils::parseHTML()
 * @return Document
 */
protected function parseHTML( string $html, bool $validateXMLNames = false ): Document {
	$document = DOMUtils::parseHTML( $html, $validateXMLNames );

	return $document;
}
844 | |
/**
 * Run the HTML input transform for a page and write its content into a
 * freshly created response.
 *
 * @param PageConfig|PageIdentity $page Page the HTML belongs to; a PageConfig
 *   is converted to a Title before use
 * @param array $attribs Attributes gotten from requests
 * @param string $html Original HTML
 *
 * @return Response
 * @throws HttpException A ClientError raised along the way is reported as a
 *   400 "rest-parsoid-error" carrying the original error message
 */
protected function html2wt(
	$page, array $attribs, string $html
) {
	// TODO: Deprecate passing a PageConfig.
	// Ideally, callers would use HtmlToContentTransform directly.
	$page = $page instanceof PageConfig
		? Title::newFromLinkTarget( $page->getLinkTarget() )
		: $page;

	try {
		$inputHelper = $this->getHtmlInputTransformHelper( $attribs, $html, $page );

		$response = $this->getResponseFactory()->create();
		$inputHelper->putContent( $response );
	} catch ( ClientError $clientError ) {
		$errorValue = new MessageValue( "rest-parsoid-error", [ $clientError->getMessage() ] );
		throw new LocalizedHttpException( $errorValue, 400 );
	}

	return $response;
}
873 | |
/**
 * Pagebundle -> pagebundle helper.
 *
 * The request is dispatched in this order:
 *  1. A non-empty `updates` option selects a partial update of the supplied
 *     revision: red links, or language variant conversion. Any other update
 *     type is rejected with a 400.
 *  2. If Parsoid::findDowngrade() finds a downgrade from the input to the
 *     output content version, the supplied bundle is downgraded and returned.
 *  3. If the output version satisfies the caret range of the input version,
 *     the page is re-rendered through wt2html().
 *  4. Anything else is rejected with a 415.
 *
 * @param array<string,array|string> $attribs
 * @return Response
 * @throws HttpException
 */
protected function pb2pb( array $attribs ) {
	$opts = $attribs['opts'];

	// The bundle to transform is "previous" when given, otherwise "original".
	$revision = $opts['previous'] ?? $opts['original'] ?? null;
	if ( !isset( $revision['html'] ) ) {
		throw new LocalizedHttpException( new MessageValue( "rest-missing-revision-html" ), 400 );
	}

	// The input content version comes from the HTML part's content-type header.
	$contentTypeHeader = $revision['html']['headers']['content-type'] ?? '';
	$inputVersion = ParsoidFormatHelper::parseContentTypeHeader( $contentTypeHeader );
	if ( $inputVersion === null ) {
		throw new LocalizedHttpException( new MessageValue( "rest-missing-revision-html-content-type" ), 400 );
	}
	$attribs['envOptions']['inputContentVersion'] = $inputVersion;
	'@phan-var array<string,array|string> $attribs'; // @var array<string,array|string> $attribs

	$this->metrics->increment( 'pb2pb.original.version.' . $inputVersion );

	$updates = $opts['updates'] ?? null;
	if ( $updates ) {
		// FIXME: Handling missing revisions uniformly for all update types
		// is probably not the right thing to do, but probably okay for now.
		// This might need revisiting as we add newer types.
		$pageConfig = $this->tryToCreatePageConfig( $attribs, null, true );

		// If we're only updating parts of the original version, it should
		// satisfy the requested content version, since we'll be returning
		// that same one.
		// FIXME: Since this endpoint applies the acceptable middleware,
		// `getOutputContentVersion` is not what's been passed in, but what
		// can be produced. Maybe that should be selectively applied so
		// that we can update older versions where it makes sense?
		// Uncommenting below implies that we can only update the latest
		// version, since caret semantics is applied in both directions.
		// if ( !Semver::satisfies(
		// 	$attribs['envOptions']['inputContentVersion'],
		// 	"^{$attribs['envOptions']['outputContentVersion']}"
		// ) ) {
		// 	throw new HttpException(
		// 		'We do not know how to do this conversion.', 415
		// 	);
		// }

		if ( !empty( $updates['redlinks'] ) ) {
			// Q(arlolra): Should redlinks be more complex than a bool?
			// See gwicke's proposal at T114413#2240381
			return $this->updateRedLinks( $pageConfig, $attribs, $revision );
		}
		if ( isset( $updates['variant'] ) ) {
			return $this->languageConversion( $pageConfig, $attribs, $revision );
		}
		throw new LocalizedHttpException( new MessageValue( "rest-unknown-parsoid-transformation" ), 400 );
	}

	// TODO(arlolra): subbu has some sage advice in T114413#2365456 that
	// we should probably be more explicit about the pb2pb conversion
	// requested rather than this increasingly complex fallback logic.
	$outputVersion = $attribs['envOptions']['outputContentVersion'];
	$downgrade = Parsoid::findDowngrade( $inputVersion, $outputVersion );

	if ( !$downgrade ) {
		// Ensure we only reuse from semantically similar content versions.
		if ( !Semver::satisfies( $outputVersion, '^' . $inputVersion ) ) {
			throw new LocalizedHttpException( new MessageValue( "rest-unsupported-profile-conversion" ), 415 );
		}
		$pageConfig = $this->tryToCreatePageConfig( $attribs );
		return $this->wt2html( $pageConfig, $attribs );
	}

	// Downgrade path: validate the supplied bundle against its declared
	// version, then downgrade it in place.
	$pb = new PageBundle(
		$revision['html']['body'],
		$revision['data-parsoid']['body'] ?? null,
		$revision['data-mw']['body'] ?? null
	);
	$this->validatePb( $pb, $inputVersion );
	Parsoid::downgrade( $downgrade, $pb );

	if ( !empty( $attribs['body_only'] ) ) {
		// Keep only the inner markup of <body>.
		$body = DOMCompat::getBody( $this->parseHTML( $pb->html ) );
		$pb->html = ContentUtils::toXML( $body, [ 'innerXML' => true ] );
	}

	$response = $this->getResponseFactory()->createJson( $pb->responseData() );
	ParsoidFormatHelper::setContentType(
		$response,
		ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
		$pb->version
	);

	return $response;
}
970 | |
/**
 * Update red links on a document.
 *
 * Builds a PageBundle from the supplied revision, runs Parsoid's 'redlinks'
 * pb2pb transform on it, validates the result against the input content
 * version and returns it as a JSON pagebundle response.
 *
 * @param PageConfig $pageConfig
 * @param array $attribs
 * @param array $revision
 * @return Response
 */
protected function updateRedLinks(
	PageConfig $pageConfig, array $attribs, array $revision
) {
	$parsoid = $this->newParsoid();

	// Collect the bundle parts; only the HTML body is read without a default.
	$htmlBody = $revision['html']['body'];
	$dataParsoid = $revision['data-parsoid']['body'] ?? null;
	$dataMw = $revision['data-mw']['body'] ?? null;
	$inputVersion = $attribs['envOptions']['inputContentVersion'];
	$htmlHeaders = $revision['html']['headers'] ?? null;
	$contentModel = $revision['contentmodel'] ?? null;

	$inputBundle = new PageBundle(
		$htmlBody, $dataParsoid, $dataMw, $inputVersion, $htmlHeaders, $contentModel
	);

	$outputBundle = $parsoid->pb2pb( $pageConfig, 'redlinks', $inputBundle, [] );

	// The updated bundle is checked against the version of the input.
	$this->validatePb( $outputBundle, $inputVersion );

	$response = $this->getResponseFactory()->createJson( $outputBundle->responseData() );
	ParsoidFormatHelper::setContentType(
		$response,
		ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
		$outputBundle->version
	);

	return $response;
}
1003 | |
/**
 * Do variant conversion on a document.
 *
 * The target variant is read from the `updates.variant.target` option and
 * falls back to the `htmlVariantLanguage` env option. The source variant is
 * optional. The supplied revision is wrapped in a PageBundle, converted by
 * the LanguageVariantConverter and returned as a JSON pagebundle response.
 *
 * @param PageConfig $pageConfig
 * @param array $attribs
 * @param array $revision
 * @return Response
 * @throws HttpException 400 if no target variant can be determined, or if
 *   the converter rejects the requested conversion
 */
protected function languageConversion(
	PageConfig $pageConfig, array $attribs, array $revision
) {
	$opts = $attribs['opts'];
	// The trailing `?? null` keeps a missing 'htmlVariantLanguage' key from
	// raising an "Undefined array key" warning; the absent target is then
	// reported through the 400 below.
	$target = $opts['updates']['variant']['target'] ??
		$attribs['envOptions']['htmlVariantLanguage'] ?? null;
	$source = $opts['updates']['variant']['source'] ?? null;

	if ( !$target ) {
		throw new LocalizedHttpException( new MessageValue( "rest-target-variant-required" ), 400 );
	}

	$pageIdentity = $this->tryToCreatePageIdentity( $attribs );

	$pb = new PageBundle(
		$revision['html']['body'],
		$revision['data-parsoid']['body'] ?? null,
		$revision['data-mw']['body'] ?? null,
		$attribs['envOptions']['inputContentVersion'],
		$revision['html']['headers'] ?? null,
		$revision['contentmodel'] ?? null
	);

	// XXX: DI should inject HtmlTransformFactory
	$languageVariantConverter = MediaWikiServices::getInstance()
		->getHtmlTransformFactory()
		->getLanguageVariantConverter( $pageIdentity );
	$languageVariantConverter->setPageConfig( $pageConfig );

	// An explicit page language in the request attributes overrides the
	// converter's page language.
	$httpContentLanguage = $attribs['pagelanguage'] ?? null;
	if ( $httpContentLanguage ) {
		$languageVariantConverter->setPageLanguageOverride( $httpContentLanguage );
	}

	try {
		$out = $languageVariantConverter->convertPageBundleVariant( $pb, $target, $source );
	} catch ( InvalidArgumentException $e ) {
		throw new LocalizedHttpException(
			new MessageValue( "rest-unsupported-language-conversion", [ $source ?? '(unspecified)', $target ] ),
			400,
			[ 'reason' => $e->getMessage() ]
		);
	}

	$response = $this->getResponseFactory()->createJson( $out->responseData() );
	ParsoidFormatHelper::setContentType(
		$response, ParsoidFormatHelper::FORMAT_PAGEBUNDLE, $out->version
	);
	return $response;
}
1062 | |
/**
 * Declared abstract here: every concrete subclass must provide the
 * implementation and return a Response.
 *
 * @inheritDoc
 */
abstract public function execute(): Response;
1065 | |
/**
 * Check a PageBundle against the given content version and reject the
 * request with a 400 "rest-page-bundle-validation-error" when the check
 * fails.
 *
 * @param PageBundle $pb
 * @param string $contentVersion
 * @throws HttpException
 */
private function validatePb( PageBundle $pb, string $contentVersion ): void {
	// Handed to validate(); whatever it holds afterwards becomes the error detail.
	$reason = '';
	if ( $pb->validate( $contentVersion, $reason ) ) {
		return;
	}

	$errorValue = new MessageValue( "rest-page-bundle-validation-error", [ $reason ] );
	throw new LocalizedHttpException( $errorValue, 400 );
}
1082 | |
/**
 * Look up the page identity for the link target of a PageConfig via the
 * PageStore service.
 *
 * @param PageConfig $page
 *
 * @return ProperPageIdentity
 * @throws HttpException 400 "Bad title" when the PageStore rejects the link target
 */
private function pageConfigToPageIdentity( PageConfig $page ): ProperPageIdentity {
	$services = MediaWikiServices::getInstance();
	$title = $page->getLinkTarget();

	try {
		return $services->getPageStore()->getPageForLink( $title );
	} catch ( MalformedTitleException | InvalidArgumentException $e ) {
		// Note that even some well-formed links are still invalid
		// parameters for getPageForLink(), e.g. interwiki links or special pages.
		throw new HttpException(
			"Bad title: $title", # uses LinkTarget::__toString()
			400
		);
	}
}
1106 | |
1107 | } |