Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
48.37% |
89 / 184 |
|
17.65% |
3 / 17 |
CRAP | |
0.00% |
0 / 1 |
Parsoid | |
48.37% |
89 / 184 |
|
17.65% |
3 / 17 |
379.45 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
defaultHTMLVersion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
resolveContentVersion | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
supportsLanguageConversion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setupCommonOptions | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
72 | |||
parseWikitext | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
42 | |||
wikitext2html | |
95.65% |
22 / 23 |
|
0.00% |
0 / 1 |
5 | |||
wikitext2lint | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
dom2wikitext | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
6 | |||
html2wikitext | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
pb2pb | |
98.33% |
59 / 60 |
|
0.00% |
0 / 1 |
6 | |||
substTopLevelTemplates | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
findDowngrade | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
downgrade | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
30 | |||
implementsLanguageConversion | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
implementsLanguageConversionBcp47 | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
downgrade999to2 | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid; |
5 | |
6 | use Composer\Semver\Comparator; |
7 | use Composer\Semver\Semver; |
8 | use InvalidArgumentException; |
9 | use LogicException; |
10 | use Wikimedia\Bcp47Code\Bcp47Code; |
11 | use Wikimedia\Parsoid\Config\DataAccess; |
12 | use Wikimedia\Parsoid\Config\Env; |
13 | use Wikimedia\Parsoid\Config\PageConfig; |
14 | use Wikimedia\Parsoid\Config\SiteConfig; |
15 | use Wikimedia\Parsoid\Config\StubMetadataCollector; |
16 | use Wikimedia\Parsoid\Core\ContentMetadataCollector; |
17 | use Wikimedia\Parsoid\Core\PageBundle; |
18 | use Wikimedia\Parsoid\Core\ResourceLimitExceededException; |
19 | use Wikimedia\Parsoid\Core\SelserData; |
20 | use Wikimedia\Parsoid\DOM\Document; |
21 | use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; |
22 | use Wikimedia\Parsoid\Language\LanguageConverter; |
23 | use Wikimedia\Parsoid\Logger\LintLogger; |
24 | use Wikimedia\Parsoid\Utils\ContentUtils; |
25 | use Wikimedia\Parsoid\Utils\DOMCompat; |
26 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
27 | use Wikimedia\Parsoid\Utils\DOMUtils; |
28 | use Wikimedia\Parsoid\Utils\Utils; |
29 | use Wikimedia\Parsoid\Wikitext\Wikitext; |
30 | use Wikimedia\Parsoid\Wt2Html\PP\Processors\AddRedLinks; |
31 | use Wikimedia\Parsoid\Wt2Html\PP\Processors\ConvertOffsets; |
32 | |
33 | class Parsoid { |
34 | |
35 | /**
36 | * Available HTML content versions; the first entry is the default (see defaultHTMLVersion()).
37 | * @see https://www.mediawiki.org/wiki/Parsoid/API#Content_Negotiation
38 | * @see https://www.mediawiki.org/wiki/Specs/HTML#Versioning
39 | */
40 | public const AVAILABLE_VERSIONS = [ '2.7.0', '999.0.0' ];
41 | /** Known downgrade paths; 'func' names the private static method of this class that performs each one. */
42 | private const DOWNGRADES = [
43 | [ 'from' => '999.0.0', 'to' => '2.0.0', 'func' => 'downgrade999to2' ],
44 | ];
45 | 
46 | /** @var SiteConfig Site configuration passed to every Env this object creates */
47 | private $siteConfig;
48 | 
49 | /** @var DataAccess Data access object passed to every Env this object creates */
50 | private $dataAccess;
51 | |
52 | /** |
53 | * @param SiteConfig $siteConfig |
54 | * @param DataAccess $dataAccess |
55 | */ |
56 | public function __construct(
57 | SiteConfig $siteConfig, DataAccess $dataAccess
58 | ) {
59 | // Both collaborators are only stored here; they are used when an Env is built.
60 | [ $this->siteConfig, $this->dataAccess ] = [ $siteConfig, $dataAccess ];
61 | }
62 | |
63 | /** |
64 | * Returns the default HTML content version |
65 | * @return string |
66 | */ |
67 | public static function defaultHTMLVersion(): string {
68 | return current( self::AVAILABLE_VERSIONS );
69 | }
70 | |
71 | /** |
72 | * See if any content version Parsoid knows how to produce satisfies the |
73 | * supplied version, when interpreted with semver caret semantics.
74 | * This will allow us to make backwards compatible changes, without the need |
75 | * for clients to bump the version in their headers all the time. |
76 | * |
77 | * @param string $version |
78 | * @return string|null |
79 | */ |
80 | public static function resolveContentVersion( string $version ) {
81 | foreach ( self::AVAILABLE_VERSIONS as $candidate ) {
82 | if ( !Semver::satisfies( $candidate, "^{$version}" ) ) {
83 | continue;
84 | }
85 | // The section wrapping in 1.6.x should have induced a major
86 | // version bump, since it requires upgrading clients to handle
87 | // it. Requests below 1.6.0 are therefore hardcoded to fail hard.
88 | if ( Comparator::greaterThanOrEqualTo( $version, '1.6.0' ) ) {
89 | return $candidate;
90 | }
91 | }
92 | return null;
93 | }
94 | |
95 | /**
96 | * Is language conversion enabled, i.e. is the optional wikimedia/langconv library installed?
97 | * @return bool True if the wikimedia/langconv library is available
98 | */
99 | public static function supportsLanguageConversion(): bool {
100 | $probeClass = '\Wikimedia\LangConv\ReplacementMachine';
101 | return class_exists( $probeClass );
102 | }
103 | |
104 | /**
105 | * @param array $options Caller-supplied options; see wikitext2html() and dom2wikitext().
106 | * @return array The subset of $options that is copied unchanged into the Env options.
107 | */
108 | private function setupCommonOptions( array $options ): array {
109 | // Option name => whether a set-but-empty value is dropped as well.
110 | // The order of this table fixes the key order of the result.
111 | $forwarded = [
112 | 'offsetType' => false,
113 | 'traceFlags' => false,
114 | 'dumpFlags' => false,
115 | 'debugFlags' => false,
116 | 'htmlVariantLanguage' => true,
117 | 'wtVariantLanguage' => true,
118 | 'logLevels' => false,
119 | ];
120 | $envOptions = [];
121 | foreach ( $forwarded as $name => $skipIfEmpty ) {
122 | if ( !isset( $options[$name] ) ) {
123 | continue;
124 | }
125 | if ( $skipIfEmpty && empty( $options[$name] ) ) {
126 | continue;
127 | }
128 | $envOptions[$name] = $options[$name];
129 | }
130 | 
131 | return $envOptions;
132 | }
133 | |
134 | /**
135 | * Build an Env for the page and run its content handler's toDOM().
136 | * Shared by wikitext2html() and wikitext2lint().
137 | *
138 | * @param PageConfig $pageConfig
139 | * @param ContentMetadataCollector $metadata
140 | * @param array $options See wikitext2html.
141 | * @return array [ Env, result of the content handler's toDOM(), ?string 'contentmodel' option ]
142 | * @throws ResourceLimitExceededException if the wikitextSize limit check fails
143 | */
144 | private function parseWikitext(
145 | PageConfig $pageConfig,
146 | ContentMetadataCollector $metadata,
147 | array $options = []
148 | ): array {
149 | $envOptions = $this->setupCommonOptions( $options );
150 | if ( isset( $options['outputContentVersion'] ) ) {
151 | $envOptions['outputContentVersion'] = $options['outputContentVersion'];
152 | }
153 | $envOptions['discardDataParsoid'] = !empty( $options['discardDataParsoid'] );
154 | // These switches are forwarded (coerced to bool) only when supplied;
155 | // otherwise the key is left out of $envOptions entirely.
156 | foreach ( [ 'wrapSections', 'pageBundle', 'logLinterData' ] as $flag ) {
157 | if ( isset( $options[$flag] ) ) {
158 | $envOptions[$flag] = !empty( $options[$flag] );
159 | }
160 | }
161 | $env = new Env(
162 | $this->siteConfig, $pageConfig, $this->dataAccess, $metadata, $envOptions
163 | );
164 | // Check the source size against the limit before handing off to the content handler.
165 | $wikitextSize = strlen( $env->topFrame->getSrcText() );
166 | if ( !$env->compareWt2HtmlLimit( 'wikitextSize', $wikitextSize ) ) {
167 | throw new ResourceLimitExceededException(
168 | "wt2html: wikitextSize limit exceeded"
169 | );
170 | }
171 | $contentmodel = $options['contentmodel'] ?? null;
172 | $handler = $env->getContentHandler( $contentmodel );
173 | $dom = $handler->toDOM( new ParsoidExtensionAPI( $env ) );
174 | return [ $env, $dom, $contentmodel ];
175 | }
176 | |
177 | /** |
178 | * Parse the wikitext supplied in a `PageConfig` to HTML. |
179 | * |
180 | * @param PageConfig $pageConfig |
181 | * @param array $options [ |
182 | * 'wrapSections' => (bool) Whether `<section>` wrappers should be added. |
183 | * 'pageBundle' => (bool) Sets ids on nodes and stores |
184 | * data-* attributes in a JSON blob. |
185 | * 'body_only' => (bool|null) Only return the <body> children (T181657) |
186 | * 'outputContentVersion' => (string|null) Version of HTML to output. |
187 | * `null` returns the default version. |
188 | * 'contentmodel' => (string|null) The content model of the input. |
189 | * 'discardDataParsoid' => (bool) Drop all data-parsoid annotations. |
190 | * 'offsetType' => (string) ucs2, char, byte are valid values |
191 | * what kind of source offsets should be emitted? |
192 | * 'htmlVariantLanguage' => (string|Bcp47Code) If non-null, the language variant used for Parsoid HTML. |
193 | * A MediaWiki-internal language code string (deprecated), |
194 | * or a Bcp47Code object. |
195 | * 'wtVariantLanguage' => (string|Bcp47Code) If non-null, the language variant used for wikitext. |
196 | * A MediaWiki-internal language code string (deprecated), |
197 | * or a Bcp47Code object. |
198 | * 'logLinterData' => (bool) Should we log linter data if linting is enabled? |
199 | * 'traceFlags' => (array) associative array with tracing options |
200 | * 'dumpFlags' => (array) associative array with dump options |
201 | * 'debugFlags' => (array) associative array with debug options |
202 | * 'logLevels' => (string[]) Levels to log |
203 | * ] |
204 | * @param ?array &$headers |
205 | * @param ?ContentMetadataCollector $metadata Pass in a CMC in order to |
206 | * collect and retrieve metadata about the parse. |
207 | * @return PageBundle|string |
208 | */ |
209 | public function wikitext2html(
210 | PageConfig $pageConfig, array $options = [], ?array &$headers = null,
211 | ?ContentMetadataCollector $metadata = null
212 | ) {
213 | $metadata = $metadata ?? new StubMetadataCollector( $this->siteConfig->getLogger() );
214 | [ $env, $doc, $contentmodel ] = $this->parseWikitext( $pageConfig, $metadata, $options );
215 | 
216 | // FIXME: Does this belong in parseWikitext so that the other endpoint
217 | // is covered as well? It probably depends on the expectations of the
218 | // REST API, i.e. whether callers of /page/lint/ assume that it will
219 | // update the results on the Special page.
220 | if ( $env->getSiteConfig()->linting() ) {
221 | ( new LintLogger( $env ) )->logLintOutput();
222 | }
223 | $headers = DOMUtils::findHttpEquivHeaders( $doc );
224 | $body_only = !empty( $options['body_only'] );
225 | $serializeOpts = [ 'innerXML' => $body_only ];
226 | $node = $body_only ? DOMCompat::getBody( $doc ) : $doc;
227 | 
228 | if ( !$env->pageBundle ) {
229 | // No page bundle requested: return the serialized markup itself.
230 | return ContentUtils::toXML( $node, $serializeOpts );
231 | }
232 | 
233 | // Page bundle output: extractDpAndSerialize() yields the markup
234 | // ('html') together with the extracted bundle data ('pb').
235 | $out = ContentUtils::extractDpAndSerialize( $node, $serializeOpts );
236 | return new PageBundle(
237 | $out['html'],
238 | $out['pb']->parsoid,
239 | $out['pb']->mw ?? null,
240 | $env->getOutputContentVersion(),
241 | $headers,
242 | $contentmodel
243 | );
244 | }
245 | |
246 | /**
247 | * Parse the wikitext supplied in a `PageConfig` and report its lints.
248 | * Only the Env from the parse is used; the DOM is discarded.
249 | *
250 | * @param PageConfig $pageConfig
251 | * @param array $options See wikitext2html.
252 | * @return array Whatever Env::getLints() reports after the parse
253 | */
254 | public function wikitext2lint( PageConfig $pageConfig, array $options = [] ): array {
255 | $collector = new StubMetadataCollector( $this->siteConfig->getLogger() );
256 | $parsed = $this->parseWikitext( $pageConfig, $collector, $options );
257 | $env = $parsed[0];
258 | return $env->getLints();
259 | }
260 | |
261 | /** |
262 | * Serialize DOM to wikitext. |
263 | * |
264 | * @param PageConfig $pageConfig |
265 | * @param Document $doc Data attributes are expected to have been applied |
266 | * already. Loading them will happen once the environment is created. |
267 | * @param array $options [ |
268 | * 'inputContentVersion' => (string) The content version of the input. |
269 | * Necessary if it differs from the current default in order to |
270 | * account for any serialization differences. |
271 | * 'offsetType' => (string) ucs2, char, byte are valid values |
272 | * what kind of source offsets are present in the HTML? |
273 | * 'contentmodel' => (string|null) The content model of the input. |
274 | * 'htmlVariantLanguage' => (string|Bcp47Code) If non-null, the language variant used for Parsoid HTML. |
275 | * A MediaWiki-internal language code string (deprecated), |
276 | * or a Bcp47Code object. |
277 | * 'wtVariantLanguage' => (string|Bcp47Code) If non-null, the language variant used for wikitext. |
278 | * A MediaWiki-internal language code string (deprecated), |
279 | * or a Bcp47Code object. |
280 | * 'traceFlags' => (array) associative array with tracing options |
281 | * 'dumpFlags' => (array) associative array with dump options |
282 | * 'debugFlags' => (array) associative array with debug options |
283 | * 'logLevels' => (string[]) Levels to log |
284 | * 'htmlSize' => (int) Size of the HTML that generated $doc |
285 | * ] |
286 | * @param ?SelserData $selserData |
287 | * @return string |
288 | */ |
289 | public function dom2wikitext(
290 | PageConfig $pageConfig, Document $doc, array $options = [],
291 | ?SelserData $selserData = null
292 | ): string {
293 | $envOptions = $this->setupCommonOptions( $options );
294 | if ( isset( $options['inputContentVersion'] ) ) {
295 | $envOptions['inputContentVersion'] = $options['inputContentVersion'];
296 | }
297 | $envOptions['topLevelDoc'] = $doc;
298 | $env = new Env(
299 | $this->siteConfig, $pageConfig, $this->dataAccess,
300 | new StubMetadataCollector( $this->siteConfig->getLogger() ),
301 | $envOptions
302 | );
303 | // Record the size of the input HTML; 0 when the caller supplied none.
304 | $env->bumpHtml2WtResourceUse( 'htmlSize', $options['htmlSize'] ?? 0 );
305 | $handler = $env->getContentHandler( $options['contentmodel'] ?? null );
306 | return $handler->fromDOM( new ParsoidExtensionAPI( $env ), $selserData );
307 | }
308 | |
309 | /** |
310 | * Serialize HTML to wikitext. Convenience method for dom2wikitext. |
311 | * |
312 | * @param PageConfig $pageConfig |
313 | * @param string $html |
314 | * @param array $options |
315 | * @param ?SelserData $selserData |
316 | * @return string |
317 | */ |
318 | public function html2wikitext(
319 | PageConfig $pageConfig, string $html, array $options = [], ?SelserData $selserData = null
320 | ): string {
321 | return $this->dom2wikitext(
322 | $pageConfig, DOMUtils::parseHTML( $html, true ), $options, $selserData
323 | );
324 | }
325 | |
326 | /**
327 | * Update the supplied PageBundle based on the `$update` type.
328 | *
329 | * 'redlinks': Refreshes the classes of known, missing, etc. links.
330 | * 'variant': Converts the HTML based on the supplied variant.
331 | *
332 | * Note that these are DOM transforms, and not roundtrips through wikitext.
333 | *
334 | * @param PageConfig $pageConfig
335 | * @param string $update 'redlinks'|'variant'
336 | * @param PageBundle $pb
337 | * @param array $options 'variant' => [ 'target' => ..., 'source' => ... (optional) ], read for 'variant'
338 | * updates and passed to Utils::mwCodeToBcp47(); 'body_only' => (bool) serialize only the <body>.
339 | * @return PageBundle
340 | * @throws LogicException if $update is neither 'redlinks' nor 'variant'
341 | */
342 | public function pb2pb(
343 | PageConfig $pageConfig, string $update, PageBundle $pb,
344 | array $options = []
345 | ): PageBundle {
346 | $envOptions = [
347 | 'pageBundle' => true,
348 | 'topLevelDoc' => DOMUtils::parseHTML( $pb->toHtml(), true ),
349 | ];
350 | $metadata = new StubMetadataCollector( $this->siteConfig->getLogger() );
351 | $env = new Env(
352 | $this->siteConfig, $pageConfig, $this->dataAccess, $metadata, $envOptions
353 | );
354 | $doc = $env->topLevelDoc;
355 | DOMDataUtils::visitAndLoadDataAttribs( DOMCompat::getBody( $doc ), [ 'markNew' => true ] );
356 | // Offsets are converted between the request's offset type and 'byte'
357 | // before the transform (argument order presumed from/to -- confirm).
358 | ContentUtils::convertOffsets( $env, $doc, $env->getRequestOffsetType(), 'byte' );
359 | if ( $update === 'redlinks' ) {
360 | ( new AddRedLinks() )->run( $env, DOMCompat::getBody( $doc ) );
361 | } elseif ( $update === 'variant' ) {
362 | // Note that `maybeConvert` could still be a no-op, in case the
363 | // __NOCONTENTCONVERT__ magic word is present, or the targetVariant
364 | // is a base language code or otherwise invalid.
365 | LanguageConverter::maybeConvert(
366 | $env, $doc,
367 | Utils::mwCodeToBcp47( $options['variant']['target'] ),
368 | // 'source' is optional: a missing or empty value means no source variant.
369 | !empty( $options['variant']['source'] ) ?
370 | Utils::mwCodeToBcp47( $options['variant']['source'] ) : null
371 | );
372 | // Update content-language and vary headers.
373 | // This also ensures there is a <head> element.
374 | $ensureHeader = static function ( string $h ) use ( $doc ) {
375 | $el = DOMCompat::querySelector( $doc, "meta[http-equiv=\"{$h}\"i]" );
376 | if ( !$el ) {
377 | $el = DOMUtils::appendToHead( $doc, 'meta', [ 'http-equiv' => $h ] );
378 | }
379 | return $el;
380 | };
381 | ( $ensureHeader( 'content-language' ) )->setAttribute(
382 | 'content', $env->htmlContentLanguageBcp47()->toBcp47Code()
383 | );
384 | ( $ensureHeader( 'vary' ) )->setAttribute(
385 | 'content', $env->htmlVary()
386 | );
387 | } else {
388 | throw new LogicException( 'Unknown transformation.' );
389 | }
390 | // Run the ConvertOffsets pass before the data attributes are stored again.
391 | ( new ConvertOffsets() )->run( $env, DOMCompat::getBody( $doc ), [], true );
392 | DOMDataUtils::visitAndStoreDataAttribs(
393 | DOMCompat::getBody( $doc ), [
394 | 'discardDataParsoid' => $env->discardDataParsoid,
395 | 'storeInPageBundle' => $env->pageBundle,
396 | 'env' => $env,
397 | ]
398 | );
399 | $body_only = !empty( $options['body_only'] );
400 | $node = $body_only ? DOMCompat::getBody( $doc ) : $doc;
401 | DOMDataUtils::injectPageBundle( $doc, DOMDataUtils::getPageBundle( $doc ) );
402 | $out = ContentUtils::extractDpAndSerialize( $node, [
403 | 'innerXML' => $body_only,
404 | ] );
405 | return new PageBundle(
406 | $out['html'],
407 | $out['pb']->parsoid, $out['pb']->mw ?? null,
408 | // Prefer the passed in version, since this was just a transformation
409 | $pb->version ?? $env->getOutputContentVersion(),
410 | DOMUtils::findHttpEquivHeaders( $doc ),
411 | // Prefer the passed in content model
412 | $pb->contentmodel ?? $pageConfig->getContentModel()
413 | );
414 | }
415 | |
416 | /** |
417 | * Perform pre-save transformations with top-level templates subst'd. |
418 | * |
419 | * @param PageConfig $pageConfig |
420 | * @param string $wikitext |
421 | * @return string |
422 | */ |
423 | public function substTopLevelTemplates( PageConfig $pageConfig, string $wikitext ): string {
424 | $env = new Env(
425 | $this->siteConfig, $pageConfig, $this->dataAccess,
426 | new StubMetadataCollector( $this->siteConfig->getLogger() )
427 | );
428 | return Wikitext::pst( $env, $wikitext, true /* $substTLTemplates */ );
429 | }
430 | |
431 | /** |
432 | * Check whether a given content version can be downgraded to the requested |
433 | * content version. |
434 | * |
435 | * @param string $from Current content version |
436 | * @param string $to Requested content version |
437 | * @return string[]|null The downgrade that will fulfill the request, as |
438 | * [ 'from' => <old version>, 'to' => <new version> ], or null if it |
439 | * can't be fulfilled. |
440 | */ |
441 | public static function findDowngrade( string $from, string $to ): ?array {
442 | foreach ( self::DOWNGRADES as [ 'from' => $src, 'to' => $dst ] ) {
443 | if ( !Semver::satisfies( $from, "^$src" ) ) {
444 | continue;
445 | }
446 | if ( Semver::satisfies( $to, "^$dst" ) ) {
447 | // FIXME: Make this a class?
448 | return [ 'from' => $src, 'to' => $dst ];
449 | }
450 | }
451 | return null;
452 | }
453 | |
454 | /**
455 | * Downgrade a document to an older content version.
456 | *
457 | * @param string[] $dg Value returned by findDowngrade().
458 | * @param PageBundle $pageBundle Modified in place (version, html, and whatever the downgrade function changes)
459 | * @throws InvalidArgumentException if $dg matches no entry of DOWNGRADES
460 | */
461 | public static function downgrade( array $dg, PageBundle $pageBundle ): void {
462 | foreach ( self::DOWNGRADES as [ 'from' => $dgFrom, 'to' => $dgTo, 'func' => $dgFunc ] ) {
463 | if ( $dg['from'] !== $dgFrom || $dg['to'] !== $dgTo ) {
464 | continue;
465 | }
466 | // $dgFunc names a private static method of this class.
467 | self::$dgFunc( $pageBundle );
468 | 
469 | // FIXME: Maybe this resolve should just be part of the $dg
470 | $pageBundle->version = self::resolveContentVersion( $dg['to'] );
471 | // FIXME: Maybe this should be a helper to avoid the rt
472 | $doc = DOMUtils::parseHTML( $pageBundle->html );
473 | // Match the http-equiv meta to the content-type header
474 | $versionMetaSelector =
475 | 'meta[property="mw:htmlVersion"], meta[property="mw:html:version"]';
476 | $meta = DOMCompat::querySelector( $doc, $versionMetaSelector );
477 | if ( $meta ) {
478 | $meta->setAttribute( 'content', $pageBundle->version );
479 | $pageBundle->html = ContentUtils::toXML( $doc );
480 | }
481 | return;
482 | }
483 | throw new InvalidArgumentException(
484 | "Unsupported downgrade: {$dg['from']} -> {$dg['to']}"
485 | );
486 | }
487 | |
488 | /** |
489 | * Check if language variant conversion is implemented for a language |
490 | * |
491 | * @internal FIXME: Remove once Parsoid's language variant work is completed |
492 | * @param PageConfig $pageConfig |
493 | * @param string $targetVariantCode Variant code to check |
494 | * @return bool |
495 | * @deprecated Use ::implementsLanguageConversionBcp47() |
496 | */ |
497 | public function implementsLanguageConversion( PageConfig $pageConfig, string $targetVariantCode ): bool {
498 | // Legacy string-code entry point: convert to a Bcp47Code, then delegate.
499 | $targetVariant = Utils::mwCodeToBcp47( $targetVariantCode );
500 | 
501 | return $this->implementsLanguageConversionBcp47( $pageConfig, $targetVariant );
502 | }
503 | |
504 | /** |
505 | * Check if language variant conversion is implemented for a language |
506 | * |
507 | * @internal FIXME: Remove once Parsoid's language variant work is completed |
508 | * @param PageConfig $pageConfig |
509 | * @param Bcp47Code $targetVariant Variant language to check |
510 | * @return bool |
511 | */ |
512 | public function implementsLanguageConversionBcp47( PageConfig $pageConfig, Bcp47Code $targetVariant ): bool {
513 | // A throwaway Env is built for the check; its metadata collector is not read here.
514 | $collector = new StubMetadataCollector( $this->siteConfig->getLogger() );
515 | $env = new Env( $this->siteConfig, $pageConfig, $this->dataAccess, $collector );
516 | return LanguageConverter::implementsLanguageConversion( $env, $targetVariant );
517 | }
518 | |
519 | /** |
520 | * Downgrade the given document and pagebundle from 999.x to 2.x. |
521 | * |
522 | * @param PageBundle $pageBundle |
523 | */ |
524 | private static function downgrade999to2( PageBundle $pageBundle ) {
525 | // Building the HTML from a bundle whose data-parsoid part is empty
526 | // effectively skips applying data-parsoid. Note that if we were to
527 | // support a pb2html downgrade, we'd need to apply the full thing, but
528 | // that would create complications where ids would be left behind.
529 | // See the comment around `DOMDataUtils::applyPageBundle`.
530 | $emptyIds = [ 'ids' => [] ];
531 | $pageBundle->html = ( new PageBundle(
532 | $pageBundle->html, $emptyIds, $pageBundle->mw
533 | ) )->toHtml();
534 | 
535 | // Now, modify the pagebundle to the expected form. This is important
536 | // since, at least in the serialization path, the original pb will be
537 | // applied to the modified content and its presence could cause lost
538 | // deletions.
539 | $pageBundle->mw = $emptyIds;
540 | }
541 | } |