MediaWiki master
LanguageVariantConverter.php
Go to the documentation of this file.
1<?php
2
4
16use Wikimedia\Bcp47Code\Bcp47Code;
17use Wikimedia\Bcp47Code\Bcp47CodeValue;
19use Wikimedia\Parsoid\Config\PageConfig;
20use Wikimedia\Parsoid\Config\SiteConfig;
21use Wikimedia\Parsoid\Core\PageBundle;
22use Wikimedia\Parsoid\DOM\Element;
23use Wikimedia\Parsoid\Parsoid;
24use Wikimedia\Parsoid\Utils\DOMCompat;
25use Wikimedia\Parsoid\Utils\DOMUtils;
26
/** Factory used by getPageConfig() to build a PageConfig when none has been supplied. */
32 private PageConfigFactory $pageConfigFactory;
/** PageConfig used for conversion; injected via setPageConfig() or created on first use by getPageConfig(). */
33 private ?PageConfig $pageConfig = null;
/** The page whose content is being converted; passed to the PageConfigFactory. */
34 private PageIdentity $pageIdentity;
/** Title derived from $pageIdentity in the constructor; last-resort source of the page language. */
35 private Title $pageTitle;
/** Parsoid instance asked first whether it implements the requested conversion. */
36 private Parsoid $parsoid;
/** Consulted to find out whether language conversion is enabled for the page language. */
37 private SiteConfig $siteConfig;
/** Supplies the core language converter used on the fallback path and for variant checks. */
38 private LanguageConverterFactory $languageConverterFactory;
/** Used to look up parent languages and Language objects for BCP-47 codes. */
39 private LanguageFactory $languageFactory;
/** When set via setPageLanguageOverride(), takes precedence over every other page language source. */
43 private ?Bcp47Code $pageLanguageOverride = null;
/** Whether the core LanguageConverter may be used when Parsoid cannot do the conversion. */
44 private bool $isFallbackLanguageConverterEnabled = true;
45
/**
 * Wire up the collaborators needed to convert the content of a single page.
 *
 * @param PageIdentity $pageIdentity Page whose content will be converted
 * @param PageConfigFactory $pageConfigFactory Builds a PageConfig when none was supplied
 *   through setPageConfig()
 * @param Parsoid $parsoid
 * @param SiteConfig $siteConfig
 * @param TitleFactory $titleFactory Only used here, to derive the Title of $pageIdentity
 * @param LanguageConverterFactory $languageConverterFactory
 * @param LanguageFactory $languageFactory
 */
public function __construct(
	PageIdentity $pageIdentity,
	PageConfigFactory $pageConfigFactory,
	Parsoid $parsoid,
	SiteConfig $siteConfig,
	TitleFactory $titleFactory,
	LanguageConverterFactory $languageConverterFactory,
	LanguageFactory $languageFactory
) {
	// The page itself, plus the Title object derived from it.
	$this->pageIdentity = $pageIdentity;
	$this->pageTitle = $titleFactory->newFromPageIdentity( $pageIdentity );

	// Parsoid-side services.
	$this->parsoid = $parsoid;
	$this->siteConfig = $siteConfig;
	$this->pageConfigFactory = $pageConfigFactory;

	// Core language services.
	$this->languageFactory = $languageFactory;
	$this->languageConverterFactory = $languageConverterFactory;
}
63
/**
 * Set the PageConfig object to be used during language variant conversion.
 *
 * Once a PageConfig has been set, getPageConfig() hands it back as-is instead
 * of creating one from the page identity.
 *
 * @param PageConfig $pageConfig
 */
public function setPageConfig( PageConfig $pageConfig ) {
	$this->pageConfig = $pageConfig;
}
74
/**
 * Set the page content language override.
 *
 * When an override is present, getPageLanguage() returns it before looking at
 * the page bundle headers, the caller-supplied default, the PageConfig or the Title.
 *
 * @param Bcp47Code $language
 */
public function setPageLanguageOverride( Bcp47Code $language ) {
	$this->pageLanguageOverride = $language;
}
84
/**
 * Perform variant conversion on a PageBundle object.
 *
 * Conversion is attempted in this order:
 *  1. If language conversion is not enabled for the page language, the bundle
 *     is returned untouched.
 *  2. If Parsoid implements conversion to $targetVariant, Parsoid does the work.
 *  3. Otherwise, unless the fallback has been disabled, the core
 *     LanguageConverter is applied to the bundle's HTML and a new PageBundle
 *     is built around the result.
 *
 * @param PageBundle $pageBundle
 * @param Bcp47Code $targetVariant Variant to convert to
 * @param Bcp47Code|null $sourceVariant Variant the content is written in, if known
 *
 * @return PageBundle The converted bundle, or $pageBundle itself when nothing was done
 */
public function convertPageBundleVariant(
	PageBundle $pageBundle,
	Bcp47Code $targetVariant,
	?Bcp47Code $sourceVariant = null
): PageBundle {
	[ $pageLanguage, $sourceVariant ] =
		$this->getBaseAndSourceLanguage( $pageBundle, $sourceVariant );

	// Nothing to do for languages that have no variant support.
	if ( !$this->siteConfig->langConverterEnabledBcp47( $pageLanguage ) ) {
		return $pageBundle;
	}

	$pageConfig = $this->getPageConfig( $pageLanguage, $sourceVariant );

	// Preferred path: let Parsoid handle the conversion when it can.
	if ( $this->parsoid->implementsLanguageConversionBcp47( $pageConfig, $targetVariant ) ) {
		$variantOptions = [
			'source' => $sourceVariant,
			'target' => $targetVariant,
		];
		return $this->parsoid->pb2pb(
			$pageConfig, 'variant', $pageBundle, [ 'variant' => $variantOptions ]
		);
	}

	// Parsoid cannot do it; bail out if the core fallback has been switched off.
	if ( !$this->isFallbackLanguageConverterEnabled ) {
		return $pageBundle;
	}

	// LanguageConverter::hasVariant and LanguageConverter::convertTo work with
	// language code strings, so derive one from the Bcp47Code target. If they
	// accept Bcp47Code objects in the future, this step can go away.
	$parentLanguage = $this->languageFactory->getParentLanguage( $targetVariant );
	$coreConverter = $this->languageConverterFactory->getLanguageConverter( $parentLanguage );
	$targetVariantCode = $this->languageFactory->getLanguage( $targetVariant )->getCode();

	if ( !$coreConverter->hasVariant( $targetVariantCode ) ) {
		// No conversion possible: keep the original HTML and report the page's own language.
		$html = $pageBundle->html;
		$pageVariant = $pageConfig->getPageLanguageBcp47();
	} else {
		// convertTo() rather than convert(): the exact target variant is known,
		// so no preferred variant needs to be computed from a base language.
		// See T267067 for why convert() should be avoided.
		$html = $coreConverter->convertTo( $pageBundle->html, $targetVariantCode );
		$pageVariant = $targetVariant;
	}

	// Leave a marker so the origin of the conversion can be identified later.
	$html = "<!-- Variant conversion performed using the core LanguageConverter -->" . $html;

	// NOTE: Keep this in sync with code in Parsoid.php in Parsoid repo.
	// Emit the meta information that Parsoid would normally add.
	$headers = [
		'content-language' => $pageVariant->toBcp47Code(),
		'vary' => [ 'Accept', 'Accept-Language' ]
	];
	$doc = DOMUtils::parseHTML( '' );
	$headElt = $doc->createElement( 'head' );
	$doc->appendChild( $headElt );
	DOMUtils::addHttpEquivHeaders( $doc, $headers );
	$docElt = $doc->documentElement;
	'@phan-var Element $docElt';
	$docHtml = DOMCompat::getOuterHTML( $docElt );

	// Append a closing body tag, then swap every closing body tag for the serialized document.
	$html = preg_replace( "#</body>#", $docHtml, $html . '</body>' );

	return new PageBundle( $html, [], [], $pageBundle->version, $headers );
}
167
/**
 * Perform variant conversion on a ParserOutput object.
 *
 * The ParserOutput is turned into a PageBundle, run through
 * convertPageBundleVariant(), and the result is turned back into a
 * ParserOutput, with the original ParserOutput passed along to the converter.
 *
 * NOTE(review): the declaration line was missing from this copy of the file;
 * the name and parameters were restored from the documented signature, and
 * public visibility is assumed to match convertPageBundleVariant() - confirm.
 *
 * @param ParserOutput $parserOutput
 * @param Bcp47Code $targetVariant Variant to convert to
 * @param Bcp47Code|null $sourceVariant Variant the content is written in, if known
 *
 * @return ParserOutput
 */
public function convertParserOutputVariant(
	ParserOutput $parserOutput,
	Bcp47Code $targetVariant,
	?Bcp47Code $sourceVariant = null
): ParserOutput {
	$pageBundle = PageBundleParserOutputConverter::pageBundleFromParserOutput( $parserOutput );
	$modifiedPageBundle = $this->convertPageBundleVariant( $pageBundle, $targetVariant, $sourceVariant );

	return PageBundleParserOutputConverter::parserOutputFromPageBundle( $modifiedPageBundle, $parserOutput );
}
187
/**
 * Disable fallback language variant converter.
 *
 * After this call, convertPageBundleVariant() returns the page bundle
 * unchanged whenever Parsoid does not implement the requested conversion,
 * instead of falling back to the core LanguageConverter.
 */
public function disableFallbackLanguageConverter(): void {
	$this->isFallbackLanguageConverterEnabled = false;
}
195
/**
 * Return the PageConfig to use for conversion, creating and caching it on first use.
 *
 * A PageConfig that is already present (injected through setPageConfig() or
 * created by an earlier call) is returned unchanged; the arguments are only
 * used when a new PageConfig has to be built.
 *
 * @param Bcp47Code $pageLanguage Page language handed to the PageConfigFactory
 * @param Bcp47Code|null $sourceVariant If given, set as the variant on the new PageConfig
 *
 * @return PageConfig
 * @throws LocalizedHttpException with status 404 if the factory reports a RevisionAccessException
 */
private function getPageConfig( Bcp47Code $pageLanguage, ?Bcp47Code $sourceVariant ): PageConfig {
	if ( $this->pageConfig === null ) {
		try {
			$this->pageConfig = $this->pageConfigFactory->create(
				$this->pageIdentity,
				null,
				null,
				null,
				$pageLanguage
			);

			if ( $sourceVariant !== null ) {
				$this->pageConfig->setVariantBcp47( $sourceVariant );
			}
		} catch ( RevisionAccessException $exception ) {
			// TODO: Throw a different exception, this class should not know
			//       about HTTP status codes.
			$message = new MessageValue( "rest-specified-revision-unavailable" );
			throw new LocalizedHttpException( $message, 404 );
		}
	}

	return $this->pageConfig;
}
221
/**
 * Work out the language of the page content.
 *
 * Sources are tried in this order, and the first one that yields a value wins:
 *  1. the override set through setPageLanguageOverride()
 *  2. the 'content-language' header of the page bundle
 *  3. the explicit $default
 *  4. the PageConfig, if one is available
 *  5. the page Title
 *
 * @param PageBundle $pageBundle
 * @param Bcp47Code|null $default Language to use when neither an override nor a
 *   bundle header is available
 *
 * @return Bcp47Code
 */
private function getPageLanguage( PageBundle $pageBundle, ?Bcp47Code $default = null ): Bcp47Code {
	// An explicit override always wins.
	if ( $this->pageLanguageOverride !== null ) {
		return $this->pageLanguageOverride;
	}

	// Next, trust the language recorded in the page bundle. The HTTP header
	// holds a BCP-47 language code, not a mediawiki-internal one.
	$headerLanguage = $pageBundle->headers[ 'content-language' ] ?? null;
	if ( $headerLanguage ) {
		return new Bcp47CodeValue( $headerLanguage );
	}

	// NOTE: The explicit default has to be checked *before* the PageConfig,
	// because PageConfig::getPageLanguage() falls back to Title::getPageLanguage().
	// If the PageConfig were consulted first, $default would never be used.
	if ( $default !== null ) {
		return $default;
	}

	// Last resorts: ask the PageConfig if there is one (which itself falls back
	// to the Title), otherwise go by the language associated with the Title,
	// which may come from the database or be derived from the title itself.
	return $this->pageConfig !== null
		? $this->pageConfig->getPageLanguageBcp47()
		: $this->pageTitle->getPageLanguage();
}
278
/**
 * Determine the base language of the page and the source variant of its content.
 *
 * The page language found by getPageLanguage() may itself be a variant code.
 * If so, it is replaced by its parent language and, unless the caller already
 * named a source variant, it becomes the source variant.
 *
 * A source variant is only kept if it differs from the parent language and the
 * parent language's converter knows it as a variant. Otherwise null is returned
 * in its place, so that the source variant is auto-detected. This includes the
 * case where no parent language could be determined at all.
 *
 * @param PageBundle $pageBundle
 * @param Bcp47Code|null $sourceLanguage Source variant requested by the caller, if any
 *
 * @return array{0:Bcp47Code,1:?Bcp47Code} [ base language, source variant or null ]
 */
private function getBaseAndSourceLanguage( PageBundle $pageBundle, ?Bcp47Code $sourceLanguage ): array {
	// Try to determine the language code associated with the content of the page.
	// The result may be a variant code.
	$baseLanguage = $this->getPageLanguage( $pageBundle, $sourceLanguage );

	// To find out if $baseLanguage is actually a variant, get the parent language and compare.
	$parentLang = $this->languageFactory->getParentLanguage( $baseLanguage );

	// If $parentLang is not the same language as $baseLanguage, this means that
	// $baseLanguage is a variant. In that case, set $sourceLanguage to that
	// variant (unless $sourceLanguage is already set), and set $baseLanguage
	// to the $parentLang.
	if ( $parentLang && strcasecmp( $parentLang->toBcp47Code(), $baseLanguage->toBcp47Code() ) !== 0 ) {
		if ( !$sourceLanguage ) {
			$sourceLanguage = $baseLanguage;
		}
		$baseLanguage = $parentLang;
	}

	if ( $sourceLanguage !== null ) {
		if ( !$parentLang ) {
			// Without a parent language there is nothing the source could be a
			// variant of. Dereferencing $parentLang here would be a fatal error,
			// so fall back to auto-detection instead.
			$sourceIsVariant = false;
		} else {
			$parentConverter = $this->languageConverterFactory->getLanguageConverter( $parentLang );
			$sourceIsVariant = (
				strcasecmp( $parentLang->toBcp47Code(), $sourceLanguage->toBcp47Code() ) !== 0 &&
				$parentConverter->hasVariant(
					LanguageCode::bcp47ToInternal( $sourceLanguage->toBcp47Code() )
				)
			);
		}

		// If the source variant isn't actually a variant, trigger auto-detection.
		if ( !$sourceIsVariant ) {
			$sourceLanguage = null;
		}
	}

	return [ $baseLanguage, $sourceLanguage ];
}
329}
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Methods for dealing with language codes.
An interface for creating language converters.
Internationalisation code. See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
ParserOutput is a rendering of a Content object or a message.
Helper class used by MediaWiki to create Parsoid PageConfig objects.
convertParserOutputVariant(ParserOutput $parserOutput, Bcp47Code $targetVariant, ?Bcp47Code $sourceVariant=null)
Perform variant conversion on a ParserOutput object.
setPageConfig(PageConfig $pageConfig)
Set the PageConfig object to be used during language variant conversion.
__construct(PageIdentity $pageIdentity, PageConfigFactory $pageConfigFactory, Parsoid $parsoid, SiteConfig $siteConfig, TitleFactory $titleFactory, LanguageConverterFactory $languageConverterFactory, LanguageFactory $languageFactory)
disableFallbackLanguageConverter()
Disable fallback language variant converter.
setPageLanguageOverride(Bcp47Code $language)
Set the page content language override.
convertPageBundleVariant(PageBundle $pageBundle, Bcp47Code $targetVariant, ?Bcp47Code $sourceVariant=null)
Perform variant conversion on a PageBundle object.
Provides methods for conversion between PageBundle and ParserOutput. TODO: Convert to a trait once we ...
static parserOutputFromPageBundle(PageBundle $pageBundle, ?ParserOutput $originalParserOutput=null)
Creates a ParserOutput object containing the relevant data from the given PageBundle object.
This is the base exception class for non-fatal exceptions thrown from REST handlers.
Exception representing a failure to look up a revision.
Creates Title objects.
newFromPageIdentity(PageIdentity $pageIdentity)
Represents a title within MediaWiki.
Definition Title.php:78
Value object representing a message for i18n.
Interface for objects (potentially) representing an editable wiki page.
Copyright (C) 2011-2022 Wikimedia Foundation and others.