MediaWiki master
LanguageVariantConverter.php
Go to the documentation of this file.
1<?php
2
4
15use Wikimedia\Bcp47Code\Bcp47Code;
16use Wikimedia\Bcp47Code\Bcp47CodeValue;
17use Wikimedia\Parsoid\Config\PageConfig;
18use Wikimedia\Parsoid\Config\SiteConfig;
19use Wikimedia\Parsoid\Core\PageBundle;
20use Wikimedia\Parsoid\DOM\Element;
21use Wikimedia\Parsoid\Parsoid;
22use Wikimedia\Parsoid\Utils\DOMCompat;
23use Wikimedia\Parsoid\Utils\DOMUtils;
24
30 private PageConfigFactory $pageConfigFactory;
31 private ?PageConfig $pageConfig = null;
32 private PageIdentity $pageIdentity;
33 private Title $pageTitle;
34 private Parsoid $parsoid;
35 private SiteConfig $siteConfig;
36 private LanguageConverterFactory $languageConverterFactory;
37 private LanguageFactory $languageFactory;
41 private ?Bcp47Code $pageLanguageOverride = null;
42 private bool $isFallbackLanguageConverterEnabled = true;
43
/**
 * Store the collaborators needed for variant conversion and resolve the
 * Title that corresponds to the given page identity.
 *
 * @param PageIdentity $pageIdentity Page whose content is being converted
 * @param PageConfigFactory $pageConfigFactory Used to lazily build a PageConfig
 * @param Parsoid $parsoid
 * @param SiteConfig $siteConfig
 * @param TitleFactory $titleFactory Only used here, to derive the page Title
 * @param LanguageConverterFactory $languageConverterFactory
 * @param LanguageFactory $languageFactory
 */
public function __construct(
	PageIdentity $pageIdentity,
	PageConfigFactory $pageConfigFactory,
	Parsoid $parsoid,
	SiteConfig $siteConfig,
	TitleFactory $titleFactory,
	LanguageConverterFactory $languageConverterFactory,
	LanguageFactory $languageFactory
) {
	// Page-related state. The Title is derived once, up front.
	$this->pageIdentity = $pageIdentity;
	$this->pageTitle = $titleFactory->newFromPageIdentity( $pageIdentity );

	// Parsoid-side services.
	$this->pageConfigFactory = $pageConfigFactory;
	$this->parsoid = $parsoid;
	$this->siteConfig = $siteConfig;

	// Core language services.
	$this->languageConverterFactory = $languageConverterFactory;
	$this->languageFactory = $languageFactory;
}
61
/**
 * Set the PageConfig object to be used during language variant conversion.
 *
 * Once a PageConfig has been supplied, getPageConfig() returns it as-is
 * instead of creating one through the PageConfigFactory.
 *
 * @param PageConfig $pageConfig
 */
public function setPageConfig( PageConfig $pageConfig ) {
	$this->pageConfig = $pageConfig;
}
72
/**
 * Set the page content language override.
 *
 * When an override is set, getPageLanguage() returns it unconditionally,
 * ahead of any language found in the page bundle, the caller-supplied
 * default, the PageConfig or the Title.
 *
 * @param Bcp47Code $language
 */
public function setPageLanguageOverride( Bcp47Code $language ) {
	$this->pageLanguageOverride = $language;
}
82
/**
 * Perform variant conversion on a PageBundle object.
 *
 * Conversion is attempted in this order:
 *  - if the page language has no language converter enabled, the bundle is
 *    returned unchanged;
 *  - if Parsoid implements conversion to the target variant, Parsoid does it;
 *  - otherwise, unless the fallback has been disabled, the core
 *    LanguageConverter is used and a new PageBundle is built from its output.
 *
 * @param PageBundle $pageBundle
 * @param Bcp47Code $targetVariant
 * @param Bcp47Code|null $sourceVariant
 *
 * @return PageBundle The converted bundle, or the bundle passed in when no
 *   conversion is performed
 * @throws HttpException if creating the PageConfig hits a revision access error
 */
public function convertPageBundleVariant(
	PageBundle $pageBundle,
	Bcp47Code $targetVariant,
	?Bcp47Code $sourceVariant = null
): PageBundle {
	[ $pageLanguage, $sourceVariant ] =
		$this->getBaseAndSourceLanguage( $pageBundle, $sourceVariant );

	// If the language doesn't support variants, just return the content unmodified.
	if ( !$this->siteConfig->langConverterEnabledBcp47( $pageLanguage ) ) {
		return $pageBundle;
	}

	$pageConfig = $this->getPageConfig( $pageLanguage, $sourceVariant );

	// Preferred path: let Parsoid do the conversion when it knows how.
	if ( $this->parsoid->implementsLanguageConversionBcp47( $pageConfig, $targetVariant ) ) {
		$variantOptions = [
			'variant' => [
				'source' => $sourceVariant,
				'target' => $targetVariant,
			]
		];
		return $this->parsoid->pb2pb( $pageConfig, 'variant', $pageBundle, $variantOptions );
	}

	// Fallback variant conversion is not enabled, return the page bundle as is.
	if ( !$this->isFallbackLanguageConverterEnabled ) {
		return $pageBundle;
	}

	// LanguageConverter::hasVariant and LanguageConverter::convertTo
	// could take a string|Bcp47Code in the future, which would
	// allow us to avoid the $targetVariantCode conversion here.
	$parentLanguage = $this->languageFactory->getParentLanguage( $targetVariant );
	$converter = $this->languageConverterFactory->getLanguageConverter( $parentLanguage );
	$targetVariantCode = $this->languageFactory->getLanguage( $targetVariant )->getCode();

	if ( !$converter->hasVariant( $targetVariantCode ) ) {
		// No conversion possible - pass through original HTML in original language
		$convertedHtml = $pageBundle->html;
		$pageVariant = $pageConfig->getPageLanguageBcp47();
	} else {
		// NOTE: This is not a convert() because we have the exact desired variant
		// and don't need to compute a preferred variant based on a base language.
		// Also see T267067 for why convert() should be avoided.
		$convertedHtml = $converter->convertTo( $pageBundle->html, $targetVariantCode );
		$pageVariant = $targetVariant;
	}

	// Add a note so that we can identify what was used to perform the variant conversion
	$convertedHtml = "<!-- Variant conversion performed using the core LanguageConverter -->" . $convertedHtml;

	// NOTE: Keep this in sync with code in Parsoid.php in Parsoid repo
	// Add meta information that Parsoid normally adds
	$headers = [
		'content-language' => $pageVariant->toBcp47Code(),
		'vary' => [ 'Accept', 'Accept-Language' ]
	];

	// Build a document holding a <head> with the http-equiv headers and serialize it.
	$metaDoc = DOMUtils::parseHTML( '' );
	$metaDoc->appendChild( $metaDoc->createElement( 'head' ) );
	DOMUtils::addHttpEquivHeaders( $metaDoc, $headers );
	$docElt = $metaDoc->documentElement;
	'@phan-var Element $docElt';
	$metaHtml = DOMCompat::getOuterHTML( $docElt );

	// A closing body tag is appended, then every closing body tag in the
	// resulting string is swapped for the serialized document.
	$convertedHtml = preg_replace( "#</body>#", $metaHtml, "$convertedHtml</body>" );

	return new PageBundle( $convertedHtml, [], [], $pageBundle->version, $headers );
}
165
/**
 * Perform variant conversion on a ParserOutput object.
 *
 * The ParserOutput is turned into a PageBundle, run through
 * convertPageBundleVariant(), and the resulting bundle is turned back into
 * a ParserOutput. The original ParserOutput is handed to the converter on
 * the way back.
 *
 * @param ParserOutput $parserOutput
 * @param Bcp47Code $targetVariant
 * @param Bcp47Code|null $sourceVariant
 *
 * @return ParserOutput
 */
public function convertParserOutputVariant(
	ParserOutput $parserOutput,
	Bcp47Code $targetVariant,
	?Bcp47Code $sourceVariant = null
): ParserOutput {
	$pageBundle = PageBundleParserOutputConverter::pageBundleFromParserOutput( $parserOutput );
	$modifiedPageBundle = $this->convertPageBundleVariant( $pageBundle, $targetVariant, $sourceVariant );

	return PageBundleParserOutputConverter::parserOutputFromPageBundle( $modifiedPageBundle, $parserOutput );
}
185
/**
 * Disable fallback language variant converter.
 *
 * After this call, convertPageBundleVariant() returns the page bundle
 * unchanged whenever Parsoid does not implement the requested conversion,
 * instead of falling back to the core LanguageConverter.
 */
public function disableFallbackLanguageConverter(): void {
	$this->isFallbackLanguageConverterEnabled = false;
}
193
/**
 * Return the PageConfig to use for conversion, creating and caching it on
 * first use.
 *
 * A PageConfig that is already present (injected through setPageConfig()
 * or created by an earlier call) is returned untouched; in that case
 * neither $pageLanguage nor $sourceVariant is applied to it.
 *
 * @param Bcp47Code $pageLanguage Language passed to the factory
 * @param Bcp47Code|null $sourceVariant Variant set on a newly created config
 *
 * @return PageConfig
 * @throws HttpException if the factory reports a revision access error
 */
private function getPageConfig( Bcp47Code $pageLanguage, ?Bcp47Code $sourceVariant ): PageConfig {
	if ( $this->pageConfig !== null ) {
		return $this->pageConfig;
	}

	try {
		// Only the page identity and language are specified; the three
		// intermediate factory arguments are left as null.
		$this->pageConfig = $this->pageConfigFactory->create(
			$this->pageIdentity,
			null,
			null,
			null,
			$pageLanguage
		);

		if ( $sourceVariant !== null ) {
			$this->pageConfig->setVariantBcp47( $sourceVariant );
		}
	} catch ( RevisionAccessException $accessError ) {
		// TODO: Throw a different exception, this class should not know
		// about HTTP status codes.
		throw new HttpException( 'The specified revision is deleted or suppressed.', 404 );
	}

	return $this->pageConfig;
}
219
/**
 * Determine the language of the page content.
 *
 * The first of the following that is available wins:
 *  1. the override set through setPageLanguageOverride();
 *  2. the 'content-language' header of the page bundle;
 *  3. the explicit $default;
 *  4. the language reported by the PageConfig, if one is set;
 *  5. the language reported by the page Title.
 *
 * @param PageBundle $pageBundle
 * @param Bcp47Code|null $default Used when neither an override nor a
 *   bundle header is present
 *
 * @return Bcp47Code
 */
private function getPageLanguage( PageBundle $pageBundle, ?Bcp47Code $default = null ): Bcp47Code {
	// If a language was set by calling setPageLanguageOverride(), always use it!
	if ( $this->pageLanguageOverride !== null ) {
		return $this->pageLanguageOverride;
	}

	// If the page bundle contains a language code, use that.
	$headerLanguage = $pageBundle->headers[ 'content-language' ] ?? null;
	if ( $headerLanguage ) {
		// The HTTP header will contain a BCP-47 language code, not a
		// mediawiki-internal one.
		return new Bcp47CodeValue( $headerLanguage );
	}

	// NOTE: Use the explicit default *before* we try the PageConfig, because
	// PageConfig::getPageLanguage() falls back to Title::getPageLanguage().
	// If we asked the PageConfig first, $default would never be used.
	if ( $default !== null ) {
		return $default;
	}

	// If we have a PageConfig, we can ask it for the page's language. Note that this
	// will fall back to Title::getPageLanguage(), so it has to be the last thing we
	// try before the Title itself.
	if ( $this->pageConfig !== null ) {
		return $this->pageConfig->getPageLanguageBcp47();
	}

	// Finally, just go by the code associated with the title. This may come from the
	// database or it may be determined based on the title itself.
	return $this->pageTitle->getPageLanguage();
}
276
/**
 * Work out the base (parent) language of the page content and the source
 * variant to convert from.
 *
 * If the detected page language is itself a variant, its parent language
 * becomes the base language and, unless a source variant was passed in, the
 * detected variant becomes the source. A source that is the parent language
 * itself, or that the parent's converter does not know as a variant, is
 * reset to null so that the source variant is auto-detected.
 *
 * @param PageBundle $pageBundle
 * @param Bcp47Code|null $sourceLanguage Requested source variant, if any
 *
 * @return array Two-element list: [ Bcp47Code $baseLanguage, ?Bcp47Code $sourceLanguage ]
 */
private function getBaseAndSourceLanguage( PageBundle $pageBundle, ?Bcp47Code $sourceLanguage ): array {
	// Try to determine the language code associated with the content of the page.
	// The result may be a variant code.
	$baseLanguage = $this->getPageLanguage( $pageBundle, $sourceLanguage );

	// To find out if $baseLanguage is actually a variant, get the parent language and compare.
	$parentLang = $this->languageFactory->getParentLanguage( $baseLanguage );

	if ( !$parentLang ) {
		// No parent language could be determined, so there is no converter to
		// validate a source variant against. Report "no source variant" rather
		// than dereferencing null below.
		return [ $baseLanguage, null ];
	}

	// If $parentLang is not the same language as $baseLanguage, this means that
	// $baseLanguage is a variant. In that case, set $sourceLanguage to that
	// variant (unless $sourceLanguage is already set), and set $baseLanguage
	// to the $parentLang
	if ( strcasecmp( $parentLang->toBcp47Code(), $baseLanguage->toBcp47Code() ) !== 0 ) {
		if ( !$sourceLanguage ) {
			$sourceLanguage = $baseLanguage;
		}
		$baseLanguage = $parentLang;
	}

	if ( $sourceLanguage !== null ) {
		$parentConverter = $this->languageConverterFactory->getLanguageConverter( $parentLang );
		// If the source variant isn't actually a variant, trigger auto-detection
		$sourceIsVariant = (
			strcasecmp( $parentLang->toBcp47Code(), $sourceLanguage->toBcp47Code() ) !== 0 &&
			$parentConverter->hasVariant(
				// The converter is queried with the internal code, not the BCP-47 one.
				LanguageCode::bcp47ToInternal( $sourceLanguage->toBcp47Code() )
			)
		);
		if ( !$sourceIsVariant ) {
			$sourceLanguage = null;
		}
	}

	return [ $baseLanguage, $sourceLanguage ];
}
327}
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Methods for dealing with language codes.
An interface for creating language converters.
Internationalisation code See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
Rendered output of a wiki page, as parsed from wikitext.
Helper class used by MediaWiki to create Parsoid PageConfig objects.
convertParserOutputVariant(ParserOutput $parserOutput, Bcp47Code $targetVariant, ?Bcp47Code $sourceVariant=null)
Perform variant conversion on a ParserOutput object.
setPageConfig(PageConfig $pageConfig)
Set the PageConfig object to be used during language variant conversion.
__construct(PageIdentity $pageIdentity, PageConfigFactory $pageConfigFactory, Parsoid $parsoid, SiteConfig $siteConfig, TitleFactory $titleFactory, LanguageConverterFactory $languageConverterFactory, LanguageFactory $languageFactory)
disableFallbackLanguageConverter()
Disable fallback language variant converter.
setPageLanguageOverride(Bcp47Code $language)
Set the page content language override.
convertPageBundleVariant(PageBundle $pageBundle, Bcp47Code $targetVariant, ?Bcp47Code $sourceVariant=null)
Perform variant conversion on a PageBundle object.
Provides methods for conversion between PageBundle and ParserOutput TODO: Convert to a trait once we ...
static parserOutputFromPageBundle(PageBundle $pageBundle, ?ParserOutput $originalParserOutput=null)
Creates a ParserOutput object containing the relevant data from the given PageBundle object.
This is the base exception class for non-fatal exceptions thrown from REST handlers.
Exception representing a failure to look up a revision.
Creates Title objects.
newFromPageIdentity(PageIdentity $pageIdentity)
Represents a title within MediaWiki.
Definition Title.php:78
Interface for objects (potentially) representing an editable wiki page.
Copyright (C) 2011-2022 Wikimedia Foundation and others.