MediaWiki REL1_40
LanguageVariantConverter.php
Go to the documentation of this file.
1<?php
2
4
13use ParserOutput;
14use Wikimedia\Bcp47Code\Bcp47Code;
15use Wikimedia\Bcp47Code\Bcp47CodeValue;
16use Wikimedia\Parsoid\Config\PageConfig;
17use Wikimedia\Parsoid\Config\SiteConfig;
18use Wikimedia\Parsoid\Core\PageBundle;
19use Wikimedia\Parsoid\Parsoid;
20
/** @var PageConfigFactory Used by getPageConfig() to build a PageConfig when none was injected. */
27 private $pageConfigFactory;
28
/** @var PageConfig|null Set through setPageConfig(), or created and cached by getPageConfig(). */
30 private $pageConfig;
31
/** @var PageIdentity The page this converter was constructed for. */
33 private $pageIdentity;
34
/** @var Title Derived from $pageIdentity in the constructor; last-resort source of the page language. */
36 private $pageTitle;
37
/** @var Parsoid Performs the 'variant' pb2pb transformation. */
39 private $parsoid;
40
/** @var array Passed through to PageConfigFactory::create(). */
42 private $parsoidSettings;
43
/** @var SiteConfig Queried to find out whether the page language has variant conversion enabled. */
45 private $siteConfig;
46
/** @var TitleFactory Used in the constructor to derive $pageTitle. */
48 private $titleFactory;
49
/** @var LanguageConverterFactory Supplies the core LanguageConverter used as a fallback. */
51 private $languageConverterFactory;
52
/** @var LanguageFactory Resolves parent languages and Language objects for BCP-47 codes. */
54 private $languageFactory;
55
/** @var Bcp47Code|null When set via setPageLanguageOverride(), takes precedence over every other language source. */
60 private $pageLanguageOverride;
61
/** @var bool Whether the core LanguageConverter may be used when Parsoid cannot convert to the target variant. */
63 private $isFallbackLanguageConverterEnabled = true;
64
/**
 * Wire up the converter for a single page.
 *
 * @param PageIdentity $pageIdentity The page whose content will be converted
 * @param PageConfigFactory $pageConfigFactory Used to lazily create a PageConfig
 * @param Parsoid $parsoid
 * @param array $parsoidSettings Forwarded to the PageConfigFactory
 * @param SiteConfig $siteConfig
 * @param TitleFactory $titleFactory
 * @param LanguageConverterFactory $languageConverterFactory
 * @param LanguageFactory $languageFactory
 */
public function __construct(
    PageIdentity $pageIdentity,
    PageConfigFactory $pageConfigFactory,
    Parsoid $parsoid,
    array $parsoidSettings,
    SiteConfig $siteConfig,
    TitleFactory $titleFactory,
    LanguageConverterFactory $languageConverterFactory,
    LanguageFactory $languageFactory
) {
    // Page-specific state.
    $this->pageIdentity = $pageIdentity;
    $this->pageLanguageOverride = null;

    // Parsoid-related collaborators.
    $this->pageConfigFactory = $pageConfigFactory;
    $this->parsoid = $parsoid;
    $this->parsoidSettings = $parsoidSettings;
    $this->siteConfig = $siteConfig;

    // Language services.
    $this->languageConverterFactory = $languageConverterFactory;
    $this->languageFactory = $languageFactory;

    // The title is derived once, up front, from the page identity.
    $this->titleFactory = $titleFactory;
    // @phan-suppress-next-line PhanPossiblyNullTypeMismatchProperty
    $this->pageTitle = $this->titleFactory->castFromPageIdentity( $this->pageIdentity );
}
87
/**
 * Set the PageConfig object to be used during language variant conversion.
 *
 * Once set, getPageConfig() returns this object as-is instead of asking
 * the PageConfigFactory for a new one.
 *
 * @param PageConfig $pageConfig
 */
public function setPageConfig( PageConfig $pageConfig ) {
    $this->pageConfig = $pageConfig;
}
98
/**
 * Set the page content language override.
 *
 * When an override is present, getPageLanguage() returns it without
 * consulting the page bundle, the PageConfig or the title.
 *
 * @param Bcp47Code $language
 */
public function setPageLanguageOverride( Bcp47Code $language ) {
    $this->pageLanguageOverride = $language;
}
108
/**
 * Perform variant conversion on a PageBundle object.
 *
 * The bundle is returned untouched when the page language has no variant
 * support, or when Parsoid cannot convert to the target variant and the
 * fallback converter has been disabled. Otherwise the result of Parsoid's
 * 'variant' pb2pb transformation is returned; if Parsoid itself does not
 * implement the conversion, the HTML is first run through the core
 * LanguageConverter.
 *
 * @param PageBundle $pageBundle
 * @param Bcp47Code $targetVariant
 * @param Bcp47Code|null $sourceVariant
 *
 * @return PageBundle
 * @throws HttpException If the PageConfig cannot be created because the
 *   revision is not accessible (see getPageConfig()).
 */
public function convertPageBundleVariant(
    PageBundle $pageBundle,
    Bcp47Code $targetVariant,
    ?Bcp47Code $sourceVariant = null
): PageBundle {
    [ $pageLanguage, $sourceVariant ] =
        $this->getBaseAndSourceLanguage( $pageBundle, $sourceVariant );

    if ( !$this->siteConfig->langConverterEnabledBcp47( $pageLanguage ) ) {
        // If the language doesn't support variants, just return the content unmodified.
        return $pageBundle;
    }

    $pageConfig = $this->getPageConfig( $pageLanguage, $sourceVariant );

    if ( !$this->parsoid->implementsLanguageConversionBcp47( $pageConfig, $targetVariant ) ) {
        if ( !$this->isFallbackLanguageConverterEnabled ) {
            // Fallback variant conversion is not enabled, return the page bundle as is.
            return $pageBundle;
        }

        // LanguageConverter::hasVariant and LanguageConverter::convertTo
        // could take a string|Bcp47Code in the future, which would
        // allow us to avoid the $targetVariantCode conversion here.
        $baseLanguage = $this->languageFactory->getParentLanguage( $targetVariant );
        $languageConverter = $this->languageConverterFactory->getLanguageConverter( $baseLanguage );
        $targetVariantCode = $this->languageFactory->getLanguage( $targetVariant )->getCode();
        if ( $languageConverter->hasVariant( $targetVariantCode ) ) {
            $convertedHtml = $languageConverter->convertTo( $pageBundle->html, $targetVariantCode );
        } else {
            // No conversion possible - pass through original HTML.
            $convertedHtml = $pageBundle->html;
        }

        // Add a note so that we can identify what was used to perform the variant conversion
        $msg = "<!-- Variant conversion performed using the core LanguageConverter -->";
        $convertedHtml = $msg . $convertedHtml;

        // HACK: Pass the HTML to Parsoid for variant conversion in order to add metadata that is
        // missing when we use the core LanguageConverter directly.

        // Replace the original page bundle, so Parsoid gets the converted HTML as input.
        // Only the HTML and the version are carried over; the content-language
        // header is set to the target variant.
        $pageBundle = new PageBundle(
            $convertedHtml,
            [],
            [],
            $pageBundle->version,
            [ 'content-language' => $targetVariant->toBcp47Code() ]
        );
    }

    return $this->parsoid->pb2pb(
        $pageConfig, 'variant', $pageBundle,
        [
            'variant' => [
                'source' => $sourceVariant,
                'target' => $targetVariant,
            ]
        ]
    );
}
183
/**
 * Perform variant conversion on a ParserOutput object.
 *
 * The ParserOutput is turned into a PageBundle, converted with
 * convertPageBundleVariant(), and the result is turned back into a
 * ParserOutput with the original $parserOutput passed along to the converter.
 *
 * @param ParserOutput $parserOutput
 * @param Bcp47Code $targetVariant
 * @param Bcp47Code|null $sourceVariant
 *
 * @return ParserOutput
 */
public function convertParserOutputVariant(
    ParserOutput $parserOutput,
    Bcp47Code $targetVariant,
    ?Bcp47Code $sourceVariant = null
): ParserOutput {
    $pageBundle = PageBundleParserOutputConverter::pageBundleFromParserOutput( $parserOutput );
    $modifiedPageBundle = $this->convertPageBundleVariant( $pageBundle, $targetVariant, $sourceVariant );

    return PageBundleParserOutputConverter::parserOutputFromPageBundle( $modifiedPageBundle, $parserOutput );
}
203
/**
 * Disable fallback language variant converter.
 *
 * After this call, convertPageBundleVariant() returns the bundle unchanged
 * whenever Parsoid does not implement conversion to the target variant.
 */
public function disableFallbackLanguageConverter(): void {
    $this->isFallbackLanguageConverterEnabled = false;
}
211
/**
 * Return the PageConfig to use for conversion, creating and caching one
 * on first use.
 *
 * A PageConfig that is already present (injected through setPageConfig()
 * or created by an earlier call) is returned unchanged; in that case
 * neither $pageLanguage nor $sourceVariant is applied to it.
 *
 * @param Bcp47Code $pageLanguage Language handed to the factory
 * @param Bcp47Code|null $sourceVariant If given, set as the variant of a
 *   newly created PageConfig
 *
 * @return PageConfig
 * @throws HttpException With code 404 when the factory raises a
 *   RevisionAccessException
 */
private function getPageConfig( Bcp47Code $pageLanguage, ?Bcp47Code $sourceVariant ): PageConfig {
    if ( !$this->pageConfig ) {
        try {
            $created = $this->pageConfigFactory->create(
                $this->pageIdentity,
                null,
                null,
                null,
                $pageLanguage,
                $this->parsoidSettings
            );
            // Cache before touching the variant, so the property is set
            // even if the call below fails.
            $this->pageConfig = $created;

            if ( $sourceVariant !== null ) {
                $created->setVariantBcp47( $sourceVariant );
            }
        } catch ( RevisionAccessException $exception ) {
            // TODO: Throw a different exception, this class should not know
            // about HTTP status codes.
            throw new HttpException( 'The specified revision is deleted or suppressed.', 404 );
        }
    }

    return $this->pageConfig;
}
238
/**
 * Work out the language of the page content.
 *
 * Sources are consulted in this order, and the first one that yields a
 * value wins:
 *  1. the override set through setPageLanguageOverride();
 *  2. the 'content-language' header of the page bundle;
 *  3. the explicit $default;
 *  4. the PageConfig, if one is available;
 *  5. the page title.
 *
 * @param PageBundle $pageBundle
 * @param Bcp47Code|null $default
 *
 * @return Bcp47Code
 */
private function getPageLanguage( PageBundle $pageBundle, ?Bcp47Code $default = null ): Bcp47Code {
    // An explicit override always wins.
    if ( $this->pageLanguageOverride ) {
        return $this->pageLanguageOverride;
    }

    // Next, honour a language code carried by the page bundle itself.
    if ( !empty( $pageBundle->headers[ 'content-language' ] ) ) {
        // The HTTP header will contain a BCP-47 language code, not a
        // mediawiki-internal one.
        return new Bcp47CodeValue( $pageBundle->headers[ 'content-language' ] );
    }

    // NOTE: The explicit default has to be checked *before* the PageConfig, because
    // PageConfig::getPageLanguage() falls back to Title::getPageLanguage(). If the
    // PageConfig were asked first, $default would never be used.
    if ( $default !== null ) {
        return $default;
    }

    // A PageConfig can tell us the page's language. Since it falls back to
    // Title::getPageLanguage(), it is only consulted after everything above.
    if ( $this->pageConfig ) {
        return $this->pageConfig->getPageLanguageBcp47();
    }

    // Last resort: the language associated with the title. This may come from
    // the database or it may be determined based on the title itself.
    return $this->pageTitle->getPageLanguage();
}
295
/**
 * Split the page's content language into a base language and an optional
 * source variant.
 *
 * @param PageBundle $pageBundle
 * @param Bcp47Code|null $sourceLanguage Source variant requested by the caller, if any
 *
 * @return array A two-element list: [ base language, source variant or null ].
 *   A null source variant requests auto-detection.
 */
private function getBaseAndSourceLanguage( PageBundle $pageBundle, ?Bcp47Code $sourceLanguage ): array {
    // Start from the language code associated with the page content.
    // At this point it may still be a variant code.
    $baseLanguage = $this->getPageLanguage( $pageBundle, $sourceLanguage );

    // A language whose parent differs from itself is a variant of that parent.
    $parentLang = $this->languageFactory->getParentLanguage( $baseLanguage );
    $baseIsVariant = $parentLang
        && strcasecmp( $parentLang->toBcp47Code(), $baseLanguage->toBcp47Code() ) !== 0;

    if ( $baseIsVariant ) {
        // The variant becomes the source (unless the caller already named
        // one), and the parent takes over as the base language.
        $sourceLanguage = $sourceLanguage ?? $baseLanguage;
        $baseLanguage = $parentLang;
    }

    // If the source variant isn't actually a variant, trigger auto-detection
    // FIXME: This should probably use LanguageConverter::validateVariant()
    // as well, but we'd need a LanguageConverterFactory for that.
    $sourceIsBase = $sourceLanguage
        && strcasecmp( $sourceLanguage->toBcp47Code(), $baseLanguage->toBcp47Code() ) === 0;
    if ( $sourceIsBase ) {
        $sourceLanguage = null;
    }

    return [ $baseLanguage, $sourceLanguage ];
}
339}
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
Definition WebStart.php:88
An interface for creating language converters.
Internationalisation code. See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
Helper class used by MediaWiki to create Parsoid PageConfig objects.
Page-level configuration interface for Parsoid.
Site-level configuration for Parsoid.
convertParserOutputVariant(ParserOutput $parserOutput, Bcp47Code $targetVariant, ?Bcp47Code $sourceVariant=null)
Perform variant conversion on a ParserOutput object.
setPageConfig(PageConfig $pageConfig)
Set the PageConfig object to be used during language variant conversion.
__construct(PageIdentity $pageIdentity, PageConfigFactory $pageConfigFactory, Parsoid $parsoid, array $parsoidSettings, SiteConfig $siteConfig, TitleFactory $titleFactory, LanguageConverterFactory $languageConverterFactory, LanguageFactory $languageFactory)
disableFallbackLanguageConverter()
Disable fallback language variant converter.
setPageLanguageOverride(Bcp47Code $language)
Set the page content language override.
convertPageBundleVariant(PageBundle $pageBundle, Bcp47Code $targetVariant, ?Bcp47Code $sourceVariant=null)
Perform variant conversion on a PageBundle object.
Provides methods for conversion between PageBundle and ParserOutput. TODO: Convert to a trait once we ...
static parserOutputFromPageBundle(PageBundle $pageBundle, ?ParserOutput $parserOutput=null)
Creates a ParserOutput object containing the relevant data from the given PageBundle object.
This is the base exception class for non-fatal exceptions thrown from REST handlers.
Exception representing a failure to look up a revision.
Creates Title objects.
castFromPageIdentity(?PageIdentity $pageIdentity)
Represents a title within MediaWiki.
Definition Title.php:82
Interface for objects (potentially) representing an editable wiki page.
Copyright (C) 2011-2022 Wikimedia Foundation and others.