MediaWiki master
LanguageVariantConverter.php
Go to the documentation of this file.
1<?php
2declare( strict_types = 1 );
3
5
7use MediaWiki\Languages\LanguageConverterFactory;
8use MediaWiki\Languages\LanguageFactory;
18use Wikimedia\Bcp47Code\Bcp47Code;
19use Wikimedia\Bcp47Code\Bcp47CodeValue;
21use Wikimedia\Parsoid\Config\PageConfig;
22use Wikimedia\Parsoid\Config\SiteConfig;
23use Wikimedia\Parsoid\Core\HtmlPageBundle;
24use Wikimedia\Parsoid\DOM\Element;
25use Wikimedia\Parsoid\Parsoid;
26use Wikimedia\Parsoid\Utils\DOMCompat;
27use Wikimedia\Parsoid\Utils\DOMUtils;
28
/** @var PageConfig|null Supplied through setPageConfig(), or created and cached by getPageConfig() */
private ?PageConfig $pageConfig = null;
/** @var Title Title built from the page identity in the constructor; last-resort source of the page language */
private readonly Title $pageTitle;
/** @var Bcp47Code|null Set through setPageLanguageOverride(); when set, getPageLanguage() returns it before anything else */
private ?Bcp47Code $pageLanguageOverride = null;
/** @var bool Whether the core LanguageConverter may be used when Parsoid does not implement the conversion */
private bool $isFallbackLanguageConverterEnabled = true;

/** @var ParserOptions|null Test hook: used by getPageConfig() instead of ParserOptions::newFromAnon() when set */
private ?ParserOptions $parserOptionsForTest = null;
44
/**
 * @param PageIdentity $pageIdentity Page whose content is being converted
 * @param PageConfigFactory $pageConfigFactory Used to lazily build a PageConfig
 * @param Parsoid $parsoid Preferred engine for variant conversion
 * @param SiteConfig $siteConfig Consulted to check whether the page language supports variants
 * @param TitleFactory $titleFactory Only used here, to resolve the page identity into a Title
 * @param LanguageConverterFactory $languageConverterFactory Source of core language converters
 * @param LanguageFactory $languageFactory Used to look up languages and parent languages
 */
public function __construct(
	private readonly PageIdentity $pageIdentity,
	private readonly PageConfigFactory $pageConfigFactory,
	private readonly Parsoid $parsoid,
	private readonly SiteConfig $siteConfig,
	TitleFactory $titleFactory,
	private readonly LanguageConverterFactory $languageConverterFactory,
	private readonly LanguageFactory $languageFactory,
) {
	// Promoted properties are already assigned when the body runs.
	$this->pageTitle = $titleFactory->newFromPageIdentity( $this->pageIdentity );
}
56
/**
 * Set the PageConfig object to be used during language variant conversion.
 *
 * Once set, getPageConfig() returns this object as-is instead of creating
 * one through the PageConfigFactory, and getPageLanguage() may consult it
 * for the page language.
 *
 * @param PageConfig $pageConfig
 */
public function setPageConfig( PageConfig $pageConfig ) {
	$this->pageConfig = $pageConfig;
}
67
/**
 * Set the page content language override.
 *
 * When an override is set, getPageLanguage() returns it before looking at
 * the page bundle headers, the caller-supplied default, the PageConfig or
 * the title.
 *
 * @param Bcp47Code $language
 */
public function setPageLanguageOverride( Bcp47Code $language ) {
	$this->pageLanguageOverride = $language;
}
77
/**
 * Perform variant conversion on a HtmlPageBundle object.
 *
 * Parsoid is used when it implements conversion to the target variant.
 * Otherwise, unless the fallback has been disabled, the core
 * LanguageConverter is used and a new bundle is assembled by hand.
 *
 * @param HtmlPageBundle $pageBundle Bundle holding the HTML to convert
 * @param Bcp47Code $targetVariant Variant to convert to
 * @param ?Bcp47Code $sourceVariant Variant to convert from, if known
 * @return HtmlPageBundle The converted bundle, or the input bundle itself
 *   when no conversion is applicable
 * @throws LocalizedHttpException If the PageConfig cannot be created because
 *   the revision is unavailable
 */
public function convertPageBundleVariant(
	HtmlPageBundle $pageBundle,
	Bcp47Code $targetVariant,
	?Bcp47Code $sourceVariant = null
): HtmlPageBundle {
	[ $pageLanguage, $sourceVariant ] =
		$this->getBaseAndSourceLanguage( $pageBundle, $sourceVariant );

	// Languages without variant support pass through untouched.
	if ( !$this->siteConfig->langConverterEnabledBcp47( $pageLanguage ) ) {
		return $pageBundle;
	}

	$pageConfig = $this->getPageConfig( $pageLanguage, $sourceVariant );

	// Preferred path: let Parsoid transform the bundle itself.
	if ( $this->parsoid->implementsLanguageConversionBcp47( $pageConfig, $targetVariant ) ) {
		$variantOptions = [
			'variant' => [
				'source' => $sourceVariant,
				'target' => $targetVariant,
			]
		];
		return $this->parsoid->pb2pb( $pageConfig, 'variant', $pageBundle, $variantOptions );
	}

	// Parsoid cannot do it; only continue if the core fallback is allowed.
	if ( !$this->isFallbackLanguageConverterEnabled ) {
		return $pageBundle;
	}

	// LanguageConverter::hasVariant and LanguageConverter::convertTo take an
	// internal language code, hence the lookup of $targetVariantCode.
	$languageConverter = $this->languageConverterFactory->getLanguageConverter(
		$this->languageFactory->getParentLanguage( $targetVariant )
	);
	$targetVariantCode = $this->languageFactory->getLanguage( $targetVariant )->getCode();

	if ( !$languageConverter->hasVariant( $targetVariantCode ) ) {
		// No conversion possible: keep the original HTML and report the page's own language.
		$convertedHtml = $pageBundle->html;
		$pageVariant = $pageConfig->getPageLanguageBcp47();
	} else {
		// convertTo() rather than convert(): the exact variant is already known,
		// so no preferred variant has to be computed (see also T267067).
		$convertedHtml = $languageConverter->convertTo( $pageBundle->html, $targetVariantCode );
		$pageVariant = $targetVariant;
	}

	// Leave a marker identifying which converter produced this output.
	$convertedHtml = "<!-- Variant conversion performed using the core LanguageConverter -->" . $convertedHtml;

	// NOTE: Keep this in sync with code in Parsoid.php in Parsoid repo.
	// Reproduce the meta information that Parsoid normally adds.
	$headers = [
		'content-language' => $pageVariant->toBcp47Code(),
		'vary' => [ 'Accept', 'Accept-Language' ]
	];
	$doc = DOMUtils::parseHTML( '<head></head><body></body>' );
	DOMUtils::addHttpEquivHeaders( $doc, $headers );
	$docElt = $doc->documentElement;
	'@phan-var Element $docElt';
	$convertedHtml = preg_replace(
		"#</body>#",
		DOMCompat::getOuterHTML( $docElt ),
		"$convertedHtml</body>"
	);

	return new HtmlPageBundle(
		html: $convertedHtml,
		parsoid: [],
		mw: [],
		version: $pageBundle->version,
		headers: $headers
	);
}
160
171 ParserOutput $parserOutput,
172 Bcp47Code $targetVariant,
173 ?Bcp47Code $sourceVariant = null
174 ): ParserOutput {
175 $pageBundle = PageBundleParserOutputConverter::pageBundleFromParserOutput( $parserOutput );
176 $modifiedPageBundle = $this->convertPageBundleVariant( $pageBundle, $targetVariant, $sourceVariant );
177
178 return PageBundleParserOutputConverter::parserOutputFromPageBundle( $modifiedPageBundle, $parserOutput );
179 }
180
/**
 * Disable fallback language variant converter.
 *
 * After this call, convertPageBundleVariant() returns the page bundle
 * unmodified whenever Parsoid does not implement conversion to the target
 * variant, instead of falling back to the core LanguageConverter.
 */
public function disableFallbackLanguageConverter(): void {
	$this->isFallbackLanguageConverterEnabled = false;
}
187
/**
 * Return the PageConfig to use for conversion, creating and caching one on first use.
 *
 * A PageConfig that is already present (set through setPageConfig() or
 * created by an earlier call) is returned unchanged; the arguments are only
 * used when a new one has to be created.
 *
 * @param Bcp47Code $pageLanguage Language handed to the PageConfigFactory
 * @param ?Bcp47Code $sourceVariant Variant set on a newly created PageConfig, if given
 * @return PageConfig
 * @throws LocalizedHttpException With status 404 if the revision cannot be accessed
 */
private function getPageConfig( Bcp47Code $pageLanguage, ?Bcp47Code $sourceVariant ): PageConfig {
	if ( $this->pageConfig === null ) {
		try {
			$this->pageConfig = $this->pageConfigFactory->createFromParserOptions(
				// Hook for unit testing: a mock ParserOptions can be supplied
				$this->parserOptionsForTest ?? ParserOptions::newFromAnon(),
				$this->pageIdentity,
				null,
				$pageLanguage
			);

			if ( $sourceVariant !== null ) {
				$this->pageConfig->setVariantBcp47( $sourceVariant );
			}
		} catch ( RevisionAccessException ) {
			// TODO: Throw a different exception, this class should not know
			// about HTTP status codes.
			throw new LocalizedHttpException( new MessageValue( "rest-specified-revision-unavailable" ), 404 );
		}
	}

	return $this->pageConfig;
}
213
/**
 * Work out the language of the page content.
 *
 * Sources are tried in this order, and the first one available wins:
 *  1. the override set through setPageLanguageOverride()
 *  2. the 'content-language' header of the page bundle
 *  3. the explicit $default
 *  4. the PageConfig, if one is present
 *  5. the page title
 *
 * @param HtmlPageBundle $pageBundle
 * @param ?Bcp47Code $default Used when neither an override nor a header is available
 * @return Bcp47Code
 */
private function getPageLanguage( HtmlPageBundle $pageBundle, ?Bcp47Code $default = null ): Bcp47Code {
	// An explicit override always wins.
	if ( $this->pageLanguageOverride !== null ) {
		return $this->pageLanguageOverride;
	}

	// The HTTP header carries a BCP-47 language code, not a
	// mediawiki-internal one, so it can be wrapped directly.
	$headerLanguage = $pageBundle->headers[ 'content-language' ] ?? null;
	if ( $headerLanguage ) {
		return new Bcp47CodeValue( $headerLanguage );
	}

	// The explicit default has to be checked before the PageConfig:
	// PageConfig::getPageLanguage() falls back to Title::getPageLanguage(),
	// so asking it first would mean $default is never used.
	if ( $default !== null ) {
		return $default;
	}

	// Last resorts: the PageConfig when we have one, otherwise the language
	// associated with the title (from the database or derived from the title itself).
	return $this->pageConfig !== null
		? $this->pageConfig->getPageLanguageBcp47()
		: $this->pageTitle->getPageLanguage();
}
270
/**
 * Determine the base language of the page and the source variant to convert from.
 *
 * The page language found by getPageLanguage() may itself be a variant code.
 * If so, the parent language becomes the base language and the variant
 * becomes the source (unless a source was passed in explicitly). A source
 * that turns out not to be a real variant of the parent language is dropped
 * so that the source variant is auto-detected.
 *
 * @param HtmlPageBundle $pageBundle
 * @param ?Bcp47Code $sourceLanguage Source variant requested by the caller, if any
 * @return array{0:Bcp47Code,1:?Bcp47Code} The base language, and the source
 *   variant or null when it should be auto-detected
 */
private function getBaseAndSourceLanguage( HtmlPageBundle $pageBundle, ?Bcp47Code $sourceLanguage ): array {
	// Try to determine the language code associated with the content of the page.
	// The result may be a variant code.
	$baseLanguage = $this->getPageLanguage( $pageBundle, $sourceLanguage );

	// To find out if $baseLanguage is actually a variant, get the parent language and compare.
	$parentLang = $this->languageFactory->getParentLanguage( $baseLanguage );

	// Without a parent language there is nothing $sourceLanguage could be a
	// variant of, so it is treated like any other non-variant source: drop it
	// and let the source be auto-detected. Returning here also keeps the code
	// below from dereferencing a null $parentLang.
	if ( !$parentLang ) {
		return [ $baseLanguage, null ];
	}

	// If $parentLang is not the same language as $baseLanguage, this means that
	// $baseLanguage is a variant. In that case, set $sourceLanguage to that
	// variant (unless $sourceLanguage is already set), and set $baseLanguage
	// to the $parentLang
	if ( strcasecmp( $parentLang->toBcp47Code(), $baseLanguage->toBcp47Code() ) !== 0 ) {
		$sourceLanguage ??= $baseLanguage;
		$baseLanguage = $parentLang;
	}

	if ( $sourceLanguage !== null ) {
		$parentConverter = $this->languageConverterFactory->getLanguageConverter( $parentLang );
		// If the source variant isn't actually a variant, trigger auto-detection
		$sourceIsVariant = (
			strcasecmp( $parentLang->toBcp47Code(), $sourceLanguage->toBcp47Code() ) !== 0 &&
			$parentConverter->hasVariant(
				LanguageCode::bcp47ToInternal( $sourceLanguage->toBcp47Code() )
			)
		);
		if ( !$sourceIsVariant ) {
			$sourceLanguage = null;
		}
	}

	return [ $baseLanguage, $sourceLanguage ];
}
321}
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:69
Methods for dealing with language codes.
Set options of the Parser.
ParserOutput is a rendering of a Content object or a message.
Helper class used by MediaWiki to create Parsoid PageConfig objects.
convertPageBundleVariant(HtmlPageBundle $pageBundle, Bcp47Code $targetVariant, ?Bcp47Code $sourceVariant=null)
Perform variant conversion on a HtmlPageBundle object.
convertParserOutputVariant(ParserOutput $parserOutput, Bcp47Code $targetVariant, ?Bcp47Code $sourceVariant=null)
Perform variant conversion on a ParserOutput object.
setPageConfig(PageConfig $pageConfig)
Set the PageConfig object to be used during language variant conversion.
__construct(private readonly PageIdentity $pageIdentity, private readonly PageConfigFactory $pageConfigFactory, private readonly Parsoid $parsoid, private readonly SiteConfig $siteConfig, TitleFactory $titleFactory, private readonly LanguageConverterFactory $languageConverterFactory, private readonly LanguageFactory $languageFactory)
disableFallbackLanguageConverter()
Disable fallback language variant converter.
setPageLanguageOverride(Bcp47Code $language)
Set the page content language override.
Provides methods for conversion between HtmlPageBundle and ParserOutput. TODO: Convert to a trait once...
static parserOutputFromPageBundle(HtmlPageBundle $pageBundle, ?ParserOutput $originalParserOutput=null)
Creates a ParserOutput object containing the relevant data from the given HtmlPageBundle object.
This is the base exception class for non-fatal exceptions thrown from REST handlers.
Exception representing a failure to look up a revision.
Creates Title objects.
newFromPageIdentity(PageIdentity $pageIdentity)
Represents a title within MediaWiki.
Definition Title.php:69
Value object representing a message for i18n.
Interface for objects (potentially) representing an editable wiki page.