23use Wikimedia\Bcp47Code\Bcp47Code;
24use Wikimedia\Bcp47Code\Bcp47CodeValue;
44 private const DEPRECATED_LANGUAGE_CODE_MAPPING = [
49 'be-x-old' =>
'be-tarask',
52 'zh-classical' =>
'lzh',
53 'zh-min-nan' =>
'nan',
83 private const NON_STANDARD_LANGUAGE_CODE_MAPPING = [
90 'de-formal' =>
'de-x-formal',
92 'en-rtl' =>
'en-x-rtl',
93 'es-formal' =>
'es-x-formal',
94 'hu-formal' =>
'hu-x-formal',
95 'map-bms' =>
'jv-x-bms',
98 'nl-informal' =>
'nl-x-informal',
99 'roa-tara' =>
'nap-x-tara',
100 'simple' =>
'en-simple',
101 'sr-ec' =>
'sr-Cyrl',
102 'sr-el' =>
'sr-Latn',
113 'crh-ro' =>
'crh-Latn-RO',
114 'kk-cn' =>
'kk-Arab-CN',
115 'kk-tr' =>
'kk-Latn-TR',
116 'zh-cn' =>
'zh-Hans-CN',
117 'zh-sg' =>
'zh-Hans-SG',
118 'zh-my' =>
'zh-Hans-MY',
119 'zh-tw' =>
'zh-Hant-TW',
120 'zh-hk' =>
'zh-Hant-HK',
121 'zh-mo' =>
'zh-Hant-MO',
138 return self::DEPRECATED_LANGUAGE_CODE_MAPPING;
158 foreach ( self::DEPRECATED_LANGUAGE_CODE_MAPPING as $code => $ignore ) {
161 foreach ( self::NON_STANDARD_LANGUAGE_CODE_MAPPING as $code => $ignore ) {
178 return self::DEPRECATED_LANGUAGE_CODE_MAPPING[$code] ?? $code;
191 public static function bcp47( $code ) {
193 if ( isset( self::NON_STANDARD_LANGUAGE_CODE_MAPPING[$code] ) ) {
194 $code = self::NON_STANDARD_LANGUAGE_CODE_MAPPING[$code];
196 $codeSegment = explode(
'-', $code );
198 foreach ( $codeSegment as $segNo => $seg ) {
200 if ( $segNo > 0 && strtolower( $codeSegment[( $segNo - 1 )] ) ==
'x' ) {
201 $codeBCP[$segNo] = strtolower( $seg );
203 } elseif ( ( strlen( $seg ) == 2 ) && ( $segNo > 0 ) ) {
204 $codeBCP[$segNo] = strtoupper( $seg );
206 } elseif ( ( strlen( $seg ) == 4 ) && ( $segNo > 0 ) ) {
207 $codeBCP[$segNo] = ucfirst( strtolower( $seg ) );
210 $codeBCP[$segNo] = strtolower( $seg );
213 return implode(
'-', $codeBCP );
236 return $code->getCode();
238 if ( $code instanceof Bcp47Code ) {
239 $code = $code->toBcp47Code();
241 static $invertedLookup = [];
242 if ( !$invertedLookup ) {
260 foreach ( self::NON_STANDARD_LANGUAGE_CODE_MAPPING as $internal => $bcp47 ) {
261 $invertedLookup[strtolower( $bcp47 )] = $internal;
269 $code = strtolower( $code );
270 return $invertedLookup[$code] ?? $code;
287 $compatMap = self::getNonstandardLanguageCodeMapping();
288 if ( isset( $compatMap[strtolower( $code )] ) ) {
293 $code = $compatMap[strtolower( $code )];
295 return new Bcp47CodeValue( $code );
316 $alphanum =
'[a-z0-9]';
317 $x =
'x'; # private use singleton
318 $singleton =
'[a-wy-z]'; # other singleton
319 $s = $lenient ?
'[-_]' :
'-';
321 $language =
"$alpha{2,8}|$alpha{2,3}$s$alpha{3}";
322 $script =
"$alpha{4}"; # ISO 15924
323 $region =
"(?:$alpha{2}|$digit{3})"; # ISO 3166-1 alpha-2 or UN M.49
324 $variant =
"(?:$alphanum{5,8}|$digit$alphanum{3})";
325 $extension =
"$singleton(?:$s$alphanum{2,8})+";
326 $privateUse =
"$x(?:$s$alphanum{1,8})+";
328 # Define certain legacy language tags (marked as “Type: grandfathered” in BCP 47),
329 # since otherwise the regex is pretty useless.
330 # Since these are limited, this is safe even with later changes to the registry --
331 # the only oddity is that it might change the type of the tag, and thus
332 # the results from the capturing groups.
335 $legacy =
"en{$s}gb{$s}oed"
336 .
"|i{$s}(?:ami|bnn|default|enochian|hak|klingon|lux|mingo|navajo|pwn|tao|tay|tsu)"
337 .
"|no{$s}(?:bok|nyn)"
338 .
"|sgn{$s}(?:be{$s}(?:fr|nl)|ch{$s}de)"
339 .
"|zh{$s}min{$s}nan";
341 $variantList =
"$variant(?:$s$variant)*";
342 $extensionList =
"$extension(?:$s$extension)*";
344 $langtag =
"(?:($language)"
347 .
"(?:$s$variantList)?"
348 .
"(?:$s$extensionList)?"
349 .
"(?:$s$privateUse)?)";
351 # Here is the final breakdown, with capturing groups for each of these components.
352 # The variants, extensions, legacy, and private-use may have interior '-'.
354 $root =
"^(?:$langtag|$privateUse|$legacy)$";
356 return preg_match(
"/$root/i", $code );
361class_alias( LanguageCode::class,
'LanguageCode' );
if(!defined('MW_SETUP_CALLBACK'))