39 private const DEPRECATED_LANGUAGE_CODE_MAPPING = [
44 'be-x-old' =>
'be-tarask',
47 'zh-classical' =>
'lzh',
48 'zh-min-nan' =>
'nan',
78 private const NON_STANDARD_LANGUAGE_CODE_MAPPING = [
85 'de-formal' =>
'de-x-formal',
87 'en-rtl' =>
'en-x-rtl',
88 'es-formal' =>
'es-x-formal',
89 'hu-formal' =>
'hu-x-formal',
90 'map-bms' =>
'jv-x-bms',
93 'nl-informal' =>
'nl-x-informal',
94 'roa-tara' =>
'nap-x-tara',
95 'simple' =>
'en-simple',
104 'zh-cn' =>
'zh-Hans-CN',
105 'zh-sg' =>
'zh-Hans-SG',
106 'zh-my' =>
'zh-Hans-MY',
107 'zh-tw' =>
'zh-Hant-TW',
108 'zh-hk' =>
'zh-Hant-HK',
109 'zh-mo' =>
'zh-Hant-MO',
125 return self::DEPRECATED_LANGUAGE_CODE_MAPPING;
142 foreach ( self::DEPRECATED_LANGUAGE_CODE_MAPPING as $code => $ignore ) {
143 $result[$code] = self::bcp47( $code );
145 foreach ( self::NON_STANDARD_LANGUAGE_CODE_MAPPING as $code => $ignore ) {
146 $result[$code] = self::bcp47( $code );
162 return self::DEPRECATED_LANGUAGE_CODE_MAPPING[$code] ?? $code;
175 public static function bcp47( $code ) {
176 $code = self::replaceDeprecatedCodes( strtolower( $code ) );
177 if ( isset( self::NON_STANDARD_LANGUAGE_CODE_MAPPING[$code] ) ) {
178 $code = self::NON_STANDARD_LANGUAGE_CODE_MAPPING[$code];
180 $codeSegment = explode(
'-', $code );
182 foreach ( $codeSegment as $segNo => $seg ) {
184 if ( $segNo > 0 && strtolower( $codeSegment[( $segNo - 1 )] ) ==
'x' ) {
185 $codeBCP[$segNo] = strtolower( $seg );
187 } elseif ( ( strlen( $seg ) == 2 ) && ( $segNo > 0 ) ) {
188 $codeBCP[$segNo] = strtoupper( $seg );
190 } elseif ( ( strlen( $seg ) == 4 ) && ( $segNo > 0 ) ) {
191 $codeBCP[$segNo] = ucfirst( strtolower( $seg ) );
194 $codeBCP[$segNo] = strtolower( $seg );
197 $langCode = implode(
'-', $codeBCP );
219 $alphanum =
'[a-z0-9]';
220 $x =
'x'; # private use singleton
221 $singleton =
'[a-wy-z]'; # other singleton
222 $s = $lenient ?
'[-_]' :
'-';
224 $language =
"$alpha{2,8}|$alpha{2,3}$s$alpha{3}";
225 $script =
"$alpha{4}"; # ISO 15924
226 $region =
"(?:$alpha{2}|$digit{3})"; # ISO 3166-1 alpha-2 or UN M.49
227 $variant =
"(?:$alphanum{5,8}|$digit$alphanum{3})";
228 $extension =
"$singleton(?:$s$alphanum{2,8})+";
229 $privateUse =
"$x(?:$s$alphanum{1,8})+";
231 # Define certain legacy language tags (marked as “Type: grandfathered” in BCP 47),
232 # since otherwise the regex is pretty useless.
233 # Since these are limited, this is safe even with later changes to the registry --
234 # the only oddity is that it might change the type of the tag, and thus
235 # the results from the capturing groups.
238 $legacy =
"en{$s}GB{$s}oed"
239 .
"|i{$s}(?:ami|bnn|default|enochian|hak|klingon|lux|mingo|navajo|pwn|tao|tay|tsu)"
240 .
"|no{$s}(?:bok|nyn)"
241 .
"|sgn{$s}(?:BE{$s}(?:fr|nl)|CH{$s}de)"
242 .
"|zh{$s}min{$s}nan";
244 $variantList =
"$variant(?:$s$variant)*";
245 $extensionList =
"$extension(?:$s$extension)*";
247 $langtag =
"(?:($language)"
250 .
"(?:$s$variantList)?"
251 .
"(?:$s$extensionList)?"
252 .
"(?:$s$privateUse)?)";
254 # Here is the final breakdown, with capturing groups for each of these components
255 # The variants, extensions, legacy, and private-use may have interior '-'
257 $root =
"^(?:$langtag|$privateUse|$legacy)$";
259 return preg_match(
"/$root/", strtolower( $code ) );
Methods for dealing with language codes.
static getNonstandardLanguageCodeMapping()
Returns a mapping of non-standard language codes used by (current and previous versions of) MediaWiki,...
static replaceDeprecatedCodes( $code)
Replace deprecated language codes that were used in previous versions of MediaWiki to up-to-date,...
static getDeprecatedCodeMapping()
Returns a mapping of deprecated language codes that were used in previous versions of MediaWiki to up...
static bcp47( $code)
Get the normalised IETF language tag. See unit test for examples.
static isWellFormedLanguageTag(string $code, bool $lenient=false)
Returns true if a language code string is a well-formed language tag according to RFC 5646.
foreach( $mmfl['setupFiles'] as $fileName) if($queue) if(empty( $mmfl['quiet'])) $s