99 'as' => [
"\u{0982}",
"\u{0981}",
"\u{0983}",
"\u{09CE}",
"ক্ষ " ],
100 'ast' => [
"Ch",
"Ll",
"Ñ" ],
101 'az' => [
"Ç",
"Ə",
"Ğ",
"İ",
"Ö",
"Ş",
"Ü" ],
103 'be-tarask' => [
"Ё" ],
105 'bn' => [
'ং',
'ঃ',
'ঁ' ],
106 'bn@collation=traditional' => [
107 'ং',
'ঃ',
'ঁ',
'ক্',
'খ্',
'গ্',
'ঘ্',
'ঙ্',
'চ্',
'ছ্',
'জ্',
'ঝ্',
108 'ঞ্',
'ট্',
'ঠ্',
'ড্',
'ঢ্',
'ণ্',
'ৎ',
'থ্',
'দ্',
'ধ্',
'ন্',
'প্',
109 'ফ্',
'ব্',
'ভ্',
'ম্',
'য্',
'র্',
'ৰ্',
'ল্',
'ৱ্',
'শ্',
'ষ্',
'স্',
'হ্'
112 'br' => [
"Ch",
"C'h" ],
113 'bs' => [
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ],
118 'cs' => [
"Č",
"Ch",
"Ř",
"Š",
"Ž" ],
119 'cy' => [
"Ch",
"Dd",
"Ff",
"Ng",
"Ll",
"Ph",
"Rh",
"Th" ],
120 'da' => [
"Æ",
"Ø",
"Å" ],
122 'de-AT@collation=phonebook' => [
'ä',
'ö',
'ü',
'ß' ],
123 'dsb' => [
"Č",
"Ć",
"Dź",
"Ě",
"Ch",
"Ł",
"Ń",
"Ŕ",
"Š",
"Ś",
"Ž",
"Ź" ],
124 'ee' => [
"Dz",
"Ɖ",
"Ɛ",
"Ƒ",
"Gb",
"Ɣ",
"Kp",
"Ny",
"Ŋ",
"Ɔ",
"Ts",
"Ʋ" ],
127 'eo' => [
"Ĉ",
"Ĝ",
"Ĥ",
"Ĵ",
"Ŝ",
"Ŭ" ],
129 'et' => [
"Š",
"Ž",
"Õ",
"Ä",
"Ö",
"Ü" ],
139 'fi' => [
"Å",
"Ä",
"Ö" ],
140 'fil' => [
"Ñ",
"Ng" ],
141 'fo' => [
"Á",
"Ð",
"Í",
"Ó",
"Ú",
"Ý",
"Æ",
"Ø",
"Å" ],
144 'fur' => [
"À",
"Á",
"Â",
"È",
"Ì",
"Ò",
"Ù" ],
148 'gl' => [
"Ch",
"Ll",
"Ñ" ],
149 'gu' => [
"\u{0A82}",
"\u{0A83}",
"\u{0A81}",
"\u{0AB3}" ],
150 'ha' => [
'Ɓ',
'Ɗ',
'Ƙ',
'Sh',
'Ts',
'Ƴ' ],
153 'hi' => [
"\u{0902}",
"\u{0903}" ],
154 'hr' => [
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ],
155 'hsb' => [
"Č",
"Dź",
"Ě",
"Ch",
"Ł",
"Ń",
"Ř",
"Š",
"Ć",
"Ž" ],
156 'hu' => [
"Cs",
"Dz",
"Dzs",
"Gy",
"Ly",
"Ny",
"Ö",
"Sz",
"Ty",
"Ü",
"Zs" ],
159 'ig' => [
"Ch",
"Gb",
"Gh",
"Gw",
"Ị",
"Kp",
"Kw",
"Ṅ",
"Nw",
"Ny",
"Ọ",
"Sh",
"Ụ" ],
160 'is' => [
"Á",
"Ð",
"É",
"Í",
"Ó",
"Ú",
"Ý",
"Þ",
"Æ",
"Ö",
"Å" ],
163 'kk' => [
"Ү",
"І" ],
164 'kl' => [
"Æ",
"Ø",
"Å" ],
166 "រ",
"ឫ",
"ឬ",
"ល",
"ឭ",
"ឮ",
"\u{17BB}\u{17C6}",
167 "\u{17C6}",
"\u{17B6}\u{17C6}",
"\u{17C7}",
168 "\u{17B7}\u{17C7}",
"\u{17BB}\u{17C7}",
169 "\u{17C1}\u{17C7}",
"\u{17C4}\u{17C7}",
171 'kn' => [
"\u{0C81}",
"\u{0C83}",
"\u{0CF1}",
"\u{0CF2}" ],
172 'kok' => [
"\u{0902}",
"\u{0903}",
"ळ",
"क्ष" ],
173 'ku' => [
"Ç",
"Ê",
"Î",
"Ş",
"Û" ],
177 'lkt' => [
'Č',
'Ǧ',
'Ȟ',
'Š',
'Ž' ],
180 'lt' => [
"Č",
"Š",
"Ž" ],
181 'lv' => [
"Č",
"Ģ",
"Ķ",
"Ļ",
"Ņ",
"Š",
"Ž" ],
182 'mk' => [
"Ѓ",
"Ќ" ],
185 'mo' => [
"Ă",
"Â",
"Î",
"Ș",
"Ț" ],
186 'mr' => [
"\u{0902}",
"\u{0903}",
"ळ",
"क्ष",
"ज्ञ" ],
188 'mt' => [
"Ċ",
"Ġ",
"Għ",
"Ħ",
"Ż" ],
189 'nb' => [
"Æ",
"Ø",
"Å" ],
192 'nn' => [
"Æ",
"Ø",
"Å" ],
193 'no' => [
"Æ",
"Ø",
"Å" ],
195 'om' => [
'Ch',
'Dh',
'Kh',
'Ny',
'Ph',
'Sh' ],
196 'or' => [
"\u{0B01}",
"\u{0B02}",
"\u{0B03}",
"କ୍ଷ" ],
197 'pa' => [
"\u{0A4D}" ],
198 'pl' => [
"Ą",
"Ć",
"Ę",
"Ł",
"Ń",
"Ó",
"Ś",
"Ź",
"Ż" ],
201 'ro' => [
"Ă",
"Â",
"Î",
"Ș",
"Ț" ],
203 'rup' => [
"Ă",
"Â",
"Î",
"Ľ",
"Ń",
"Ș",
"Ț" ],
206 'Á',
'Č',
'Ʒ',
'Ǯ',
'Đ',
'Ǧ',
'Ǥ',
'Ǩ',
'Ŋ',
207 'Š',
'Ŧ',
'Ž',
'Ø',
'Æ',
'Ȧ',
'Ä',
'Ö'
209 'si' => [
"\u{0D82}",
"\u{0D83}",
"\u{0DA4}" ],
210 'sk' => [
"Ä",
"Č",
"Ch",
"Ô",
"Š",
"Ž" ],
211 'sl' => [
"Č",
"Š",
"Ž" ],
212 'smn' => [
"Á",
"Č",
"Đ",
"Ŋ",
"Š",
"Ŧ",
"Ž",
"Æ",
"Ø",
"Å",
"Ä",
"Ö" ],
213 'sq' => [
"Ç",
"Dh",
"Ë",
"Gj",
"Ll",
"Nj",
"Rr",
"Sh",
"Th",
"Xh",
"Zh" ],
215 'sr-Latn' => [
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ],
216 'sv' => [
"Å",
"Ä",
"Ö" ],
217 'sv@collation=standard' => [
"Å",
"Ä",
"Ö" ],
220 "\u{0B82}",
"ஃ",
"க்ஷ",
"க்",
"ங்",
"ச்",
"ஞ்",
"ட்",
"ண்",
"த்",
"ந்",
221 "ப்",
"ம்",
"ய்",
"ர்",
"ல்",
"வ்",
"ழ்",
"ள்",
"ற்",
"ன்",
"ஜ்",
"ஶ்",
"ஷ்",
224 'te' => [
"\u{0C01}",
"\u{0C02}",
"\u{0C03}" ],
225 'th' => [
"ฯ",
"\u{0E46}",
"\u{0E4D}",
"\u{0E3A}" ],
226 'tk' => [
"Ç",
"Ä",
"Ž",
"Ň",
"Ö",
"Ş",
"Ü",
"Ý" ],
227 'tl' => [
"Ñ",
"Ng" ],
228 'to' => [
"Ng",
"ʻ" ],
229 'tr' => [
"Ç",
"Ğ",
"İ",
"Ö",
"Ş",
"Ü" ],
231 'tt' => [
"Ә",
"Ө",
"Ү",
"Җ",
"Ң",
"Һ" ],
232 'uk' => [
"Ґ",
"Ь" ],
233 'uz' => [
"Ch",
"G'",
"Ng",
"O'",
"Sh" ],
234 'vi' => [
"Ă",
"Â",
"Đ",
"Ê",
"Ô",
"Ơ",
"Ư" ],
235 'vo' => [
"Ä",
"Ö",
"Ü" ],
237 "\u{05D1}\u{05BF}",
"\u{05DB}\u{05BC}",
"\u{05E4}\u{05BC}",
238 "\u{05E9}\u{05C2}",
"\u{05EA}\u{05BC}"
240 'yo' => [
"Ẹ",
"Gb",
"Ọ",
"Ṣ" ],
247 $localeParts = explode(
'@',
$locale );
248 $this->digitTransformLanguage = MediaWikiServices::getInstance()->getLanguageFactory()
249 ->getLanguage(
$locale ===
'root' ?
'en' : $localeParts[0] );
251 $this->mainCollator = Collator::create(
$locale );
252 if ( !$this->mainCollator ) {
253 throw new MWException(
"Invalid ICU locale specified for collation: $locale" );
256 $this->primaryCollator = Collator::create(
$locale );
257 $this->primaryCollator->setStrength( Collator::PRIMARY );
260 if ( substr(
$locale, -5, 5 ) ===
'-u-kn' ) {
261 $this->useNumericCollation =
true;
263 $this->locale = substr( $this->locale, 0, -5 );
264 $this->mainCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
265 $this->primaryCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
270 return $this->mainCollator->getSortKey( $string );
274 return $this->primaryCollator->getSortKey( $string );
278 $string = strval( $string );
279 if ( $string ===
'' ) {
283 $firstChar = mb_substr( $string, 0, 1,
'UTF-8' );
286 if ( ord( $firstChar ) > 0x7f && self::isCjk( UtfNormal\Utils::utf8ToCodepoint( $firstChar ) ) ) {
294 [ $this,
'getSortKeyByLetterIndex' ],
299 if ( $min ===
false ) {
306 if ( $this->useNumericCollation ) {
311 if ( ord( $sortLetter ) >= 48 && ord( $sortLetter ) <= 57 ) {
312 $sortLetter =
wfMessage(
'category-header-numerals' )->numParams( 0, 9 )->text();
323 if ( $this->firstLetterData ===
null ) {
325 $cacheKey =
$cache->makeKey(
329 $this->digitTransformLanguage->getCode(),
331 self::FIRST_LETTER_VERSION
333 $this->firstLetterData =
$cache->getWithSetCallback( $cacheKey, $cache::TTL_WEEK,
function () {
347 if ( isset( self::TAILORING_FIRST_LETTERS[$this->locale] ) ) {
348 $letters = require
"$IP/includes/collation/data/first-letters-root.php";
350 $letters = array_merge( $letters, self::TAILORING_FIRST_LETTERS[$this->locale] );
352 if ( isset( self::TAILORING_FIRST_LETTERS[
'-' . $this->locale] ) ) {
353 $letters = array_diff( $letters, self::TAILORING_FIRST_LETTERS[
'-' . $this->locale] );
356 $digits = [
'0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9' ];
357 $letters = array_diff( $letters, $digits );
358 foreach ( $digits as $digit ) {
359 $letters[] = $this->digitTransformLanguage->formatNumNoSeparators( $digit );
361 } elseif ( $this->locale ===
'root' ) {
362 $letters = require
"$IP/includes/collation/data/first-letters-root.php";
366 if ( $letters ===
false ) {
367 throw new MWException(
"MediaWiki does not support ICU locale " .
368 "\"{$this->locale}\"" );
382 foreach ( $letters as $letter ) {
384 if ( isset( $letterMap[$key] ) ) {
387 $comp = $this->mainCollator->compare( $letter, $letterMap[$key] );
388 wfDebug(
"Primary collision '$letter' '{$letterMap[$key]}' (comparison: $comp)" );
391 $comp = UtfNormal\Utils::utf8ToCodepoint( $letter ) <=>
392 UtfNormal\Utils::utf8ToCodepoint( $letterMap[$key] );
395 $letterMap[$key] = $letter;
398 $letterMap[$key] = $letter;
401 ksort( $letterMap, SORT_STRING );
438 $duplicatePrefixes = [];
439 foreach ( $letterMap as $key => $value ) {
442 $trimmedKey = rtrim( $key,
"\0" );
443 if ( $prev ===
false || $prev ===
'' ) {
456 if ( substr( $trimmedKey, 0, strlen( $prev ) ) === $prev ) {
457 $duplicatePrefixes[] = $key;
465 foreach ( $duplicatePrefixes as $badKey ) {
466 wfDebug(
"Removing '{$letterMap[$badKey]}' from first letters." );
467 unset( $letterMap[$badKey] );
471 'chars' => array_values( $letterMap ),
472 'keys' => array_keys( $letterMap ),
491 $file =
"$IP/serialized/$name";
492 if ( file_exists(
$file ) ) {
533 public static function isCjk( $codepoint ) {
534 foreach ( self::CJK_BLOCKS as $block ) {
535 if ( $codepoint >= $block[0] && $codepoint <= $block[1] ) {
550 $icuVersion = INTL_ICU_VERSION;
551 if ( !$icuVersion ) {
555 $versionPrefix = substr( $icuVersion, 0, 3 );
587 return $map[$versionPrefix] ??
false;