97 'as' => [
"\xe0\xa6\x82",
"\xe0\xa6\x81",
"\xe0\xa6\x83",
"\xe0\xa7\x8e",
"ক্ষ " ],
98 'ast' => [
"Ch",
"Ll",
"Ñ" ],
99 'az' => [
"Ç",
"Ə",
"Ğ",
"İ",
"Ö",
"Ş",
"Ü" ],
101 'be-tarask' => [
"Ё" ],
103 'bn' => [
'ং',
'ঃ',
'ঁ' ],
104 'bn@collation=traditional' => [
105 'ং',
'ঃ',
'ঁ',
'ক্',
'খ্',
'গ্',
'ঘ্',
'ঙ্',
'চ্',
'ছ্',
'জ্',
'ঝ্',
106 'ঞ্',
'ট্',
'ঠ্',
'ড্',
'ঢ্',
'ণ্',
'ৎ',
'থ্',
'দ্',
'ধ্',
'ন্',
'প্',
107 'ফ্',
'ব্',
'ভ্',
'ম্',
'য্',
'র্',
'ৰ্',
'ল্',
'ৱ্',
'শ্',
'ষ্',
'স্',
'হ্'
110 'br' => [
"Ch",
"C'h" ],
111 'bs' => [
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ],
116 'cs' => [
"Č",
"Ch",
"Ř",
"Š",
"Ž" ],
117 'cy' => [
"Ch",
"Dd",
"Ff",
"Ng",
"Ll",
"Ph",
"Rh",
"Th" ],
118 'da' => [
"Æ",
"Ø",
"Å" ],
120 'de-AT@collation=phonebook' => [
'ä',
'ö',
'ü',
'ß' ],
121 'dsb' => [
"Č",
"Ć",
"Dź",
"Ě",
"Ch",
"Ł",
"Ń",
"Ŕ",
"Š",
"Ś",
"Ž",
"Ź" ],
122 'ee' => [
"Dz",
"Ɖ",
"Ɛ",
"Ƒ",
"Gb",
"Ɣ",
"Kp",
"Ny",
"Ŋ",
"Ɔ",
"Ts",
"Ʋ" ],
125 'eo' => [
"Ĉ",
"Ĝ",
"Ĥ",
"Ĵ",
"Ŝ",
"Ŭ" ],
127 'et' => [
"Š",
"Ž",
"Õ",
"Ä",
"Ö",
"Ü",
"W" ],
137 'fi' => [
"Å",
"Ä",
"Ö" ],
138 'fil' => [
"Ñ",
"Ng" ],
139 'fo' => [
"Á",
"Ð",
"Í",
"Ó",
"Ú",
"Ý",
"Æ",
"Ø",
"Å" ],
142 'fur' => [
"À",
"Á",
"Â",
"È",
"Ì",
"Ò",
"Ù" ],
146 'gl' => [
"Ch",
"Ll",
"Ñ" ],
147 'gu' => [
"\xe0\xaa\x82",
"\xe0\xaa\x83",
"\xe0\xaa\x81",
"\xe0\xaa\xb3" ],
148 'ha' => [
'Ɓ',
'Ɗ',
'Ƙ',
'Sh',
'Ts',
'Ƴ' ],
151 'hi' => [
"\xe0\xa4\x82",
"\xe0\xa4\x83" ],
152 'hr' => [
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ],
153 'hsb' => [
"Č",
"Dź",
"Ě",
"Ch",
"Ł",
"Ń",
"Ř",
"Š",
"Ć",
"Ž" ],
154 'hu' => [
"Cs",
"Dz",
"Dzs",
"Gy",
"Ly",
"Ny",
"Ö",
"Sz",
"Ty",
"Ü",
"Zs" ],
157 'ig' => [
"Ch",
"Gb",
"Gh",
"Gw",
"Ị",
"Kp",
"Kw",
"Ṅ",
"Nw",
"Ny",
"Ọ",
"Sh",
"Ụ" ],
158 'is' => [
"Á",
"Ð",
"É",
"Í",
"Ó",
"Ú",
"Ý",
"Þ",
"Æ",
"Ö",
"Å" ],
161 'kk' => [
"Ү",
"І" ],
162 'kl' => [
"Æ",
"Ø",
"Å" ],
164 "រ",
"ឫ",
"ឬ",
"ល",
"ឭ",
"ឮ",
"\xe1\x9e\xbb\xe1\x9f\x86",
165 "\xe1\x9f\x86",
"\xe1\x9e\xb6\xe1\x9f\x86",
"\xe1\x9f\x87",
166 "\xe1\x9e\xb7\xe1\x9f\x87",
"\xe1\x9e\xbb\xe1\x9f\x87",
167 "\xe1\x9f\x81\xe1\x9f\x87",
"\xe1\x9f\x84\xe1\x9f\x87",
169 'kn' => [
"\xe0\xb2\x81",
"\xe0\xb2\x83",
"\xe0\xb3\xb1",
"\xe0\xb3\xb2" ],
170 'kok' => [
"\xe0\xa4\x82",
"\xe0\xa4\x83",
"ळ",
"क्ष" ],
171 'ku' => [
"Ç",
"Ê",
"Î",
"Ş",
"Û" ],
175 'lkt' => [
'Č',
'Ǧ',
'Ȟ',
'Š',
'Ž' ],
178 'lt' => [
"Č",
"Š",
"Ž" ],
179 'lv' => [
"Č",
"Ģ",
"Ķ",
"Ļ",
"Ņ",
"Š",
"Ž" ],
180 'mk' => [
"Ѓ",
"Ќ" ],
183 'mo' => [
"Ă",
"Â",
"Î",
"Ș",
"Ț" ],
184 'mr' => [
"\xe0\xa4\x82",
"\xe0\xa4\x83",
"ळ",
"क्ष",
"ज्ञ" ],
186 'mt' => [
"Ċ",
"Ġ",
"Għ",
"Ħ",
"Ż" ],
187 'nb' => [
"Æ",
"Ø",
"Å" ],
190 'nn' => [
"Æ",
"Ø",
"Å" ],
191 'no' => [
"Æ",
"Ø",
"Å" ],
193 'om' => [
'Ch',
'Dh',
'Kh',
'Ny',
'Ph',
'Sh' ],
194 'or' => [
"\xe0\xac\x81",
"\xe0\xac\x82",
"\xe0\xac\x83",
"କ୍ଷ" ],
195 'pa' => [
"\xe0\xa9\x8d" ],
196 'pl' => [
"Ą",
"Ć",
"Ę",
"Ł",
"Ń",
"Ó",
"Ś",
"Ź",
"Ż" ],
199 'ro' => [
"Ă",
"Â",
"Î",
"Ș",
"Ț" ],
201 'rup' => [
"Ă",
"Â",
"Î",
"Ľ",
"Ń",
"Ș",
"Ț" ],
204 'Á',
'Č',
'Ʒ',
'Ǯ',
'Đ',
'Ǧ',
'Ǥ',
'Ǩ',
'Ŋ',
205 'Š',
'Ŧ',
'Ž',
'Ø',
'Æ',
'Ȧ',
'Ä',
'Ö'
207 'si' => [
"\xe0\xb6\x82",
"\xe0\xb6\x83",
"\xe0\xb6\xa4" ],
208 'sk' => [
"Ä",
"Č",
"Ch",
"Ô",
"Š",
"Ž" ],
209 'sl' => [
"Č",
"Š",
"Ž" ],
210 'smn' => [
"Á",
"Č",
"Đ",
"Ŋ",
"Š",
"Ŧ",
"Ž",
"Æ",
"Ø",
"Å",
"Ä",
"Ö" ],
211 'sq' => [
"Ç",
"Dh",
"Ë",
"Gj",
"Ll",
"Nj",
"Rr",
"Sh",
"Th",
"Xh",
"Zh" ],
213 'sr-Latn' => [
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ],
214 'sv' => [
"Å",
"Ä",
"Ö" ],
215 'sv@collation=standard' => [
"Å",
"Ä",
"Ö" ],
218 "\xE0\xAE\x82",
"ஃ",
"க்ஷ",
"க்",
"ங்",
"ச்",
"ஞ்",
"ட்",
"ண்",
"த்",
"ந்",
219 "ப்",
"ம்",
"ய்",
"ர்",
"ல்",
"வ்",
"ழ்",
"ள்",
"ற்",
"ன்",
"ஜ்",
"ஶ்",
"ஷ்",
222 'te' => [
"\xe0\xb0\x81",
"\xe0\xb0\x82",
"\xe0\xb0\x83" ],
223 'th' => [
"ฯ",
"\xe0\xb9\x86",
"\xe0\xb9\x8d",
"\xe0\xb8\xba" ],
224 'tk' => [
"Ç",
"Ä",
"Ž",
"Ň",
"Ö",
"Ş",
"Ü",
"Ý" ],
225 'tl' => [
"Ñ",
"Ng" ],
226 'to' => [
"Ng",
"ʻ" ],
227 'tr' => [
"Ç",
"Ğ",
"İ",
"Ö",
"Ş",
"Ü" ],
228 'tt' => [
"Ә",
"Ө",
"Ү",
"Җ",
"Ң",
"Һ" ],
229 'uk' => [
"Ґ",
"Ь" ],
230 'uz' => [
"Ch",
"G'",
"Ng",
"O'",
"Sh" ],
231 'vi' => [
"Ă",
"Â",
"Đ",
"Ê",
"Ô",
"Ơ",
"Ư" ],
232 'vo' => [
"Ä",
"Ö",
"Ü" ],
234 "\xd7\x91\xd6\xbf",
"\xd7\x9b\xd6\xbc",
"\xd7\xa4\xd6\xbc",
235 "\xd7\xa9\xd7\x82",
"\xd7\xaa\xd6\xbc"
237 'yo' => [
"Ẹ",
"Gb",
"Ọ",
"Ṣ" ],
// NOTE(review): fragment — the enclosing function header and the closing
// braces of the blocks below are not visible in this extract. The visible
// statements set up two ICU collators for $locale.

// Refuse to continue when PHP's intl extension is not loaded.
247 if ( !extension_loaded(
'intl' ) ) {
248 throw new MWException(
'An ICU collation was requested, ' .
249 'but the intl extension is not available.' );
// Split the locale string on '@'. How $localeParts is used afterwards is not
// visible in this extract.
254 $localeParts = explode(
'@',
$locale );
// Main collator; a falsy result from Collator::create() is reported as an
// invalid locale.
257 $this->mainCollator = Collator::create(
$locale );
258 if ( !$this->mainCollator ) {
259 throw new MWException(
"Invalid ICU locale specified for collation: $locale" );
// Second collator for the same locale, with its strength set to
// Collator::PRIMARY.
262 $this->primaryCollator = Collator::create(
$locale );
263 $this->primaryCollator->setStrength( Collator::PRIMARY );
// A locale whose last five characters are '-u-kn' switches on numeric
// collation for both collators.
266 if ( substr(
$locale, -5, 5 ) ===
'-u-kn' ) {
267 $this->useNumericCollation =
true;
// Drop the trailing five characters from the stored locale.
// NOTE(review): the assignment that first populates $this->locale is not
// visible here — presumably it holds the same string as $locale; confirm.
269 $this->locale = substr( $this->locale, 0, -5 );
270 $this->mainCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
271 $this->primaryCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
// NOTE(review): the two returns below come from different original lines (276
// and 280) and their function headers are not visible — presumably they are
// the bodies of two separate methods; confirm against the full file.

// Sort key for $string as produced by the main collator.
276 return $this->mainCollator->getSortKey( $string );
// Sort key for $string as produced by the primary collator.
280 return $this->primaryCollator->getSortKey( $string );
// NOTE(review): fragment — the function header, several intermediate
// statements and all closing braces are not visible in this extract.

// Coerce the input to a string.
284 $string = strval( $string );
// Empty input is special-cased; the body of this branch is not visible.
285 if ( $string ===
'' ) {
// First character of the input, measured in UTF-8 characters rather than bytes.
289 $firstChar = mb_substr( $string, 0, 1,
'UTF-8' );
// Callable referring to this object's getSortKeyByLetterIndex method; the call
// that receives it as an argument is not visible here.
300 [ $this,
'getSortKeyByLetterIndex' ],
// NOTE(review): $min is assigned on a line not visible here; the body of the
// false case is not visible either.
305 if ( $min ===
false ) {
// Only when numeric collation is enabled:
312 if ( $this->useNumericCollation ) {
// ord() inspects the first byte of $sortLetter; 48..57 are the ASCII digits
// '0'..'9'. Such a letter is replaced by the 'category-header-numerals'
// message rendered with the numeric parameters 0 and 9.
317 if ( ord( $sortLetter ) >= 48 && ord( $sortLetter ) <= 57 ) {
318 $sortLetter =
wfMessage(
'category-header-numerals' )->numParams( 0, 9 )->text();
// NOTE(review): fragment — the function header, the origin of $cache, most of
// the cache-key arguments and the closure body are not visible in this extract.

// Populate $this->firstLetterData only when it has not been set yet.
329 if ( $this->firstLetterData ===
null ) {
// Cache key built with $cache->makeKey(); one visible component is the code of
// the digit-transform language.
331 $cacheKey =
$cache->makeKey(
335 $this->digitTransformLanguage->getCode(),
// Read through the cache using $cache::TTL_WEEK as the TTL argument; the
// closure is passed as the callback to getWithSetCallback().
339 $this->firstLetterData =
$cache->getWithSetCallback( $cacheKey, $cache::TTL_WEEK,
function () {
// NOTE(review): fragment — the function header, the initial assignment of
// $letters, the derivation of $key and $prev, and all closing braces / else
// branches are not visible in this extract. Comments describe only the
// statements that are shown.

// Add the locale-specific letters from the tailoring table, if any.
352 if ( isset( self::$tailoringFirstLetters[$this->locale] ) ) {
355 $letters = array_merge( $letters, self::$tailoringFirstLetters[$this->locale] );
// A table entry keyed by '-' followed by the locale lists letters to remove.
357 if ( isset( self::$tailoringFirstLetters[
'-' . $this->locale] ) ) {
358 $letters = array_diff( $letters, self::$tailoringFirstLetters[
'-' . $this->locale] );
// Replace the ASCII digits in the letter list with the output of
// digitTransformLanguage->formatNum() for each digit.
// NOTE(review): the meaning of the second argument (true) to formatNum() is
// not shown here — confirm against that method's documentation.
361 $digits = [
'0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9' ];
362 $letters = array_diff( $letters, $digits );
363 foreach ( $digits
as $digit ) {
364 $letters[] = $this->digitTransformLanguage->formatNum( $digit,
true );
// NOTE(review): the branch this check belongs to is not visible; a $letters
// value of false is reported as an unsupported locale.
368 if ( $letters ===
false ) {
369 throw new MWException(
"MediaWiki does not support ICU locale " .
370 "\"{$this->locale}\"" );
// Build $letterMap keyed by $key (computed on a line not visible here).
384 foreach ( $letters
as $letter ) {
// When $key is already present, keep whichever letter the main collator
// orders first (compare() < 0).
386 if ( isset( $letterMap[$key] ) ) {
389 if ( $this->mainCollator->compare( $letter, $letterMap[$key] ) < 0 ) {
390 $letterMap[$key] = $letter;
// NOTE(review): presumably the else branch (no existing entry for $key) —
// the branch structure is not visible in this extract.
393 $letterMap[$key] = $letter;
// Order the map by key, comparing keys as strings.
396 ksort( $letterMap, SORT_STRING );
// Collect keys that should be dropped because, after trimming, they begin
// with $prev.
433 $duplicatePrefixes = [];
434 foreach ( $letterMap
as $key =>
$value ) {
// Strip trailing NUL bytes from the key before the prefix comparison.
437 $trimmedKey = rtrim( $key,
"\0" );
// NOTE(review): $prev is maintained on lines not visible here; the body of
// this false-or-empty case is not visible either.
438 if ( $prev ===
false || $prev ===
'' ) {
// The trimmed key starts with $prev: remember the untrimmed key for removal.
451 if ( substr( $trimmedKey, 0, strlen( $prev ) ) === $prev ) {
452 $duplicatePrefixes[] = $key;
// Log each removal via wfDebug(), then drop the entry from the map.
460 foreach ( $duplicatePrefixes
as $badKey ) {
461 wfDebug(
"Removing '{$letterMap[$badKey]}' from first letters.\n" );
462 unset( $letterMap[$badKey] );
// Two entries of an array literal whose opening is not visible: the map's
// values under 'chars' and its keys under 'keys', both in map order.
466 'chars' => array_values( $letterMap ),
467 'keys' => array_keys( $letterMap ),
/**
 * Check $codepoint against the ranges listed in self::$cjkBlocks.
 *
 * Each entry of self::$cjkBlocks is read as a pair: index 0 is the lower bound
 * and index 1 the upper bound, both inclusive.
 *
 * NOTE(review): the return statements and closing braces are not visible in
 * this extract — presumably true on the first matching range and false
 * otherwise; confirm against the full file.
 *
 * @param mixed $codepoint Value compared with >= and <= against the range
 *  bounds (presumably an integer code point — confirm)
 */
509 public static function isCjk( $codepoint ) {
510 foreach ( self::$cjkBlocks
as $block ) {
// Inclusive range test against this block's bounds.
511 if ( $codepoint >= $block[0] && $codepoint <= $block[1] ) {
// NOTE(review): the enclosing function header is not visible in this extract.
// Yields the INTL_ICU_VERSION constant when it is defined, otherwise false.
535 return defined(
'INTL_ICU_VERSION' ) ? INTL_ICU_VERSION :
false;
// NOTE(review): fragment — the function header, the assignment of $icuVersion,
// the definition of $map and the fall-through return are not visible here.

// Falsy ICU version: handled in a branch whose body is not visible.
547 if ( !$icuVersion ) {
// Lookup key is the first three characters of the version string (so a
// version such as "52.1" yields "52.").
551 $versionPrefix = substr( $icuVersion, 0, 3 );
// Return the mapped value when $map has an entry for that prefix.
573 if ( isset( $map[$versionPrefix] ) ) {
574 return $map[$versionPrefix];