// Version number that is included in the cache key built for the first-letter data.
27 private const FIRST_LETTER_VERSION = 4;
// Collator for the locale with its strength set to Collator::PRIMARY; source of primary sort keys.
30 private $primaryCollator;
// Collator created for the locale; used for full sort keys and for comparing colliding letters.
33 private $mainCollator;
// Becomes true when the locale string ends with '-u-kn'.
42 private $useNumericCollation =
false;
// Memoized first-letter data ('chars' and 'keys' lists); null until it is first requested.
45 private $firstLetterData;
56 private const CJK_BLOCKS = [
95 private const TAILORING_FIRST_LETTERS = [
99 'as' => [
"\u{0982}",
"\u{0981}",
"\u{0983}",
"\u{09CE}",
"ক্ষ " ],
100 'ast' => [
"Ch",
"Ll",
"Ñ" ],
101 'az' => [
"Ç",
"Ə",
"Ğ",
"İ",
"Ö",
"Ş",
"Ü" ],
103 'be-tarask' => [
"Ё" ],
105 'bn' => [
'ং',
'ঃ',
'ঁ' ],
106 'bn@collation=traditional' => [
107 'ং',
'ঃ',
'ঁ',
'ক্',
'খ্',
'গ্',
'ঘ্',
'ঙ্',
'চ্',
'ছ্',
'জ্',
'ঝ্',
108 'ঞ্',
'ট্',
'ঠ্',
'ড্',
'ঢ্',
'ণ্',
'ৎ',
'থ্',
'দ্',
'ধ্',
'ন্',
'প্',
109 'ফ্',
'ব্',
'ভ্',
'ম্',
'য্',
'র্',
'ৰ্',
'ল্',
'ৱ্',
'শ্',
'ষ্',
'স্',
'হ্'
112 'br' => [
"Ch",
"C'h" ],
113 'bs' => [
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ],
118 'cs' => [
"Č",
"Ch",
"Ř",
"Š",
"Ž" ],
119 'cy' => [
"Ch",
"Dd",
"Ff",
"Ng",
"Ll",
"Ph",
"Rh",
"Th" ],
120 'da' => [
"Æ",
"Ø",
"Å" ],
122 'de-AT@collation=phonebook' => [
'ä',
'ö',
'ü',
'ß' ],
123 'dsb' => [
"Č",
"Ć",
"Dź",
"Ě",
"Ch",
"Ł",
"Ń",
"Ŕ",
"Š",
"Ś",
"Ž",
"Ź" ],
124 'ee' => [
"Dz",
"Ɖ",
"Ɛ",
"Ƒ",
"Gb",
"Ɣ",
"Kp",
"Ny",
"Ŋ",
"Ɔ",
"Ts",
"Ʋ" ],
127 'eo' => [
"Ĉ",
"Ĝ",
"Ĥ",
"Ĵ",
"Ŝ",
"Ŭ" ],
129 'et' => [
"Š",
"Ž",
"Õ",
"Ä",
"Ö",
"Ü" ],
139 'fi' => [
"Å",
"Ä",
"Ö" ],
140 'fil' => [
"Ñ",
"Ng" ],
141 'fo' => [
"Á",
"Ð",
"Í",
"Ó",
"Ú",
"Ý",
"Æ",
"Ø",
"Å" ],
144 'fur' => [
"À",
"Á",
"Â",
"È",
"Ì",
"Ò",
"Ù" ],
148 'gl' => [
"Ch",
"Ll",
"Ñ" ],
149 'gu' => [
"\u{0A82}",
"\u{0A83}",
"\u{0A81}",
"\u{0AB3}" ],
150 'ha' => [
'Ɓ',
'Ɗ',
'Ƙ',
'Sh',
'Ts',
'Ƴ' ],
153 'hi' => [
"\u{0902}",
"\u{0903}" ],
154 'hr' => [
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ],
155 'hsb' => [
"Č",
"Dź",
"Ě",
"Ch",
"Ł",
"Ń",
"Ř",
"Š",
"Ć",
"Ž" ],
156 'hu' => [
"Cs",
"Dz",
"Dzs",
"Gy",
"Ly",
"Ny",
"Ö",
"Sz",
"Ty",
"Ü",
"Zs" ],
159 'ig' => [
"Ch",
"Gb",
"Gh",
"Gw",
"Ị",
"Kp",
"Kw",
"Ṅ",
"Nw",
"Ny",
"Ọ",
"Sh",
"Ụ" ],
160 'is' => [
"Á",
"Ð",
"É",
"Í",
"Ó",
"Ú",
"Ý",
"Þ",
"Æ",
"Ö",
"Å" ],
163 'kk' => [
"Ү",
"І" ],
164 'kl' => [
"Æ",
"Ø",
"Å" ],
166 "រ",
"ឫ",
"ឬ",
"ល",
"ឭ",
"ឮ",
"\u{17BB}\u{17C6}",
167 "\u{17C6}",
"\u{17B6}\u{17C6}",
"\u{17C7}",
168 "\u{17B7}\u{17C7}",
"\u{17BB}\u{17C7}",
169 "\u{17C1}\u{17C7}",
"\u{17C4}\u{17C7}",
171 'kn' => [
"\u{0C81}",
"\u{0C83}",
"\u{0CF1}",
"\u{0CF2}" ],
172 'kok' => [
"\u{0902}",
"\u{0903}",
"ळ",
"क्ष" ],
173 'ku' => [
"Ç",
"Ê",
"Î",
"Ş",
"Û" ],
177 'lkt' => [
'Č',
'Ǧ',
'Ȟ',
'Š',
'Ž' ],
180 'lt' => [
"Č",
"Š",
"Ž" ],
181 'lv' => [
"Č",
"Ģ",
"Ķ",
"Ļ",
"Ņ",
"Š",
"Ž" ],
182 'mk' => [
"Ѓ",
"Ќ" ],
185 'mo' => [
"Ă",
"Â",
"Î",
"Ș",
"Ț" ],
186 'mr' => [
"\u{0902}",
"\u{0903}",
"ळ",
"क्ष",
"ज्ञ" ],
188 'mt' => [
"Ċ",
"Ġ",
"Għ",
"Ħ",
"Ż" ],
189 'nb' => [
"Æ",
"Ø",
"Å" ],
192 'nn' => [
"Æ",
"Ø",
"Å" ],
193 'no' => [
"Æ",
"Ø",
"Å" ],
195 'om' => [
'Ch',
'Dh',
'Kh',
'Ny',
'Ph',
'Sh' ],
196 'or' => [
"\u{0B01}",
"\u{0B02}",
"\u{0B03}",
"କ୍ଷ" ],
197 'pa' => [
"\u{0A4D}" ],
198 'pl' => [
"Ą",
"Ć",
"Ę",
"Ł",
"Ń",
"Ó",
"Ś",
"Ź",
"Ż" ],
201 'ro' => [
"Ă",
"Â",
"Î",
"Ș",
"Ț" ],
203 'rup' => [
"Ă",
"Â",
"Î",
"Ľ",
"Ń",
"Ș",
"Ț" ],
206 'Á',
'Č',
'Ʒ',
'Ǯ',
'Đ',
'Ǧ',
'Ǥ',
'Ǩ',
'Ŋ',
207 'Š',
'Ŧ',
'Ž',
'Ø',
'Æ',
'Ȧ',
'Ä',
'Ö'
209 'si' => [
"\u{0D82}",
"\u{0D83}",
"\u{0DA4}" ],
210 'sk' => [
"Ä",
"Č",
"Ch",
"Ô",
"Š",
"Ž" ],
211 'sl' => [
"Č",
"Š",
"Ž" ],
212 'smn' => [
"Á",
"Č",
"Đ",
"Ŋ",
"Š",
"Ŧ",
"Ž",
"Æ",
"Ø",
"Å",
"Ä",
"Ö" ],
213 'sq' => [
"Ç",
"Dh",
"Ë",
"Gj",
"Ll",
"Nj",
"Rr",
"Sh",
"Th",
"Xh",
"Zh" ],
215 'sr-Latn' => [
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ],
216 'sv' => [
"Å",
"Ä",
"Ö" ],
217 'sv@collation=standard' => [
"Å",
"Ä",
"Ö" ],
220 "\u{0B82}",
"ஃ",
"க்ஷ",
"க்",
"ங்",
"ச்",
"ஞ்",
"ட்",
"ண்",
"த்",
"ந்",
221 "ப்",
"ம்",
"ய்",
"ர்",
"ல்",
"வ்",
"ழ்",
"ள்",
"ற்",
"ன்",
"ஜ்",
"ஶ்",
"ஷ்",
224 'te' => [
"\u{0C01}",
"\u{0C02}",
"\u{0C03}" ],
225 'th' => [
"ฯ",
"\u{0E46}",
"\u{0E4D}",
"\u{0E3A}" ],
226 'tk' => [
"Ç",
"Ä",
"Ž",
"Ň",
"Ö",
"Ş",
"Ü",
"Ý" ],
227 'tl' => [
"Ñ",
"Ng" ],
228 'to' => [
"Ng",
"ʻ" ],
229 'tr' => [
"Ç",
"Ğ",
"İ",
"Ö",
"Ş",
"Ü" ],
231 'tt' => [
"Ә",
"Ө",
"Ү",
"Җ",
"Ң",
"Һ" ],
232 'uk' => [
"Ґ",
"Ь" ],
233 'uz' => [
"Ch",
"G'",
"Ng",
"O'",
"Sh" ],
234 'vi' => [
"Ă",
"Â",
"Đ",
"Ê",
"Ô",
"Ơ",
"Ư" ],
235 'vo' => [
"Ä",
"Ö",
"Ü" ],
237 "\u{05D1}\u{05BF}",
"\u{05DB}\u{05BC}",
"\u{05E4}\u{05BC}",
238 "\u{05E9}\u{05C2}",
"\u{05EA}\u{05BC}"
240 'yo' => [
"Ẹ",
"Gb",
"Ọ",
"Ṣ" ],
// NOTE(review): the enclosing function header is not visible in this excerpt;
// this looks like the constructor body - confirm.
252 $this->locale = $locale;
// The part of the locale before any '@' selects the Language object kept for digit
// formatting; the 'root' locale is mapped to 'en'.
254 $localeParts = explode(
'@', $locale );
255 $this->digitTransformLanguage = $languageFactory->
getLanguage( $locale ===
'root' ?
'en' : $localeParts[0] );
// A falsy result from Collator::create() is reported as an invalid locale.
257 $mainCollator = Collator::create( $locale );
258 if ( !$mainCollator ) {
259 throw new MWException(
"Invalid ICU locale specified for collation: $locale" );
261 $this->mainCollator = $mainCollator;
// Second collator for the same locale, restricted to primary strength.
264 $this->primaryCollator = Collator::create( $locale );
265 $this->primaryCollator->setStrength( Collator::PRIMARY );
// A '-u-kn' suffix enables numeric collation on both collators; the 5-character
// suffix is cut off the stored locale.
268 if ( str_ends_with( $locale,
'-u-kn' ) ) {
269 $this->useNumericCollation =
true;
271 $this->locale = substr( $this->locale, 0, -5 );
272 $this->mainCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
273 $this->primaryCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
// Full sort key for $string, taken from the main collator.
// NOTE(review): the enclosing function header is not visible in this excerpt.
278 return $this->mainCollator->getSortKey( $string );
// NOTE(review): the enclosing function header and several branch bodies are not
// visible in this excerpt; presumably this is the first-letter lookup - confirm.
282 $string = strval( $string );
// The empty string is special-cased (branch body not shown here).
283 if ( $string ===
'' ) {
// First character (not byte) of the string.
287 $firstChar = mb_substr( $string, 0, 1,
'UTF-8' );
// ord() looks at the first byte only, so the isCjk() check is attempted solely for
// characters outside the ASCII range.
290 if ( ord( $firstChar ) > 0x7f && self::isCjk( mb_ord( $firstChar ) ) ) {
// Search the string's primary sort key among the first-letter keys; 'keys' and
// 'chars' are index-aligned lists.
294 $sortKey = $this->getPrimarySortKey( $string );
295 $data = $this->getFirstLetterData();
296 $keys = $data[
'keys'];
297 $letters = $data[
'chars'];
// findLowerBound receives an accessor into $keys.
// NOTE(review): its remaining arguments are not visible in this excerpt.
300 $min = ArrayUtils::findLowerBound(
301 static function ( $index ) use (
$keys ) {
302 return $keys[$index];
// A false search result is handled separately (branch body not shown here).
308 if ( $min ===
false ) {
313 $sortLetter = $letters[$min];
315 if ( $this->useNumericCollation ) {
// 48..57 are the byte values of ASCII '0'..'9'; such a letter is replaced by the
// text of the 'category-header-numerals' message.
320 if ( ord( $sortLetter ) >= 48 && ord( $sortLetter ) <= 57 ) {
321 $sortLetter =
wfMessage(
'category-header-numerals' )->numParams( 0, 9 )->text();
/**
 * Sort key of a string as produced by the primary-strength collator.
 *
 * @param string $string Text to compute the key for
 * @return mixed Whatever the primary collator's getSortKey() returns for the text
 */
327 private function getPrimarySortKey( $string ) {
328 return $this->primaryCollator->getSortKey( $string );
/**
 * Return the first-letter data, loading it only while the property is still null.
 *
 * The value is obtained through $cache->getWithSetCallback() with a TTL of
 * $cache::TTL_WEEK; the callback delegates to fetchFirstLetterData().
 * NOTE(review): where $cache comes from and the leading parts of the cache key
 * are not visible in this excerpt.
 *
 * @return array Data with 'chars' and 'keys' entries, as built by fetchFirstLetterData()
 */
335 private function getFirstLetterData() {
336 if ( $this->firstLetterData ===
null ) {
// The key includes the digit-transform language code and FIRST_LETTER_VERSION.
338 $cacheKey = $cache->makeKey(
342 $this->digitTransformLanguage->getCode(),
344 self::FIRST_LETTER_VERSION
346 $this->firstLetterData = $cache->getWithSetCallback( $cacheKey, $cache::TTL_WEEK,
function () {
347 return $this->fetchFirstLetterData();
350 return $this->firstLetterData;
/**
 * Build the first-letter lookup data for the current locale.
 *
 * Produces two index-aligned lists taken from one map: 'chars' (the letters) and
 * 'keys' (their primary sort keys).
 * NOTE(review): several lines of this function are not visible in this excerpt,
 * so comments below describe only what the visible statements do.
 *
 * @return array Data with 'chars' and 'keys' entries
 * @throws MWException With an "ICU locale not supported" message; presumably for a
 *  locale that is neither tailored nor 'root' (the guarding `else` is not visible)
 */
357 private function fetchFirstLetterData() {
// Tailored locale: start from the root letter list and append the locale's extra letters.
359 if ( isset( self::TAILORING_FIRST_LETTERS[$this->locale] ) ) {
360 $letters = require __DIR__ .
"/data/first-letters-root.php";
362 $letters = array_merge( $letters, self::TAILORING_FIRST_LETTERS[$this->locale] );
// An entry keyed by '-' followed by the locale lists letters to remove again.
364 if ( isset( self::TAILORING_FIRST_LETTERS[
'-' . $this->locale] ) ) {
365 $letters = array_diff( $letters, self::TAILORING_FIRST_LETTERS[
'-' . $this->locale] );
// Replace the ASCII digits with the digits as formatted by the digit-transform language.
368 $digits = [
'0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9' ];
369 $letters = array_diff( $letters, $digits );
370 foreach ( $digits as $digit ) {
371 $letters[] = $this->digitTransformLanguage->formatNumNoSeparators( $digit );
// The 'root' locale uses the root letter list unchanged.
373 } elseif ( $this->locale ===
'root' ) {
374 $letters = require __DIR__ .
"/data/first-letters-root.php";
376 throw new MWException(
"MediaWiki does not support ICU locale " .
377 "\"{$this->locale}\"" );
// Map each letter's primary sort key to the letter. When two letters share a key the
// collision is logged and the letters are compared, first by the main collator and
// then by code point.
// NOTE(review): the conditions deciding which letter is kept are not visible here.
390 foreach ( $letters as $letter ) {
391 $key = $this->getPrimarySortKey( $letter );
392 if ( isset( $letterMap[$key] ) ) {
395 $comp = $this->mainCollator->compare( $letter, $letterMap[$key] );
396 wfDebug(
"Primary collision '$letter' '{$letterMap[$key]}' (comparison: $comp)" );
399 $comp = mb_ord( $letter ) <=> mb_ord( $letterMap[$key] );
402 $letterMap[$key] = $letter;
405 $letterMap[$key] = $letter;
// Order the map by sort key, comparing keys as strings.
408 ksort( $letterMap, SORT_STRING );
// Collect every key that starts with $prev and remove those entries afterwards.
// NOTE(review): the assignment of $prev is not visible in this excerpt.
445 $duplicatePrefixes = [];
446 foreach ( $letterMap as $key => $value ) {
453 if ( $prev !==
'' && str_starts_with( $key, $prev ) ) {
454 $duplicatePrefixes[] = $key;
462 foreach ( $duplicatePrefixes as $badKey ) {
463 wfDebug(
"Removing '{$letterMap[$badKey]}' from first letters." );
464 unset( $letterMap[$badKey] );
// Values and keys of the same map, so both lists share their indexes.
468 'chars' => array_values( $letterMap ),
469 'keys' => array_keys( $letterMap ),
479 public static function isCjk( $codepoint ) {
480 foreach ( self::CJK_BLOCKS as $block ) {
481 if ( $codepoint >= $block[0] && $codepoint <= $block[1] ) {
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.