99 'as' => [
"\u{0982}",
"\u{0981}",
"\u{0983}",
"\u{09CE}",
"ক্ষ " ],
100 'ast' => [
"Ch",
"Ll",
"Ñ" ],
101 'az' => [
"Ç",
"Ə",
"Ğ",
"İ",
"Ö",
"Ş",
"Ü" ],
103 'be-tarask' => [
"Ё" ],
105 'bn' => [
'ং',
'ঃ',
'ঁ' ],
106 'bn@collation=traditional' => [
107 'ং',
'ঃ',
'ঁ',
'ক্',
'খ্',
'গ্',
'ঘ্',
'ঙ্',
'চ্',
'ছ্',
'জ্',
'ঝ্',
108 'ঞ্',
'ট্',
'ঠ্',
'ড্',
'ঢ্',
'ণ্',
'ৎ',
'থ্',
'দ্',
'ধ্',
'ন্',
'প্',
109 'ফ্',
'ব্',
'ভ্',
'ম্',
'য্',
'র্',
'ৰ্',
'ল্',
'ৱ্',
'শ্',
'ষ্',
'স্',
'হ্'
112 'br' => [
"Ch",
"C'h" ],
113 'bs' => [
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ],
118 'cs' => [
"Č",
"Ch",
"Ř",
"Š",
"Ž" ],
119 'cy' => [
"Ch",
"Dd",
"Ff",
"Ng",
"Ll",
"Ph",
"Rh",
"Th" ],
120 'da' => [
"Æ",
"Ø",
"Å" ],
122 'de-AT@collation=phonebook' => [
'ä',
'ö',
'ü',
'ß' ],
123 'dsb' => [
"Č",
"Ć",
"Dź",
"Ě",
"Ch",
"Ł",
"Ń",
"Ŕ",
"Š",
"Ś",
"Ž",
"Ź" ],
124 'ee' => [
"Dz",
"Ɖ",
"Ɛ",
"Ƒ",
"Gb",
"Ɣ",
"Kp",
"Ny",
"Ŋ",
"Ɔ",
"Ts",
"Ʋ" ],
127 'eo' => [
"Ĉ",
"Ĝ",
"Ĥ",
"Ĵ",
"Ŝ",
"Ŭ" ],
129 'et' => [
"Š",
"Ž",
"Õ",
"Ä",
"Ö",
"Ü" ],
139 'fi' => [
"Å",
"Ä",
"Ö" ],
140 'fil' => [
"Ñ",
"Ng" ],
141 'fo' => [
"Á",
"Ð",
"Í",
"Ó",
"Ú",
"Ý",
"Æ",
"Ø",
"Å" ],
144 'fur' => [
"À",
"Á",
"Â",
"È",
"Ì",
"Ò",
"Ù" ],
148 'gl' => [
"Ch",
"Ll",
"Ñ" ],
149 'gu' => [
"\u{0A82}",
"\u{0A83}",
"\u{0A81}",
"\u{0AB3}" ],
150 'ha' => [
'Ɓ',
'Ɗ',
'Ƙ',
'Sh',
'Ts',
'Ƴ' ],
153 'hi' => [
"\u{0902}",
"\u{0903}" ],
154 'hr' => [
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ],
155 'hsb' => [
"Č",
"Dź",
"Ě",
"Ch",
"Ł",
"Ń",
"Ř",
"Š",
"Ć",
"Ž" ],
156 'hu' => [
"Cs",
"Dz",
"Dzs",
"Gy",
"Ly",
"Ny",
"Ö",
"Sz",
"Ty",
"Ü",
"Zs" ],
159 'ig' => [
"Ch",
"Gb",
"Gh",
"Gw",
"Ị",
"Kp",
"Kw",
"Ṅ",
"Nw",
"Ny",
"Ọ",
"Sh",
"Ụ" ],
160 'is' => [
"Á",
"Ð",
"É",
"Í",
"Ó",
"Ú",
"Ý",
"Þ",
"Æ",
"Ö",
"Å" ],
163 'kk' => [
"Ү",
"І" ],
164 'kl' => [
"Æ",
"Ø",
"Å" ],
166 "រ",
"ឫ",
"ឬ",
"ល",
"ឭ",
"ឮ",
"\u{17BB}\u{17C6}",
167 "\u{17C6}",
"\u{17B6}\u{17C6}",
"\u{17C7}",
168 "\u{17B7}\u{17C7}",
"\u{17BB}\u{17C7}",
169 "\u{17C1}\u{17C7}",
"\u{17C4}\u{17C7}",
171 'kn' => [
"\u{0C81}",
"\u{0C83}",
"\u{0CF1}",
"\u{0CF2}" ],
172 'kok' => [
"\u{0902}",
"\u{0903}",
"ळ",
"क्ष" ],
173 'ku' => [
"Ç",
"Ê",
"Î",
"Ş",
"Û" ],
177 'lkt' => [
'Č',
'Ǧ',
'Ȟ',
'Š',
'Ž' ],
180 'lt' => [
"Č",
"Š",
"Ž" ],
181 'lv' => [
"Č",
"Ģ",
"Ķ",
"Ļ",
"Ņ",
"Š",
"Ž" ],
182 'mk' => [
"Ѓ",
"Ќ" ],
185 'mo' => [
"Ă",
"Â",
"Î",
"Ș",
"Ț" ],
186 'mr' => [
"\u{0902}",
"\u{0903}",
"ळ",
"क्ष",
"ज्ञ" ],
188 'mt' => [
"Ċ",
"Ġ",
"Għ",
"Ħ",
"Ż" ],
189 'nb' => [
"Æ",
"Ø",
"Å" ],
192 'nn' => [
"Æ",
"Ø",
"Å" ],
193 'no' => [
"Æ",
"Ø",
"Å" ],
195 'om' => [
'Ch',
'Dh',
'Kh',
'Ny',
'Ph',
'Sh' ],
196 'or' => [
"\u{0B01}",
"\u{0B02}",
"\u{0B03}",
"କ୍ଷ" ],
197 'pa' => [
"\u{0A4D}" ],
198 'pl' => [
"Ą",
"Ć",
"Ę",
"Ł",
"Ń",
"Ó",
"Ś",
"Ź",
"Ż" ],
201 'ro' => [
"Ă",
"Â",
"Î",
"Ș",
"Ț" ],
203 'rup' => [
"Ă",
"Â",
"Î",
"Ľ",
"Ń",
"Ș",
"Ț" ],
206 'Á',
'Č',
'Ʒ',
'Ǯ',
'Đ',
'Ǧ',
'Ǥ',
'Ǩ',
'Ŋ',
207 'Š',
'Ŧ',
'Ž',
'Ø',
'Æ',
'Ȧ',
'Ä',
'Ö'
209 'si' => [
"\u{0D82}",
"\u{0D83}",
"\u{0DA4}" ],
210 'sk' => [
"Ä",
"Č",
"Ch",
"Ô",
"Š",
"Ž" ],
211 'sl' => [
"Č",
"Š",
"Ž" ],
212 'smn' => [
"Á",
"Č",
"Đ",
"Ŋ",
"Š",
"Ŧ",
"Ž",
"Æ",
"Ø",
"Å",
"Ä",
"Ö" ],
213 'sq' => [
"Ç",
"Dh",
"Ë",
"Gj",
"Ll",
"Nj",
"Rr",
"Sh",
"Th",
"Xh",
"Zh" ],
215 'sr-Latn' => [
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ],
216 'sv' => [
"Å",
"Ä",
"Ö" ],
217 'sv@collation=standard' => [
"Å",
"Ä",
"Ö" ],
220 "\u{0B82}",
"ஃ",
"க்ஷ",
"க்",
"ங்",
"ச்",
"ஞ்",
"ட்",
"ண்",
"த்",
"ந்",
221 "ப்",
"ம்",
"ய்",
"ர்",
"ல்",
"வ்",
"ழ்",
"ள்",
"ற்",
"ன்",
"ஜ்",
"ஶ்",
"ஷ்",
224 'te' => [
"\u{0C01}",
"\u{0C02}",
"\u{0C03}" ],
225 'th' => [
"ฯ",
"\u{0E46}",
"\u{0E4D}",
"\u{0E3A}" ],
226 'tk' => [
"Ç",
"Ä",
"Ž",
"Ň",
"Ö",
"Ş",
"Ü",
"Ý" ],
227 'tl' => [
"Ñ",
"Ng" ],
228 'to' => [
"Ng",
"ʻ" ],
229 'tr' => [
"Ç",
"Ğ",
"İ",
"Ö",
"Ş",
"Ü" ],
231 'tt' => [
"Ә",
"Ө",
"Ү",
"Җ",
"Ң",
"Һ" ],
232 'uk' => [
"Ґ",
"Ь" ],
233 'uz' => [
"Ch",
"G'",
"Ng",
"O'",
"Sh" ],
234 'vi' => [
"Ă",
"Â",
"Đ",
"Ê",
"Ô",
"Ơ",
"Ư" ],
235 'vo' => [
"Ä",
"Ö",
"Ü" ],
237 "\u{05D1}\u{05BF}",
"\u{05DB}\u{05BC}",
"\u{05E4}\u{05BC}",
238 "\u{05E9}\u{05C2}",
"\u{05EA}\u{05BC}"
240 'yo' => [
"Ẹ",
"Gb",
"Ọ",
"Ṣ" ],
254 $localeParts = explode(
'@',
$locale );
255 $this->digitTransformLanguage = $languageFactory->
getLanguage(
$locale ===
'root' ?
'en' : $localeParts[0] );
257 $this->mainCollator = Collator::create(
$locale );
258 if ( !$this->mainCollator ) {
259 throw new MWException(
"Invalid ICU locale specified for collation: $locale" );
262 $this->primaryCollator = Collator::create(
$locale );
263 $this->primaryCollator->setStrength( Collator::PRIMARY );
266 if ( substr(
$locale, -5, 5 ) ===
'-u-kn' ) {
267 $this->useNumericCollation =
true;
269 $this->locale = substr( $this->locale, 0, -5 );
270 $this->mainCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
271 $this->primaryCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
276 return $this->mainCollator->getSortKey( $string );
280 return $this->primaryCollator->getSortKey( $string );
284 $string = strval( $string );
285 if ( $string ===
'' ) {
289 $firstChar = mb_substr( $string, 0, 1,
'UTF-8' );
292 if ( ord( $firstChar ) > 0x7f && self::isCjk( UtfNormal\Utils::utf8ToCodepoint( $firstChar ) ) ) {
299 $min = ArrayUtils::findLowerBound(
300 [ $this,
'getSortKeyByLetterIndex' ],
305 if ( $min ===
false ) {
312 if ( $this->useNumericCollation ) {
317 if ( ord( $sortLetter ) >= 48 && ord( $sortLetter ) <= 57 ) {
318 $sortLetter =
wfMessage(
'category-header-numerals' )->numParams( 0, 9 )->text();
329 if ( $this->firstLetterData ===
null ) {
331 $cacheKey =
$cache->makeKey(
335 $this->digitTransformLanguage->getCode(),
337 self::FIRST_LETTER_VERSION
339 $this->firstLetterData =
$cache->getWithSetCallback( $cacheKey, $cache::TTL_WEEK,
function () {
353 if ( isset( self::TAILORING_FIRST_LETTERS[$this->locale] ) ) {
354 $letters = require
"$IP/includes/collation/data/first-letters-root.php";
356 $letters = array_merge( $letters, self::TAILORING_FIRST_LETTERS[$this->locale] );
358 if ( isset( self::TAILORING_FIRST_LETTERS[
'-' . $this->locale] ) ) {
359 $letters = array_diff( $letters, self::TAILORING_FIRST_LETTERS[
'-' . $this->locale] );
362 $digits = [
'0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9' ];
363 $letters = array_diff( $letters, $digits );
364 foreach ( $digits as $digit ) {
365 $letters[] = $this->digitTransformLanguage->formatNumNoSeparators( $digit );
367 } elseif ( $this->locale ===
'root' ) {
368 $letters = require
"$IP/includes/collation/data/first-letters-root.php";
372 if ( $letters ===
false ) {
373 throw new MWException(
"MediaWiki does not support ICU locale " .
374 "\"{$this->locale}\"" );
388 foreach ( $letters as $letter ) {
390 if ( isset( $letterMap[$key] ) ) {
393 $comp = $this->mainCollator->compare( $letter, $letterMap[$key] );
394 wfDebug(
"Primary collision '$letter' '{$letterMap[$key]}' (comparison: $comp)" );
397 $comp = UtfNormal\Utils::utf8ToCodepoint( $letter ) <=>
398 UtfNormal\Utils::utf8ToCodepoint( $letterMap[$key] );
401 $letterMap[$key] = $letter;
404 $letterMap[$key] = $letter;
407 ksort( $letterMap, SORT_STRING );
444 $duplicatePrefixes = [];
445 foreach ( $letterMap as $key => $value ) {
448 $trimmedKey = rtrim( $key,
"\0" );
449 if ( $prev ===
false || $prev ===
'' ) {
462 if ( substr( $trimmedKey, 0, strlen( $prev ) ) === $prev ) {
463 $duplicatePrefixes[] = $key;
471 foreach ( $duplicatePrefixes as $badKey ) {
472 wfDebug(
"Removing '{$letterMap[$badKey]}' from first letters." );
473 unset( $letterMap[$badKey] );
477 'chars' => array_values( $letterMap ),
478 'keys' => array_keys( $letterMap ),
497 $file =
"$IP/serialized/$name";
498 if ( file_exists(
$file ) ) {
539 public static function isCjk( $codepoint ) {
540 foreach ( self::CJK_BLOCKS as $block ) {
541 if ( $codepoint >= $block[0] && $codepoint <= $block[1] ) {
556 $icuVersion = INTL_ICU_VERSION;
557 if ( !$icuVersion ) {
561 $versionPrefix = substr( $icuVersion, 0, 3 );
595 return $map[$versionPrefix] ??
false;
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
getLetterByIndex( $index)
getSortKeyByLetterIndex( $index)
const TAILORING_FIRST_LETTERS
Additional characters (or character groups) to be considered separate letters for given languages,...
getFirstLetter( $string)
Given a string, return the logical "first letter" to be used for grouping on category pages and so on...
const CJK_BLOCKS
Unified CJK blocks.
bool $useNumericCollation
getPrimarySortKey( $string)
static isCjk( $codepoint)
Test if a code point is a CJK (Chinese, Japanese, Korean) character.
const FIRST_LETTER_VERSION
__construct(LanguageFactory $languageFactory, $locale)
getPrecompiledData( $name)
Get an object from the precompiled serialized directory.
Language $digitTransformLanguage
Collator $primaryCollator
getSortKey( $string)
Given a string, convert it to a (hopefully short) key that can be used for efficient sorting.
static getUnicodeVersionForICU()
Return the version of Unicode appropriate for the version of ICU library currently in use,...
Internationalisation code See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.