30 if ( !self::$instance ) {
31 global $wgCategoryCollation;
42 static function factory( $collationName ) {
43 switch ( $collationName ) {
54 if ( preg_match(
'/^uca-([a-z@=-]+)$/', $collationName, $match ) ) {
58 # Provide a mechanism for extensions to hook in.
59 $collationObject =
null;
60 wfRunHooks(
'Collation::factory',
array( $collationName, &$collationObject ) );
62 if ( $collationObject instanceof
Collation ) {
63 return $collationObject;
67 throw new MWException( __METHOD__ .
": unknown collation type \"$collationName\"" );
119 return $this->lang->uc( $string );
123 if ( $string[0] ==
"\0" ) {
124 $string = substr( $string, 1 );
126 return $this->lang->ucfirst( $this->lang->firstChar( $string ) );
146 if ( $string[0] ==
"\0" ) {
147 $string = substr( $string, 1 );
169 array( 0x2E80, 0x2EFF ),
170 array( 0x2F00, 0x2FDF ),
171 array( 0x2FF0, 0x2FFF ),
172 array( 0x3000, 0x303F ),
173 array( 0x31C0, 0x31EF ),
174 array( 0x3200, 0x32FF ),
175 array( 0x3300, 0x33FF ),
176 array( 0x3400, 0x4DBF ),
177 array( 0x4E00, 0x9FFF ),
178 array( 0xF900, 0xFAFF ),
179 array( 0xFE30, 0xFE4F ),
180 array( 0x20000, 0x2A6DF ),
181 array( 0x2A700, 0x2B73F ),
182 array( 0x2B740, 0x2B81F ),
183 array( 0x2F800, 0x2FA1F ),
209 'be' =>
array(
"Ё" ),
210 'be-tarask' =>
array(
"Ё" ),
212 'fi' =>
array(
"Å",
"Ä",
"Ö" ),
213 'hu' =>
array(
"Cs",
"Dz",
"Dzs",
"Gy",
"Ly",
"Ny",
"Ö",
"Sz",
"Ty",
"Ü",
"Zs" ),
215 'pl' =>
array(
"Ą",
"Ć",
"Ę",
"Ł",
"Ń",
"Ó",
"Ś",
"Ź",
"Ż" ),
218 'sv' =>
array(
"Å",
"Ä",
"Ö" ),
219 'sv@collation=standard' =>
array(
"Å",
"Ä",
"Ö" ),
220 'uk' =>
array(
"Ґ",
"Ь" ),
221 'vi' =>
array(
"Ă",
"Â",
"Đ",
"Ê",
"Ô",
"Ơ",
"Ư" ),
224 'ast' =>
array(
"Ch",
"Ll",
"Ñ" ),
225 'az' =>
array(
"Ç",
"Ə",
"Ğ",
"İ",
"Ö",
"Ş",
"Ü" ),
227 'br' =>
array(
"Ch",
"C'h" ),
228 'bs' =>
array(
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ),
231 'cs' =>
array(
"Č",
"Ch",
"Ř",
"Š",
"Ž" ),
232 'cy' =>
array(
"Ch",
"Dd",
"Ff",
"Ng",
"Ll",
"Ph",
"Rh",
"Th" ),
233 'da' =>
array(
"Æ",
"Ø",
"Å" ),
235 'dsb' =>
array(
"Č",
"Ć",
"Dź",
"Ě",
"Ch",
"Ł",
"Ń",
"Ŕ",
"Š",
"Ś",
"Ž",
"Ź" ),
237 'eo' =>
array(
"Ĉ",
"Ĝ",
"Ĥ",
"Ĵ",
"Ŝ",
"Ŭ" ),
238 'es' =>
array(
"Ñ" ),
239 'et' =>
array(
"Š",
"Ž",
"Õ",
"Ä",
"Ö",
"Ü" ),
240 'eu' =>
array(
"Ñ" ),
241 'fa' =>
array(
"آ",
"ء",
"ه" ),
242 'fo' =>
array(
"Á",
"Ð",
"Í",
"Ó",
"Ú",
"Ý",
"Æ",
"Ø",
"Å" ),
244 'fur' =>
array(
"À",
"Á",
"Â",
"È",
"Ì",
"Ò",
"Ù" ),
248 'gl' =>
array(
"Ch",
"Ll",
"Ñ" ),
249 'hr' =>
array(
"Č",
"Ć",
"Dž",
"Đ",
"Lj",
"Nj",
"Š",
"Ž" ),
250 'hsb' =>
array(
"Č",
"Dź",
"Ě",
"Ch",
"Ł",
"Ń",
"Ř",
"Š",
"Ć",
"Ž" ),
251 'is' =>
array(
"Á",
"Ð",
"É",
"Í",
"Ó",
"Ú",
"Ý",
"Þ",
"Æ",
"Ö",
"Å" ),
252 'kk' =>
array(
"Ү",
"І" ),
253 'kl' =>
array(
"Æ",
"Ø",
"Å" ),
254 'ku' =>
array(
"Ç",
"Ê",
"Î",
"Ş",
"Û" ),
255 'ky' =>
array(
"Ё" ),
258 'lt' =>
array(
"Č",
"Š",
"Ž" ),
259 'lv' =>
array(
"Č",
"Ģ",
"Ķ",
"Ļ",
"Ņ",
"Š",
"Ž" ),
261 'mo' =>
array(
"Ă",
"Â",
"Î",
"Ş",
"Ţ" ),
262 'mt' =>
array(
"Ċ",
"Ġ",
"Għ",
"Ħ",
"Ż" ),
264 'no' =>
array(
"Æ",
"Ø",
"Å" ),
267 'ro' =>
array(
"Ă",
"Â",
"Î",
"Ş",
"Ţ" ),
268 'rup' =>
array(
"Ă",
"Â",
"Î",
"Ľ",
"Ń",
"Ş",
"Ţ" ),
270 'sk' =>
array(
"Ä",
"Č",
"Ch",
"Ô",
"Š",
"Ž" ),
271 'sl' =>
array(
"Č",
"Š",
"Ž" ),
272 'smn' =>
array(
"Á",
"Č",
"Đ",
"Ŋ",
"Š",
"Ŧ",
"Ž",
"Æ",
"Ø",
"Å",
"Ä",
"Ö" ),
273 'sq' =>
array(
"Ç",
"Dh",
"Ë",
"Gj",
"Ll",
"Nj",
"Rr",
"Sh",
"Th",
"Xh",
"Zh" ),
275 'tk' =>
array(
"Ç",
"Ä",
"Ž",
"Ň",
"Ö",
"Ş",
"Ü",
"Ý" ),
276 'tl' =>
array(
"Ñ",
"Ng" ),
277 'tr' =>
array(
"Ç",
"Ğ",
"İ",
"Ö",
"Ş",
"Ü" ),
278 'tt' =>
array(
"Ә",
"Ө",
"Ү",
"Җ",
"Ң",
"Һ" ),
279 'uz' =>
array(
"Ch",
"G'",
"Ng",
"O'",
"Sh" ),
285 if ( !extension_loaded(
'intl' ) ) {
286 throw new MWException(
'An ICU collation was requested, ' .
287 'but the intl extension is not available.' );
292 $localeParts = explode(
'@',
$locale );
295 $this->mainCollator = Collator::create(
$locale );
296 if ( !$this->mainCollator ) {
297 throw new MWException(
"Invalid ICU locale specified for collation: $locale" );
300 $this->primaryCollator = Collator::create(
$locale );
301 $this->primaryCollator->setStrength( Collator::PRIMARY );
309 $key = $this->mainCollator->getSortKey( $string ) .
'';
316 $key = $this->primaryCollator->getSortKey( $string ) .
'';
322 $string = strval( $string );
323 if ( $string ===
'' ) {
328 $firstChar = mb_substr( $string, 0, 1,
'UTF-8' );
329 if ( ord( $firstChar ) > 0x7f && self::isCjk(
utf8ToCodepoint( $firstChar ) ) ) {
337 array( $this,
'getSortKeyByLetterIndex' ),
342 if ( $min ===
false ) {
350 if ( $this->firstLetterData !==
null ) {
355 $cacheKey =
wfMemcKey(
'first-letters', $this->locale, $this->digitTransformLanguage->getCode() );
356 $cacheEntry =
$cache->get( $cacheKey );
358 if ( $cacheEntry && isset( $cacheEntry[
'version'] )
359 && $cacheEntry[
'version'] == self::FIRST_LETTER_VERSION
361 $this->firstLetterData = $cacheEntry;
367 if ( isset( self::$tailoringFirstLetters[$this->locale] ) ) {
370 $letters = array_merge( $letters, self::$tailoringFirstLetters[$this->locale] );
372 if ( isset( self::$tailoringFirstLetters[
'-' . $this->locale] ) ) {
373 $letters = array_diff( $letters, self::$tailoringFirstLetters[
'-' . $this->locale] );
376 $digits =
array(
'0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9' );
377 $letters = array_diff( $letters, $digits );
378 foreach ( $digits
as $digit ) {
379 $letters[] = $this->digitTransformLanguage->formatNum( $digit,
true );
383 if ( $letters ===
false ) {
384 throw new MWException(
"MediaWiki does not support ICU locale " .
385 "\"{$this->locale}\"" );
397 $letterMap =
array();
398 foreach ( $letters
as $letter ) {
400 if ( isset( $letterMap[$key] ) ) {
403 if ( $this->mainCollator->compare( $letter, $letterMap[$key] ) < 0 ) {
404 $letterMap[$key] = $letter;
407 $letterMap[$key] = $letter;
410 ksort( $letterMap, SORT_STRING );
445 $duplicatePrefixes =
array();
446 foreach ( $letterMap
as $key =>
$value ) {
449 $trimmedKey = rtrim( $key,
"\0" );
450 if ( $prev ===
false || $prev ===
'' ) {
463 if ( substr( $trimmedKey, 0, strlen( $prev ) ) === $prev ) {
464 $duplicatePrefixes[] = $key;
472 foreach ( $duplicatePrefixes
as $badKey ) {
473 wfDebug(
"Removing '{$letterMap[$badKey]}' from first letters.\n" );
474 unset( $letterMap[$badKey] );
478 'chars' => array_values( $letterMap ),
479 'keys' => array_keys( $letterMap ),
480 'version' => self::FIRST_LETTER_VERSION,
487 $this->firstLetterData = $data;
488 $cache->set( $cacheKey, $data, 86400 * 7 );
493 if ( $this->firstLetterData ===
null ) {
496 return $this->firstLetterData[
'chars'][$index];
500 if ( $this->firstLetterData ===
null ) {
503 return $this->firstLetterData[
'keys'][$index];
507 if ( $this->firstLetterData ===
null ) {
510 return count( $this->firstLetterData[
'chars'] );
530 function findLowerBound( $valueCallback, $valueCount, $comparisonCallback, $target ) {
535 static function isCjk( $codepoint ) {
536 foreach ( self::$cjkBlocks
as $block ) {
537 if ( $codepoint >= $block[0] && $codepoint <= $block[1] ) {
557 return defined(
'INTL_ICU_VERSION' ) ? INTL_ICU_VERSION :
false;
569 if ( !$icuVersion ) {
573 $versionPrefix = substr( $icuVersion, 0, 3 );
588 if ( isset( $map[$versionPrefix] ) ) {
589 return $map[$versionPrefix];
604 parent::__construct(
'fa' );