248 if ( !extension_loaded(
'intl' ) ) {
249 throw new MWException(
'An ICU collation was requested, ' .
250 'but the intl extension is not available.' );
255 $localeParts = explode(
'@',
$locale );
256 $this->digitTransformLanguage = Language::factory(
$locale ===
'root' ?
'en' : $localeParts[0] );
258 $this->mainCollator = Collator::create(
$locale );
259 if ( !$this->mainCollator ) {
260 throw new MWException(
"Invalid ICU locale specified for collation: $locale" );
263 $this->primaryCollator = Collator::create(
$locale );
264 $this->primaryCollator->setStrength( Collator::PRIMARY );
267 if ( substr(
$locale, -5, 5 ) ===
'-u-kn' ) {
268 $this->useNumericCollation =
true;
270 $this->locale = substr( $this->locale, 0, -5 );
271 $this->mainCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
272 $this->primaryCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
285 $string = strval( $string );
286 if ( $string ===
'' ) {
290 $firstChar = mb_substr( $string, 0, 1,
'UTF-8' );
293 if ( ord( $firstChar ) > 0x7f && self::isCjk( UtfNormal\Utils::utf8ToCodepoint( $firstChar ) ) ) {
300 $min = ArrayUtils::findLowerBound(
301 [ $this,
'getSortKeyByLetterIndex' ],
306 if ( $min ===
false ) {
313 if ( $this->useNumericCollation ) {
318 if ( ord( $sortLetter ) >= 48 && ord( $sortLetter ) <= 57 ) {
319 $sortLetter =
wfMessage(
'category-header-numerals' )->numParams( 0, 9 )->text();
330 if ( $this->firstLetterData ===
null ) {
332 $cacheKey =
$cache->makeKey(
336 $this->digitTransformLanguage->getCode(),
338 self::FIRST_LETTER_VERSION
340 $this->firstLetterData =
$cache->getWithSetCallback( $cacheKey, $cache::TTL_WEEK,
function () {
354 if ( isset( self::$tailoringFirstLetters[$this->locale] ) ) {
355 $letters = require
"$IP/includes/collation/data/first-letters-root.php";
357 $letters = array_merge( $letters, self::$tailoringFirstLetters[$this->locale] );
359 if ( isset( self::$tailoringFirstLetters[
'-' . $this->locale] ) ) {
360 $letters = array_diff( $letters, self::$tailoringFirstLetters[
'-' . $this->locale] );
363 $digits = [
'0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9' ];
364 $letters = array_diff( $letters, $digits );
365 foreach ( $digits as $digit ) {
366 $letters[] = $this->digitTransformLanguage->formatNum( $digit,
true );
368 } elseif ( $this->locale ===
'root' ) {
369 $letters = require
"$IP/includes/collation/data/first-letters-root.php";
373 if ( $letters ===
false ) {
374 throw new MWException(
"MediaWiki does not support ICU locale " .
375 "\"{$this->locale}\"" );
389 foreach ( $letters as $letter ) {
391 if ( isset( $letterMap[$key] ) ) {
394 $comp = $this->mainCollator->compare( $letter, $letterMap[$key] );
395 wfDebug(
"Primary collision '$letter' '{$letterMap[$key]}' (comparison: $comp)\n" );
398 $comp = UtfNormal\Utils::utf8ToCodepoint( $letter ) <=>
399 UtfNormal\Utils::utf8ToCodepoint( $letterMap[$key] );
402 $letterMap[$key] = $letter;
405 $letterMap[$key] = $letter;
408 ksort( $letterMap, SORT_STRING );
445 $duplicatePrefixes = [];
446 foreach ( $letterMap as $key => $value ) {
449 $trimmedKey = rtrim( $key,
"\0" );
450 if ( $prev ===
false || $prev ===
'' ) {
463 if ( substr( $trimmedKey, 0, strlen( $prev ) ) === $prev ) {
464 $duplicatePrefixes[] = $key;
472 foreach ( $duplicatePrefixes as $badKey ) {
473 wfDebug(
"Removing '{$letterMap[$badKey]}' from first letters.\n" );
474 unset( $letterMap[$badKey] );
478 'chars' => array_values( $letterMap ),
479 'keys' => array_keys( $letterMap ),