// Collator setup for a given $locale. The enclosing function header and some
// intermediate lines are not visible in this excerpt.

// Fail early when the PHP intl extension (which provides the Collator class)
// is not loaded.
245 if ( !extension_loaded(
'intl' ) ) {
246 throw new MWException(
'An ICU collation was requested, ' .
247 'but the intl extension is not available.' );
// Split the locale at '@'. The language object used for digit transforms is
// built from the part before the first '@', except that the 'root' locale is
// mapped to 'en'.
252 $localeParts = explode(
'@',
$locale );
253 $this->digitTransformLanguage = MediaWikiServices::getInstance()->getLanguageFactory()
254 ->getLanguage(
$locale ===
'root' ?
'en' : $localeParts[0] );
// Collator::create() yields null on failure, so a falsy result is reported
// as an invalid locale.
256 $this->mainCollator = Collator::create(
$locale );
257 if ( !$this->mainCollator ) {
258 throw new MWException(
"Invalid ICU locale specified for collation: $locale" );
// Second collator for the same locale, restricted to primary strength.
261 $this->primaryCollator = Collator::create(
$locale );
262 $this->primaryCollator->setStrength( Collator::PRIMARY );
// A trailing '-u-kn' on $locale enables numeric collation on both collators,
// and the five-character suffix is stripped from $this->locale.
// NOTE(review): the test reads $locale but the strip reads $this->locale;
// presumably $this->locale was assigned from $locale on a line not shown
// here -- confirm.
265 if ( substr(
$locale, -5, 5 ) ===
'-u-kn' ) {
266 $this->useNumericCollation =
true;
268 $this->locale = substr( $this->locale, 0, -5 );
269 $this->mainCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
270 $this->primaryCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
// First-letter lookup for $string. The enclosing function header, the branch
// bodies and the return statements are not visible in this excerpt.

// Coerce the input to a string; the empty string is special-cased (its
// handling is not visible here).
283 $string = strval( $string );
284 if ( $string ===
'' ) {
// Take the first UTF-8 character (not the first byte) of the string.
288 $firstChar = mb_substr( $string, 0, 1,
'UTF-8' );
// ord() looks only at the first byte: a value above 0x7f means the character
// is not ASCII. Only then is the code point decoded and passed to
// self::isCjk().
291 if ( ord( $firstChar ) > 0x7f && self::isCjk( UtfNormal\Utils::utf8ToCodepoint( $firstChar ) ) ) {
// Search via ArrayUtils::findLowerBound(), using this object's
// getSortKeyByLetterIndex method as the value callback (the remaining
// arguments are not visible here). A strict false result is handled
// separately.
298 $min = ArrayUtils::findLowerBound(
299 [ $this,
'getSortKeyByLetterIndex' ],
304 if ( $min ===
false ) {
// Branch taken only when numeric collation is enabled.
311 if ( $this->useNumericCollation ) {
// 48..57 are the byte values of ASCII '0'..'9'. When $sortLetter starts with
// an ASCII digit it is replaced by the 'category-header-numerals' message
// text, rendered with the numeric parameters 0 and 9.
316 if ( ord( $sortLetter ) >= 48 && ord( $sortLetter ) <= 57 ) {
317 $sortLetter =
wfMessage(
'category-header-numerals' )->numParams( 0, 9 )->text();
// Lazy initialisation: the first-letter data is only loaded while
// $this->firstLetterData is still null.
328 if ( $this->firstLetterData ===
null ) {
// Build the cache key. The components visible here are the digit-transform
// language code and self::FIRST_LETTER_VERSION; other components and the
// origin of $cache are not visible in this excerpt.
330 $cacheKey =
$cache->makeKey(
334 $this->digitTransformLanguage->getCode(),
336 self::FIRST_LETTER_VERSION
// Read the value for $cacheKey through getWithSetCallback(), passing
// $cache::TTL_WEEK as the TTL and a closure as the callback.
// NOTE(review): presumably the closure only runs on a cache miss and its
// result is stored -- confirm against the cache API.
338 $this->firstLetterData =
$cache->getWithSetCallback( $cacheKey, $cache::TTL_WEEK,
function () {
// Builds the list of first letters for $this->locale and the map from key to
// letter. The enclosing function header, several branches and the closing
// braces are not visible in this excerpt.

// Locales with an entry in self::$tailoringFirstLetters start from the root
// first-letter list and append the locale's extra letters.
// NOTE(review): $IP is used in the require path; its declaration is not
// visible here -- confirm it is in scope.
352 if ( isset( self::$tailoringFirstLetters[$this->locale] ) ) {
353 $letters = require
"$IP/includes/collation/data/first-letters-root.php";
355 $letters = array_merge( $letters, self::$tailoringFirstLetters[$this->locale] );
// An entry keyed by '-' followed by the locale lists letters to remove again.
357 if ( isset( self::$tailoringFirstLetters[
'-' . $this->locale] ) ) {
358 $letters = array_diff( $letters, self::$tailoringFirstLetters[
'-' . $this->locale] );
// Remove the ASCII digits from the list and append, for each digit, its
// formatNum() rendering in the digit-transform language.
// NOTE(review): the meaning of the second formatNum() argument (true) is
// not visible here -- presumably it disables digit grouping; verify.
361 $digits = [
'0',
'1',
'2',
'3',
'4',
'5',
'6',
'7',
'8',
'9' ];
362 $letters = array_diff( $letters, $digits );
363 foreach ( $digits as $digit ) {
364 $letters[] = $this->digitTransformLanguage->formatNum( $digit,
true );
// The 'root' locale uses the root list unchanged.
366 } elseif ( $this->locale ===
'root' ) {
367 $letters = require
"$IP/includes/collation/data/first-letters-root.php";
// A strict false in $letters is reported as an unsupported locale (the
// assignment of false is not visible in this excerpt).
371 if ( $letters ===
false ) {
372 throw new MWException(
"MediaWiki does not support ICU locale " .
373 "\"{$this->locale}\"" );
// Fill $letterMap, indexed by $key.
// NOTE(review): the computation of $key is not visible here -- presumably
// a sort key derived from $letter; confirm.
387 foreach ( $letters as $letter ) {
// Another letter already occupies this key: log the collision together with
// the main collator's comparison of the two letters.
389 if ( isset( $letterMap[$key] ) ) {
392 $comp = $this->mainCollator->compare( $letter, $letterMap[$key] );
393 wfDebug(
"Primary collision '$letter' '{$letterMap[$key]}' (comparison: $comp)" );
// $comp is recomputed as a code point comparison (the spaceship operator
// yields -1, 0 or 1). The conditions that select this path, and those that
// decide whether $letter replaces the stored letter, are not visible here.
396 $comp = UtfNormal\Utils::utf8ToCodepoint( $letter ) <=>
397 UtfNormal\Utils::utf8ToCodepoint( $letterMap[$key] );
400 $letterMap[$key] = $letter;
403 $letterMap[$key] = $letter;
// Order the map by key, comparing keys as strings.
406 ksort( $letterMap, SORT_STRING );
// Collect keys whose value, with trailing NUL bytes trimmed, starts with
// $prev. A $prev of false or '' is special-cased.
// NOTE(review): how $prev is initialised and advanced is not visible here
// -- presumably it tracks the previously kept key; confirm.
443 $duplicatePrefixes = [];
444 foreach ( $letterMap as $key => $value ) {
447 $trimmedKey = rtrim( $key,
"\0" );
448 if ( $prev ===
false || $prev ===
'' ) {
461 if ( substr( $trimmedKey, 0, strlen( $prev ) ) === $prev ) {
462 $duplicatePrefixes[] = $key;
// Drop every collected entry from the map, logging the removed letter.
470 foreach ( $duplicatePrefixes as $badKey ) {
471 wfDebug(
"Removing '{$letterMap[$badKey]}' from first letters." );
472 unset( $letterMap[$badKey] );
// Result entries: 'chars' holds the letters and 'keys' holds their keys. Both
// come from the same map, so index i in one matches index i in the other.
476 'chars' => array_values( $letterMap ),
477 'keys' => array_keys( $letterMap ),