MediaWiki  1.34.0
CustomUppercaseCollation.php
Go to the documentation of this file.
1 <?php
41 
/** @var array Uppercased alphabet entries, ordered longest-first; the search list used by convertToPua() */
43  private $alphabet;
44 
/** @var string[] 4-byte replacement codes, kept index-aligned with $alphabet and $firstLetters */
46  private $puaSubset;
47 
/** @var array Alphabet entries as passed to the constructor; the values returned by getFirstLetter() */
49  private $firstLetters;
50 
/**
 * Build the collation from an ordered custom alphabet.
 *
 * Every alphabet entry is paired with a 4-byte replacement code whose byte order follows the
 * entry's position in $alphabet. The three internal arrays are then sorted together so that
 * longer entries (trigraphs, digraphs, ...) come first and are replaced first.
 *
 * @param array $alphabet Letters (or multi-character entries) in the desired sort order;
 *  must contain between 1 and 4095 entries. Keys are ignored.
 * @param Language $lang Used to uppercase the alphabet entries and passed on to the
 *  parent constructor.
 * @throws UnexpectedValueException If $alphabet is empty or has 4096 or more entries.
 */
public function __construct( array $alphabet, Language $lang ) {
	// Drop caller-supplied keys: array_multisort() below preserves string keys, which would
	// leave $firstLetters out of step with the integer-indexed $puaSubset.
	$alphabet = array_values( $alphabet );
	$len = count( $alphabet );
	if ( $len < 1 || $len >= 4096 ) {
		throw new UnexpectedValueException( "Alphabet must have between 1 and 4095 items" );
	}
	$this->firstLetters = $alphabet;
	// For digraphs, only the first letter is capitalized in input
	$this->alphabet = array_map( [ $lang, 'uc' ], $alphabet );

	$this->puaSubset = [];
	for ( $i = 0; $i < $len; $i++ ) {
		// The last two bytes each carry 6 bits of the index (64 * 64 = 4096 codes).
		// intdiv() keeps the operand an int; floor() would hand chr() a float.
		$this->puaSubset[] = "\xF3\xB3" . chr( intdiv( $i, 64 ) + 128 ) . chr( ( $i % 64 ) + 128 );
	}

	// Sort these arrays so that any trigraphs, digraphs etc. are first
	// (and they get replaced first in convertToPua()).
	$lengths = array_map( 'mb_strlen', $this->alphabet );
	array_multisort( $lengths, SORT_DESC, $this->firstLetters, $this->alphabet, $this->puaSubset );

	parent::__construct( $lang );
}
78 
/**
 * Substitute every occurrence of an alphabet entry in the string with its replacement code.
 *
 * @param string $string
 * @return string
 */
private function convertToPua( $string ) {
	$search = $this->alphabet;
	$replacements = $this->puaSubset;

	return str_replace( $search, $replacements, $string );
}
82 
/**
 * Compute the parent collation's sort key, then swap alphabet entries for their
 * replacement codes.
 *
 * @param string $string
 * @return string
 */
public function getSortKey( $string ) {
	$parentKey = parent::getSortKey( $string );

	return $this->convertToPua( $parentKey );
}
86 
/**
 * Return the first-letter heading for a string.
 *
 * If the sort key starts with one of this collation's 4-byte replacement codes, the matching
 * entry of the original alphabet is returned; otherwise the parent implementation decides.
 *
 * @param string $string
 * @return string
 */
public function getFirstLetter( $string ) {
	$sortkey = $this->getSortKey( $string );

	// In case a title begins with a character from our alphabet, return the corresponding
	// first-letter. (This also happens if the title has a corresponding PUA code in it, to avoid
	// inconsistent behaviour. This class mostly assumes that people will not use PUA codes.)
	// Strict search: only an exact 4-byte match counts, never a loosely-equal value.
	$index = array_search( substr( $sortkey, 0, 4 ), $this->puaSubset, true );
	if ( $index !== false ) {
		return $this->firstLetters[ $index ];
	}

	// String begins with a character outside of our alphabet, fall back
	return parent::getFirstLetter( $string );
}
101 }
CustomUppercaseCollation\$alphabet
$alphabet
Definition: CustomUppercaseCollation.php:43
CustomUppercaseCollation\convertToPua
convertToPua( $string)
Definition: CustomUppercaseCollation.php:79
CustomUppercaseCollation
Re-sorts the normal UTF-8 order by mapping the letters of a custom alphabet to code points in the Unicode Private Use Area (PUA).
Definition: CustomUppercaseCollation.php:40
CustomUppercaseCollation\getFirstLetter
getFirstLetter( $string)
Given a string, return the logical "first letter" to be used for grouping on category pages and so on...
Definition: CustomUppercaseCollation.php:87
CustomUppercaseCollation\$puaSubset
$puaSubset
Definition: CustomUppercaseCollation.php:46
CustomUppercaseCollation\getSortKey
getSortKey( $string)
Given a string, convert it to a (hopefully short) key that can be used for efficient sorting.
Definition: CustomUppercaseCollation.php:83
NumericUppercaseCollation
Collation that orders text with numbers "naturally", so that 'Foo 1' < 'Foo 2' < 'Foo 12'.
Definition: NumericUppercaseCollation.php:35
CustomUppercaseCollation\$firstLetters
array $firstLetters
Definition: CustomUppercaseCollation.php:49
CustomUppercaseCollation\__construct
__construct(array $alphabet, Language $lang)
Definition: CustomUppercaseCollation.php:57
UppercaseCollation\$lang
$lang
Definition: UppercaseCollation.php:25
Language
Internationalisation code.
Definition: Language.php:37