MediaWiki
REL1_35
CustomUppercaseCollation.php
Go to the documentation of this file.
1
<?php
40
/**
 * Resort normal UTF-8 order by putting a bunch of stuff in PUA.
 *
 * Every entry of the custom alphabet is assigned a four-byte UTF-8 sequence
 * in the range U+F3000..U+F3FFF, in alphabet order. Sort keys produced by the
 * parent collation have each alphabet entry replaced by its assigned sequence,
 * so that byte-wise comparison of the keys follows the custom alphabet.
 *
 * @note This class assumes that neither the alphabet nor the sorted strings
 *  contain code points from U+F3000..U+F3FFF themselves.
 */
class CustomUppercaseCollation extends NumericUppercaseCollation {

	/** @var string[] Sorted array of letters (upper-cased via Language::uc) */
	private $alphabet;

	/** @var string[] List of private use area codes, parallel to $alphabet */
	private $puaSubset;

	/** @var string[] Alphabet entries exactly as supplied by the caller, parallel to $alphabet */
	private $firstLetters;

	/**
	 * @param array $alphabet Letters (or digraphs, trigraphs, ...) in the desired sort order.
	 *  Must contain between 1 and 4095 entries; array keys are ignored.
	 * @param Language $lang Used to upper-case the alphabet; also handed to the parent collation.
	 * @throws UnexpectedValueException If the alphabet is empty or has 4096 or more entries.
	 */
	public function __construct( array $alphabet, Language $lang ) {
		// Discard caller-supplied keys: the three parallel arrays below must share
		// plain list indexes, otherwise the lookup in getFirstLetter() breaks.
		$alphabet = array_values( $alphabet );
		$len = count( $alphabet );

		if ( $len < 1 ) {
			throw new UnexpectedValueException( "Alphabet must not be empty" );
		}
		// Only 64 * 64 = 4096 codes can be produced by varying the last two bytes.
		if ( $len >= 4096 ) {
			throw new UnexpectedValueException( "Alphabet must be < 4096 items" );
		}

		$this->firstLetters = $alphabet;
		// For digraphs, only the first letter is capitalized in input
		$this->alphabet = array_map( [ $lang, 'uc' ], $alphabet );

		// Entry $i maps to bytes F3 B3 (80 + $i div 64) (80 + $i mod 64),
		// i.e. code point U+F3000 + $i.
		$this->puaSubset = [];
		for ( $i = 0; $i < $len; $i++ ) {
			$this->puaSubset[] = "\xF3\xB3" . chr( intdiv( $i, 64 ) + 128 ) . chr( ( $i % 64 ) + 128 );
		}

		// Sort these arrays so that any trigraphs, digraphs etc. are first
		// (and they get replaced first in convertToPua()).
		$lengths = array_map( 'mb_strlen', $this->alphabet );
		array_multisort( $lengths, SORT_DESC, $this->firstLetters, $this->alphabet, $this->puaSubset );

		parent::__construct( $lang );
	}

	/**
	 * Replace every occurrence of an alphabet entry with its private use area code.
	 *
	 * Replacement is done by str_replace() in array order, which is why the
	 * constructor puts the longest alphabet entries first.
	 *
	 * @param string $string
	 * @return string
	 */
	private function convertToPua( $string ) {
		return str_replace( $this->alphabet, $this->puaSubset, $string );
	}

	/**
	 * Given a string, convert it to a (hopefully short) key that can be used
	 * for efficient sorting.
	 *
	 * The key is the parent collation's sort key with alphabet entries
	 * converted to their private use area codes.
	 *
	 * @param string $string
	 * @return string
	 */
	public function getSortKey( $string ) {
		return $this->convertToPua( parent::getSortKey( $string ) );
	}

	/**
	 * Given a string, return the logical "first letter" to be used for
	 * grouping on category pages and so on.
	 *
	 * @param string $string
	 * @return string The alphabet entry as originally passed to the constructor when the
	 *  sort key starts with one of our codes, otherwise whatever the parent collation returns.
	 */
	public function getFirstLetter( $string ) {
		$sortkey = $this->getSortKey( $string );

		// In case a title begins with a character from our alphabet, return the corresponding
		// first-letter. (This also happens if the title has a corresponding PUA code in it, to avoid
		// inconsistent behaviour. This class mostly assumes that people will not use PUA codes.)
		// Every code in $puaSubset is exactly four bytes long, hence the byte-wise substr().
		$index = array_search( substr( $sortkey, 0, 4 ), $this->puaSubset, true );
		if ( $index !== false ) {
			return $this->firstLetters[ $index ];
		}

		// String begins with a character outside of our alphabet, fall back
		return parent::getFirstLetter( $string );
	}
}
CustomUppercaseCollation
Resort normal UTF-8 order by putting a bunch of stuff in PUA.
Definition
CustomUppercaseCollation.php:40
CustomUppercaseCollation\getSortKey
getSortKey( $string)
Given a string, convert it to a (hopefully short) key that can be used for efficient sorting.
Definition
CustomUppercaseCollation.php:83
CustomUppercaseCollation\$puaSubset
array $puaSubset
List of private use area codes.
Definition
CustomUppercaseCollation.php:46
CustomUppercaseCollation\__construct
__construct(array $alphabet, Language $lang)
Definition
CustomUppercaseCollation.php:57
CustomUppercaseCollation\getFirstLetter
getFirstLetter( $string)
Given a string, return the logical "first letter" to be used for grouping on category pages and so on...
Definition
CustomUppercaseCollation.php:87
CustomUppercaseCollation\$firstLetters
array $firstLetters
Definition
CustomUppercaseCollation.php:49
CustomUppercaseCollation\convertToPua
convertToPua( $string)
Definition
CustomUppercaseCollation.php:79
CustomUppercaseCollation\$alphabet
array $alphabet
Sorted array of letters.
Definition
CustomUppercaseCollation.php:43
Language
Internationalisation code. See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
Definition
Language.php:41
NumericUppercaseCollation
Collation that orders text with numbers "naturally", so that 'Foo 1' < 'Foo 2' < 'Foo 12'.
Definition
NumericUppercaseCollation.php:35
UppercaseCollation\$lang
$lang
Definition
UppercaseCollation.php:27
includes
collation
CustomUppercaseCollation.php
Generated on Sat Apr 6 2024 00:07:01 for MediaWiki by
1.9.8