Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
90.48% |
19 / 21 |
|
75.00% |
3 / 4 |
CRAP | |
0.00% |
0 / 1 |
CustomUppercaseCollation | |
90.48% |
19 / 21 |
|
75.00% |
3 / 4 |
9.07 | |
0.00% |
0 / 1 |
__construct | |
85.71% |
12 / 14 |
|
0.00% |
0 / 1 |
5.07 | |||
convertToPua | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getSortKey | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getFirstLetter | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @since 1.30 |
19 | * |
20 | * @file |
21 | */ |
22 | |
23 | use MediaWiki\Languages\LanguageFactory; |
24 | |
25 | /** |
26 | * Resort normal UTF-8 order by putting a bunch of stuff in PUA |
27 | * |
28 | * This takes a bunch of characters (the alphabet) that should |
29 | * be together, and converts them all to private-use-area characters |
30 | * so that they are all sorted in the right order relative to each |
31 | * other. |
32 | * |
33 | * This renumbers characters starting at U+F3000 (Chosen to avoid |
34 | * conflicts with other people using private use area) |
35 | * |
36 | * This does not support fancy things like secondary differences, etc. |
37 | * (It supports digraphs, trigraphs etc. though.) |
38 | * |
39 | * It is expected most people will subclass this and just override the |
40 | * constructor to hard-code an alphabet. |
41 | */ |
class CustomUppercaseCollation extends NumericUppercaseCollation {

	/**
	 * @var string[] Alphabet entries after being passed through the digit-transform
	 *  language's uc() method; used as the search list in convertToPua(). Kept
	 *  index-aligned with $puaSubset and $firstLetters.
	 */
	private $alphabet;

	/**
	 * @var string[] Four-byte strings starting at "\xF3\xB3\x80\x80" (U+F3000), one per
	 *  alphabet entry; used as the replacement list in convertToPua().
	 */
	private $puaSubset;

	/**
	 * @var string[] Alphabet entries exactly as supplied by the caller; these are the
	 *  values returned by getFirstLetter().
	 */
	private $firstLetters;

	/**
	 * @note This assumes $alphabet does not contain U+F3000-U+F3FFF
	 *
	 * @param LanguageFactory $languageFactory
	 * @param array $alphabet Sorted array of uppercase characters. Array keys are
	 *  ignored; only the order of the values matters.
	 * @param string|Language $digitTransformLang What language for number sorting.
	 * @throws UnexpectedValueException If $alphabet is empty or has 4096 or more items.
	 */
	public function __construct(
		LanguageFactory $languageFactory,
		array $alphabet,
		$digitTransformLang
	) {
		// Normalise to a plain list. array_multisort() below only re-indexes numeric
		// keys, so a string-keyed alphabet would leave $firstLetters keyed differently
		// from $puaSubset and break the index lookup in getFirstLetter().
		$alphabet = array_values( $alphabet );
		$len = count( $alphabet );

		// Each entry gets a code of the form F3 B3 xx yy with xx and yy in 0x80-0xBF,
		// which gives room for 64 * 64 = 4096 distinct codes.
		if ( $len < 1 || $len >= 4096 ) {
			throw new UnexpectedValueException(
				"Alphabet must be < 4096 items and must not be empty"
			);
		}

		$this->firstLetters = $alphabet;
		$digitTransformLang = $digitTransformLang instanceof Language
			? $digitTransformLang
			: $languageFactory->getLanguage( $digitTransformLang );
		// For digraphs, only the first letter is capitalized in input
		$this->alphabet = array_map( [ $digitTransformLang, 'uc' ], $alphabet );

		$this->puaSubset = [];
		for ( $i = 0; $i < $len; $i++ ) {
			$this->puaSubset[] = "\xF3\xB3" . chr( (int)floor( $i / 64 ) + 128 ) . chr( ( $i % 64 ) + 128 );
		}

		// Sort these arrays so that any trigraphs, digraphs etc. are first
		// (and they get replaced first in convertToPua()).
		$lengths = array_map( 'mb_strlen', $this->alphabet );
		array_multisort( $lengths, SORT_DESC, $this->firstLetters, $this->alphabet, $this->puaSubset );

		parent::__construct( $languageFactory, $digitTransformLang );
	}

	/**
	 * Replace every occurrence of an alphabet entry with its private-use-area code.
	 *
	 * Entries are replaced in the order established by the constructor (longest
	 * first), so multi-character entries are handled before the single characters
	 * they contain.
	 *
	 * @param string $string
	 * @return string
	 */
	private function convertToPua( $string ) {
		return str_replace( $this->alphabet, $this->puaSubset, $string );
	}

	/**
	 * Build the sort key by taking the parent collation's sort key and mapping the
	 * custom alphabet into the private use area.
	 *
	 * @param string $string
	 * @return string
	 */
	public function getSortKey( $string ) {
		return $this->convertToPua( parent::getSortKey( $string ) );
	}

	/**
	 * Get the first-letter heading for a string.
	 *
	 * @param string $string
	 * @return string The alphabet entry (as originally supplied to the constructor)
	 *  that the sort key starts with, or the parent collation's first letter if the
	 *  sort key does not start with one of our PUA codes.
	 */
	public function getFirstLetter( $string ) {
		$sortkey = $this->getSortKey( $string );

		// In case a title begins with a character from our alphabet, return the corresponding
		// first-letter. (This also happens if the title has a corresponding PUA code in it, to avoid
		// inconsistent behaviour. This class mostly assumes that people will not use PUA codes.)
		// Every PUA code generated by the constructor is exactly 4 bytes long, hence the
		// byte-wise substr(). Strict comparison avoids any loose string juggling.
		$index = array_search( substr( $sortkey, 0, 4 ), $this->puaSubset, true );
		if ( $index !== false ) {
			return $this->firstLetters[ $index ];
		}

		// String begins with a character outside of our alphabet, fall back
		return parent::getFirstLetter( $string );
	}
}