Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
95.45% |
42 / 44 |
|
71.43% |
5 / 7 |
CRAP | |
0.00% |
0 / 1 |
RemoteIcuCollation | |
95.45% |
42 / 44 |
|
71.43% |
5 / 7 |
13 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
getSortKey | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
encode | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
decode | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
getSortKeys | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
2 | |||
getFirstLetter | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
doGetSortKeys | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
4.02 |
1 | <?php |
2 | |
3 | use MediaWiki\Shell\ShellboxClientFactory; |
4 | use Shellbox\RPC\RpcClient; |
5 | |
/**
 * An ICU collation that uses a remote server to compute sort keys. This can be
 * used in conjunction with $wgTempCategoryCollations to migrate to a different
 * version of ICU.
 *
 * Sort keys are requested in batches through a Shellbox RPC client obtained
 * for the 'icu-collation' service; the remote side executes doGetSortKeys().
 * Lists of strings travel in both directions as a single binary blob in which
 * every string is preceded by its byte length written as eight hex digits.
 */
class RemoteIcuCollation extends Collation {
	/** RPC client used to run doGetSortKeys() remotely */
	private RpcClient $rpcClient;

	/** ICU locale passed to the remote Collator */
	private string $locale;

	/**
	 * @param ShellboxClientFactory $shellboxClientFactory Source of the RPC client
	 * @param string $locale The ICU locale
	 */
	public function __construct( ShellboxClientFactory $shellboxClientFactory, string $locale ) {
		$this->rpcClient = $shellboxClientFactory->getRpcClient(
			[ 'service' => 'icu-collation' ] );
		$this->locale = $locale;
	}

	/**
	 * Get the sort key for a single string. This is a batch of one sent
	 * through getSortKeys().
	 *
	 * @param string $string
	 * @return string
	 */
	public function getSortKey( $string ) {
		return $this->getSortKeys( [ $string ] )[0];
	}

	/**
	 * Encode an array of binary strings as a string
	 *
	 * Each element is written as its byte length in eight zero-padded hex
	 * digits, immediately followed by the raw bytes. Array keys are not
	 * stored; only the order of the values is preserved.
	 *
	 * @param string[] $strings
	 * @return string
	 */
	private static function encode( $strings ): string {
		$ret = '';
		foreach ( $strings as $s ) {
			$ret .= sprintf( "%08x", strlen( $s ) ) . $s;
		}
		return $ret;
	}

	/**
	 * Decode the value returned by encode()
	 *
	 * The blob is validated while it is read, so that a damaged or truncated
	 * blob raises an error instead of silently yielding shortened strings.
	 *
	 * @param string $blob
	 * @return string[] The decoded strings, as a list in their encoded order
	 * @throws RuntimeException If a length prefix is not eight hex digits, or
	 *   if a declared length runs past the end of the blob
	 */
	private static function decode( $blob ): array {
		$total = strlen( $blob );
		$p = 0;
		$ret = [];
		while ( $p < $total ) {
			// The prefix must be exactly eight hex digits. strspn() also
			// comes up short when fewer than eight bytes remain.
			if ( strspn( $blob, '0123456789abcdefABCDEF', $p, 8 ) !== 8 ) {
				throw new RuntimeException(
					__METHOD__ . ": malformed length prefix at offset $p" );
			}
			$len = intval( substr( $blob, $p, 8 ), 16 );
			$p += 8;
			// Refuse a payload that claims more bytes than are left.
			if ( $len > $total - $p ) {
				throw new RuntimeException(
					__METHOD__ . ": truncated string at offset $p" );
			}
			$ret[] = substr( $blob, $p, $len );
			$p += $len;
		}
		return $ret;
	}

	/**
	 * Get sort keys for a batch of strings with a single remote call.
	 *
	 * @param string[] $strings
	 * @return string[] The sort keys, indexed by the keys of $strings
	 */
	public function getSortKeys( $strings ) {
		// Nothing to compute, so skip the remote call entirely.
		if ( !count( $strings ) ) {
			return [];
		}
		$blob = $this->rpcClient->call(
			'icu-collation',
			self::class . '::' . 'doGetSortKeys',
			[
				$this->locale,
				// The wire format carries values only; the caller's keys
				// are put back onto the result below.
				self::encode( array_values( $strings ) )
			],
			[
				'classes' => [ parent::class, self::class ],
				'binary' => true
			]
		);
		return array_combine(
			array_keys( $strings ),
			self::decode( $blob )
		);
	}

	/**
	 * Not supported by this collation.
	 *
	 * @param string $string
	 * @throws RuntimeException Always
	 */
	public function getFirstLetter( $string ) {
		// @phan-suppress-previous-line PhanPluginNeverReturnMethod
		throw new RuntimeException( __METHOD__ . ': not implemented' );
	}

	/**
	 * The remote entry point. Get sort keys for an encoded list of inputs.
	 *
	 * @param string $locale The ICU locale
	 * @param string $blob The input array encoded with encode()
	 * @return string The encoded result
	 * @throws RuntimeException If no Collator can be created for $locale, or
	 *   if $blob is not a well-formed encoded list
	 */
	public static function doGetSortKeys( $locale, $blob ) {
		$mainCollator = Collator::create( $locale );
		if ( !$mainCollator ) {
			throw new RuntimeException( "Invalid ICU locale specified for collation: $locale" );
		}

		// If the special suffix for numeric collation is present, turn on numeric collation.
		if ( str_ends_with( $locale, '-u-kn' ) ) {
			$mainCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
		}
		$ret = [];
		foreach ( self::decode( $blob ) as $string ) {
			// NOTE(review): Collator::getSortKey() is documented to return
			// false on failure; such a value is encoded below as an empty
			// key. Confirm that this is the intended handling.
			$ret[] = $mainCollator->getSortKey( $string );
		}
		return self::encode( $ret );
	}
}