Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
85.59% |
101 / 118 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
ArrayFlattener | |
85.59% |
101 / 118 |
|
0.00% |
0 / 6 |
49.79 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
flatten | |
88.24% |
15 / 17 |
|
0.00% |
0 / 1 |
7.08 | |||
flattenCLDRPlurals | |
95.65% |
22 / 23 |
|
0.00% |
0 / 1 |
10 | |||
unflatten | |
68.97% |
20 / 29 |
|
0.00% |
0 / 1 |
11.42 | |||
unflattenCLDRPlurals | |
95.00% |
38 / 40 |
|
0.00% |
0 / 1 |
11 | |||
compareContent | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
6.10 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace MediaWiki\Extension\Translate\MessageProcessing; |
5 | |
6 | use InvalidArgumentException; |
7 | use MediaWiki\Extension\Translate\Utilities\Utilities; |
8 | |
9 | /** |
10 | * Flattens message arrays for further processing. Supports parsing CLDR |
11 | * plural messages and converting them into MediaWiki's {{PLURAL}} syntax |
12 | * in a single message. |
13 | * |
14 | * @author Niklas Laxström |
15 | * @author Erik Moeller |
16 | * @license GPL-2.0-or-later |
17 | * @since 2016.01 |
18 | */ |
class ArrayFlattener {
	/** @var string Separator placed between path segments in flattened keys */
	protected $sep;
	/** @var bool Whether CLDR plural arrays are folded to/from {{PLURAL}} syntax */
	protected $parseCLDRPlurals;
	/**
	 * Reserved CLDR plural keywords. Only the keys matter; the values are dummies
	 * so that membership can be tested with isset().
	 * @var array<string,int>
	 */
	protected static $pluralWords = [
		'zero' => 1,
		'one' => 1,
		'many' => 1,
		'few' => 1,
		'other' => 1,
		'two' => 1
	];

	/**
	 * @param string $sep Separator used between path segments in flattened keys
	 * @param bool $parseCLDRPlurals Whether to convert CLDR plural arrays to and
	 *  from a single {{PLURAL}} message
	 */
	public function __construct( string $sep = '.', bool $parseCLDRPlurals = false ) {
		$this->sep = $sep;
		$this->parseCLDRPlurals = $parseCLDRPlurals;
	}

	/**
	 * Flattens multidimensional array by using the path to the value as key
	 * with each individual key joined by the configured separator.
	 *
	 * When CLDR plural parsing is enabled, a lowest-level array consisting only of
	 * plural keywords is collapsed into one {{PLURAL}} string stored under its
	 * parent key instead of being expanded into separate keys.
	 *
	 * @param array $unflat Array of messages
	 * @return array Single-level array keyed by the joined path
	 * @throws InvalidArgumentException If plural parsing is enabled and plural
	 *  keywords are mixed with other keys (see flattenCLDRPlurals)
	 */
	public function flatten( array $unflat ): array {
		$flat = [];

		foreach ( $unflat as $key => $value ) {
			if ( !is_array( $value ) ) {
				$flat[$key] = $value;
				continue;
			}

			$plurals = $this->parseCLDRPlurals ? $this->flattenCLDRPlurals( $value ) : false;
			if ( is_string( $plurals ) ) {
				$flat[$key] = $plurals;
				continue;
			}

			// Prefix the children with the current key and flatten them recursively.
			$prefixed = [];
			foreach ( $value as $subKey => $subValue ) {
				$prefixed["$key{$this->sep}$subKey"] = $subValue;
			}
			// Array union: a key that is already present in $flat is kept as is.
			$flat += $this->flatten( $prefixed );
		}

		return $flat;
	}

	/**
	 * Flattens arrays that contain CLDR plural keywords into single values using
	 * MediaWiki's plural syntax.
	 *
	 * @param array $messages Candidate array of plural forms keyed by plural keyword
	 * @return bool|string The {{PLURAL}} string, or false if $messages is not a
	 *  plural array (contains nested arrays, has no plural keyword, or has `other`
	 *  as its only plural keyword next to non-plural keys)
	 * @throws InvalidArgumentException If plural keywords other than a lone `other`
	 *  are mixed with non-plural keys
	 */
	public function flattenCLDRPlurals( array $messages ) {
		$hasNonPluralKeys = false;
		$pluralKeys = [];
		foreach ( $messages as $key => $value ) {
			if ( is_array( $value ) ) {
				// Plurals can only happen in the lowest level of the structure
				return false;
			}

			// Check if we find any reserved plural keyword
			if ( isset( self::$pluralWords[$key] ) ) {
				$pluralKeys[] = $key;
			} else {
				$hasNonPluralKeys = true;
			}
		}

		// No plural keys at all, we can skip
		if ( !$pluralKeys ) {
			return false;
		}

		// Mixed plural keys with other keys, should not happen
		if ( $hasNonPluralKeys ) {
			// Allow `other` with other keys, as long it is only one of the reserved ones
			if ( $pluralKeys === [ 'other' ] ) {
				return false;
			}

			$keys = implode( ', ', array_keys( $messages ) );
			throw new InvalidArgumentException( "Reserved plural keywords mixed with other keys: $keys." );
		}

		$pls = '{{PLURAL';
		foreach ( $messages as $key => $value ) {
			if ( $key === 'other' ) {
				continue;
			}

			$pls .= "|$key=$value";
		}

		// Put the "other" alternative last, without other= prefix.
		$other = isset( $messages['other'] ) ? '|' . $messages['other'] : '';
		$pls .= "$other}}";

		return $pls;
	}

	/**
	 * Performs the reverse operation of flatten. Each separator occurrence
	 * in the key starts a new subarray in the final array.
	 *
	 * When CLDR plural parsing is enabled, string values containing {{PLURAL}}
	 * blocks are first expanded into one key per plural form.
	 *
	 * @param array $flat Array of messages
	 * @return array Nested array
	 */
	public function unflatten( array $flat ): array {
		if ( $this->parseCLDRPlurals ) {
			$expanded = [];
			foreach ( $flat as $key => $value ) {
				// Numeric keys arrive as integers and values need not be strings;
				// under strict_types both would be rejected by the string-typed
				// parser, so cast the key and leave non-string values untouched.
				$plurals = is_string( $value )
					? $this->unflattenCLDRPlurals( (string)$key, $value )
					: false;

				if ( is_array( $plurals ) ) {
					$expanded += $plurals;
				} else {
					$expanded[$key] = $value;
				}
			}
			$flat = $expanded;
		}

		$unflat = [];
		foreach ( $flat as $key => $value ) {
			// Cast: explode() does not accept integer keys under strict_types.
			$path = explode( $this->sep, (string)$key );
			$lastKey = array_pop( $path );

			// Walk down the remaining segments, creating levels as needed.
			$pointer = &$unflat;
			foreach ( $path as $level ) {
				if ( !isset( $pointer[$level] ) ) {
					$pointer[$level] = [];
				}
				$pointer = &$pointer[$level];
			}

			$pointer[$lastKey] = $value;
			// Break the reference so the next iteration cannot write through it.
			unset( $pointer );
		}

		return $unflat;
	}

	/**
	 * Converts the plural syntax to array of CLDR style plurals
	 *
	 * @param string $key Message key; each form is returned under "$key<sep><form>"
	 * @param string $message Message text, possibly containing {{PLURAL|...}} blocks
	 * @return bool|array False if the message has no plural block, otherwise the
	 *  full message text per plural form. The `other` form is always present.
	 */
	public function unflattenCLDRPlurals( string $key, string $message ) {
		// Quick escape.
		if ( !str_contains( $message, '{{PLURAL' ) ) {
			return false;
		}

		/*
		 * Replace all variables with placeholders, so that their braces do not
		 * interfere with finding the end of a plural block. Possible source of
		 * bugs if other characters than given below are used.
		 */
		$regex = '/\{[a-z_-]+}/i';
		$placeholders = [];
		$match = [];

		while ( preg_match( $regex, $message, $match ) ) {
			$uniqkey = Utilities::getPlaceholder();
			$placeholders[$uniqkey] = $match[0];
			$search = preg_quote( $match[0], '~' );
			$message = preg_replace( "~$search~", $uniqkey, $message );
		}

		// Then replace (possible multiple) plural instances into placeholders.
		$regex = '~\{\{PLURAL\|(.*?)}}~s';
		$matches = [];
		$match = [];

		while ( preg_match( $regex, $message, $match ) ) {
			$uniqkey = Utilities::getPlaceholder();
			$matches[$uniqkey] = $match;
			$message = preg_replace( $regex, $uniqkey, $message, 1 );
		}

		// No plurals, should not happen.
		if ( !count( $matches ) ) {
			return false;
		}

		// The final array of alternative plurals forms.
		$alts = [];

		/*
		 * Then loop through each plural block and replace the placeholders
		 * to construct the alternatives. Produces invalid output if there are
		 * multiple plural blocks which don't have the same set of keys.
		 */
		$pluralChoice = implode( '|', array_keys( self::$pluralWords ) );
		$regex = "~($pluralChoice)\s*=\s*(.*)~s";
		foreach ( $matches as $ph => $plu ) {
			$forms = explode( '|', $plu[1] );

			foreach ( $forms as $form ) {
				$match = [];
				if ( preg_match( $regex, $form, $match ) ) {
					$formWord = "$key{$this->sep}{$match[1]}";
					$formText = $match[2];
				} else {
					// A form without "keyword=" prefix is the `other` form.
					$formWord = "$key{$this->sep}other";
					$formText = $form;
				}

				// Every form starts out as the whole message with plural placeholders.
				if ( !isset( $alts[$formWord] ) ) {
					$alts[$formWord] = $message;
				}

				$alts[$formWord] = str_replace( $ph, $formText, $alts[$formWord] );
			}
		}

		// Replace other variables.
		$variableKeys = array_keys( $placeholders );
		$variableValues = array_values( $placeholders );
		foreach ( $alts as $formWord => $text ) {
			$alts[$formWord] = str_replace( $variableKeys, $variableValues, $text );
		}

		$otherKey = "$key{$this->sep}other";
		if ( !isset( $alts[$otherKey] ) ) {
			// Ensure other form is always present, even if missing from the translation
			$alts[$otherKey] = end( $alts );
		}

		return $alts;
	}

	/**
	 * Compares two strings for equal content, taking PLURAL expansion into account.
	 *
	 * @param string|null $a
	 * @param string|null $b
	 * @return bool True if both are identical, or (with plural parsing enabled)
	 *  if both expand to the same set of plural forms with identical texts
	 */
	public function compareContent( ?string $a, ?string $b ): bool {
		// Identical input is equal content by definition; no need to parse.
		if ( $a === $b ) {
			return true;
		}

		if ( !$this->parseCLDRPlurals || $a === null || $b === null ) {
			return false;
		}

		$a2 = $this->unflattenCLDRPlurals( 'prefix', $a );
		$b2 = $this->unflattenCLDRPlurals( 'prefix', $b );

		// Without a parseable plural on both sides only identity counts, and that
		// was ruled out above.
		if ( $a2 === false || $b2 === false ) {
			return false;
		}

		// Require key-value pairs to match but ignore order. Sorting and comparing
		// strictly avoids the numeric-string juggling of `==`, which would treat
		// forms such as '1e3' and '1000' as equal.
		ksort( $a2, SORT_STRING );
		ksort( $b2, SORT_STRING );

		return $a2 === $b2;
	}
}