Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ArrayFlattener.php
1<?php
2declare( strict_types = 1 );
3
4namespace MediaWiki\Extension\Translate\MessageProcessing;
5
6use InvalidArgumentException;
8
20 protected string $sep;
21 protected bool $parseCLDRPlurals;
22 // For CLDR pluralization rules
23 protected static $pluralWords = [
24 'zero' => 1,
25 'one' => 1,
26 'many' => 1,
27 'few' => 1,
28 'other' => 1,
29 'two' => 1
30 ];
31
/**
 * @param string $sep Separator inserted between the keys of nested levels.
 * @param bool $parseCLDRPlurals Whether arrays of CLDR plural keywords are converted
 *  to and from the {{PLURAL}} syntax.
 */
public function __construct( string $sep = '.', bool $parseCLDRPlurals = false ) {
	$this->parseCLDRPlurals = $parseCLDRPlurals;
	$this->sep = $sep;
}
36
/**
 * Flattens a multidimensional array into a single level. The key of each
 * value is the path to it, with the individual keys joined by the separator.
 *
 * When CLDR plural parsing is enabled, an array made of plural keywords is
 * collapsed into one {{PLURAL}} string instead of being flattened further.
 *
 * @param array $unflat
 * @return array
 * @throws InvalidArgumentException Propagated from flattenCLDRPlurals().
 */
public function flatten( array $unflat ): array {
	$result = [];

	foreach ( $unflat as $key => $value ) {
		if ( is_array( $value ) ) {
			$pluralString = $this->parseCLDRPlurals
				? $this->flattenCLDRPlurals( $value )
				: false;

			if ( $pluralString ) {
				$result[$key] = $pluralString;
			} else {
				// Prefix every child key with the current key, then recurse.
				$prefixed = [];
				foreach ( $value as $childKey => $childValue ) {
					$prefixed[$key . $this->sep . $childKey] = $childValue;
				}
				// Array union: keys already present in $result are kept.
				$result += $this->flatten( $prefixed );
			}

			// Can as well keep only one copy around.
			unset( $unflat[$key] );
		} else {
			$result[$key] = $value;
		}
	}

	return $result;
}
73
/**
 * Flattens an array of CLDR plural keywords into a single value using the
 * {{PLURAL|keyword=text|...|other text}} syntax.
 *
 * @param array $messages
 * @return string|false The plural string, or false when $messages is not a
 *  plural array (it contains nested arrays, no reserved keywords, or only
 *  `other` alongside non-reserved keys).
 * @throws InvalidArgumentException When reserved plural keywords are mixed
 *  with other keys.
 */
public function flattenCLDRPlurals( array $messages ) {
	$reservedFound = [];
	$foreignKeySeen = false;

	foreach ( $messages as $name => $text ) {
		// Plurals can only happen in the lowest level of the structure
		if ( is_array( $text ) ) {
			return false;
		}

		// Sort each key into "reserved plural keyword" or "anything else".
		if ( isset( self::$pluralWords[$name] ) ) {
			$reservedFound[] = $name;
		} else {
			$foreignKeySeen = true;
		}
	}

	// No plural keys at all, we can skip
	if ( $reservedFound === [] ) {
		return false;
	}

	// Mixed plural keys with other keys, should not happen
	if ( $foreignKeySeen ) {
		// `other` is tolerated next to regular keys, as long as it is the only reserved one
		if ( $reservedFound !== [ 'other' ] ) {
			$keys = implode( ', ', array_keys( $messages ) );
			throw new InvalidArgumentException( "Reserved plural keywords mixed with other keys: $keys." );
		}

		return false;
	}

	$parts = [ '{{PLURAL' ];
	foreach ( $messages as $name => $text ) {
		if ( $name !== 'other' ) {
			$parts[] = "$name=$text";
		}
	}

	// Put the "other" alternative last, without other= prefix.
	if ( isset( $messages['other'] ) ) {
		$parts[] = $messages['other'];
	}

	return implode( '|', $parts ) . '}}';
}
127
/**
 * Performs the reverse operation of flatten(): keys are split on the
 * separator and the values are placed into nested arrays.
 *
 * When CLDR plural parsing is enabled, string values containing
 * {{PLURAL}} blocks are first expanded into one entry per plural form.
 *
 * @param array $flat
 * @return array
 */
public function unflatten( array $flat ): array {
	$unflat = [];

	if ( $this->parseCLDRPlurals ) {
		$unflattenedPlurals = [];
		foreach ( $flat as $key => $value ) {
			$plurals = false;
			// Only strings can carry plural syntax. PHP turns numeric string keys
			// into integers, so cast the key back: this file uses strict_types and
			// unflattenCLDRPlurals() requires string arguments.
			if ( is_string( $value ) ) {
				$plurals = $this->unflattenCLDRPlurals( (string)$key, $value );
			}
			if ( is_array( $plurals ) ) {
				// Array union: entries that already exist are kept.
				$unflattenedPlurals += $plurals;
			} else {
				$unflattenedPlurals[$key] = $value;
			}
		}
		$flat = $unflattenedPlurals;
	}

	foreach ( $flat as $key => $value ) {
		// Integer keys would make explode() throw a TypeError under strict_types.
		$path = explode( $this->sep, (string)$key );
		if ( count( $path ) === 1 ) {
			$unflat[$key] = $value;
			continue;
		}

		// Everything but the last path element names a nested level.
		$lastKey = array_pop( $path );

		$pointer = &$unflat;
		foreach ( $path as $level ) {
			// Make sure the level exists before descending into it.
			if ( !isset( $pointer[$level] ) ) {
				$pointer[$level] = [];
			}
			$pointer = &$pointer[$level];
		}
		$pointer[$lastKey] = $value;

		// Drop the reference so it cannot leak into the next iteration.
		unset( $pointer );
	}

	return $unflat;
}
183
/**
 * Converts the {{PLURAL}} syntax into an array of CLDR style plural forms.
 *
 * @param string $key Message key; each plural keyword is appended to it
 *  using the separator.
 * @param string $message Message text, possibly containing {{PLURAL|...}} blocks.
 * @return array|false Map of "<key><separator><keyword>" to the message text
 *  for that form, or false when the message contains no plural block.
 */
public function unflattenCLDRPlurals( string $key, string $message ) {
	// Quick escape.
	if ( !str_contains( $message, '{{PLURAL' ) ) {
		return false;
	}

	/*
	 * Replace all variables with placeholders. Possible source of bugs
	 * if characters other than those given below are used.
	 */
	$regex = '/\{[a-z_-]+}/i';
	$placeholders = [];
	$match = [];

	while ( preg_match( $regex, $message, $match ) ) {
		$uniqkey = Utilities::getPlaceholder();
		$placeholders[$uniqkey] = $match[0];
		// Literal replacement of every occurrence of this variable.
		$message = str_replace( $match[0], $uniqkey, $message );
	}

	// Then replace (possibly multiple) plural instances with placeholders.
	$regex = '~\{\{PLURAL\|(.*?)}}~s';
	$matches = [];
	$match = [];

	while ( preg_match( $regex, $message, $match ) ) {
		$uniqkey = Utilities::getPlaceholder();
		$matches[$uniqkey] = $match;
		$message = preg_replace( $regex, $uniqkey, $message, 1 );
	}

	// No plurals, should not happen.
	if ( !count( $matches ) ) {
		return false;
	}

	// The final array of alternative plurals forms.
	$alts = [];

	/*
	 * Then loop through each plural block, replacing the placeholders
	 * to construct the alternatives. Produces invalid output if there are
	 * multiple plural blocks which don't have the same set of keys.
	 */
	$pluralChoice = implode( '|', array_keys( self::$pluralWords ) );
	// Anchored at the start of the form: a keyword followed by "=" somewhere
	// inside free text (e.g. "someone=x") must not be taken for a plural keyword.
	$regex = "~^\s*($pluralChoice)\s*=\s*(.*)~s";
	foreach ( $matches as $ph => $plu ) {
		$forms = explode( '|', $plu[1] );

		foreach ( $forms as $form ) {
			$match = [];
			if ( preg_match( $regex, $form, $match ) ) {
				$formWord = "$key{$this->sep}{$match[1]}";
				$value = $match[2];
			} else {
				// Forms without a keyword prefix are the "other" form.
				$formWord = "$key{$this->sep}other";
				$value = $form;
			}

			// Each form starts out as the full message with plural placeholders.
			if ( !isset( $alts[$formWord] ) ) {
				$alts[$formWord] = $message;
			}

			$string = $alts[$formWord];
			$alts[$formWord] = str_replace( $ph, $value, $string );
		}
	}

	// Replace other variables. Assign by key rather than iterating by
	// reference, so that no dangling reference is left behind.
	$search = array_keys( $placeholders );
	$replace = array_values( $placeholders );
	foreach ( $alts as $formWord => $text ) {
		$alts[$formWord] = str_replace( $search, $replace, $text );
	}

	if ( !isset( $alts["$key{$this->sep}other"] ) ) {
		// Ensure other form is always present, even if missing from the translation
		$alts["$key{$this->sep}other"] = end( $alts );
	}

	return $alts;
}
269
/**
 * Compares two strings for equal content, taking PLURAL expansion into account.
 *
 * When CLDR plural parsing is enabled and both strings contain plural
 * blocks, they are equal if they expand to the same set of plural forms,
 * regardless of the order of the forms. Otherwise the strings are compared
 * directly.
 *
 * @param string|null $a
 * @param string|null $b
 * @return bool
 */
public function compareContent( ?string $a, ?string $b ): bool {
	if ( !$this->parseCLDRPlurals || $a === null || $b === null ) {
		return $a === $b;
	}

	$a2 = $this->unflattenCLDRPlurals( 'prefix', $a );
	$b2 = $this->unflattenCLDRPlurals( 'prefix', $b );

	// Fall back to regular comparison if parsing fails.
	if ( $a2 === false || $b2 === false ) {
		return $a === $b;
	}

	// Require key-value pairs to match, but ignore order. Sort by key and
	// compare strictly: a loose == would treat numeric-looking forms such as
	// '1' and '1.0' as equal.
	ksort( $a2, SORT_STRING );
	ksort( $b2, SORT_STRING );

	return $a2 === $b2;
}
287}
Flattens message arrays for further processing.
compareContent(?string $a, ?string $b)
Compares two strings for equal content, taking PLURAL expansion into account.
flattenCLDRPlurals(array $messages)
Flattens arrays that contain CLDR plural keywords into single values using MediaWiki's plural syntax.
unflatten(array $flat)
Performs the reverse operation of flatten.
flatten(array $unflat)
Flattens a multidimensional array by using the path to the value as the key, with each individual key separated by the separator.
unflattenCLDRPlurals(string $key, string $message)
Converts the plural syntax to array of CLDR style plurals.
Essentially random collection of helper functions, similar to GlobalFunctions.php.
Definition Utilities.php:31