Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ArrayFlattener.php
1<?php
2declare( strict_types = 1 );
3
4namespace MediaWiki\Extension\Translate\MessageProcessing;
5
6use InvalidArgumentException;
8
/**
 * Reserved CLDR plural keywords. They are stored as array keys so that
 * membership can be tested with isset(); the values carry no meaning.
 * @var int[]
 */
protected static $pluralWords = [
	'zero' => 1,
	'one' => 1,
	'many' => 1,
	'few' => 1,
	'other' => 1,
	'two' => 1
];

/** @var string Separator placed between the levels of a flattened key */
protected $sep;

/** @var bool Whether arrays of CLDR plural keywords are converted to and from plural syntax */
protected $parseCLDRPlurals;
33
/**
 * @param string $sep Separator placed between the levels of a flattened key
 * @param bool $parseCLDRPlurals Whether arrays of CLDR plural keywords are
 *  converted to and from plural syntax
 */
public function __construct( string $sep = '.', bool $parseCLDRPlurals = false ) {
	$this->parseCLDRPlurals = $parseCLDRPlurals;
	$this->sep = $sep;
}
38
/**
 * Collapses a nested array into a single level.
 *
 * Non-array entries are copied unchanged. A nested array is either turned into
 * one plural string (when CLDR plural parsing is enabled and
 * flattenCLDRPlurals() produces a value for it) or expanded recursively, with
 * the keys of each level joined by the separator.
 *
 * @param array $unflat Nested array
 * @return array Single-level array
 */
public function flatten( array $unflat ): array {
	$result = [];

	foreach ( $unflat as $name => $item ) {
		if ( is_array( $item ) ) {
			$pluralized = $this->parseCLDRPlurals ? $this->flattenCLDRPlurals( $item ) : false;

			if ( $pluralized ) {
				$result[$name] = $pluralized;
			} else {
				// Prefix the children with the parent key and let the recursion deal with them.
				$prefixed = [];
				foreach ( $item as $childName => $childItem ) {
					$prefixed[$name . $this->sep . $childName] = $childItem;
				}
				$result += $this->flatten( $prefixed );
			}

			// The nested input is no longer needed once it has been processed.
			unset( $unflat[$name] );
		} else {
			$result[$name] = $item;
		}
	}

	return $result;
}
75
/**
 * Flattens an array keyed by CLDR plural keywords into a single string in
 * {{PLURAL|keyword=text|...|other text}} syntax.
 *
 * @param array $messages Candidate array of plural forms
 * @return string|false The plural string, or false when the array is not a
 *  set of plural forms (nested arrays, no reserved keyword, or `other` as the
 *  only reserved keyword next to unrelated keys)
 * @throws InvalidArgumentException When reserved keywords are mixed with other keys
 */
public function flattenCLDRPlurals( array $messages ) {
	$reservedKeys = [];
	$sawOtherKeys = false;

	foreach ( $messages as $name => $text ) {
		// Plural forms exist only on the deepest level, so any nested array rules them out.
		if ( is_array( $text ) ) {
			return false;
		}

		if ( isset( self::$pluralWords[$name] ) ) {
			$reservedKeys[] = $name;
			continue;
		}

		$sawOtherKeys = true;
	}

	// Nothing to do without a single reserved keyword.
	if ( $reservedKeys === [] ) {
		return false;
	}

	if ( $sawOtherKeys ) {
		// A lone `other` among unrelated keys is tolerated; any other mixture is an error.
		if ( $reservedKeys !== [ 'other' ] ) {
			$keys = implode( ', ', array_keys( $messages ) );
			throw new InvalidArgumentException( "Reserved plural keywords mixed with other keys: $keys." );
		}

		return false;
	}

	$forms = [];
	foreach ( $messages as $name => $text ) {
		if ( $name !== 'other' ) {
			$forms[] = "|$name=$text";
		}
	}

	// The "other" alternative always goes last and carries no other= prefix.
	if ( isset( $messages['other'] ) ) {
		$forms[] = '|' . $messages['other'];
	}

	return '{{PLURAL' . implode( '', $forms ) . '}}';
}
129
/**
 * Performs the reverse operation of flatten().
 *
 * When CLDR plural parsing is enabled, string values that
 * unflattenCLDRPlurals() recognises are first expanded into one entry per
 * plural form. After that every key is split on the separator and the value
 * is stored at the resulting path of nested arrays.
 *
 * @param array $flat Single-level array
 * @return array Nested array
 */
public function unflatten( array $flat ): array {
	if ( $this->parseCLDRPlurals ) {
		$expanded = [];
		foreach ( $flat as $key => $value ) {
			// PHP stores integer-like string keys as integers and values are not
			// guaranteed to be strings, whereas unflattenCLDRPlurals() accepts
			// strings only. Cast the key and leave non-string values untouched
			// instead of failing with a TypeError under strict types.
			$plurals = is_string( $value )
				? $this->unflattenCLDRPlurals( (string)$key, $value )
				: false;

			if ( is_array( $plurals ) ) {
				$expanded += $plurals;
			} else {
				$expanded[$key] = $value;
			}
		}
		$flat = $expanded;
	}

	$unflat = [];

	foreach ( $flat as $key => $value ) {
		// The cast is required for integer keys: explode() rejects them under strict types.
		$path = explode( $this->sep, (string)$key );
		if ( count( $path ) === 1 ) {
			$unflat[$key] = $value;
			continue;
		}

		// Walk down the path, creating missing levels, and store the value
		// under the last path component.
		$lastKey = array_pop( $path );
		$pointer = &$unflat;
		foreach ( $path as $level ) {
			if ( !isset( $pointer[$level] ) ) {
				$pointer[$level] = [];
			}
			$pointer = &$pointer[$level];
		}
		$pointer[$lastKey] = $value;

		// Drop the reference so that it cannot leak into the next iteration.
		unset( $pointer );
	}

	return $unflat;
}
185
/**
 * Converts the plural syntax to an array of CLDR style plurals.
 *
 * Each {{PLURAL|...}} block in the message is split into its forms, and one
 * full copy of the message is produced per form keyword, keyed by
 * "$key<separator><keyword>". Forms without a `keyword=` prefix are treated
 * as the `other` form.
 *
 * @param string $key Message key used as prefix for the returned keys
 * @param string $message Message text
 * @return string[]|false Plural forms, or false when the message contains no plural block
 */
public function unflattenCLDRPlurals( string $key, string $message ) {
	// Quick escape.
	if ( !str_contains( $message, '{{PLURAL' ) ) {
		return false;
	}

	/*
	 * Replace all variables with placeholders. Possible source of bugs
	 * if characters other than those given below are used.
	 */
	$regex = '/\{[a-z_-]+}/i';
	$placeholders = [];
	$match = [];

	while ( preg_match( $regex, $message, $match ) ) {
		$uniqkey = Utilities::getPlaceholder();
		$placeholders[$uniqkey] = $match[0];
		$search = preg_quote( $match[0], '~' );
		$message = preg_replace( "~$search~", $uniqkey, $message );
	}

	// Then replace (possibly multiple) plural instances with placeholders.
	$regex = '~\{\{PLURAL\|(.*?)}}~s';
	$matches = [];
	$match = [];

	while ( preg_match( $regex, $message, $match ) ) {
		$uniqkey = Utilities::getPlaceholder();
		$matches[$uniqkey] = $match;
		$message = preg_replace( $regex, $uniqkey, $message, 1 );
	}

	// No plurals, should not happen.
	if ( !count( $matches ) ) {
		return false;
	}

	// The final array of alternative plural forms.
	$alts = [];

	/*
	 * Then loop through each plural block, replacing the placeholders
	 * to construct the alternatives. Produces invalid output if there are
	 * multiple plural blocks which don't have the same set of keys.
	 */
	$pluralChoice = implode( '|', array_keys( self::$pluralWords ) );
	// NOTE(review): the pattern is not anchored, so a keyword followed by "="
	// anywhere inside a form is treated as that form's prefix. Confirm this is intended.
	$regex = "~($pluralChoice)\s*=\s*(.*)~s";
	foreach ( $matches as $ph => $plu ) {
		$forms = explode( '|', $plu[1] );

		foreach ( $forms as $form ) {
			$match = [];
			if ( preg_match( $regex, $form, $match ) ) {
				$formWord = "$key{$this->sep}{$match[1]}";
				$value = $match[2];
			} else {
				$formWord = "$key{$this->sep}other";
				$value = $form;
			}

			// Every form starts from the full message with the plural placeholders in it.
			if ( !isset( $alts[$formWord] ) ) {
				$alts[$formWord] = $message;
			}

			$alts[$formWord] = str_replace( $ph, $value, $alts[$formWord] );
		}
	}

	// Restore the original variables. Assigning by key avoids leaving a
	// dangling reference to the last element behind.
	$placeholderKeys = array_keys( $placeholders );
	$placeholderValues = array_values( $placeholders );
	foreach ( $alts as $formWord => $text ) {
		$alts[$formWord] = str_replace( $placeholderKeys, $placeholderValues, $text );
	}

	if ( !isset( $alts["$key{$this->sep}other"] ) ) {
		// Ensure the other form is always present, even if missing from the translation
		$alts["$key{$this->sep}other"] = end( $alts );
	}

	return $alts;
}
271
/**
 * Compares two strings for equal content, taking PLURAL expansion into account.
 *
 * Without CLDR plural parsing, or when either value is null or cannot be
 * parsed as a plural, this is a plain strict comparison. Otherwise both
 * strings are expanded into their plural forms and the forms are compared
 * regardless of the order in which they appear.
 *
 * @param string|null $a
 * @param string|null $b
 * @return bool
 */
public function compareContent( ?string $a, ?string $b ): bool {
	if ( !$this->parseCLDRPlurals || $a === null || $b === null ) {
		return $a === $b;
	}

	$a2 = $this->unflattenCLDRPlurals( 'prefix', $a );
	$b2 = $this->unflattenCLDRPlurals( 'prefix', $b );

	// Fall back to regular comparison if parsing fails.
	if ( $a2 === false || $b2 === false ) {
		return $a === $b;
	}

	// Require the key-value pairs to match but ignore their order. A loose
	// comparison is not suitable here: it compares numeric-looking strings as
	// numbers, so forms such as "1e3" and "1000" would count as equal.
	ksort( $a2, SORT_STRING );
	ksort( $b2, SORT_STRING );

	return $a2 === $b2;
}
289}
Flattens message arrays for further processing.
compareContent(?string $a, ?string $b)
Compares two strings for equal content, taking PLURAL expansion into account.
flattenCLDRPlurals(array $messages)
Flattens arrays that contain CLDR plural keywords into single values using MediaWiki's plural syntax.
unflatten(array $flat)
Performs the reverse operation of flatten.
flatten(array $unflat)
Flattens multidimensional array by using the path to the value as key with each individual key separated by the configured separator (a dot by default).
unflattenCLDRPlurals(string $key, string $message)
Converts the plural syntax to array of CLDR style plurals.
Essentially random collection of helper functions, similar to GlobalFunctions.php.
Definition Utilities.php:31