Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ArrayFlattener.php
1<?php
2declare( strict_types = 1 );
3
4namespace MediaWiki\Extension\Translate\MessageProcessing;
5
7use MWException;
8
/** Separator string: joins key segments in flatten() and splits keys in unflatten(). */
21 protected $sep;
/** Flag set by the constructor; when true, flatten(), unflatten() and compareContent() handle CLDR plurals. */
23 protected $parseCLDRPlurals;
24 // For CLDR pluralization rules
/*
 * Reserved plural keywords. Only the keys are consulted (through isset() and
 * array_keys()); every value is the same dummy 1.
 */
25 protected static $pluralWords = [
26 'zero' => 1,
27 'one' => 1,
28 'many' => 1,
29 'few' => 1,
30 'other' => 1,
31 'two' => 1
32 ];
33
/**
 * @param string $sep Separator placed between key segments of nested arrays.
 * @param bool $parseCLDRPlurals Whether arrays keyed by CLDR plural keywords are
 *  converted to and from the {{PLURAL|...}} syntax.
 */
public function __construct( string $sep = '.', bool $parseCLDRPlurals = false ) {
	$this->parseCLDRPlurals = $parseCLDRPlurals;
	$this->sep = $sep;
}
38
/**
 * Flattens a multidimensional array into a single level.
 *
 * Keys of nested arrays are joined to their parent key with the configured
 * separator. When CLDR plural parsing is enabled, a nested array that
 * flattenCLDRPlurals() can convert is stored as one string instead.
 *
 * @param array $unflat Possibly nested array.
 * @return array Single-level array.
 * @throws MWException Propagated from flattenCLDRPlurals().
 */
public function flatten( array $unflat ): array {
	$result = [];

	foreach ( $unflat as $key => $value ) {
		if ( is_array( $value ) ) {
			$pluralSyntax = $this->parseCLDRPlurals
				? $this->flattenCLDRPlurals( $value )
				: false;

			if ( $pluralSyntax ) {
				$result[$key] = $pluralSyntax;
			} else {
				// Prefix every child key with the parent key and recurse.
				$prefixed = [];
				foreach ( $value as $childKey => $childValue ) {
					$prefixed[$key . $this->sep . $childKey] = $childValue;
				}
				$result += $this->flatten( $prefixed );
			}

			// The processed branch is no longer needed in the local copy.
			unset( $unflat[$key] );
		} else {
			$result[$key] = $value;
		}
	}

	return $result;
}
74
/**
 * Flattens an array keyed by CLDR plural keywords into one string that uses
 * the {{PLURAL|keyword=text|other text}} syntax.
 *
 * @param array $messages Array of keys and values.
 * @return bool|string The plural string, or false when the array is not a
 *  plural definition (contains a nested array, has no reserved keyword, or
 *  only uses `other` next to non-reserved keys).
 * @throws MWException When reserved keywords other than a lone `other` are
 *  mixed with non-reserved keys.
 */
public function flattenCLDRPlurals( array $messages ) {
	$reservedFound = [];
	$sawForeignKey = false;

	foreach ( $messages as $name => $content ) {
		// Plural definitions only exist at the deepest level of the structure.
		if ( is_array( $content ) ) {
			return false;
		}

		if ( isset( self::$pluralWords[$name] ) ) {
			$reservedFound[] = $name;
		} else {
			$sawForeignKey = true;
		}
	}

	// Nothing plural-like here.
	if ( $reservedFound === [] ) {
		return false;
	}

	if ( $sawForeignKey ) {
		// A lone `other` next to regular keys is tolerated and treated as a regular key.
		if ( $reservedFound === [ 'other' ] ) {
			return false;
		}

		$keys = implode( ', ', array_keys( $messages ) );
		throw new MWException( "Reserved plural keywords mixed with other keys: $keys." );
	}

	$parts = [];
	foreach ( $messages as $name => $content ) {
		if ( $name !== 'other' ) {
			$parts[] = "|$name=$content";
		}
	}

	// The "other" alternative always goes last and carries no keyword prefix.
	if ( isset( $messages['other'] ) ) {
		$parts[] = '|' . $messages['other'];
	}

	return '{{PLURAL' . implode( '', $parts ) . '}}';
}
129
/**
 * Performs the reverse operation of flatten().
 *
 * When CLDR plural parsing is enabled, string values containing the
 * {{PLURAL|...}} syntax are first expanded into one entry per plural keyword.
 * Afterwards every key is split on the separator and the value is stored at
 * the corresponding nested position.
 *
 * @param array $flat Single-level array.
 * @return array Nested array.
 */
public function unflatten( array $flat ): array {
	if ( $this->parseCLDRPlurals ) {
		$expanded = [];
		foreach ( $flat as $key => $value ) {
			// PHP stores decimal-integer string keys as ints, and this file uses
			// strict_types, so the key is cast back to string and only string
			// values are parsed; otherwise a TypeError would be thrown.
			$plurals = is_string( $value )
				? $this->unflattenCLDRPlurals( (string)$key, $value )
				: false;

			if ( is_array( $plurals ) ) {
				$expanded += $plurals;
			} else {
				$expanded[$key] = $value;
			}
		}
		$flat = $expanded;
	}

	$unflat = [];

	foreach ( $flat as $key => $value ) {
		// Cast for the same reason as above: explode() rejects int under strict_types.
		$path = explode( $this->sep, (string)$key );
		if ( count( $path ) === 1 ) {
			$unflat[$key] = $value;
			continue;
		}

		// Walk (and create where missing) every level except the last one,
		// then store the value under the last path segment.
		$lastKey = array_pop( $path );
		$pointer = &$unflat;
		foreach ( $path as $level ) {
			if ( !isset( $pointer[$level] ) ) {
				$pointer[$level] = [];
			}
			$pointer = &$pointer[$level];
		}
		$pointer[$lastKey] = $value;

		// Drop the reference so the next iteration cannot write through it.
		unset( $pointer );
	}

	return $unflat;
}
184
/**
 * Converts the {{PLURAL|keyword=text|other text}} syntax into an array of
 * CLDR style plurals.
 *
 * Text outside the plural blocks is repeated in every alternative. Produces
 * invalid output if there are multiple plural blocks which do not have the
 * same set of keywords.
 *
 * @param string $key Message key; the plural keyword is appended to it using the separator.
 * @param string $message The message string.
 * @return array|false Map of "<key><separator><keyword>" to message text, or
 *  false when the message contains no {{PLURAL|...}} block.
 */
public function unflattenCLDRPlurals( string $key, string $message ) {
	// Quick escape.
	if ( strpos( $message, '{{PLURAL' ) === false ) {
		return false;
	}

	/*
	 * Replace all variables with placeholders. Possible source of bugs
	 * if characters other than those given below are used.
	 */
	$regex = '/\{[a-z_-]+}/i';
	$placeholders = [];
	$match = [];

	while ( preg_match( $regex, $message, $match ) ) {
		$uniqkey = Utilities::getPlaceholder();
		$placeholders[$uniqkey] = $match[0];
		// The variable is a literal string, so a plain replacement is enough.
		$message = str_replace( $match[0], $uniqkey, $message );
	}

	// Then replace (possibly multiple) plural instances with placeholders.
	$regex = '~\{\{PLURAL\|(.*?)}}~s';
	$matches = [];
	$match = [];

	while ( preg_match( $regex, $message, $match ) ) {
		$uniqkey = Utilities::getPlaceholder();
		$matches[$uniqkey] = $match;
		$message = preg_replace( $regex, $uniqkey, $message, 1 );
	}

	// No plurals, should not happen.
	if ( !count( $matches ) ) {
		return false;
	}

	// The final array of alternative plural forms.
	$alts = [];

	/*
	 * Loop through each plural block and substitute its placeholder to
	 * construct the alternatives. The keyword must be at the start of the
	 * form: without the anchor, a form such as "someone = x" would be taken
	 * for keyword "one" and its real text would be lost.
	 */
	$pluralChoice = implode( '|', array_keys( self::$pluralWords ) );
	$regex = "~^\s*($pluralChoice)\s*=\s*(.*)~s";
	foreach ( $matches as $ph => $plu ) {
		$forms = explode( '|', $plu[1] );

		foreach ( $forms as $form ) {
			$match = [];
			if ( preg_match( $regex, $form, $match ) ) {
				$formWord = "$key{$this->sep}{$match[1]}";
				$value = $match[2];
			} else {
				// A form without a keyword is the "other" alternative.
				$formWord = "$key{$this->sep}other";
				$value = $form;
			}

			if ( !isset( $alts[$formWord] ) ) {
				$alts[$formWord] = $message;
			}

			$alts[$formWord] = str_replace( $ph, $value, $alts[$formWord] );
		}
	}

	// Restore the variables. Iterating by key avoids leaving a dangling reference behind.
	$search = array_keys( $placeholders );
	$replace = array_values( $placeholders );
	foreach ( $alts as $formWord => $text ) {
		$alts[$formWord] = str_replace( $search, $replace, $text );
	}

	if ( !isset( $alts["$key{$this->sep}other"] ) ) {
		// Ensure other form is always present, even if missing from the translation
		$alts["$key{$this->sep}other"] = end( $alts );
	}

	return $alts;
}
270
/**
 * Compares two strings for equal content, taking PLURAL expansion into account.
 *
 * Without CLDR plural parsing, or when either string cannot be parsed as a
 * plural message, this is a plain strict string comparison. Otherwise both
 * strings are expanded with unflattenCLDRPlurals() and are equal when they
 * yield the same keyword => text pairs, regardless of order.
 *
 * @param string $a
 * @param string $b
 * @return bool
 */
public function compareContent( string $a, string $b ): bool {
	if ( !$this->parseCLDRPlurals ) {
		return $a === $b;
	}

	$a2 = $this->unflattenCLDRPlurals( 'prefix', $a );
	$b2 = $this->unflattenCLDRPlurals( 'prefix', $b );

	// Fall back to regular comparison if parsing fails.
	if ( $a2 === false || $b2 === false ) {
		return $a === $b;
	}

	// Order of the plural forms is irrelevant, so normalise it and then compare
	// strictly. A loose == would compare numeric-looking texts as numbers
	// ("1e3" == "1000") and report different content as equal.
	ksort( $a2, SORT_STRING );
	ksort( $b2, SORT_STRING );

	return $a2 === $b2;
}
288}
Flattens message arrays for further processing.
flattenCLDRPlurals(array $messages)
Flattens arrays that contain CLDR plural keywords into single values using MediaWiki's plural syntax.
unflatten(array $flat)
Performs the reverse operation of flatten.
compareContent(string $a, string $b)
Compares two strings for equal content, taking PLURAL expansion into account.
flatten(array $unflat)
Flattens multidimensional array.
unflattenCLDRPlurals(string $key, string $message)
Converts the plural syntax to array of CLDR style plurals.
Essentially random collection of helper functions, similar to GlobalFunctions.php.
Definition Utilities.php:30