Translate extension for MediaWiki
 
Loading...
Searching...
No Matches
ArrayFlattener.php
1<?php
2declare( strict_types = 1 );
3
4namespace MediaWiki\Extension\Translate\MessageProcessing;
5
6use InvalidArgumentException;
8
/**
 * Reserved plural keywords, stored as array keys so that membership can be
 * tested with isset(). The key order is also the order of the alternatives
 * in the regular expression built by unflattenCLDRPlurals().
 */
private const PLURAL_WORDS = [
	'zero' => 1,
	'one' => 1,
	'many' => 1,
	'few' => 1,
	'other' => 1,
	'two' => 1,
];
29
/**
 * @param string $sep Separator placed between the path components of a
 *  flattened key.
 * @param bool $parseCLDRPlurals Whether arrays keyed by the reserved plural
 *  keywords are converted to and from the {{PLURAL|...}} syntax.
 */
public function __construct(
	private readonly string $sep = '.',
	private readonly bool $parseCLDRPlurals = false,
) {
}
35
/**
 * Flattens a multidimensional array into a single-level array. The key of
 * each value is the path to that value, with the individual keys joined by
 * the configured separator.
 *
 * When plural parsing is enabled, a nested array that consists of reserved
 * plural keywords is collapsed into one {{PLURAL|...}} string instead of
 * being flattened further.
 *
 * @param array $unflat Possibly nested array of values.
 * @return array Single-level array.
 * @throws InvalidArgumentException If plural parsing is enabled and a nested
 *  array mixes reserved plural keywords with other keys.
 */
public function flatten( array $unflat ): array {
	$flat = [];

	foreach ( $unflat as $key => $value ) {
		if ( !is_array( $value ) ) {
			$flat[$key] = $value;
			continue;
		}

		$plurals = $this->parseCLDRPlurals ? $this->flattenCLDRPlurals( $value ) : false;
		if ( $plurals ) {
			$flat[$key] = $plurals;
			continue;
		}

		// Prefix every child key with the parent key, then flatten the
		// result recursively until no nested arrays remain.
		$prefixed = [];
		foreach ( $value as $subKey => $subValue ) {
			$prefixed["$key{$this->sep}$subKey"] = $subValue;
		}

		// Array union: keys that already exist in $flat are kept as they are.
		$flat += $this->flatten( $prefixed );
	}

	return $flat;
}
72
/**
 * Flattens an array keyed by reserved plural keywords into a single string
 * of the form {{PLURAL|keyword=text|...|other text}}.
 *
 * @param array $messages Candidate array of plural forms.
 * @return string|false The plural string, or false when $messages is not a
 *  plural array: it contains a nested array, has no reserved keyword, or has
 *  `other` as its only reserved keyword next to regular keys.
 * @throws InvalidArgumentException If reserved plural keywords are mixed with
 *  other keys in any other way.
 */
public function flattenCLDRPlurals( array $messages ) {
	// Plurals can only happen in the lowest level of the structure.
	foreach ( $messages as $candidate ) {
		if ( is_array( $candidate ) ) {
			return false;
		}
	}

	// Reserved plural keywords present in the input, in input order.
	$pluralKeys = array_keys( array_intersect_key( $messages, self::PLURAL_WORDS ) );

	// No plural keys at all, we can skip.
	if ( $pluralKeys === [] ) {
		return false;
	}

	// Mixed plural keys with other keys, should not happen.
	if ( count( $pluralKeys ) !== count( $messages ) ) {
		// Allow `other` with other keys, as long as it is the only reserved one.
		if ( $pluralKeys === [ 'other' ] ) {
			return false;
		}

		$keys = implode( ', ', array_keys( $messages ) );
		throw new InvalidArgumentException( "Reserved plural keywords mixed with other keys: $keys." );
	}

	$segments = [];
	foreach ( $messages as $keyword => $text ) {
		if ( $keyword !== 'other' ) {
			$segments[] = "|$keyword=$text";
		}
	}

	// Put the "other" alternative last, without other= prefix.
	if ( isset( $messages['other'] ) ) {
		$segments[] = '|' . $messages['other'];
	}

	return '{{PLURAL' . implode( '', $segments ) . '}}';
}
126
/**
 * Performs the reverse operation of flatten(): splits every key on the
 * configured separator and rebuilds the nested array structure.
 *
 * When plural parsing is enabled, string values containing {{PLURAL|...}}
 * are first expanded into one entry per plural form.
 *
 * @param array $flat Single-level array.
 * @return array Nested array.
 */
public function unflatten( array $flat ): array {
	if ( $this->parseCLDRPlurals ) {
		$expanded = [];
		foreach ( $flat as $key => $value ) {
			// Only strings can contain plural syntax. PHP turns numeric
			// string keys into integers, so the key is cast back to satisfy
			// the string parameter under strict_types.
			$plurals = is_string( $value )
				? $this->unflattenCLDRPlurals( (string)$key, $value )
				: false;

			if ( is_array( $plurals ) ) {
				// Array union: entries already collected are kept.
				$expanded += $plurals;
			} else {
				$expanded[$key] = $value;
			}
		}
		$flat = $expanded;
	}

	$unflat = [];
	foreach ( $flat as $key => $value ) {
		// Cast for the same reason as above: explode() rejects integer
		// keys under strict_types.
		$path = explode( $this->sep, (string)$key );
		$lastKey = array_pop( $path );

		// Walk down the remaining path, creating missing levels on the way.
		$pointer = &$unflat;
		foreach ( $path as $level ) {
			if ( !isset( $pointer[$level] ) ) {
				$pointer[$level] = [];
			}
			$pointer = &$pointer[$level];
		}

		$pointer[$lastKey] = $value;
		// Break the reference so the next key starts again from the root.
		unset( $pointer );
	}

	return $unflat;
}
182
/**
 * Converts the {{PLURAL|...}} syntax into an array of plural forms, one
 * complete message per form.
 *
 * @param string $key Key of the message; used as the prefix of the returned keys.
 * @param string $message Message text, possibly containing plural blocks.
 * @return array|false Map from "<key><separator><keyword>" to the message
 *  text for that plural form, or false if the message has no plural block.
 */
public function unflattenCLDRPlurals( string $key, string $message ) {
	// Quick escape.
	if ( !str_contains( $message, '{{PLURAL' ) ) {
		return false;
	}

	/*
	 * Replace all variables with placeholders. Possible source of bugs
	 * if characters other than those given below are used.
	 */
	$placeholders = [];
	$match = [];
	while ( preg_match( '/\{[a-z_-]+}/i', $message, $match ) ) {
		$uniqkey = Utilities::getPlaceholder();
		$placeholders[$uniqkey] = $match[0];
		// Literal replacement of every occurrence of this variable.
		$message = str_replace( $match[0], $uniqkey, $message );
	}

	// Then replace (possibly multiple) plural instances with placeholders.
	$regex = '~\{\{PLURAL\|(.*?)}}~s';
	$matches = [];
	while ( preg_match( $regex, $message, $match ) ) {
		$uniqkey = Utilities::getPlaceholder();
		$matches[$uniqkey] = $match;
		$message = preg_replace( $regex, $uniqkey, $message, 1 );
	}

	// No plurals, should not happen.
	if ( !$matches ) {
		return false;
	}

	// The final array of alternative plural forms.
	$alts = [];

	/*
	 * Then loop through each plural block, replacing the placeholders
	 * to construct the alternatives. Produces invalid output if there are
	 * multiple plural blocks which don't have the same set of keys.
	 *
	 * The keyword must be at the start of the form (leading whitespace is
	 * allowed). Otherwise a form such as "no one=x" would be read as the
	 * keyword "one" and the text before it would be lost.
	 */
	$pluralChoice = implode( '|', array_keys( self::PLURAL_WORDS ) );
	$regex = "~^\s*($pluralChoice)\s*=\s*(.*)~s";
	foreach ( $matches as $ph => $plu ) {
		foreach ( explode( '|', $plu[1] ) as $form ) {
			$match = [];
			if ( preg_match( $regex, $form, $match ) ) {
				$formWord = "$key{$this->sep}{$match[1]}";
				$value = $match[2];
			} else {
				// A form without keyword= prefix is the "other" form.
				$formWord = "$key{$this->sep}other";
				$value = $form;
			}

			// Each alternative starts from the full message template.
			$alts[$formWord] ??= $message;
			$alts[$formWord] = str_replace( $ph, $value, $alts[$formWord] );
		}
	}

	// Replace other variables.
	$alts = str_replace( array_keys( $placeholders ), array_values( $placeholders ), $alts );

	$otherKey = "$key{$this->sep}other";
	if ( !isset( $alts[$otherKey] ) ) {
		// Ensure other form is always present, even if missing from the translation
		$alts[$otherKey] = end( $alts );
	}

	return $alts;
}
268
/**
 * Compares two strings for equal content, taking PLURAL expansion into
 * account: two messages whose plural forms are the same but listed in a
 * different order are considered equal.
 *
 * @param string|null $a
 * @param string|null $b
 * @return bool
 */
public function compareContent( ?string $a, ?string $b ): bool {
	if ( !$this->parseCLDRPlurals || $a === null || $b === null ) {
		return $a === $b;
	}

	$a2 = $this->unflattenCLDRPlurals( 'prefix', $a );
	$b2 = $this->unflattenCLDRPlurals( 'prefix', $b );

	// Fall back to regular comparison if parsing fails.
	if ( $a2 === false || $b2 === false ) {
		return $a === $b;
	}

	// Require key-value pairs to match, but ignore order. The arrays are
	// sorted by key and compared strictly, because loose comparison treats
	// numeric-looking strings such as "1" and "01" as equal.
	ksort( $a2, SORT_STRING );
	ksort( $b2, SORT_STRING );

	return $a2 === $b2;
}
286}
Flattens message arrays for further processing.
compareContent(?string $a, ?string $b)
Compares two strings for equal content, taking PLURAL expansion into account.
flattenCLDRPlurals(array $messages)
Flattens arrays that contain CLDR plural keywords into single values using MediaWiki's plural syntax.
unflatten(array $flat)
Performs the reverse operation of flatten.
flatten(array $unflat)
Flattens multidimensional array by using the path to the value as key with each individual key separated by the configured separator (a dot by default).
unflattenCLDRPlurals(string $key, string $message)
Converts the plural syntax to array of CLDR style plurals.
Essentially random collection of helper functions, similar to GlobalFunctions.php.
Definition Utilities.php:30