Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 168 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
CLDRParser | |
0.00% |
0 / 168 |
|
0.00% |
0 / 7 |
5852 | |
0.00% |
0 / 1 |
parse | |
0.00% |
0 / 49 |
|
0.00% |
0 / 1 |
420 | |||
parse_supplemental | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
132 | |||
parse_currency_symbols | |
0.00% |
0 / 46 |
|
0.00% |
0 / 1 |
552 | |||
savephp | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
156 | |||
makePrettyArrayOuts | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
56 | |||
formatKey | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
isAssoc | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\CLDR; |
4 | |
5 | use SimpleXMLElement; |
6 | |
7 | /** |
8 | * Extract data from cldr XML. |
9 | * |
10 | * @author Niklas Laxström |
11 | * @author Ryan Kaldari |
12 | * @author Santhosh Thottingal |
13 | * @author Sam Reed |
14 | * @copyright Copyright © 2007-2015 |
15 | * @license GPL-2.0-or-later |
16 | */ |
class CLDRParser {

	public const LOCALITY_DEFAULT = '!DEFAULT';
	public const LANGUAGE_DEFAULT = '!root';
	public const CURRENCY_DEFAULT = '!DEFAULT';

	/**
	 * Parse a CLDR "main" locale file and write the language names, currency
	 * names/symbols, country names and time unit patterns found in it to a
	 * PHP file.
	 *
	 * Nothing is written when none of those sections yields any data.
	 *
	 * @param string $inputFile filename
	 * @param string $outputFile filename
	 * @throws \RuntimeException If the input cannot be read or the output cannot be written
	 */
	public function parse( $inputFile, $outputFile ) {
		$doc = $this->loadXml( $inputFile );

		$data = [
			'languageNames' => [],
			'currencyNames' => [],
			'currencySymbols' => [],
			'countryNames' => [],
			'timeUnits' => [],
		];

		foreach ( $doc->xpath( '//languages/language' ) as $elem ) {
			$type = (string)$elem['type'];
			// Skip alternative forms and the pseudo-language "root"
			if ( (string)$elem['alt'] !== '' || $type === 'root' ) {
				continue;
			}

			$key = str_replace( '_', '-', strtolower( $type ) );
			$data['languageNames'][$key] = (string)$elem;
		}

		foreach ( $doc->xpath( '//currencies/currency' ) as $elem ) {
			$name = (string)$elem->displayName[0];
			if ( $name === '' ) {
				continue;
			}

			$code = (string)$elem['type'];
			$data['currencyNames'][$code] = $name;

			$symbol = (string)$elem->symbol[0];
			if ( $symbol !== '' ) {
				$data['currencySymbols'][$code] = $symbol;
			}
		}

		foreach ( $doc->xpath( '//territories/territory' ) as $elem ) {
			$alt = (string)$elem['alt'];
			// Only the plain form and the "short" alternative are used; since both
			// write to the same key, whichever comes later in the file wins.
			if ( $alt !== '' && $alt !== 'short' ) {
				continue;
			}

			$type = (string)$elem['type'];
			// Keep two-letter codes only, and drop ZZ
			if ( $type === 'ZZ' || !preg_match( '/^[A-Z]{2}$/', $type ) ) {
				continue;
			}

			$data['countryNames'][$type] = (string)$elem;
		}

		$durationPrefix = 'duration-';
		$durationPrefixLength = strlen( $durationPrefix );
		foreach ( $doc->xpath( '//units/unitLength' ) as $unitLength ) {
			if ( (string)$unitLength['type'] !== 'long' ) {
				continue;
			}
			foreach ( $unitLength->unit as $elem ) {
				$type = (string)$elem['type'];
				// Require the type to actually start with "duration-" before
				// stripping that prefix; merely containing "duration" is not enough.
				if ( strncmp( $type, $durationPrefix, $durationPrefixLength ) !== 0 ) {
					continue;
				}
				$unit = (string)substr( $type, $durationPrefixLength );
				if ( $unit === '' ) {
					continue;
				}
				foreach ( $elem->unitPattern as $pattern ) {
					$data['timeUnits'][$unit . '-' . (string)$pattern['count']] = (string)$pattern;
				}
			}
		}

		foreach ( $doc->xpath( '//fields/field' ) as $field ) {
			$fieldType = (string)$field['type'];

			foreach ( $field->relativeTime as $relative ) {
				$relativeType = (string)$relative['type'];
				foreach ( $relative->relativeTimePattern as $pattern ) {
					$key = $fieldType . '-' . $relativeType . '-' . (string)$pattern['count'];
					$data['timeUnits'][$key] = (string)$pattern;
				}
			}
		}

		ksort( $data['timeUnits'] );

		$this->savephp( $data, $outputFile );
	}

	/**
	 * Parse method for the file structure found in common/supplemental/supplementalData.xml
	 *
	 * Extracts the per-currency fraction attributes and the list of current
	 * legal-tender currencies per region, then writes both to a PHP file.
	 *
	 * @param string $inputFile
	 * @param string $outputFile
	 * @throws \RuntimeException If the input cannot be read or the output cannot be written
	 */
	public function parse_supplemental( $inputFile, $outputFile ) {
		$doc = $this->loadXml( $inputFile );

		$data = [
			'currencyFractions' => [],
			'localeCurrencies' => [],
		];

		// Pull currency attributes - digits, rounding, cashDigits, and cashRounding.
		// This will tell us how many decimal places make sense to use with any currency,
		// or if the currency is totally non-fractional
		$attributes = [ 'digits', 'rounding', 'cashDigits', 'cashRounding' ];
		foreach ( $doc->xpath( '//currencyData/fractions/info' ) as $elem ) {
			$iso4217 = (string)$elem['iso4217'];
			if ( $iso4217 === '' ) {
				continue;
			}
			if ( $iso4217 === 'DEFAULT' ) {
				$iso4217 = self::CURRENCY_DEFAULT;
			}

			foreach ( $attributes as $att ) {
				$attValue = (string)$elem[$att];
				if ( $attValue !== '' ) {
					$data['currencyFractions'][$iso4217][$att] = $attValue;
				}
			}
		}

		ksort( $data['currencyFractions'] );

		// Pull a map of regions to currencies in order of preference.
		foreach ( $doc->xpath( '//currencyData/region' ) as $elem ) {
			$region = (string)$elem['iso3166'];
			if ( $region === '' ) {
				continue;
			}

			foreach ( $elem->currency as $currencynode ) {
				// An entry with a "to" date is no longer in use, and tender="false"
				// marks a currency that is not legal tender; skip both.
				if ( (string)$currencynode['to'] === '' && (string)$currencynode['tender'] !== 'false' ) {
					$data['localeCurrencies'][$region][] = (string)$currencynode['iso4217'];
				}
			}
		}

		ksort( $data['localeCurrencies'] );

		$this->savephp( $data, $outputFile );
	}

	/**
	 * Parse method for the currency section in the names files.
	 * This is separate from the regular parse function, because we need all of
	 * the currency locale information, even if mediawiki doesn't support the language.
	 * (For instance: en_AU uses '$' for AUD, not USD, but it's not a supported mediawiki locality)
	 *
	 * Only entries whose extension is exactly ".xml" are read. If the directory
	 * does not exist or cannot be listed, the method returns without writing anything.
	 *
	 * @param string $inputDir the directory, in which we will parse everything.
	 * @param string $outputFile
	 * @throws \RuntimeException If a file cannot be read or the output cannot be written
	 */
	public function parse_currency_symbols( $inputDir, $outputFile ) {
		if ( !is_dir( $inputDir ) ) {
			return;
		}
		$files = scandir( $inputDir );
		if ( $files === false ) {
			return;
		}

		$symbols = [];

		foreach ( $files as $inputFile ) {
			// Require a real "<name>.xml" file name; this also skips ".", "..",
			// and things like "en.xml.bak".
			if ( pathinfo( $inputFile, PATHINFO_EXTENSION ) !== 'xml'
				|| pathinfo( $inputFile, PATHINFO_FILENAME ) === ''
			) {
				continue;
			}

			$doc = $this->loadXml( $inputDir . '/' . $inputFile );

			// Tags in the <identity> section are guaranteed to appear once
			$languages = $doc->xpath( '//identity/language/@type' );
			$language = $languages
				? (string)$languages[0]
				: pathinfo( $inputFile, PATHINFO_FILENAME );

			// The <script> element is optional; when present it extends the language code
			$scripts = $doc->xpath( '//identity/script/@type' );
			$script = $scripts ? (string)$scripts[0] : '';
			if ( $script !== '' ) {
				$language .= '-' . strtolower( $script );
			}

			// The <territory> element is optional
			$territories = $doc->xpath( '//identity/territory/@type' );
			$territory = $territories ? (string)$territories[0] : self::LOCALITY_DEFAULT;

			if ( $language === 'root' ) {
				$language = self::LANGUAGE_DEFAULT;
			}

			foreach ( $doc->xpath( '//currencies/currency' ) as $elem ) {
				$symbol = (string)$elem->symbol[0];
				if ( $symbol !== '' ) {
					$symbols[(string)$elem['type']][$language][$territory] = $symbol;
				}
			}
		}

		// now massage the data somewhat. It's pretty blown up at this point.
		$data = [
			'currencySymbols' => $this->collapseCurrencySymbolDefaults( $symbols ),
		];

		$this->savephp( $data, $outputFile );
	}

	/**
	 * Stop blowing up on defaults.
	 *
	 * Defaults apparently come in many forms. Listed below in order of scope
	 * (widest to narrowest)
	 *   1) The ISO code itself, in the absence of any other defaults
	 *   2) The 'root' language file definition
	 *   3) Language with no locality - locality will come in as LOCALITY_DEFAULT
	 *
	 * Intended behavior: from narrowest scope to widest, collapse the defaults.
	 * Every level of the result is sorted by key.
	 *
	 * @param array $symbols Map of currency => language => territory => symbol
	 * @return array The same map with redundant defaults collapsed or removed
	 */
	private function collapseCurrencySymbolDefaults( array $symbols ) {
		// Both loops iterate over a by-value snapshot, so editing $symbols inside
		// them does not disturb the iteration.
		foreach ( $symbols as $currency => $languages ) {
			// get the currency default symbol. This will either be defined in the
			// 'root' language file, or taken from the ISO code.
			$default = $languages[self::LANGUAGE_DEFAULT][self::LOCALITY_DEFAULT] ?? $currency;

			foreach ( $languages as $lang => $territories ) {
				if ( !is_array( $territories ) ) {
					continue;
				}

				$hasLocalityDefault = array_key_exists( self::LOCALITY_DEFAULT, $territories );
				$redundantDefault = $hasLocalityDefault
					&& $territories[self::LOCALITY_DEFAULT] === $default
					&& $lang !== self::LANGUAGE_DEFAULT;

				if ( $hasLocalityDefault && count( $territories ) === 1 ) {
					// Collapse a language (no locality) array if it's just the default.
					// One value will do fine - or none at all if it only repeats the
					// currency-wide default.
					if ( $redundantDefault ) {
						unset( $symbols[$currency][$lang] );
					} else {
						$symbols[$currency][$lang] = $territories[self::LOCALITY_DEFAULT];
					}
					continue;
				}

				// Collapse a language (with locality) array if its default is just the default
				if ( !$hasLocalityDefault || $redundantDefault ) {
					foreach ( $territories as $territory => $symbol ) {
						if ( $symbol === $default ) {
							unset( $symbols[$currency][$lang][$territory] );
						}
					}
				}
				ksort( $symbols[$currency][$lang] );
			}

			ksort( $symbols[$currency] );
		}

		ksort( $symbols );

		return $symbols;
	}

	/**
	 * Read a file and parse it as XML.
	 *
	 * @param string $file
	 * @return SimpleXMLElement
	 * @throws \RuntimeException If the file cannot be read
	 */
	private function loadXml( $file ) {
		$contents = file_get_contents( $file );
		if ( $contents === false ) {
			throw new \RuntimeException( "Unable to read CLDR file: $file" );
		}

		return new SimpleXMLElement( $contents );
	}

	/**
	 * savephp builds properly formatted php source defining all the vars we've
	 * just parsed out of the xml, and writes it to $location.
	 *
	 * Empty arrays are not output, and no file is written at all when every
	 * array in $data is empty.
	 *
	 * @param array $data The variable names and values we want defined in the php output
	 * @param string $location File location to write
	 * @throws \RuntimeException If the file cannot be written
	 */
	protected function savephp( $data, $location ) {
		// var_export is only used for scalars and keys; on whole arrays its
		// spacing was ugly, so the array layout is built by hand.
		$body = '';
		foreach ( $data as $varname => $values ) {
			if ( !count( $values ) ) {
				// Don't output empty arrays
				continue;
			}

			$isAssoc = $this->isAssoc( $values );
			$body .= "\n\$$varname = [\n";
			foreach ( $values as $key => $value ) {
				if ( is_array( $value ) ) {
					$body .= $this->makePrettyArrayOuts( $isAssoc ? $key : null, $value, 1 );
				} else {
					$prefix = $isAssoc ? $this->formatKey( $key ) : '';
					$body .= "\t$prefix" . $this->exportString( $value ) . ",\n";
				}
			}
			$body .= "];\n";
		}

		if ( $body === '' ) {
			return;
		}

		$output = "<?php\n// This file is generated by rebuild.php. Do not edit it directly.\n" . $body;
		if ( file_put_contents( $location, $output ) === false ) {
			throw new \RuntimeException( "Unable to write output file: $location" );
		}
	}

	/**
	 * It makes pretty array vals: renders one (possibly nested) array as
	 * tab-indented php source, one element per line.
	 *
	 * @param string|int|null $key Use null to omit outputting the key
	 * @param array $value
	 * @param int $level Indentation depth, in tabs
	 * @return string Empty string if the array renders to nothing
	 */
	protected function makePrettyArrayOuts( $key, $value, $level = 1 ) {
		$isAssoc = $this->isAssoc( $value );
		$tabs = str_repeat( "\t", $level );
		$body = '';

		foreach ( $value as $subkey => $subvalue ) {
			if ( is_array( $subvalue ) ) {
				$body .= $this->makePrettyArrayOuts( $isAssoc ? $subkey : null, $subvalue, $level + 1 );
			} else {
				$prefix = $isAssoc ? $this->formatKey( $subkey ) : '';
				$body .= "$tabs\t$prefix" . $this->exportString( $subvalue ) . ",\n";
			}
		}

		if ( $body === '' ) {
			// Nothing inside, so leave the whole entry out
			return '';
		}

		$prefix = $key !== null ? $this->formatKey( $key ) : '';
		return "$tabs$prefix" . "[\n$body$tabs],\n";
	}

	/**
	 * It makes pretty array keys.
	 *
	 * Only integers (and strings PHP would itself turn into integer keys, such
	 * as '419') are emitted bare. Every other key is emitted as a quoted string,
	 * so that e.g. '001' is not re-read as an octal literal.
	 *
	 * @param string|int $key
	 * @return string The key as php source, followed by " => "
	 */
	protected function formatKey( $key ) {
		if ( !is_int( $key ) ) {
			$key = (string)$key;
			if ( (string)(int)$key === $key ) {
				$key = (int)$key;
			}
		}

		return var_export( $key, true ) . ' => ';
	}

	/**
	 * Render a scalar as a single-quoted php string literal, escaping both
	 * single quotes and backslashes.
	 *
	 * @param mixed $value Scalar value; it is cast to string
	 * @return string
	 */
	private function exportString( $value ) {
		return var_export( (string)$value, true );
	}

	/**
	 * Checks if array is associative or sequential.
	 *
	 * An array is sequential when its keys are exactly 0, 1, 2, ... in order;
	 * the empty array counts as sequential.
	 *
	 * @param array $arr
	 * @return bool True if associative
	 */
	protected function isAssoc( array $arr ) {
		$expected = 0;
		foreach ( $arr as $key => $unused ) {
			if ( $key !== $expected ) {
				return true;
			}
			$expected++;
		}

		return false;
	}
}