Code Coverage for /workspace/src/extensions/cldr/includes/CLDRParser.php

	Code Coverage
	Lines			Functions and Methods				Classes and Traits
Total	0.00% covered (danger)	0.00%	0 / 168	0.00% covered (danger)	0.00%	0 / 7	CRAP	0.00% covered (danger)	0.00%	0 / 1
CLDRParser	0.00% covered (danger)	0.00%	0 / 168	0.00% covered (danger)	0.00%	0 / 7	5852	0.00% covered (danger)	0.00%	0 / 1
parse	0.00% covered (danger)	0.00%	0 / 49	0.00% covered (danger)	0.00%	0 / 1	420
parse_supplemental	0.00% covered (danger)	0.00%	0 / 26	0.00% covered (danger)	0.00%	0 / 1	132
parse_currency_symbols	0.00% covered (danger)	0.00%	0 / 46	0.00% covered (danger)	0.00%	0 / 1	552
savephp	0.00% covered (danger)	0.00%	0 / 28	0.00% covered (danger)	0.00%	0 / 1	156
makePrettyArrayOuts	0.00% covered (danger)	0.00%	0 / 14	0.00% covered (danger)	0.00%	0 / 1	56
formatKey	0.00% covered (danger)	0.00%	0 / 4	0.00% covered (danger)	0.00%	0 / 1	6
isAssoc	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 1	2

1	<?php
2
3	namespace MediaWiki\Extension\CLDR;
4
5	use SimpleXMLElement;
6
7	/**
8	* Extract data from cldr XML.
9	*
10	* @author Niklas Laxström
11	* @author Ryan Kaldari
12	* @author Santhosh Thottingal
13	* @author Sam Reed
14	* @copyright Copyright © 2007-2015
15	* @license GPL-2.0-or-later
16	*/
class CLDRParser {

	public const LOCALITY_DEFAULT = '!DEFAULT';
	public const LANGUAGE_DEFAULT = '!root';
	public const CURRENCY_DEFAULT = '!DEFAULT';

	/** Prefix of the CLDR unit types that describe durations (e.g. "duration-day"). */
	private const DURATION_PREFIX = 'duration-';

	/** First lines of every generated PHP file. */
	private const FILE_HEADER = "<?php\n// This file is generated by rebuild.php. Do not edit it directly.\n";

	/**
	 * Extract language names, currency names and symbols, country names and
	 * time unit patterns from one CLDR locale file and write them out as PHP.
	 *
	 * No output file is written when none of those sections yields any data.
	 *
	 * @param string $inputFile filename of the CLDR XML file to read
	 * @param string $outputFile filename of the PHP file to write
	 */
	public function parse( $inputFile, $outputFile ) {
		$doc = new SimpleXMLElement( file_get_contents( $inputFile ) );

		$data = [
			'languageNames' => [],
			'currencyNames' => [],
			'currencySymbols' => [],
			'countryNames' => [],
			'timeUnits' => [],
		];

		foreach ( $doc->xpath( '//languages/language' ) as $elem ) {
			$type = (string)$elem['type'];
			// Alternative spellings and the root pseudo-language are not wanted.
			if ( (string)$elem['alt'] !== '' || $type === 'root' ) {
				continue;
			}

			$key = str_replace( '_', '-', strtolower( $type ) );
			$data['languageNames'][$key] = (string)$elem;
		}

		foreach ( $doc->xpath( '//currencies/currency' ) as $elem ) {
			$name = (string)$elem->displayName[0];
			if ( $name === '' ) {
				continue;
			}

			$code = (string)$elem['type'];
			$data['currencyNames'][$code] = $name;

			$symbol = (string)$elem->symbol[0];
			if ( $symbol !== '' ) {
				$data['currencySymbols'][$code] = $symbol;
			}
		}

		foreach ( $doc->xpath( '//territories/territory' ) as $elem ) {
			// Only the plain name and its "short" alternative are accepted.
			$alt = (string)$elem['alt'];
			if ( $alt !== '' && $alt !== 'short' ) {
				continue;
			}

			// Keep two-letter codes only, and drop the "ZZ" code.
			$type = (string)$elem['type'];
			if ( $type === 'ZZ' || !preg_match( '/^[A-Z][A-Z]$/', $type ) ) {
				continue;
			}

			$data['countryNames'][$type] = (string)$elem;
		}

		$prefixLength = strlen( self::DURATION_PREFIX );
		foreach ( $doc->xpath( '//units/unitLength' ) as $unitLength ) {
			if ( (string)$unitLength['type'] !== 'long' ) {
				continue;
			}
			foreach ( $unitLength->unit as $elem ) {
				$type = (string)$elem['type'];
				// The prefix is stripped below, so it must really be a prefix;
				// merely containing "duration" somewhere is not enough.
				if ( strncmp( $type, self::DURATION_PREFIX, $prefixLength ) !== 0 ) {
					continue;
				}
				$unit = substr( $type, $prefixLength );
				foreach ( $elem->unitPattern as $pattern ) {
					$data['timeUnits'][$unit . '-' . (string)$pattern['count']] = (string)$pattern;
				}
			}
		}

		foreach ( $doc->xpath( '//fields/field' ) as $field ) {
			$fieldType = (string)$field['type'];

			foreach ( $field->relativeTime as $relative ) {
				$relativeType = (string)$relative['type'];
				foreach ( $relative->relativeTimePattern as $pattern ) {
					$key = $fieldType . '-' . $relativeType . '-' . (string)$pattern['count'];
					$data['timeUnits'][$key] = (string)$pattern;
				}
			}
		}

		ksort( $data['timeUnits'] );

		$this->savephp( $data, $outputFile );
	}

	/**
	 * Parse method for the file structure found in common/supplemental/supplementalData.xml
	 *
	 * Writes the per-currency fraction attributes and the region => currencies map.
	 *
	 * @param string $inputFile
	 * @param string $outputFile
	 */
	public function parse_supplemental( $inputFile, $outputFile ) {
		$doc = new SimpleXMLElement( file_get_contents( $inputFile ) );

		$data = [
			'currencyFractions' => [],
			'localeCurrencies' => [],
		];

		// Pull currency attributes - digits, rounding, cashDigits and cashRounding.
		// This will tell us how many decimal places make sense to use with any currency,
		// or if the currency is totally non-fractional
		$attributes = [ 'digits', 'rounding', 'cashDigits', 'cashRounding' ];
		foreach ( $doc->xpath( '//currencyData/fractions/info' ) as $elem ) {
			$iso4217 = (string)$elem['iso4217'];
			if ( $iso4217 === '' ) {
				continue;
			}
			if ( $iso4217 === 'DEFAULT' ) {
				$iso4217 = self::CURRENCY_DEFAULT;
			}

			foreach ( $attributes as $att ) {
				$attValue = (string)$elem[$att];
				if ( $attValue !== '' ) {
					$data['currencyFractions'][$iso4217][$att] = $attValue;
				}
			}
		}

		ksort( $data['currencyFractions'] );

		// Pull a map of regions to currencies in order of preference.
		foreach ( $doc->xpath( '//currencyData/region' ) as $elem ) {
			$region = (string)$elem['iso3166'];
			if ( $region === '' ) {
				continue;
			}

			foreach ( $elem->currency as $currencynode ) {
				// Skip currencies that carry a "to" attribute or are marked tender="false".
				if ( (string)$currencynode['to'] === '' && (string)$currencynode['tender'] !== 'false' ) {
					$data['localeCurrencies'][$region][] = (string)$currencynode['iso4217'];
				}
			}
		}

		ksort( $data['localeCurrencies'] );

		$this->savephp( $data, $outputFile );
	}

	/**
	 * Parse method for the currency section in the names files.
	 * This is separate from the regular parse function, because we need all of
	 * the currency locale information, even if mediawiki doesn't support the language.
	 * (For instance: en_AU uses '$' for AUD, not USD, but it's not a supported mediawiki locality)
	 *
	 * Only entries whose name ends in ".xml" are read. Nothing is written when
	 * $inputDir is not a readable directory or no symbol is found.
	 *
	 * @param string $inputDir the directory, in which we will parse everything.
	 * @param string $outputFile
	 */
	public function parse_currency_symbols( $inputDir, $outputFile ) {
		if ( !is_dir( $inputDir ) ) {
			return;
		}
		$files = scandir( $inputDir );
		if ( $files === false ) {
			return;
		}

		$symbols = [];

		foreach ( $files as $inputFile ) {
			// Require a real ".xml" suffix with a non-empty stem, so that
			// leftovers such as "en.xml.bak" are not fed to the XML parser.
			if ( strlen( $inputFile ) <= 4 || substr( $inputFile, -4 ) !== '.xml' ) {
				continue;
			}

			$doc = new SimpleXMLElement( file_get_contents( $inputDir . '/' . $inputFile ) );

			// Tags in the <identity> section are guaranteed to appear once
			$languages = $doc->xpath( '//identity/language/@type' );
			$language = $languages
				? (string)$languages[0]
				: pathinfo( $inputFile, PATHINFO_FILENAME );

			// The <script> element is optional; when present it expands the language
			$scripts = $doc->xpath( '//identity/script/@type' );
			$script = $scripts ? (string)$scripts[0] : '';
			if ( $script !== '' ) {
				$language .= '-' . strtolower( $script );
			}

			// The <territory> element is optional
			$territories = $doc->xpath( '//identity/territory/@type' );
			$territory = $territories ? (string)$territories[0] : self::LOCALITY_DEFAULT;

			if ( $language === 'root' ) {
				$language = self::LANGUAGE_DEFAULT;
			}

			foreach ( $doc->xpath( '//currencies/currency' ) as $elem ) {
				$symbol = (string)$elem->symbol[0];
				if ( $symbol !== '' ) {
					$symbols[(string)$elem['type']][$language][$territory] = $symbol;
				}
			}
		}

		// now massage the data somewhat. It's pretty blown up at this point.
		$data = [
			'currencySymbols' => $this->collapseCurrencySymbolDefaults( $symbols ),
		];

		$this->savephp( $data, $outputFile );
	}

	/**
	 * Stop blowing up on defaults.
	 *
	 * Defaults apparently come in many forms. Listed below in order of scope
	 * (widest to narrowest)
	 * 1) The ISO code itself, in the absence of any other defaults
	 * 2) The 'root' language file definition
	 * 3) Language with no locality - locality will come in as LOCALITY_DEFAULT
	 *
	 * From narrowest scope to widest, the defaults are collapsed, and every
	 * level of the result is sorted by key.
	 *
	 * @param array $symbols currency => language => territory => symbol
	 * @return array Same data with redundant defaults removed
	 */
	private function collapseCurrencySymbolDefaults( array $symbols ): array {
		// Each foreach walks a snapshot, so editing $symbols inside is safe.
		foreach ( $symbols as $currency => $languages ) {
			// The currency default symbol is either defined in the 'root'
			// language file, or taken from the ISO code.
			$default = $languages[self::LANGUAGE_DEFAULT][self::LOCALITY_DEFAULT] ?? $currency;

			foreach ( $languages as $lang => $territories ) {
				$isRoot = $lang === self::LANGUAGE_DEFAULT;
				$hasDefault = array_key_exists( self::LOCALITY_DEFAULT, $territories );
				$repeatsDefault = $hasDefault && !$isRoot
					&& $territories[self::LOCALITY_DEFAULT] === $default;

				if ( $hasDefault && count( $territories ) === 1 ) {
					// Collapse a language (no locality) array if it's just the
					// default. One value will do fine, and none at all when it
					// only repeats the currency default.
					if ( $repeatsDefault ) {
						unset( $symbols[$currency][$lang] );
					} else {
						$symbols[$currency][$lang] = $territories[self::LOCALITY_DEFAULT];
					}
					continue;
				}

				// Collapse a language (with locality) array if its default is just the default
				if ( !$hasDefault || $repeatsDefault ) {
					foreach ( $territories as $territory => $symbol ) {
						if ( $symbol === $default ) {
							unset( $symbols[$currency][$lang][$territory] );
						}
					}
				}
				ksort( $symbols[$currency][$lang] );
			}

			ksort( $symbols[$currency] );
		}

		ksort( $symbols );

		return $symbols;
	}

	/**
	 * savephp builds properly formatted php source defining all the vars we've
	 * just parsed out of the xml, and writes it to $location.
	 *
	 * Empty arrays are not output; when every array is empty no file is written.
	 *
	 * @param array $data The variable names and values we want defined in the php output
	 * @param string $location File location to write
	 */
	protected function savephp( $data, $location ) {
		// Yes, I am aware I could have simply used var_export.
		// ...the spacing was ugly.
		$definitions = '';
		foreach ( $data as $varname => $values ) {
			if ( !count( $values ) ) {
				// Don't output empty arrays
				continue;
			}
			$definitions .= "\n\$$varname = [\n" . $this->renderEntries( $values, 1 ) . "];\n";
		}

		if ( $definitions === '' ) {
			return;
		}

		file_put_contents( $location, self::FILE_HEADER . $definitions );
	}

	/**
	 * It makes pretty array vals: one nested array literal, trailing comma included.
	 *
	 * @param string|null $key Use null to omit outputting the key
	 * @param array $value
	 * @param int $level Number of tabs in front of the opening bracket
	 * @return string Empty string when $value produces no entries
	 */
	protected function makePrettyArrayOuts( $key, $value, $level = 1 ) {
		$entries = $this->renderEntries( $value, $level + 1 );
		if ( $entries === '' ) {
			return '';
		}

		$tabs = str_repeat( "\t", $level );
		$prefix = $key !== null ? $this->formatKey( $key ) : '';

		return $tabs . $prefix . "[\n" . $entries . $tabs . "],\n";
	}

	/**
	 * Render the entries of one array, one per line, each indented by $level tabs.
	 *
	 * Keys are only output for associative arrays (see isAssoc()); nested
	 * arrays are delegated to makePrettyArrayOuts().
	 *
	 * @param array $values
	 * @param int $level
	 * @return string
	 */
	private function renderEntries( array $values, int $level ): string {
		$withKeys = $this->isAssoc( $values );
		$tabs = str_repeat( "\t", $level );
		$entries = '';

		foreach ( $values as $key => $value ) {
			if ( is_array( $value ) ) {
				$entries .= $this->makePrettyArrayOuts( $withKeys ? $key : null, $value, $level );
			} else {
				$prefix = $withKeys ? $this->formatKey( $key ) : '';
				$entries .= $tabs . $prefix . $this->quote( $value ) . ",\n";
			}
		}

		return $entries;
	}

	/**
	 * It makes pretty array keys: the key followed by " => ".
	 *
	 * Only integers and canonical decimal integer strings are output bare.
	 * Every other key is quoted, because a bare '001' or '1e3' would be read
	 * back by PHP as an octal or float literal and so as a different key.
	 *
	 * @param string $key
	 * @return string
	 */
	protected function formatKey( $key ) {
		if ( is_int( $key ) || ( is_string( $key ) && (string)(int)$key === $key ) ) {
			return "$key => ";
		}

		return $this->quote( $key ) . ' => ';
	}

	/**
	 * Turn a value into a single-quoted PHP string literal.
	 *
	 * Backslashes are escaped as well as apostrophes; otherwise a value ending
	 * in a backslash would swallow the closing quote of the literal.
	 *
	 * @param mixed $value Anything castable to string
	 * @return string
	 */
	private function quote( $value ): string {
		return "'" . strtr( (string)$value, [ '\\' => '\\\\', "'" => "\\'" ] ) . "'";
	}

	/**
	 * Checks if array is associative or sequential.
	 *
	 * An array counts as sequential only when its keys are exactly 0..n-1 in
	 * order. Note that an empty array is reported as associative.
	 *
	 * @param array $arr
	 * @return bool
	 */
	protected function isAssoc( array $arr ) {
		return array_keys( $arr ) !== range( 0, count( $arr ) - 1 );
	}
}