Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
98.33% |
59 / 60 |
|
88.89% |
8 / 9 |
CRAP | |
0.00% |
0 / 1 |
UnicodePlural | |
98.33% |
59 / 60 |
|
88.89% |
8 / 9 |
24 | |
0.00% |
0 / 1 |
getPluralKeywords | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
hasPlural | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
flattenMap | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
flattenList | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
formatForm | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
unflatten | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
parsePluralForms | |
95.24% |
20 / 21 |
|
0.00% |
0 / 1 |
6 | |||
expandTemplate | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
4 | |||
convertFormListToFormMap | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace MediaWiki\Extension\Translate\Utilities; |
5 | |
6 | use RuntimeException; |
7 | |
8 | /** |
9 | * @license GPL-2.0-or-later |
10 | * @since 2019.09 |
11 | */ |
class UnicodePlural {
	/** @var string[] List of supported Unicode CLDR plural keywords */
	public const KEYWORDS = [ 'zero', 'one', 'two', 'few', 'many', 'other' ];
	/** Opening delimiter of a plural markup instance. */
	private const PRE = '{{PLURAL|';
	/** Closing delimiter of a plural markup instance. */
	private const POST = '}}';

	/**
	 * Returns CLDR plural rule for given language.
	 *
	 * The keywords are the keys of the language's rule set in the bundled
	 * plural-cldr.json file with the "pluralRule-count-" prefix stripped.
	 *
	 * @param string $code Language tag in MediaWiki internal format.
	 * @return ?string[] Null, if no plural rule found
	 * @throws RuntimeException If the plural rule data file cannot be read or decoded.
	 */
	public static function getPluralKeywords( string $code ): ?array {
		$ruleData = self::loadPluralRuleData();

		$ruleSet = $ruleData[ 'supplemental' ][ 'plurals-type-cardinal' ][ $code ] ?? null;
		if ( $ruleSet === null ) {
			return null;
		}

		$keywords = [];
		foreach ( array_keys( $ruleSet ) as $name ) {
			$keywords[] = str_replace( 'pluralRule-count-', '', $name );
		}

		return $keywords;
	}

	/**
	 * Reads and decodes the plural rule data file, keeping the decoded result
	 * for later calls so the file is read and parsed at most once per request.
	 *
	 * @return array Decoded contents of plural-cldr.json
	 * @throws RuntimeException If the file cannot be read or does not decode to an array.
	 */
	private static function loadPluralRuleData(): array {
		static $ruleData = null;

		if ( $ruleData === null ) {
			$filePath = __DIR__ . '/../../data/plural-cldr.json';

			$contents = file_get_contents( $filePath );
			if ( $contents === false ) {
				throw new RuntimeException( "Unable to read plural rule data from $filePath" );
			}

			$decoded = json_decode( $contents, true );
			if ( !is_array( $decoded ) ) {
				throw new RuntimeException( "Invalid plural rule data in $filePath" );
			}

			$ruleData = $decoded;
		}

		return $ruleData;
	}

	/** Quick way to check if the text contains plural syntax. */
	public static function hasPlural( string $text ): bool {
		return str_contains( $text, self::PRE );
	}

	/**
	 * Format plural forms map as single string suitable for translation.
	 *
	 * This does not check validity of forms. Use ::convertFormListToFormMap for that.
	 * @param string[] $forms Map of keyword => form
	 */
	public static function flattenMap( array $forms ): string {
		$list = [];
		foreach ( $forms as $keyword => $value ) {
			// PHP converts numeric string keys to integers; cast back so that
			// formatForm() does not fail under strict types.
			$list[] = [ (string)$keyword, $value ];
		}

		return self::flattenList( $list );
	}

	/**
	 * Format plural forms list as single string.
	 *
	 * This does not check validity of forms.
	 * @param array[] $formList [ keyword, form ] pairs.
	 */
	public static function flattenList( array $formList ): string {
		$formatted = [];
		foreach ( $formList as [ $keyword, $value ] ) {
			$formatted[] = self::formatForm( $keyword, $value );
		}

		return self::PRE . implode( '|', $formatted ) . self::POST;
	}

	/**
	 * Formats one form for use inside the plural markup. The 'other' form is
	 * written without a "keyword=" prefix; every other keyword is prefixed.
	 */
	private static function formatForm( string $keyword, string $value ): string {
		$prefix = $keyword === 'other' ? '' : "$keyword=";
		return $prefix . $value;
	}

	/**
	 * Format translation with plural forms as array of forms.
	 *
	 * Reverse of flatten. Do note that A may be != flatten( unflatten( A ) ) because
	 * translators can place part of the text outside the plural markup or use multiple
	 * instances of the markup.
	 *
	 * @param string $text
	 * @param string[] $expectedKeywords
	 * @return string[] Map of keyword => fully expanded text
	 * @throws RuntimeException If the plural markup cannot be parsed.
	 */
	public static function unflatten( string $text, array $expectedKeywords ): array {
		[ $template, $instanceMap ] = self::parsePluralForms( $text );
		return self::expandTemplate( $template, $instanceMap, $expectedKeywords );
	}

	/**
	 * Parses plural markup into a structure form.
	 *
	 * Each plural markup instance in the text is replaced with a placeholder in
	 * the returned template. The instance map is keyed by those placeholders and
	 * holds a list of [ keyword, form ] pairs for each instance. Forms that do not
	 * start with "keyword=" are given the keyword 'other'.
	 *
	 * @param string $text
	 * @return array [ string $template, array $instanceMap ]
	 * @throws RuntimeException If the regular expression matching fails.
	 */
	public static function parsePluralForms( string $text ): array {
		$m = [];
		$pre = preg_quote( self::PRE, '/' );
		$post = preg_quote( self::POST, '/' );

		// U: ungreedy, so that each instance ends at the first closing delimiter.
		// s: forms may span multiple lines.
		$ok = preg_match_all( "/$pre(.*)$post/Us", $text, $m );
		if ( $ok === false ) {
			throw new RuntimeException( "Plural regular expression failed for text: $text" );
		}

		$template = $text;
		$instanceMap = [];

		foreach ( $m[0] as $instanceIndex => $instanceText ) {
			$ph = Utilities::getPlaceholder();

			// Replace only the first remaining occurrence, because identical instances
			// each need their own placeholder. Done with literal string functions so
			// that neither the instance text nor the placeholder is interpreted as a
			// regular expression or a replacement reference.
			$pos = strpos( $template, $instanceText );
			if ( $pos !== false ) {
				$template = substr_replace( $template, $ph, $pos, strlen( $instanceText ) );
			}

			$instanceForms = [];
			foreach ( explode( '|', $m[ 1 ][ $instanceIndex ] ) as $form ) {
				$m2 = [];
				// Anchored at the start: only a leading "keyword=" marks the form's
				// keyword. Without the anchor, text such as an HTML attribute
				// (<a href=...>) later in the form would be taken as the keyword and
				// everything before it would be lost.
				$ok = preg_match( '~^\s*([a-z]+)\s*=(.+)~s', $form, $m2 );
				$keyword = $ok ? $m2[ 1 ] : 'other';
				$value = $ok ? trim( $m2[ 2 ] ) : $form;
				$instanceForms[] = [ $keyword, $value ];
			}

			$instanceMap[$ph] = $instanceForms;
		}

		return [ $template, $instanceMap ];
	}

	/**
	 * Gives fully expanded forms given a template and parsed plural markup instances.
	 *
	 * @param string $template Text with placeholders in place of plural markup instances
	 * @param array $instanceMap Map of placeholder => list of [ keyword, form ] pairs
	 * @param string[] $expectedKeywords
	 * @return string[] Map of keyword => template with every placeholder replaced
	 */
	public static function expandTemplate( string $template, array $instanceMap, array $expectedKeywords ): array {
		$formArray = [];

		// Convert from list of forms to map of forms for easier processing
		foreach ( $instanceMap as $ph => $list ) {
			$instanceMap[ $ph ] = self::convertFormListToFormMap( $list, $expectedKeywords );
		}

		foreach ( $expectedKeywords as $keyword ) {
			// Start with the whole string
			$form = $template;

			// Loop over each plural markup instance and replace it with the plural form belonging
			// to the current keyword
			foreach ( $instanceMap as $ph => $instanceFormMap ) {
				// For missing forms, fall back to empty text.
				$replacement = $instanceFormMap[ $keyword ] ?? '';
				$form = str_replace( $ph, $replacement, $form );
			}

			$formArray[ $keyword ] = $form;
		}

		return $formArray;
	}

	/**
	 * Converts a list of [ keyword, form ] pairs into a map keyed by the expected keywords.
	 *
	 * The result contains exactly the expected keywords, in the given order. Keywords
	 * without a form in the list map to null, forms with a keyword that is not expected
	 * are dropped, and if a keyword occurs more than once the last form wins.
	 *
	 * @param array[] $formList [ keyword, form ] pairs.
	 * @param string[] $expectedKeywords
	 * @return array Map of keyword => form or null
	 */
	public static function convertFormListToFormMap( array $formList, array $expectedKeywords ): array {
		$formMap = [];
		foreach ( $formList as [ $keyword, $value ] ) {
			$formMap[ $keyword ] = $value;
		}

		$sortedFormMap = [];
		foreach ( $expectedKeywords as $keyword ) {
			$sortedFormMap[ $keyword ] = $formMap[ $keyword ] ?? null;
		}

		return $sortedFormMap;
	}
}