Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 173 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
LanguageVariantHandler | |
0.00% |
0 / 173 |
|
0.00% |
0 / 7 |
3782 | |
0.00% |
0 / 1 |
expandSpArray | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
ser | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
protectLang | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
combine | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
20 | |||
sortedFlags | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
72 | |||
maybeDeleteFlag | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
handleLanguageVariant | |
0.00% |
0 / 122 |
|
0.00% |
0 / 1 |
1640 |
1 | <?php |
2 | declare( strict_types = 1 ); |
3 | |
4 | namespace Wikimedia\Parsoid\Html2Wt; |
5 | |
6 | use Wikimedia\Assert\Assert; |
7 | use Wikimedia\Parsoid\DOM\Element; |
8 | use Wikimedia\Parsoid\Html2Wt\ConstrainedText\LanguageVariantText; |
9 | use Wikimedia\Parsoid\Utils\DOMCompat; |
10 | use Wikimedia\Parsoid\Utils\DOMDataUtils; |
11 | use Wikimedia\Parsoid\Utils\Utils; |
12 | use Wikimedia\Parsoid\Wikitext\Consts; |
13 | |
14 | /** |
15 | * Serializes language variant markup, like `-{ ... }-`. |
16 | */ |
17 | class LanguageVariantHandler { |
18 | |
/**
 * Expand a run-length-compressed whitespace array.
 *
 * Integer entries are treated as counts and replaced by that many empty
 * strings; every other entry is copied through unchanged, in order.
 *
 * @param array $a Compressed array (mix of integers and other values)
 * @return array Expanded array
 */
private static function expandSpArray( array $a ): array {
	$result = [];
	foreach ( $a as $el ) {
		if ( !is_int( $el ) ) {
			$result[] = $el;
			continue;
		}
		// Count upwards so that a zero or negative count simply contributes
		// nothing; a countdown of the form `$i--` would never reach zero
		// when starting from a negative value.
		for ( $i = 0; $i < $el; $i++ ) {
			$result[] = '';
		}
	}
	return $result;
}
32 | |
/**
 * Helper: run an HTML string through the serializer attached to $state.
 *
 * Caller-supplied options take precedence; `env` (from the state) and
 * `onSOL` (false) are only filled in when the caller did not set them.
 *
 * @param SerializerState $state
 * @param string $t HTML string to serialize
 * @param ?array $opts Extra options for htmlToWikitext(), or null for none
 * @return string
 */
private static function ser( SerializerState $state, string $t, ?array $opts ) {
	$defaults = [
		'env' => $state->getEnv(),
		'onSOL' => false
	];
	// Array union keeps the left-hand value for keys present on both sides.
	$options = ( $opts === null ) ? $defaults : $opts + $defaults;

	return $state->serializer->htmlToWikitext( $options, $t );
}
48 | |
/**
 * Helper: make a language name safe to emit inside variant markup.
 *
 * A name consisting of a lowercase ASCII letter followed by one or more
 * ASCII letters or hyphens is returned untouched. Anything else is
 * entity-escaped and wrapped in <nowiki> tags.
 *
 * @param string $l Language name
 * @return string
 */
private static function protectLang( string $l ): string {
	$isPlainName = preg_match( '/^[a-z][-a-zA-Z]+$/D', $l ) === 1;

	return $isPlainName
		? $l
		: '<nowiki>' . Utils::escapeWtEntities( $l ) . '</nowiki>';
}
60 | |
/**
 * Helper: join the flag string, the body and the optional trailing
 * semicolon into the text that goes between `-{` and `}-`.
 *
 * A `|` separator is placed after the flags when the flag string is
 * non-empty or when the body itself contains a `|`. When
 * $useTrailingSemi is not exactly false, `;` followed by its value is
 * appended to the body.
 *
 * @param string $flagStr
 * @param string $bodyStr
 * @param string|bool $useTrailingSemi Text following the trailing `;`,
 *   or false for no trailing semicolon
 * @return string
 */
private static function combine( string $flagStr, string $bodyStr, $useTrailingSemi ): string {
	$needsPipe = !empty( $flagStr ) || str_contains( $bodyStr, '|' );
	$separator = $needsPipe ? '|' : '';
	$suffix = ( $useTrailingSemi === false ) ? '' : ';' . $useTrailingSemi;

	return $flagStr . $separator . $bodyStr . $suffix;
}
78 | |
/**
 * Canonicalize combinations of flags and render them as a `;`-separated
 * string, restoring the original order and surrounding whitespace where
 * that information is available.
 *
 * $originalFlags should be [ 'flag' => <integer position>, ... ]
 *
 * @param array $originalFlags Map from flag to its original position
 * @param array $flSp Expanded whitespace array; entries 2*i and 2*i+1 are
 *   the text before and after the flag at position i, and an extra final
 *   entry (when present) follows a trailing semicolon
 * @param array $flags Flags to output
 * @param bool $noFilter When false, flags whose first character is `$`
 *   (internal-use-only) are dropped
 * @param ?string $protectFunc Name of a static method of this class used
 *   to transform each flag before output, or null/empty for none
 * @return string
 */
private static function sortedFlags(
	array $originalFlags, array $flSp, array $flags, bool $noFilter,
	?string $protectFunc
): string {
	if ( !$noFilter ) {
		// Filter out internal-use-only flags
		$flags = array_filter( $flags, static function ( $f ) {
			return ( $f[0] ?? null ) !== '$';
		} );
	}

	// Order by original position; flags that were not in the original
	// sort first (position -1).
	usort( $flags, static function ( $a, $b ) use ( $originalFlags ) {
		return ( $originalFlags[$a] ?? -1 ) <=> ( $originalFlags[$b] ?? -1 );
	} );

	$spCount = count( $flSp );
	$pieces = [];
	foreach ( $flags as $f ) {
		$p = empty( $protectFunc )
			? $f
			: call_user_func( [ self::class, $protectFunc ], $f );

		// Reinsert the original whitespace around the flag (if any).
		// This must be string concatenation: `+` is arithmetic in PHP.
		$i = $originalFlags[$f] ?? null;
		if ( $i !== null && ( 2 * $i + 1 ) < $spCount ) {
			$p = $flSp[2 * $i] . $p . $flSp[2 * $i + 1];
		}
		$pieces[] = $p;
	}
	$s = implode( ';', $pieces );

	// An odd-length whitespace array carries one extra entry: the text
	// that followed a trailing semicolon in the original flag list.
	$originalCount = count( $originalFlags );
	if ( 2 * $originalCount + 1 === $spCount ) {
		if ( $spCount > 1 || $s !== '' ) {
			$s .= ';';
		}
		$s .= $flSp[2 * $originalCount];
	}
	return $s;
}
133 | |
/**
 * Remove flag $f from $flags unless it is present in $originalFlags.
 *
 * @param array $originalFlags Map keyed by flag
 * @param array &$flags Flag set keyed by flag; modified in place
 * @param string $f Flag to consider for removal
 */
private static function maybeDeleteFlag(
	array $originalFlags, array &$flags, string $f
): void {
	if ( isset( $originalFlags[$f] ) ) {
		// The flag is recorded in the original flag map: keep it.
		return;
	}
	unset( $flags[$f] );
}
141 | |
/**
 * Serialize a language variant node as `-{ ... }-` wikitext and emit it.
 *
 * Reads the rule description from the node's `data-mw-variant` attribute
 * and the original flag / whitespace information from its data-parsoid
 * (`fl`, `flSp`, `tSp`), rebuilds the flag string and the rule text, and
 * emits the result on $state as a LanguageVariantText chunk. If none of
 * the recognised rule kinds is present, the "error" flag text `$E|` is
 * emitted as the content.
 *
 * @param SerializerState $state
 * @param Element $node
 */
public static function handleLanguageVariant( SerializerState $state, Element $node ): void {
	$dataMWV = DOMDataUtils::getJSONAttribute( $node, 'data-mw-variant', [] );
	if ( !is_object( $dataMWV ) ) {
		// The fallback passed above is an array, but everything below uses
		// property access and get_object_vars(), which requires an object.
		$dataMWV = (object)$dataMWV;
	}
	$dp = DOMDataUtils::getDataParsoid( $node );
	$flSp = self::expandSpArray( $dp->flSp ?? [] );
	$textSp = self::expandSpArray( $dp->tSp ?? [] );
	$trailingSemi = false;
	$text = null;
	$flags = [];
	$originalFlags = [];
	if ( isset( $dp->fl ) ) {
		foreach ( $dp->fl as $key => $val ) {
			// NOTE(review): this tests the index ($key) while the map is
			// keyed by flag ($val), so a repeated flag records its last
			// position rather than its first. Left as-is because the
			// existing "was $val" remark suggests it was changed on
			// purpose - confirm the intended behaviour.
			if ( !isset( $originalFlags[$key] ) ) { // was $val
				$originalFlags[$val] = $key;
			}
		}
	}

	$result = '$E|'; // "error" flag

	// Backwards-compatibility: `bidir` => `twoway` ; `unidir` => `oneway`
	if ( isset( $dataMWV->bidir ) ) {
		$dataMWV->twoway = $dataMWV->bidir;
		unset( $dataMWV->bidir );
	}
	if ( isset( $dataMWV->unidir ) ) {
		$dataMWV->oneway = $dataMWV->unidir;
		unset( $dataMWV->unidir );
	}

	foreach ( array_keys( get_object_vars( $dataMWV ) ) as $key ) {
		if ( isset( Consts::$LCNameMap[$key] ) ) {
			$flags[Consts::$LCNameMap[$key]] = true;
		}
	}

	// Tweak flag set to account for implicitly-enabled flags.
	if ( DOMCompat::nodeName( $node ) !== 'meta' ) {
		$flags['$S'] = true;
	}
	if ( !isset( $flags['$S'] ) && !isset( $flags['T'] ) && !isset( $dataMWV->filter ) ) {
		$flags['H'] = true;
	}
	if ( count( $flags ) === 1 && isset( $flags['$S'] ) ) {
		self::maybeDeleteFlag( $originalFlags, $flags, '$S' );
	} elseif ( isset( $flags['D'] ) ) {
		// Weird: Only way to hide a 'describe' rule is to write -{D;A|...}-
		if ( isset( $flags['$S'] ) ) {
			if ( isset( $flags['A'] ) ) {
				$flags['H'] = true;
			}
			unset( $flags['A'] );
		} else {
			$flags['A'] = true;
			unset( $flags['H'] );
		}
	} elseif ( isset( $flags['T'] ) ) {
		if ( isset( $flags['A'] ) && !isset( $flags['$S'] ) ) {
			unset( $flags['A'] );
			$flags['H'] = true;
		}
	} elseif ( isset( $flags['A'] ) ) {
		if ( isset( $flags['$S'] ) ) {
			self::maybeDeleteFlag( $originalFlags, $flags, '$S' );
		} elseif ( isset( $flags['H'] ) ) {
			self::maybeDeleteFlag( $originalFlags, $flags, 'A' );
		}
	} elseif ( isset( $flags['R'] ) ) {
		self::maybeDeleteFlag( $originalFlags, $flags, '$S' );
	} elseif ( isset( $flags['-'] ) ) {
		self::maybeDeleteFlag( $originalFlags, $flags, 'H' );
	}

	if ( isset( $dataMWV->filter ) && $dataMWV->filter->l ) {
		// "Restrict possible variants to a limited set"
		$text = self::ser( $state, $dataMWV->filter->t, [ 'protect' => '/\}-/' ] );
		Assert::invariant( count( $flags ) === 0, 'Error in language variant flags' );
		$result = self::combine(
			self::sortedFlags(
				$originalFlags, $flSp, $dataMWV->filter->l, true,
				'protectLang'
			),
			$text, false
		);
	} else { /* no trailing semi */
		if ( isset( $dataMWV->disabled ) || isset( $dataMWV->name ) ) {
			// "Raw" / protect contents from language converter
			$text = self::ser( $state, ( $dataMWV->disabled ?? $dataMWV->name )->t,
				[ 'protect' => '/\}-/' ] );
			if ( !preg_match( '/[:;|]/', $text ) ) {
				self::maybeDeleteFlag( $originalFlags, $flags, 'R' );
			}
			$result = self::combine(
				self::sortedFlags(
					$originalFlags, $flSp, array_keys( $flags ), false, null
				),
				$text, false
			);
		} elseif ( isset( $dataMWV->twoway ) ) {
			// Two-way rules (most common)
			// Each rule uses three whitespace slots; one extra slot is the
			// text after a trailing semicolon.
			if ( count( $textSp ) % 3 === 1 ) {
				$trailingSemi = $textSp[count( $textSp ) - 1];
			}
			if ( isset( $dataMWV->twoway[0] ) && $dataMWV->twoway[0]->l === '*' ) {
				// A leading `*` rule stands alone: ignore anything after it.
				$b = array_slice( $dataMWV->twoway, 0, 1 );
			} else {
				$b = $dataMWV->twoway ?? [];
			}
			$text = implode( ';',
				array_map(
					// $trailingSemi is captured by reference so that the
					// reset below reaches the outer variable.
					function ( $rule, $idx ) use ( $state, $textSp, &$trailingSemi ) {
						$text = self::ser( $state, $rule->t, [ 'protect' => '/;|\}-/' ] );
						if ( $rule->l === '*' ) {
							$trailingSemi = false;
							return $text;
						}
						$ws = ( 3 * $idx + 2 < count( $textSp ) ) ?
							array_slice( $textSp, 3 * $idx, 3 ) :
							[ ( $idx > 0 ) ? ' ' : '', '', '' ];
						return $ws[0] . self::protectLang( $rule->l ) . $ws[1] . ':' . $ws[2] . $text;
					},
					$b,
					array_keys( $b )
				)
			);
			// suppress output of default flag ('S')
			self::maybeDeleteFlag( $originalFlags, $flags, '$S' );
			$result = self::combine(
				self::sortedFlags(
					$originalFlags, $flSp, array_keys( $flags ), false, null
				),
				$text, $trailingSemi
			);
		} elseif ( isset( $dataMWV->oneway ) ) {
			// One-way rules (uncommon)
			// Each rule uses four whitespace slots; one extra slot is the
			// text after a trailing semicolon.
			if ( count( $textSp ) % 4 === 1 ) {
				$trailingSemi = $textSp[count( $textSp ) - 1];
			}
			$text = implode( ';',
				array_map(
					function ( $rule, $idx ) use ( $state, $textSp ) {
						$from = self::ser( $state, $rule->f, [ 'protect' => '/:|;|=>|\}-/' ] );
						$to = self::ser( $state, $rule->t, [ 'protect' => '/;|\}-/' ] );
						$ws = ( 4 * $idx + 3 < count( $textSp ) ) ?
							array_slice( $textSp, 4 * $idx, 4 ) :
							[ '', '', '', '' ];
						return $ws[0] . $from . '=>' . $ws[1] . self::protectLang( $rule->l ) .
							$ws[2] . ':' . $ws[3] . $to;
					},
					$dataMWV->oneway,
					// array_keys() rather than range(): range( 0, -1 ) would
					// yield two indexes for an empty rule list.
					array_keys( $dataMWV->oneway )
				)
			);
			$result = self::combine(
				self::sortedFlags(
					$originalFlags, $flSp, array_keys( $flags ), false, null
				),
				$text, $trailingSemi
			);
		}
	}
	$state->emitChunk( new LanguageVariantText( '-{' . $result . '}-', $node ), $node );
}
304 | |
305 | } |