Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
23.11% |
52 / 225 |
|
25.00% |
4 / 16 |
CRAP | |
0.00% |
0 / 1 |
ConverterRule | |
23.21% |
52 / 224 |
|
25.00% |
4 / 16 |
4719.32 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getTextInBidtable | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
parseFlags | |
24.39% |
10 / 41 |
|
0.00% |
0 / 1 |
126.66 | |||
parseRules | |
31.82% |
14 / 44 |
|
0.00% |
0 / 1 |
86.32 | |||
getRulesDesc | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
getRuleConvertedStr | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
72 | |||
getRuleConvertedTitle | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
generateConvTable | |
11.54% |
3 / 26 |
|
0.00% |
0 / 1 |
149.68 | |||
parse | |
31.25% |
20 / 64 |
|
0.00% |
0 / 1 |
282.76 | |||
hasRules | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDisplay | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTitle | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getRulesAction | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getConvTable | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getRules | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFlags | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | * @author fdcn <fdcn64@gmail.com>, PhiLiP <philip.npc@gmail.com> |
20 | */ |
21 | |
22 | namespace MediaWiki\Language; |
23 | |
24 | use MediaWiki\Logger\LoggerFactory; |
25 | use StringUtils; |
26 | |
27 | /** |
28 | * The rules used for language conversion, this processes the rules |
29 | * extracted by Parser from the `-{ }-` wikitext syntax. |
30 | * |
31 | * @ingroup Language |
32 | */ |
33 | class ConverterRule { |
/**
 * @var string Original text found between -{ and }-, as passed to the constructor
 */
public $mText;
/**
 * @var LanguageConverter Converter queried for flags, variants, fallbacks and manual levels
 */
public $mConverter;
/**
 * @var string|false Text shown in place of the markup. parse() sets this to false
 *  while processing flags and replaces a remaining false with an error message.
 */
public $mRuleDisplay = '';
/**
 * @var string|false Converted title assigned when parse() processes the T flag
 */
public $mRuleTitle = false;
/**
 * @var string the text of the rules
 */
public $mRules = '';
/**
 * @var string One of 'none', 'add' or 'remove'; parse() sets 'add' for the + flag
 *  and 'remove' for the - flag
 */
public $mRulesAction = 'none';
/**
 * @var array Parsed flags, keyed by flag name with true as the value
 */
public $mFlags = [];
/**
 * @var array Variant codes given in the flag position, keyed by code with true as the value
 */
public $mVariantFlags = [];
/**
 * @var array Conversion table filled by generateConvTable(): variant code => [ from => to ]
 */
public $mConvTable = [];
/**
 * @var array Bidirectional rules: variant code => translation in that variant
 */
public $mBidtable = [];
/**
 * @var array Unidirectional rules: variant code => [ source text => translation ]
 */
public $mUnidtable = [];
/**
 * Remember the raw markup content and the converter that will interpret it.
 * No parsing happens here; call parse() to process the text.
 *
 * @param string $text The text between -{ and }-
 * @param LanguageConverter $converter
 */
public function __construct( $text, LanguageConverter $converter ) {
	$this->mConverter = $converter;
	$this->mText = $text;
}
69 | |
/**
 * Look up the first of the given variants that has an entry in the
 * bidirectional table.
 *
 * @param array|string $variants Variant language code(s), tried in order
 * @return string|false Translated text, or false when none of the variants has an entry
 */
public function getTextInBidtable( $variants ) {
	foreach ( (array)$variants as $code ) {
		if ( isset( $this->mBidtable[$code] ) ) {
			return $this->mBidtable[$code];
		}
	}
	// Reached both when the list was empty and when nothing matched.
	return false;
}
88 | |
/**
 * Split the optional flag prefix off the markup text (syntax -{FLAG| ... }-)
 * and normalise the flag set.
 *
 * Reads $this->mText and stores the results in $this->mFlags,
 * $this->mVariantFlags and $this->mRules.
 */
private function parseFlags() {
	$remainder = $this->mText;
	$found = [];
	$variantFlags = [];

	$pipeAt = strpos( $remainder, '|' );
	if ( $pipeAt !== false ) {
		$known = $this->mConverter->getFlags();
		$prefix = substr( $remainder, 0, $pipeAt );
		foreach ( StringUtils::explode( ';', $prefix ) as $name ) {
			$name = trim( $name );
			if ( isset( $known[$name] ) ) {
				$found[$known[$name]] = true;
			}
		}
		$remainder = strval( substr( $remainder, $pipeAt + 1 ) );
	}

	// R, N and "-" each discard every other flag; they are checked in that order.
	$solo = null;
	foreach ( [ 'R', 'N', '-' ] as $candidate ) {
		if ( isset( $found[$candidate] ) ) {
			$solo = $candidate;
			break;
		}
	}

	if ( !$found ) {
		// No recognised flag at all
		$found = [ 'S' => true ];
	} elseif ( $solo !== null ) {
		$found = [ $solo => true ];
	} elseif ( isset( $found['T'] ) && count( $found ) === 1 ) {
		// T on its own also gets H
		$found['H'] = true;
	} elseif ( isset( $found['H'] ) ) {
		// replace A flag, and remove other flags except T and D;
		// the resulting key order is +, H, T, D
		$found = [ '+' => true, 'H' => true ]
			+ array_intersect_key( [ 'T' => true, 'D' => true ], $found );
	} else {
		if ( isset( $found['A'] ) ) {
			$found['+'] = true;
			$found['S'] = true;
		}
		if ( isset( $found['D'] ) ) {
			unset( $found['S'] );
		}
		// try to find flags like "zh-hans", "zh-hant"
		// allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
		$matched = array_intersect( array_keys( $found ), $this->mConverter->getVariants() );
		if ( $matched ) {
			$variantFlags = array_fill_keys( $matched, true );
			$found = [];
		}
	}

	$this->mFlags = $found;
	$this->mRules = $remainder;
	$this->mVariantFlags = $variantFlags;
}
153 | |
/**
 * Parse $this->mRules into the bidirectional and unidirectional tables.
 *
 * The rule text is split with the converter's variant separator pattern.
 * Each piece is either "variant:text" (bidirectional) or
 * "from=>variant:text" (unidirectional). If any piece names a variant that
 * is not in the converter's variant names, both tables are emptied.
 *
 * Results are stored in $this->mBidtable and $this->mUnidtable.
 */
private function parseRules() {
	$rules = $this->mRules;
	$bidtable = [];
	$unidtable = [];
	$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

	// Split text according to $varsep_pattern, but ignore semicolons from HTML entities:
	// the entity's trailing ";" is swapped for \x01 before splitting and restored afterwards.
	$rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
	$choice = preg_split( $varsep_pattern, $rules );
	// @phan-suppress-next-line PhanTypeComparisonFromArray
	if ( $choice === false ) {
		// Capture the PCRE error state before anything else can run a regex.
		$error = preg_last_error();
		$errorText = preg_last_error_msg();
		LoggerFactory::getInstance( 'parser' )->warning(
			'ConverterRule preg_split error: {code} {errorText}',
			[
				'code' => $error,
				'errorText' => $errorText
			]
		);
		$choice = [];
	}
	$choice = str_replace( "\x01", ';', $choice );

	foreach ( $choice as $c ) {
		$v = explode( ':', $c, 2 );
		if ( count( $v ) !== 2 ) {
			// syntax error, skip
			continue;
		}
		$to = trim( $v[1] );
		$v = trim( $v[0] );
		$u = explode( '=>', $v, 2 );
		$vv = $this->mConverter->validateVariant( $v );
		// if $to is empty (which is also used as $from in bidtable),
		// strtr() could return a wrong result.
		if ( count( $u ) === 1 && $to !== '' && $vv ) {
			$bidtable[$vv] = $to;
		} elseif ( count( $u ) === 2 ) {
			$from = trim( $u[0] );
			$v = trim( $u[1] );
			$vv = $this->mConverter->validateVariant( $v );
			// if $from is empty, strtr() could return a wrong result.
			// Entries of $unidtable are only ever created here, as arrays, so
			// no separate case for a non-array entry is needed.
			if ( $from !== '' && $vv ) {
				$unidtable[$vv][$from] = $to;
			}
		}
		// syntax error, pass
		if ( !isset( $this->mConverter->getVariantNames()[$vv] ) ) {
			$bidtable = [];
			$unidtable = [];
			break;
		}
	}
	$this->mBidtable = $bidtable;
	$this->mUnidtable = $unidtable;
}
219 | |
/**
 * Build a description of the parsed rules.
 *
 * Each bidirectional entry becomes "<variant name><codesep><text><varsep>";
 * each unidirectional entry is prefixed with "<from>⇒". The separators come
 * from the converter.
 *
 * @return string
 */
private function getRulesDesc() {
	$codesep = $this->mConverter->getDescCodeSeparator();
	$varsep = $this->mConverter->getDescVarSeparator();

	$parts = [];
	foreach ( $this->mBidtable as $code => $word ) {
		$name = $this->mConverter->getVariantNames()[$code];
		$parts[] = $name . $codesep . $word . $varsep;
	}
	foreach ( $this->mUnidtable as $code => $pairs ) {
		foreach ( $pairs as $source => $target ) {
			$name = $this->mConverter->getVariantNames()[$code];
			$parts[] = $source . '⇒' . $name . $codesep . $target . $varsep;
		}
	}

	return implode( '', $parts );
}
238 | |
/**
 * Pick the text to display for the given variant from the parsed rules.
 *
 * Returns the raw rule text when no rules were parsed. Otherwise tries, in
 * order: the variant's bidirectional entry, a bidirectional entry of one of
 * its fallbacks, the variant's first unidirectional entry, and - only when
 * manual conversion is 'disable' for the variant - the first entry of
 * whichever table is non-empty.
 *
 * @param string $variant
 *
 * @return string|false False when none of the lookups above produced a text
 */
private function getRuleConvertedStr( $variant ) {
	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;

	if ( !$bidtable && !$unidtable ) {
		return $this->mRules;
	}

	// display current variant in bidirectional array
	$disp = $this->getTextInBidtable( $variant );
	// or display current variant in fallbacks
	if ( $disp === false ) {
		$disp = $this->getTextInBidtable(
			$this->mConverter->getVariantFallbacks( $variant ) );
	}
	// or display current variant in unidirectional array
	if ( $disp === false && array_key_exists( $variant, $unidtable ) ) {
		$disp = array_values( $unidtable[$variant] )[0];
	}
	// or display first text under disable manual convert.
	// A variant missing from the manual-level map is treated as "not disabled"
	// instead of raising an undefined-key warning.
	$manualLevel = $this->mConverter->getManualLevel()[$variant] ?? null;
	if ( $disp === false && $manualLevel === 'disable' ) {
		if ( count( $bidtable ) > 0 ) {
			$disp = array_values( $bidtable )[0];
		} else {
			// At least one table is non-empty here, so this one must be.
			$disp = array_values( array_values( $unidtable )[0] )[0];
		}
	}

	return $disp;
}
276 | |
/**
 * Similar to getRuleConvertedStr(), but this prefers to use the original
 * page title if $variant === $this->mConverter->getMainCode(),
 * and may return false in this case (so this title conversion rule
 * will be ignored and the original title is shown).
 *
 * @since 1.22
 * @param string $variant The variant code to display page title in
 * @return string|false The converted title or false if just page name
 */
private function getRuleConvertedTitle( $variant ) {
	if ( $variant !== $this->mConverter->getMainCode() ) {
		return $this->getRuleConvertedStr( $variant );
	}

	// If a string targeting exactly this variant is set,
	// use it. Otherwise, just return false, so the real
	// page name can be shown (and because variant === main,
	// there'll be no further automatic conversion).
	$disp = $this->getTextInBidtable( $variant );
	// Compare against false explicitly: a title of "0" is a valid string and
	// must not be overridden by a unidirectional rule.
	if ( $disp === false && array_key_exists( $variant, $this->mUnidtable ) ) {
		$disp = array_values( $this->mUnidtable[$variant] )[0];
	}
	// Assigned above or still false.
	return $disp;
}
306 | |
/**
 * Fill $this->mConvTable from the parsed bidirectional and unidirectional
 * tables, for every variant the converter knows.
 */
private function generateConvTable() {
	// Special case optimisation: nothing parsed, nothing to build
	if ( !( $this->mBidtable || $this->mUnidtable ) ) {
		$this->mConvTable = [];
		return;
	}

	$levels = $this->mConverter->getManualLevel();
	$twoWay = $this->mBidtable;
	$oneWay = $this->mUnidtable;

	// Variants already handled that have a bidirectional text
	$done = [];
	foreach ( $this->mConverter->getVariants() as $code ) {
		// A variant without its own bidirectional text takes the text of
		// its first fallback that has one, if any.
		if ( !isset( $twoWay[$code] ) ) {
			$inherited = $this->getTextInBidtable(
				$this->mConverter->getVariantFallbacks( $code )
			);
			if ( $inherited ) {
				$twoWay[$code] = $inherited;
			}
		}

		if ( isset( $twoWay[$code] ) ) {
			// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
			// or -{H|zh:WordZh;zh-tw:WordTw}-
			// or -{-|zh:WordZh;zh-tw:WordTw}-
			// to introduce a custom mapping between
			// words WordZh and WordTw in the whole text
			foreach ( $done as $earlier ) {
				if ( $levels[$code] === 'bidirectional' ) {
					$this->mConvTable[$code][$twoWay[$earlier]] = $twoWay[$code];
				}
				if ( $levels[$earlier] === 'bidirectional' ) {
					$this->mConvTable[$earlier][$twoWay[$code]] = $twoWay[$earlier];
				}
			}
			$done[] = $code;
		}

		// Unidirectional rules are merged in; on a key clash they take
		// precedence over the bidirectional mappings added above.
		$acceptsOneWay = in_array( $levels[$code], [ 'bidirectional', 'unidirectional' ], true );
		if ( $acceptsOneWay && isset( $oneWay[$code] ) ) {
			$this->mConvTable[$code] = $oneWay[$code] + ( $this->mConvTable[$code] ?? [] );
		}
	}
}
363 | |
/**
 * Parse rules and flags.
 *
 * Runs parseFlags(), optionally parseRules(), then processes each flag in
 * order to set $this->mRuleDisplay, $this->mRuleTitle and
 * $this->mRulesAction, and finally calls generateConvTable(). When no flag
 * assigns a display text, $this->mRuleDisplay becomes an error message.
 *
 * @param string|null $variant Variant language code; the converter's
 *  preferred variant is used when this is empty
 */
public function parse( $variant = null ) {
	if ( !$variant ) {
		$variant = $this->mConverter->getPreferredVariant();
	}

	$this->parseFlags();
	$flags = $this->mFlags;

	// convert to specified variant
	// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
	if ( $this->mVariantFlags ) {
		// check if current variant in flags
		if ( isset( $this->mVariantFlags[$variant] ) ) {
			// then convert <text to convert> to current language
			$this->mRules = $this->mConverter->autoConvert( $this->mRules,
				$variant );
		} else {
			// if the current variant is not in flags,
			// then we check its fallback variants.
			$variantFallbacks =
				$this->mConverter->getVariantFallbacks( $variant );
			if ( is_array( $variantFallbacks ) ) {
				foreach ( $variantFallbacks as $variantFallback ) {
					// if current variant's fallback exist in flags
					if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
						// then convert <text to convert> to fallback language
						$this->mRules =
							$this->mConverter->autoConvert( $this->mRules,
								$variantFallback );
						break;
					}
				}
			}
		}
		$this->mFlags = $flags = [ 'R' => true ];
	}

	if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
		// decode =&gt; HTML entities modified by Sanitizer::internalRemoveHtmlTags,
		// so that parseRules() can find the literal "=>" of unidirectional rules
		$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
		$this->parseRules();
	}
	$rules = $this->mRules;

	if ( !$this->mBidtable && !$this->mUnidtable ) {
		if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
			// fill all variants if the text in -{A/H/-|text}- is non-empty but without rules
			if ( $rules !== '' ) {
				foreach ( $this->mConverter->getVariants() as $v ) {
					$this->mBidtable[$v] = $rules;
				}
			}
		} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
			$this->mFlags = $flags = [ 'R' => true ];
		}
	}

	// false means "no flag has produced a display text yet"; see the error case below
	$this->mRuleDisplay = false;
	foreach ( $flags as $flag => $unused ) {
		switch ( $flag ) {
			case 'R':
				// if we don't do content convert, still strip the -{}- tags
				$this->mRuleDisplay = $rules;
				break;
			case 'N':
				// process N flag: output current variant name
				$ruleVar = trim( $rules );
				$this->mRuleDisplay = $this->mConverter->getVariantNames()[$ruleVar] ?? '';
				break;
			case 'D':
				// process D flag: output rules description
				$this->mRuleDisplay = $this->getRulesDesc();
				break;
			case 'H':
				// process H,- flag or T only: output nothing
				$this->mRuleDisplay = '';
				break;
			case '-':
				$this->mRulesAction = 'remove';
				$this->mRuleDisplay = '';
				break;
			case '+':
				$this->mRulesAction = 'add';
				$this->mRuleDisplay = '';
				break;
			case 'S':
				$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
				break;
			case 'T':
				$this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
				$this->mRuleDisplay = '';
				break;
			default:
				// ignore unknown flags (but see error-case below)
		}
	}
	if ( $this->mRuleDisplay === false ) {
		$this->mRuleDisplay = '<span class="error">'
			. wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
			. '</span>';
	}

	$this->generateConvTable();
}
472 | |
/**
 * Checks if there are conversion rules.
 *
 * This only tests whether the rule text in $this->mRules is a non-empty
 * string; the property is empty until parse() has filled it in.
 *
 * @return bool True when $this->mRules is not the empty string
 */
public function hasRules() {
	return $this->mRules !== '';
}
480 | |
/**
 * Get display text on markup -{...}-
 *
 * Returns $this->mRuleDisplay, which is the empty string until parse()
 * assigns it.
 *
 * @return string
 */
public function getDisplay() {
	return $this->mRuleDisplay;
}
488 | |
/**
 * Get converted title.
 *
 * Returns $this->mRuleTitle, which stays false unless parse() processed
 * a T flag.
 *
 * @return string|false
 */
public function getTitle() {
	return $this->mRuleTitle;
}
496 | |
/**
 * Return how to deal with conversion rules.
 *
 * @return string 'none' by default; parse() sets 'add' for the + flag and
 *  'remove' for the - flag
 */
public function getRulesAction() {
	return $this->mRulesAction;
}
504 | |
/**
 * Get conversion table. (bidirectional and unidirectional
 * conversion table)
 *
 * @return array The table filled by generateConvTable(), keyed by variant
 *  code; each value maps source text to replacement text
 */
public function getConvTable() {
	return $this->mConvTable;
}
513 | |
/**
 * Get conversion rules string.
 *
 * @return string The current value of $this->mRules (empty until parse() runs)
 */
public function getRules() {
	return $this->mRules;
}
521 | |
/**
 * Get conversion flags.
 *
 * @return array Flags as stored by parse(), keyed by flag name with true
 *  as the value
 */
public function getFlags() {
	return $this->mFlags;
}
529 | } |
530 | |
/**
 * Registers the un-namespaced name 'ConverterRule' as an alias of the
 * namespaced class.
 *
 * @deprecated class alias since 1.43
 */
class_alias( ConverterRule::class, 'ConverterRule' );