Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
23.11% |
52 / 225 |
|
25.00% |
4 / 16 |
CRAP | |
0.00% |
0 / 1 |
ConverterRule | |
23.21% |
52 / 224 |
|
25.00% |
4 / 16 |
4719.32 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getTextInBidtable | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
parseFlags | |
24.39% |
10 / 41 |
|
0.00% |
0 / 1 |
126.66 | |||
parseRules | |
31.82% |
14 / 44 |
|
0.00% |
0 / 1 |
86.32 | |||
getRulesDesc | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
getRuleConvertedStr | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
72 | |||
getRuleConvertedTitle | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
generateConvTable | |
11.54% |
3 / 26 |
|
0.00% |
0 / 1 |
149.68 | |||
parse | |
31.25% |
20 / 64 |
|
0.00% |
0 / 1 |
282.76 | |||
hasRules | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getDisplay | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTitle | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getRulesAction | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getConvTable | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getRules | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFlags | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | * @author fdcn <fdcn64@gmail.com>, PhiLiP <philip.npc@gmail.com> |
20 | */ |
21 | |
22 | namespace MediaWiki\Language; |
23 | |
24 | use MediaWiki\Logger\LoggerFactory; |
25 | use StringUtils; |
26 | |
/**
 * The rules used for language conversion, this processes the rules
 * extracted by Parser from the `-{ }-` wikitext syntax.
 *
 * Usage as shown by this class: construct it with the raw text found between
 * `-{` and `}-`, call parse(), then read the outcome through getDisplay(),
 * getTitle(), getRulesAction(), getConvTable(), getRules() and getFlags().
 *
 * @ingroup Language
 */
class ConverterRule {
	/**
	 * @var string original text in -{text}-
	 */
	public $mText;
	/**
	 * @var LanguageConverter converter that supplies the known flags, variants,
	 *  fallbacks, separators and manual-conversion levels
	 */
	public $mConverter;
	/** @var string|false text to show in place of the markup; false while undecided */
	public $mRuleDisplay = '';
	/** @var string|false converted title set by the T flag, false if none */
	public $mRuleTitle = false;
	/**
	 * @var string the text of the rules (the part after the flags separator `|`)
	 */
	public $mRules = '';
	/** @var string one of 'none', 'add' or 'remove' */
	public $mRulesAction = 'none';
	/** @var array normalised flags, as flag => true */
	public $mFlags = [];
	/** @var array variant codes given in the flags position, as code => true */
	public $mVariantFlags = [];
	/** @var array conversion table, as variant => [ from => to ] */
	public $mConvTable = [];
	/**
	 * @var array of the translation in each variant (bidirectional rules),
	 *  as variant => text
	 */
	public $mBidtable = [];
	/**
	 * @var array of the translation in each variant (unidirectional rules),
	 *  as variant => [ from => to ]
	 */
	public $mUnidtable = [];

	/**
	 * @param string $text The text between -{ and }-
	 * @param LanguageConverter $converter
	 */
	public function __construct( $text, LanguageConverter $converter ) {
		$this->mText = $text;
		$this->mConverter = $converter;
	}

	/**
	 * Check if the variant array is in the convert array.
	 *
	 * The first variant that has an entry in the bidirectional table wins.
	 *
	 * @param array|string $variants Variant language code(s), in order of preference
	 * @return string|false Translated text, or false if none of the variants has one
	 */
	public function getTextInBidtable( $variants ) {
		// A lone string becomes a one-element list; an empty array (or a falsy
		// scalar cast to an empty array) simply skips the loop below.
		foreach ( (array)$variants as $variant ) {
			if ( isset( $this->mBidtable[$variant] ) ) {
				return $this->mBidtable[$variant];
			}
		}
		return false;
	}

	/**
	 * Parse flags with syntax -{FLAG| ... }-
	 *
	 * Splits mText at the first `|`, normalises the flags found in front of it
	 * and stores the results in mFlags, mVariantFlags and mRules.
	 */
	private function parseFlags() {
		$text = $this->mText;
		$flags = [];
		$variantFlags = [];

		$sepPos = strpos( $text, '|' );
		if ( $sepPos !== false ) {
			$validFlags = $this->mConverter->getFlags();
			$f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
			foreach ( $f as $ff ) {
				$ff = trim( $ff );
				// Unknown flags are dropped; known ones are mapped to their
				// canonical name through the converter's flag table.
				if ( isset( $validFlags[$ff] ) ) {
					$flags[$validFlags[$ff]] = true;
				}
			}
			$text = strval( substr( $text, $sepPos + 1 ) );
		}

		if ( !$flags ) {
			// No (valid) flag at all: plain "show converted text" markup.
			$flags['S'] = true;
		} elseif ( isset( $flags['R'] ) ) {
			// remove other flags
			$flags = [ 'R' => true ];
		} elseif ( isset( $flags['N'] ) ) {
			// remove other flags
			$flags = [ 'N' => true ];
		} elseif ( isset( $flags['-'] ) ) {
			// remove other flags
			$flags = [ '-' => true ];
		} elseif ( count( $flags ) === 1 && isset( $flags['T'] ) ) {
			// T alone also hides the markup
			$flags['H'] = true;
		} elseif ( isset( $flags['H'] ) ) {
			// replace A flag, and remove other flags except T and D
			$temp = [ '+' => true, 'H' => true ];
			if ( isset( $flags['T'] ) ) {
				$temp['T'] = true;
			}
			if ( isset( $flags['D'] ) ) {
				$temp['D'] = true;
			}
			$flags = $temp;
		} else {
			if ( isset( $flags['A'] ) ) {
				$flags['+'] = true;
				$flags['S'] = true;
			}
			if ( isset( $flags['D'] ) ) {
				unset( $flags['S'] );
			}
			// try to find flags like "zh-hans", "zh-hant"
			// allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
			$variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->getVariants() );
			if ( $variantFlags ) {
				$variantFlags = array_fill_keys( $variantFlags, true );
				$flags = [];
			}
		}
		$this->mVariantFlags = $variantFlags;
		$this->mRules = $text;
		$this->mFlags = $flags;
	}

	/**
	 * Generate conversion table.
	 *
	 * Splits mRules into individual rules and fills mBidtable
	 * (`variant:text`) and mUnidtable (`from=>variant:text`). If any rule
	 * names a variant the converter does not know, both tables are left empty.
	 */
	private function parseRules() {
		$rules = $this->mRules;
		$bidtable = [];
		$unidtable = [];
		$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

		// Split text according to $varsep_pattern, but ignore semicolons from HTML entities:
		// the entity's trailing ";" is swapped for \x01 during the split and restored after.
		$rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
		$choice = preg_split( $varsep_pattern, $rules );
		if ( $choice === false ) {
			$error = preg_last_error();
			$errorText = preg_last_error_msg();
			LoggerFactory::getInstance( 'parser' )->warning(
				'ConverterRule preg_split error: {code} {errorText}',
				[
					'code' => $error,
					'errorText' => $errorText
				]
			);
			$choice = [];
		}
		$choice = str_replace( "\x01", ';', $choice );

		foreach ( $choice as $c ) {
			$v = explode( ':', $c, 2 );
			if ( count( $v ) !== 2 ) {
				// syntax error, skip
				continue;
			}
			$to = trim( $v[1] );
			$v = trim( $v[0] );
			$u = explode( '=>', $v, 2 );
			$vv = $this->mConverter->validateVariant( $v );
			// if $to is empty (which is also used as $from in bidtable),
			// strtr() could return a wrong result.
			if ( count( $u ) === 1 && $to !== '' && $vv ) {
				$bidtable[$vv] = $to;
			} elseif ( count( $u ) === 2 ) {
				$from = trim( $u[0] );
				$v = trim( $u[1] );
				$vv = $this->mConverter->validateVariant( $v );
				// if $from is empty, strtr() could return a wrong result.
				// $vv is checked before it is used as an array key, so an
				// invalid (empty) variant never reaches array_key_exists().
				if ( $from !== '' && $vv ) {
					if ( array_key_exists( $vv, $unidtable ) && !is_array( $unidtable[$vv] ) ) {
						$unidtable[$vv] = [ $from => $to ];
					} else {
						$unidtable[$vv][$from] = $to;
					}
				}
			}
			// syntax error, pass: one unknown variant invalidates every rule
			if ( !isset( $this->mConverter->getVariantNames()[$vv] ) ) {
				$bidtable = [];
				$unidtable = [];
				break;
			}
		}
		$this->mBidtable = $bidtable;
		$this->mUnidtable = $unidtable;
	}

	/**
	 * Build the human-readable description of the rules (used by the D flag).
	 *
	 * @return string
	 */
	private function getRulesDesc() {
		$codesep = $this->mConverter->getDescCodeSeparator();
		$varsep = $this->mConverter->getDescVarSeparator();
		$text = '';
		foreach ( $this->mBidtable as $k => $v ) {
			$text .= $this->mConverter->getVariantNames()[$k] . "$codesep$v$varsep";
		}
		foreach ( $this->mUnidtable as $k => $a ) {
			foreach ( $a as $from => $to ) {
				$text .= $from . '⇒' . $this->mConverter->getVariantNames()[$k] .
					"$codesep$to$varsep";
			}
		}
		return $text;
	}

	/**
	 * Parse rules conversion.
	 *
	 * Picks the text to display for $variant, trying in order: the
	 * bidirectional entry for the variant, the bidirectional entry of one of
	 * its fallbacks, the first unidirectional target for the variant and, when
	 * manual conversion is disabled for the variant, the first text of any rule.
	 *
	 * @param string $variant
	 *
	 * @return string|false The text to display, or false if no rule applies
	 *  (parse() turns false into an error message)
	 */
	private function getRuleConvertedStr( $variant ) {
		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;

		if ( count( $bidtable ) + count( $unidtable ) === 0 ) {
			return $this->mRules;
		}

		// display current variant in bidirectional array
		$disp = $this->getTextInBidtable( $variant );
		// or display current variant in fallbacks
		if ( $disp === false ) {
			$disp = $this->getTextInBidtable(
				$this->mConverter->getVariantFallbacks( $variant ) );
		}
		// or display current variant in unidirectional array
		if ( $disp === false && array_key_exists( $variant, $unidtable ) ) {
			$disp = array_values( $unidtable[$variant] )[0];
		}
		// or display first text under disable manual convert
		if ( $disp === false ) {
			// Guarded lookup: a variant without a manual level must not
			// raise an "undefined array key" warning.
			$manualLevel = $this->mConverter->getManualLevel()[$variant] ?? null;
			if ( $manualLevel === 'disable' ) {
				if ( count( $bidtable ) > 0 ) {
					$disp = array_values( $bidtable )[0];
				} else {
					$disp = array_values( array_values( $unidtable )[0] )[0];
				}
			}
		}

		return $disp;
	}

	/**
	 * Similar to getRuleConvertedStr(), but this prefers to use MediaWiki\Title\Title;
	 * use original page title if $variant === $this->mConverter->getMainCode(),
	 * and may return false in this case (so this title conversion rule
	 * will be ignored and the original title is shown).
	 *
	 * @since 1.22
	 * @param string $variant The variant code to display page title in
	 * @return string|false The converted title or false if just page name
	 */
	private function getRuleConvertedTitle( $variant ) {
		if ( $variant !== $this->mConverter->getMainCode() ) {
			return $this->getRuleConvertedStr( $variant );
		}

		// If a string targeting exactly this variant is set,
		// use it. Otherwise, just return false, so the real
		// page name can be shown (and because variant === main,
		// there'll be no further automatic conversion).
		$disp = $this->getTextInBidtable( $variant );
		if ( $disp ) {
			return $disp;
		}
		if ( array_key_exists( $variant, $this->mUnidtable ) ) {
			$disp = array_values( $this->mUnidtable[$variant] )[0];
		}
		// Assigned above or still false.
		return $disp;
	}

	/**
	 * Generate conversion table for all text.
	 *
	 * Fills mConvTable from mBidtable and mUnidtable, honouring each variant's
	 * manual-conversion level. Entries are added to whatever mConvTable already
	 * holds, except when both rule tables are empty, in which case it is reset.
	 */
	private function generateConvTable() {
		// Special case optimisation
		if ( !$this->mBidtable && !$this->mUnidtable ) {
			$this->mConvTable = [];
			return;
		}

		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;
		$manLevel = $this->mConverter->getManualLevel();

		// Variants already seen that have a bidirectional text
		$vmarked = [];
		foreach ( $this->mConverter->getVariants() as $v ) {
			/* for bidirectional array
				fill in the missing variants, if any,
				with fallbacks */
			if ( !isset( $bidtable[$v] ) ) {
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $v );
				$vf = $this->getTextInBidtable( $variantFallbacks );
				if ( $vf ) {
					$bidtable[$v] = $vf;
				}
			}

			if ( isset( $bidtable[$v] ) ) {
				foreach ( $vmarked as $vo ) {
					// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
					// or -{H|zh:WordZh;zh-tw:WordTw}-
					// or -{-|zh:WordZh;zh-tw:WordTw}-
					// to introduce a custom mapping between
					// words WordZh and WordTw in the whole text
					if ( $manLevel[$v] === 'bidirectional' ) {
						$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
					}
					if ( $manLevel[$vo] === 'bidirectional' ) {
						$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
					}
				}
				$vmarked[] = $v;
			}
			/* for unidirectional array fill to convert tables */
			if ( ( $manLevel[$v] === 'bidirectional' || $manLevel[$v] === 'unidirectional' )
				&& isset( $unidtable[$v] )
			) {
				if ( isset( $this->mConvTable[$v] ) ) {
					// array union: unidirectional rules win on duplicate keys
					$this->mConvTable[$v] = $unidtable[$v] + $this->mConvTable[$v];
				} else {
					$this->mConvTable[$v] = $unidtable[$v];
				}
			}
		}
	}

	/**
	 * Parse rules and flags.
	 *
	 * Populates mFlags, mRules, mBidtable, mUnidtable, mRuleDisplay,
	 * mRuleTitle, mRulesAction and mConvTable from mText.
	 *
	 * @param string|null $variant Variant language code; the converter's
	 *  preferred variant is used when this is empty
	 */
	public function parse( $variant = null ) {
		if ( !$variant ) {
			$variant = $this->mConverter->getPreferredVariant();
		}

		$this->parseFlags();
		$flags = $this->mFlags;

		// convert to specified variant
		// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
		if ( $this->mVariantFlags ) {
			// check if current variant in flags
			if ( isset( $this->mVariantFlags[$variant] ) ) {
				// then convert <text to convert> to current language
				$this->mRules = $this->mConverter->autoConvert( $this->mRules,
					$variant );
			} else {
				// if the current variant is not in flags,
				// then we check its fallback variants.
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $variant );
				if ( is_array( $variantFallbacks ) ) {
					foreach ( $variantFallbacks as $variantFallback ) {
						// if current variant's fallback exist in flags
						if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
							// then convert <text to convert> to fallback language
							$this->mRules =
								$this->mConverter->autoConvert( $this->mRules,
									$variantFallback );
							break;
						}
					}
				}
			}
			$this->mFlags = $flags = [ 'R' => true ];
		}

		if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
			// decode => HTML entities modified by Sanitizer::internalRemoveHtmlTags.
			// The search string must be the escaped form "=&gt;"; replacing
			// "=>" with itself would leave unidirectional rules unparseable.
			$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
			$this->parseRules();
		}
		$rules = $this->mRules;

		if ( !$this->mBidtable && !$this->mUnidtable ) {
			if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
				// fill all variants if the text in -{A/H/-|text}- is non-empty but without rules
				if ( $rules !== '' ) {
					foreach ( $this->mConverter->getVariants() as $v ) {
						$this->mBidtable[$v] = $rules;
					}
				}
			} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
				// nothing to convert: treat the markup as raw text
				$this->mFlags = $flags = [ 'R' => true ];
			}
		}

		// false means "no flag produced a display text"; see the error case below
		$this->mRuleDisplay = false;
		foreach ( $flags as $flag => $unused ) {
			switch ( $flag ) {
				case 'R':
					// if we don't do content convert, still strip the -{}- tags
					$this->mRuleDisplay = $rules;
					break;
				case 'N':
					// process N flag: output current variant name
					$ruleVar = trim( $rules );
					$this->mRuleDisplay = $this->mConverter->getVariantNames()[$ruleVar] ?? '';
					break;
				case 'D':
					// process D flag: output rules description
					$this->mRuleDisplay = $this->getRulesDesc();
					break;
				case 'H':
					// process H,- flag or T only: output nothing
					$this->mRuleDisplay = '';
					break;
				case '-':
					$this->mRulesAction = 'remove';
					$this->mRuleDisplay = '';
					break;
				case '+':
					$this->mRulesAction = 'add';
					$this->mRuleDisplay = '';
					break;
				case 'S':
					$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
					break;
				case 'T':
					$this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
					$this->mRuleDisplay = '';
					break;
				default:
					// ignore unknown flags (but see error-case below)
			}
		}
		if ( $this->mRuleDisplay === false ) {
			$this->mRuleDisplay = '<span class="error">'
				. wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
				. '</span>';
		}

		$this->generateConvTable();
	}

	/**
	 * Checks if there are conversion rules.
	 * @return bool True if the rules text is not the empty string
	 */
	public function hasRules() {
		return $this->mRules !== '';
	}

	/**
	 * Get display text on markup -{...}-
	 * @return string
	 */
	public function getDisplay() {
		return $this->mRuleDisplay;
	}

	/**
	 * Get converted title.
	 * @return string|false
	 */
	public function getTitle() {
		return $this->mRuleTitle;
	}

	/**
	 * Return how to deal with conversion rules.
	 * @return string 'none', 'add' or 'remove'
	 */
	public function getRulesAction() {
		return $this->mRulesAction;
	}

	/**
	 * Get conversion table. (bidirectional and unidirectional
	 * conversion table)
	 * @return array
	 */
	public function getConvTable() {
		return $this->mConvTable;
	}

	/**
	 * Get conversion rules string.
	 * @return string
	 */
	public function getRules() {
		return $this->mRules;
	}

	/**
	 * Get conversion flags.
	 * @return array
	 */
	public function getFlags() {
		return $this->mFlags;
	}
}
535 | |
/**
 * Registers the un-namespaced name `ConverterRule` as an alias of
 * MediaWiki\Language\ConverterRule, so code that still refers to the
 * global class name keeps resolving to this class.
 *
 * @deprecated class alias since 1.43
 */
class_alias( ConverterRule::class, 'ConverterRule' );