MediaWiki master
ConverterRule.php
Go to the documentation of this file.
1<?php
23
25use StringUtils;
26
/** @var string Original text in -{text}- */
public $mText;
/**
 * @var string|false Text to display in place of the markup. parse() resets it
 *  to false and then fills it in flag by flag.
 */
public $mRuleDisplay = '';
/** @var string|false Converted title, assigned by parse() when the T flag is present */
public $mRuleTitle = false;
/** @var string The text of the rules */
public $mRules = '';
/** @var string Either 'none', 'add' (set for the + flag) or 'remove' (set for the - flag) */
public $mRulesAction = 'none';
/** @var array Normalised flags, keyed by flag with value true */
public $mFlags = [];
/** @var array Variant codes given in the flag position, keyed by code with value true */
public $mVariantFlags = [];
/** @var array Conversion table built by parse(): variant code => [ from => to ] */
public $mConvTable = [];
/** @var array Of the translation in each variant: variant code => text */
public $mBidtable = [];
/** @var array Of the translation in each variant: variant code => [ from => to ] */
public $mUnidtable = [];
66
/**
 * @param string $text The original text in -{text}-
 * @param LanguageConverter $converter Converter that supplies flags, variants
 *  and separators to the parsing methods
 */
public function __construct( $text, LanguageConverter $converter ) {
	$this->mConverter = $converter;
	$this->mText = $text;
}
75
/**
 * Check if the variant array is in the convert array.
 *
 * @param string|string[] $variants Variant code, or codes in order of preference
 * @return string|false Bidirectional-table text of the first listed variant
 *  that has an entry, or false when none has one
 */
public function getTextInBidtable( $variants ) {
	// An empty list simply never enters the loop and falls through to false.
	foreach ( (array)$variants as $code ) {
		if ( isset( $this->mBidtable[$code] ) ) {
			return $this->mBidtable[$code];
		}
	}
	return false;
}
94
/**
 * Split $this->mText at the first "|" into a flag list and the rules text,
 * normalise the flags, and store the results in $this->mFlags,
 * $this->mVariantFlags and $this->mRules.
 */
private function parseFlags() {
	$rulesText = $this->mText;
	$parsed = [];
	$variantOnly = [];

	$pieces = explode( '|', $rulesText, 2 );
	if ( count( $pieces ) === 2 ) {
		// Everything before the first pipe is a ";"-separated flag list.
		$known = $this->mConverter->getFlags();
		foreach ( StringUtils::explode( ';', $pieces[0] ) as $candidate ) {
			$candidate = trim( $candidate );
			if ( isset( $known[$candidate] ) ) {
				$parsed[$known[$candidate]] = true;
			}
		}
		$rulesText = $pieces[1];
	}

	// R, N and - each suppress every other flag; the earliest in this list wins.
	$exclusive = null;
	foreach ( [ 'R', 'N', '-' ] as $name ) {
		if ( isset( $parsed[$name] ) ) {
			$exclusive = $name;
			break;
		}
	}

	if ( !$parsed ) {
		// No recognised flag at all
		$parsed['S'] = true;
	} elseif ( $exclusive !== null ) {
		$parsed = [ $exclusive => true ];
	} elseif ( isset( $parsed['T'] ) && count( $parsed ) === 1 ) {
		// T on its own also gets H
		$parsed['H'] = true;
	} elseif ( isset( $parsed['H'] ) ) {
		// replace A flag, and remove other flags except T and D
		$kept = [ '+' => true, 'H' => true ];
		foreach ( [ 'T', 'D' ] as $optional ) {
			if ( isset( $parsed[$optional] ) ) {
				$kept[$optional] = true;
			}
		}
		$parsed = $kept;
	} else {
		if ( isset( $parsed['A'] ) ) {
			// Array union appends only the keys that are missing, in this order.
			$parsed += [ '+' => true, 'S' => true ];
		}
		if ( isset( $parsed['D'] ) ) {
			unset( $parsed['S'] );
		}
		// try to find flags like "zh-hans", "zh-hant"
		// allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
		$variantOnly = array_intersect( array_keys( $parsed ), $this->mConverter->getVariants() );
		if ( $variantOnly ) {
			$variantOnly = array_fill_keys( $variantOnly, true );
			$parsed = [];
		}
	}

	$this->mFlags = $parsed;
	$this->mRules = $rulesText;
	$this->mVariantFlags = $variantOnly;
}
159
/**
 * Parse $this->mRules into the bidirectional table ($this->mBidtable,
 * variant => text) and the unidirectional table ($this->mUnidtable,
 * variant => [ from => to ]).
 *
 * Items without a ":" are skipped as syntax errors. An item whose variant is
 * not a key of the converter's variant names discards everything parsed so
 * far and stops, leaving both tables empty.
 */
private function parseRules() {
	$rules = $this->mRules;
	$bidtable = [];
	$unidtable = [];
	$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

	// Split text according to $varsep_pattern, but ignore semicolons from HTML entities
	$rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
	$choice = preg_split( $varsep_pattern, $rules );
	// @phan-suppress-next-line PhanTypeComparisonFromArray
	if ( $choice === false ) {
		$error = preg_last_error();
		$errorText = preg_last_error_msg();
		LoggerFactory::getInstance( 'parser' )->warning(
			'ConverterRule preg_split error: {code} {errorText}',
			[
				'code' => $error,
				'errorText' => $errorText
			]
		);
		$choice = [];
	}
	// Put back the semicolons that were masked for the split
	$choice = str_replace( "\x01", ';', $choice );

	foreach ( $choice as $c ) {
		$v = explode( ':', $c, 2 );
		if ( count( $v ) !== 2 ) {
			// syntax error, skip
			continue;
		}
		$to = trim( $v[1] );
		$v = trim( $v[0] );
		$u = explode( '=>', $v, 2 );
		$vv = $this->mConverter->validateVariant( $v );
		if ( count( $u ) === 1 ) {
			// "variant:text"
			// if $to is empty (which is also used as $from in bidtable),
			// strtr() could return a wrong result.
			if ( $to !== '' && $vv ) {
				$bidtable[$vv] = $to;
			}
		} else {
			// "from=>variant:text"
			$from = trim( $u[0] );
			$v = trim( $u[1] );
			$vv = $this->mConverter->validateVariant( $v );
			// if $from is empty, strtr() could return a wrong result.
			// Entries of $unidtable are only ever created here, as arrays, so
			// no special case for a non-array entry is needed.
			if ( $from !== '' && $vv ) {
				$unidtable[$vv][$from] = $to;
			}
		}
		// syntax error, pass
		// A null $vv is tested explicitly so that null is never used as an
		// array offset.
		if ( $vv === null || !isset( $this->mConverter->getVariantNames()[$vv] ) ) {
			$bidtable = [];
			$unidtable = [];
			break;
		}
	}
	$this->mBidtable = $bidtable;
	$this->mUnidtable = $unidtable;
}
225
/**
 * Build a description of the parsed rules: one "name, code separator, text,
 * variant separator" item per bidirectional entry, followed by one
 * "from⇒name, code separator, text, variant separator" item per
 * unidirectional entry.
 *
 * @return string
 */
private function getRulesDesc() {
	$codesep = $this->mConverter->getDescCodeSeparator();
	$varsep = $this->mConverter->getDescVarSeparator();
	$items = [];
	foreach ( $this->mBidtable as $code => $replacement ) {
		$items[] = $this->mConverter->getVariantNames()[$code] . $codesep . $replacement . $varsep;
	}
	foreach ( $this->mUnidtable as $code => $pairs ) {
		foreach ( $pairs as $from => $to ) {
			$items[] = $from . '⇒' . $this->mConverter->getVariantNames()[$code]
				. $codesep . $to . $varsep;
		}
	}
	return implode( '', $items );
}
244
/**
 * Choose the text to display for a variant from the parsed tables.
 *
 * @param string $variant
 * @return string|false The rules text itself when both tables are empty,
 *  otherwise the selected text, or false when no source below yields one
 */
private function getRuleConvertedStr( $variant ) {
	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;

	if ( count( $bidtable ) === 0 && count( $unidtable ) === 0 ) {
		return $this->mRules;
	}

	// 1. the variant's own entry in the bidirectional table
	$own = $this->getTextInBidtable( $variant );
	if ( $own !== false ) {
		return $own;
	}

	// 2. the entry of one of the variant's fallbacks
	$viaFallback = $this->getTextInBidtable(
		$this->mConverter->getVariantFallbacks( $variant )
	);
	if ( $viaFallback !== false ) {
		return $viaFallback;
	}

	// 3. the first text of the variant's unidirectional entry
	if ( array_key_exists( $variant, $unidtable ) ) {
		return array_values( $unidtable[$variant] )[0];
	}

	// 4. with manual conversion disabled for the variant, the first text available
	if ( $this->mConverter->getManualLevel()[$variant] === 'disable' ) {
		return count( $bidtable ) > 0
			? array_values( $bidtable )[0]
			: array_values( array_values( $unidtable )[0] )[0];
	}

	return false;
}
282
/**
 * Choose the title text for a variant.
 *
 * For every variant other than the converter's main code this defers to
 * getRuleConvertedStr(). For the main code only a string targeting exactly
 * that variant is used; otherwise false is returned so the real page name
 * can be shown (and because variant === main, there'll be no further
 * automatic conversion).
 *
 * @param string $variant
 * @return string|false
 */
private function getRuleConvertedTitle( $variant ) {
	if ( $variant !== $this->mConverter->getMainCode() ) {
		return $this->getRuleConvertedStr( $variant );
	}

	$disp = $this->getTextInBidtable( $variant );
	// Compare explicitly rather than by truthiness: the text "0" is a valid
	// title and must not be overridden by a unidirectional entry below.
	if ( $disp !== false && $disp !== '' ) {
		return $disp;
	}
	if ( array_key_exists( $variant, $this->mUnidtable ) ) {
		$disp = array_values( $this->mUnidtable[$variant] )[0];
	}
	// Assigned above or still false.
	return $disp;
}
312
/**
 * Fill $this->mConvTable (variant => [ from => to ]) from the bidirectional
 * and unidirectional tables, honouring each variant's manual level as
 * reported by the converter.
 */
private function generateConvTable() {
	// Special case optimisation
	if ( !$this->mBidtable && !$this->mUnidtable ) {
		$this->mConvTable = [];
		return;
	}

	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;
	$manLevel = $this->mConverter->getManualLevel();

	// Variants already known to have a bidirectional text
	$vmarked = [];
	foreach ( $this->mConverter->getVariants() as $v ) {
		/* for bidirectional array
			fill in the missing variants, if any,
			with fallbacks */
		if ( !isset( $bidtable[$v] ) ) {
			$variantFallbacks =
				$this->mConverter->getVariantFallbacks( $v );
			$vf = $this->getTextInBidtable( $variantFallbacks );
			// Compare explicitly rather than by truthiness, so that a
			// fallback text of "0" is not dropped.
			if ( $vf !== false && $vf !== '' ) {
				$bidtable[$v] = $vf;
			}
		}

		if ( isset( $bidtable[$v] ) ) {
			foreach ( $vmarked as $vo ) {
				// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
				// or -{H|zh:WordZh;zh-tw:WordTw}-
				// or -{-|zh:WordZh;zh-tw:WordTw}-
				// to introduce a custom mapping between
				// words WordZh and WordTw in the whole text
				if ( $manLevel[$v] === 'bidirectional' ) {
					$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
				}
				if ( $manLevel[$vo] === 'bidirectional' ) {
					$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
				}
			}
			$vmarked[] = $v;
		}
		/* for unidirectional array fill to convert tables */
		if ( ( $manLevel[$v] === 'bidirectional' || $manLevel[$v] === 'unidirectional' )
			&& isset( $unidtable[$v] )
		) {
			if ( isset( $this->mConvTable[$v] ) ) {
				// Array union: unidirectional pairs win over pairs already
				// in the table for the same source text.
				$this->mConvTable[$v] = $unidtable[$v] + $this->mConvTable[$v];
			} else {
				$this->mConvTable[$v] = $unidtable[$v];
			}
		}
	}
}
369
/**
 * Parse rules and flags.
 *
 * Fills in the display text, title, rules action and conversion table that
 * the getters of this class expose.
 *
 * @param string|null $variant Variant to produce output for; when empty, the
 *  converter's preferred variant is used
 */
public function parse( $variant = null ) {
	if ( !$variant ) {
		$variant = $this->mConverter->getPreferredVariant();
	}

	$this->parseFlags();
	$flags = $this->mFlags;

	// convert to specified variant
	// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
	if ( $this->mVariantFlags ) {
		// Holds at most one entry: the variant to convert <text to convert> to.
		$convertTo = [];
		if ( isset( $this->mVariantFlags[$variant] ) ) {
			// the current variant is itself listed in the flags
			$convertTo[] = $variant;
		} else {
			// otherwise use the first of its fallbacks that is listed
			$fallbacks = $this->mConverter->getVariantFallbacks( $variant );
			if ( is_array( $fallbacks ) ) {
				foreach ( $fallbacks as $fallback ) {
					if ( isset( $this->mVariantFlags[$fallback] ) ) {
						$convertTo[] = $fallback;
						break;
					}
				}
			}
		}
		foreach ( $convertTo as $target ) {
			$this->mRules = $this->mConverter->autoConvert( $this->mRules, $target );
		}
		$flags = [ 'R' => true ];
		$this->mFlags = $flags;
	}

	$skipRuleParsing = isset( $flags['R'] ) || isset( $flags['N'] );
	if ( !$skipRuleParsing ) {
		// decode => HTML entities modified by Sanitizer::internalRemoveHtmlTags
		$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
		$this->parseRules();
	}
	$rules = $this->mRules;

	if ( !$this->mBidtable && !$this->mUnidtable ) {
		if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
			// fill all variants if the text in -{A/H/-|text}- is non-empty but without rules
			if ( $rules !== '' ) {
				foreach ( $this->mConverter->getVariants() as $code ) {
					$this->mBidtable[$code] = $rules;
				}
			}
		} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
			$flags = [ 'R' => true ];
			$this->mFlags = $flags;
		}
	}

	// Stays false only if no flag below produces any output.
	$this->mRuleDisplay = false;
	foreach ( $flags as $flag => $unused ) {
		switch ( $flag ) {
			case 'S':
				$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
				break;
			case 'T':
				$this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
				$this->mRuleDisplay = '';
				break;
			case 'R':
				// if we don't do content convert, still strip the -{}- tags
				$this->mRuleDisplay = $rules;
				break;
			case 'N':
				// process N flag: output the name of the variant given as the rules text
				$this->mRuleDisplay = $this->mConverter->getVariantNames()[trim( $rules )] ?? '';
				break;
			case 'D':
				// process D flag: output rules description
				$this->mRuleDisplay = $this->getRulesDesc();
				break;
			case 'H':
				// process H,- flag or T only: output nothing
				$this->mRuleDisplay = '';
				break;
			case '+':
				$this->mRulesAction = 'add';
				$this->mRuleDisplay = '';
				break;
			case '-':
				$this->mRulesAction = 'remove';
				$this->mRuleDisplay = '';
				break;
			default:
				// ignore unknown flags (but see error-case below)
		}
	}
	if ( $this->mRuleDisplay === false ) {
		$this->mRuleDisplay = '<span class="error">'
			. wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
			. '</span>';
	}

	$this->generateConvTable();
}
478
/**
 * Checks if there are conversion rules.
 *
 * @return bool True unless the rules text is exactly the empty string
 */
public function hasRules() {
	return $this->mRules !== '';
}
486
/**
 * Get display text on markup -{...}-.
 *
 * @return string|false The current value of $this->mRuleDisplay
 */
public function getDisplay() {
	return $this->mRuleDisplay;
}
494
/**
 * Get the converted title.
 *
 * @return string|false The current value of $this->mRuleTitle, which is false
 *  until parse() assigns a title for the T flag
 */
public function getTitle() {
	return $this->mRuleTitle;
}
502
/**
 * Return how to deal with conversion rules.
 *
 * @return string 'none' by default; parse() sets 'add' for the + flag and
 *  'remove' for the - flag
 */
public function getRulesAction() {
	return $this->mRulesAction;
}
510
/**
 * Get conversion table.
 *
 * @return array The current value of $this->mConvTable: variant code => [ from => to ]
 */
public function getConvTable() {
	return $this->mConvTable;
}
519
/**
 * Get conversion rules string.
 *
 * @return string The current value of $this->mRules
 */
public function getRules() {
	return $this->mRules;
}
527
/**
 * Get conversion flags.
 *
 * @return array The current value of $this->mFlags, keyed by flag
 */
public function getFlags() {
	return $this->mFlags;
}
535}
536
// Also make the class available under the name 'ConverterRule'
// (presumably the legacy un-namespaced name; confirm against the file's
// namespace declaration).
class_alias( ConverterRule::class, 'ConverterRule' );
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
The rules used for language conversion, this processes the rules extracted by Parser from the -{ }- w...
string $mRules
the text of the rules
getFlags()
Get conversion flags.
getRulesAction()
Return how to deal with conversion rules.
string $mText
original text in -{text}-
getTextInBidtable( $variants)
Check if the variant array is in the convert array.
hasRules()
Checks if there are conversion rules.
getDisplay()
Get display text on markup -{...}-.
array $mUnidtable
of the translation in each variant
parse( $variant=null)
Parse rules and flags.
array $mBidtable
of the translation in each variant
__construct( $text, LanguageConverter $converter)
getConvTable()
Get conversion table.
getRules()
Get conversion rules string.
Base class for multi-variant language conversion.
Create PSR-3 logger objects.
A collection of static methods to play with strings.