MediaWiki master
ConverterRule.php
Go to the documentation of this file.
1<?php
23
25use StringUtils;
26
/** @var string Original text in -{text}- */
37 public $mText;
/** @var string|false Display text; set by parse() and returned by getDisplay() */
43 public $mRuleDisplay = '';
/** @var string|false Title text; set by parse() when the T flag is handled, returned by getTitle() */
45 public $mRuleTitle = false;
/** @var string The text of the rules */
49 public $mRules = '';
/** @var string 'none' by default; parse() sets 'add' for the + flag and 'remove' for the - flag */
51 public $mRulesAction = 'none';
/** @var array Map of flag => true, filled by parseFlags() and adjusted by parse() */
53 public $mFlags = [];
/** @var array Map of variant code => true for variant codes given as flags; filled by parseFlags() */
55 public $mVariantFlags = [];
/** @var array Conversion table built by generateConvTable(): variant => [ source text => replacement ] */
57 public $mConvTable = [];
/** @var array Bidirectional table: variant code => text of the translation in that variant */
61 public $mBidtable = [];
/** @var array Unidirectional table: variant code => [ source text => replacement text ] */
65 public $mUnidtable = [];
66
/**
 * Remember the raw markup text and the converter this rule is evaluated with.
 *
 * Nothing is parsed here; call parse() to process the text.
 *
 * @param string $text Original text in -{text}-
 * @param LanguageConverter $converter
 */
public function __construct( $text, LanguageConverter $converter ) {
	$this->mConverter = $converter;
	$this->mText = $text;
}
75
/**
 * Look up the first of the given variants that has an entry in the
 * bidirectional table.
 *
 * @param array|string $variants One variant code, or a list of codes in
 *   order of preference
 * @return string|false The text stored for the first matching variant, or
 *   false when none of them has an entry
 */
public function getTextInBidtable( $variants ) {
	// A scalar is wrapped into a one-element list; an empty list simply
	// falls through to the "not found" result.
	foreach ( (array)$variants as $code ) {
		if ( isset( $this->mBidtable[$code] ) ) {
			return $this->mBidtable[$code];
		}
	}

	return false;
}
94
/**
 * Split the flag prefix off $this->mText and normalise the resulting flag set.
 *
 * Everything before the first "|" is read as a ";"-separated list of flag
 * names; only names present in the converter's getFlags() map are kept. The
 * rest of the text is stored in $this->mRules, the normalised flags in
 * $this->mFlags, and any variant codes that were used as flags in
 * $this->mVariantFlags.
 */
private function parseFlags() {
	$rest = $this->mText;
	$parsed = [];
	$variantFlags = [];

	$pipe = strpos( $rest, '|' );
	if ( $pipe !== false ) {
		$known = $this->mConverter->getFlags();
		$prefix = substr( $rest, 0, $pipe );
		foreach ( StringUtils::explode( ';', $prefix ) as $name ) {
			$name = trim( $name );
			if ( isset( $known[$name] ) ) {
				$parsed[$known[$name]] = true;
			}
		}
		$rest = strval( substr( $rest, $pipe + 1 ) );
	}

	// R, N and - each discard every other flag; R takes precedence over N,
	// and N over -.
	$exclusive = null;
	foreach ( [ 'R', 'N', '-' ] as $candidate ) {
		if ( isset( $parsed[$candidate] ) ) {
			$exclusive = $candidate;
			break;
		}
	}

	if ( !$parsed ) {
		// No recognised flag at all: behave as if S had been given
		$parsed = [ 'S' => true ];
	} elseif ( $exclusive !== null ) {
		$parsed = [ $exclusive => true ];
	} elseif ( isset( $parsed['T'] ) && count( $parsed ) === 1 ) {
		// T on its own also gets H
		$parsed['H'] = true;
	} elseif ( isset( $parsed['H'] ) ) {
		// H replaces A by +, and only T and D survive next to it.
		// The key order (+, H, T, D) matters: parse() handles flags in order.
		$parsed = [ '+' => true, 'H' => true ]
			+ array_intersect_key( [ 'T' => true, 'D' => true ], $parsed );
	} else {
		if ( isset( $parsed['A'] ) ) {
			$parsed += [ '+' => true, 'S' => true ];
		}
		if ( isset( $parsed['D'] ) ) {
			unset( $parsed['S'] );
		}
		// Look for flags that are variant codes such as "zh-hans", "zh-hant",
		// which allows syntaxes like "-{zh-hans;zh-hant|XXXX}-"
		$variantFlags = array_intersect(
			array_keys( $parsed ),
			$this->mConverter->getVariants()
		);
		if ( $variantFlags ) {
			$variantFlags = array_fill_keys( $variantFlags, true );
			$parsed = [];
		}
	}

	$this->mFlags = $parsed;
	$this->mRules = $rest;
	$this->mVariantFlags = $variantFlags;
}
159
/**
 * Build the bidirectional and unidirectional tables from $this->mRules.
 *
 * The rules text is split with the converter's variant separator pattern.
 * Each piece is expected to look like "variant:text" (stored in
 * $this->mBidtable) or "from=>variant:text" (stored in $this->mUnidtable).
 * Pieces without a ":" are skipped. As soon as a piece names a variant that
 * is not a key of the converter's getVariantNames(), both tables are emptied
 * and parsing stops.
 */
private function parseRules() {
	$rules = $this->mRules;
	$bidtable = [];
	$unidtable = [];
	$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

	// Split text according to $varsep_pattern, but ignore semicolons from HTML entities:
	// they are temporarily swapped for \x01 and restored after the split.
	$rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
	$choice = preg_split( $varsep_pattern, $rules );
	if ( $choice === false ) {
		$error = preg_last_error();
		$errorText = preg_last_error_msg();
		LoggerFactory::getInstance( 'parser' )->warning(
			'ConverterRule preg_split error: {code} {errorText}',
			[
				'code' => $error,
				'errorText' => $errorText
			]
		);
		$choice = [];
	}
	$choice = str_replace( "\x01", ';', $choice );

	// Loop-invariant, so fetch it once instead of once per rule.
	$variantNames = $this->mConverter->getVariantNames();

	foreach ( $choice as $c ) {
		$v = explode( ':', $c, 2 );
		if ( count( $v ) !== 2 ) {
			// syntax error, skip
			continue;
		}
		$to = trim( $v[1] );
		$v = trim( $v[0] );
		$u = explode( '=>', $v, 2 );
		if ( count( $u ) === 1 ) {
			// Bidirectional rule: "variant:text"
			$vv = $this->mConverter->validateVariant( $v );
			// if $to is empty (which is also used as $from in bidtable),
			// strtr() could return a wrong result.
			if ( $to !== '' && $vv ) {
				$bidtable[$vv] = $to;
			}
		} else {
			// Unidirectional rule: "from=>variant:text"
			$from = trim( $u[0] );
			$vv = $this->mConverter->validateVariant( trim( $u[1] ) );
			// if $from is empty, strtr() could return a wrong result.
			// Entries of $unidtable are only ever arrays, so appending is
			// all that is needed here.
			if ( $from !== '' && $vv ) {
				$unidtable[$vv][$from] = $to;
			}
		}
		// Unknown variant: treat the whole rule set as a syntax error.
		// The null check keeps null from being used as an array offset.
		if ( $vv === null || !isset( $variantNames[$vv] ) ) {
			$bidtable = [];
			$unidtable = [];
			break;
		}
	}
	$this->mBidtable = $bidtable;
	$this->mUnidtable = $unidtable;
}
224
/**
 * Produce a textual description of the parsed rules.
 *
 * Bidirectional entries are listed first as "<variant name><codesep><text>",
 * then unidirectional entries as "<from>⇒<variant name><codesep><to>"; every
 * entry is followed by the converter's description variant separator.
 *
 * @return string Empty string when both tables are empty
 */
private function getRulesDesc() {
	$codesep = $this->mConverter->getDescCodeSeparator();
	$varsep = $this->mConverter->getDescVarSeparator();

	$pieces = [];
	foreach ( $this->mBidtable as $code => $shown ) {
		$pieces[] = $this->mConverter->getVariantNames()[$code]
			. $codesep . $shown . $varsep;
	}
	foreach ( $this->mUnidtable as $code => $pairs ) {
		foreach ( $pairs as $from => $to ) {
			$pieces[] = $from . '⇒' . $this->mConverter->getVariantNames()[$code]
				. $codesep . $to . $varsep;
		}
	}

	return implode( '', $pieces );
}
243
/**
 * Choose the text to display for the given variant.
 *
 * Lookup order: the variant's own bidirectional entry, the bidirectional
 * entry of one of its fallbacks, the first unidirectional replacement for
 * the variant, and finally (only when the variant's manual level is
 * 'disable') the first text found in either table.
 *
 * @param string $variant
 * @return string|false The raw rules text when no rules were parsed; false
 *   when rules exist but none applies to this variant
 */
private function getRuleConvertedStr( $variant ) {
	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;

	if ( !count( $bidtable ) && !count( $unidtable ) ) {
		return $this->mRules;
	}

	// Text for the current variant in the bidirectional table
	$own = $this->getTextInBidtable( $variant );
	if ( $own !== false ) {
		return $own;
	}

	// Text for one of the variant's fallbacks
	$viaFallback = $this->getTextInBidtable(
		$this->mConverter->getVariantFallbacks( $variant )
	);
	if ( $viaFallback !== false ) {
		return $viaFallback;
	}

	// First replacement text of the variant's unidirectional rules
	if ( array_key_exists( $variant, $unidtable ) ) {
		return array_values( $unidtable[$variant] )[0];
	}

	// With manual conversion disabled, show the first text available
	if ( $this->mConverter->getManualLevel()[$variant] === 'disable' ) {
		return count( $bidtable ) > 0
			? array_values( $bidtable )[0]
			: array_values( array_values( $unidtable )[0] )[0];
	}

	return false;
}
281
/**
 * Choose the title text for the given variant.
 *
 * For any variant other than the converter's main code this is the same as
 * getRuleConvertedStr(). For the main code only a text that targets exactly
 * this variant is used (bidirectional entry first, then the first
 * unidirectional replacement); otherwise false is returned.
 *
 * @param string $variant
 * @return string|false
 */
private function getRuleConvertedTitle( $variant ) {
	if ( $variant !== $this->mConverter->getMainCode() ) {
		return $this->getRuleConvertedStr( $variant );
	}

	// If a string targeting exactly this variant is set, use it.
	// Compare strictly: a title of "0" is a real value, not "missing",
	// and must not be overridden by a unidirectional entry.
	$disp = $this->getTextInBidtable( $variant );
	if ( $disp !== false && $disp !== '' ) {
		return $disp;
	}

	if ( array_key_exists( $variant, $this->mUnidtable ) ) {
		return array_values( $this->mUnidtable[$variant] )[0];
	}

	// Nothing targets this variant: hand back the lookup result unchanged
	// (false, or an empty string if one was stored in the table).
	return $disp;
}
311
/**
 * Build $this->mConvTable from the bidirectional and unidirectional tables.
 *
 * For every pair of variants that have a bidirectional text (directly or
 * through a fallback), a mapping from the other variant's text to the
 * variant's own text is added when that variant's manual level is
 * 'bidirectional'. Unidirectional rules are then merged in for variants
 * whose manual level is 'bidirectional' or 'unidirectional'.
 */
private function generateConvTable() {
	// Special case optimisation
	if ( !$this->mBidtable && !$this->mUnidtable ) {
		$this->mConvTable = [];
		return;
	}

	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;
	$manLevel = $this->mConverter->getManualLevel();

	$vmarked = [];
	foreach ( $this->mConverter->getVariants() as $v ) {
		/* for bidirectional array
			fill in the missing variants, if any,
			with fallbacks */
		if ( !isset( $bidtable[$v] ) ) {
			$variantFallbacks =
				$this->mConverter->getVariantFallbacks( $v );
			$vf = $this->getTextInBidtable( $variantFallbacks );
			// Strict check: the text "0" is a valid fallback and must not
			// be mistaken for "no fallback found". Empty strings stay
			// excluded because they cannot serve as a conversion source.
			if ( $vf !== false && $vf !== '' ) {
				$bidtable[$v] = $vf;
			}
		}

		if ( isset( $bidtable[$v] ) ) {
			foreach ( $vmarked as $vo ) {
				// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
				// or -{H|zh:WordZh;zh-tw:WordTw}-
				// or -{-|zh:WordZh;zh-tw:WordTw}-
				// to introduce a custom mapping between
				// words WordZh and WordTw in the whole text
				if ( $manLevel[$v] === 'bidirectional' ) {
					$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
				}
				if ( $manLevel[$vo] === 'bidirectional' ) {
					$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
				}
			}
			$vmarked[] = $v;
		}
		/* for unidirectional array fill to convert tables */
		if ( ( $manLevel[$v] === 'bidirectional' || $manLevel[$v] === 'unidirectional' )
			&& isset( $unidtable[$v] )
		) {
			if ( isset( $this->mConvTable[$v] ) ) {
				// Array union: unidirectional entries win on duplicate keys
				$this->mConvTable[$v] = $unidtable[$v] + $this->mConvTable[$v];
			} else {
				$this->mConvTable[$v] = $unidtable[$v];
			}
		}
	}
}
368
/**
 * Parse rules and flags.
 *
 * Runs parseFlags(), optionally parseRules(), then walks the resulting flags
 * in order to fill $this->mRuleDisplay, $this->mRuleTitle and
 * $this->mRulesAction, and finally builds the conversion table.
 *
 * @param string|null $variant Variant to produce output for; when empty the
 *   converter's preferred variant is used
 */
public function parse( $variant = null ) {
	$variant = $variant ?: $this->mConverter->getPreferredVariant();

	$this->parseFlags();
	$flags = $this->mFlags;

	// Variant codes used as flags ask for a conversion to a specific variant:
	// -{zh-hans;zh-hant[;...]|<text to convert>}-
	if ( $this->mVariantFlags ) {
		if ( isset( $this->mVariantFlags[$variant] ) ) {
			// The current variant is listed: convert the text to it
			$this->mRules = $this->mConverter->autoConvert( $this->mRules, $variant );
		} else {
			// Otherwise use the first fallback of the current variant that is listed
			$fallbacks = $this->mConverter->getVariantFallbacks( $variant );
			if ( is_array( $fallbacks ) ) {
				foreach ( $fallbacks as $fallback ) {
					if ( !isset( $this->mVariantFlags[$fallback] ) ) {
						continue;
					}
					$this->mRules = $this->mConverter->autoConvert( $this->mRules, $fallback );
					break;
				}
			}
		}
		$flags = [ 'R' => true ];
		$this->mFlags = $flags;
	}

	if ( !( isset( $flags['R'] ) || isset( $flags['N'] ) ) ) {
		// decode => HTML entities modified by Sanitizer::internalRemoveHtmlTags
		$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
		$this->parseRules();
	}
	$rules = $this->mRules;

	if ( !$this->mBidtable && !$this->mUnidtable ) {
		if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
			// -{A/H/-|text}- with non-empty text but no rules: use the
			// text for every variant
			if ( $rules !== '' ) {
				$this->mBidtable = array_fill_keys( $this->mConverter->getVariants(), $rules );
			}
		} elseif ( !( isset( $flags['N'] ) || isset( $flags['T'] ) ) ) {
			$flags = [ 'R' => true ];
			$this->mFlags = $flags;
		}
	}

	// false acts as "no flag produced any display"; see the error case below
	$this->mRuleDisplay = false;
	foreach ( array_keys( $flags ) as $flag ) {
		switch ( $flag ) {
			case 'R':
				// No content conversion, but the -{}- tags are still stripped
				$this->mRuleDisplay = $rules;
				break;
			case 'N':
				// Output the name of the variant given as the rule text
				$this->mRuleDisplay = $this->mConverter->getVariantNames()[trim( $rules )] ?? '';
				break;
			case 'D':
				// Output the description of the rules
				$this->mRuleDisplay = $this->getRulesDesc();
				break;
			case 'H':
				// Output nothing
				$this->mRuleDisplay = '';
				break;
			case '-':
				$this->mRulesAction = 'remove';
				$this->mRuleDisplay = '';
				break;
			case '+':
				$this->mRulesAction = 'add';
				$this->mRuleDisplay = '';
				break;
			case 'S':
				$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
				break;
			case 'T':
				$this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
				$this->mRuleDisplay = '';
				break;
			default:
				// Unknown flags are ignored (but see the error case below)
		}
	}

	if ( $this->mRuleDisplay === false ) {
		$this->mRuleDisplay = '<span class="error">'
			. wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
			. '</span>';
	}

	$this->generateConvTable();
}
477
/**
 * Checks if there are conversion rules.
 *
 * @return bool True unless the rules text is the empty string
 */
public function hasRules() {
	return $this->mRules !== '';
}
485
/**
 * Get display text on markup -{...}-.
 *
 * @return string|false Current value of $this->mRuleDisplay
 */
public function getDisplay() {
	return $this->mRuleDisplay;
}
493
/**
 * Get the title text recorded by parse() for the T flag.
 *
 * @return string|false False unless parse() stored a title
 */
public function getTitle() {
	return $this->mRuleTitle;
}
501
/**
 * Return how to deal with conversion rules.
 *
 * @return string 'none' initially; parse() sets 'add' or 'remove'
 */
public function getRulesAction() {
	return $this->mRulesAction;
}
509
/**
 * Get conversion table.
 *
 * @return array Current value of $this->mConvTable
 */
public function getConvTable() {
	return $this->mConvTable;
}
518
/**
 * Get conversion rules string.
 *
 * @return string Current value of $this->mRules
 */
public function getRules() {
	return $this->mRules;
}
526
/**
 * Get conversion flags.
 *
 * @return array Current value of $this->mFlags
 */
public function getFlags() {
	return $this->mFlags;
}
534}
535
537class_alias( ConverterRule::class, 'ConverterRule' );
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
The rules used for language conversion; this class processes the rules extracted by Parser from the -{ }- wikitext syntax.
string $mRules
the text of the rules
getFlags()
Get conversion flags.
getRulesAction()
Return how to deal with conversion rules.
string $mText
original text in -{text}-
getTextInBidtable( $variants)
Check if the variant array is in the convert array.
hasRules()
Checks if there are conversion rules.
getDisplay()
Get display text on markup -{...}-.
array $mUnidtable
of the translation in each variant
parse( $variant=null)
Parse rules and flags.
array $mBidtable
of the translation in each variant
__construct( $text, LanguageConverter $converter)
getConvTable()
Get conversion table.
getRules()
Get conversion rules string.
Base class for multi-variant language conversion.
Create PSR-3 logger objects.
A collection of static methods to play with strings.