MediaWiki master
ConverterRule.php
Go to the documentation of this file.
1<?php
23
/** @var string Original text in -{text}-, as passed to the constructor */
34 public $mText;
/** @var string|false Text shown in place of the -{...}- markup; computed by parse() */
39 public $mRuleDisplay = '';
/** @var string|false Converted title set by parse() when the 'T' flag is present; false otherwise */
40 public $mRuleTitle = false;
/** @var string The text of the rules (the part of the markup left after the flags are split off) */
44 public $mRules = '';
/** @var string 'none' by default; parse() sets 'add' for the '+' flag and 'remove' for the '-' flag */
45 public $mRulesAction = 'none';
/** @var array Flags in effect, as a map of flag => true; filled in by parseFlags() and parse() */
46 public $mFlags = [];
/** @var array Variant codes that were used as flags, as a map of variant code => true */
47 public $mVariantFlags = [];
/** @var array Conversion table built by generateConvTable(): variant code => [ source text => replacement ] */
48 public $mConvTable = [];
/** @var array Bidirectional table of the translation in each variant: variant code => text */
52 public $mBidtable = [];
/** @var array Unidirectional table of the translation in each variant: variant code => [ from => to ] */
56 public $mUnidtable = [];
57
/**
 * Remember the raw markup text and the converter that will be consulted
 * while parsing it. No parsing happens here; call parse() for that.
 *
 * @param string $text Original text found in -{text}-
 * @param LanguageConverter $converter Converter supplying flags, variants and separators
 */
public function __construct( $text, LanguageConverter $converter ) {
	$this->mConverter = $converter;
	$this->mText = $text;
}
66
/**
 * Look up the first of the given variants that has an entry in the
 * bidirectional table and return its text.
 *
 * @param string[]|string $variants One variant code or a list of them, in priority order
 * @return string|false Text stored for the first matching variant, or false if none matches
 */
public function getTextInBidtable( $variants ) {
	// A scalar is wrapped into a one-element list; an empty list simply
	// skips the loop and falls through to the "not found" result.
	foreach ( (array)$variants as $code ) {
		if ( isset( $this->mBidtable[$code] ) ) {
			return $this->mBidtable[$code];
		}
	}
	return false;
}
85
/**
 * Split the raw text at the first '|' into a flag list and a rule body,
 * then normalise the flags.
 *
 * Results are stored in $this->mFlags, $this->mVariantFlags and
 * $this->mRules. Without a '|' the whole text is the rule body and the
 * only flag is 'S'.
 */
private function parseFlags() {
	$body = $this->mText;
	$parsed = [];
	$variantFlags = [];

	$pipeAt = strpos( $body, '|' );
	if ( $pipeAt !== false ) {
		// Only flags known to the converter are kept; anything else is dropped.
		$known = $this->mConverter->getFlags();
		foreach ( StringUtils::explode( ';', substr( $body, 0, $pipeAt ) ) as $candidate ) {
			$candidate = trim( $candidate );
			if ( isset( $known[$candidate] ) ) {
				$parsed[$known[$candidate]] = true;
			}
		}
		$body = strval( substr( $body, $pipeAt + 1 ) );
	}

	// 'R', 'N' and '-' each override every other flag, in that priority order.
	$exclusive = null;
	foreach ( [ 'R', 'N', '-' ] as $candidate ) {
		if ( isset( $parsed[$candidate] ) ) {
			$exclusive = $candidate;
			break;
		}
	}

	if ( !$parsed ) {
		$parsed['S'] = true;
	} elseif ( $exclusive !== null ) {
		$parsed = [ $exclusive => true ];
	} elseif ( isset( $parsed['T'] ) && count( $parsed ) === 1 ) {
		// 'T' on its own also implies 'H'
		$parsed['H'] = true;
	} elseif ( isset( $parsed['H'] ) ) {
		// 'H' takes the place of 'A'; of the remaining flags only 'T' and 'D' survive
		$kept = [ '+' => true, 'H' => true ];
		foreach ( [ 'T', 'D' ] as $extra ) {
			if ( isset( $parsed[$extra] ) ) {
				$kept[$extra] = true;
			}
		}
		$parsed = $kept;
	} else {
		if ( isset( $parsed['A'] ) ) {
			$parsed['+'] = true;
			$parsed['S'] = true;
		}
		if ( isset( $parsed['D'] ) ) {
			unset( $parsed['S'] );
		}
		// Variant codes used as flags, e.g. "-{zh-hans;zh-hant|XXXX}-":
		// when present they replace all ordinary flags.
		$variantFlags = array_intersect( array_keys( $parsed ), $this->mConverter->getVariants() );
		if ( $variantFlags ) {
			$variantFlags = array_fill_keys( $variantFlags, true );
			$parsed = [];
		}
	}

	$this->mFlags = $parsed;
	$this->mRules = $body;
	$this->mVariantFlags = $variantFlags;
}
150
/**
 * Parse $this->mRules into the bidirectional table ($this->mBidtable,
 * variant => text) and the unidirectional table ($this->mUnidtable,
 * variant => [ from => to ]).
 *
 * Each rule is either "variant:text" or "from=>variant:text". Rules
 * without a ':' are skipped. A rule naming an unknown variant is
 * treated as a syntax error: both tables are emptied and parsing stops.
 * A PCRE failure while splitting is logged and yields empty tables.
 */
private function parseRules() {
	$bidtable = [];
	$unidtable = [];
	$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

	// Split text according to $varsep_pattern, but ignore semicolons from HTML entities:
	// the entity's ';' is swapped for \x01 before splitting and restored afterwards.
	$rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $this->mRules );
	if ( $rules === null ) {
		// preg_replace() failed; do not hand null on to preg_split().
		$this->logPcreFailure( 'preg_replace' );
		$choice = [];
	} else {
		$choice = preg_split( $varsep_pattern, $rules );
		// @phan-suppress-next-line PhanTypeComparisonFromArray
		if ( $choice === false ) {
			$this->logPcreFailure( 'preg_split' );
			$choice = [];
		}
	}
	$choice = str_replace( "\x01", ';', $choice );

	$variantNames = $this->mConverter->getVariantNames();
	foreach ( $choice as $c ) {
		$v = explode( ':', $c, 2 );
		if ( count( $v ) !== 2 ) {
			// syntax error, skip
			continue;
		}
		$to = trim( $v[1] );
		$v = trim( $v[0] );
		$u = explode( '=>', $v, 2 );
		if ( count( $u ) === 1 ) {
			// Bidirectional rule: "variant:text"
			$vv = $this->mConverter->validateVariant( $v );
			// if $to is empty (which is also used as $from in bidtable),
			// strtr() could return a wrong result.
			if ( $to !== '' && $vv ) {
				$bidtable[$vv] = $to;
			}
		} else {
			// Unidirectional rule: "from=>variant:text"
			$from = trim( $u[0] );
			$vv = $this->mConverter->validateVariant( trim( $u[1] ) );
			// if $from is empty, strtr() could return a wrong result.
			if ( $from !== '' && $vv ) {
				$unidtable[$vv][$from] = $to;
			}
		}
		// An unknown variant is a syntax error: discard everything and stop.
		// The is_string() guard keeps a failed validation out of the array lookup.
		if ( !is_string( $vv ) || !isset( $variantNames[$vv] ) ) {
			$bidtable = [];
			$unidtable = [];
			break;
		}
	}
	$this->mBidtable = $bidtable;
	$this->mUnidtable = $unidtable;
}

/**
 * Log the most recent PCRE error to the 'parser' channel.
 *
 * @param string $function Name of the preg_* function that failed
 */
private function logPcreFailure( $function ) {
	LoggerFactory::getInstance( 'parser' )->warning(
		'ConverterRule ' . $function . ' error: {code} {errorText}',
		[
			'code' => preg_last_error(),
			'errorText' => preg_last_error_msg()
		]
	);
}
216
/**
 * Build a human-readable description of the parsed rules.
 *
 * Bidirectional entries come first, each rendered as
 * "<variant name><code separator><text><variant separator>", followed by
 * unidirectional entries rendered as
 * "<from>⇒<variant name><code separator><to><variant separator>".
 *
 * @return string Description text; empty when both tables are empty
 */
private function getRulesDesc() {
	$codesep = $this->mConverter->getDescCodeSeparator();
	$varsep = $this->mConverter->getDescVarSeparator();

	$pieces = [];
	foreach ( $this->mBidtable as $code => $translation ) {
		$pieces[] = $this->mConverter->getVariantNames()[$code] . $codesep . $translation . $varsep;
	}
	foreach ( $this->mUnidtable as $code => $pairs ) {
		foreach ( $pairs as $from => $to ) {
			$pieces[] = $from . '⇒' . $this->mConverter->getVariantNames()[$code]
				. $codesep . $to . $varsep;
		}
	}
	return implode( '', $pieces );
}
235
/**
 * Pick the text to display for the given variant.
 *
 * Sources are tried in this order: the variant's own bidirectional
 * entry, the bidirectional entry of one of its fallbacks, the first
 * unidirectional target for the variant and, if manual conversion is
 * 'disable' for the variant, the first text in either table.
 *
 * @param string $variant Variant code to display
 * @return string|false The rule text unchanged when there are no rules at all;
 *  otherwise the chosen text, or false when no source applies
 */
private function getRuleConvertedStr( $variant ) {
	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;

	if ( count( $bidtable ) === 0 && count( $unidtable ) === 0 ) {
		return $this->mRules;
	}

	// current variant in the bidirectional table
	$text = $this->getTextInBidtable( $variant );
	if ( $text !== false ) {
		return $text;
	}

	// one of the fallbacks in the bidirectional table
	$text = $this->getTextInBidtable( $this->mConverter->getVariantFallbacks( $variant ) );
	if ( $text !== false ) {
		return $text;
	}

	// current variant in the unidirectional table
	if ( array_key_exists( $variant, $unidtable ) ) {
		return array_values( $unidtable[$variant] )[0];
	}

	// manual conversion disabled: show the first text available
	if ( $this->mConverter->getManualLevel()[$variant] === 'disable' ) {
		$firstGroup = count( $bidtable ) > 0 ? $bidtable : array_values( $unidtable )[0];
		return array_values( $firstGroup )[0];
	}

	return false;
}
273
/**
 * Pick the title text for the given variant.
 *
 * For the converter's main variant, only a string that targets exactly
 * this variant is used (bidirectional entry first, then the first
 * unidirectional target). Otherwise false is returned so that the real
 * page name can be shown; as variant === main, no further automatic
 * conversion would happen. Every other variant is resolved the same way
 * as display text, via getRuleConvertedStr().
 *
 * @param string $variant Variant code to produce the title for
 * @return string|false Title text, or false when none applies
 */
private function getRuleConvertedTitle( $variant ) {
	if ( $variant !== $this->mConverter->getMainCode() ) {
		return $this->getRuleConvertedStr( $variant );
	}

	$disp = $this->getTextInBidtable( $variant );
	// Compare strictly against false: a title text of "0" is falsy in PHP
	// but is still a real match and must not be skipped.
	if ( $disp !== false ) {
		return $disp;
	}
	if ( array_key_exists( $variant, $this->mUnidtable ) ) {
		return array_values( $this->mUnidtable[$variant] )[0];
	}
	return false;
}
303
/**
 * Build $this->mConvTable (variant => [ from => to ]) from the parsed
 * bidirectional and unidirectional tables.
 *
 * When both tables are empty the conversion table is reset to an empty
 * array. Otherwise entries are added to whatever $this->mConvTable
 * already holds.
 */
private function generateConvTable() {
	// Special case optimisation
	if ( !$this->mBidtable && !$this->mUnidtable ) {
		$this->mConvTable = [];
		return;
	}

	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;
	$manLevel = $this->mConverter->getManualLevel();

	// Variants that already have a bidirectional text, in processing order
	$vmarked = [];
	foreach ( $this->mConverter->getVariants() as $v ) {
		/* for bidirectional array
			fill in the missing variants, if any,
			with fallbacks */
		if ( !isset( $bidtable[$v] ) ) {
			$variantFallbacks =
				$this->mConverter->getVariantFallbacks( $v );
			// The lookup reads $this->mBidtable, i.e. only texts that were
			// given explicitly, not ones filled in by this loop.
			$vf = $this->getTextInBidtable( $variantFallbacks );
			// Compare strictly against false so that a fallback text of "0"
			// (falsy in PHP) is still used.
			if ( $vf !== false ) {
				$bidtable[$v] = $vf;
			}
		}

		if ( isset( $bidtable[$v] ) ) {
			foreach ( $vmarked as $vo ) {
				// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
				// or -{H|zh:WordZh;zh-tw:WordTw}-
				// or -{-|zh:WordZh;zh-tw:WordTw}-
				// to introduce a custom mapping between
				// words WordZh and WordTw in the whole text
				if ( $manLevel[$v] === 'bidirectional' ) {
					$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
				}
				if ( $manLevel[$vo] === 'bidirectional' ) {
					$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
				}
			}
			$vmarked[] = $v;
		}
		/* for unidirectional array fill to convert tables */
		if ( ( $manLevel[$v] === 'bidirectional' || $manLevel[$v] === 'unidirectional' )
			&& isset( $unidtable[$v] )
		) {
			if ( isset( $this->mConvTable[$v] ) ) {
				// On a key clash the unidirectional rule wins (left operand of +)
				$this->mConvTable[$v] = $unidtable[$v] + $this->mConvTable[$v];
			} else {
				$this->mConvTable[$v] = $unidtable[$v];
			}
		}
	}
}
360
/**
 * Parse rules and flags.
 *
 * Runs parseFlags(), optionally parseRules(), then works out the display
 * text, the title, the rules action and the conversion table. Results are
 * left in the object's properties and read through the getters.
 *
 * @param string|null $variant Variant code to render for; when empty, the
 *  converter's preferred variant is used
 */
public function parse( $variant = null ) {
	if ( !$variant ) {
		$variant = $this->mConverter->getPreferredVariant();
	}

	$this->parseFlags();
	$flags = $this->mFlags;

	// Variants used as flags: -{zh-hans;zh-hant[;...]|<text to convert>}-
	// The text is auto-converted to the current variant if it is listed,
	// otherwise to the first listed fallback of the current variant.
	if ( $this->mVariantFlags ) {
		if ( isset( $this->mVariantFlags[$variant] ) ) {
			$candidates = [ $variant ];
		} else {
			$fallbacks = $this->mConverter->getVariantFallbacks( $variant );
			$candidates = is_array( $fallbacks ) ? $fallbacks : [];
		}
		foreach ( $candidates as $candidate ) {
			if ( isset( $this->mVariantFlags[$candidate] ) ) {
				$this->mRules = $this->mConverter->autoConvert( $this->mRules, $candidate );
				break;
			}
		}
		// Whatever happened above, the result is output as raw text.
		$flags = [ 'R' => true ];
		$this->mFlags = $flags;
	}

	if ( !( isset( $flags['R'] ) || isset( $flags['N'] ) ) ) {
		// decode => HTML entities modified by Sanitizer::internalRemoveHtmlTags
		$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
		$this->parseRules();
	}
	$rules = $this->mRules;

	if ( !$this->mBidtable && !$this->mUnidtable ) {
		if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
			// fill all variants if the text in -{A/H/-|text}- is non-empty but without rules
			if ( $rules !== '' ) {
				$this->mBidtable = array_fill_keys( $this->mConverter->getVariants(), $rules );
			}
		} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
			$flags = [ 'R' => true ];
			$this->mFlags = $flags;
		}
	}

	// false marks "no flag produced any output"; it becomes an error message below.
	// Flags are applied in array order, so a later flag overrides the display
	// text set by an earlier one.
	$this->mRuleDisplay = false;
	foreach ( array_keys( $flags ) as $flag ) {
		switch ( $flag ) {
			case 'R':
				// if we don't do content convert, still strip the -{}- tags
				$this->mRuleDisplay = $rules;
				break;
			case 'N':
				// output the name of the variant given as the rule text
				$this->mRuleDisplay = $this->mConverter->getVariantNames()[trim( $rules )] ?? '';
				break;
			case 'D':
				// output a description of the rules
				$this->mRuleDisplay = $this->getRulesDesc();
				break;
			case 'H':
				// process H,- flag or T only: output nothing
				$this->mRuleDisplay = '';
				break;
			case '-':
				$this->mRulesAction = 'remove';
				$this->mRuleDisplay = '';
				break;
			case '+':
				$this->mRulesAction = 'add';
				$this->mRuleDisplay = '';
				break;
			case 'S':
				$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
				break;
			case 'T':
				$this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
				$this->mRuleDisplay = '';
				break;
			default:
				// ignore unknown flags (but see error-case below)
		}
	}
	if ( $this->mRuleDisplay === false ) {
		$this->mRuleDisplay = '<span class="error">'
			. wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
			. '</span>';
	}

	$this->generateConvTable();
}
469
/**
 * Checks if there are conversion rules.
 *
 * @return bool True when the rule text is anything other than the empty string
 */
public function hasRules() {
	$ruleText = $this->mRules;
	return $ruleText !== '';
}
477
/**
 * Get display text on markup -{...}-.
 *
 * @return string|false Current value of $mRuleDisplay
 */
public function getDisplay() {
	return $this->mRuleDisplay;
}
485
/**
 * Get converted title.
 *
 * @return string|false Current value of $mRuleTitle; false by default
 */
public function getTitle() {
	return $this->mRuleTitle;
}
493
/**
 * Return how to deal with conversion rules.
 *
 * @return string 'none' by default; parse() may set 'add' or 'remove'
 */
public function getRulesAction() {
	return $this->mRulesAction;
}
501
/**
 * Get conversion table.
 *
 * @return array Current value of $mConvTable, keyed by variant code
 */
public function getConvTable() {
	return $this->mConvTable;
}
510
/**
 * Get conversion rules string.
 *
 * @return string Current value of $mRules
 */
public function getRules() {
	return $this->mRules;
}
518
/**
 * Get conversion flags.
 *
 * @return array Current value of $mFlags, as a map of flag => true
 */
public function getFlags() {
	return $this->mFlags;
}
526}
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
The rules used for language conversion, this processes the rules extracted by Parser from the -{ }- w...
array $mBidtable
of the translation in each variant
getRules()
Get conversion rules string.
parse( $variant=null)
Parse rules and flags.
getFlags()
Get conversion flags.
__construct( $text, LanguageConverter $converter)
getTextInBidtable( $variants)
Check if the variant array is in the convert array.
getDisplay()
Get display text on markup -{...}-.
getRulesAction()
Return how to deal with conversion rules.
array $mUnidtable
of the translation in each variant
LanguageConverter $mConverter
string $mRules
the text of the rules
hasRules()
Checks if there are conversion rules.
getConvTable()
Get conversion table.
string $mText
original text in -{text}-
getTitle()
Get converted title.
Base class for multi-variant language conversion.
Create PSR-3 logger objects.