MediaWiki  master
ConverterRule.php
Go to the documentation of this file.
1 <?php
/**
 * @var string Original text in -{text}-
 */
32  public $mText;
/**
 * @var LanguageConverter Converter whose flags, variants and settings are consulted while parsing
 */
36  public $mConverter;
/**
 * @var string|false Text shown in place of the markup; computed by parse()
 */
37  public $mRuleDisplay = '';
/**
 * @var string|false Converted title, set by parse() when the T flag is present
 */
38  public $mRuleTitle = false;
/**
 * @var string The text of the rules (the markup text after any "flags|" prefix)
 */
42  public $mRules = '';
/**
 * @var string 'none' by default; parse() sets 'add' for the + flag and 'remove' for the - flag
 */
43  public $mRulesAction = 'none';
/**
 * @var array Normalised flags, as flag => true; filled in by parseFlags()
 */
44  public $mFlags = [];
/**
 * @var array Variant codes that were given as flags, stored as array keys
 */
45  public $mVariantFlags = [];
/**
 * @var array Conversion table, variant => [ from => to ]; built by generateConvTable()
 */
46  public $mConvTable = [];
/**
 * @var array Bidirectional table: the translation in each variant (variant => text)
 */
50  public $mBidtable = [];
/**
 * @var array Unidirectional table: the translation in each variant (variant => [ from => to ])
 */
54  public $mUnidtable = [];
55 
/**
 * Remember the raw markup text and the converter to use.
 *
 * Nothing is parsed here; call parse() to process the text.
 *
 * @param string $text The text between -{ and }-
 * @param LanguageConverter $converter
 */
public function __construct( $text, LanguageConverter $converter ) {
	$this->mConverter = $converter;
	$this->mText = $text;
}
64 
/**
 * Return the bidirectional-table text of the first listed variant that has one.
 *
 * @param array|string $variants Variant code(s), tried in the given order
 * @return string|false The stored text, or false if no listed variant has an entry
 */
public function getTextInBidtable( $variants ) {
	foreach ( (array)$variants as $code ) {
		if ( isset( $this->mBidtable[$code] ) ) {
			return $this->mBidtable[$code];
		}
	}

	// Reached for an empty list as well as for a list without any match.
	return false;
}
83 
/**
 * Split $this->mText into flags and rule text, and normalise the flags.
 *
 * Everything before the first "|" is read as a ";"-separated flag list;
 * only flags known to the converter are kept. The remainder becomes
 * $this->mRules. Results are stored in mFlags, mVariantFlags and mRules.
 */
private function parseFlags() {
	$text = $this->mText;
	$flags = [];
	$variantFlags = [];

	$pipePos = strpos( $text, '|' );
	if ( $pipePos !== false ) {
		$knownFlags = $this->mConverter->mFlags;
		$rawFlags = StringUtils::explode( ';', substr( $text, 0, $pipePos ) );
		foreach ( $rawFlags as $rawFlag ) {
			$rawFlag = trim( $rawFlag );
			if ( isset( $knownFlags[$rawFlag] ) ) {
				$flags[$knownFlags[$rawFlag]] = true;
			}
		}
		$text = strval( substr( $text, $pipePos + 1 ) );
	}

	// R, N and - each suppress every other flag; the first one present
	// in this precedence order wins.
	$exclusive = null;
	foreach ( [ 'R', 'N', '-' ] as $candidate ) {
		if ( isset( $flags[$candidate] ) ) {
			$exclusive = $candidate;
			break;
		}
	}

	if ( !$flags ) {
		// no flags at all: plain "show" behaviour
		$flags['S'] = true;
	} elseif ( $exclusive !== null ) {
		$flags = [ $exclusive => true ];
	} elseif ( count( $flags ) === 1 && isset( $flags['T'] ) ) {
		// T on its own also hides the text
		$flags['H'] = true;
	} elseif ( isset( $flags['H'] ) ) {
		// H replaces A and drops everything but T and D. The insertion
		// order (+, H, T, D) matters: parse() walks the flags in order.
		$kept = [ '+' => true, 'H' => true ];
		foreach ( [ 'T', 'D' ] as $optional ) {
			if ( isset( $flags[$optional] ) ) {
				$kept[$optional] = true;
			}
		}
		$flags = $kept;
	} else {
		if ( isset( $flags['A'] ) ) {
			$flags['+'] = true;
			$flags['S'] = true;
		}
		if ( isset( $flags['D'] ) ) {
			unset( $flags['S'] );
		}
		// Variant codes used as flags, e.g. "-{zh-hans;zh-hant|XXXX}-":
		// when present they replace all ordinary flags.
		$variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->getVariants() );
		if ( $variantFlags ) {
			$variantFlags = array_flip( $variantFlags );
			$flags = [];
		}
	}

	$this->mVariantFlags = $variantFlags;
	$this->mRules = $text;
	$this->mFlags = $flags;
}
148 
/**
 * Parse $this->mRules into the bidirectional and unidirectional tables.
 *
 * Each choice has the form "variant:text" (bidirectional) or
 * "from=>variant:text" (unidirectional). Choices without a ":" are
 * skipped. If any choice names a variant without an entry in the
 * converter's mVariantNames, both tables are emptied and parsing stops.
 *
 * Results are stored in $this->mBidtable and $this->mUnidtable.
 */
private function parseRules() {
	$rules = $this->mRules;
	$bidtable = [];
	$unidtable = [];
	$varsepPattern = $this->mConverter->getVarSeparatorPattern();

	// Split on the variant separator, but hide the semicolon that ends an
	// HTML entity behind \x01 first so it is not taken for a separator.
	$rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
	$choices = preg_split( $varsepPattern, $rules );
	$choices = str_replace( "\x01", ';', $choices );

	foreach ( $choices as $choice ) {
		$parts = explode( ':', $choice, 2 );
		if ( count( $parts ) !== 2 ) {
			// syntax error, skip
			continue;
		}
		$to = trim( $parts[1] );
		$spec = trim( $parts[0] );
		$arrow = explode( '=>', $spec, 2 );
		$vv = $this->mConverter->validateVariant( $spec );

		if ( count( $arrow ) === 2 ) {
			$from = trim( $arrow[0] );
			$vv = $this->mConverter->validateVariant( trim( $arrow[1] ) );
			// If $from is empty, strtr() could return a wrong result.
			// Entries of $unidtable are only ever created here, as arrays,
			// so no check for a non-array entry is needed.
			if ( $from !== '' && $vv ) {
				$unidtable[$vv][$from] = $to;
			}
		} elseif ( $to !== '' && $vv ) {
			// If $to is empty (it is also used as $from in the bidtable),
			// strtr() could return a wrong result.
			$bidtable[$vv] = $to;
		}

		// Unknown variant: treat the whole rule as a syntax error.
		if ( !isset( $this->mConverter->mVariantNames[$vv] ) ) {
			$bidtable = [];
			$unidtable = [];
			break;
		}
	}

	$this->mBidtable = $bidtable;
	$this->mUnidtable = $unidtable;
}
201 
/**
 * Build the textual description of the parsed rules (used for the D flag).
 *
 * Bidirectional entries come first, then unidirectional ones. Every
 * entry ends with the converter's variant separator.
 *
 * @return string
 */
private function getRulesDesc() {
	$converter = $this->mConverter;
	$codesep = $converter->mDescCodeSep;
	$varsep = $converter->mDescVarSep;

	$pieces = [];
	foreach ( $this->mBidtable as $code => $translation ) {
		$pieces[] = $converter->mVariantNames[$code] . $codesep . $translation . $varsep;
	}
	foreach ( $this->mUnidtable as $code => $pairs ) {
		foreach ( $pairs as $from => $to ) {
			$pieces[] = $from . '⇒' . $converter->mVariantNames[$code] . $codesep . $to . $varsep;
		}
	}

	return implode( '', $pieces );
}
220 
/**
 * Pick the text to display for the given variant from the parsed rules.
 *
 * Lookup order: the variant's own bidirectional entry, its fallbacks'
 * bidirectional entries, the variant's first unidirectional target and,
 * if manual conversion is disabled for the variant, the first text found
 * in any table.
 *
 * @param string $variant
 * @return string|false The text, the raw rules when both tables are empty,
 *  or false when nothing applies
 */
private function getRuleConvertedStr( $variant ) {
	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;

	if ( !$bidtable && !$unidtable ) {
		return $this->mRules;
	}

	// display current variant in bidirectional array
	$disp = $this->getTextInBidtable( $variant );
	// or display current variant in fallbacks
	if ( $disp === false ) {
		$disp = $this->getTextInBidtable(
			$this->mConverter->getVariantFallbacks( $variant )
		);
	}
	// or display current variant in unidirectional array
	if ( $disp === false && array_key_exists( $variant, $unidtable ) ) {
		$disp = array_values( $unidtable[$variant] )[0];
	}
	// or display first text under disable manual convert;
	// "??" avoids an undefined-key warning for a variant without a manual level
	if ( $disp === false
		&& ( $this->mConverter->mManualLevel[$variant] ?? null ) === 'disable'
	) {
		if ( count( $bidtable ) > 0 ) {
			$disp = array_values( $bidtable )[0];
		} else {
			$disp = array_values( array_values( $unidtable )[0] )[0];
		}
	}

	return $disp;
}
258 
/**
 * Like getRuleConvertedStr(), but for the converter's main language code
 * only a text targeting exactly that variant is used.
 *
 * For the main variant false is returned when no such text exists, so the
 * real page name can be shown (and because variant === main, there will
 * be no further automatic conversion).
 *
 * @param string $variant
 * @return string|false
 */
private function getRuleConvertedTitle( $variant ) {
	if ( $variant !== $this->mConverter->mMainLanguageCode ) {
		return $this->getRuleConvertedStr( $variant );
	}

	$disp = $this->getTextInBidtable( $variant );
	// Compare against false rather than testing truthiness, so that a
	// title of "0" is not mistaken for "no text".
	if ( $disp !== false ) {
		return $disp;
	}

	if ( array_key_exists( $variant, $this->mUnidtable ) ) {
		return array_values( $this->mUnidtable[$variant] )[0];
	}

	return false;
}
288 
/**
 * Build $this->mConvTable from the bidirectional and unidirectional tables.
 *
 * The result maps variant => [ from => to ]. Variants without their own
 * bidirectional entry borrow the text of their first fallback that has one.
 */
private function generateConvTable() {
	// Special case optimisation
	if ( !$this->mBidtable && !$this->mUnidtable ) {
		$this->mConvTable = [];
		return;
	}

	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;
	$manLevel = $this->mConverter->mManualLevel;

	// variants already known to have a bidirectional text
	$vmarked = [];
	foreach ( $this->mConverter->mVariants as $v ) {
		// Bidirectional table: fill in a missing variant from its fallbacks.
		if ( !isset( $bidtable[$v] ) ) {
			$vf = $this->getTextInBidtable(
				$this->mConverter->getVariantFallbacks( $v )
			);
			// Compare against false rather than testing truthiness, so
			// that a fallback text of "0" is not discarded.
			if ( $vf !== false ) {
				$bidtable[$v] = $vf;
			}
		}

		if ( isset( $bidtable[$v] ) ) {
			foreach ( $vmarked as $vo ) {
				// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
				// or -{H|zh:WordZh;zh-tw:WordTw}-
				// or -{-|zh:WordZh;zh-tw:WordTw}-
				// to introduce a custom mapping between
				// words WordZh and WordTw in the whole text
				if ( $manLevel[$v] === 'bidirectional' ) {
					$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
				}
				if ( $manLevel[$vo] === 'bidirectional' ) {
					$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
				}
			}
			$vmarked[] = $v;
		}

		// Unidirectional table: merge into the conversion table. With "+",
		// unidirectional entries win over bidirectional ones for the same key.
		if ( ( $manLevel[$v] === 'bidirectional' || $manLevel[$v] === 'unidirectional' )
			&& isset( $unidtable[$v] )
		) {
			if ( isset( $this->mConvTable[$v] ) ) {
				$this->mConvTable[$v] = $unidtable[$v] + $this->mConvTable[$v];
			} else {
				$this->mConvTable[$v] = $unidtable[$v];
			}
		}
	}
}
345 
/**
 * Parse rules and flags.
 *
 * Fills in the flags, rules, display text, title, rules action and
 * conversion table of this object.
 *
 * @param string|null $variant Variant to convert to; the converter's
 *  preferred variant is used when this is empty
 */
public function parse( $variant = null ) {
	if ( !$variant ) {
		$variant = $this->mConverter->getPreferredVariant();
	}

	$this->parseFlags();
	$flags = $this->mFlags;

	// Variant flags: -{zh-hans;zh-hant[;...]|<text to convert>}-
	// Convert the text to the current variant if it is listed, otherwise to
	// the first listed fallback of the current variant, otherwise not at all.
	if ( $this->mVariantFlags ) {
		$matched = false;
		$target = null;
		if ( isset( $this->mVariantFlags[$variant] ) ) {
			$matched = true;
			$target = $variant;
		} else {
			$fallbacks = $this->mConverter->getVariantFallbacks( $variant );
			if ( is_array( $fallbacks ) ) {
				foreach ( $fallbacks as $fallback ) {
					if ( isset( $this->mVariantFlags[$fallback] ) ) {
						$matched = true;
						$target = $fallback;
						break;
					}
				}
			}
		}
		if ( $matched ) {
			$this->mRules = $this->mConverter->autoConvert( $this->mRules, $target );
		}
		// From here on the result is treated as raw text.
		$flags = [ 'R' => true ];
		$this->mFlags = $flags;
	}

	$isLiteral = isset( $flags['R'] ) || isset( $flags['N'] );
	if ( !$isLiteral ) {
		// decode => HTML entities modified by Sanitizer::removeHTMLtags
		$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
		$this->parseRules();
	}
	$rules = $this->mRules;

	if ( !$this->mBidtable && !$this->mUnidtable ) {
		if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
			// -{A/H/-|text}- with non-empty text but no rules:
			// use the text for every variant
			if ( $rules !== '' ) {
				foreach ( $this->mConverter->mVariants as $code ) {
					$this->mBidtable[$code] = $rules;
				}
			}
		} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
			$flags = [ 'R' => true ];
			$this->mFlags = $flags;
		}
	}

	// false marks "no flag produced a display text"; see the error case below.
	$this->mRuleDisplay = false;
	foreach ( $flags as $flag => $unused ) {
		switch ( $flag ) {
			case 'R':
				// no content conversion, but still strip the -{}- tags
				$this->mRuleDisplay = $rules;
				break;
			case 'N':
				// output the name of the variant given as rule text
				$ruleVar = trim( $rules );
				$this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar] ?? '';
				break;
			case 'D':
				// output the rules description
				$this->mRuleDisplay = $this->getRulesDesc();
				break;
			case 'H':
				// output nothing
				$this->mRuleDisplay = '';
				break;
			case '-':
				$this->mRulesAction = 'remove';
				$this->mRuleDisplay = '';
				break;
			case '+':
				$this->mRulesAction = 'add';
				$this->mRuleDisplay = '';
				break;
			case 'S':
				$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
				break;
			case 'T':
				$this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
				$this->mRuleDisplay = '';
				break;
			default:
				// ignore unknown flags (but see error case below)
		}
	}

	if ( $this->mRuleDisplay === false ) {
		$errorText = wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped();
		$this->mRuleDisplay = '<span class="error">' . $errorText . '</span>';
	}

	$this->generateConvTable();
}
454 
/**
 * Checks if there are conversion rules.
 *
 * @return bool True when the rule text is anything other than the empty string
 */
public function hasRules() {
	$rules = $this->mRules;
	return $rules !== '';
}
462 
/**
 * Get the text to display in place of the -{...}- markup.
 *
 * @return string|false The current value of $mRuleDisplay
 */
public function getDisplay() {
	$display = $this->mRuleDisplay;
	return $display;
}
470 
/**
 * Get the converted title.
 *
 * @return string|false The current value of $mRuleTitle (false by default)
 */
public function getTitle() {
	$title = $this->mRuleTitle;
	return $title;
}
478 
/**
 * Return how to deal with the conversion rules.
 *
 * @return string 'none' by default; parse() sets 'add' or 'remove'
 */
public function getRulesAction() {
	$action = $this->mRulesAction;
	return $action;
}
486 
/**
 * Get the conversion table.
 *
 * @return array The current value of $mConvTable
 */
public function getConvTable() {
	$table = $this->mConvTable;
	return $table;
}
495 
/**
 * Get the conversion rules string.
 *
 * @return string The current value of $mRules
 */
public function getRules() {
	$rules = $this->mRules;
	return $rules;
}
503 
/**
 * Get the conversion flags.
 *
 * @return array The current value of $mFlags
 */
public function getFlags() {
	$flags = $this->mFlags;
	return $flags;
}
511 }
ConverterRule
The rules used for language conversion, this processes the rules extracted by Parser from the -{ }- w...
Definition: ConverterRule.php:28
ConverterRule\$mRules
string $mRules
the text of the rules
Definition: ConverterRule.php:42
ConverterRule\getDisplay
getDisplay()
Get display text on markup -{...}-.
Definition: ConverterRule.php:467
ConverterRule\$mConvTable
$mConvTable
Definition: ConverterRule.php:46
ConverterRule\$mRuleDisplay
$mRuleDisplay
Definition: ConverterRule.php:37
true
return true
Definition: router.php:90
ConverterRule\$mRulesAction
$mRulesAction
Definition: ConverterRule.php:43
ConverterRule\$mConverter
LanguageConverter $mConverter
Definition: ConverterRule.php:36
wfMessage
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Definition: GlobalFunctions.php:1220
ConverterRule\getRulesDesc
getRulesDesc()
Definition: ConverterRule.php:205
ConverterRule\getFlags
getFlags()
Get conversion flags.
Definition: ConverterRule.php:508
ConverterRule\$mFlags
$mFlags
Definition: ConverterRule.php:44
ConverterRule\getRuleConvertedTitle
getRuleConvertedTitle( $variant)
Similar to getRuleConvertedStr(), but this prefers to use original page title if $variant === $this->...
Definition: ConverterRule.php:269
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:326
ConverterRule\parse
parse( $variant=null)
Parse rules and flags.
Definition: ConverterRule.php:350
ConverterRule\getTextInBidtable
getTextInBidtable( $variants)
Check whether any of the given variants has an entry in the bidirectional table.
Definition: ConverterRule.php:71
ConverterRule\getConvTable
getConvTable()
Get conversion table.
Definition: ConverterRule.php:492
ConverterRule\getRules
getRules()
Get conversion rules string.
Definition: ConverterRule.php:500
ConverterRule\parseRules
parseRules()
Parse the rule text into the bidirectional and unidirectional tables.
Definition: ConverterRule.php:152
ConverterRule\$mText
string $mText
original text in -{text}-
Definition: ConverterRule.php:32
ConverterRule\$mBidtable
array $mBidtable
of the translation in each variant
Definition: ConverterRule.php:50
ConverterRule\getRulesAction
getRulesAction()
Return how to deal with conversion rules.
Definition: ConverterRule.php:483
ConverterRule\$mUnidtable
array $mUnidtable
of the translation in each variant
Definition: ConverterRule.php:54
ConverterRule\__construct
__construct( $text, LanguageConverter $converter)
Definition: ConverterRule.php:60
ConverterRule\parseFlags
parseFlags()
Parse flags with syntax -{FLAG| ...
Definition: ConverterRule.php:87
ConverterRule\hasRules
hasRules()
Checks if there are conversion rules.
Definition: ConverterRule.php:459
ConverterRule\getRuleConvertedStr
getRuleConvertedStr( $variant)
Parse rules conversion.
Definition: ConverterRule.php:228
ConverterRule\generateConvTable
generateConvTable()
Generate conversion table for all text.
Definition: ConverterRule.php:292
ConverterRule\$mVariantFlags
$mVariantFlags
Definition: ConverterRule.php:45
LanguageConverter
Base class for multi-variant language conversion.
Definition: LanguageConverter.php:35
ConverterRule\getTitle
getTitle()
Get converted title.
Definition: ConverterRule.php:475
ConverterRule\$mRuleTitle
$mRuleTitle
Definition: ConverterRule.php:38