MediaWiki  master
ConverterRule.php
Go to the documentation of this file.
1 <?php
/** @var string Original text found inside the -{text}- markup (set by the constructor) */
32  public $mText;
/** @var LanguageConverter Converter that supplies flags, variants and separators (set by the constructor) */
36  public $mConverter;
/** @var string|false Display text for the markup; computed by parse(), returned by getDisplay() */
37  public $mRuleDisplay = '';
/** @var string|false Converted title; assigned by parse() when the 'T' flag is processed, false otherwise */
38  public $mRuleTitle = false;
/** @var string The text of the rules, i.e. the part of $mText after the flags */
42  public $mRules = '';
/** @var string 'none' by default; parse() sets 'add' for the '+' flag and 'remove' for the '-' flag */
43  public $mRulesAction = 'none';
/** @var array Map of flag => true, computed by parseFlags() and adjusted by parse() */
44  public $mFlags = [];
/** @var array Map of variant code => true for variant codes given in the flag position */
45  public $mVariantFlags = [];
/** @var array Conversion table of the form [variant][from text] => to text, filled by generateConvTable() */
46  public $mConvTable = [];
/** @var array Bidirectional table: variant code => translation in that variant */
50  public $mBidtable = [];
/** @var array Unidirectional table: variant code => [ from text => to text ] */
54  public $mUnidtable = [];
55 
/**
 * Store the raw markup text and the converter used to interpret it.
 *
 * Nothing is parsed here; call parse() to compute flags, rules and
 * the display text.
 *
 * @param string $text The text between -{ and }-
 * @param LanguageConverter $converter
 */
public function __construct( $text, LanguageConverter $converter ) {
    $this->mConverter = $converter;
    $this->mText = $text;
}
64 
/**
 * Look up the first of the given variants that has an entry in the
 * bidirectional table.
 *
 * @param array|string $variants One variant code or a list of codes,
 *   tried in the order given
 * @return string|false The text stored for the first matching variant,
 *   or false when none of them has an entry
 */
public function getTextInBidtable( $variants ) {
    // An empty list simply never enters the loop and yields false.
    foreach ( (array)$variants as $code ) {
        if ( isset( $this->mBidtable[$code] ) ) {
            return $this->mBidtable[$code];
        }
    }

    return false;
}
83 
/**
 * Split $this->mText into flags and rules text.
 *
 * Everything before the first '|' is a ';'-separated flag list; the
 * remainder is the rules text. The recognised flags are normalised
 * and the results stored in $this->mFlags, $this->mVariantFlags and
 * $this->mRules.
 */
private function parseFlags() {
    $remaining = $this->mText;
    $flags = [];
    $variantFlags = [];

    $pipePos = strpos( $remaining, '|' );
    if ( $pipePos !== false ) {
        $knownFlags = $this->mConverter->getFlags();
        $flagList = substr( $remaining, 0, $pipePos );
        foreach ( StringUtils::explode( ';', $flagList ) as $candidate ) {
            $candidate = trim( $candidate );
            if ( isset( $knownFlags[$candidate] ) ) {
                $flags[$knownFlags[$candidate]] = true;
            }
        }
        $remaining = strval( substr( $remaining, $pipePos + 1 ) );
    }

    // R, N and - each suppress every other flag; R wins over N,
    // which wins over -.
    $exclusive = null;
    foreach ( [ 'R', 'N', '-' ] as $name ) {
        if ( isset( $flags[$name] ) ) {
            $exclusive = $name;
            break;
        }
    }

    if ( !$flags ) {
        // No recognised flag at all: default to S
        $flags['S'] = true;
    } elseif ( $exclusive !== null ) {
        $flags = [ $exclusive => true ];
    } elseif ( count( $flags ) === 1 && isset( $flags['T'] ) ) {
        // T on its own also gets H
        $flags['H'] = true;
    } elseif ( isset( $flags['H'] ) ) {
        // replace A flag, and remove other flags except T and D
        $kept = [ '+' => true, 'H' => true ];
        foreach ( [ 'T', 'D' ] as $optional ) {
            if ( isset( $flags[$optional] ) ) {
                $kept[$optional] = true;
            }
        }
        $flags = $kept;
    } else {
        if ( isset( $flags['A'] ) ) {
            $flags['+'] = true;
            $flags['S'] = true;
        }
        if ( isset( $flags['D'] ) ) {
            unset( $flags['S'] );
        }
        // try to find flags like "zh-hans", "zh-hant"
        // allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
        $matchedVariants = array_intersect(
            array_keys( $flags ),
            $this->mConverter->getVariants()
        );
        if ( $matchedVariants ) {
            $variantFlags = array_fill_keys( $matchedVariants, true );
            $flags = [];
        }
    }

    $this->mVariantFlags = $variantFlags;
    $this->mRules = $remaining;
    $this->mFlags = $flags;
}
148 
/**
 * Build the bidirectional and unidirectional tables from $this->mRules.
 *
 * Each rule has the form "variant:text" (bidirectional) or
 * "from=>variant:text" (unidirectional). Rules are separated according
 * to the converter's variant separator pattern. If any rule names a
 * variant the converter does not know, both tables are emptied.
 *
 * The results are stored in $this->mBidtable and $this->mUnidtable.
 */
private function parseRules() {
    $bidtable = [];
    $unidtable = [];
    $varsep_pattern = $this->mConverter->getVarSeparatorPattern();

    // Split according to $varsep_pattern, but ignore semicolons from HTML entities:
    // temporarily replace the ';' that ends an entity with \x01.
    $rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $this->mRules );
    $choice = is_string( $rules ) ? preg_split( $varsep_pattern, $rules ) : false;
    if ( $choice === false ) {
        // PCRE failure (e.g. malformed UTF-8 or backtrack limit): treat the
        // markup as containing no rules rather than iterating a non-array.
        $choice = [];
    }
    $choice = str_replace( "\x01", ';', $choice );

    foreach ( $choice as $c ) {
        $v = explode( ':', $c, 2 );
        if ( count( $v ) !== 2 ) {
            // syntax error, skip
            continue;
        }
        $to = trim( $v[1] );
        $v = trim( $v[0] );
        $u = explode( '=>', $v, 2 );
        if ( count( $u ) === 1 ) {
            $vv = $this->mConverter->validateVariant( $v );
            // if $to is empty (which is also used as $from in bidtable),
            // strtr() could return a wrong result.
            if ( $to !== '' && $vv ) {
                $bidtable[$vv] = $to;
            }
        } else {
            $from = trim( $u[0] );
            $vv = $this->mConverter->validateVariant( trim( $u[1] ) );
            // if $from is empty, strtr() could return a wrong result.
            if ( $from !== '' && $vv ) {
                $unidtable[$vv][$from] = $to;
            }
        }
        // syntax error (unknown variant): discard everything parsed so far
        if ( $vv === null || !isset( $this->mConverter->getVariantNames()[$vv] ) ) {
            $bidtable = [];
            $unidtable = [];
            break;
        }
    }
    $this->mBidtable = $bidtable;
    $this->mUnidtable = $unidtable;
}
201 
/**
 * Build a human-readable description of the parsed rules.
 *
 * Bidirectional entries are listed first as "<variant name><code
 * separator><text><variant separator>", followed by unidirectional
 * entries prefixed with "<from>⇒".
 *
 * @return string Concatenated description; empty when both tables are empty
 */
private function getRulesDesc() {
    $codesep = $this->mConverter->getDescCodeSeparator();
    $varsep = $this->mConverter->getDescVarSeparator();

    $parts = [];
    foreach ( $this->mBidtable as $code => $text ) {
        $parts[] = $this->mConverter->getVariantNames()[$code] . $codesep . $text . $varsep;
    }
    foreach ( $this->mUnidtable as $code => $pairs ) {
        foreach ( $pairs as $from => $to ) {
            $parts[] = $from . '⇒' . $this->mConverter->getVariantNames()[$code]
                . $codesep . $to . $varsep;
        }
    }

    return implode( '', $parts );
}
220 
/**
 * Pick the text to display for the given variant.
 *
 * Lookup order: the variant's own bidirectional entry, then its
 * fallbacks' bidirectional entries, then the first unidirectional
 * target for the variant, and finally (only when manual conversion is
 * 'disable' for the variant) the first text found in either table.
 *
 * @param string $variant
 * @return string|false The rules text unchanged when no tables were
 *   parsed; otherwise the selected text, or false when nothing matches
 */
private function getRuleConvertedStr( $variant ) {
    $bidtable = $this->mBidtable;
    $unidtable = $this->mUnidtable;

    if ( !$bidtable && !$unidtable ) {
        return $this->mRules;
    }

    // display current variant in bidirectional array
    $disp = $this->getTextInBidtable( $variant );
    // or display current variant in fallbacks
    if ( $disp === false ) {
        $disp = $this->getTextInBidtable(
            $this->mConverter->getVariantFallbacks( $variant ) );
    }
    // or display current variant in unidirectional array
    if ( $disp === false && array_key_exists( $variant, $unidtable ) ) {
        $disp = array_values( $unidtable[$variant] )[0];
    }
    // or display first text under disable manual convert.
    // A variant with no manual-level entry is treated as not disabled
    // instead of raising an "Undefined array key" warning.
    if ( $disp === false
        && ( $this->mConverter->getManualLevel()[$variant] ?? null ) === 'disable'
    ) {
        if ( count( $bidtable ) > 0 ) {
            $disp = array_values( $bidtable )[0];
        } else {
            $disp = array_values( array_values( $unidtable )[0] )[0];
        }
    }

    return $disp;
}
258 
/**
 * Pick the title text for the given variant.
 *
 * For the converter's main code only an entry targeting exactly that
 * variant is used (bidirectional first, then unidirectional). For any
 * other variant this defers to getRuleConvertedStr().
 *
 * @param string $variant
 * @return string|false Title text, or false when the main variant has
 *   no explicit entry
 */
private function getRuleConvertedTitle( $variant ) {
    if ( $variant !== $this->mConverter->getMainCode() ) {
        return $this->getRuleConvertedStr( $variant );
    }

    // If a string targeting exactly this variant is set,
    // use it. Otherwise, just return false, so the real
    // page name can be shown (and because variant === main,
    // there'll be no further automatic conversion).
    $disp = $this->getTextInBidtable( $variant );
    // Compare against false explicitly: a title of '0' is a valid
    // string and must not fall through to the unidirectional table.
    if ( $disp !== false ) {
        return $disp;
    }
    if ( array_key_exists( $variant, $this->mUnidtable ) ) {
        $disp = array_values( $this->mUnidtable[$variant] )[0];
    }
    // Assigned above or still false.
    return $disp;
}
289 
/**
 * Fill $this->mConvTable from the bidirectional and unidirectional
 * tables, honouring each variant's manual conversion level.
 *
 * Variants missing from the bidirectional table are filled in from
 * their fallbacks first. Every pair of variants with text then gets a
 * mapping in each direction whose target variant is 'bidirectional'.
 * Unidirectional entries are added for variants whose level is
 * 'bidirectional' or 'unidirectional'.
 */
private function generateConvTable() {
    // Special case optimisation
    if ( !$this->mBidtable && !$this->mUnidtable ) {
        $this->mConvTable = [];
        return;
    }

    $bidtable = $this->mBidtable;
    $unidtable = $this->mUnidtable;
    $manLevel = $this->mConverter->getManualLevel();

    $vmarked = [];
    foreach ( $this->mConverter->getVariants() as $v ) {
        // A variant without a manual-level entry takes part in no mapping.
        $level = $manLevel[$v] ?? null;

        /* for bidirectional array
            fill in the missing variants, if any,
            with fallbacks */
        if ( !isset( $bidtable[$v] ) ) {
            $variantFallbacks =
                $this->mConverter->getVariantFallbacks( $v );
            $vf = $this->getTextInBidtable( $variantFallbacks );
            // Strict check: a fallback text of '0' is valid and must be kept.
            if ( $vf !== false ) {
                $bidtable[$v] = $vf;
            }
        }

        if ( isset( $bidtable[$v] ) ) {
            foreach ( $vmarked as $vo ) {
                // use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
                // or -{H|zh:WordZh;zh-tw:WordTw}-
                // or -{-|zh:WordZh;zh-tw:WordTw}-
                // to introduce a custom mapping between
                // words WordZh and WordTw in the whole text
                if ( $level === 'bidirectional' ) {
                    $this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
                }
                if ( ( $manLevel[$vo] ?? null ) === 'bidirectional' ) {
                    $this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
                }
            }
            $vmarked[] = $v;
        }
        /* for unidirectional array fill to convert tables */
        if ( ( $level === 'bidirectional' || $level === 'unidirectional' )
            && isset( $unidtable[$v] )
        ) {
            if ( isset( $this->mConvTable[$v] ) ) {
                // Unidirectional entries take precedence on key collisions
                $this->mConvTable[$v] = $unidtable[$v] + $this->mConvTable[$v];
            } else {
                $this->mConvTable[$v] = $unidtable[$v];
            }
        }
    }
}
346 
/**
 * Parse the flags and rules of the markup and compute, for the given
 * variant, the display text ($mRuleDisplay), the title ($mRuleTitle),
 * the rules action ($mRulesAction) and the conversion table
 * ($mConvTable).
 *
 * @param string|null $variant Variant code; when empty, the converter's
 *   preferred variant is used
 */
351  public function parse( $variant = null ) {
352  if ( !$variant ) {
353  $variant = $this->mConverter->getPreferredVariant();
354  }
355 
356  $this->parseFlags();
357  $flags = $this->mFlags;
358 
359  // convert to specified variant
360  // syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
361  if ( $this->mVariantFlags ) {
362  // check if current variant in flags
363  if ( isset( $this->mVariantFlags[$variant] ) ) {
364  // then convert <text to convert> to current language
365  $this->mRules = $this->mConverter->autoConvert( $this->mRules,
366  $variant );
367  } else {
368  // if the current variant is not in the flags,
369  // then we check its fallback variants.
370  $variantFallbacks =
371  $this->mConverter->getVariantFallbacks( $variant );
372  if ( is_array( $variantFallbacks ) ) {
373  foreach ( $variantFallbacks as $variantFallback ) {
374  // if current variant's fallback exist in flags
375  if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
376  // then convert <text to convert> to fallback language
377  $this->mRules =
378  $this->mConverter->autoConvert( $this->mRules,
379  $variantFallback );
380  break;
381  }
382  }
383  }
384  }
// Whatever happened above, the text is now output as-is (raw).
385  $this->mFlags = $flags = [ 'R' => true ];
386  }
387 
388  if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
389  // decode => HTML entities modified by Sanitizer::internalRemoveHtmlTags
390  $this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
391  $this->parseRules();
392  }
393  $rules = $this->mRules;
394 
395  if ( !$this->mBidtable && !$this->mUnidtable ) {
396  if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
397  // fill all variants if text in -{A/H/-|text}- is non-empty but without rules
398  if ( $rules !== '' ) {
399  foreach ( $this->mConverter->getVariants() as $v ) {
400  $this->mBidtable[$v] = $rules;
401  }
402  }
403  } elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
// No usable rules and no N/T flag: fall back to raw output.
404  $this->mFlags = $flags = [ 'R' => true ];
405  }
406  }
407 
// false marks "no flag produced a display text"; it is turned into an
// error message after the loop. Flags are processed in array order, so
// the last flag that assigns $mRuleDisplay determines its final value.
408  $this->mRuleDisplay = false;
409  foreach ( $flags as $flag => $unused ) {
410  switch ( $flag ) {
411  case 'R':
412  // if we don't do content convert, still strip the -{}- tags
413  $this->mRuleDisplay = $rules;
414  break;
415  case 'N':
416  // process N flag: output the name of the variant given as rules text
417  $ruleVar = trim( $rules );
418  $this->mRuleDisplay = $this->mConverter->getVariantNames()[$ruleVar] ?? '';
419  break;
420  case 'D':
421  // process D flag: output rules description
422  $this->mRuleDisplay = $this->getRulesDesc();
423  break;
424  case 'H':
425  // process H,- flag or T only: output nothing
426  $this->mRuleDisplay = '';
427  break;
428  case '-':
429  $this->mRulesAction = 'remove';
430  $this->mRuleDisplay = '';
431  break;
432  case '+':
433  $this->mRulesAction = 'add';
434  $this->mRuleDisplay = '';
435  break;
436  case 'S':
437  $this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
438  break;
439  case 'T':
440  $this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
441  $this->mRuleDisplay = '';
442  break;
443  default:
444  // ignore unknown flags (but see error case below)
445  }
446  }
447  if ( $this->mRuleDisplay === false ) {
448  $this->mRuleDisplay = '<span class="error">'
449  . wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
450  . '</span>';
451  }
452 
453  $this->generateConvTable();
454  }
455 
/**
 * Checks if there are conversion rules.
 *
 * @return bool True when the rules text is not the empty string
 */
public function hasRules() {
    return '' !== $this->mRules;
}
463 
/**
 * Get display text on markup -{...}-.
 *
 * @return string|false The current value of $mRuleDisplay
 */
public function getDisplay() {
    $display = $this->mRuleDisplay;
    return $display;
}
471 
/**
 * Get converted title.
 *
 * @return string|false The current value of $mRuleTitle; false by default
 */
public function getTitle() {
    $title = $this->mRuleTitle;
    return $title;
}
479 
/**
 * Return how to deal with the conversion rules.
 *
 * @return string 'none' by default; parse() sets 'add' or 'remove'
 */
public function getRulesAction() {
    $action = $this->mRulesAction;
    return $action;
}
487 
/**
 * Get conversion table.
 *
 * @return array The current value of $mConvTable
 */
public function getConvTable() {
    $table = $this->mConvTable;
    return $table;
}
496 
/**
 * Get conversion rules string.
 *
 * @return string The current value of $mRules
 */
public function getRules() {
    $rulesText = $this->mRules;
    return $rulesText;
}
504 
/**
 * Get conversion flags.
 *
 * @return array The current value of $mFlags
 */
public function getFlags() {
    $currentFlags = $this->mFlags;
    return $currentFlags;
}
512 }
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
The rules used for language conversion, this processes the rules extracted by Parser from the -{ }- w...
array $mBidtable
of the translation in each variant
getRules()
Get conversion rules string.
parse( $variant=null)
Parse rules and flags.
getFlags()
Get conversion flags.
__construct( $text, LanguageConverter $converter)
getTextInBidtable( $variants)
Check whether any of the given variants has an entry in the bidirectional table.
getDisplay()
Get display text on markup -{...}-.
getRulesAction()
Return how to deal with conversion rules.
array $mUnidtable
of the translation in each variant
LanguageConverter $mConverter
string $mRules
the text of the rules
hasRules()
Checks if there are conversion rules.
getConvTable()
Get conversion table.
string $mText
original text in -{text}-
getTitle()
Get converted title.
Base class for multi-variant language conversion.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
return true
Definition: router.php:90