MediaWiki  master
ConverterRule.php
Go to the documentation of this file.
1 <?php
/** @var string Original text found inside the -{text}- markup */
public $mText;

/** @var LanguageConverter Converter object this rule is parsed for */
public $mConverter;

/** @var string|false Text to display in place of the markup; filled in by parse() */
public $mRuleDisplay = '';

/** @var string|false Converted title set by the T flag in parse(); false when unset */
public $mRuleTitle = false;

/** @var string The text of the rules (what follows the flags in the markup) */
public $mRules = '';

/** @var string 'none' by default; parse() sets 'add' for the + flag and 'remove' for the - flag */
public $mRulesAction = 'none';

/** @var array Normalised flags as produced by parseFlags(), keyed by flag name */
public $mFlags = [];

/** @var array Variant codes that were given as flags, stored as array keys */
public $mVariantFlags = [];

/** @var array Conversion table built by generateConvTable(), keyed by variant code */
public $mConvTable = [];

/** @var array Bidirectional table: the translation in each variant, keyed by variant code */
public $mBidtable = [];

/** @var array Unidirectional table: per variant code, a map of source text to translation */
public $mUnidtable = [];
39 
/**
 * Store the raw markup text and the converter used to interpret it.
 *
 * Nothing is parsed here; call parse() to populate the other members.
 *
 * @param string $text The text found between -{ and }-
 * @param LanguageConverter $converter
 */
public function __construct( $text, $converter ) {
	$this->mConverter = $converter;
	$this->mText = $text;
}
48 
/**
 * Look up the first of the given variants that has an entry in the
 * bidirectional table and return its text.
 *
 * @param array|string $variants Variant code, or list of variant codes
 *  tried in order
 * @return string|false Text of the first matching variant, false if none
 */
public function getTextInBidtable( $variants ) {
	// A scalar is treated as a one-element list; an empty list simply
	// never enters the loop and yields false.
	foreach ( (array)$variants as $code ) {
		if ( isset( $this->mBidtable[$code] ) ) {
			return $this->mBidtable[$code];
		}
	}

	return false;
}
67 
/**
 * Parse flags with syntax -{FLAG| ... }-
 *
 * Splits $this->mText at the first '|' into a flag list (separated by ';')
 * and the rule text, keeps only the flags known to the converter, then
 * normalises the combination. Results are stored in $this->mFlags,
 * $this->mVariantFlags and $this->mRules.
 */
function parseFlags() {
	$rulesText = $this->mText;
	$parsed = [];
	$variantFlags = [];

	$pipeAt = strpos( $rulesText, '|' );
	if ( $pipeAt !== false ) {
		$known = $this->mConverter->mFlags;
		$flagList = substr( $rulesText, 0, $pipeAt );
		foreach ( StringUtils::explode( ';', $flagList ) as $rawFlag ) {
			$name = trim( $rawFlag );
			if ( isset( $known[$name] ) ) {
				$parsed[$known[$name]] = true;
			}
		}
		$rulesText = strval( substr( $rulesText, $pipeAt + 1 ) );
	}

	// R, N and - each discard every other flag; when several of them are
	// present, the first one in this order wins.
	$exclusive = null;
	foreach ( [ 'R', 'N', '-' ] as $candidate ) {
		if ( isset( $parsed[$candidate] ) ) {
			$exclusive = $candidate;
			break;
		}
	}

	if ( $parsed === [] ) {
		// no (valid) flag at all
		$parsed['S'] = true;
	} elseif ( $exclusive !== null ) {
		$parsed = [ $exclusive => true ];
	} elseif ( isset( $parsed['T'] ) && count( $parsed ) == 1 ) {
		// T on its own also gets H
		$parsed['H'] = true;
	} elseif ( isset( $parsed['H'] ) ) {
		// H implies +; of the remaining flags only T and D are kept
		$kept = [ '+' => true, 'H' => true ];
		foreach ( [ 'T', 'D' ] as $optional ) {
			if ( isset( $parsed[$optional] ) ) {
				$kept[$optional] = true;
			}
		}
		$parsed = $kept;
	} else {
		if ( isset( $parsed['A'] ) ) {
			// A adds + and S (existing entries keep their position)
			$parsed += [ '+' => true, 'S' => true ];
		}
		if ( isset( $parsed['D'] ) ) {
			unset( $parsed['S'] );
		}
		// try to find flags like "zh-hans", "zh-hant"
		// allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
		$variantFlags = array_intersect( array_keys( $parsed ), $this->mConverter->mVariants );
		if ( $variantFlags ) {
			// variant codes become the keys; all ordinary flags are dropped
			$variantFlags = array_flip( $variantFlags );
			$parsed = [];
		}
	}

	$this->mVariantFlags = $variantFlags;
	$this->mRules = $rulesText;
	$this->mFlags = $parsed;
}
130 
/**
 * Parse the rule text in $this->mRules into the bidirectional and
 * unidirectional tables.
 *
 * Each rule is either "variant:text" (bidirectional) or
 * "from=>variant:text" (unidirectional); rules are split with the
 * converter's variant separator pattern. A chunk without ':' is skipped;
 * a chunk whose variant is not valid discards everything parsed so far,
 * leaving both tables empty.
 *
 * Results are stored in $this->mBidtable and $this->mUnidtable.
 */
public function parseRules() {
	$rules = $this->mRules;
	$bidtable = [];
	$unidtable = [];
	$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

	// Split according to $varsep_pattern, but ignore semicolons from HTML entities
	$rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
	$choice = preg_split( $varsep_pattern, $rules );
	$choice = str_replace( "\x01", ';', $choice );

	foreach ( $choice as $c ) {
		$v = explode( ':', $c, 2 );
		if ( count( $v ) != 2 ) {
			// syntax error, skip
			continue;
		}
		$to = trim( $v[1] );
		$spec = trim( $v[0] );
		$u = explode( '=>', $spec, 2 );

		if ( count( $u ) == 1 ) {
			// bidirectional rule: "variant:text"
			$vv = $this->mConverter->validateVariant( $spec );
			// if $to is empty (which is also used as $from in bidtable),
			// strtr() could return a wrong result.
			if ( $to !== '' && $vv ) {
				$bidtable[$vv] = $to;
			}
		} else {
			// unidirectional rule: "from=>variant:text"
			$from = trim( $u[0] );
			$vv = $this->mConverter->validateVariant( trim( $u[1] ) );
			// if $from is empty, strtr() could return a wrong result.
			// $vv is checked before it is used as an array offset, so a
			// failed validation is never used as a key. Entries of
			// $unidtable are only ever created here and are always arrays.
			if ( $from !== '' && $vv ) {
				$unidtable[$vv][$from] = $to;
			}
		}

		// syntax error, pass: an unknown variant invalidates the whole rule set
		if ( $vv === null || !isset( $this->mConverter->mVariantNames[$vv] ) ) {
			$bidtable = [];
			$unidtable = [];
			break;
		}
	}

	$this->mBidtable = $bidtable;
	$this->mUnidtable = $unidtable;
}
185 
/**
 * Build a textual description of the parsed rules.
 *
 * Bidirectional entries are rendered as "<variant name><codesep><text><varsep>",
 * unidirectional entries as "<from>⇒<variant name><codesep><to><varsep>",
 * using the separators and variant names provided by the converter.
 *
 * @return string Empty string when both tables are empty
 */
function getRulesDesc() {
	$codesep = $this->mConverter->mDescCodeSep;
	$varsep = $this->mConverter->mDescVarSep;
	$names = $this->mConverter->mVariantNames;

	$pieces = [];
	foreach ( $this->mBidtable as $code => $translation ) {
		$pieces[] = $names[$code] . $codesep . $translation . $varsep;
	}
	foreach ( $this->mUnidtable as $code => $pairs ) {
		foreach ( $pairs as $from => $to ) {
			$pieces[] = $from . '⇒' . $names[$code] . $codesep . $to . $varsep;
		}
	}

	return implode( '', $pieces );
}
206 
/**
 * Pick the text to display for a variant from the parsed rules.
 *
 * Lookup order:
 *  1. the bidirectional entry of $variant itself,
 *  2. the bidirectional entry of one of its fallback variants,
 *  3. the first target text of $variant in the unidirectional table,
 *  4. only when manual conversion is set to 'disable' for $variant: the
 *     first text found in the bidirectional table, or else in the
 *     unidirectional table.
 *
 * @param string $variant Variant code
 * @return string|false The raw rule text when both tables are empty, the
 *  selected text otherwise, or false when nothing matches
 */
public function getRuleConvertedStr( $variant ) {
	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;

	if ( !$bidtable && !$unidtable ) {
		// no parsed rules: the text is shown as it is
		return $this->mRules;
	}

	// display current variant in bidirectional array
	$disp = $this->getTextInBidtable( $variant );

	// or display current variant in fallbacks
	if ( $disp === false ) {
		$disp = $this->getTextInBidtable(
			$this->mConverter->getVariantFallbacks( $variant ) );
	}

	// or display current variant in unidirectional array
	if ( $disp === false && array_key_exists( $variant, $unidtable ) ) {
		$disp = array_values( $unidtable[$variant] )[0];
	}

	// or display first text under disable manual convert.
	// A variant without a manual level entry is treated as not disabled,
	// rather than raising an undefined index notice.
	$manualLevel = $this->mConverter->mManualLevel[$variant] ?? null;
	if ( $disp === false && $manualLevel === 'disable' ) {
		// at least one table is non-empty here, so index 0 always exists
		$disp = $bidtable
			? array_values( $bidtable )[0]
			: array_values( array_values( $unidtable )[0] )[0];
	}

	return $disp;
}
244 
/**
 * Similar to getRuleConvertedStr(), but this prefers to use the original
 * page title if $variant is the converter's main language code.
 *
 * @param string $variant Variant code
 * @return string|false Title text for the variant, or false when the real
 *  page name should be shown
 */
public function getRuleConvertedTitle( $variant ) {
	if ( $variant !== $this->mConverter->mMainLanguageCode ) {
		return $this->getRuleConvertedStr( $variant );
	}

	// If a string targeting exactly this variant is set, use it.
	// Otherwise, just return false, so the real page name can be shown
	// (and because variant === main, there'll be no further automatic
	// conversion).
	$disp = $this->getTextInBidtable( $variant );
	// Compare against false explicitly: a title such as "0" is falsy in
	// PHP but is still a valid, explicitly requested title.
	if ( $disp !== false ) {
		return $disp;
	}

	if ( array_key_exists( $variant, $this->mUnidtable ) ) {
		return array_values( $this->mUnidtable[$variant] )[0];
	}

	return false;
}
274 
/**
 * Generate the conversion table for all text from the parsed rules and
 * store it in $this->mConvTable (keyed by variant code, each entry mapping
 * source text to replacement text).
 *
 * Variants missing from the bidirectional table are filled in from their
 * fallback variants. Only variants whose manual level is 'bidirectional'
 * receive bidirectional mappings; unidirectional mappings are added for
 * levels 'bidirectional' and 'unidirectional'.
 */
public function generateConvTable() {
	// Always start from an empty table so that calling this more than once
	// cannot leave entries of an earlier run behind.
	$this->mConvTable = [];

	// Special case optimisation
	if ( !$this->mBidtable && !$this->mUnidtable ) {
		return;
	}

	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;
	$manLevel = $this->mConverter->mManualLevel;

	// variants already known to have a bidirectional text
	$vmarked = [];
	foreach ( $this->mConverter->mVariants as $v ) {
		/* for bidirectional array
			fill in the missing variants, if any,
			with fallbacks */
		if ( !isset( $bidtable[$v] ) ) {
			$variantFallbacks = $this->mConverter->getVariantFallbacks( $v );
			$vf = $this->getTextInBidtable( $variantFallbacks );
			// explicit comparison: a fallback text of "0" is falsy but valid
			if ( $vf !== false ) {
				$bidtable[$v] = $vf;
			}
		}

		if ( isset( $bidtable[$v] ) ) {
			foreach ( $vmarked as $vo ) {
				// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
				// or -{H|zh:WordZh;zh-tw:WordTw}-
				// or -{-|zh:WordZh;zh-tw:WordTw}-
				// to introduce a custom mapping between
				// words WordZh and WordTw in the whole text
				if ( $manLevel[$v] == 'bidirectional' ) {
					$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
				}
				if ( $manLevel[$vo] == 'bidirectional' ) {
					$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
				}
			}
			$vmarked[] = $v;
		}

		/* for unidirectional array fill to convert tables */
		if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
			&& isset( $unidtable[$v] )
		) {
			// array union: unidirectional entries win over bidirectional
			// ones that share the same source text
			$this->mConvTable[$v] = $unidtable[$v] + ( $this->mConvTable[$v] ?? [] );
		}
	}
}
332 
/**
 * Parse rules and flags.
 *
 * Runs parseFlags(), then (unless the flags say otherwise) parseRules(),
 * works out the display text, title and rules action from the flags, and
 * finally builds the conversion table.
 *
 * @param string|null $variant Variant language code; when empty the
 *  converter's preferred variant is used
 */
public function parse( $variant = null ) {
	$converter = $this->mConverter;
	if ( !$variant ) {
		$variant = $converter->getPreferredVariant();
	}

	$this->parseFlags();
	$flags = $this->mFlags;

	// convert to specified variant
	// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
	if ( $this->mVariantFlags ) {
		if ( isset( $this->mVariantFlags[$variant] ) ) {
			// the current variant is listed: convert the text to it
			$this->mRules = $converter->autoConvert( $this->mRules, $variant );
		} else {
			// otherwise use the first fallback variant that is listed, if any
			$fallbacks = $converter->getVariantFallbacks( $variant );
			if ( is_array( $fallbacks ) ) {
				foreach ( $fallbacks as $fallback ) {
					if ( isset( $this->mVariantFlags[$fallback] ) ) {
						$this->mRules = $converter->autoConvert( $this->mRules, $fallback );
						break;
					}
				}
			}
		}
		// whatever happened above, the result is output as raw text
		$this->mFlags = $flags = [ 'R' => true ];
	}

	$rawOrName = isset( $flags['R'] ) || isset( $flags['N'] );
	if ( !$rawOrName ) {
		// decode => HTML entities modified by Sanitizer::removeHTMLtags
		$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
		$this->parseRules();
	}
	$rules = $this->mRules;

	$noTables = !$this->mBidtable && !$this->mUnidtable;
	if ( $noTables ) {
		if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
			// fill all variants if text in -{A/H/-|text}- is non-empty but without rules
			if ( $rules !== '' ) {
				foreach ( $converter->mVariants as $code ) {
					$this->mBidtable[$code] = $rules;
				}
			}
		} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
			// nothing to convert: fall back to raw output
			$this->mFlags = $flags = [ 'R' => true ];
		}
	}

	// false marks "no flag produced any display"; see the error case below
	$this->mRuleDisplay = false;
	foreach ( array_keys( $flags ) as $flag ) {
		switch ( $flag ) {
			case 'R':
				// if we don't do content convert, still strip the -{}- tags
				$this->mRuleDisplay = $rules;
				break;
			case 'N':
				// process N flag: output current variant name
				$this->mRuleDisplay = $converter->mVariantNames[trim( $rules )] ?? '';
				break;
			case 'D':
				// process D flag: output rules description
				$this->mRuleDisplay = $this->getRulesDesc();
				break;
			case 'H':
				// process H,- flag or T only: output nothing
				$this->mRuleDisplay = '';
				break;
			case '-':
				$this->mRulesAction = 'remove';
				$this->mRuleDisplay = '';
				break;
			case '+':
				$this->mRulesAction = 'add';
				$this->mRuleDisplay = '';
				break;
			case 'S':
				$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
				break;
			case 'T':
				$this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
				$this->mRuleDisplay = '';
				break;
			default:
				// ignore unknown flags (but see error case below)
		}
	}

	if ( $this->mRuleDisplay === false ) {
		$errorText = wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped();
		$this->mRuleDisplay = '<span class="error">' . $errorText . '</span>';
	}

	$this->generateConvTable();
}
441 
/**
 * Checks if there are conversion rules.
 *
 * @return bool True when the rule text is not the empty string
 */
public function hasRules() {
	$rulesText = $this->mRules;

	return $rulesText !== '';
}
449 
/**
 * Get display text on markup -{...}-
 *
 * @return string The value of $this->mRuleDisplay as set by parse()
 */
public function getDisplay() {
	$display = $this->mRuleDisplay;

	return $display;
}
457 
/**
 * Get converted title.
 *
 * @return string|false The value of $this->mRuleTitle; false unless parse()
 *  assigned a title for the T flag
 */
public function getTitle() {
	$title = $this->mRuleTitle;

	return $title;
}
465 
/**
 * Return how to deal with the conversion rules.
 *
 * @return string 'none' by default; parse() sets 'add' or 'remove'
 */
public function getRulesAction() {
	$action = $this->mRulesAction;

	return $action;
}
473 
/**
 * Get conversion table.
 *
 * @return array The table built by generateConvTable(), keyed by variant code
 */
public function getConvTable() {
	$table = $this->mConvTable;

	return $table;
}
482 
/**
 * Get conversion rules string.
 *
 * @return string The value of $this->mRules
 */
public function getRules() {
	$rulesText = $this->mRules;

	return $rulesText;
}
490 
/**
 * Get conversion flags.
 *
 * @return array The value of $this->mFlags, keyed by flag name
 */
public function getFlags() {
	$flags = $this->mFlags;

	return $flags;
}
498 }
getTitle()
Get converted title.
__construct( $text, $converter)
getRulesAction()
Return how to deal with the conversion rules.
getFlags()
Get conversion flags.
parse( $variant=null)
Parse rules and flags.
parseRules()
Parse the rule text into the bidirectional and unidirectional translation tables.
generateConvTable()
Generate conversion table for all text.
getRuleConvertedTitle( $variant)
Similar to getRuleConvertedStr(), but this prefers to use original page title if $variant === $this->...
getTextInBidtable( $variants)
Check whether any of the given variants has an entry in the bidirectional table, and return its text.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Parser for rules of language conversion, parse rules in -{ }- tag.
parseFlags()
Parse flags with syntax -{FLAG| ...
getRuleConvertedStr( $variant)
Get the text to display for the given variant according to the parsed rules.
getConvTable()
Get conversion table.
hasRules()
Checks if there are conversion rules.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
getRules()
Get conversion rules string.
return true
Definition: router.php:92
getDisplay()
Get display text on markup -{...}-.