MediaWiki  1.28.0
ConverterRule.php
Go to the documentation of this file.
1 <?php
/** @var string Original text found inside the -{...}- markup */
public $mText;

/** @var LanguageConverter Converter object this rule belongs to */
public $mConverter;

/** @var string|bool Text to display for the markup; parse() fills it in */
public $mRuleDisplay = '';

/** @var string|bool Converted title, or false while none has been set */
public $mRuleTitle = false;

/** @var string The text of the rules (markup text with the flag part removed) */
public $mRules = '';

/** @var string 'none' by default; parse() switches it to 'add' or 'remove' */
public $mRulesAction = 'none';

/** @var array Parsed flags, stored as flag => true */
public $mFlags = [];

/** @var array Variant codes that were used as flags, stored as array keys */
public $mVariantFlags = [];

/** @var array Conversion table built by generateConvTable(): variant => [ from => to ] */
public $mConvTable = [];

/** @var array Bidirectional table: variant => translation for that variant */
public $mBidtable = [];

/** @var array Unidirectional table: variant => [ from => to ] */
public $mUnidtable = [];
39 
/**
 * Store the raw markup text and the converter that owns this rule.
 *
 * @param string $text The text between -{ and }-
 * @param LanguageConverter $converter Converter used to look up flags and variants
 */
public function __construct( $text, $converter ) {
	$this->mConverter = $converter;
	$this->mText = $text;
}
50 
/**
 * Look up the first of the given variants that has an entry in the
 * bidirectional table.
 *
 * @param array|string $variants One variant code or a list of variant codes
 * @return string|bool Text stored for the first matching variant, or false
 *   when none of the variants (or an empty list) matches
 */
public function getTextInBidtable( $variants ) {
	// An empty list simply skips the loop and falls through to "false".
	foreach ( (array)$variants as $code ) {
		if ( isset( $this->mBidtable[$code] ) ) {
			return $this->mBidtable[$code];
		}
	}

	return false;
}
69 
/**
 * Split the "FLAGS|" prefix off the markup text and normalise the flags.
 *
 * Results are stored in $this->mFlags, $this->mVariantFlags and
 * $this->mRules (the text after the first "|", or the whole text when
 * there is no "|").
 */
function parseFlags() {
	$body = $this->mText;
	$parsed = [];
	$variantFlags = [];

	$barPos = strpos( $body, '|' );
	if ( $barPos !== false ) {
		// Everything before the first "|" is a ";"-separated flag list.
		$known = $this->mConverter->mFlags;
		$head = substr( $body, 0, $barPos );
		foreach ( StringUtils::explode( ';', $head ) as $token ) {
			$token = trim( $token );
			if ( isset( $known[$token] ) ) {
				$parsed[$known[$token]] = true;
			}
		}
		$body = strval( substr( $body, $barPos + 1 ) );
	}

	// R, N and "-" (checked in this order) each discard every other flag.
	$exclusive = null;
	foreach ( [ 'R', 'N', '-' ] as $candidate ) {
		if ( isset( $parsed[$candidate] ) ) {
			$exclusive = $candidate;
			break;
		}
	}

	if ( !$parsed ) {
		// No recognised flag at all: default to S.
		$parsed = [ 'S' => true ];
	} elseif ( $exclusive !== null ) {
		$parsed = [ $exclusive => true ];
	} elseif ( isset( $parsed['T'] ) && count( $parsed ) === 1 ) {
		// T alone also implies H.
		$parsed['H'] = true;
	} elseif ( isset( $parsed['H'] ) ) {
		// replace A flag, and remove other flags except T and D
		$kept = [ '+' => true, 'H' => true ];
		foreach ( [ 'T', 'D' ] as $extra ) {
			if ( isset( $parsed[$extra] ) ) {
				$kept[$extra] = true;
			}
		}
		$parsed = $kept;
	} else {
		if ( isset( $parsed['A'] ) ) {
			$parsed['+'] = true;
			$parsed['S'] = true;
		}
		if ( isset( $parsed['D'] ) ) {
			unset( $parsed['S'] );
		}
		// try to find flags like "zh-hans", "zh-hant"
		// allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
		$matched = array_intersect( array_keys( $parsed ), $this->mConverter->mVariants );
		if ( $matched ) {
			// Variant codes used as flags replace all ordinary flags.
			$variantFlags = array_flip( $matched );
			$parsed = [];
		}
	}

	$this->mFlags = $parsed;
	$this->mRules = $body;
	$this->mVariantFlags = $variantFlags;
}
132 
/**
 * Parse $this->mRules into the bidirectional and unidirectional tables.
 *
 * Each choice has the form "variant:text" (bidirectional) or
 * "from=>variant:text" (unidirectional). Choices without a ":" are skipped.
 * A choice whose variant has no entry in the converter's mVariantNames
 * is treated as a syntax error: both tables are emptied and parsing stops.
 *
 * Results are stored in $this->mBidtable and $this->mUnidtable.
 */
function parseRules() {
	$rules = $this->mRules;
	$bidtable = [];
	$unidtable = [];
	$variants = $this->mConverter->mVariants;
	$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

	// Split according to $varsep_pattern, but ignore semicolons from HTML entities:
	// the entity's ";" is swapped for \x01 before splitting and restored afterwards.
	$rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
	$choice = preg_split( $varsep_pattern, $rules );
	$choice = str_replace( "\x01", ';', $choice );

	foreach ( $choice as $c ) {
		$v = explode( ':', $c, 2 );
		if ( count( $v ) != 2 ) {
			// syntax error, skip
			continue;
		}
		$to = trim( $v[1] );
		$v = trim( $v[0] );
		// explode() with a limit of 2 yields either one or two parts.
		$u = explode( '=>', $v, 2 );
		if ( count( $u ) == 1 ) {
			// if $to is empty (which is also used as $from in bidtable),
			// strtr() could return a wrong result.
			if ( $to !== '' && in_array( $v, $variants, true ) ) {
				$bidtable[$v] = $to;
			}
		} else {
			$from = trim( $u[0] );
			$v = trim( $u[1] );
			// if $from is empty, strtr() could return a wrong result.
			// $unidtable[$v] only ever holds arrays, so appending is always
			// safe and no separate "not an array yet" case is needed.
			if ( $from !== '' && in_array( $v, $variants, true ) ) {
				$unidtable[$v][$from] = $to;
			}
		}
		// syntax error: unknown variant name, discard everything parsed so far
		if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
			$bidtable = [];
			$unidtable = [];
			break;
		}
	}
	$this->mBidtable = $bidtable;
	$this->mUnidtable = $unidtable;
}
185 
/**
 * Build a textual description of the parsed rules.
 *
 * Bidirectional entries are listed first, then unidirectional ones; every
 * entry is terminated with the converter's mDescVarSep.
 *
 * @return string Description text; empty when both tables are empty
 */
function getRulesDesc() {
	$converter = $this->mConverter;
	$codesep = $converter->mDescCodeSep;
	$varsep = $converter->mDescVarSep;

	$pieces = [];
	foreach ( $this->mBidtable as $code => $translation ) {
		$pieces[] = $converter->mVariantNames[$code] . $codesep . $translation . $varsep;
	}
	foreach ( $this->mUnidtable as $code => $pairs ) {
		foreach ( $pairs as $from => $to ) {
			$pieces[] = $from . '⇒' . $converter->mVariantNames[$code]
				. $codesep . $to . $varsep;
		}
	}

	return implode( '', $pieces );
}
206 
/**
 * Choose the text to display for the given variant.
 *
 * Lookup order: the variant's own bidirectional entry, the bidirectional
 * entry of one of its fallbacks, the first value of the variant's
 * unidirectional entries, and finally (only when manual conversion is
 * 'disable' for the variant) the first text found in any table.
 *
 * @param string $variant Variant code
 * @return string|bool The raw rules text when no rule was parsed, otherwise
 *   the chosen text, or false when nothing matched
 */
function getRuleConvertedStr( $variant ) {
	$twoWay = $this->mBidtable;
	$oneWay = $this->mUnidtable;

	if ( count( $twoWay ) + count( $oneWay ) == 0 ) {
		return $this->mRules;
	}

	// display current variant in bidirectional array
	$shown = $this->getTextInBidtable( $variant );
	if ( $shown !== false ) {
		return $shown;
	}

	// or display current variant in fallbacks
	$shown = $this->getTextInBidtable(
		$this->mConverter->getVariantFallbacks( $variant )
	);
	if ( $shown !== false ) {
		return $shown;
	}

	// or display current variant in unidirectional array
	if ( array_key_exists( $variant, $oneWay ) ) {
		$shown = array_values( $oneWay[$variant] )[0];
		if ( $shown !== false ) {
			return $shown;
		}
	}

	// or display first text under disable manual convert
	if ( $this->mConverter->mManualLevel[$variant] == 'disable' ) {
		$shown = count( $twoWay ) > 0
			? array_values( $twoWay )[0]
			: array_values( array_values( $oneWay )[0] )[0];
	}

	return $shown;
}
244 
/**
 * Choose the title text for the given variant.
 *
 * For any variant other than the converter's main language code this
 * defers to getRuleConvertedStr(). For the main language code only text
 * targeting exactly that variant is used, so that otherwise the real page
 * name can be shown.
 *
 * @param string $variant Variant code
 * @return string|bool Title text, or the (falsy) lookup result when no
 *   text targets the main variant
 */
function getRuleConvertedTitle( $variant ) {
	if ( $variant !== $this->mConverter->mMainLanguageCode ) {
		return $this->getRuleConvertedStr( $variant );
	}

	// variant === main language: there will be no further automatic
	// conversion, so only an exact match is acceptable.
	$title = $this->getTextInBidtable( $variant );
	if ( !$title && array_key_exists( $variant, $this->mUnidtable ) ) {
		$title = array_values( $this->mUnidtable[$variant] )[0];
	}

	// Bidirectional hit, unidirectional hit, or still the falsy lookup result.
	return $title;
}
274 
/**
 * Fill $this->mConvTable from the bidirectional and unidirectional tables.
 *
 * When both tables are empty the conversion table is reset to an empty
 * array; otherwise entries are added to whatever it already contains.
 */
function generateConvTable() {
	// Special case optimisation
	if ( !( $this->mBidtable || $this->mUnidtable ) ) {
		$this->mConvTable = [];
		return;
	}

	$twoWay = $this->mBidtable;
	$oneWay = $this->mUnidtable;
	$levels = $this->mConverter->mManualLevel;

	// Variants already known to have a bidirectional text.
	$seen = [];
	foreach ( $this->mConverter->mVariants as $code ) {
		// Bidirectional table: fill in a missing variant from its fallbacks.
		if ( !isset( $twoWay[$code] ) ) {
			$inherited = $this->getTextInBidtable(
				$this->mConverter->getVariantFallbacks( $code )
			);
			if ( $inherited ) {
				$twoWay[$code] = $inherited;
			}
		}

		if ( isset( $twoWay[$code] ) ) {
			// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
			// or -{H|zh:WordZh;zh-tw:WordTw}-
			// or -{-|zh:WordZh;zh-tw:WordTw}-
			// to introduce a custom mapping between
			// words WordZh and WordTw in the whole text
			foreach ( $seen as $earlier ) {
				if ( $levels[$code] == 'bidirectional' ) {
					$this->mConvTable[$code][$twoWay[$earlier]] = $twoWay[$code];
				}
				if ( $levels[$earlier] == 'bidirectional' ) {
					$this->mConvTable[$earlier][$twoWay[$code]] = $twoWay[$earlier];
				}
			}
			$seen[] = $code;
		}

		// Unidirectional table: merged in front of any existing entries,
		// so on key collisions the unidirectional rule wins.
		$acceptsRules = $levels[$code] == 'bidirectional'
			|| $levels[$code] == 'unidirectional';
		if ( $acceptsRules && isset( $oneWay[$code] ) ) {
			$this->mConvTable[$code] = isset( $this->mConvTable[$code] )
				? $oneWay[$code] + $this->mConvTable[$code]
				: $oneWay[$code];
		}
	}
}
332 
/**
 * Parse flags and rules of this markup, then compute the display text,
 * the title, the rules action and the conversion table.
 *
 * @param string|null $variant Variant code to render for; when empty, the
 *   converter's preferred variant is used
 */
public function parse( $variant = null ) {
	if ( !$variant ) {
		$variant = $this->mConverter->getPreferredVariant();
	}

	$this->parseFlags();
	// Local working copy of the parsed flags. Every check below reads it,
	// so it must be initialised here; whenever it is replaced further down
	// the new value is written back to $this->mFlags as well.
	$flags = $this->mFlags;

	// convert to specified variant
	// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
	if ( $this->mVariantFlags ) {
		// check if current variant in flags
		if ( isset( $this->mVariantFlags[$variant] ) ) {
			// then convert <text to convert> to current language
			$this->mRules = $this->mConverter->autoConvert( $this->mRules,
				$variant );
		} else {
			// if current variant is not in flags,
			// then we check its fallback variants.
			$variantFallbacks =
				$this->mConverter->getVariantFallbacks( $variant );
			if ( is_array( $variantFallbacks ) ) {
				foreach ( $variantFallbacks as $variantFallback ) {
					// if current variant's fallback exists in flags
					if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
						// then convert <text to convert> to fallback language
						$this->mRules =
							$this->mConverter->autoConvert( $this->mRules,
								$variantFallback );
						break;
					}
				}
			}
		}
		// Variant-flag markup is always displayed raw afterwards.
		$this->mFlags = $flags = [ 'R' => true ];
	}

	if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
		// decode => HTML entities modified by Sanitizer::removeHTMLtags
		$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
		$this->parseRules();
	}
	$rules = $this->mRules;

	if ( !$this->mBidtable && !$this->mUnidtable ) {
		if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
			// fill all variants if text in -{A/H/-|text}- is non-empty but without rules
			if ( $rules !== '' ) {
				foreach ( $this->mConverter->mVariants as $v ) {
					$this->mBidtable[$v] = $rules;
				}
			}
		} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
			// No rules and no flag that works without them: show the text raw.
			$this->mFlags = $flags = [ 'R' => true ];
		}
	}

	// false marks "no flag produced a display text"; it is replaced by an
	// error message after the loop.
	$this->mRuleDisplay = false;
	foreach ( $flags as $flag => $unused ) {
		switch ( $flag ) {
			case 'R':
				// if we don't do content convert, still strip the -{}- tags
				$this->mRuleDisplay = $rules;
				break;
			case 'N':
				// process N flag: output current variant name
				$ruleVar = trim( $rules );
				if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
					$this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
				} else {
					$this->mRuleDisplay = '';
				}
				break;
			case 'D':
				// process D flag: output rules description
				$this->mRuleDisplay = $this->getRulesDesc();
				break;
			case 'H':
				// process H,- flag or T only: output nothing
				$this->mRuleDisplay = '';
				break;
			case '-':
				$this->mRulesAction = 'remove';
				$this->mRuleDisplay = '';
				break;
			case '+':
				$this->mRulesAction = 'add';
				$this->mRuleDisplay = '';
				break;
			case 'S':
				$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
				break;
			case 'T':
				$this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
				$this->mRuleDisplay = '';
				break;
			default:
				// ignore unknown flags (but see error case below)
		}
	}
	if ( $this->mRuleDisplay === false ) {
		$this->mRuleDisplay = '<span class="error">'
			. wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
			. '</span>';
	}

	$this->generateConvTable();
}
445 
/**
 * Tell whether the rules text is non-empty.
 *
 * @return bool False only when $this->mRules is exactly the empty string
 */
public function hasRules() {
	return !( $this->mRules === '' );
}
453 
/**
 * Get the display text for the -{...}- markup.
 *
 * @return string|bool Current value of $this->mRuleDisplay
 */
public function getDisplay() {
	return $this->mRuleDisplay;
}
461 
/**
 * Get the converted title.
 *
 * @return string|bool Current value of $this->mRuleTitle (false by default)
 */
public function getTitle() {
	return $this->mRuleTitle;
}
469 
/**
 * Get the action to apply to the conversion rules.
 *
 * @return string 'none' by default; parse() sets 'add' or 'remove'
 */
public function getRulesAction() {
	return $this->mRulesAction;
}
477 
/**
 * Get the conversion table built by generateConvTable().
 *
 * @return array Current value of $this->mConvTable
 */
public function getConvTable() {
	return $this->mConvTable;
}
486 
/**
 * Get the conversion rules string.
 *
 * @return string Current value of $this->mRules
 */
public function getRules() {
	return $this->mRules;
}
494 
/**
 * Get the conversion flags.
 *
 * @return array Current value of $this->mFlags
 */
public function getFlags() {
	return $this->mFlags;
}
502 }
getTitle()
Get converted title.
the array() calling protocol came about after MediaWiki 1.4rc1.
getRulesAction()
Return how to deal with the conversion rules.
getFlags()
Get conversion flags.
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2703
getRuleConvertedTitle($variant)
Similar to getRuleConvertedStr(), but this prefers to use original page title if $variant === $this->...
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
Definition: hooks.txt:1936
parseRules()
Parse the rules text into the bidirectional and unidirectional tables.
generateConvTable()
Generate conversion table for all text.
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
Parser for rules of language conversion , parse rules in -{ }- tag.
parseFlags()
Parse flags with syntax -{FLAG| ...
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
parse($variant=null)
Parse rules and flags.
$from
getTextInBidtable($variants)
Check whether any of the given variants has an entry in the bidirectional table.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
getConvTable()
Get conversion table.
getRuleConvertedStr($variant)
Parse rules conversion.
__construct($text, $converter)
Constructor.
hasRules()
Checks if there are conversion rules.
static explode($separator, $subject)
Workalike for explode() with limited memory usage.
getRules()
Get conversion rules string.
getDisplay()
Get display text on markup -{...}-.