MediaWiki  master
ConverterRule.php
Go to the documentation of this file.
1 <?php
/** @var string Original text found in -{text}- */
public $mText;
/** @var LanguageConverter The LanguageConverter object this rule works with */
public $mConverter;
/** @var string|false Text to display for the markup; parse() uses false as "nothing set yet" */
public $mRuleDisplay = '';
/** @var string|false Converted title produced for the T flag in parse(); false by default */
public $mRuleTitle = false;
/** @var string The text of the rules */
public $mRules = '';
/** @var string 'none' by default; parse() switches it to 'add' or 'remove' */
public $mRulesAction = 'none';
/** @var array Parsed flags, stored as flag => true */
public $mFlags = [];
/** @var array Variant codes that were given as flags, stored as array keys */
public $mVariantFlags = [];
/** @var array Conversion table filled by generateConvTable(): variant => [ from => to ] */
public $mConvTable = [];
/** @var array Translation in each variant: variant => text */
public $mBidtable = [];
/** @var array Translation in each variant: variant => [ from => to ] */
public $mUnidtable = [];
39 
/**
 * Store the raw markup text and the converter used to interpret it.
 *
 * @param string $text The text found between -{ and }-
 * @param LanguageConverter $converter The LanguageConverter object
 */
public function __construct( $text, $converter ) {
	$this->mConverter = $converter;
	$this->mText = $text;
}
48 
/**
 * Look up the first of the given variants that has an entry in the
 * bidirectional table and return its text.
 *
 * @param array|string $variants One variant code or a list of them
 * @return string|false The text of the first matching variant, false if none matches
 */
public function getTextInBidtable( $variants ) {
	// A scalar becomes a one-element list; an empty list simply skips the loop.
	foreach ( (array)$variants as $code ) {
		if ( isset( $this->mBidtable[$code] ) ) {
			return $this->mBidtable[$code];
		}
	}
	return false;
}
67 
/**
 * Parse flags with syntax -{FLAG| ... }-
 *
 * Splits mText at the first '|': the part before it is read as a
 * ';'-separated flag list, the part after it becomes mRules. The result
 * is normalised and stored in mFlags and mVariantFlags.
 */
function parseFlags() {
	$rulesText = $this->mText;
	$parsed = [];
	$variantFlags = [];

	$pipeAt = strpos( $rulesText, '|' );
	if ( $pipeAt !== false ) {
		$known = $this->mConverter->mFlags;
		$candidates = StringUtils::explode( ';', substr( $rulesText, 0, $pipeAt ) );
		foreach ( $candidates as $candidate ) {
			$candidate = trim( $candidate );
			// Only flags known to the converter are kept, under their mapped name
			if ( isset( $known[$candidate] ) ) {
				$parsed[$known[$candidate]] = true;
			}
		}
		$rulesText = strval( substr( $rulesText, $pipeAt + 1 ) );
	}

	// R, N and - (checked in that order) each discard every other flag
	$sole = null;
	foreach ( [ 'R', 'N', '-' ] as $exclusive ) {
		if ( isset( $parsed[$exclusive] ) ) {
			$sole = $exclusive;
			break;
		}
	}

	if ( !$parsed ) {
		// No flag at all
		$parsed['S'] = true;
	} elseif ( $sole !== null ) {
		$parsed = [ $sole => true ];
	} elseif ( isset( $parsed['T'] ) && count( $parsed ) === 1 ) {
		// T on its own also gets H
		$parsed['H'] = true;
	} elseif ( isset( $parsed['H'] ) ) {
		// H replaces A by +, and drops everything except T and D
		$kept = [ '+' => true, 'H' => true ];
		foreach ( [ 'T', 'D' ] as $optional ) {
			if ( isset( $parsed[$optional] ) ) {
				$kept[$optional] = true;
			}
		}
		$parsed = $kept;
	} else {
		if ( isset( $parsed['A'] ) ) {
			$parsed['+'] = true;
			$parsed['S'] = true;
		}
		if ( isset( $parsed['D'] ) ) {
			unset( $parsed['S'] );
		}
		// Look for flags that are variant codes such as "zh-hans", "zh-hant",
		// which allows syntaxes like "-{zh-hans;zh-hant|XXXX}-"
		$variantFlags = array_intersect(
			array_keys( $parsed ),
			$this->mConverter->mVariants
		);
		if ( $variantFlags ) {
			$variantFlags = array_flip( $variantFlags );
			$parsed = [];
		}
	}

	$this->mFlags = $parsed;
	$this->mRules = $rulesText;
	$this->mVariantFlags = $variantFlags;
}
130 
/**
 * Parse mRules into the bidirectional table (variant => text) and the
 * unidirectional table (variant => [ from => to ]).
 *
 * Each choice has the form "variant:text" or "from=>variant:text".
 * A choice without ':' is skipped; a choice whose variant is not valid
 * discards everything parsed so far and stops parsing.
 */
public function parseRules() {
	$rules = $this->mRules;
	$bidtable = [];
	$unidtable = [];
	$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

	// Split according to $varsep_pattern, but ignore semicolons from HTML entities
	$rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
	$choice = preg_split( $varsep_pattern, $rules );
	$choice = str_replace( "\x01", ';', $choice );

	foreach ( $choice as $c ) {
		$v = explode( ':', $c, 2 );
		if ( count( $v ) !== 2 ) {
			// syntax error, skip
			continue;
		}
		$to = trim( $v[1] );
		$v = trim( $v[0] );
		$u = explode( '=>', $v, 2 );

		if ( count( $u ) === 2 ) {
			// Unidirectional rule: "from=>variant:to"
			$from = trim( $u[0] );
			$vv = $this->mConverter->validateVariant( trim( $u[1] ) );
			// if $from is empty, strtr() could return a wrong result.
			// Entries of $unidtable are only ever created here as arrays,
			// so appending is always safe.
			if ( $vv && $from !== '' ) {
				$unidtable[$vv][$from] = $to;
			}
		} else {
			// Bidirectional rule: "variant:to"
			$vv = $this->mConverter->validateVariant( $v );
			// if $to is empty (which is also used as $from in bidtable),
			// strtr() could return a wrong result.
			if ( $vv && $to !== '' ) {
				$bidtable[$vv] = $to;
			}
		}

		// syntax error, pass
		// Check $vv first so an invalid (empty) result is never used as an array key.
		if ( !$vv || !isset( $this->mConverter->mVariantNames[$vv] ) ) {
			$bidtable = [];
			$unidtable = [];
			break;
		}
	}

	$this->mBidtable = $bidtable;
	$this->mUnidtable = $unidtable;
}
185 
/**
 * Build a textual description of the parsed rules.
 *
 * Bidirectional entries come first, as "<variant name><codesep><text><varsep>",
 * followed by unidirectional entries, each prefixed with "<from>⇒".
 *
 * @return string
 */
function getRulesDesc() {
	$codesep = $this->mConverter->mDescCodeSep;
	$varsep = $this->mConverter->mDescVarSep;

	$pieces = [];
	foreach ( $this->mBidtable as $code => $translated ) {
		$pieces[] = $this->mConverter->mVariantNames[$code] . $codesep . $translated . $varsep;
	}
	foreach ( $this->mUnidtable as $code => $pairs ) {
		foreach ( $pairs as $from => $to ) {
			$pieces[] = $from . '⇒' . $this->mConverter->mVariantNames[$code]
				. $codesep . $to . $varsep;
		}
	}

	return implode( '', $pieces );
}
206 
/**
 * Pick the text to display for the given variant.
 *
 * Without any parsed rules the raw rule text is returned. Otherwise the
 * lookup order is: the variant in the bidirectional table, its fallbacks
 * in the bidirectional table, the variant in the unidirectional table and,
 * if manual conversion is set to 'disable' for the variant, the first
 * text available.
 *
 * @param string $variant Variant code
 * @return string|false The text to display, false if none was found
 */
function getRuleConvertedStr( $variant ) {
	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;

	if ( count( $bidtable ) === 0 && count( $unidtable ) === 0 ) {
		return $this->mRules;
	}

	// display current variant in bidirectional array
	$disp = $this->getTextInBidtable( $variant );

	// or display current variant in fallbacks
	if ( $disp === false ) {
		$fallbacks = $this->mConverter->getVariantFallbacks( $variant );
		$disp = $this->getTextInBidtable( $fallbacks );
	}

	// or display current variant in unidirectional array
	if ( $disp === false && array_key_exists( $variant, $unidtable ) ) {
		$targets = array_values( $unidtable[$variant] );
		$disp = $targets[0];
	}

	// or display first text under disable manual convert
	if ( $disp === false && $this->mConverter->mManualLevel[$variant] == 'disable' ) {
		if ( count( $bidtable ) > 0 ) {
			$texts = array_values( $bidtable );
			$disp = $texts[0];
		} else {
			$firstPairs = array_values( $unidtable )[0];
			$disp = array_values( $firstPairs )[0];
		}
	}

	return $disp;
}
244 
/**
 * Similar to getRuleConvertedStr(), but for the main language code only a
 * string targeting exactly this variant is used.
 *
 * When $variant is the main language code and no such string exists,
 * false is returned so the real page name can be shown (and because
 * variant === main, there will be no further automatic conversion).
 *
 * @param string $variant Variant code
 * @return string|false The converted title, false if there is none
 */
public function getRuleConvertedTitle( $variant ) {
	if ( $variant !== $this->mConverter->mMainLanguageCode ) {
		return $this->getRuleConvertedStr( $variant );
	}

	$disp = $this->getTextInBidtable( $variant );
	// Compare against false rather than testing truthiness: a text such as
	// "0" is a legitimate result and must not be overridden by the
	// unidirectional table.
	if ( $disp !== false ) {
		return $disp;
	}

	if ( array_key_exists( $variant, $this->mUnidtable ) ) {
		return array_values( $this->mUnidtable[$variant] )[0];
	}

	return false;
}
274 
/**
 * Generate the conversion table (mConvTable) from the bidirectional and
 * unidirectional tables.
 *
 * With both tables empty, mConvTable is reset to an empty array.
 * Otherwise entries are written into mConvTable per variant, depending on
 * the converter's manual level for that variant.
 */
public function generateConvTable() {
	// Special case optimisation
	if ( !$this->mBidtable && !$this->mUnidtable ) {
		$this->mConvTable = [];
		return;
	}

	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;
	$manLevel = $this->mConverter->mManualLevel;

	$vmarked = [];
	foreach ( $this->mConverter->mVariants as $v ) {
		/* for bidirectional array
			fill in the missing variants, if any,
			with fallbacks */
		if ( !isset( $bidtable[$v] ) ) {
			$variantFallbacks =
				$this->mConverter->getVariantFallbacks( $v );
			$vf = $this->getTextInBidtable( $variantFallbacks );
			// getTextInBidtable() signals "not found" with false; a plain
			// truthiness test would also discard a fallback text of "0".
			if ( $vf !== false ) {
				$bidtable[$v] = $vf;
			}
		}

		if ( isset( $bidtable[$v] ) ) {
			foreach ( $vmarked as $vo ) {
				// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
				// or -{H|zh:WordZh;zh-tw:WordTw}-
				// or -{-|zh:WordZh;zh-tw:WordTw}-
				// to introduce a custom mapping between
				// words WordZh and WordTw in the whole text
				if ( $manLevel[$v] == 'bidirectional' ) {
					$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
				}
				if ( $manLevel[$vo] == 'bidirectional' ) {
					$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
				}
			}
			$vmarked[] = $v;
		}

		/* for unidirectional array fill to convert tables */
		if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
			&& isset( $unidtable[$v] )
		) {
			if ( isset( $this->mConvTable[$v] ) ) {
				// Array union: unidirectional entries win over existing ones
				$this->mConvTable[$v] = $unidtable[$v] + $this->mConvTable[$v];
			} else {
				$this->mConvTable[$v] = $unidtable[$v];
			}
		}
	}
}
332 
/**
 * Parse rules and flags.
 *
 * Runs parseFlags(), converts or parses the rule text depending on the
 * flags, then sets the display text (mRuleDisplay), the title (mRuleTitle),
 * the rule action (mRulesAction) and the conversion table.
 *
 * @param string|null $variant Variant code; when empty, the converter's
 *   preferred variant is used
 */
public function parse( $variant = null ) {
	if ( !$variant ) {
		$variant = $this->mConverter->getPreferredVariant();
	}

	$this->parseFlags();
	$flags = $this->mFlags;

	// convert to specified variant
	// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
	if ( $this->mVariantFlags ) {
		// check if current variant in flags
		if ( isset( $this->mVariantFlags[$variant] ) ) {
			// then convert <text to convert> to current language
			$this->mRules = $this->mConverter->autoConvert( $this->mRules,
				$variant );
		} else {
			// if current variant no in flags,
			// then we check its fallback variants.
			$variantFallbacks =
				$this->mConverter->getVariantFallbacks( $variant );
			// getTextInBidtable() casts the fallbacks to an array, so a
			// single fallback may arrive as a plain string; accept that
			// form here too instead of silently ignoring it.
			if ( is_string( $variantFallbacks ) ) {
				$variantFallbacks = [ $variantFallbacks ];
			}
			if ( is_array( $variantFallbacks ) ) {
				foreach ( $variantFallbacks as $variantFallback ) {
					// if current variant's fallback exist in flags
					if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
						// then convert <text to convert> to fallback language
						$this->mRules =
							$this->mConverter->autoConvert( $this->mRules,
								$variantFallback );
						break;
					}
				}
			}
		}
		$this->mFlags = $flags = [ 'R' => true ];
	}

	if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
		// decode => HTML entities modified by Sanitizer::removeHTMLtags
		$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
		$this->parseRules();
	}
	$rules = $this->mRules;

	if ( !$this->mBidtable && !$this->mUnidtable ) {
		if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
			// fill all variants if text in -{A/H/-|text}- is non-empty but without rules
			if ( $rules !== '' ) {
				foreach ( $this->mConverter->mVariants as $v ) {
					$this->mBidtable[$v] = $rules;
				}
			}
		} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
			$this->mFlags = $flags = [ 'R' => true ];
		}
	}

	// false marks "no flag produced a display text"; see the error case below.
	// Flags are handled in array order, so a later flag overrides an earlier one.
	$this->mRuleDisplay = false;
	foreach ( $flags as $flag => $unused ) {
		switch ( $flag ) {
			case 'R':
				// if we don't do content convert, still strip the -{}- tags
				$this->mRuleDisplay = $rules;
				break;
			case 'N':
				// process N flag: output current variant name
				$ruleVar = trim( $rules );
				$this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar] ?? '';
				break;
			case 'D':
				// process D flag: output rules description
				$this->mRuleDisplay = $this->getRulesDesc();
				break;
			case 'H':
				// process H,- flag or T only: output nothing
				$this->mRuleDisplay = '';
				break;
			case '-':
				$this->mRulesAction = 'remove';
				$this->mRuleDisplay = '';
				break;
			case '+':
				$this->mRulesAction = 'add';
				$this->mRuleDisplay = '';
				break;
			case 'S':
				$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
				break;
			case 'T':
				$this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
				$this->mRuleDisplay = '';
				break;
			default:
				// ignore unknown flags (but see error case below)
		}
	}
	if ( $this->mRuleDisplay === false ) {
		$this->mRuleDisplay = '<span class="error">'
			. wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
			. '</span>';
	}

	$this->generateConvTable();
}
441 
/**
 * Checks if there are conversion rules.
 *
 * @return bool True when the rules text is not the empty string
 */
public function hasRules() {
	return '' !== $this->mRules;
}
449 
/**
 * Get display text on markup -{...}-
 *
 * @return string|false The current value of mRuleDisplay
 */
public function getDisplay() {
	return $this->mRuleDisplay;
}
457 
/**
 * Get converted title.
 *
 * @return string|false The current value of mRuleTitle (false by default)
 */
public function getTitle() {
	return $this->mRuleTitle;
}
465 
/**
 * Return how to deal with the conversion rules.
 *
 * @return string 'none' by default; parse() sets 'add' or 'remove'
 */
public function getRulesAction() {
	return $this->mRulesAction;
}
473 
/**
 * Get conversion table.
 *
 * @return array The current value of mConvTable
 */
public function getConvTable() {
	return $this->mConvTable;
}
482 
/**
 * Get conversion rules string.
 *
 * @return string The current value of mRules
 */
public function getRules() {
	return $this->mRules;
}
490 
/**
 * Get conversion flags.
 *
 * @return array The current value of mFlags
 */
public function getFlags() {
	return $this->mFlags;
}
498 }
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
getTitle()
Get converted title.
__construct( $text, $converter)
getRulesAction()
Return how to deal with the conversion rules.
getFlags()
Get conversion flags.
parse( $variant=null)
Parse rules and flags.
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return true
Definition: hooks.txt:1982
parseRules()
Parse the rule text into the bidirectional and unidirectional conversion tables.
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
generateConvTable()
Generate conversion table for all text.
getRuleConvertedTitle( $variant)
Similar to getRuleConvertedStr(), but this prefers to use original page title if $variant === $this->...
getTextInBidtable( $variants)
Check whether any of the given variants has an entry in the bidirectional conversion table.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Parser for rules of language conversion, parse rules in -{ }- tag.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:780
parseFlags()
Parse flags with syntax -{FLAG| ...
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
getRuleConvertedStr( $variant)
Parse rules conversion.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
getConvTable()
Get conversion table.
$f
Definition: router.php:79
hasRules()
Checks if there are conversion rules.
getRules()
Get conversion rules string.
getDisplay()
Get display text on markup -{...}-.