MediaWiki REL1_35
ConverterRule.php
Go to the documentation of this file.
1<?php
/**
 * @var string Original text in -{text}-
 */
public $mText;
/**
 * @var LanguageConverter Converter this rule works with. Declared explicitly
 *  because the constructor assigns it and every parsing method reads it;
 *  leaving it undeclared makes it a dynamic property.
 */
public $mConverter;
/**
 * @var string|false Text to display in place of the -{...}- markup; parse()
 *  uses false only as a temporary "not set yet" marker
 */
public $mRuleDisplay = '';
/**
 * @var string|false Converted title, or false when no title was produced
 */
public $mRuleTitle = false;
/**
 * @var string The text of the rules
 */
public $mRules = '';
/**
 * @var string What to do with the rules: 'none', 'add' or 'remove'
 */
public $mRulesAction = 'none';
/**
 * @var array Flags as flag => true
 */
public $mFlags = [];
/**
 * @var array Variant codes that were given as flags, stored as array keys
 */
public $mVariantFlags = [];
/**
 * @var array Conversion table, variant code => [ from => to ]
 */
public $mConvTable = [];
/**
 * @var array Bidirectional table, variant code => text
 */
public $mBidtable = [];
/**
 * @var array Unidirectional table, variant code => [ from => to ]
 */
public $mUnidtable = [];
55
/**
 * Store the raw markup text and the converter; nothing is parsed until
 * parse() is called.
 *
 * @param string $text Original text in -{text}-
 * @param LanguageConverter $converter
 */
public function __construct( $text, LanguageConverter $converter ) {
	$this->mConverter = $converter;
	$this->mText = $text;
}
64
/**
 * Return the bidirectional-table text of the first given variant that
 * has an entry.
 *
 * @param string|string[] $variants A single variant code or a list of codes,
 *  tried in order
 * @return string|false The text of the first match, or false when the list
 *  is empty or none of the variants has an entry
 */
public function getTextInBidtable( $variants ) {
	// An empty list simply runs zero iterations and falls through to false.
	foreach ( (array)$variants as $code ) {
		if ( isset( $this->mBidtable[$code] ) ) {
			return $this->mBidtable[$code];
		}
	}
	return false;
}
83
/**
 * Parse flags with syntax -{FLAG| ... }-.
 *
 * The text before the first '|' is read as a ';'-separated flag list and
 * the text after it becomes $this->mRules (the whole text when there is
 * no '|'). The normalised flags are stored in $this->mFlags; variant codes
 * that were used as flags are stored as keys of $this->mVariantFlags.
 */
private function parseFlags() {
	$rest = $this->mText;
	$found = [];
	$variantFlags = [];

	$pipeAt = strpos( $rest, '|' );
	if ( $pipeAt !== false ) {
		// Maps each accepted spelling to its canonical flag.
		$known = $this->mConverter->mFlags;
		foreach ( StringUtils::explode( ';', substr( $rest, 0, $pipeAt ) ) as $name ) {
			$name = trim( $name );
			if ( isset( $known[$name] ) ) {
				$found[$known[$name]] = true;
			}
		}
		$rest = strval( substr( $rest, $pipeAt + 1 ) );
	}

	// R, N and - each discard every other flag; when several are present
	// the first one in this order wins.
	$exclusive = null;
	foreach ( [ 'R', 'N', '-' ] as $candidate ) {
		if ( isset( $found[$candidate] ) ) {
			$exclusive = $candidate;
			break;
		}
	}

	if ( !$found ) {
		// No recognised flag at all
		$found['S'] = true;
	} elseif ( $exclusive !== null ) {
		$found = [ $exclusive => true ];
	} elseif ( count( $found ) === 1 && isset( $found['T'] ) ) {
		// T on its own also gets H
		$found['H'] = true;
	} elseif ( isset( $found['H'] ) ) {
		// H implies +, and only T and D survive next to it. T is added
		// before D so the resulting key order is always +, H, T, D.
		$kept = [ '+' => true, 'H' => true ];
		foreach ( [ 'T', 'D' ] as $optional ) {
			if ( isset( $found[$optional] ) ) {
				$kept[$optional] = true;
			}
		}
		$found = $kept;
	} else {
		if ( isset( $found['A'] ) ) {
			$found['+'] = true;
			$found['S'] = true;
		}
		if ( isset( $found['D'] ) ) {
			unset( $found['S'] );
		}
		// Variant codes used as flags, e.g. "-{zh-hans;zh-hant|XXXX}-",
		// replace all other flags.
		$codes = array_intersect( array_keys( $found ), $this->mConverter->getVariants() );
		if ( $codes ) {
			$variantFlags = array_flip( $codes );
			$found = [];
		}
	}

	$this->mVariantFlags = $variantFlags;
	$this->mRules = $rest;
	$this->mFlags = $found;
}
148
/**
 * Generate conversion table.
 *
 * Splits $this->mRules into "variant:text" (bidirectional) and
 * "from=>variant:text" (unidirectional) items and stores them in
 * $this->mBidtable and $this->mUnidtable. Items without a ':' are skipped;
 * an item whose variant is not a key of the converter's mVariantNames
 * empties both tables and stops parsing.
 */
private function parseRules() {
	$rules = $this->mRules;
	$bidtable = [];
	$unidtable = [];
	$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

	// Split according to $varsep_pattern, but ignore semicolons from HTML
	// entities: they are swapped for \x01 before the split and restored after.
	$rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
	$choice = preg_split( $varsep_pattern, $rules );
	$choice = str_replace( "\x01", ';', $choice );

	foreach ( $choice as $c ) {
		$v = explode( ':', $c, 2 );
		if ( count( $v ) !== 2 ) {
			// syntax error, skip
			continue;
		}
		$to = trim( $v[1] );
		$v = trim( $v[0] );
		$u = explode( '=>', $v, 2 );
		$vv = $this->mConverter->validateVariant( $v );
		// if $to is empty (which is also used as $from in bidtable),
		// strtr() could return a wrong result.
		if ( count( $u ) === 1 && $to !== '' && $vv ) {
			$bidtable[$vv] = $to;
		} elseif ( count( $u ) === 2 ) {
			$from = trim( $u[0] );
			$v = trim( $u[1] );
			$vv = $this->mConverter->validateVariant( $v );
			// if $from is empty, strtr() could return a wrong result.
			// Entries of $unidtable are only ever created here as arrays,
			// so no separate "existing entry is not an array" case is needed.
			if ( $from !== '' && $vv ) {
				$unidtable[$vv][$from] = $to;
			}
		}
		// syntax error: unknown variant, discard everything parsed so far
		if ( !isset( $this->mConverter->mVariantNames[$vv] ) ) {
			$bidtable = [];
			$unidtable = [];
			break;
		}
	}
	$this->mBidtable = $bidtable;
	$this->mUnidtable = $unidtable;
}
201
/**
 * Build a description of the parsed rules.
 *
 * Each bidirectional entry is rendered as "<variant name><code sep><text>"
 * and each unidirectional entry as "<from>⇒<variant name><code sep><to>";
 * every entry is followed by the converter's variant separator.
 *
 * @return string
 */
private function getRulesDesc() {
	$codeSep = $this->mConverter->mDescCodeSep;
	$varSep = $this->mConverter->mDescVarSep;

	$pieces = [];
	foreach ( $this->mBidtable as $code => $text ) {
		$pieces[] = $this->mConverter->mVariantNames[$code] . $codeSep . $text . $varSep;
	}
	foreach ( $this->mUnidtable as $code => $pairs ) {
		foreach ( $pairs as $from => $to ) {
			$pieces[] = $from . '⇒' . $this->mConverter->mVariantNames[$code]
				. $codeSep . $to . $varSep;
		}
	}
	return implode( '', $pieces );
}
220
/**
 * Pick the text to display for the given variant.
 *
 * Tried in order: the variant's own bidirectional entry, the
 * bidirectional entry of one of its fallbacks, the first target of the
 * variant's unidirectional entries, and - only when manual conversion is
 * 'disable' for the variant - the first text found in either table.
 *
 * @param string $variant
 * @return string|false The rule text as-is when both tables are empty;
 *  false when nothing applies to the variant
 */
private function getRuleConvertedStr( $variant ) {
	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;

	if ( !count( $bidtable ) && !count( $unidtable ) ) {
		return $this->mRules;
	}

	// Current variant in the bidirectional table
	$shown = $this->getTextInBidtable( $variant );
	if ( $shown !== false ) {
		return $shown;
	}

	// Fallbacks of the current variant in the bidirectional table
	$shown = $this->getTextInBidtable(
		$this->mConverter->getVariantFallbacks( $variant )
	);
	if ( $shown !== false ) {
		return $shown;
	}

	// Current variant in the unidirectional table
	if ( array_key_exists( $variant, $unidtable ) ) {
		$shown = array_values( $unidtable[$variant] )[0];
	}

	// With manual conversion disabled, fall back to the first text available
	if ( $shown === false && $this->mConverter->mManualLevel[$variant] === 'disable' ) {
		$shown = count( $bidtable ) > 0
			? array_values( $bidtable )[0]
			: array_values( array_values( $unidtable )[0] )[0];
	}

	return $shown;
}
258
/**
 * Similar to getRuleConvertedStr(), but for the converter's main language
 * code only a text targeting exactly that variant is used.
 *
 * @param string $variant
 * @return string|false For the main language code: the variant's
 *  bidirectional text, else the first target of its unidirectional
 *  entries, else false so the real page name can be shown. For any other
 *  variant: the result of getRuleConvertedStr().
 */
private function getRuleConvertedTitle( $variant ) {
	if ( $variant !== $this->mConverter->mMainLanguageCode ) {
		return $this->getRuleConvertedStr( $variant );
	}

	// variant === main language code, so there will be no further
	// automatic conversion of whatever is returned here.
	$disp = $this->getTextInBidtable( $variant );
	// Test against false rather than truthiness: a title of "0" is a
	// real match and must not be replaced by the unidirectional entry.
	if ( $disp === false && array_key_exists( $variant, $this->mUnidtable ) ) {
		$disp = array_values( $this->mUnidtable[$variant] )[0];
	}
	// Assigned above or still false.
	return $disp;
}
288
/**
 * Generate conversion table for all text.
 *
 * Fills $this->mConvTable (variant code => [ from => to ]) from the
 * bidirectional and unidirectional tables, honouring the converter's
 * manual conversion level for each variant.
 */
private function generateConvTable() {
	// Special case optimisation
	if ( !$this->mBidtable && !$this->mUnidtable ) {
		$this->mConvTable = [];
		return;
	}

	$bidtable = $this->mBidtable;
	$unidtable = $this->mUnidtable;
	$manLevel = $this->mConverter->mManualLevel;

	// Variants seen so far that have a bidirectional text
	$vmarked = [];
	foreach ( $this->mConverter->mVariants as $v ) {
		// For the bidirectional array, fill in the missing variants,
		// if any, with fallbacks.
		if ( !isset( $bidtable[$v] ) ) {
			$variantFallbacks =
				$this->mConverter->getVariantFallbacks( $v );
			$vf = $this->getTextInBidtable( $variantFallbacks );
			// Compare with false explicitly so a fallback text of "0" is
			// used as well. An empty string is still skipped because it
			// would end up as an empty search key in the table.
			if ( $vf !== false && $vf !== '' ) {
				$bidtable[$v] = $vf;
			}
		}

		if ( isset( $bidtable[$v] ) ) {
			foreach ( $vmarked as $vo ) {
				// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
				// or -{H|zh:WordZh;zh-tw:WordTw}-
				// or -{-|zh:WordZh;zh-tw:WordTw}-
				// to introduce a custom mapping between
				// words WordZh and WordTw in the whole text
				if ( $manLevel[$v] === 'bidirectional' ) {
					$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
				}
				if ( $manLevel[$vo] === 'bidirectional' ) {
					$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
				}
			}
			$vmarked[] = $v;
		}

		// For the unidirectional array, fill into the convert tables.
		// The + operator keeps the unidirectional entry when both tables
		// define the same source text.
		if ( ( $manLevel[$v] === 'bidirectional' || $manLevel[$v] === 'unidirectional' )
			&& isset( $unidtable[$v] )
		) {
			if ( isset( $this->mConvTable[$v] ) ) {
				$this->mConvTable[$v] = $unidtable[$v] + $this->mConvTable[$v];
			} else {
				$this->mConvTable[$v] = $unidtable[$v];
			}
		}
	}
}
345
/**
 * Parse rules and flags.
 *
 * Runs parseFlags(), then (unless the flags are R or N) parseRules(), and
 * finally sets the display text, the title, the rules action and the
 * conversion table according to the flags.
 *
 * @param string|null $variant Variant to render for; when empty, the
 *  converter's preferred variant is used
 */
public function parse( $variant = null ) {
	if ( !$variant ) {
		$variant = $this->mConverter->getPreferredVariant();
	}

	$this->parseFlags();
	$flags = $this->mFlags;

	// Convert to a specified variant,
	// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
	if ( $this->mVariantFlags ) {
		// Candidates are the current variant itself when it was listed,
		// otherwise its fallbacks in order.
		$targets = [ $variant ];
		if ( !isset( $this->mVariantFlags[$variant] ) ) {
			$fallbacks = $this->mConverter->getVariantFallbacks( $variant );
			$targets = is_array( $fallbacks ) ? $fallbacks : [];
		}
		foreach ( $targets as $target ) {
			if ( isset( $this->mVariantFlags[$target] ) ) {
				// Convert <text to convert> to the first listed candidate
				$this->mRules = $this->mConverter->autoConvert( $this->mRules, $target );
				break;
			}
		}
		$flags = [ 'R' => true ];
		$this->mFlags = $flags;
	}

	if ( !( isset( $flags['R'] ) || isset( $flags['N'] ) ) ) {
		// decode => HTML entities modified by Sanitizer::removeHTMLtags
		$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
		$this->parseRules();
	}
	$rules = $this->mRules;

	if ( !( $this->mBidtable || $this->mUnidtable ) ) {
		if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
			// Non-empty text in -{A/H/-|text}- without rules applies
			// to every variant
			if ( $rules !== '' ) {
				foreach ( $this->mConverter->mVariants as $code ) {
					$this->mBidtable[$code] = $rules;
				}
			}
		} elseif ( !( isset( $flags['N'] ) || isset( $flags['T'] ) ) ) {
			$flags = [ 'R' => true ];
			$this->mFlags = $flags;
		}
	}

	// false means "no flag produced a display text"; see the error case below
	$this->mRuleDisplay = false;
	foreach ( $flags as $flag => $unused ) {
		switch ( $flag ) {
			case 'R':
				// No content conversion, but the -{}- tags are still stripped
				$this->mRuleDisplay = $rules;
				break;
			case 'N':
				// Output the name of the variant given as the rule text
				$this->mRuleDisplay = $this->mConverter->mVariantNames[trim( $rules )] ?? '';
				break;
			case 'D':
				// Output the rules description
				$this->mRuleDisplay = $this->getRulesDesc();
				break;
			case 'H':
				// Output nothing
				$this->mRuleDisplay = '';
				break;
			case '-':
				$this->mRulesAction = 'remove';
				$this->mRuleDisplay = '';
				break;
			case '+':
				$this->mRulesAction = 'add';
				$this->mRuleDisplay = '';
				break;
			case 'S':
				$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
				break;
			case 'T':
				$this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
				$this->mRuleDisplay = '';
				break;
			default:
				// ignore unknown flags (but see error case below)
		}
	}

	if ( $this->mRuleDisplay === false ) {
		$errorText = wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped();
		$this->mRuleDisplay = '<span class="error">' . $errorText . '</span>';
	}

	$this->generateConvTable();
}
454
/**
 * Checks if there are conversion rules.
 *
 * @return bool True when the rule text is not the empty string
 */
public function hasRules() {
	return $this->mRules !== '';
}
462
/**
 * Get display text on markup -{...}-.
 *
 * @return string Whatever parse() stored as the display text; the empty
 *  string before parse() has run
 */
public function getDisplay() {
	return $this->mRuleDisplay;
}
470
/**
 * Get converted title.
 *
 * @return string|false The title stored by parse() for the T flag, or
 *  false when none was stored
 */
public function getTitle() {
	return $this->mRuleTitle;
}
478
/**
 * Return how to deal with the conversion rules.
 *
 * @return string 'none' by default; parse() sets 'add' for the + flag
 *  and 'remove' for the - flag
 */
public function getRulesAction() {
	return $this->mRulesAction;
}
486
/**
 * Get conversion table.
 *
 * @return array Variant code => [ from => to ], as built by
 *  generateConvTable()
 */
public function getConvTable() {
	return $this->mConvTable;
}
495
/**
 * Get conversion rules string.
 *
 * @return string The text of the rules
 */
public function getRules() {
	return $this->mRules;
}
503
/**
 * Get conversion flags.
 *
 * @return array Flags as flag => true
 */
public function getFlags() {
	return $this->mFlags;
}
511}
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
The rules used for language conversion, this processes the rules extracted by Parser from the -{ }- w...
array $mBidtable
of the translation in each variant
getRules()
Get conversion rules string.
parse( $variant=null)
Parse rules and flags.
getFlags()
Get conversion flags.
__construct( $text, LanguageConverter $converter)
getTextInBidtable( $variants)
Check whether any of the given variants has an entry in the bidirectional table.
getRuleConvertedTitle( $variant)
Similar to getRuleConvertedStr(), but this prefers to use original page title if $variant === $this->...
getDisplay()
Get display text on markup -{...}-.
getRulesAction()
Return how to deal with the conversion rules.
parseFlags()
Parse flags with syntax -{FLAG| ... }-.
array $mUnidtable
of the translation in each variant
getRuleConvertedStr( $variant)
Parse rules conversion.
LanguageConverter $mConverter
generateConvTable()
Generate conversion table for all text.
string $mRules
the text of the rules
parseRules()
Generate conversion table.
hasRules()
Checks if there are conversion rules.
getConvTable()
Get conversion table.
string $mText
original text in -{text}-
getTitle()
Get converted title.
Base class for multi-variant language conversion.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
return true
Definition router.php:92