MediaWiki  master
LanguageConverter.php
Go to the documentation of this file.
1 <?php
22 
25 
/**
 * Codes of the languages supporting variants.
 * @var string[]
 */
public static $languagesWithVariants = [
	'en',
	'crh',
	'gan',
	'iu',
	'kk',
	'ku',
	'shi',
	'sr',
	'tg',
	'uz',
	'zh',
];

/** @var string Main language code, assigned by the constructor */
public $mMainLanguageCode;

/** @var string[] Variant codes, minus those listed in $wgDisabledVariants */
public $mVariants;

/** @var array Fallback variant(s), keyed by variant code */
public $mVariantFallbacks;

/** @var array Result of Language::fetchLanguageNames() */
public $mVariantNames;

/** @var bool Set once loadTables() has run */
public $mTablesLoaded = false;

/**
 * Conversion tables keyed by variant code, plus the
 * CACHE_VERSION_KEY => true marker entry.
 * @var ReplacementArray[]|bool[]
 */
public $mTables;

// 'bidirectional' 'unidirectional' 'disable' for each variant
public $mManualLevel;

public $mLangObj;
public $mFlags;
public $mDescCodeSep = ':', $mDescVarSep = ';';
public $mUcfirst = false;
public $mConvRuleTitle = false;

/** @var string|null Memoized result of getURLVariant() */
public $mURLVariant;

/** @var string|null Last result of getUserVariant() */
public $mUserVariant;

/** @var string|null Memoized result of getHeaderVariant() */
public $mHeaderVariant;

/** @var int Maximum nesting depth of -{ }- rules */
public $mMaxDepth = 10;

/** @var string|null Memoized pattern built by getVarSeparatorPattern() */
public $mVarSeparatorPattern;

const CACHE_VERSION_KEY = 'VERSION 7';
85 
/**
 * @param Language $langobj Language object stored as $mLangObj
 * @param string $maincode The main language code of this language
 * @param string[] $variants The supported variants of this language
 * @param array $variantfallbacks The fallback language of each variant
 * @param array $flags Extra flags, merged over the built-in ones
 * @param array $manualLevel Manual conversion level per variant; variants
 *   without an entry default to 'bidirectional'
 */
public function __construct( Language $langobj, $maincode, $variants = [],
	$variantfallbacks = [], $flags = [],
	$manualLevel = []
) {
	global $wgDisabledVariants;

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;
	$this->mVariants = array_diff( $variants, $wgDisabledVariants );
	$this->mVariantFallbacks = $variantfallbacks;
	$this->mVariantNames = Language::fetchLanguageNames();

	// Built-in flags, each mapped to itself:
	//   A  add rule for convert code (all text convert)
	//   T  title convert
	//   R  raw content
	//   D  convert description (subclass implement)
	//   -  remove convert (not implement)
	//   H  add rule for convert code (but no display in placed code)
	//   N  current variant name
	// 'S' (show converted text), '+' (add rules for alltext) and 'E' (the
	// given flags are in error) are reserved for the program.
	$builtinCodes = [ 'A', 'T', 'R', 'D', '-', 'H', 'N' ];
	$this->mFlags = array_merge( array_combine( $builtinCodes, $builtinCodes ), $flags );

	foreach ( $this->mVariants as $code ) {
		$this->mManualLevel[$code] = array_key_exists( $code, $manualLevel )
			? $manualLevel[$code]
			: 'bidirectional';
		// Every variant code doubles as a flag.
		$this->mFlags[$code] = $code;
	}
}
126 
/**
 * Get all valid variants.
 *
 * @return string[] The variant codes kept by the constructor
 */
public function getVariants() {
	return $this->mVariants;
}
136 
/**
 * Look up the fallback(s) registered for a variant.
 *
 * @param string $variant Variant code to look up
 * @return string|array The registered fallback entry, or the main language
 *   code when the variant has no (non-null) entry
 */
public function getVariantFallbacks( $variant ) {
	return isset( $this->mVariantFallbacks[$variant] )
		? $this->mVariantFallbacks[$variant]
		: $this->mMainLanguageCode;
}
151 
/**
 * Get the title produced by the conversion rule.
 *
 * @return string|bool The stored rule title; false when none has been set
 *   since the last convertTo() reset
 */
public function getConvRuleTitle() {
	return $this->mConvRuleTitle;
}
159 
/**
 * Get preferred language variant.
 *
 * Sources are consulted in this order: the URL, the
 * 'GetLangPreferredVariant' hook (which may modify the value), the
 * logged-in user's preference or else the Accept-Language header, and
 * finally $wgDefaultLanguageVariant.
 *
 * This function, unlike the other get*Variant functions, is not memoized
 * (i.e. its return value is not cached) since new information might
 * appear during processing after this is first called.
 *
 * @return string The validated variant code, or the main language code
 *   when no source yields a valid variant
 */
public function getPreferredVariant() {
	global $wgDefaultLanguageVariant, $wgUser;

	$req = $this->getURLVariant();

	Hooks::run( 'GetLangPreferredVariant', [ &$req ] );

	if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
		$req = $this->getUserVariant();
	} elseif ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	// The hook may have injected an arbitrary value, so validate again.
	$req = $this->validateVariant( $req );

	if ( $req ) {
		return $req;
	}

	// Nothing usable was found: fall back to the main language code so
	// callers always receive a string.
	return $this->mMainLanguageCode;
}
192 
/**
 * Get default variant.
 *
 * Same lookup as getPreferredVariant() but without consulting the user
 * preference or the hook: URL first, then the Accept-Language header,
 * then $wgDefaultLanguageVariant.
 *
 * @return string The variant code, or the main language code when no
 *   source yields a valid variant
 */
public function getDefaultVariant() {
	global $wgDefaultLanguageVariant;

	$req = $this->getURLVariant();

	if ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	if ( $req ) {
		return $req;
	}

	return $this->mMainLanguageCode;
}
216 
/**
 * Validate the variant and return an appropriate strict internal
 * variant code if one exists.
 *
 * @param string|null $variant The variant to validate (any letter case)
 * @return string|null The matching internal variant code, or null if the
 *   input is null or matches no known variant
 */
public function validateVariant( $variant = null ) {
	if ( $variant === null ) {
		return null;
	}
	// Our internal variants are always lower-case; the variant we
	// are validating may have mixed case.
	$variant = LanguageCode::replaceDeprecatedCodes( strtolower( $variant ) );
	// Strict comparison: variant codes are strings, and loose comparison
	// would treat numeric-looking strings as equal numbers.
	if ( in_array( $variant, $this->mVariants, true ) ) {
		return $variant;
	}
	// Browsers are supposed to use BCP 47 standard in the
	// Accept-Language header, but not all of our internal
	// mediawiki variant codes are BCP 47. Map BCP 47 code
	// to our internal code.
	foreach ( $this->mVariants as $v ) {
		// Case-insensitive match (BCP 47 is mixed case)
		if ( strtolower( LanguageCode::bcp47( $v ) ) === $variant ) {
			return $v;
		}
	}
	return null;
}
248 
/**
 * Get the variant specified in the URL.
 *
 * Reads the 'variant' request parameter, falling back to 'uselang'. A
 * truthy result is memoized in $mURLVariant.
 *
 * @return string|null Validated variant code, or null if none is valid
 */
public function getURLVariant() {
	global $wgRequest;

	if ( !$this->mURLVariant ) {
		// see if the preference is set in the request
		$requested = $wgRequest->getText( 'variant' );
		if ( !$requested ) {
			$requested = $wgRequest->getVal( 'uselang' );
		}

		$this->mURLVariant = $this->validateVariant( $requested );
	}

	return $this->mURLVariant;
}
271 
/**
 * Determine if the user has a variant set.
 *
 * Deliberately not memoized: returning a cached $mUserVariant wreaks
 * havoc on parserTest.php.
 *
 * @return string|null|false Validated variant code, null if the stored
 *   option is not a valid variant, or false if $wgUser is not safe to load
 */
protected function getUserVariant() {
	global $wgUser;

	// Don't call this on stub objects because that causes infinite
	// recursion during initialisation
	if ( !$wgUser->isSafeToLoad() ) {
		return false;
	}

	if ( !$wgUser->isLoggedIn() ) {
		// figure out user lang without constructing wgLang to avoid
		// infinite recursion
		$optionName = 'language';
	} elseif (
		$this->mMainLanguageCode ==
		MediaWikiServices::getInstance()->getContentLanguage()->getCode()
	) {
		// Logged-in user, converter is for the content language
		$optionName = 'variant';
	} else {
		// Logged-in user, converter is for some other language
		$optionName = 'variant-' . $this->mMainLanguageCode;
	}

	$this->mUserVariant = $this->validateVariant( $wgUser->getOption( $optionName ) );
	return $this->mUserVariant;
}
311 
/**
 * Determine the language variant from the Accept-Language header.
 *
 * Each accepted language is validated in turn. If none is itself a valid
 * variant, the fallbacks collected for those languages are tried next.
 * A truthy result is memoized in $mHeaderVariant.
 *
 * @return string|null Variant code, or null if nothing matches
 */
protected function getHeaderVariant() {
	global $wgRequest;

	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	// See if some supported language variant is set in the
	// HTTP header.
	$accepted = array_keys( $wgRequest->getAcceptLang() );
	if ( empty( $accepted ) ) {
		return null;
	}

	$deferred = [];
	foreach ( $accepted as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}

		// Not a variant itself: remember its fallbacks so they can be
		// tried once every accepted language has been checked.
		$candidates = $this->getVariantFallbacks( $code );
		if ( is_string( $candidates ) && $candidates !== $this->mMainLanguageCode ) {
			$deferred[] = $candidates;
		} elseif ( is_array( $candidates ) ) {
			$deferred = array_merge( $deferred, $candidates );
		}
	}

	// process fallback languages now
	foreach ( array_unique( $deferred ) as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}
	}

	return $this->mHeaderVariant;
}
363 
/**
 * Dictionary-based conversion.
 *
 * Splits $text into alternating "source" segments (plain text) and
 * "literal" segments (HTML tags, <code>/<script>/<pre> elements, HTML
 * entities and parser markers). Only the source segments go through
 * translate(); literal segments are emitted unchanged, except that the
 * values of 'title' and 'alt' attributes are converted via
 * recursiveConvertTopLevel() unless strpos() finds '://' in them.
 *
 * @param string $text Text to convert
 * @param bool|string $toVariant Target variant code; when falsy the
 *   result of getPreferredVariant() is used
 * @return string Converted text. $text is returned unconverted when no
 *   target variant is available, when guessVariant() returns true, or
 *   (with \000 and \004 bytes removed) when the segmenting regex fails.
 */
374  public function autoConvert( $text, $toVariant = false ) {
375  $this->loadTables();
376 
377  if ( !$toVariant ) {
378  $toVariant = $this->getPreferredVariant();
379  if ( !$toVariant ) {
380  return $text;
381  }
382  }
383 
384  if ( $this->guessVariant( $text, $toVariant ) ) {
385  return $text;
386  }
387  /* we convert everything except:
388  1. HTML markups (anything between < and >)
389  2. HTML entities
390  3. placeholders created by the parser
391  IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
392  Minimize use of backtracking where possible.
393  */
// The pattern is built once per request and kept in a static local.
394  static $reg;
395  if ( $reg === null ) {
396  $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';
397 
398  // this one is needed when the text is inside an HTML markup
399  $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';
400 
401  // Optimize for the common case where these tags have
402  // few or no children. Thus try and possessively get as much as
403  // possible, and only engage in backtracking when we hit a '<'.
404 
405  // disable convert to variants between <code> tags
406  $codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
407  // disable conversion of <script> tags
408  $scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
409  // disable conversion of <pre> tags
410  $prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
411  // The "|.*+)" at the end, is in case we missed some part of html syntax,
412  // we will fail securely (hopefully) by matching the rest of the string.
413  $htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
414 
415  $reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
416  '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
417  }
418  $startPos = 0;
// $sourceBlob collects the segments to translate and $literalBlob the
// segments to keep verbatim; both use \000 as the segment separator.
419  $sourceBlob = '';
420  $literalBlob = '';
421 
422  // Guard against delimiter nulls in the input
423  // (should never happen: see T159174)
424  $text = str_replace( "\000", '', $text );
425  $text = str_replace( "\004", '', $text );
426 
427  $markupMatches = null;
428  $elementMatches = null;
429 
430  // We add a marker (\004) at the end of text, to ensure we always match the
431  // entire text (Otherwise, pcre.backtrack_limit might cause silent failure)
432  $textWithMarker = $text . "\004";
433  while ( $startPos < strlen( $text ) ) {
434  if ( preg_match( $reg, $textWithMarker, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
435  $elementPos = $markupMatches[0][1];
436  $element = $markupMatches[0][0];
437  if ( $element === "\004" ) {
438  // We hit the end.
439  $elementPos = strlen( $text );
440  $element = '';
441  } elseif ( substr( $element, -1 ) === "\004" ) {
442  // This can sometimes happen if we have
443  // unclosed html tags (For example
444  // when converting a title attribute
445  // during a recursive call that contains
446  // a &lt; e.g. <div title="&lt;">.
447  $element = substr( $element, 0, -1 );
448  }
449  } else {
450  // If we hit here, then Language Converter could be tricked
451  // into doing an XSS, so we refuse to translate.
452  // If non-crazy input manages to reach this code path,
453  // we should consider it a bug.
454  $log = LoggerFactory::getInstance( 'languageconverter' );
455  $log->error( "Hit pcre.backtrack_limit in " . __METHOD__
456  . ". Disabling language conversion for this page.",
457  [
458  "method" => __METHOD__,
459  "variant" => $toVariant,
460  "startOfText" => substr( $text, 0, 500 )
461  ]
462  );
463  return $text;
464  }
465  // Queue the part before the markup for translation in a batch
466  $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";
467 
468  // Advance to the next position
469  $startPos = $elementPos + strlen( $element );
470 
471  // Translate any alt or title attributes inside the matched element
472  if ( $element !== ''
473  && preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
474  ) {
475  // FIXME, this decodes entities, so if you have something
476  // like <div title="foo&lt;bar"> the bar won't get
477  // translated since after entity decoding it looks like
478  // unclosed html and we call this method recursively
479  // on attributes.
480  $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
481  // Ensure self-closing tags stay self-closing.
482  $close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
483  $changed = false;
484  foreach ( [ 'title', 'alt' ] as $attrName ) {
485  if ( !isset( $attrs[$attrName] ) ) {
486  continue;
487  }
488  $attr = $attrs[$attrName];
489  // Don't convert URLs
// NOTE(review): strpos() returns 0 for a value that starts with
// '://', so such a value is still converted - confirm this is intended.
490  if ( !strpos( $attr, '://' ) ) {
491  $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
492  }
493 
494  if ( $attr !== $attrs[$attrName] ) {
495  $attrs[$attrName] = $attr;
496  $changed = true;
497  }
498  }
// The tag is only rebuilt when an attribute value actually changed.
499  if ( $changed ) {
500  $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
501  $close . $elementMatches[3];
502  }
503  }
504  $literalBlob .= $element . "\000";
505  }
506 
507  // Do the main translation batch
508  $translatedBlob = $this->translate( $sourceBlob, $toVariant );
509 
510  // Put the output back together
511  $translatedIter = StringUtils::explode( "\000", $translatedBlob );
512  $literalIter = StringUtils::explode( "\000", $literalBlob );
513  $output = '';
// Interleave: translated segment, then the literal that followed it.
514  while ( $translatedIter->valid() && $literalIter->valid() ) {
515  $output .= $translatedIter->current();
516  $output .= $literalIter->current();
517  $translatedIter->next();
518  $literalIter->next();
519  }
520 
521  return $output;
522  }
523 
/**
 * Translate a string to a variant.
 *
 * @param string $text Text to convert
 * @param string $variant Variant language code
 * @return string The translated text; $text is returned untouched when
 *   trim( $text ) is falsy (empty or whitespace-only input)
 */
public function translate( $text, $variant ) {
	if ( !trim( $text ) ) {
		// Nothing worth translating
		return $text;
	}

	$this->loadTables();
	return $this->mTables[$variant]->replace( $text );
}
542 
/**
 * Call translate() to convert text to all valid variants.
 *
 * @param string $text The text to be converted
 * @return string[] Translated text keyed by variant code
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$converted = [];
	foreach ( $this->mVariants as $code ) {
		$converted[$code] = $this->translate( $text, $code );
	}

	return $converted;
}
559 
/**
 * Apply manual conversion rules.
 *
 * @param ConverterRule $convRule Parsed rule whose title and conversion
 *   table are merged into (or removed from) this converter's state
 */
protected function applyManualConv( $convRule ) {
	// Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
	// title conversion.
	// T26072: only overwrite $mConvRuleTitle when this rule actually
	// carries a title; otherwise non-title rules would wipe out an
	// earlier title rule and break the title conversion.
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// merge/remove manual conversion rules to/from global table
	$ruleTable = $convRule->getConvTable();
	$action = $convRule->getRulesAction();
	foreach ( $ruleTable as $rawCode => $pairs ) {
		$code = $this->validateVariant( $rawCode );
		if ( !$code ) {
			// Unknown variant: ignore its pairs
			continue;
		}

		if ( $action == 'remove' ) {
			$this->mTables[$code]->removeArray( $pairs );
		} elseif ( $action == 'add' ) {
			// More efficient than array_merge(), about 2.5 times.
			foreach ( $pairs as $source => $target ) {
				$this->mTables[$code]->setPair( $source, $target );
			}
		}
	}
}
595 
/**
 * Auto convert a Title object to a readable string in the
 * preferred variant.
 *
 * @param Title $title A object of Title
 * @return string Converted title text, prefixed with the converted
 *   namespace name and ':' when the title is not in NS_MAIN
 */
public function convertTitle( $title ) {
	$variant = $this->getPreferredVariant();
	$nsIndex = $title->getNamespace();

	$prefix = $nsIndex !== NS_MAIN
		? $this->convertNamespace( $nsIndex, $variant ) . ':'
		: '';

	return $prefix . $this->translate( $title->getText(), $variant );
}
614 
/**
 * Get the namespace display name in the preferred variant.
 *
 * The result is cached in the local server object cache for 60 seconds.
 *
 * @param int $index Namespace id
 * @param string|null $variant Variant code or null for preferred variant
 * @return string Namespace name for display; '' for NS_MAIN
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}

	$variant = $variant ?? $this->getPreferredVariant();

	$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
	$key = $cache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
	$name = $cache->get( $key );
	if ( $name !== false ) {
		return $name;
	}

	$msgKey = 'conversion-ns' . $index;

	// First check if a message gives a converted name in the target variant.
	$variantMsg = wfMessage( $msgKey )->inLanguage( $variant );
	if ( $variantMsg->exists() ) {
		$name = $variantMsg->plain();
	}

	// Then check if a message gives a converted name in content language
	// which needs extra translation to the target variant.
	if ( $name === false ) {
		$contentMsg = wfMessage( $msgKey )->inContentLanguage();
		if ( $contentMsg->exists() ) {
			$name = $this->translate( $contentMsg->plain(), $variant );
		}
	}

	// No message exists, retrieve it from the target variant's namespace names.
	if ( $name === false ) {
		$name = $this->mLangObj->factory( $variant )->getFormattedNsText( $index );
	}

	$cache->set( $key, $name, 60 );

	return $name;
}
663 
/**
 * Convert text to the preferred variant of the language.
 *
 * @param string $text Text to be converted
 * @return string Result of convertTo() for the preferred variant
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
686 
/**
 * Same as convert() except a extra parameter to custom variant.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @return string Converted text, or $text unchanged when
 *   $wgDisableLangConversion is set
 */
public function convertTo( $text, $variant ) {
	global $wgDisableLangConversion;

	if ( $wgDisableLangConversion ) {
		return $text;
	}
	// Reset converter state for a new converter run.
	$this->mConvRuleTitle = false;
	return $this->recursiveConvertTopLevel( $text, $variant );
}
705 
/**
 * Recursively convert text on the outside.
 *
 * Text outside -{ }- markup is passed through autoConvert() (unless
 * guessVariant() reports it is already in the target variant); each
 * -{ }- rule found outside HTML is handed to recursiveConvertRule().
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int $depth Depth of recursion
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
	$noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
	// phpcs:ignore Generic.Files.LineLength
	$noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
	// Only match -{ outside of html.
	$ruleStart = "/$noScript|$noStyle|$noHtml|-\{/";

	$convertPlain = !$this->guessVariant( $text, $variant );
	$length = strlen( $text );
	$offset = 0;
	$result = '';

	while ( $offset < $length ) {
		$found = preg_match( $ruleStart, $text, $match, PREG_OFFSET_CAPTURE, $offset );

		if ( !$found ) {
			// No more markup, append final segment
			$tail = substr( $text, $offset );
			$result .= $convertPlain ? $this->autoConvert( $tail, $variant ) : $tail;
			return $result;
		}

		// Offset of the -{ marker (found outside any attribute).
		$markerPos = $match[0][1];

		// Append the plain segment that precedes the marker
		$plain = substr( $text, $offset, $markerPos - $offset );
		$result .= $convertPlain ? $this->autoConvert( $plain, $variant ) : $plain;

		// Hand over at the marker; $offset is passed by reference and
		// comes back advanced past the rule.
		$offset = $markerPos;
		$result .= $this->recursiveConvertRule( $text, $variant, $offset, $depth + 1 );
	}

	return $result;
}
758 
/**
 * Recursively convert text on the inside.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int &$startPos Offset of the opening -{ in $text; on return it
 *   points just past the rule (or at the end of $text if unclosed)
 * @param int $depth Depth of recursion
 *
 * @throws MWException If $startPos does not point at "-{"
 * @return string Converted text
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick sanity check (no function calls)
	if ( !( $text[$startPos] === '-' && $text[$startPos + 1] === '{' ) ) {
		throw new MWException( __METHOD__ . ': invalid input string' );
	}

	$startPos += 2;
	$length = strlen( $text );
	$ruleBody = '';
	$warned = false;

	while ( $startPos < $length ) {
		$match = [];
		if ( !preg_match( '/-\{|\}-/', $text, $match, PREG_OFFSET_CAPTURE, $startPos ) ) {
			// Unclosed rule
			break;
		}

		$token = $match[0][0];
		$tokenPos = $match[0][1];

		// Markup found: keep what precedes it and move up to the token
		$ruleBody .= substr( $text, $startPos, $tokenPos - $startPos );
		$startPos = $tokenPos;

		if ( $token === '}-' ) {
			// End of this rule: apply it
			$startPos += 2;
			$rule = new ConverterRule( $ruleBody, $this );
			$rule->parse( $variant );
			$this->applyManualConv( $rule );
			return $rule->getDisplay();
		}

		if ( $token !== '-{' ) {
			throw new MWException( __METHOD__ . ': invalid regex match' );
		}

		if ( $depth < $this->mMaxDepth ) {
			// Recursively parse another rule
			$ruleBody .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
			continue;
		}

		// Max depth reached: emit the marker literally, with a warning
		// the first time this happens within this rule.
		$ruleBody .= '-{';
		if ( !$warned ) {
			$ruleBody .= '<span class="error">' .
				wfMessage( 'language-converter-depth-warning' )
					->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
				'</span>';
			$warned = true;
		}
		$startPos += 2;
	}

	// Unclosed rule
	if ( $startPos < $length ) {
		$ruleBody .= substr( $text, $startPos );
	}
	$startPos = $length;
	return '-{' . $this->autoConvert( $ruleBody, $variant );
}
836 
/**
 * If a language supports multiple variants, it is possible that a
 * non-existing link in one variant actually exists in another variant.
 * This function tries to find it.
 *
 * @param string &$link The name of the link; replaced with the text of
 *   the first existing variant title, if any
 * @param Title &$nt The title object of the link; replaced likewise
 * @param bool $ignoreOtherCond When true, the request-based conditions
 *   (redirect=no, action=edit/submit, linkconvert=no) are not checked
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	# If the article has already existed, there is no need to
	# check it again, otherwise it may cause a fault.
	if ( is_object( $nt ) && $nt->exists() ) {
		return;
	}

	global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;
	$isredir = $wgRequest->getText( 'redirect', 'yes' );
	$action = $wgRequest->getText( 'action' );
	if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
		// Following a red link is treated like viewing the page.
		$action = 'view';
	}
	$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
	$disableLinkConversion = $wgDisableLangConversion
		|| $wgDisableTitleConversion;
	$linkBatch = new LinkBatch();

	$ns = NS_MAIN;

	if ( $disableLinkConversion ||
		( !$ignoreOtherCond &&
			( $isredir == 'no'
				|| $action == 'edit'
				|| $action == 'submit'
				|| $linkconvert == 'no' ) ) ) {
		return;
	}

	if ( is_object( $nt ) ) {
		$ns = $nt->getNamespace();
	}

	$variants = $this->autoConvertToAllVariants( $link );
	if ( !$variants ) { // give up
		return;
	}

	$titles = [];

	foreach ( $variants as $v ) {
		// Only variants whose text differs from the link are candidates.
		if ( $v != $link ) {
			$varnt = Title::newFromText( $v, $ns );
			if ( !is_null( $varnt ) ) {
				$linkBatch->addObj( $varnt );
				$titles[] = $varnt;
			}
		}
	}

	// fetch all variants in single query
	$linkBatch->execute();

	// The first candidate that exists wins.
	foreach ( $titles as $varnt ) {
		if ( $varnt->getArticleID() > 0 ) {
			$nt = $varnt;
			$link = $varnt->getText();
			break;
		}
	}
}
909 
/**
 * Returns language specific hash options.
 *
 * @return string '!' followed by the preferred variant
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
920 
/**
 * Guess if a text is written in a variant.
 *
 * This base implementation never recognises a variant.
 *
 * @param string $text The text to be checked
 * @param string $variant Language code of the variant to be checked for
 * @return bool Always false here
 */
public function guessVariant( $text, $variant ) {
	return false;
}
934 
/**
 * Load default conversion tables.
 *
 * This base implementation always throws.
 *
 * @throws MWException Always, naming the late-static-bound class
 */
function loadDefaultTables() {
	throw new MWException(
		'Must implement loadDefaultTables() method in class ' . static::class
	);
}
946 
/**
 * Load conversion tables either from the cache or by rebuilding them.
 *
 * Does nothing if the tables have already been loaded. A rebuild runs
 * loadDefaultTables(), merges in parseCachedTable() for every variant,
 * calls postLoadTables(), and stores the result in the cache for 43200
 * seconds.
 *
 * @param bool $fromCache Whether to try the cache first; false forces a
 *   rebuild
 */
function loadTables( $fromCache = true ) {
	global $wgLanguageConverterCacheType;

	if ( $this->mTablesLoaded ) {
		return;
	}

	// Flag first, so nested calls made while loading return immediately.
	$this->mTablesLoaded = true;
	$this->mTables = null;
	$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
	$cacheKey = $cache->makeKey( 'conversiontables', $this->mMainLanguageCode );
	if ( $fromCache ) {
		$this->mTables = $cache->get( $cacheKey );
	}
	// A cached value lacking the current version key is treated as stale.
	if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
		// not in cache, or we need a fresh reload.
		// We will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$this->loadDefaultTables();
		foreach ( $this->mVariants as $var ) {
			$cached = $this->parseCachedTable( $var );
			// @phan-suppress-next-next-line PhanTypeArraySuspiciousNullable
			// FIXME: $this->mTables could theoretically be null here
			$this->mTables[$var]->mergeArray( $cached );
		}

		$this->postLoadTables();
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		$cache->set( $cacheKey, $this->mTables, 43200 );
	}
}
984 
/**
 * Hook for post processing after conversion tables are loaded.
 *
 * Called by loadTables() after a rebuild; does nothing in this class.
 */
function postLoadTables() {
	// Intentionally empty.
}
990 
/**
 * Reload the conversion tables.
 *
 * Drops the in-memory tables and rebuilds them, bypassing the cache.
 */
private function reloadTables() {
	// Discard whatever is currently loaded
	if ( $this->mTables ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mTables );
	}

	// Clear the guard so that loadTables() runs again, skipping the cache
	$this->mTablesLoaded = false;
	$this->loadTables( false );
}
1007 
/**
 * Parse the conversion table stored in the cache.
 *
 * The page text is read from MediaWiki:Conversiontable/<code> (through
 * the message cache) or, for a subpage, from the wikitext content of
 * MediaWiki:Conversiontable/<code>/<subpage>. Mappings have the form
 * "from => to" inside -{ }- blocks, separated by ';'. Links of the form
 * [[MediaWiki:Conversiontable/<code>/...|...]] name further subpages.
 *
 * Each page key is parsed at most once per request; a repeated call for
 * the same key returns an empty array.
 *
 * @param string $code Language code
 * @param string $subpage Subpage name
 * @param bool $recursive Parse subpages recursively? Defaults to true.
 * @return array Map of source string => replacement string
 */
function parseCachedTable( $code, $subpage = '', $recursive = true ) {
	static $parsed = [];

	$key = 'Conversiontable/' . $code;
	if ( $subpage ) {
		$key .= '/' . $subpage;
	}
	if ( array_key_exists( $key, $parsed ) ) {
		return [];
	}

	$parsed[$key] = true;

	if ( $subpage === '' ) {
		$txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
	} else {
		$txt = false;
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
		if ( $title && $title->exists() ) {
			$revision = Revision::newFromTitle( $title );
			if ( $revision ) {
				if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
					// @phan-suppress-next-line PhanUndeclaredMethod
					$txt = $revision->getContent( RevisionRecord::RAW )->getText();
				}

				// @todo in the future, use a specialized content model, perhaps based on json!
			}
		}
	}

	# Nothing to parse if there's no text
	if ( $txt === false || $txt === null || $txt === '' ) {
		return [];
	}

	// get all subpage links of the form
	// [[MediaWiki:Conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
		':Conversiontable';
	$subs = StringUtils::explode( '[[', $txt );
	$sublinks = [];
	foreach ( $subs as $sub ) {
		$link = explode( ']]', $sub, 2 );
		if ( count( $link ) != 2 ) {
			// No closing ]] in this piece, so it is not a link
			continue;
		}
		// Drop the link label, then split target into head/code/subpage
		$b = explode( '|', $link[0], 2 );
		$b = explode( '/', trim( $b[0] ), 3 );
		if ( count( $b ) == 3 ) {
			$sublink = $b[2];
		} else {
			$sublink = '';
		}

		if ( $b[0] == $linkhead && $b[1] == $code ) {
			$sublinks[] = $sublink;
		}
	}

	// parse the mappings in this page
	$blocks = StringUtils::explode( '-{', $txt );
	$ret = [];
	$first = true;
	foreach ( $blocks as $block ) {
		if ( $first ) {
			// Skip the part before the first -{
			$first = false;
			continue;
		}
		$mappings = explode( '}-', $block, 2 )[0];
		$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
		$table = StringUtils::explode( ';', $stripped );
		foreach ( $table as $t ) {
			// Limit 3 so that an entry with more than one '=>' yields
			// three parts and is rejected below.
			$m = explode( '=>', $t, 3 );
			if ( count( $m ) != 2 ) {
				continue;
			}
			// trim any trailing comments starting with '//'
			$tt = explode( '//', $m[1], 2 );
			$ret[trim( $m[0] )] = trim( $tt[0] );
		}
	}

	// recursively parse the subpages
	if ( $recursive ) {
		foreach ( $sublinks as $link ) {
			$s = $this->parseCachedTable( $code, $link, $recursive );
			// Array union: entries from the subpage win over existing ones
			$ret = $s + $ret;
		}
	}

	if ( $this->mUcfirst ) {
		// Add first-letter-uppercased copies of every mapping
		foreach ( $ret as $k => $v ) {
			$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
		}
	}
	return $ret;
}
1126 
/**
 * Enclose a string with the "no conversion" tag.
 *
 * @param string $text Text to be tagged for no conversion
 * @param bool $noParse Unused
 * @return string "-{R|$text}-", or $text unchanged if it already contains
 *   a "-{" or "}-" marker anywhere (including at the very start)
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	// Compare against false explicitly: strpos() returns 0 for a marker
	// at the start of the text, which a truthiness test would miss.
	if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
		return $text;
	}

	return "-{R|$text}-";
}
1144 
/**
 * Convert the sorting key for category links.
 *
 * This base implementation returns the key unchanged.
 *
 * @param string $key Sort key
 * @return string The same key
 */
function convertCategoryKey( $key ) {
	return $key;
}
1156 
/**
 * Refresh the cache of conversion tables when
 * MediaWiki:Conversiontable* is updated.
 *
 * @param Title $titleobj The Title of the page being updated
 */
public function updateConversionTable( Title $titleobj ) {
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return;
	}

	// Expected shape: Conversiontable/<variant>[/<subpage>]
	$parts = explode( '/', $titleobj->getDBkey(), 3 );
	if ( count( $parts ) > 1
		&& $parts[0] == 'Conversiontable'
		&& $this->validateVariant( $parts[1] )
	) {
		$this->reloadTables();
	}
}
1175 
/**
 * Get the cached separator pattern for ConverterRule::parseRules().
 *
 * The pattern is built on first use and memoized in
 * $mVarSeparatorPattern.
 *
 * @return string PCRE pattern
 */
public function getVarSeparatorPattern() {
	if ( !isset( $this->mVarSeparatorPattern ) ) {
		// varsep_pattern for preg_split:
		// text should be split by ";" only if a valid variant
		// name exists after the markup, for example:
		//  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
		//  <span style="font-size:120%;">yyy</span>;}-
		// we should split it as:
		//  [
		//    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
		//    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
		//    [2] => ''
		//  ]
		$expandedVariants = [];
		foreach ( $this->mVariants as $variant ) {
			$expandedVariants[ $variant ] = 1;
			// Accept standard BCP 47 names for variants as well.
			$expandedVariants[ LanguageCode::bcp47( $variant ) ] = 1;
		}
		// Accept old deprecated names for variants
		foreach ( LanguageCode::getDeprecatedCodeMapping() as $old => $new ) {
			if ( isset( $expandedVariants[ $new ] ) ) {
				$expandedVariants[ $old ] = 1;
			}
		}

		// Split on ';' only when it is followed by one of the
		// alternatives below, or by the end of the text.
		$pat = '/;\s*(?=';
		foreach ( $expandedVariants as $variant => $ignore ) {
			// zh-hans:xxx;zh-hant:yyy
			$pat .= $variant . '\s*:|';
			// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
			$pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
		}
		$pat .= '\s*$)/';
		$this->mVarSeparatorPattern = $pat;
	}
	return $this->mVarSeparatorPattern;
}
1218 }
const MARKER_PREFIX
Definition: Parser.php:139
updateConversionTable(Title $titleobj)
Refresh the cache of conversion tables when MediaWiki:Conversiontable* is updated.
static fetchLanguageNames( $inLanguage=self::AS_AUTONYMS, $include='mw')
Get an array of language names, indexed by code.
Definition: Language.php:814
const CONTENT_MODEL_WIKITEXT
Definition: Defines.php:215
const NS_MAIN
Definition: Defines.php:60
validateVariant( $variant=null)
Validate the variant and return an appropriate strict internal variant code if one exists...
autoConvertToAllVariants( $text)
Call translate() to convert text to all valid variants.
ReplacementArray [] bool [] $mTables
getVarSeparatorPattern()
Get the cached separator pattern for ConverterRule::parseRules()
guessVariant( $text, $variant)
Guess if a text is written in a variant.
static getInstance( $id)
Get a cached instance of the specified type of cache object.
Definition: ObjectCache.php:78
getExtraHashOptions()
Returns language specific hash options.
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:138
postLoadTables()
Hook for post processing after conversion tables are loaded.
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:34
markNoConversion( $text, $noParse=false)
Enclose a string with the "no conversion" tag.
autoConvert( $text, $toVariant=false)
Dictionary-based conversion.
getDBkey()
Get the main part with underscores.
Definition: Title.php:1016
convertCategoryKey( $key)
Convert the sorting key for category links.
getURLVariant()
Get the variant specified in the URL.
findVariantLink(&$link, &$nt, $ignoreOtherCond=false)
If a language supports multiple variants, it is possible that non-existing link in one variant actual...
parseCachedTable( $code, $subpage='', $recursive=true)
Parse the conversion table stored in the cache.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
getVariantFallbacks( $variant)
In case some variant is not defined in the markup, we need to have some fallback. ...
reloadTables()
Reload the conversion tables.
$cache
Definition: mcc.php:33
Parser for rules of language conversion, parse rules in -{ }- tag.
getUserVariant()
Determine if the user has a variant set.
$wgLanguageConverterCacheType
The cache type for storing language conversion tables, which are used when parsing certain text and i...
$wgDisableTitleConversion
Whether to enable language variant conversion for links.
convert( $text)
Convert text to different variants of a language.
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1450
$wgDisabledVariants
Disabled variants array of language variant conversion.
static getDeprecatedCodeMapping()
Returns a mapping of deprecated language codes that were used in previous versions of MediaWiki to up...
getVariants()
Get all valid variants.
getNamespace()
Get the namespace index, i.e.
Definition: Title.php:1040
applyManualConv( $convRule)
Apply manual conversion rules.
static expandAttributes(array $attribs)
Given an associative array of element attributes, generate a string to stick after the element name i...
Definition: Html.php:480
recursiveConvertTopLevel( $text, $variant, $depth=0)
Recursively convert text on the outside.
$wgDisableLangConversion
Whether to enable language variant conversion.
$wgDefaultLanguageVariant
Default variant code, if false, the default will be the language code.
const NS_MEDIAWIKI
Definition: Defines.php:68
convertTo( $text, $variant)
Same as convert() except a extra parameter to custom variant.
translate( $text, $variant)
Translate a string to a variant.
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:612
recursiveConvertRule( $text, $variant, &$startPos, $depth=0)
Recursively convert text on the inside.
convertNamespace( $index, $variant=null)
Get the namespace display name in the preferred variant.
static replaceDeprecatedCodes( $code)
Replace deprecated language codes that were used in previous versions of MediaWiki to up-to-date...
getHeaderVariant()
Determine the language variant from the Accept-Language header.
getDefaultVariant()
Get default variant.
__construct(Language $langobj, $maincode, $variants=[], $variantfallbacks=[], $flags=[], $manualLevel=[])
getConvRuleTitle()
Get the title produced by the conversion rule.
if(! $wgDBerrorLogTZ) $wgRequest
Definition: Setup.php:727
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
static array $languagesWithVariants
languages supporting variants
getPreferredVariant()
Get preferred language variant.
loadDefaultTables()
Load default conversion tables.
static bcp47( $code)
Get the normalised IETF language tag See unit test for examples.
static singleton()
Get the singleton instance of this class.
convertTitle( $title)
Auto convert a Title object to a readable string in the preferred variant.
switch( $options['output']) $languages
Definition: transstat.php:76
loadTables( $fromCache=true)
Load conversion tables either from the cache or the disk.
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:200
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:319