MediaWiki  master
LanguageConverter.php
Go to the documentation of this file.
1 <?php
22 
25 
/**
 * Language codes listed as having variants.
 *
 * @var string[]
 */
public static $languagesWithVariants = [
	'en',
	'crh',
	'gan',
	'iu',
	'kk',
	'ku',
	'shi',
	'sr',
	'tg',
	'uz',
	'zh',
];

/**
 * Main language code, as passed to the constructor.
 *
 * @var string
 */
public $mMainLanguageCode;

/**
 * Variant codes passed to the constructor, minus $wgDisabledVariants.
 *
 * @var string[]
 */
public $mVariants;

/**
 * Fallbacks per variant code; each value is either a single code or a
 * list of codes (getHeaderVariant() handles both shapes).
 *
 * @var array
 */
public $mVariantFallbacks;

/**
 * Result of Language::fetchLanguageNames(), stored by the constructor.
 *
 * @var array
 */
public $mVariantNames;

/** @var bool Set once loadTables() has run */
public $mTablesLoaded = false;

/**
 * Conversion tables indexed by variant code; loadTables() additionally
 * stores a marker entry under self::CACHE_VERSION_KEY.
 *
 * @var ReplacementArray[]
 */
public $mTables;

// 'bidirectional' 'unidirectional' 'disable' for each variant
public $mManualLevel;

/** @var Language Language object passed to the constructor */
public $mLangObj;

/** @var array Flag table: default flags, caller flags, plus one entry per variant */
public $mFlags;

public $mDescCodeSep = ':';
public $mDescVarSep = ';';

/** @var bool When true, parseCachedTable() also adds ucfirst() versions of each pair */
public $mUcfirst = false;

/** @var string|bool Title set by a manual conversion rule, false when none */
public $mConvRuleTitle = false;

/** @var string|null Memoized result of getURLVariant() */
public $mURLVariant;

/** @var string|null Last result computed by getUserVariant() */
public $mUserVariant;

/** @var string|null Memoized result of getHeaderVariant() */
public $mHeaderVariant;

/** @var int Maximum nesting depth of -{ }- rules in recursiveConvertRule() */
public $mMaxDepth = 10;

/** @var string|null Memoized separator regex, built lazily on first use */
public $mVarSeparatorPattern;

/** Key stored inside the cached tables array to mark its format version */
const CACHE_VERSION_KEY = 'VERSION 7';
86 
/**
 * Store the converter configuration and build the flag / manual-level tables.
 *
 * @param Language $langobj Language object this converter belongs to
 * @param string $maincode Main language code
 * @param array $variants Supported variant codes; entries listed in
 *   $wgDisabledVariants are dropped
 * @param array $variantfallbacks Fallbacks, indexed by variant code
 * @param array $flags Extra flags, merged over the defaults
 * @param array $manualLevel Manual conversion level per variant; variants
 *   without an entry default to 'bidirectional'
 */
public function __construct( Language $langobj, $maincode, $variants = [],
	$variantfallbacks = [], $flags = [],
	$manualLevel = [] ) {
	global $wgDisabledVariants;

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;
	$this->mVariants = array_diff( $variants, $wgDisabledVariants );
	$this->mVariantFallbacks = $variantfallbacks;
	$this->mVariantNames = Language::fetchLanguageNames();

	// The flags 'S' (show converted text), '+' (add rules for all text)
	// and 'E' (the given flags are erroneous) are reserved for internal use.
	$builtinFlags = [
		'A' => 'A', // add rule for convert code (all text convert)
		'T' => 'T', // title convert
		'R' => 'R', // raw content
		'D' => 'D', // convert description (subclass implement)
		'-' => '-', // remove convert (not implement)
		'H' => 'H', // add rule for convert code (but no display in placed code)
		'N' => 'N', // current variant name
	];
	$this->mFlags = array_merge( $builtinFlags, $flags );

	foreach ( $this->mVariants as $code ) {
		$this->mManualLevel[$code] = array_key_exists( $code, $manualLevel )
			? $manualLevel[$code]
			: 'bidirectional';
		// Every variant code is also usable as a flag.
		$this->mFlags[$code] = $code;
	}
}
127 
/**
 * Get the list of variant codes this converter was configured with
 * (disabled variants already removed by the constructor).
 *
 * @return string[]
 */
public function getVariants() {
	$variants = $this->mVariants;
	return $variants;
}
137 
/**
 * Look up the configured fallback(s) for a variant.
 *
 * @param string $variant Variant code to look up
 * @return string|array The configured fallback entry, or the main language
 *   code when the variant has no (non-null) entry
 */
public function getVariantFallbacks( $variant ) {
	if ( isset( $this->mVariantFallbacks[$variant] ) ) {
		return $this->mVariantFallbacks[$variant];
	}
	return $this->mMainLanguageCode;
}
152 
/**
 * Get the title set by a manual conversion rule during the current run.
 *
 * @return string|bool The stored title, or false when no rule has set one
 *   since convertTo() last reset it
 */
public function getConvRuleTitle() {
	$ruleTitle = $this->mConvRuleTitle;
	return $ruleTitle;
}
160 
/**
 * Get the preferred language variant for the current request.
 *
 * Sources are consulted in this order: the URL, the GetLangPreferredVariant
 * hook (which may rewrite the URL value), the logged-in user's preference or
 * else the Accept-Language header, and finally $wgDefaultLanguageVariant.
 *
 * @return string The preferred variant code; the main language code when
 *   none of the sources yields a valid variant
 */
public function getPreferredVariant() {
	global $wgDefaultLanguageVariant, $wgUser;

	$req = $this->getURLVariant();

	Hooks::run( 'GetLangPreferredVariant', [ &$req ] );

	if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
		$req = $this->getUserVariant();
	} elseif ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	// The hook may have injected an arbitrary value, so validate again.
	$req = $this->validateVariant( $req );

	// This function, unlike the other get*Variant functions, is
	// not memoized (i.e. the return value is not cached) since
	// new information might appear during processing after this
	// is first called.
	if ( $req ) {
		return $req;
	}

	// Nothing usable was found: fall back to the main language code rather
	// than implicitly returning null.
	return $this->mMainLanguageCode;
}
193 
/**
 * Get the default variant, ignoring any per-user preference.
 *
 * Consults the URL, then the Accept-Language header, then
 * $wgDefaultLanguageVariant.
 *
 * @return string The default variant code; the main language code when
 *   none of the sources yields a valid variant
 */
public function getDefaultVariant() {
	// Without this declaration the setting below would be an undefined
	// local and the site-wide default would never be consulted.
	global $wgDefaultLanguageVariant;

	$req = $this->getURLVariant();

	if ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	if ( $req ) {
		return $req;
	}

	return $this->mMainLanguageCode;
}
217 
/**
 * Validate a variant code and map it to the internal variant code.
 *
 * @param string|null $variant Code to validate; may be mixed case, a
 *   deprecated code, or a BCP 47 code
 * @return string|null The matching internal variant code, or null when
 *   $variant is null or matches no configured variant
 */
public function validateVariant( $variant = null ) {
	if ( $variant === null ) {
		return null;
	}

	// Internal variant codes are always lower-case, while the candidate
	// may arrive in mixed case.
	$normalized = LanguageCode::replaceDeprecatedCodes( strtolower( $variant ) );
	if ( in_array( $normalized, $this->mVariants ) ) {
		return $normalized;
	}

	// Browsers are supposed to send BCP 47 codes in Accept-Language, but
	// not every internal code is BCP 47: compare against the BCP 47 form
	// of each internal code, case-insensitively.
	foreach ( $this->mVariants as $internalCode ) {
		$bcp47 = strtolower( LanguageCode::bcp47( $internalCode ) );
		if ( $bcp47 === $normalized ) {
			return $internalCode;
		}
	}

	return null;
}
249 
/**
 * Get the variant requested through the URL.
 *
 * Reads the 'variant' request parameter and, when that is empty, 'uselang'.
 * A non-empty result is memoized in $this->mURLVariant.
 *
 * @return string|null Validated variant code, or null when the request
 *   names no valid variant
 */
public function getURLVariant() {
	global $wgRequest;

	if ( !$this->mURLVariant ) {
		$requested = $wgRequest->getText( 'variant' );
		if ( !$requested ) {
			$requested = $wgRequest->getVal( 'uselang' );
		}
		$this->mURLVariant = $this->validateVariant( $requested );
	}

	return $this->mURLVariant;
}
272 
/**
 * Determine the variant from the current user's options.
 *
 * Logged-in users: option 'variant' when this converter's main language is
 * the content language, otherwise 'variant-<maincode>'. Other users: the
 * 'language' option.
 *
 * @return string|null|bool Validated variant code, null when the option is
 *   not a valid variant, or false when $wgUser is not safe to load
 */
protected function getUserVariant() {
	global $wgUser;

	// Deliberately not memoized: returning a cached $this->mUserVariant
	// here wreaks havoc on parserTest.php.

	// Never touch a stub user object; doing so causes infinite recursion
	// during initialisation.
	if ( !$wgUser->isSafeToLoad() ) {
		return false;
	}

	if ( !$wgUser->isLoggedIn() ) {
		// Use the plain language option rather than constructing wgLang,
		// again to avoid infinite recursion.
		$optionName = 'language';
	} else {
		$contLangCode = MediaWikiServices::getInstance()->getContentLanguage()->getCode();
		$optionName = $this->mMainLanguageCode == $contLangCode
			? 'variant'
			: 'variant-' . $this->mMainLanguageCode;
	}

	$this->mUserVariant = $this->validateVariant( $wgUser->getOption( $optionName ) );
	return $this->mUserVariant;
}
312 
/**
 * Determine the variant from the Accept-Language request header.
 *
 * Each accepted language is validated in order; the first valid one wins.
 * If none is valid, the fallbacks of the accepted languages are tried in
 * the order they were collected. A non-empty result is memoized.
 *
 * @return string|null Validated variant code, or null when the header
 *   yields nothing usable
 */
protected function getHeaderVariant() {
	global $wgRequest;

	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	$acceptLangs = array_keys( $wgRequest->getAcceptLang() );
	if ( !$acceptLangs ) {
		return null;
	}

	// Fallbacks are only recorded during the first pass and examined
	// afterwards, so that directly accepted languages take priority.
	$deferred = [];
	foreach ( $acceptLangs as $lang ) {
		$this->mHeaderVariant = $this->validateVariant( $lang );
		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}

		$fallbacks = $this->getVariantFallbacks( $lang );
		if ( is_array( $fallbacks ) ) {
			$deferred = array_merge( $deferred, $fallbacks );
		} elseif ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
			$deferred[] = $fallbacks;
		}
	}

	foreach ( array_unique( $deferred ) as $lang ) {
		$this->mHeaderVariant = $this->validateVariant( $lang );
		if ( $this->mHeaderVariant ) {
			break;
		}
	}

	return $this->mHeaderVariant;
}
364 
/**
 * Dictionary-based conversion of $text into a variant.
 *
 * Markup (HTML tags, entities, parser markers, and the contents of
 * <code>, <script> and <pre>) is left untouched, except that 'title' and
 * 'alt' attribute values inside tags are converted recursively. Everything
 * between the markup pieces is translated in one batch.
 *
 * @param string $text Text to convert
 * @param string|bool $toVariant Target variant; when falsy the preferred
 *   variant is used
 * @return string Converted text. The input is returned unchanged when no
 *   target variant is available or guessVariant() reports a match; if the
 *   markup regex fails, an error is logged and the text is returned
 *   untranslated (with NUL and \004 bytes already stripped).
 */
public function autoConvert( $text, $toVariant = false ) {
	$this->loadTables();

	if ( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
		if ( !$toVariant ) {
			return $text;
		}
	}

	if ( $this->guessVariant( $text, $toVariant ) ) {
		return $text;
	}

	/* Everything is converted except:
	   1. HTML markup (anything between < and >)
	   2. HTML entities
	   3. placeholders created by the parser
	   IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
	   Minimize use of backtracking where possible.
	*/
	static $skipRegex;
	if ( $skipRegex === null ) {
		$markerAlt = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';

		// Needed when the text itself sits inside an HTML tag
		$insideTagAlt = '|<[^>\004]++(?=\004$)|^[^<>]*+>';

		// The tag alternatives below are optimized for the common case of
		// few or no children: consume possessively and only backtrack
		// when a '<' is hit.

		// No conversion between <code> tags
		$codeAlt = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
		// No conversion of <script> tags
		$scriptAlt = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
		// No conversion of <pre> tags
		$preAlt = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
		// The trailing "|.*+)" makes us fail securely (hopefully) by
		// swallowing the rest of the string if some HTML syntax was missed.
		$fullTagAlt = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';

		$skipRegex = '/' . $codeAlt . $scriptAlt . $preAlt . $fullTagAlt .
			'&[a-zA-Z#][a-z0-9]++;' . $markerAlt . $insideTagAlt . '|\004$/s';
	}

	// Guard against delimiter bytes in the input
	// (should never happen: see T159174)
	$text = str_replace( [ "\000", "\004" ], '', $text );

	$markupMatches = null;
	$elementMatches = null;

	// A \004 marker is appended so the regex always matches through to the
	// end of the text (otherwise pcre.backtrack_limit might cause silent
	// failure).
	$textWithMarker = $text . "\004";
	$textLength = strlen( $text );

	$cursor = 0;
	$sourceBlob = '';
	$literalBlob = '';

	while ( $cursor < $textLength ) {
		$found = preg_match( $skipRegex, $textWithMarker, $markupMatches,
			PREG_OFFSET_CAPTURE, $cursor );
		if ( !$found ) {
			// If we hit here, then Language Converter could be tricked
			// into doing an XSS, so we refuse to translate.
			// If non-crazy input manages to reach this code path,
			// we should consider it a bug.
			$log = LoggerFactory::getInstance( 'languageconverter' );
			$log->error( "Hit pcre.backtrack_limit in " . __METHOD__
				. ". Disabling language conversion for this page.",
				[
					"method" => __METHOD__,
					"variant" => $toVariant,
					"startOfText" => substr( $text, 0, 500 )
				]
			);
			return $text;
		}

		$element = $markupMatches[0][0];
		$elementPos = $markupMatches[0][1];
		if ( $element === "\004" ) {
			// Only the end marker matched: no more markup.
			$elementPos = $textLength;
			$element = '';
		} elseif ( substr( $element, -1 ) === "\004" ) {
			// Can happen with unclosed HTML tags, for example when a
			// recursive call converts a title attribute containing a
			// &lt; e.g. <div title="&lt;">.
			$element = substr( $element, 0, -1 );
		}

		// Queue the plain text before the markup for batch translation
		$sourceBlob .= substr( $text, $cursor, $elementPos - $cursor ) . "\000";

		// Move past the matched element
		$cursor = $elementPos + strlen( $element );

		// Convert alt/title attribute values inside the matched element
		if ( $element !== ''
			&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
		) {
			// FIXME, this decodes entities, so if you have something
			// like <div title="foo&lt;bar"> the bar won't get
			// translated since after entity decoding it looks like
			// unclosed html and we call this method recursively
			// on attributes.
			$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
			// Keep self-closing tags self-closing.
			$selfClose = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
			$attrsChanged = false;
			foreach ( [ 'title', 'alt' ] as $attrName ) {
				if ( !isset( $attrs[$attrName] ) ) {
					continue;
				}
				$original = $attrs[$attrName];
				$converted = $original;
				// Don't convert URLs
				if ( !strpos( $original, '://' ) ) {
					$converted = $this->recursiveConvertTopLevel( $original, $toVariant );
				}

				if ( $converted !== $original ) {
					$attrs[$attrName] = $converted;
					$attrsChanged = true;
				}
			}
			if ( $attrsChanged ) {
				$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
					$selfClose . $elementMatches[3];
			}
		}

		$literalBlob .= $element . "\000";
	}

	// Translate all queued text segments in a single pass
	$translatedBlob = $this->translate( $sourceBlob, $toVariant );

	// Interleave translated segments with the untouched markup
	$translatedParts = StringUtils::explode( "\000", $translatedBlob );
	$literalParts = StringUtils::explode( "\000", $literalBlob );
	$result = '';
	while ( $translatedParts->valid() && $literalParts->valid() ) {
		$result .= $translatedParts->current();
		$result .= $literalParts->current();
		$translatedParts->next();
		$literalParts->next();
	}

	return $result;
}
524 
/**
 * Translate a string to a variant using that variant's conversion table.
 *
 * Unlike autoConvert(), no attempt is made to skip markup.
 *
 * @param string $text Text to translate
 * @param string $variant Variant code whose table is applied
 * @return string Translated text; $text itself when trim( $text ) is falsy
 *   (empty, whitespace-only, or '0')
 */
public function translate( $text, $variant ) {
	if ( !trim( $text ) ) {
		return $text;
	}

	$this->loadTables();
	return $this->mTables[$variant]->replace( $text );
}
543 
/**
 * Call translate() to convert text to every configured variant.
 *
 * @param string $text Text to convert
 * @return array Map of variant code => converted text
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$converted = [];
	foreach ( $this->mVariants as $code ) {
		$converted[$code] = $this->translate( $text, $code );
	}
	return $converted;
}
560 
/**
 * Apply a manual conversion rule: record its title (if it sets one) and
 * merge its pairs into, or remove them from, the conversion tables.
 *
 * @param ConverterRule $convRule Parsed rule
 */
protected function applyManualConv( $convRule ) {
	// -{T|zh-cn:TitleCN; zh-tw:TitleTw}- customises the title conversion.
	// T26072: only overwrite $mConvRuleTitle when this rule actually
	// carries a title; otherwise later non-title rules would wipe it out
	// and break title conversion.
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// Merge into / remove from the global tables
	$ruleTable = $convRule->getConvTable();
	$ruleAction = $convRule->getRulesAction();
	foreach ( $ruleTable as $ruleVariant => $pairs ) {
		$code = $this->validateVariant( $ruleVariant );
		if ( !$code ) {
			continue;
		}

		if ( $ruleAction == 'add' ) {
			// Pair-by-pair is about 2.5 times faster than array_merge().
			foreach ( $pairs as $source => $target ) {
				$this->mTables[$code]->setPair( $source, $target );
			}
		} elseif ( $ruleAction == 'remove' ) {
			$this->mTables[$code]->removeArray( $pairs );
		}
	}
}
596 
/**
 * Convert a title (namespace prefix and text) to the preferred variant.
 *
 * @param Title $title Title object; getNamespace() and getText() are used
 * @return string "<converted namespace>:<translated text>", or just the
 *   translated text for the main namespace
 */
public function convertTitle( $title ) {
	$variant = $this->getPreferredVariant();
	$ns = $title->getNamespace();

	$prefix = $ns !== NS_MAIN
		? $this->convertNamespace( $ns, $variant ) . ':'
		: '';

	return $prefix . $this->translate( $title->getText(), $variant );
}
615 
/**
 * Get the namespace display name in a variant.
 *
 * Lookup order: the local server cache; the 'conversion-ns<index>' message
 * in the target variant; the same message in the content language,
 * translated to the variant; finally the namespace name of the variant's
 * own Language object. The result is cached for 60 seconds.
 *
 * @param int $index Namespace id
 * @param string|null $variant Variant code; null means the preferred variant
 * @return string Namespace name for display; '' for the main namespace
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}

	if ( $variant === null ) {
		$variant = $this->getPreferredVariant();
	}

	$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
	$key = $cache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
	$cached = $cache->get( $key );
	if ( $cached !== false ) {
		return $cached;
	}

	$nsText = false;

	// 1. A message giving the converted name directly in the target variant
	$variantMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
	if ( $variantMsg->exists() ) {
		$nsText = $variantMsg->plain();
	}

	// 2. A content-language message, which still needs translating
	if ( $nsText === false ) {
		$contentMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
		if ( $contentMsg->exists() ) {
			$nsText = $this->translate( $contentMsg->plain(), $variant );
		}
	}

	// 3. No message at all: use the variant language's namespace names
	if ( $nsText === false ) {
		$variantLang = $this->mLangObj->factory( $variant );
		$nsText = $variantLang->getFormattedNsText( $index );
	}

	$cache->set( $key, $nsText, 60 );

	return $nsText;
}
664 
/**
 * Convert text to the preferred variant, honouring -{ }- conversion rules.
 *
 * @param string $text Text to convert
 * @return string Converted text
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
687 
/**
 * Convert text to a given variant, honouring -{ }- conversion rules.
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @return string Converted text, or $text unchanged when
 *   $wgDisableLangConversion is set
 */
public function convertTo( $text, $variant ) {
	// Must be declared global: as an undefined local the check below would
	// always be false and the setting would be silently ignored.
	global $wgDisableLangConversion;

	if ( $wgDisableLangConversion ) {
		return $text;
	}
	// Reset converter state for a new converter run.
	$this->mConvRuleTitle = false;
	return $this->recursiveConvertTopLevel( $text, $variant );
}
706 
/**
 * Convert text at the top level: plain segments go through autoConvert()
 * (unless guessVariant() says the text is already in the target variant),
 * and each -{ }- rule found outside HTML is handed to recursiveConvertRule().
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @param int $depth Current nesting depth of conversion rules
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$offset = 0;
	$out = '';
	$length = strlen( $text );
	$shouldConvert = !$this->guessVariant( $text, $variant );

	$noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
	$noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
	// phpcs:ignore Generic.Files.LineLength
	$noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
	// Only match -{ outside of html.
	$rulePattern = "/$noScript|$noStyle|$noHtml|-\{/";

	while ( $offset < $length ) {
		$m = null;
		if ( !preg_match( $rulePattern, $text, $m, PREG_OFFSET_CAPTURE, $offset ) ) {
			// No more rule markup: the remainder is a plain segment
			$tail = substr( $text, $offset );
			return $out . ( $shouldConvert ? $this->autoConvert( $tail, $variant ) : $tail );
		}

		// Offset of the -{ marker (found outside any attribute)
		$rulePos = $m[0][1];

		// Plain segment before the marker
		$head = substr( $text, $offset, $rulePos - $offset );
		$out .= $shouldConvert ? $this->autoConvert( $head, $variant ) : $head;

		// recursiveConvertRule() takes the position by reference and
		// advances it past the rule it consumed.
		$offset = $rulePos;
		$out .= $this->recursiveConvertRule( $text, $variant, $offset, $depth + 1 );
	}

	return $out;
}
759 
/**
 * Convert one -{ }- rule, recursing into nested rules.
 *
 * @param string $text Full text being converted
 * @param string $variant Target variant code
 * @param int &$startPos Offset of the opening "-{"; on return it points
 *   just past the closing "}-", or to the end of the text when the rule
 *   is unclosed
 * @param int $depth Current nesting depth
 * @return string Display text of the rule; for an unclosed rule, "-{"
 *   followed by the auto-converted remainder
 * @throws MWException When $startPos does not point at "-{"
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick sanity check (no function calls)
	if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
		throw new MWException( __METHOD__ . ': invalid input string' );
	}

	$startPos += 2;
	$ruleText = '';
	$warned = false;
	$length = strlen( $text );

	while ( $startPos < $length ) {
		$m = false;
		preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
		if ( !$m ) {
			// No further delimiter: the rule is unclosed
			break;
		}

		$token = $m[0][0];
		$tokenPos = $m[0][1];

		// Collect the text before the delimiter and move onto it
		$ruleText .= substr( $text, $startPos, $tokenPos - $startPos );
		$startPos = $tokenPos;

		if ( $token === '}-' ) {
			// End of this rule: parse and apply it
			$startPos += 2;
			$rule = new ConverterRule( $ruleText, $this );
			$rule->parse( $variant );
			$this->applyManualConv( $rule );
			return $rule->getDisplay();
		}

		if ( $token !== '-{' ) {
			throw new MWException( __METHOD__ . ': invalid regex match' );
		}

		if ( $depth < $this->mMaxDepth ) {
			// Nested rule: the recursive call advances $startPos itself
			$ruleText .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
			continue;
		}

		// Too deep: keep the marker literally and warn once
		$ruleText .= '-{';
		if ( !$warned ) {
			$ruleText .= '<span class="error">' .
				wfMessage( 'language-converter-depth-warning' )
					->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
				'</span>';
			$warned = true;
		}
		$startPos += 2;
	}

	// Unclosed rule: treat whatever remains as ordinary text
	if ( $startPos < $length ) {
		$ruleText .= substr( $text, $startPos );
	}
	$startPos = $length;
	return '-{' . $this->autoConvert( $ruleText, $variant );
}
837 
/**
 * If a link target does not exist, look for an existing page under one of
 * the variant spellings of the link and switch to it.
 *
 * Nothing is done when $nt already exists, when conversion is disabled, or
 * (unless $ignoreOtherCond is set) when the request has redirect=no,
 * linkconvert=no, or an edit/submit action (an edit of a red link counts
 * as a view).
 *
 * @param string &$link Link text; replaced by the found variant's text
 * @param Title &$nt Title object of the link; replaced by the found variant
 * @param bool $ignoreOtherCond Ignore the request-based conditions
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	# If the article has already existed, there is no need to
	# check it again, otherwise it may cause a fault.
	if ( is_object( $nt ) && $nt->exists() ) {
		return;
	}

	global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;

	$isredir = $wgRequest->getText( 'redirect', 'yes' );
	$action = $wgRequest->getText( 'action' );
	if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
		$action = 'view';
	}
	$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
	// NOTE(review): the second operand was missing from the truncated
	// statement and is restored from MediaWiki's $wgDisableTitleConversion
	// setting - confirm against upstream.
	$disableLinkConversion = $wgDisableLangConversion
		|| $wgDisableTitleConversion;
	$linkBatch = new LinkBatch();

	$ns = NS_MAIN;

	if ( $disableLinkConversion ||
		( !$ignoreOtherCond &&
			( $isredir == 'no'
				|| $action == 'edit'
				|| $action == 'submit'
				|| $linkconvert == 'no' ) ) ) {
		return;
	}

	if ( is_object( $nt ) ) {
		$ns = $nt->getNamespace();
	}

	$variants = $this->autoConvertToAllVariants( $link );
	if ( !$variants ) { // give up
		return;
	}

	$titles = [];

	foreach ( $variants as $v ) {
		if ( $v != $link ) {
			$varnt = Title::newFromText( $v, $ns );
			if ( $varnt !== null ) {
				$linkBatch->addObj( $varnt );
				$titles[] = $varnt;
			}
		}
	}

	// fetch all variants in single query
	$linkBatch->execute();

	// The first variant title that exists wins.
	foreach ( $titles as $varnt ) {
		if ( $varnt->getArticleID() > 0 ) {
			$nt = $varnt;
			$link = $varnt->getText();
			break;
		}
	}
}
910 
/**
 * Returns language specific hash options: '!' followed by the preferred
 * variant.
 *
 * @return string
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
921 
/**
 * Guess if a text is written in a variant.
 *
 * This base implementation never recognises anything; autoConvert() and
 * recursiveConvertTopLevel() skip conversion when an override returns true.
 *
 * @param string $text Text to examine
 * @param string $variant Variant code to test for
 * @return bool Always false here
 */
public function guessVariant( $text, $variant ) {
	$alreadyInVariant = false;
	return $alreadyInVariant;
}
935 
/**
 * Load the default conversion tables.
 *
 * Called by loadTables() on a cache miss; this base version only throws,
 * so concrete converters have to provide their own implementation.
 *
 * @throws MWException Always
 */
function loadDefaultTables() {
	throw new MWException(
		'Must implement loadDefaultTables() method in class ' . static::class
	);
}
947 
/**
 * Load the conversion tables, from the cache or from scratch.
 *
 * Does nothing if the tables were already loaded. On a cache miss (or a
 * cached value lacking self::CACHE_VERSION_KEY) the default tables are
 * loaded, merged with the parsed MediaWiki:Conversiontable/* pages,
 * post-processed and written back to the cache for 43200 seconds.
 *
 * @param bool $fromCache Whether to try the cache first
 */
function loadTables( $fromCache = true ) {
	// Must be declared global: as an undefined local, the cache type
	// passed to ObjectCache::getInstance() below would be null.
	global $wgLanguageConverterCacheType;

	if ( $this->mTablesLoaded ) {
		return;
	}

	$this->mTablesLoaded = true;
	$this->mTables = false;
	$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
	$cacheKey = $cache->makeKey( 'conversiontables', $this->mMainLanguageCode );
	if ( $fromCache ) {
		$this->mTables = $cache->get( $cacheKey );
	}
	if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
		// not in cache, or we need a fresh reload.
		// We will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$this->loadDefaultTables();
		foreach ( $this->mVariants as $var ) {
			$cached = $this->parseCachedTable( $var );
			$this->mTables[$var]->mergeArray( $cached );
		}

		$this->postLoadTables();
		// Mark the array with the current cache format version.
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		$cache->set( $cacheKey, $this->mTables, 43200 );
	}
}
983 
/**
 * Hook for post processing after conversion tables are loaded.
 *
 * loadTables() calls this after merging the tables and before caching
 * them; the base implementation does nothing.
 */
function postLoadTables() {
	// Intentionally empty.
}
989 
/**
 * Discard the loaded conversion tables and rebuild them, bypassing the
 * cache on read (loadTables() still writes the fresh tables back).
 */
private function reloadTables() {
	if ( $this->mTables ) {
		unset( $this->mTables );
	}

	// Clear the "loaded" flag so that loadTables() does not return early.
	$this->mTablesLoaded = false;
	$useCache = false;
	$this->loadTables( $useCache );
}
1005 
/**
 * Parse the conversion table stored in MediaWiki:Conversiontable/<code>
 * (or one of its subpages) and return the pairs it defines.
 *
 * Pairs are written as "from => to;" inside -{ }- blocks; the characters
 * ' " * # are stripped and anything after "//" in the target is dropped.
 * Subpages linked as [[MediaWiki:Conversiontable/<code>/...]] are parsed
 * too when $recursive is set. Each page is parsed at most once per
 * process: a repeated request for the same page returns an empty array.
 *
 * @param string $code Variant code
 * @param string $subpage Subpage name; '' for the top-level table page
 * @param bool $recursive Whether to follow links to subpages
 * @return array Map of source string => replacement string
 */
function parseCachedTable( $code, $subpage = '', $recursive = true ) {
	static $parsed = [];

	$key = 'Conversiontable/' . $code;
	if ( $subpage ) {
		$key .= '/' . $subpage;
	}
	if ( array_key_exists( $key, $parsed ) ) {
		return [];
	}

	$parsed[$key] = true;

	if ( $subpage === '' ) {
		$txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
	} else {
		$txt = false;
		// The subpage lives in the MediaWiki namespace under the same key.
		// NOTE(review): this assignment was missing, leaving $title
		// undefined so that subpages were never read - confirm the
		// lookup against upstream.
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
		if ( $title && $title->exists() ) {
			$revision = Revision::newFromTitle( $title );
			if ( $revision ) {
				if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
					$txt = $revision->getContent( RevisionRecord::RAW )->getText();
				}

				// @todo in the future, use a specialized content model, perhaps based on json!
			}
		}
	}

	# Nothing to parse if there's no text
	if ( $txt === false || $txt === null || $txt === '' ) {
		return [];
	}

	// get all subpage links of the form
	// [[MediaWiki:Conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
		':Conversiontable';
	$subs = StringUtils::explode( '[[', $txt );
	$sublinks = [];
	foreach ( $subs as $sub ) {
		$link = explode( ']]', $sub, 2 );
		if ( count( $link ) != 2 ) {
			continue;
		}
		// Drop the link caption, then split into head / code / subpage
		$b = explode( '|', $link[0], 2 );
		$b = explode( '/', trim( $b[0] ), 3 );
		if ( count( $b ) == 3 ) {
			$sublink = $b[2];
		} else {
			$sublink = '';
		}

		if ( $b[0] == $linkhead && $b[1] == $code ) {
			$sublinks[] = $sublink;
		}
	}

	// parse the mappings in this page
	$blocks = StringUtils::explode( '-{', $txt );
	$ret = [];
	$first = true;
	foreach ( $blocks as $block ) {
		if ( $first ) {
			// Skip the part before the first -{
			$first = false;
			continue;
		}
		$mappings = explode( '}-', $block, 2 )[0];
		$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
		$table = StringUtils::explode( ';', $stripped );
		foreach ( $table as $t ) {
			// Entries without exactly one '=>' are ignored
			$m = explode( '=>', $t, 3 );
			if ( count( $m ) != 2 ) {
				continue;
			}
			// trim any trailing comments starting with '//'
			$tt = explode( '//', $m[1], 2 );
			$ret[trim( $m[0] )] = trim( $tt[0] );
		}
	}

	// recursively parse the subpages
	if ( $recursive ) {
		foreach ( $sublinks as $link ) {
			$s = $this->parseCachedTable( $code, $link, $recursive );
			// On duplicate keys the subpage's pair takes precedence
			$ret = $s + $ret;
		}
	}

	if ( $this->mUcfirst ) {
		foreach ( $ret as $k => $v ) {
			$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
		}
	}
	return $ret;
}
1123 
/**
 * Enclose a string in the "no conversion" marker -{R|...}-.
 *
 * @param string $text Text to be protected from conversion
 * @param bool $noParse Unused by this implementation
 * @return string The wrapped text, or $text unchanged when it already
 *   contains a "-{" or "}-" marker anywhere
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	// strpos() returns 0 for a match at the very start of the string, so
	// the result must be compared with false rather than tested for
	// truthiness; otherwise text beginning with a marker is wrapped again.
	if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
		return $text;
	}

	return "-{R|$text}-";
}
1141 
/**
 * Convert the sort key for a category link.
 *
 * The base implementation returns the key unchanged.
 *
 * @param string $key Sort key
 * @return string The same key
 */
function convertCategoryKey( $key ) {
	$sortKey = $key;
	return $sortKey;
}
1153 
/**
 * Refresh the cache of conversion tables when
 * MediaWiki:Conversiontable* is updated.
 *
 * The tables are reloaded only for titles in the MediaWiki namespace whose
 * DB key is "Conversiontable/<variant>[/...]" with a valid variant.
 *
 * @param Title $titleobj The Title of the page being updated
 */
public function updateConversionTable( Title $titleobj ) {
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return;
	}

	$parts = explode( '/', $titleobj->getDBkey(), 3 );
	if ( count( $parts ) > 1
		&& $parts[0] == 'Conversiontable'
		&& $this->validateVariant( $parts[1] )
	) {
		$this->reloadTables();
	}
}
1172 
/**
 * Get the cached separator pattern for ConverterRule::parseRules().
 *
 * The pattern is built on first use and memoized in
 * $this->mVarSeparatorPattern.
 *
 * @return string Regex suitable for preg_split()
 */
public function getVarSeparatorPattern() {
	if ( is_null( $this->mVarSeparatorPattern ) ) {
		// varsep_pattern for preg_split:
		// text should be split by ";" only if a valid variant
		// name exists after the markup, for example:
		// -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
		// <span style="font-size:120%;">yyy</span>;}-
		// we should split it as:
		// [
		// [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
		// [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
		// [2] => ''
		// ]
		$expandedVariants = [];
		foreach ( $this->mVariants as $variant ) {
			$expandedVariants[ $variant ] = 1;
			// Accept standard BCP 47 names for variants as well.
			$expandedVariants[ LanguageCode::bcp47( $variant ) ] = 1;
		}
		// Accept old deprecated names for variants
		foreach ( LanguageCode::getDeprecatedCodeMapping() as $old => $new ) {
			if ( isset( $expandedVariants[ $new ] ) ) {
				$expandedVariants[ $old ] = 1;
			}
		}

		// Split on ";" only when followed by one of the alternatives
		// below, or by the end of the string.
		$pat = '/;\s*(?=';
		foreach ( $expandedVariants as $variant => $ignore ) {
			// zh-hans:xxx;zh-hant:yyy
			$pat .= $variant . '\s*:|';
			// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
			$pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
		}
		$pat .= '\s*$)/';
		$this->mVarSeparatorPattern = $pat;
	}

	return $this->mVarSeparatorPattern;
}
1215 }
const MARKER_PREFIX
Definition: Parser.php:138
updateConversionTable(Title $titleobj)
Refresh the cache of conversion tables when MediaWiki:Conversiontable* is updated.
static fetchLanguageNames( $inLanguage=self::AS_AUTONYMS, $include='mw')
Get an array of language names, indexed by code.
Definition: Language.php:840
const CONTENT_MODEL_WIKITEXT
Definition: Defines.php:215
const NS_MAIN
Definition: Defines.php:60
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1972
validateVariant( $variant=null)
Validate the variant and return an appropriate strict internal variant code if one exists...
autoConvertToAllVariants( $text)
Call translate() to convert text to all valid variants.
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
getVarSeparatorPattern()
Get the cached separator pattern for ConverterRule::parseRules()
guessVariant( $text, $variant)
Guess if a text is written in a variant.
static getInstance( $id)
Get a cached instance of the specified type of cache object.
Definition: ObjectCache.php:92
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
getExtraHashOptions()
Returns language specific hash options.
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that&#39;s attached to a given link target...
Definition: Revision.php:137
postLoadTables()
Hook for post processing after conversion tables are loaded.
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place $output
Definition: hooks.txt:2205
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition: hooks.txt:3039
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:767
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:34
ReplacementArray [] $mTables
@phan-var array<string,ReplacementArray>
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
markNoConversion( $text, $noParse=false)
Enclose a string with the "no conversion" tag.
autoConvert( $text, $toVariant=false)
Dictionary-based conversion.
getDBkey()
Get the main part with underscores.
Definition: Title.php:1021
convertCategoryKey( $key)
Convert the sorting key for category links.
getURLVariant()
Get the variant specified in the URL.
findVariantLink(&$link, &$nt, $ignoreOtherCond=false)
If a language supports multiple variants, it is possible that non-existing link in one variant actual...
parseCachedTable( $code, $subpage='', $recursive=true)
Parse the conversion table stored in the cache.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
getVariantFallbacks( $variant)
In case some variant is not defined in the markup, we need to have some fallback. ...
reloadTables()
Reload the conversion tables.
$cache
Definition: mcc.php:33
Parser for rules of language conversion, parse rules in -{ }- tag.
getUserVariant()
Determine if the user has a variant set.
$wgLanguageConverterCacheType
The cache type for storing language conversion tables, which are used when parsing certain text and i...
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:767
$wgDisableTitleConversion
Whether to enable language variant conversion for links.
convert( $text)
Convert text to different variants of a language.
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1450
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:912
$wgDisabledVariants
Disabled variants array of language variant conversion.
getVariants()
Get all valid variants.
getNamespace()
Get the namespace index, i.e. one of the NS_xxxx constants.
Definition: Title.php:1045
applyManualConv( $convRule)
Apply manual conversion rules.
static expandAttributes(array $attribs)
Given an associative array of element attributes, generate a string to stick after the element name i...
Definition: Html.php:480
recursiveConvertTopLevel( $text, $variant, $depth=0)
Recursively convert text on the outside.
static getDeprecatedCodeMapping()
Returns a mapping of deprecated language codes that were used in previous versions of MediaWiki to up...
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition: hooks.txt:767
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$wgDisableLangConversion
Whether to enable language variant conversion.
$wgDefaultLanguageVariant
Default variant code, if false, the default will be the language code.
const NS_MEDIAWIKI
Definition: Defines.php:68
convertTo( $text, $variant)
Same as convert(), except with an extra parameter to specify a custom variant.
translate( $text, $variant)
Translate a string to a variant.
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:620
recursiveConvertRule( $text, $variant, &$startPos, $depth=0)
Recursively convert text on the inside.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
this hook is for auditing only $req
Definition: hooks.txt:960
convertNamespace( $index, $variant=null)
Get the namespace display name in the preferred variant.
static replaceDeprecatedCodes( $code)
Replace deprecated language codes that were used in previous versions of MediaWiki to up-to-date...
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition: linkcache.txt:17
getHeaderVariant()
Determine the language variant from the Accept-Language header.
you have access to all of the normal MediaWiki so you can get a DB use the etc For full docs on the Maintenance class
Definition: maintenance.txt:52
getDefaultVariant()
Get default variant.
__construct(Language $langobj, $maincode, $variants=[], $variantfallbacks=[], $flags=[], $manualLevel=[])
getConvRuleTitle()
Get the title produced by the conversion rule.
MediaWiki Logger LoggerFactory implements a PSR [0] compatible message logging system Named Psr Log LoggerInterface instances can be obtained from the MediaWiki Logger LoggerFactory::getInstance() static method. MediaWiki\Logger\LoggerFactory expects a class implementing the MediaWiki\Logger\Spi interface to act as a factory for new Psr\Log\LoggerInterface instances. The "Spi" in MediaWiki\Logger\Spi stands for "service provider interface". An SPI is an API intended to be implemented or extended by a third party. This software design pattern is intended to enable framework extension and replaceable components. It is specifically used in the MediaWiki\Logger\LoggerFactory service to allow alternate PSR-3 logging implementations to be easily integrated with MediaWiki. The service provider interface allows the backend logging library to be implemented in multiple ways. The $wgMWLoggerDefaultSpi global provides the classname of the default MediaWiki\Logger\Spi implementation to be loaded at runtime. This can either be the name of a class implementing the MediaWiki\Logger\Spi with a zero argument constructor or a callable that will return a MediaWiki\Logger\Spi instance. Alternately the MediaWiki\Logger\LoggerFactory MediaWiki Logger LoggerFactory
Definition: logger.txt:5
if(! $wgDBerrorLogTZ) $wgRequest
Definition: Setup.php:776
static array $languagesWithVariants
languages supporting variants
getPreferredVariant()
Get preferred language variant.
loadDefaultTables()
Load default conversion tables.
static bcp47( $code)
Get the normalised IETF language tag. See unit test for examples.
static singleton()
Get the singleton instance of this class.
convertTitle( $title)
Auto convert a Title object to a readable string in the preferred variant.
switch( $options['output']) $languages
Definition: transstat.php:76
loadTables( $fromCache=true)
Load conversion tables either from the cache or the disk.
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:200
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:322