MediaWiki  1.33.0
LanguageConverter.php
Go to the documentation of this file.
1 <?php
22 
24 
34 class LanguageConverter {
40  public static $languagesWithVariants = [
41  'en',
42  'crh',
43  'gan',
44  'iu',
45  'kk',
46  'ku',
47  'shi',
48  'sr',
49  'tg',
50  'uz',
51  'zh',
52  ];
53 
54  public $mMainLanguageCode;
55 
59  public $mVariants;
60  public $mVariantFallbacks;
61  public $mVariantNames;
62  public $mTablesLoaded = false;
63 
68  public $mTables;
69 
70  // 'bidirectional' 'unidirectional' 'disable' for each variant
71  public $mManualLevel;
72 
73  public $mLangObj;
74  public $mFlags;
75  public $mDescCodeSep = ':', $mDescVarSep = ';';
76  public $mUcfirst = false;
77  public $mConvRuleTitle = false;
78  public $mURLVariant;
79  public $mUserVariant;
80  public $mHeaderVariant;
81  public $mMaxDepth = 10;
82  public $mVarSeparatorPattern;
83 
84  const CACHE_VERSION_KEY = 'VERSION 7';
85 
94  public function __construct( Language $langobj, $maincode, $variants = [],
95  $variantfallbacks = [], $flags = [],
96  $manualLevel = [] ) {
97  global $wgDisabledVariants;
98  $this->mLangObj = $langobj;
99  $this->mMainLanguageCode = $maincode;
100  $this->mVariants = array_diff( $variants, $wgDisabledVariants );
101  $this->mVariantFallbacks = $variantfallbacks;
102  $this->mVariantNames = Language::fetchLanguageNames();
103  $defaultflags = [
104  // 'S' show converted text
105  // '+' add rules for alltext
106  // 'E' the gave flags is error
107  // these flags above are reserved for program
108  'A' => 'A', // add rule for convert code (all text convert)
109  'T' => 'T', // title convert
110  'R' => 'R', // raw content
111  'D' => 'D', // convert description (subclass implement)
112  '-' => '-', // remove convert (not implement)
113  'H' => 'H', // add rule for convert code (but no display in placed code)
114  'N' => 'N', // current variant name
115  ];
116  $this->mFlags = array_merge( $defaultflags, $flags );
117  foreach ( $this->mVariants as $v ) {
118  if ( array_key_exists( $v, $manualLevel ) ) {
119  $this->mManualLevel[$v] = $manualLevel[$v];
120  } else {
121  $this->mManualLevel[$v] = 'bidirectional';
122  }
123  $this->mFlags[$v] = $v;
124  }
125  }
126 
133  public function getVariants() {
134  return $this->mVariants;
135  }
136 
148  public function getVariantFallbacks( $variant ) {
149  return $this->mVariantFallbacks[$variant] ?? $this->mMainLanguageCode;
150  }
151 
156  public function getConvRuleTitle() {
157  return $this->mConvRuleTitle;
158  }
159 
164  public function getPreferredVariant() {
165  global $wgDefaultLanguageVariant, $wgUser;
166 
167  $req = $this->getURLVariant();
168 
169  Hooks::run( 'GetLangPreferredVariant', [ &$req ] );
170 
171  if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
172  $req = $this->getUserVariant();
173  } elseif ( !$req ) {
174  $req = $this->getHeaderVariant();
175  }
176 
177  if ( $wgDefaultLanguageVariant && !$req ) {
178  $req = $this->validateVariant( $wgDefaultLanguageVariant );
179  }
180 
181  $req = $this->validateVariant( $req );
182 
183  // This function, unlike the other get*Variant functions, is
184  // not memoized (i.e. there return value is not cached) since
185  // new information might appear during processing after this
186  // is first called.
187  if ( $req ) {
188  return $req;
189  }
190  return $this->mMainLanguageCode;
191  }
192 
198  public function getDefaultVariant() {
200 
201  $req = $this->getURLVariant();
202 
203  if ( !$req ) {
204  $req = $this->getHeaderVariant();
205  }
206 
207  if ( $wgDefaultLanguageVariant && !$req ) {
208  $req = $this->validateVariant( $wgDefaultLanguageVariant );
209  }
210 
211  if ( $req ) {
212  return $req;
213  }
214  return $this->mMainLanguageCode;
215  }
216 
226  public function validateVariant( $variant = null ) {
227  if ( $variant === null ) {
228  return null;
229  }
230  // Our internal variants are always lower-case; the variant we
231  // are validating may have mixed case.
232  $variant = LanguageCode::replaceDeprecatedCodes( strtolower( $variant ) );
233  if ( in_array( $variant, $this->mVariants ) ) {
234  return $variant;
235  }
236  // Browsers are supposed to use BCP 47 standard in the
237  // Accept-Language header, but not all of our internal
238  // mediawiki variant codes are BCP 47. Map BCP 47 code
239  // to our internal code.
240  foreach ( $this->mVariants as $v ) {
241  // Case-insensitive match (BCP 47 is mixed case)
242  if ( strtolower( LanguageCode::bcp47( $v ) ) === $variant ) {
243  return $v;
244  }
245  }
246  return null;
247  }
248 
254  public function getURLVariant() {
255  global $wgRequest;
256 
257  if ( $this->mURLVariant ) {
258  return $this->mURLVariant;
259  }
260 
261  // see if the preference is set in the request
262  $ret = $wgRequest->getText( 'variant' );
263 
264  if ( !$ret ) {
265  $ret = $wgRequest->getVal( 'uselang' );
266  }
267 
268  $this->mURLVariant = $this->validateVariant( $ret );
269  return $this->mURLVariant;
270  }
271 
277  protected function getUserVariant() {
278  global $wgUser;
279 
280  // memoizing this function wreaks havoc on parserTest.php
281  /*
282  if ( $this->mUserVariant ) {
283  return $this->mUserVariant;
284  }
285  */
286 
287  // Get language variant preference from logged in users
288  // Don't call this on stub objects because that causes infinite
289  // recursion during initialisation
290  if ( !$wgUser->isSafeToLoad() ) {
291  return false;
292  }
293  if ( $wgUser->isLoggedIn() ) {
294  if (
295  $this->mMainLanguageCode ==
296  MediaWikiServices::getInstance()->getContentLanguage()->getCode()
297  ) {
298  $ret = $wgUser->getOption( 'variant' );
299  } else {
300  $ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
301  }
302  } else {
303  // figure out user lang without constructing wgLang to avoid
304  // infinite recursion
305  $ret = $wgUser->getOption( 'language' );
306  }
307 
308  $this->mUserVariant = $this->validateVariant( $ret );
309  return $this->mUserVariant;
310  }
311 
317  protected function getHeaderVariant() {
318  global $wgRequest;
319 
320  if ( $this->mHeaderVariant ) {
321  return $this->mHeaderVariant;
322  }
323 
324  // See if some supported language variant is set in the
325  // HTTP header.
326  $languages = array_keys( $wgRequest->getAcceptLang() );
327  if ( empty( $languages ) ) {
328  return null;
329  }
330 
331  $fallbackLanguages = [];
332  foreach ( $languages as $language ) {
333  $this->mHeaderVariant = $this->validateVariant( $language );
334  if ( $this->mHeaderVariant ) {
335  break;
336  }
337 
338  // To see if there are fallbacks of current language.
339  // We record these fallback variants, and process
340  // them later.
341  $fallbacks = $this->getVariantFallbacks( $language );
342  if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
343  $fallbackLanguages[] = $fallbacks;
344  } elseif ( is_array( $fallbacks ) ) {
345  $fallbackLanguages =
346  array_merge( $fallbackLanguages, $fallbacks );
347  }
348  }
349 
350  if ( !$this->mHeaderVariant ) {
351  // process fallback languages now
352  $fallback_languages = array_unique( $fallbackLanguages );
353  foreach ( $fallback_languages as $language ) {
354  $this->mHeaderVariant = $this->validateVariant( $language );
355  if ( $this->mHeaderVariant ) {
356  break;
357  }
358  }
359  }
360 
361  return $this->mHeaderVariant;
362  }
363 
374  public function autoConvert( $text, $toVariant = false ) {
375  $this->loadTables();
376 
377  if ( !$toVariant ) {
378  $toVariant = $this->getPreferredVariant();
379  if ( !$toVariant ) {
380  return $text;
381  }
382  }
383 
384  if ( $this->guessVariant( $text, $toVariant ) ) {
385  return $text;
386  }
387  /* we convert everything except:
388  1. HTML markups (anything between < and >)
389  2. HTML entities
390  3. placeholders created by the parser
391  IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
392  Minimize use of backtracking where possible.
393  */
394  $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';
395 
396  // this one is needed when the text is inside an HTML markup
397  $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';
398 
399  // Optimize for the common case where these tags have
400  // few or no children. Thus try and possesively get as much as
401  // possible, and only engage in backtracking when we hit a '<'.
402 
403  // disable convert to variants between <code> tags
404  $codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
405  // disable conversion of <script> tags
406  $scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
407  // disable conversion of <pre> tags
408  $prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
409  // The "|.*+)" at the end, is in case we missed some part of html syntax,
410  // we will fail securely (hopefully) by matching the rest of the string.
411  $htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
412 
413  $reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
414  '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
415  $startPos = 0;
416  $sourceBlob = '';
417  $literalBlob = '';
418 
419  // Guard against delimiter nulls in the input
420  // (should never happen: see T159174)
421  $text = str_replace( "\000", '', $text );
422  $text = str_replace( "\004", '', $text );
423 
424  $markupMatches = null;
425  $elementMatches = null;
426 
427  // We add a marker (\004) at the end of text, to ensure we always match the
428  // entire text (Otherwise, pcre.backtrack_limit might cause silent failure)
429  while ( $startPos < strlen( $text ) ) {
430  if ( preg_match( $reg, $text . "\004", $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
431  $elementPos = $markupMatches[0][1];
432  $element = $markupMatches[0][0];
433  if ( $element === "\004" ) {
434  // We hit the end.
435  $elementPos = strlen( $text );
436  $element = '';
437  } elseif ( substr( $element, -1 ) === "\004" ) {
438  // This can sometimes happen if we have
439  // unclosed html tags (For example
440  // when converting a title attribute
441  // during a recursive call that contains
442  // a &lt; e.g. <div title="&lt;">.
443  $element = substr( $element, 0, -1 );
444  }
445  } else {
446  // If we hit here, then Language Converter could be tricked
447  // into doing an XSS, so we refuse to translate.
448  // If non-crazy input manages to reach this code path,
449  // we should consider it a bug.
450  $log = LoggerFactory::getInstance( 'languageconverter' );
451  $log->error( "Hit pcre.backtrack_limit in " . __METHOD__
452  . ". Disabling language conversion for this page.",
453  [
454  "method" => __METHOD__,
455  "variant" => $toVariant,
456  "startOfText" => substr( $text, 0, 500 )
457  ]
458  );
459  return $text;
460  }
461  // Queue the part before the markup for translation in a batch
462  $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";
463 
464  // Advance to the next position
465  $startPos = $elementPos + strlen( $element );
466 
467  // Translate any alt or title attributes inside the matched element
468  if ( $element !== ''
469  && preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
470  ) {
471  // FIXME, this decodes entities, so if you have something
472  // like <div title="foo&lt;bar"> the bar won't get
473  // translated since after entity decoding it looks like
474  // unclosed html and we call this method recursively
475  // on attributes.
476  $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
477  // Ensure self-closing tags stay self-closing.
478  $close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
479  $changed = false;
480  foreach ( [ 'title', 'alt' ] as $attrName ) {
481  if ( !isset( $attrs[$attrName] ) ) {
482  continue;
483  }
484  $attr = $attrs[$attrName];
485  // Don't convert URLs
486  if ( !strpos( $attr, '://' ) ) {
487  $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
488  }
489 
490  if ( $attr !== $attrs[$attrName] ) {
491  $attrs[$attrName] = $attr;
492  $changed = true;
493  }
494  }
495  if ( $changed ) {
496  $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
497  $close . $elementMatches[3];
498  }
499  }
500  $literalBlob .= $element . "\000";
501  }
502 
503  // Do the main translation batch
504  $translatedBlob = $this->translate( $sourceBlob, $toVariant );
505 
506  // Put the output back together
507  $translatedIter = StringUtils::explode( "\000", $translatedBlob );
508  $literalIter = StringUtils::explode( "\000", $literalBlob );
509  $output = '';
510  while ( $translatedIter->valid() && $literalIter->valid() ) {
511  $output .= $translatedIter->current();
512  $output .= $literalIter->current();
513  $translatedIter->next();
514  $literalIter->next();
515  }
516 
517  return $output;
518  }
519 
529  public function translate( $text, $variant ) {
530  // If $text is empty or only includes spaces, do nothing
531  // Otherwise translate it
532  if ( trim( $text ) ) {
533  $this->loadTables();
534  $text = $this->mTables[$variant]->replace( $text );
535  }
536  return $text;
537  }
538 
545  public function autoConvertToAllVariants( $text ) {
546  $this->loadTables();
547 
548  $ret = [];
549  foreach ( $this->mVariants as $variant ) {
550  $ret[$variant] = $this->translate( $text, $variant );
551  }
552 
553  return $ret;
554  }
555 
561  protected function applyManualConv( $convRule ) {
562  // Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
563  // title conversion.
564  // T26072: $mConvRuleTitle was overwritten by other manual
565  // rule(s) not for title, this breaks the title conversion.
566  $newConvRuleTitle = $convRule->getTitle();
567  if ( $newConvRuleTitle ) {
568  // So I add an empty check for getTitle()
569  $this->mConvRuleTitle = $newConvRuleTitle;
570  }
571 
572  // merge/remove manual conversion rules to/from global table
573  $convTable = $convRule->getConvTable();
574  $action = $convRule->getRulesAction();
575  foreach ( $convTable as $variant => $pair ) {
576  $v = $this->validateVariant( $variant );
577  if ( !$v ) {
578  continue;
579  }
580 
581  if ( $action == 'add' ) {
582  // More efficient than array_merge(), about 2.5 times.
583  foreach ( $pair as $from => $to ) {
584  $this->mTables[$v]->setPair( $from, $to );
585  }
586  } elseif ( $action == 'remove' ) {
587  $this->mTables[$v]->removeArray( $pair );
588  }
589  }
590  }
591 
599  public function convertTitle( $title ) {
600  $variant = $this->getPreferredVariant();
601  $index = $title->getNamespace();
602  if ( $index !== NS_MAIN ) {
603  $text = $this->convertNamespace( $index, $variant ) . ':';
604  } else {
605  $text = '';
606  }
607  $text .= $this->translate( $title->getText(), $variant );
608  return $text;
609  }
610 
618  public function convertNamespace( $index, $variant = null ) {
619  if ( $index === NS_MAIN ) {
620  return '';
621  }
622 
623  if ( $variant === null ) {
624  $variant = $this->getPreferredVariant();
625  }
626 
627  $cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
628  $key = $cache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
629  $nsVariantText = $cache->get( $key );
630  if ( $nsVariantText !== false ) {
631  return $nsVariantText;
632  }
633 
634  // First check if a message gives a converted name in the target variant.
635  $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
636  if ( $nsConvMsg->exists() ) {
637  $nsVariantText = $nsConvMsg->plain();
638  }
639 
640  // Then check if a message gives a converted name in content language
641  // which needs extra translation to the target variant.
642  if ( $nsVariantText === false ) {
643  $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
644  if ( $nsConvMsg->exists() ) {
645  $nsVariantText = $this->translate( $nsConvMsg->plain(), $variant );
646  }
647  }
648 
649  if ( $nsVariantText === false ) {
650  // No message exists, retrieve it from the target variant's namespace names.
651  $langObj = $this->mLangObj->factory( $variant );
652  $nsVariantText = $langObj->getFormattedNsText( $index );
653  }
654 
655  $cache->set( $key, $nsVariantText, 60 );
656 
657  return $nsVariantText;
658  }
659 
678  public function convert( $text ) {
679  $variant = $this->getPreferredVariant();
680  return $this->convertTo( $text, $variant );
681  }
682 
692  public function convertTo( $text, $variant ) {
694  if ( $wgDisableLangConversion ) {
695  return $text;
696  }
697  // Reset converter state for a new converter run.
698  $this->mConvRuleTitle = false;
699  return $this->recursiveConvertTopLevel( $text, $variant );
700  }
701 
711  protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
712  $startPos = 0;
713  $out = '';
714  $length = strlen( $text );
715  $shouldConvert = !$this->guessVariant( $text, $variant );
716  $continue = 1;
717 
718  $noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
719  $noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
720  // phpcs:ignore Generic.Files.LineLength
721  $noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
722  while ( $startPos < $length && $continue ) {
723  $continue = preg_match(
724  // Only match -{ outside of html.
725  "/$noScript|$noStyle|$noHtml|-\{/",
726  $text,
727  $m,
728  PREG_OFFSET_CAPTURE,
729  $startPos
730  );
731 
732  if ( !$continue ) {
733  // No more markup, append final segment
734  $fragment = substr( $text, $startPos );
735  $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
736  return $out;
737  }
738 
739  // Offset of the match of the regex pattern.
740  $pos = $m[0][1];
741 
742  // Append initial segment
743  $fragment = substr( $text, $startPos, $pos - $startPos );
744  $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
745  // -{ marker found, not in attribute
746  // Advance position up to -{ marker.
747  $startPos = $pos;
748  // Do recursive conversion
749  // Note: This passes $startPos by reference, and advances it.
750  $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
751  }
752  return $out;
753  }
754 
766  protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
767  // Quick sanity check (no function calls)
768  if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
769  throw new MWException( __METHOD__ . ': invalid input string' );
770  }
771 
772  $startPos += 2;
773  $inner = '';
774  $warningDone = false;
775  $length = strlen( $text );
776 
777  while ( $startPos < $length ) {
778  $m = false;
779  preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
780  if ( !$m ) {
781  // Unclosed rule
782  break;
783  }
784 
785  $token = $m[0][0];
786  $pos = $m[0][1];
787 
788  // Markup found
789  // Append initial segment
790  $inner .= substr( $text, $startPos, $pos - $startPos );
791 
792  // Advance position
793  $startPos = $pos;
794 
795  switch ( $token ) {
796  case '-{':
797  // Check max depth
798  if ( $depth >= $this->mMaxDepth ) {
799  $inner .= '-{';
800  if ( !$warningDone ) {
801  $inner .= '<span class="error">' .
802  wfMessage( 'language-converter-depth-warning' )
803  ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
804  '</span>';
805  $warningDone = true;
806  }
807  $startPos += 2;
808  break;
809  }
810  // Recursively parse another rule
811  $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
812  break;
813  case '}-':
814  // Apply the rule
815  $startPos += 2;
816  $rule = new ConverterRule( $inner, $this );
817  $rule->parse( $variant );
818  $this->applyManualConv( $rule );
819  return $rule->getDisplay();
820  default:
821  throw new MWException( __METHOD__ . ': invalid regex match' );
822  }
823  }
824 
825  // Unclosed rule
826  if ( $startPos < $length ) {
827  $inner .= substr( $text, $startPos );
828  }
829  $startPos = $length;
830  return '-{' . $this->autoConvert( $inner, $variant );
831  }
832 
844  public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
845  # If the article has already existed, there is no need to
846  # check it again, otherwise it may cause a fault.
847  if ( is_object( $nt ) && $nt->exists() ) {
848  return;
849  }
850 
852  $isredir = $wgRequest->getText( 'redirect', 'yes' );
853  $action = $wgRequest->getText( 'action' );
854  if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
855  $action = 'view';
856  }
857  $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
858  $disableLinkConversion = $wgDisableLangConversion
860  $linkBatch = new LinkBatch();
861 
862  $ns = NS_MAIN;
863 
864  if ( $disableLinkConversion ||
865  ( !$ignoreOtherCond &&
866  ( $isredir == 'no'
867  || $action == 'edit'
868  || $action == 'submit'
869  || $linkconvert == 'no' ) ) ) {
870  return;
871  }
872 
873  if ( is_object( $nt ) ) {
874  $ns = $nt->getNamespace();
875  }
876 
877  $variants = $this->autoConvertToAllVariants( $link );
878  if ( !$variants ) { // give up
879  return;
880  }
881 
882  $titles = [];
883 
884  foreach ( $variants as $v ) {
885  if ( $v != $link ) {
886  $varnt = Title::newFromText( $v, $ns );
887  if ( !is_null( $varnt ) ) {
888  $linkBatch->addObj( $varnt );
889  $titles[] = $varnt;
890  }
891  }
892  }
893 
894  // fetch all variants in single query
895  $linkBatch->execute();
896 
897  foreach ( $titles as $varnt ) {
898  if ( $varnt->getArticleID() > 0 ) {
899  $nt = $varnt;
900  $link = $varnt->getText();
901  break;
902  }
903  }
904  }
905 
911  public function getExtraHashOptions() {
912  $variant = $this->getPreferredVariant();
913 
914  return '!' . $variant;
915  }
916 
927  public function guessVariant( $text, $variant ) {
928  return false;
929  }
930 
938  function loadDefaultTables() {
939  $class = static::class;
940  throw new MWException( "Must implement loadDefaultTables() method in class $class" );
941  }
942 
948  function loadTables( $fromCache = true ) {
950 
951  if ( $this->mTablesLoaded ) {
952  return;
953  }
954 
955  $this->mTablesLoaded = true;
956  $this->mTables = false;
958  $cacheKey = $cache->makeKey( 'conversiontables', $this->mMainLanguageCode );
959  if ( $fromCache ) {
960  $this->mTables = $cache->get( $cacheKey );
961  }
962  if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
963  // not in cache, or we need a fresh reload.
964  // We will first load the default tables
965  // then update them using things in MediaWiki:Conversiontable/*
966  $this->loadDefaultTables();
967  foreach ( $this->mVariants as $var ) {
968  $cached = $this->parseCachedTable( $var );
969  $this->mTables[$var]->mergeArray( $cached );
970  }
971 
972  $this->postLoadTables();
973  $this->mTables[self::CACHE_VERSION_KEY] = true;
974 
975  $cache->set( $cacheKey, $this->mTables, 43200 );
976  }
977  }
978 
982  function postLoadTables() {
983  }
984 
992  private function reloadTables() {
993  if ( $this->mTables ) {
994  unset( $this->mTables );
995  }
996 
997  $this->mTablesLoaded = false;
998  $this->loadTables( false );
999  }
1000 
1020  function parseCachedTable( $code, $subpage = '', $recursive = true ) {
1021  static $parsed = [];
1022 
1023  $key = 'Conversiontable/' . $code;
1024  if ( $subpage ) {
1025  $key .= '/' . $subpage;
1026  }
1027  if ( array_key_exists( $key, $parsed ) ) {
1028  return [];
1029  }
1030 
1031  $parsed[$key] = true;
1032 
1033  if ( $subpage === '' ) {
1034  $txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
1035  } else {
1036  $txt = false;
1038  if ( $title && $title->exists() ) {
1039  $revision = Revision::newFromTitle( $title );
1040  if ( $revision ) {
1041  if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
1042  $txt = $revision->getContent( Revision::RAW )->getText();
1043  }
1044 
1045  // @todo in the future, use a specialized content model, perhaps based on json!
1046  }
1047  }
1048  }
1049 
1050  # Nothing to parse if there's no text
1051  if ( $txt === false || $txt === null || $txt === '' ) {
1052  return [];
1053  }
1054 
1055  // get all subpage links of the form
1056  // [[MediaWiki:Conversiontable/zh-xx/...|...]]
1057  $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
1058  ':Conversiontable';
1059  $subs = StringUtils::explode( '[[', $txt );
1060  $sublinks = [];
1061  foreach ( $subs as $sub ) {
1062  $link = explode( ']]', $sub, 2 );
1063  if ( count( $link ) != 2 ) {
1064  continue;
1065  }
1066  $b = explode( '|', $link[0], 2 );
1067  $b = explode( '/', trim( $b[0] ), 3 );
1068  if ( count( $b ) == 3 ) {
1069  $sublink = $b[2];
1070  } else {
1071  $sublink = '';
1072  }
1073 
1074  if ( $b[0] == $linkhead && $b[1] == $code ) {
1075  $sublinks[] = $sublink;
1076  }
1077  }
1078 
1079  // parse the mappings in this page
1080  $blocks = StringUtils::explode( '-{', $txt );
1081  $ret = [];
1082  $first = true;
1083  foreach ( $blocks as $block ) {
1084  if ( $first ) {
1085  // Skip the part before the first -{
1086  $first = false;
1087  continue;
1088  }
1089  $mappings = explode( '}-', $block, 2 )[0];
1090  $stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
1091  $table = StringUtils::explode( ';', $stripped );
1092  foreach ( $table as $t ) {
1093  $m = explode( '=>', $t, 3 );
1094  if ( count( $m ) != 2 ) {
1095  continue;
1096  }
1097  // trim any trailling comments starting with '//'
1098  $tt = explode( '//', $m[1], 2 );
1099  $ret[trim( $m[0] )] = trim( $tt[0] );
1100  }
1101  }
1102 
1103  // recursively parse the subpages
1104  if ( $recursive ) {
1105  foreach ( $sublinks as $link ) {
1106  $s = $this->parseCachedTable( $code, $link, $recursive );
1107  $ret = $s + $ret;
1108  }
1109  }
1110 
1111  if ( $this->mUcfirst ) {
1112  foreach ( $ret as $k => $v ) {
1113  $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
1114  }
1115  }
1116  return $ret;
1117  }
1118 
1127  public function markNoConversion( $text, $noParse = false ) {
1128  # don't mark if already marked
1129  if ( strpos( $text, '-{' ) || strpos( $text, '}-' ) ) {
1130  return $text;
1131  }
1132 
1133  $ret = "-{R|$text}-";
1134  return $ret;
1135  }
1136 
1145  function convertCategoryKey( $key ) {
1146  return $key;
1147  }
1148 
1155  public function updateConversionTable( Title $titleobj ) {
1156  if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
1157  $title = $titleobj->getDBkey();
1158  $t = explode( '/', $title, 3 );
1159  $c = count( $t );
1160  if ( $c > 1 && $t[0] == 'Conversiontable' ) {
1161  if ( $this->validateVariant( $t[1] ) ) {
1162  $this->reloadTables();
1163  }
1164  }
1165  }
1166  }
1167 
1172  function getVarSeparatorPattern() {
1173  if ( is_null( $this->mVarSeparatorPattern ) ) {
1174  // varsep_pattern for preg_split:
1175  // text should be splited by ";" only if a valid variant
1176  // name exist after the markup, for example:
1177  // -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
1178  // <span style="font-size:120%;">yyy</span>;}-
1179  // we should split it as:
1180  // [
1181  // [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
1182  // [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
1183  // [2] => ''
1184  // ]
1185  $expandedVariants = [];
1186  foreach ( $this->mVariants as $variant ) {
1187  $expandedVariants[ $variant ] = 1;
1188  // Accept standard BCP 47 names for variants as well.
1189  $expandedVariants[ LanguageCode::bcp47( $variant ) ] = 1;
1190  }
1191  // Accept old deprecated names for variants
1192  foreach ( LanguageCode::getDeprecatedCodeMapping() as $old => $new ) {
1193  if ( isset( $expandedVariants[ $new ] ) ) {
1194  $expandedVariants[ $old ] = 1;
1195  }
1196  }
1197 
1198  $pat = '/;\s*(?=';
1199  foreach ( $expandedVariants as $variant => $ignore ) {
1200  // zh-hans:xxx;zh-hant:yyy
1201  $pat .= $variant . '\s*:|';
1202  // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
1203  $pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
1204  }
1205  $pat .= '\s*$)/';
1206  $this->mVarSeparatorPattern = $pat;
1207  }
1208  return $this->mVarSeparatorPattern;
1209  }
1210 }
LanguageCode\replaceDeprecatedCodes
static replaceDeprecatedCodes( $code)
Replace deprecated language codes that were used in previous versions of MediaWiki to up-to-date,...
Definition: LanguageCode.php:165
ConverterRule
Parser for rules of language conversion, parse rules in -{ }- tag.
Definition: ConverterRule.php:27
LanguageCode\getDeprecatedCodeMapping
static getDeprecatedCodeMapping()
Returns a mapping of deprecated language codes that were used in previous versions of MediaWiki to up...
Definition: LanguageCode.php:128
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:306
$wgDisabledVariants
$wgDisabledVariants
Disabled variants array of language variant conversion.
Definition: DefaultSettings.php:3112
LinkBatch
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:34
captcha-old.count
count
Definition: captcha-old.py:249
$languages
switch( $options['output']) $languages
Definition: transstat.php:76
$out
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:780
$req
this hook is for auditing only $req
Definition: hooks.txt:979
$wgDisableTitleConversion
$wgDisableTitleConversion
Whether to enable language variant conversion for links.
Definition: DefaultSettings.php:3090
$s
$s
Definition: mergeMessageFileList.php:186
CONTENT_MODEL_WIKITEXT
const CONTENT_MODEL_WIKITEXT
Definition: Defines.php:235
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
NS_MAIN
const NS_MAIN
Definition: Defines.php:64
Revision\newFromTitle
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target.
Definition: Revision.php:137
Title\getDBkey
getDBkey()
Get the main part with underscores.
Definition: Title.php:970
MWException
MediaWiki exception.
Definition: MWException.php:26
$title
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:925
$titles
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition: linkcache.txt:17
Title\getNamespace
getNamespace()
Get the namespace index, i.e. one of the NS_xxxx constants.
Definition: Title.php:994
$wgLanguageConverterCacheType
$wgLanguageConverterCacheType
The cache type for storing language conversion tables, which are used when parsing certain text and i...
Definition: DefaultSettings.php:2369
$wgDefaultLanguageVariant
$wgDefaultLanguageVariant
Default variant code, if false, the default will be the language code.
Definition: DefaultSettings.php:3095
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:336
ObjectCache\getInstance
static getInstance( $id)
Get a cached instance of the specified type of cache object.
Definition: ObjectCache.php:92
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
$code
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition: hooks.txt:780
$output
$output
Definition: SyntaxHighlight.php:334
array
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
$wgDisableLangConversion
$wgDisableLangConversion
Whether to enable language variant conversion.
Definition: DefaultSettings.php:3085
MessageCache\singleton
static singleton()
Get the singleton instance of this class.
Definition: MessageCache.php:114
Title\makeTitleSafe
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:604
ReplacementArray
Wrapper around strtr() that holds replacements.
Definition: ReplacementArray.php:24
$ret
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1985
Revision\RAW
const RAW
Definition: Revision.php:56
Title
Represents a title within MediaWiki.
Definition: Title.php:40
$cache
$cache
Definition: mcc.php:33
Makefile.translate
def translate(text, conv_table)
Definition: Makefile.py:235
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
LanguageCode\bcp47
static bcp47( $code)
Get the normalised IETF language tag. See unit test for examples.
Definition: LanguageCode.php:179
$link
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition: hooks.txt:3053
LoggerFactory
MediaWiki Logger LoggerFactory implements a PSR[0] compatible message logging system Named Psr Log LoggerInterface instances can be obtained from the MediaWiki Logger LoggerFactory::getInstance() static method. MediaWiki\Logger\LoggerFactory expects a class implementing the MediaWiki\Logger\Spi interface to act as a factory for new Psr\Log\LoggerInterface instances. The "Spi" in MediaWiki\Logger\Spi stands for "service provider interface". An SPI is an API intended to be implemented or extended by a third party. This software design pattern is intended to enable framework extension and replaceable components. It is specifically used in the MediaWiki\Logger\LoggerFactory service to allow alternate PSR-3 logging implementations to be easily integrated with MediaWiki. The service provider interface allows the backend logging library to be implemented in multiple ways. The $wgMWLoggerDefaultSpi global provides the classname of the default MediaWiki\Logger\Spi implementation to be loaded at runtime. This can either be the name of a class implementing the MediaWiki\Logger\Spi with a zero argument constructor or a callable that will return a MediaWiki\Logger\Spi instance. Alternately the MediaWiki\Logger\LoggerFactory MediaWiki Logger LoggerFactory
Definition: logger.txt:5
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:72
class
you have access to all of the normal MediaWiki so you can get a DB use the etc For full docs on the Maintenance class
Definition: maintenance.txt:52
$t
$t
Definition: testCompression.php:69
$wgRequest
if(! $wgDBerrorLogTZ) $wgRequest
Definition: Setup.php:728
MediaWikiServices
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
Language\fetchLanguageNames
static fetchLanguageNames( $inLanguage=self::AS_AUTONYMS, $include='mw')
Get an array of language names, indexed by code.
Definition: Language.php:836
wfMessage
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
Hooks\run
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:200
Language
Internationalisation code.
Definition: Language.php:36