MediaWiki  REL1_31
LanguageConverter.php
Go to the documentation of this file.
1 <?php
22 
24 
/**
 * Codes of the languages that have variants.
 *
 * @var string[]
 */
public static $languagesWithVariants = [
	'en', 'crh', 'gan', 'iu', 'kk', 'ku',
	'shi', 'sr', 'tg', 'uz', 'zh',
];
53 
55 
/** @var string Code of the main language, as passed to the constructor */
public $mMainLanguageCode;

/** @var string[] Variant codes handled by this converter (disabled variants removed) */
public $mVariants;

/** @var array Fallback(s) per variant code, as passed to the constructor */
public $mVariantFallbacks;

/** @var array Result of Language::fetchLanguageNames() */
public $mVariantNames;

/** @var bool Set to true once loadTables() has run */
public $mTablesLoaded = false;

/** @var array|bool Conversion tables keyed by variant code; false while not loaded */
public $mTables;

/** @var array 'bidirectional', 'unidirectional' or 'disable' for each variant */
public $mManualLevel;

/** @var object Language object this converter was constructed with */
public $mLangObj;

/** @var array Recognised rule flags (the defaults plus one entry per variant code) */
public $mFlags;

public $mDescCodeSep = ':';
public $mDescVarSep = ';';

/** @var bool When true, parseCachedTable() also adds ucfirst() forms of every pair */
public $mUcfirst = false;

/** @var string|bool Title set by a manual conversion rule, false if none */
public $mConvRuleTitle = false;

/** @var string|null Memoized result of getURLVariant() */
public $mURLVariant;

/** @var string|null Last result of getUserVariant() */
public $mUserVariant;

/** @var string|null Memoized result of getHeaderVariant() */
public $mHeaderVariant;

/** @var int Maximum nesting depth of -{ }- rules */
public $mMaxDepth = 10;

/** @var string|null Memoized result of getVarSeparatorPattern() */
public $mVarSeparatorPattern;

/** Key stored inside the cached tables to mark the cache format version */
const CACHE_VERSION_KEY = 'VERSION 7';
79 
/**
 * Set up the converter for one language and its variants.
 *
 * @param object $langobj Language object the converter belongs to
 * @param string $maincode Main language code of this language
 * @param string[] $variants Supported variant codes; anything listed in
 *   $wgDisabledVariants is removed
 * @param array $variantfallbacks Fallback(s) for each variant code
 * @param array $flags Extra rule flags, merged over the defaults
 * @param array $manualLevel Manual conversion level per variant
 *   ('bidirectional' when a variant has no entry)
 */
public function __construct( $langobj, $maincode, $variants = [],
	$variantfallbacks = [], $flags = [],
	$manualLevel = []
) {
	// Without this declaration $wgDisabledVariants is an undefined local
	// and array_diff() receives null.
	global $wgDisabledVariants;

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;
	$this->mVariants = array_diff( $variants, $wgDisabledVariants );
	$this->mVariantFallbacks = $variantfallbacks;
	$this->mVariantNames = Language::fetchLanguageNames();
	$defaultflags = [
		// 'S' show converted text
		// '+' add rules for alltext
		// 'E' the given flags are invalid
		// the flags above are reserved for the program
		'A' => 'A', // add rule for convert code (all text convert)
		'T' => 'T', // title convert
		'R' => 'R', // raw content
		'D' => 'D', // convert description (subclass implement)
		'-' => '-', // remove convert (not implemented)
		'H' => 'H', // add rule for convert code (but no display in placed code)
		'N' => 'N', // current variant name
	];
	$this->mFlags = array_merge( $defaultflags, $flags );
	foreach ( $this->mVariants as $v ) {
		if ( array_key_exists( $v, $manualLevel ) ) {
			$this->mManualLevel[$v] = $manualLevel[$v];
		} else {
			$this->mManualLevel[$v] = 'bidirectional';
		}
		// Every variant code is itself a valid flag
		$this->mFlags[$v] = $v;
	}
}
120 
/**
 * Get the variant codes handled by this converter.
 *
 * @return string[]
 */
public function getVariants() {
	$variantCodes = $this->mVariants;
	return $variantCodes;
}
130 
/**
 * Get the fallback(s) configured for a variant.
 *
 * @param string $variant Variant code
 * @return string|array The configured fallback(s), or the main language
 *   code when nothing is configured for $variant
 */
public function getVariantFallbacks( $variant ) {
	if ( isset( $this->mVariantFallbacks[$variant] ) ) {
		return $this->mVariantFallbacks[$variant];
	}
	// getHeaderVariant() compares the result against the main language
	// code, so that is the "no specific fallback" value.
	return $this->mMainLanguageCode;
}
148 
/**
 * Get the title produced by the conversion rule.
 *
 * @return string|bool The stored rule title; false when none has been set
 */
public function getConvRuleTitle() {
	$ruleTitle = $this->mConvRuleTitle;
	return $ruleTitle;
}
156 
/**
 * Get the preferred language variant.
 *
 * Sources are consulted in this order: URL, the GetLangPreferredVariant
 * hook, the logged-in user's preference or else the HTTP header, then
 * $wgDefaultLanguageVariant.
 *
 * @return string A valid variant code, or the main language code when no
 *   source yields one
 */
public function getPreferredVariant() {
	global $wgDefaultLanguageVariant, $wgUser;

	$req = $this->getURLVariant();

	Hooks::run( 'GetLangPreferredVariant', [ &$req ] );

	if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
		$req = $this->getUserVariant();
	} elseif ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	// This function, unlike the other get*Variant functions, is
	// not memoized (i.e. its return value is not cached) since
	// new information might appear during processing after this
	// is first called.
	if ( $this->validateVariant( $req ) ) {
		return $req;
	}
	// Callers index $this->mTables with the result, so never return null.
	return $this->mMainLanguageCode;
}
187 
/**
 * Get the default variant, ignoring any user preference.
 *
 * Sources are consulted in this order: URL, HTTP header, then
 * $wgDefaultLanguageVariant.
 *
 * @return string A variant code, or the main language code when no
 *   source yields one
 */
public function getDefaultVariant() {
	global $wgDefaultLanguageVariant;

	$req = $this->getURLVariant();

	if ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	if ( $req ) {
		return $req;
	}
	return $this->mMainLanguageCode;
}
211 
/**
 * Check that a value is one of this converter's variant codes.
 *
 * @param string|null $variant Candidate variant code
 * @return string|null $variant when it is listed in $this->mVariants,
 *   null otherwise
 */
public function validateVariant( $variant = null ) {
	// Strict comparison: with loose comparison, values such as 0 or true
	// compare equal to arbitrary variant strings on PHP 7.
	if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
		return $variant;
	}
	return null;
}
223 
/**
 * Get the variant specified in the URL.
 *
 * Reads the 'variant' request parameter and falls back to 'uselang'.
 * A truthy result is memoized in $this->mURLVariant.
 *
 * @return string|null Validated variant code, or null
 */
public function getURLVariant() {
	global $wgRequest;

	if ( $this->mURLVariant ) {
		return $this->mURLVariant;
	}

	// see if the preference is set in the request
	$ret = $wgRequest->getText( 'variant' );

	if ( !$ret ) {
		$ret = $wgRequest->getVal( 'uselang' );
	}

	$this->mURLVariant = $this->validateVariant( $ret );
	return $this->mURLVariant;
}
246 
/**
 * Determine the variant from the current user's options.
 *
 * Deliberately not memoized: memoizing this function wreaks havoc on
 * parserTest.php.
 *
 * @return string|null|bool Validated variant code, null when the option is
 *   not a valid variant, false when $wgUser is not safe to load
 */
protected function getUserVariant() {
	global $wgUser, $wgContLang;

	// Get language variant preference from logged in users
	// Don't call this on stub objects because that causes infinite
	// recursion during initialisation
	if ( !$wgUser->isSafeToLoad() ) {
		return false;
	}
	if ( $wgUser->isLoggedIn() ) {
		if ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
			$ret = $wgUser->getOption( 'variant' );
		} else {
			$ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
		}
	} else {
		// figure out user lang without constructing wgLang to avoid
		// infinite recursion
		$ret = $wgUser->getOption( 'language' );
	}

	$this->mUserVariant = $this->validateVariant( $ret );
	return $this->mUserVariant;
}
283 
/**
 * Determine the variant from the request's accepted languages.
 *
 * The first accepted language that is a valid variant wins. If none is,
 * the fallbacks of the accepted languages are tried in order. A truthy
 * result is memoized in $this->mHeaderVariant.
 *
 * @return string|null Validated variant code, or null
 */
protected function getHeaderVariant() {
	global $wgRequest;

	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	// see if some supported language variant is set in the
	// HTTP header.
	$languages = array_keys( $wgRequest->getAcceptLang() );
	if ( empty( $languages ) ) {
		return null;
	}

	$fallbackLanguages = [];
	foreach ( $languages as $language ) {
		$this->mHeaderVariant = $this->validateVariant( $language );
		if ( $this->mHeaderVariant ) {
			break;
		}

		// Check whether the current language has fallbacks.
		// We record these fallback variants and process
		// them later.
		$fallbacks = $this->getVariantFallbacks( $language );
		if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
			$fallbackLanguages[] = $fallbacks;
		} elseif ( is_array( $fallbacks ) ) {
			$fallbackLanguages =
				array_merge( $fallbackLanguages, $fallbacks );
		}
	}

	if ( !$this->mHeaderVariant ) {
		// process fallback languages now
		$fallback_languages = array_unique( $fallbackLanguages );
		foreach ( $fallback_languages as $language ) {
			$this->mHeaderVariant = $this->validateVariant( $language );
			if ( $this->mHeaderVariant ) {
				break;
			}
		}
	}

	return $this->mHeaderVariant;
}
335 
/**
 * Dictionary-based conversion of text to a variant.
 *
 * Only plain text is translated. HTML tags, entities, parser markers and
 * the contents of <code>, <script> and <pre> are copied through verbatim,
 * except that 'title' and 'alt' attribute values that are not URLs are
 * converted via recursiveConvertTopLevel().
 *
 * @param string $text Text to convert
 * @param string|bool $toVariant Target variant code; false to use the
 *   preferred variant
 * @return string Converted text, or the input text when conversion is
 *   skipped or the regex fails
 */
public function autoConvert( $text, $toVariant = false ) {
	$this->loadTables();

	if ( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
		if ( !$toVariant ) {
			return $text;
		}
	}

	if ( $this->guessVariant( $text, $toVariant ) ) {
		return $text;
	}
	/* we convert everything except:
	   1. HTML markups (anything between < and >)
	   2. HTML entities
	   3. placeholders created by the parser
	   IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
	   Minimize use of backtracking where possible.
	*/
	$marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';

	// this one is needed when the text is inside an HTML markup
	$htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';

	// Optimize for the common case where these tags have
	// few or no children. Thus try and possessively get as much as
	// possible, and only engage in backtracking when we hit a '<'.

	// disable convert to variants between <code> tags
	$codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
	// disable conversion of <script> tags
	$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
	// disable conversion of <pre> tags
	$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
	// The "|.*+)" at the end, is in case we missed some part of html syntax,
	// we will fail securely (hopefully) by matching the rest of the string.
	$htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';

	$reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
		'&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
	$startPos = 0;
	$sourceBlob = '';
	$literalBlob = '';

	// Guard against delimiter nulls in the input
	// (should never happen: see T159174)
	$text = str_replace( "\000", '', $text );
	$text = str_replace( "\004", '', $text );

	$markupMatches = null;
	$elementMatches = null;

	// We add a marker (\004) at the end of text, to ensure we always match the
	// entire text (Otherwise, pcre.backtrack_limit might cause silent failure).
	// $text is not modified inside the loop, so the terminated subject and
	// its length are computed once instead of on every iteration.
	$textLength = strlen( $text );
	$subject = $text . "\004";

	while ( $startPos < $textLength ) {
		if ( preg_match( $reg, $subject, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
			$elementPos = $markupMatches[0][1];
			$element = $markupMatches[0][0];
			if ( $element === "\004" ) {
				// We hit the end.
				$elementPos = $textLength;
				$element = '';
			} elseif ( substr( $element, -1 ) === "\004" ) {
				// This can sometimes happen if we have
				// unclosed html tags (For example
				// when converting a title attribute
				// during a recursive call that contains
				// a &lt; e.g. <div title="&lt;">.
				$element = substr( $element, 0, -1 );
			}
		} else {
			// If we hit here, then Language Converter could be tricked
			// into doing an XSS, so we refuse to translate.
			// If non-crazy input manages to reach this code path,
			// we should consider it a bug.
			$log = LoggerFactory::getInstance( 'languageconverter' );
			$log->error( "Hit pcre.backtrack_limit in " . __METHOD__
				. ". Disabling language conversion for this page.",
				[
					"method" => __METHOD__,
					"variant" => $toVariant,
					"startOfText" => substr( $text, 0, 500 )
				]
			);
			return $text;
		}
		// Queue the part before the markup for translation in a batch
		$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

		// Advance to the next position
		$startPos = $elementPos + strlen( $element );

		// Translate any alt or title attributes inside the matched element
		if ( $element !== ''
			&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
		) {
			// FIXME, this decodes entities, so if you have something
			// like <div title="foo&lt;bar"> the bar won't get
			// translated since after entity decoding it looks like
			// unclosed html and we call this method recursively
			// on attributes.
			$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
			// Ensure self-closing tags stay self-closing.
			$close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
			$changed = false;
			foreach ( [ 'title', 'alt' ] as $attrName ) {
				if ( !isset( $attrs[$attrName] ) ) {
					continue;
				}
				$attr = $attrs[$attrName];
				// Don't convert URLs. Compare against false explicitly:
				// strpos() returns 0 for a match at the very start.
				if ( strpos( $attr, '://' ) === false ) {
					$attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
				}

				if ( $attr !== $attrs[$attrName] ) {
					$attrs[$attrName] = $attr;
					$changed = true;
				}
			}
			if ( $changed ) {
				$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
					$close . $elementMatches[3];
			}
		}
		$literalBlob .= $element . "\000";
	}

	// Do the main translation batch
	$translatedBlob = $this->translate( $sourceBlob, $toVariant );

	// Put the output back together
	$translatedIter = StringUtils::explode( "\000", $translatedBlob );
	$literalIter = StringUtils::explode( "\000", $literalBlob );
	$output = '';
	while ( $translatedIter->valid() && $literalIter->valid() ) {
		$output .= $translatedIter->current();
		$output .= $literalIter->current();
		$translatedIter->next();
		$literalIter->next();
	}

	return $output;
}
491 
/**
 * Translate a string to a variant using that variant's conversion table.
 *
 * @param string $text Text to translate
 * @param string $variant Variant code selecting the table
 * @return string Translated text; $text unchanged when trim( $text ) is falsy
 */
public function translate( $text, $variant ) {
	// Nothing to do for empty or whitespace-only input
	if ( !trim( $text ) ) {
		return $text;
	}

	$this->loadTables();
	return $this->mTables[$variant]->replace( $text );
}
510 
/**
 * Translate a string into every variant of this converter.
 *
 * @param string $text Text to translate
 * @return array Translated text keyed by variant code
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$converted = [];
	foreach ( $this->mVariants as $code ) {
		$translation = $this->translate( $text, $code );
		$converted[$code] = $translation;
	}
	return $converted;
}
527 
/**
 * Apply a manual conversion rule to the converter state.
 *
 * Stores the rule's title when it has one, then adds the rule's pairs to,
 * or removes them from, the tables of each valid variant, depending on
 * the rule's action.
 *
 * @param object $convRule Rule providing getTitle(), getConvTable() and
 *   getRulesAction()
 */
protected function applyManualConv( $convRule ) {
	// Syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- customises the title
	// conversion. T26072: only overwrite $mConvRuleTitle when this rule
	// really carries a title, otherwise later non-title rules would
	// clobber it.
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// merge/remove manual conversion rules to/from global table
	$tablesByVariant = $convRule->getConvTable();
	$action = $convRule->getRulesAction();
	foreach ( $tablesByVariant as $variant => $pairs ) {
		if ( $this->validateVariant( $variant ) ) {
			if ( $action == 'add' ) {
				// More efficient than array_merge(), about 2.5 times.
				foreach ( $pairs as $from => $to ) {
					$this->mTables[$variant]->setPair( $from, $to );
				}
			} elseif ( $action == 'remove' ) {
				$this->mTables[$variant]->removeArray( $pairs );
			}
		}
	}
}
562 
/**
 * Convert a title to the preferred variant, namespace prefix included.
 *
 * @param object $title Title providing getNamespace() and getText()
 * @return string Converted "Namespace:Text"; no prefix for NS_MAIN
 */
public function convertTitle( $title ) {
	$variant = $this->getPreferredVariant();
	$ns = $title->getNamespace();

	$nsPrefix = $ns !== NS_MAIN
		? $this->convertNamespace( $ns, $variant ) . ':'
		: '';

	return $nsPrefix . $this->translate( $title->getText(), $variant );
}
581 
/**
 * Get the namespace display name in a variant.
 *
 * Lookup order: local server cache, the 'conversion-ns<index>' message in
 * the variant, the same message in the content language translated to the
 * variant, then the variant language's formatted namespace text. The
 * result is cached for 60 seconds.
 *
 * @param int $index Namespace id
 * @param string|null $variant Variant code; null for the preferred variant
 * @return string Namespace name; '' for NS_MAIN
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}

	if ( $variant === null ) {
		$variant = $this->getPreferredVariant();
	}

	$srvCache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
	$cacheKey = $srvCache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
	$name = $srvCache->get( $cacheKey );
	if ( $name !== false ) {
		return $name;
	}

	$msgKey = 'conversion-ns' . $index;

	// First check if a message gives a converted name in the target variant.
	$variantMsg = wfMessage( $msgKey )->inLanguage( $variant );
	if ( $variantMsg->exists() ) {
		$name = $variantMsg->plain();
	}

	// Then check if a message gives a converted name in content language
	// which needs extra translation to the target variant.
	if ( $name === false ) {
		$contentMsg = wfMessage( $msgKey )->inContentLanguage();
		if ( $contentMsg->exists() ) {
			$name = $this->translate( $contentMsg->plain(), $variant );
		}
	}

	// No message exists, retrieve it from the target variant's namespace names.
	if ( $name === false ) {
		$name = $this->mLangObj->factory( $variant )->getFormattedNsText( $index );
	}

	$srvCache->set( $cacheKey, $name, 60 );

	return $name;
}
630 
/**
 * Convert text to the preferred variant.
 *
 * @param string $text Text to convert
 * @return string Result of convertTo() for the preferred variant
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
649 
/**
 * Convert text to a given variant, honouring -{ }- markup.
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @return string Converted text; $text unchanged when
 *   $wgDisableLangConversion is set
 */
public function convertTo( $text, $variant ) {
	global $wgDisableLangConversion;

	if ( $wgDisableLangConversion ) {
		return $text;
	}
	// Reset converter state for a new converter run.
	$this->mConvRuleTitle = false;
	return $this->recursiveConvertTopLevel( $text, $variant );
}
666 
/**
 * Convert text that may contain -{ }- rules, starting outside any rule.
 *
 * Text between rules goes through autoConvert() unless guessVariant()
 * says the text is already in the target variant. Each -{ found outside
 * HTML, <script> and <style> is handed to recursiveConvertRule().
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @param int $depth Current rule nesting depth
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$offset = 0;
	$result = '';
	$textLen = strlen( $text );
	$needsConversion = !$this->guessVariant( $text, $variant );

	$noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
	$noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
	// phpcs:ignore Generic.Files.LineLength
	$noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
	// Only match -{ outside of html.
	$rulePattern = '/' . $noScript . '|' . $noStyle . '|' . $noHtml . '|-\{/';

	while ( $offset < $textLen ) {
		$found = preg_match( $rulePattern, $text, $m, PREG_OFFSET_CAPTURE, $offset );

		if ( !$found ) {
			// No more markup, append final segment
			$tail = substr( $text, $offset );
			if ( $needsConversion ) {
				$tail = $this->autoConvert( $tail, $variant );
			}
			return $result . $tail;
		}

		// Offset of the -{ marker (found, and not inside an attribute)
		$markerPos = $m[0][1];

		// Append the segment preceding the marker
		$head = substr( $text, $offset, $markerPos - $offset );
		if ( $needsConversion ) {
			$head = $this->autoConvert( $head, $variant );
		}
		$result .= $head;

		// Move to the marker and let the rule parser consume the rule.
		// Note: $offset is passed by reference and advanced past the rule.
		$offset = $markerPos;
		$result .= $this->recursiveConvertRule( $text, $variant, $offset, $depth + 1 );
	}

	return $result;
}
719 
/**
 * Convert the -{ }- rule that starts at $startPos.
 *
 * Nested rules are converted recursively up to $this->mMaxDepth. Past
 * that depth the nested -{ is kept literally and a depth warning is
 * emitted once. An unclosed rule yields '-{' followed by the
 * auto-converted remainder.
 *
 * @param string $text Text containing the rule
 * @param string $variant Target variant code
 * @param int &$startPos Offset of the opening -{; advanced past the rule
 * @param int $depth Current nesting depth
 * @return string Display text of the rule
 * @throws MWException When $startPos does not point at -{
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick sanity check (no function calls)
	if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
		throw new MWException( __METHOD__ . ': invalid input string' );
	}

	$startPos += 2;
	$ruleBody = '';
	$warned = false;
	$textLen = strlen( $text );

	while ( $startPos < $textLen ) {
		$match = false;
		preg_match( '/-\{|\}-/', $text, $match, PREG_OFFSET_CAPTURE, $startPos );
		if ( !$match ) {
			// Unclosed rule
			break;
		}

		list( $token, $tokenPos ) = $match[0];

		// Append the text before the token and move onto the token
		$ruleBody .= substr( $text, $startPos, $tokenPos - $startPos );
		$startPos = $tokenPos;

		if ( $token === '}-' ) {
			// End of this rule: apply it
			$startPos += 2;
			$rule = new ConverterRule( $ruleBody, $this );
			$rule->parse( $variant );
			$this->applyManualConv( $rule );
			return $rule->getDisplay();
		}

		if ( $token !== '-{' ) {
			throw new MWException( __METHOD__ . ': invalid regex match' );
		}

		if ( $depth < $this->mMaxDepth ) {
			// Recursively parse another rule
			$ruleBody .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
			continue;
		}

		// Max depth reached: keep the marker as literal text
		$ruleBody .= '-{';
		if ( !$warned ) {
			$ruleBody .= '<span class="error">' .
				wfMessage( 'language-converter-depth-warning' )
					->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
				'</span>';
			$warned = true;
		}
		$startPos += 2;
	}

	// Unclosed rule
	if ( $startPos < $textLen ) {
		$ruleBody .= substr( $text, $startPos );
	}
	$startPos = $textLen;
	return '-{' . $this->autoConvert( $ruleBody, $variant );
}
797 
/**
 * Look for an existing page under one of the variant forms of a link.
 *
 * When $nt does not exist and link conversion is allowed, $link is
 * converted to every variant and the first variant title with an article
 * id replaces both $link and $nt.
 *
 * @param string &$link Link text; replaced by the found title's text
 * @param object|null &$nt Title of the link; replaced by the found title
 * @param bool $ignoreOtherCond When true, the request-based conditions
 *   (redirect=no, action=edit/submit, linkconvert=no) are ignored
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	# If the article has already existed, there is no need to
	# check it again, otherwise it may cause a fault.
	if ( is_object( $nt ) && $nt->exists() ) {
		return;
	}

	global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;
	$isredir = $wgRequest->getText( 'redirect', 'yes' );
	$action = $wgRequest->getText( 'action' );
	if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
		$action = 'view';
	}
	$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
	$disableLinkConversion = $wgDisableLangConversion
		|| $wgDisableTitleConversion;

	if ( $disableLinkConversion ||
		( !$ignoreOtherCond &&
			( $isredir == 'no'
				|| $action == 'edit'
				|| $action == 'submit'
				|| $linkconvert == 'no' ) ) ) {
		return;
	}

	$ns = NS_MAIN;
	if ( is_object( $nt ) ) {
		$ns = $nt->getNamespace();
	}

	$variants = $this->autoConvertToAllVariants( $link );
	if ( !$variants ) { // give up
		return;
	}

	$linkBatch = new LinkBatch();
	$titles = [];

	foreach ( $variants as $v ) {
		if ( $v != $link ) {
			$varnt = Title::newFromText( $v, $ns );
			if ( $varnt !== null ) {
				$linkBatch->addObj( $varnt );
				$titles[] = $varnt;
			}
		}
	}

	// fetch all variants in single query
	$linkBatch->execute();

	foreach ( $titles as $varnt ) {
		if ( $varnt->getArticleID() > 0 ) {
			$nt = $varnt;
			$link = $varnt->getText();
			break;
		}
	}
}
870 
/**
 * Returns language specific hash options.
 *
 * @return string '!' followed by the preferred variant code
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
881 
/**
 * Guess whether a text is already written in the given variant.
 *
 * This base implementation never recognises a variant.
 *
 * @param string $text Text to examine (unused here)
 * @param string $variant Variant code (unused here)
 * @return bool Always false
 */
public function guessVariant( $text, $variant ) {
	$alreadyInVariant = false;
	return $alreadyInVariant;
}
895 
/**
 * Load default conversion tables.
 *
 * This base implementation always throws.
 *
 * @throws MWException Always, naming the late-static-bound class
 */
function loadDefaultTables() {
	throw new MWException(
		'Must implement loadDefaultTables() method in class ' . static::class
	);
}
907 
/**
 * Load the conversion tables, from cache when possible.
 *
 * Does nothing once the tables are marked as loaded. On a cache miss, or
 * when the cached data lacks CACHE_VERSION_KEY, the default tables are
 * loaded, merged with parseCachedTable() for every variant, passed to
 * postLoadTables() and written back to the cache for 43200 seconds.
 *
 * @param bool $fromCache Whether to try the cache before rebuilding
 */
function loadTables( $fromCache = true ) {
	global $wgLanguageConverterCacheType;

	if ( $this->mTablesLoaded ) {
		return;
	}

	$this->mTablesLoaded = true;
	$this->mTables = false;
	// $cache was used below without ever being assigned.
	$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
	$cacheKey = $cache->makeKey( 'conversiontables', $this->mMainLanguageCode );
	if ( $fromCache ) {
		$this->mTables = $cache->get( $cacheKey );
	}
	if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
		// not in cache, or we need a fresh reload.
		// We will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$this->loadDefaultTables();
		foreach ( $this->mVariants as $var ) {
			$cached = $this->parseCachedTable( $var );
			$this->mTables[$var]->mergeArray( $cached );
		}

		$this->postLoadTables();
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		$cache->set( $cacheKey, $this->mTables, 43200 );
	}
}
943 
/**
 * Called by loadTables() after the default tables and the parsed
 * conversion-table pages have been merged, before the result is cached.
 * This base implementation does nothing.
 */
function postLoadTables() {
	// Intentionally empty.
}
949 
/**
 * Reload the conversion tables, bypassing the cache.
 */
private function reloadTables() {
	// Drop any tables currently held
	if ( $this->mTables ) {
		unset( $this->mTables );
	}

	// Clear the loaded flag so that loadTables() rebuilds from scratch
	$this->mTablesLoaded = false;
	$useCache = false;
	$this->loadTables( $useCache );
}
965 
/**
 * Parse the conversion table stored on a MediaWiki:Conversiontable page.
 *
 * Mappings are read from -{ ... }- blocks as ';'-separated "from => to"
 * pairs. Quotes, '*' and '#' are stripped and anything after '//' in the
 * target is dropped. Links of the form
 * [[MediaWiki:Conversiontable/<code>/...]] are collected as subpages and,
 * when $recursive is true, parsed too. Each page is parsed at most once
 * per request (static guard); a repeated call returns an empty array.
 *
 * @param string $code Variant code
 * @param string $subpage Subpage name below Conversiontable/<code>
 * @param bool $recursive Whether to parse linked subpages as well
 * @return array Map of source string => replacement string
 */
function parseCachedTable( $code, $subpage = '', $recursive = true ) {
	static $parsed = [];

	$key = 'Conversiontable/' . $code;
	if ( $subpage ) {
		$key .= '/' . $subpage;
	}
	if ( array_key_exists( $key, $parsed ) ) {
		return [];
	}

	$parsed[$key] = true;

	if ( $subpage === '' ) {
		$txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
	} else {
		$txt = false;
		// $title was tested below without ever being assigned.
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
		if ( $title && $title->exists() ) {
			$revision = Revision::newFromTitle( $title );
			if ( $revision ) {
				if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
					$txt = $revision->getContent( Revision::RAW )->getNativeData();
				}

				// @todo in the future, use a specialized content model, perhaps based on json!
			}
		}
	}

	# Nothing to parse if there's no text
	if ( $txt === false || $txt === null || $txt === '' ) {
		return [];
	}

	// get all subpage links of the form
	// [[MediaWiki:Conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
		':Conversiontable';
	$subs = StringUtils::explode( '[[', $txt );
	$sublinks = [];
	foreach ( $subs as $sub ) {
		$link = explode( ']]', $sub, 2 );
		if ( count( $link ) != 2 ) {
			continue;
		}
		$b = explode( '|', $link[0], 2 );
		$b = explode( '/', trim( $b[0] ), 3 );
		if ( count( $b ) == 3 ) {
			$sublink = $b[2];
		} else {
			$sublink = '';
		}

		// A link target without any '/' has no $b[1]; check before reading it.
		if ( isset( $b[1] ) && $b[0] == $linkhead && $b[1] == $code ) {
			$sublinks[] = $sublink;
		}
	}

	// parse the mappings in this page
	$blocks = StringUtils::explode( '-{', $txt );
	$ret = [];
	$first = true;
	foreach ( $blocks as $block ) {
		if ( $first ) {
			// Skip the part before the first -{
			$first = false;
			continue;
		}
		$mappings = explode( '}-', $block, 2 )[0];
		$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
		$table = StringUtils::explode( ';', $stripped );
		foreach ( $table as $t ) {
			$m = explode( '=>', $t, 3 );
			if ( count( $m ) != 2 ) {
				continue;
			}
			// trim any trailing comments starting with '//'
			$tt = explode( '//', $m[1], 2 );
			$ret[trim( $m[0] )] = trim( $tt[0] );
		}
	}

	// recursively parse the subpages
	if ( $recursive ) {
		foreach ( $sublinks as $link ) {
			$s = $this->parseCachedTable( $code, $link, $recursive );
			// Array union: pairs from the subpage win over those already collected
			$ret = $s + $ret;
		}
	}

	if ( $this->mUcfirst ) {
		foreach ( $ret as $k => $v ) {
			$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
		}
	}
	return $ret;
}
1083 
/**
 * Wrap text in -{R|...}- so that it is left unconverted.
 *
 * @param string $text Text to protect
 * @param bool $noParse Unused by this implementation
 * @return string "-{R|$text}-", or $text unchanged when it already
 *   contains -{ or }-
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	# (strpos() returns 0 for a match at the start, so compare against
	# false explicitly instead of relying on truthiness)
	if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
		return $text;
	}

	return "-{R|$text}-";
}
1101 
/**
 * Convert the sort key of a category link.
 *
 * This base implementation returns the key unchanged.
 *
 * @param string $key Sort key
 * @return string The same key
 */
function convertCategoryKey( $key ) {
	$converted = $key;
	return $converted;
}
1113 
/**
 * Reload the conversion tables when the given title is a
 * MediaWiki:Conversiontable/<variant> page for one of this converter's
 * variants.
 *
 * @param Title $titleobj Title to examine
 */
public function updateConversionTable( Title $titleobj ) {
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return;
	}

	$parts = explode( '/', $titleobj->getDBkey(), 3 );
	$isVariantTable = count( $parts ) > 1
		&& $parts[0] == 'Conversiontable'
		&& $this->validateVariant( $parts[1] );

	if ( $isVariantTable ) {
		$this->reloadTables();
	}
}
1132 
/**
 * Get the cached separator pattern for ConverterRule::parseRules().
 *
 * The pattern is built on first use from $this->mVariants and memoized in
 * $this->mVarSeparatorPattern.
 *
 * @return string Regular expression for preg_split()
 */
public function getVarSeparatorPattern() {
	if ( is_null( $this->mVarSeparatorPattern ) ) {
		// varsep_pattern for preg_split:
		// text should be split by ";" only if a valid variant
		// name exists after the markup, for example:
		//  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
		//  <span style="font-size:120%;">yyy</span>;}-
		// we should split it as:
		//  [
		//    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
		//    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
		//    [2] => ''
		//  ]
		$pat = '/;\s*(?=';
		foreach ( $this->mVariants as $variant ) {
			// zh-hans:xxx;zh-hant:yyy
			$pat .= $variant . '\s*:|';
			// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
			$pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
		}
		$pat .= '\s*$)/';
		$this->mVarSeparatorPattern = $pat;
	}
	return $this->mVarSeparatorPattern;
}
1162 }
LanguageConverter\getVarSeparatorPattern
getVarSeparatorPattern()
Get the cached separator pattern for ConverterRule::parseRules()
Definition: LanguageConverter.php:1137
ConverterRule
Parser for language conversion rules; parses the rules found inside -{ }- tags.
Definition: ConverterRule.php:27
$wgUser
$wgUser
Definition: Setup.php:902
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:273
LanguageConverter\$mMaxDepth
$mMaxDepth
Definition: LanguageConverter.php:75
LanguageConverter\getConvRuleTitle
getConvRuleTitle()
Get the title produced by the conversion rule.
Definition: LanguageConverter.php:153
LanguageConverter\__construct
__construct( $langobj, $maincode, $variants=[], $variantfallbacks=[], $flags=[], $manualLevel=[])
Definition: LanguageConverter.php:88
use
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
Definition: APACHE-LICENSE-2.0.txt:10
$wgDisabledVariants
$wgDisabledVariants
Disabled variants array of language variant conversion.
Definition: DefaultSettings.php:3085
LanguageConverter\getExtraHashOptions
getExtraHashOptions()
Returns language specific hash options.
Definition: LanguageConverter.php:876
LinkBatch
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:34
LanguageConverter\CACHE_VERSION_KEY
const CACHE_VERSION_KEY
Definition: LanguageConverter.php:78
wfMessage
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
array
the array() calling protocol came about after MediaWiki 1.4rc1.
LanguageConverter\$mUcfirst
$mUcfirst
Definition: LanguageConverter.php:70
$output
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title after the basic globals have been set but before ordinary actions take place $output
Definition: hooks.txt:2255
$languages
switch( $options['output']) $languages
Definition: transstat.php:76
LanguageConverter\$mURLVariant
$mURLVariant
Definition: LanguageConverter.php:72
LanguageConverter\$mManualLevel
$mManualLevel
Definition: LanguageConverter.php:65
$ret
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:2005
LanguageConverter\loadDefaultTables
loadDefaultTables()
Load default conversion tables.
Definition: LanguageConverter.php:903
LanguageConverter\$mConvRuleTitle
$mConvRuleTitle
Definition: LanguageConverter.php:71
$out
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:864
LanguageConverter\$mVarSeparatorPattern
$mVarSeparatorPattern
Definition: LanguageConverter.php:76
Html\expandAttributes
static expandAttributes(array $attribs)
Given an associative array of element attributes, generate a string to stick after the element name i...
Definition: Html.php:474
LanguageConverter\$mVariants
string[] $mVariants
Definition: LanguageConverter.php:59
$wgDisableTitleConversion
$wgDisableTitleConversion
Whether to enable language variant conversion for links.
Definition: DefaultSettings.php:3063
LanguageConverter\getURLVariant
getURLVariant()
Get the variant specified in the URL.
Definition: LanguageConverter.php:229
LanguageConverter\findVariantLink
findVariantLink(&$link, &$nt, $ignoreOtherCond=false)
If a language supports multiple variants, it is possible that non-existing link in one variant actual...
Definition: LanguageConverter.php:809
LanguageConverter\$mVariantNames
$mVariantNames
Definition: LanguageConverter.php:61
$s
$s
Definition: mergeMessageFileList.php:187
CONTENT_MODEL_WIKITEXT
const CONTENT_MODEL_WIKITEXT
Definition: Defines.php:245
Parser\MARKER_PREFIX
const MARKER_PREFIX
Definition: Parser.php:135
LanguageConverter\$mMainLanguageCode
$mMainLanguageCode
Definition: LanguageConverter.php:54
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:37
LanguageConverter\$mHeaderVariant
$mHeaderVariant
Definition: LanguageConverter.php:74
LanguageConverter\$mUserVariant
$mUserVariant
Definition: LanguageConverter.php:73
NS_MAIN
const NS_MAIN
Definition: Defines.php:74
LanguageConverter\getVariantFallbacks
getVariantFallbacks( $variant)
In case some variant is not defined in the markup, we need to have some fallback.
Definition: LanguageConverter.php:142
LanguageConverter\reloadTables
reloadTables()
Reload the conversion tables.
Definition: LanguageConverter.php:957
Revision\newFromTitle
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target.
Definition: Revision.php:133
LanguageConverter\getUserVariant
getUserVariant()
Determine if the user has a variant set.
Definition: LanguageConverter.php:252
LanguageConverter\validateVariant
validateVariant( $variant=null)
Validate the variant.
Definition: LanguageConverter.php:217
Title\getDBkey
getDBkey()
Get the main part with underscores.
Definition: Title.php:947
MWException
MediaWiki exception.
Definition: MWException.php:26
$titles
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition: linkcache.txt:17
LanguageConverter\getVariants
getVariants()
Get all valid variants.
Definition: LanguageConverter.php:127
Language\fetchLanguageNames
static fetchLanguageNames( $inLanguage=null, $include='mw')
Get an array of language names, indexed by code.
Definition: Language.php:803
Title\getNamespace
getNamespace()
Get the namespace index, i.e.
Definition: Title.php:970
$wgLanguageConverterCacheType
$wgLanguageConverterCacheType
The cache type for storing language conversion tables, which are used when parsing certain text and i...
Definition: DefaultSettings.php:2304
LanguageConverter\recursiveConvertTopLevel
recursiveConvertTopLevel( $text, $variant, $depth=0)
Recursively convert text on the outside.
Definition: LanguageConverter.php:676
LanguageConverter\applyManualConv
applyManualConv( $convRule)
Apply manual conversion rules.
Definition: LanguageConverter.php:533
$wgDefaultLanguageVariant
$wgDefaultLanguageVariant
Default variant code; if false, the default will be the language code.
Definition: DefaultSettings.php:3068
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:323
LanguageConverter\translate
translate( $text, $variant)
Translate a string to a variant.
Definition: LanguageConverter.php:501
ObjectCache\getInstance
static getInstance( $id)
Get a cached instance of the specified type of cache object.
Definition: ObjectCache.php:92
LanguageConverter\getDefaultVariant
getDefaultVariant()
Get default variant.
Definition: LanguageConverter.php:193
$link
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition: hooks.txt:3021
$title
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:964
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:95
LanguageConverter\recursiveConvertRule
recursiveConvertRule( $text, $variant, &$startPos, $depth=0)
Recursively convert text on the inside.
Definition: LanguageConverter.php:731
$wgDisableLangConversion
$wgDisableLangConversion
Whether to enable language variant conversion.
Definition: DefaultSettings.php:3058
MessageCache\singleton
static singleton()
Get the singleton instance of this class.
Definition: MessageCache.php:113
LanguageConverter\postLoadTables
postLoadTables()
Hook for post processing after conversion tables are loaded.
Definition: LanguageConverter.php:947
Title\makeTitleSafe
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:562
LanguageConverter\$mTablesLoaded
$mTablesLoaded
Definition: LanguageConverter.php:62
LanguageConverter\convertCategoryKey
convertCategoryKey( $key)
Convert the sorting key for category links.
Definition: LanguageConverter.php:1110
LanguageConverter\parseCachedTable
parseCachedTable( $code, $subpage='', $recursive=true)
Parse the conversion table stored in the cache.
Definition: LanguageConverter.php:985
LanguageConverter\getPreferredVariant
getPreferredVariant()
Get preferred language variant.
Definition: LanguageConverter.php:161
LanguageConverter\markNoConversion
markNoConversion( $text, $noParse=false)
Enclose a string with the "no conversion" tag.
Definition: LanguageConverter.php:1092
LanguageConverter\autoConvert
autoConvert( $text, $toVariant=false)
Dictionary-based conversion.
Definition: LanguageConverter.php:346
Revision\RAW
const RAW
Definition: Revision.php:57
LanguageConverter\$mLangObj
$mLangObj
Definition: LanguageConverter.php:67
LanguageConverter\$mDescCodeSep
$mDescCodeSep
Definition: LanguageConverter.php:69
LanguageConverter\convertTitle
convertTitle( $title)
Auto convert a Title object to a readable string in the preferred variant.
Definition: LanguageConverter.php:570
LanguageConverter\convert
convert( $text)
Convert text to different variants of a language.
Definition: LanguageConverter.php:645
LanguageConverter\loadTables
loadTables( $fromCache=true)
Load conversion tables either from the cache or the disk.
Definition: LanguageConverter.php:913
$req
this hook is for auditing only $req
Definition: hooks.txt:990
LanguageConverter\$mFlags
$mFlags
Definition: LanguageConverter.php:68
Title
Represents a title within MediaWiki.
Definition: Title.php:39
LanguageConverter\$languagesWithVariants
static array $languagesWithVariants
languages supporting variants
Definition: LanguageConverter.php:40
$cache
$cache
Definition: mcc.php:33
$code
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition: hooks.txt:865
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:22
LanguageConverter\$mDescVarSep
$mDescVarSep
Definition: LanguageConverter.php:69
LoggerFactory
MediaWiki Logger LoggerFactory implements a PSR-3 compatible message logging system Named Psr Log LoggerInterface instances can be obtained from the MediaWiki Logger LoggerFactory::getInstance() static method. MediaWiki\Logger\LoggerFactory expects a class implementing the MediaWiki\Logger\Spi interface to act as a factory for new Psr\Log\LoggerInterface instances. The "Spi" in MediaWiki\Logger\Spi stands for "service provider interface". An SPI is an API intended to be implemented or extended by a third party. This software design pattern is intended to enable framework extension and replaceable components. It is specifically used in the MediaWiki\Logger\LoggerFactory service to allow alternate PSR-3 logging implementations to be easily integrated with MediaWiki. The service provider interface allows the backend logging library to be implemented in multiple ways. The $wgMWLoggerDefaultSpi global provides the classname of the default MediaWiki\Logger\Spi implementation to be loaded at runtime. This can either be the name of a class implementing the MediaWiki\Logger\Spi with a zero argument constructor or a callable that will return an MediaWiki\Logger\Spi instance. Alternately the MediaWiki\Logger\LoggerFactory MediaWiki Logger LoggerFactory
Definition: logger.txt:5
LanguageConverter\autoConvertToAllVariants
autoConvertToAllVariants( $text)
Call translate() to convert text to all valid variants.
Definition: LanguageConverter.php:517
Sanitizer\decodeTagAttributes
static decodeTagAttributes( $text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1434
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:82
class
you have access to all of the normal MediaWiki so you can get a DB use the etc For full docs on the Maintenance class
Definition: maintenance.txt:56
LanguageConverter\convertTo
convertTo( $text, $variant)
Same as convert(), except with an extra parameter to specify a custom variant.
Definition: LanguageConverter.php:657
$t
$t
Definition: testCompression.php:69
LanguageConverter\updateConversionTable
updateConversionTable(Title $titleobj)
Refresh the cache of conversion tables when MediaWiki:Conversiontable* is updated.
Definition: LanguageConverter.php:1120
LanguageConverter\getHeaderVariant
getHeaderVariant()
Determine the language variant from the Accept-Language header.
Definition: LanguageConverter.php:289
$wgRequest
if(! $wgDBerrorLogTZ) $wgRequest
Definition: Setup.php:737
MediaWikiServices
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:25
LanguageConverter\guessVariant
guessVariant( $text, $variant)
Guess if a text is written in a variant.
Definition: LanguageConverter.php:892
Hooks\run
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:203
LanguageConverter\$mVariantFallbacks
$mVariantFallbacks
Definition: LanguageConverter.php:60
LanguageConverter
Base class for language conversion.
Definition: LanguageConverter.php:34
LanguageConverter\convertNamespace
convertNamespace( $index, $variant=null)
Get the namespace display name in the preferred variant.
Definition: LanguageConverter.php:589
LanguageConverter\$mTables
$mTables
Definition: LanguageConverter.php:63
$wgContLang
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the content language as $wgContLang
Definition: design.txt:57