MediaWiki  1.30.0
LanguageConverter.php
Go to the documentation of this file.
1 <?php
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MediaWikiServices;
34 class LanguageConverter {
/**
 * Language codes listed as having variants.
 * NOTE(review): nothing in this class reads the list; presumably callers
 * elsewhere consult it - confirm before editing entries.
 */
public static $languagesWithVariants = [
	'en',
	'gan',
	'iu',
	'kk',
	'ku',
	'shi',
	'sr',
	'tg',
	'uz',
	'zh',
];

/** Main language code; used as the result whenever no valid variant is found. */
public $mMainLanguageCode;

/** Variant codes given to the constructor, minus those in $wgDisabledVariants. */
public $mVariants;

/** Fallbacks per variant code; each entry is handled as a string or an array. */
public $mVariantFallbacks;

/** Result of Language::fetchLanguageNames(), captured at construction time. */
public $mVariantNames;

/** Set to true once loadTables() has run; reset by reloadTables(). */
public $mTablesLoaded = false;

/** Conversion tables keyed by variant code, plus a CACHE_VERSION_KEY entry. */
public $mTables;

/** Per-variant manual level: 'bidirectional', 'unidirectional' or 'disable'. */
public $mManualLevel;

/** Language object handed to the constructor. */
public $mLangObj;

/** Default rule flags merged with caller flags, plus one entry per variant. */
public $mFlags;

/** NOTE(review): not read in this class; presumably the code/description separator. */
public $mDescCodeSep = ':';

/** NOTE(review): not read in this class; presumably the variant separator. */
public $mDescVarSep = ';';

/** When true, parseCachedTable() also stores ucfirst()-ed copies of each pair. */
public $mUcfirst = false;

/** Title supplied by a manual conversion rule, or false when there is none. */
public $mConvRuleTitle = false;

/** Memoized result of getURLVariant(). */
public $mURLVariant;

/** Last result of getUserVariant() (stored, but never used to short-circuit). */
public $mUserVariant;

/** Memoized result of getHeaderVariant(). */
public $mHeaderVariant;

/** Maximum nesting depth of -{ ... }- rules before a warning is emitted. */
public $mMaxDepth = 10;

/** Cached regex built by getVarSeparatorPattern(). */
public $mVarSeparatorPattern;

/** Key that must exist in a cached table set for loadTables() to accept it. */
const CACHE_VERSION_KEY = 'VERSION 7';
78 
/**
 * Set up a converter for one language and its variants.
 *
 * @param object $langobj Language object; stored in $mLangObj
 * @param string $maincode Main language code
 * @param array $variants Supported variant codes; entries that appear in
 *   $wgDisabledVariants are dropped
 * @param array $variantfallbacks Fallbacks keyed by variant code
 * @param array $flags Extra rule flags, merged over the defaults below
 * @param array $manualLevel Manual conversion level keyed by variant code;
 *   variants without an entry default to 'bidirectional'
 */
public function __construct( $langobj, $maincode, $variants = [],
	$variantfallbacks = [], $flags = [],
	$manualLevel = [] ) {
	// Without this declaration $wgDisabledVariants is an undefined local
	// and array_diff() below fails.
	global $wgDisabledVariants;

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;
	$this->mVariants = array_diff( $variants, $wgDisabledVariants );
	$this->mVariantFallbacks = $variantfallbacks;
	$this->mVariantNames = Language::fetchLanguageNames();
	$defaultflags = [
		// 'S' show converted text
		// '+' add rules for alltext
		// 'E' the gave flags is error
		// these flags above are reserved for program
		'A' => 'A', // add rule for convert code (all text convert)
		'T' => 'T', // title convert
		'R' => 'R', // raw content
		'D' => 'D', // convert description (subclass implement)
		'-' => '-', // remove convert (not implement)
		'H' => 'H', // add rule for convert code (but no display in placed code)
		'N' => 'N', // current variant name
	];
	$this->mFlags = array_merge( $defaultflags, $flags );
	foreach ( $this->mVariants as $v ) {
		$this->mManualLevel[$v] = array_key_exists( $v, $manualLevel )
			? $manualLevel[$v]
			: 'bidirectional';
		// Every variant code doubles as a flag of the same name.
		$this->mFlags[$v] = $v;
	}
}
119 
/**
 * Get the variant codes this converter works with.
 *
 * @return array Contents of $mVariants
 */
public function getVariants() {
	return $this->mVariants;
}
129 
/**
 * Look up the fallback(s) configured for a variant.
 *
 * @param string $variant Variant code
 * @return mixed The $mVariantFallbacks entry for $variant when one is set,
 *   otherwise the main language code
 */
public function getVariantFallbacks( $variant ) {
	return isset( $this->mVariantFallbacks[$variant] )
		? $this->mVariantFallbacks[$variant]
		: $this->mMainLanguageCode;
}
147 
/**
 * Get the title set by the most recent manual title rule.
 *
 * @return mixed Current value of $mConvRuleTitle (false when none is set)
 */
public function getConvRuleTitle() {
	return $this->mConvRuleTitle;
}
155 
/**
 * Work out which variant should be used for the current request.
 *
 * Sources are tried in this order: the URL, then the user preference (for
 * a safely loadable, logged-in user) or the Accept-Language header
 * (otherwise), then $wgDefaultLanguageVariant.
 *
 * @return string A valid variant code, or the main language code when none
 *   of the sources yields a valid variant
 */
public function getPreferredVariant() {
	// These globals must be declared; otherwise $wgUser is an undefined
	// local and the method call on it is fatal.
	global $wgDefaultLanguageVariant, $wgUser;

	$req = $this->getURLVariant();

	if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
		$req = $this->getUserVariant();
	} elseif ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	// This function, unlike the other get*Variant functions, is
	// not memoized (i.e. its return value is not cached) since
	// new information might appear during processing after this
	// is first called.
	if ( $this->validateVariant( $req ) ) {
		return $req;
	}
	return $this->mMainLanguageCode;
}
184 
/**
 * Work out the default variant, ignoring any user preference.
 *
 * Tries the URL, then the Accept-Language header, then
 * $wgDefaultLanguageVariant.
 *
 * @return string The first variant found, or the main language code
 */
public function getDefaultVariant() {
	// Must be declared, otherwise the configured default is never seen.
	global $wgDefaultLanguageVariant;

	$req = $this->getURLVariant();

	if ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	if ( $req ) {
		return $req;
	}
	return $this->mMainLanguageCode;
}
208 
/**
 * Check that a variant code is one of this converter's variants.
 *
 * The membership test is strict: with a loose comparison, values such as
 * boolean true compare equal to any non-empty variant code and would be
 * reported as valid.
 *
 * @param string|null $variant Variant code to check
 * @return string|null $variant when it is listed in $mVariants, else null
 */
public function validateVariant( $variant = null ) {
	if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
		return $variant;
	}
	return null;
}
220 
/**
 * Get the variant requested through the URL.
 *
 * Reads the 'variant' request parameter and, when that is empty, the
 * 'uselang' parameter. The validated result is memoized in $mURLVariant
 * (only a truthy result short-circuits later calls).
 *
 * @return string|null Valid variant code, or null
 */
public function getURLVariant() {
	// Must be declared, otherwise $wgRequest is an undefined local.
	global $wgRequest;

	if ( $this->mURLVariant ) {
		return $this->mURLVariant;
	}

	// see if the preference is set in the request
	$ret = $wgRequest->getText( 'variant' );

	if ( !$ret ) {
		$ret = $wgRequest->getVal( 'uselang' );
	}

	$this->mURLVariant = $this->validateVariant( $ret );
	return $this->mURLVariant;
}
243 
/**
 * Get the variant stored in the current user's options.
 *
 * Logged-in users: the 'variant' option when this converter's main language
 * is the content language, otherwise 'variant-<main language code>'.
 * Other users: the 'language' option.
 *
 * The result is stored in $mUserVariant but deliberately not used to
 * short-circuit later calls: memoizing this function wreaks havoc on
 * parserTest.php.
 *
 * @return string|null|false Valid variant code, null when the option is not
 *   a valid variant, or false when the user object is not safe to load
 */
protected function getUserVariant() {
	// Must be declared, otherwise both are undefined locals.
	global $wgUser, $wgContLang;

	// Get language variant preference from logged in users
	// Don't call this on stub objects because that causes infinite
	// recursion during initialisation
	if ( !$wgUser->isSafeToLoad() ) {
		return false;
	}
	if ( $wgUser->isLoggedIn() ) {
		if ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
			$ret = $wgUser->getOption( 'variant' );
		} else {
			$ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
		}
	} else {
		// figure out user lang without constructing wgLang to avoid
		// infinite recursion
		$ret = $wgUser->getOption( 'language' );
	}

	$this->mUserVariant = $this->validateVariant( $ret );
	return $this->mUserVariant;
}
280 
/**
 * Determine the variant from the request's accepted languages.
 *
 * The keys of $wgRequest->getAcceptLang() are checked in order; the first
 * one that is a valid variant wins. If none is, the fallbacks of the
 * languages seen are tried in the order they were collected. A truthy
 * result is memoized in $mHeaderVariant.
 *
 * @return string|null Valid variant code, or null
 */
protected function getHeaderVariant() {
	// Must be declared, otherwise $wgRequest is an undefined local.
	global $wgRequest;

	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	// see if some supported language variant is set in the
	// HTTP header.
	$languages = array_keys( $wgRequest->getAcceptLang() );
	if ( empty( $languages ) ) {
		return null;
	}

	$fallbackLanguages = [];
	foreach ( $languages as $language ) {
		$this->mHeaderVariant = $this->validateVariant( $language );
		if ( $this->mHeaderVariant ) {
			break;
		}

		// To see if there are fallbacks of current language.
		// We record these fallback variants, and process
		// them later.
		$fallbacks = $this->getVariantFallbacks( $language );
		if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
			$fallbackLanguages[] = $fallbacks;
		} elseif ( is_array( $fallbacks ) ) {
			$fallbackLanguages = array_merge( $fallbackLanguages, $fallbacks );
		}
	}

	if ( !$this->mHeaderVariant ) {
		// process fallback languages now
		foreach ( array_unique( $fallbackLanguages ) as $language ) {
			$this->mHeaderVariant = $this->validateVariant( $language );
			if ( $this->mHeaderVariant ) {
				break;
			}
		}
	}

	return $this->mHeaderVariant;
}
332 
/**
 * Convert text to a variant using the conversion tables only.
 *
 * HTML tags, HTML entities and parser markers are left untouched; the text
 * between them is translated in one batch. 'title' and 'alt' attributes of
 * matched tags are converted recursively unless they contain '://'.
 *
 * @param string $text Text to convert
 * @param string|bool $toVariant Target variant code; when falsy the
 *   preferred variant is used
 * @return string Converted text, or $text unchanged when guessVariant()
 *   reports it is already in the target variant or the markup regex fails
 */
public function autoConvert( $text, $toVariant = false ) {
	$this->loadTables();

	if ( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
		if ( !$toVariant ) {
			return $text;
		}
	}

	if ( $this->guessVariant( $text, $toVariant ) ) {
		return $text;
	}
	/* we convert everything except:
	   1. HTML markups (anything between < and >)
	   2. HTML entities
	   3. placeholders created by the parser
	   IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
	   Minimize use of backtracking where possible.
	*/
	$marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';

	// this one is needed when the text is inside an HTML markup
	$htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';

	// Optimize for the common case where these tags have
	// few or no children. Thus try and possessively get as much as
	// possible, and only engage in backtracking when we hit a '<'.

	// disable convert to variants between <code> tags
	$codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
	// disable conversion of <script> tags
	$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
	// disable conversion of <pre> tags
	$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
	// The "|.*+)" at the end, is in case we missed some part of html syntax,
	// we will fail securely (hopefully) by matching the rest of the string.
	$htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';

	$reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
		'&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
	$startPos = 0;
	$sourceBlob = '';
	$literalBlob = '';

	// Guard against delimiter nulls in the input
	// (should never happen: see T159174)
	$text = str_replace( "\000", '', $text );
	$text = str_replace( "\004", '', $text );

	$markupMatches = null;
	$elementMatches = null;

	// We add a marker (\004) at the end of text, to ensure we always match the
	// entire text (Otherwise, pcre.backtrack_limit might cause silent failure).
	// $text is not modified inside the loop, so both values are computed once.
	$textLength = strlen( $text );
	$subject = $text . "\004";
	while ( $startPos < $textLength ) {
		if ( preg_match( $reg, $subject, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
			$elementPos = $markupMatches[0][1];
			$element = $markupMatches[0][0];
			if ( $element === "\004" ) {
				// We hit the end.
				$elementPos = $textLength;
				$element = '';
			} elseif ( substr( $element, -1 ) === "\004" ) {
				// This can sometimes happen if we have
				// unclosed html tags (For example
				// when converting a title attribute
				// during a recursive call that contains
				// a &lt; e.g. <div title="&lt;">.
				$element = substr( $element, 0, -1 );
			}
		} else {
			// If we hit here, then Language Converter could be tricked
			// into doing an XSS, so we refuse to translate.
			// If non-crazy input manages to reach this code path,
			// we should consider it a bug.
			$log = LoggerFactory::getInstance( 'languageconverter' );
			$log->error( "Hit pcre.backtrack_limit in " . __METHOD__
				. ". Disabling language conversion for this page.",
				[
					"method" => __METHOD__,
					"variant" => $toVariant,
					"startOfText" => substr( $text, 0, 500 )
				]
			);
			return $text;
		}
		// Queue the part before the markup for translation in a batch
		$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

		// Advance to the next position
		$startPos = $elementPos + strlen( $element );

		// Translate any alt or title attributes inside the matched element
		if ( $element !== ''
			&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
		) {
			// FIXME, this decodes entities, so if you have something
			// like <div title="foo&lt;bar"> the bar won't get
			// translated since after entity decoding it looks like
			// unclosed html and we call this method recursively
			// on attributes.
			$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
			// Ensure self-closing tags stay self-closing.
			$close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
			$changed = false;
			foreach ( [ 'title', 'alt' ] as $attrName ) {
				if ( !isset( $attrs[$attrName] ) ) {
					continue;
				}
				$attr = $attrs[$attrName];
				// Don't convert URLs. The comparison must be against false:
				// strpos() returns 0 (falsy) for a match at the very start.
				if ( strpos( $attr, '://' ) === false ) {
					$attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
				}

				if ( $attr !== $attrs[$attrName] ) {
					$attrs[$attrName] = $attr;
					$changed = true;
				}
			}
			if ( $changed ) {
				$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
					$close . $elementMatches[3];
			}
		}
		$literalBlob .= $element . "\000";
	}

	// Do the main translation batch
	$translatedBlob = $this->translate( $sourceBlob, $toVariant );

	// Put the output back together by interleaving the translated
	// segments with the untouched markup segments.
	$translatedIter = StringUtils::explode( "\000", $translatedBlob );
	$literalIter = StringUtils::explode( "\000", $literalBlob );
	$output = '';
	while ( $translatedIter->valid() && $literalIter->valid() ) {
		$output .= $translatedIter->current();
		$output .= $literalIter->current();
		$translatedIter->next();
		$literalIter->next();
	}

	return $output;
}
488 
/**
 * Run text through the conversion table of one variant.
 *
 * Text that is empty, whitespace-only or otherwise falsy after trim() is
 * returned as-is without loading the tables.
 *
 * @param string $text Text to translate
 * @param string $variant Variant code; must have an entry in $mTables
 * @return string Translated text
 */
public function translate( $text, $variant ) {
	if ( !trim( $text ) ) {
		return $text;
	}

	$this->loadTables();
	return $this->mTables[$variant]->replace( $text );
}
507 
/**
 * Translate text into every variant of this converter.
 *
 * @param string $text Text to translate
 * @return array Translated text keyed by variant code (empty when the
 *   converter has no variants)
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$byVariant = [];
	foreach ( $this->mVariants as $code ) {
		$byVariant[$code] = $this->translate( $text, $code );
	}
	return $byVariant;
}
524 
/**
 * Apply a parsed manual conversion rule to the converter state.
 *
 * A non-empty rule title replaces $mConvRuleTitle; an empty one leaves the
 * existing title alone (T26072: rules that are not title rules must not
 * wipe a title set earlier via -{T|zh-cn:TitleCN; zh-tw:TitleTw}-).
 * The rule's conversion table is then added to, or removed from, the
 * tables of every valid variant it mentions.
 *
 * @param object $convRule Parsed rule; getTitle(), getConvTable() and
 *   getRulesAction() are called on it
 */
protected function applyManualConv( $convRule ) {
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// merge/remove manual conversion rules to/from global table
	$convTable = $convRule->getConvTable();
	$action = $convRule->getRulesAction();
	foreach ( $convTable as $variant => $pair ) {
		if ( !$this->validateVariant( $variant ) ) {
			continue;
		}

		switch ( $action ) {
			case 'add':
				// Pair-by-pair is about 2.5 times faster than array_merge().
				foreach ( $pair as $from => $to ) {
					$this->mTables[$variant]->setPair( $from, $to );
				}
				break;
			case 'remove':
				$this->mTables[$variant]->removeArray( $pair );
				break;
		}
	}
}
559 
/**
 * Convert a title to the preferred variant.
 *
 * @param object $title Title-like object; getNamespace() and getText() are
 *   called on it
 * @return string Converted namespace name plus ':' (omitted for NS_MAIN)
 *   followed by the translated title text
 */
public function convertTitle( $title ) {
	$variant = $this->getPreferredVariant();
	$ns = $title->getNamespace();

	$prefix = $ns !== NS_MAIN
		? $this->convertNamespace( $ns, $variant ) . ':'
		: '';

	return $prefix . $this->translate( $title->getText(), $variant );
}
578 
/**
 * Get the name of a namespace in a variant.
 *
 * Lookup order: the local server cache; the 'conversion-ns<index>' message
 * in the target variant; the same message in the content language,
 * translated to the variant; finally the namespace name formatted by the
 * variant's own language object. The result is cached for 60 seconds.
 *
 * @param int $index Namespace index
 * @param string|null $variant Variant code; null means the preferred variant
 * @return string Namespace name; always '' for NS_MAIN
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}
	if ( $variant === null ) {
		$variant = $this->getPreferredVariant();
	}

	$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
	$key = $cache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
	$nsVariantText = $cache->get( $key );
	if ( $nsVariantText !== false ) {
		return $nsVariantText;
	}

	$messageKey = 'conversion-ns' . $index;

	// First check if a message gives a converted name in the target variant.
	$variantMsg = wfMessage( $messageKey )->inLanguage( $variant );
	if ( $variantMsg->exists() ) {
		$nsVariantText = $variantMsg->plain();
	}

	// Then check if a message gives a converted name in content language
	// which needs extra translation to the target variant.
	if ( $nsVariantText === false ) {
		$contentMsg = wfMessage( $messageKey )->inContentLanguage();
		if ( $contentMsg->exists() ) {
			$nsVariantText = $this->translate( $contentMsg->plain(), $variant );
		}
	}

	// No message exists, retrieve it from the target variant's namespace names.
	if ( $nsVariantText === false ) {
		$nsVariantText = $this->mLangObj->factory( $variant )->getFormattedNsText( $index );
	}

	$cache->set( $key, $nsVariantText, 60 );

	return $nsVariantText;
}
627 
/**
 * Convert text to the preferred variant.
 *
 * @param string $text Text to convert
 * @return string Result of convertTo() for the preferred variant
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
646 
/**
 * Convert text to a specific variant, honouring -{ ... }- rules.
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @return string Converted text, or $text unchanged when
 *   $wgDisableLangConversion is set
 */
public function convertTo( $text, $variant ) {
	// Must be declared, otherwise the setting is never seen and
	// conversion cannot be switched off.
	global $wgDisableLangConversion;

	if ( $wgDisableLangConversion ) {
		return $text;
	}
	// Reset converter state for a new converter run.
	$this->mConvRuleTitle = false;
	return $this->recursiveConvertTopLevel( $text, $variant );
}
663 
/**
 * Convert text that may contain -{ ... }- rules at the top level.
 *
 * Text outside rules goes through autoConvert() unless guessVariant() says
 * the whole text is already in the target variant. Each "-{" found outside
 * of HTML markup is handed to recursiveConvertRule().
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @param int $depth Current nesting depth
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$shouldConvert = !$this->guessVariant( $text, $variant );
	$length = strlen( $text );
	$startPos = 0;
	$out = '';

	$noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
	$noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
	// @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
	$noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
	// @codingStandardsIgnoreEnd
	// Only match -{ outside of html.
	$ruleStart = '/' . $noScript . '|' . $noStyle . '|' . $noHtml . '|-\{/';

	while ( $startPos < $length ) {
		$found = preg_match( $ruleStart, $text, $m, PREG_OFFSET_CAPTURE, $startPos );

		if ( !$found ) {
			// No more markup, append final segment
			$tail = substr( $text, $startPos );
			return $out . ( $shouldConvert ? $this->autoConvert( $tail, $variant ) : $tail );
		}

		// Offset of the match of the regex pattern.
		$pos = $m[0][1];

		// Append the segment that precedes the -{ marker
		$head = substr( $text, $startPos, $pos - $startPos );
		$out .= $shouldConvert ? $this->autoConvert( $head, $variant ) : $head;

		// -{ marker found, not in attribute: move up to it and let the
		// rule parser consume it. $startPos is passed by reference and
		// comes back advanced.
		$startPos = $pos;
		$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
	}

	return $out;
}
717 
/**
 * Parse and apply one -{ ... }- rule, including rules nested inside it.
 *
 * @param string $text Full text being converted
 * @param string $variant Target variant code
 * @param int &$startPos Offset of the opening "-{"; on return it points just
 *   past the closing "}-", or to the end of $text for an unclosed rule
 * @param int $depth Current nesting depth, checked against $mMaxDepth
 * @return string Display text of the rule; for an unclosed rule, "-{"
 *   followed by the auto-converted remainder
 * @throws MWException When $text does not have "-{" at $startPos
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick sanity check (no function calls)
	if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
		throw new MWException( __METHOD__ . ': invalid input string' );
	}

	$startPos += 2;
	$inner = '';
	$warningDone = false;
	$length = strlen( $text );

	while ( $startPos < $length ) {
		$m = false;
		preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
		if ( !$m ) {
			// Unclosed rule
			break;
		}

		$token = $m[0][0];
		$pos = $m[0][1];

		// Markup found
		// Append initial segment
		$inner .= substr( $text, $startPos, $pos - $startPos );

		// Advance position
		$startPos = $pos;

		switch ( $token ) {
			case '-{':
				// Check max depth
				if ( $depth >= $this->mMaxDepth ) {
					$inner .= '-{';
					if ( !$warningDone ) {
						$inner .= '<span class="error">' .
							wfMessage( 'language-converter-depth-warning' )
								->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
							'</span>';
						$warningDone = true;
					}
					$startPos += 2;
					// "continue 2" targets the while loop. A bare "continue"
					// here would target the switch, act like "break", and
					// raise a warning on PHP 7.3 and later.
					continue 2;
				}
				// Recursively parse another rule
				$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
				break;
			case '}-':
				// Apply the rule
				$startPos += 2;
				$rule = new ConverterRule( $inner, $this );
				$rule->parse( $variant );
				$this->applyManualConv( $rule );
				return $rule->getDisplay();
			default:
				throw new MWException( __METHOD__ . ': invalid regex match' );
		}
	}

	// Unclosed rule
	if ( $startPos < $length ) {
		$inner .= substr( $text, $startPos );
	}
	$startPos = $length;
	return '-{' . $this->autoConvert( $inner, $variant );
}
795 
/**
 * Replace a link to a missing page with a link to an existing variant of it.
 *
 * The link text is converted to every variant; the first converted title
 * that exists (article ID above 0) replaces $link and $nt. Nothing happens
 * when $nt already exists, when conversion is disabled by
 * $wgDisableLangConversion or $wgDisableTitleConversion, or - unless
 * $ignoreOtherCond is set - when the request has redirect=no,
 * linkconvert=no, or an edit/submit action (an edit with 'redlink' set is
 * treated as a view).
 *
 * @param string &$link Link text; replaced with the variant's text on success
 * @param mixed &$nt Title object (or non-object); replaced on success
 * @param bool $ignoreOtherCond Skip the request-based conditions
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	// Must be declared, otherwise all three are undefined locals.
	global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;

	# If the article has already existed, there is no need to
	# check it again, otherwise it may cause a fault.
	if ( is_object( $nt ) && $nt->exists() ) {
		return;
	}

	$isredir = $wgRequest->getText( 'redirect', 'yes' );
	$action = $wgRequest->getText( 'action' );
	if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
		$action = 'view';
	}
	$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
	$disableLinkConversion = $wgDisableLangConversion
		|| $wgDisableTitleConversion;
	$linkBatch = new LinkBatch();

	$ns = NS_MAIN;

	if ( $disableLinkConversion ||
		( !$ignoreOtherCond &&
			( $isredir == 'no'
				|| $action == 'edit'
				|| $action == 'submit'
				|| $linkconvert == 'no' ) ) ) {
		return;
	}

	if ( is_object( $nt ) ) {
		$ns = $nt->getNamespace();
	}

	$variants = $this->autoConvertToAllVariants( $link );
	if ( !$variants ) { // give up
		return;
	}

	$titles = [];

	foreach ( $variants as $v ) {
		// Only variants whose text differs from the original link are candidates.
		if ( $v != $link ) {
			$varnt = Title::newFromText( $v, $ns );
			if ( !is_null( $varnt ) ) {
				$linkBatch->addObj( $varnt );
				$titles[] = $varnt;
			}
		}
	}

	// fetch all variants in single query
	$linkBatch->execute();

	foreach ( $titles as $varnt ) {
		if ( $varnt->getArticleID() > 0 ) {
			$nt = $varnt;
			$link = $varnt->getText();
			break;
		}
	}
}
868 
/**
 * Build the variant-dependent hash suffix.
 *
 * @return string '!' followed by the preferred variant code
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
879 
/**
 * Guess whether a text is already written in the given variant.
 *
 * This implementation never guesses and always answers false.
 * NOTE(review): presumably a hook for subclasses to override - confirm.
 *
 * @param string $text Text to inspect (unused here)
 * @param string $variant Variant code (unused here)
 * @return bool Always false
 */
public function guessVariant( $text, $variant ) {
	return false;
}
893 
/**
 * Load the default conversion tables.
 *
 * This implementation only throws; the message tells the concrete class
 * (resolved through late static binding) to implement the method.
 *
 * @throws MWException Always
 */
function loadDefaultTables() {
	throw new MWException(
		'Must implement loadDefaultTables() method in class ' . static::class
	);
}
905 
/**
 * Load the conversion tables, from cache when possible.
 *
 * Does nothing when the tables were already loaded. A cached table set is
 * accepted only if it contains the CACHE_VERSION_KEY entry; otherwise the
 * default tables are loaded, merged with the parsed
 * MediaWiki:Conversiontable/* pages of each variant, passed to
 * postLoadTables(), and written back to the cache for 43200 seconds.
 *
 * @param bool $fromCache Whether to try the cache first
 */
function loadTables( $fromCache = true ) {
	// Needed to select the cache backend below.
	global $wgLanguageConverterCacheType;

	if ( $this->mTablesLoaded ) {
		return;
	}

	$this->mTablesLoaded = true;
	$this->mTables = false;
	// $cache must be created here; it is used for the key, the read and the write.
	$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
	$cacheKey = $cache->makeKey( 'conversiontables', $this->mMainLanguageCode );
	if ( $fromCache ) {
		$this->mTables = $cache->get( $cacheKey );
	}
	if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
		// not in cache, or we need a fresh reload.
		// We will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$this->loadDefaultTables();
		foreach ( $this->mVariants as $var ) {
			$cached = $this->parseCachedTable( $var );
			$this->mTables[$var]->mergeArray( $cached );
		}

		$this->postLoadTables();
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		$cache->set( $cacheKey, $this->mTables, 43200 );
	}
}
941 
/**
 * Called by loadTables() after the default and wiki-defined tables have
 * been merged and before they are cached. Does nothing here.
 */
function postLoadTables() {
}
947 
/**
 * Throw away the loaded tables and rebuild them, bypassing the cache read.
 */
private function reloadTables() {
	// Drop whatever is currently loaded.
	if ( $this->mTables ) {
		unset( $this->mTables );
	}

	// Clear the flag so loadTables() does not return early.
	$this->mTablesLoaded = false;
	$this->loadTables( false );
}
963 
/**
 * Parse the conversion table defined on the wiki for one variant.
 *
 * The top-level page is read through the message cache under the key
 * 'Conversiontable/<code>'; subpages are read from the MediaWiki namespace
 * and only when their content model is wikitext. Mappings have the form
 * "from => to" separated by ';' inside -{ ... }- blocks; the characters
 * ' " * # are stripped and anything after '//' in the target is dropped.
 * Links of the form [[MediaWiki:Conversiontable/<code>/...]] name subpages
 * that are parsed recursively.
 *
 * Each page key is parsed at most once per process: a key that was seen
 * before yields an empty array.
 *
 * @param string $code Variant code
 * @param string $subpage Subpage name, '' for the top-level page
 * @param bool $recursive Whether to follow subpage links
 * @return array Map of source string to target string
 */
function parseCachedTable( $code, $subpage = '', $recursive = true ) {
	static $parsed = [];

	$key = 'Conversiontable/' . $code;
	if ( $subpage ) {
		$key .= '/' . $subpage;
	}
	if ( array_key_exists( $key, $parsed ) ) {
		return [];
	}

	$parsed[$key] = true;

	if ( $subpage === '' ) {
		$txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
	} else {
		$txt = false;
		// $title must be built here; without it the branch below can never run.
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
		if ( $title && $title->exists() ) {
			$revision = Revision::newFromTitle( $title );
			if ( $revision ) {
				if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
					$txt = $revision->getContent( Revision::RAW )->getNativeData();
				}

				// @todo in the future, use a specialized content model, perhaps based on json!
			}
		}
	}

	# Nothing to parse if there's no text
	if ( $txt === false || $txt === null || $txt === '' ) {
		return [];
	}

	// get all subpage links of the form
	// [[MediaWiki:Conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
		':Conversiontable';
	$subs = StringUtils::explode( '[[', $txt );
	$sublinks = [];
	foreach ( $subs as $sub ) {
		$link = explode( ']]', $sub, 2 );
		if ( count( $link ) != 2 ) {
			continue;
		}
		$b = explode( '|', $link[0], 2 );
		$b = explode( '/', trim( $b[0] ), 3 );
		$sublink = count( $b ) == 3 ? $b[2] : '';

		// A link target without any '/' has no $b[1]; check the count
		// before reading it to avoid an undefined-offset notice.
		if ( count( $b ) >= 2 && $b[0] == $linkhead && $b[1] == $code ) {
			$sublinks[] = $sublink;
		}
	}

	// parse the mappings in this page
	$blocks = StringUtils::explode( '-{', $txt );
	$ret = [];
	$first = true;
	foreach ( $blocks as $block ) {
		if ( $first ) {
			// Skip the part before the first -{
			$first = false;
			continue;
		}
		$mappings = explode( '}-', $block, 2 )[0];
		$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
		$table = StringUtils::explode( ';', $stripped );
		foreach ( $table as $t ) {
			// Entries with zero or more than one '=>' are ignored.
			$m = explode( '=>', $t, 3 );
			if ( count( $m ) != 2 ) {
				continue;
			}
			// trim any trailing comments starting with '//'
			$tt = explode( '//', $m[1], 2 );
			$ret[trim( $m[0] )] = trim( $tt[0] );
		}
	}

	// recursively parse the subpages
	if ( $recursive ) {
		foreach ( $sublinks as $link ) {
			$s = $this->parseCachedTable( $code, $link, $recursive );
			// Array union: on duplicate keys the subpage entry wins.
			$ret = $s + $ret;
		}
	}

	if ( $this->mUcfirst ) {
		foreach ( $ret as $k => $v ) {
			$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
		}
	}
	return $ret;
}
1081 
/**
 * Wrap text in a raw rule so that it is not converted.
 *
 * Text that already contains "-{" or "}-" anywhere, including at the very
 * start, is returned unchanged.
 *
 * @param string $text Text to protect
 * @param bool $noParse Unused by this implementation
 * @return string "-{R|<text>}-", or $text when it is already marked
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	# strpos() returns 0 for a match at offset 0, so compare against false
	# instead of relying on truthiness.
	if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
		return $text;
	}

	return "-{R|$text}-";
}
1099 
/**
 * Convert a category sort key.
 *
 * This implementation performs no conversion.
 * NOTE(review): presumably meant to be overridden by subclasses - confirm.
 *
 * @param string $key Sort key
 * @return string The key, unchanged
 */
function convertCategoryKey( $key ) {
	return $key;
}
1111 
/**
 * Reload the conversion tables when a conversion table page is involved.
 *
 * Tables are reloaded only for MediaWiki-namespace titles whose DB key is
 * 'Conversiontable/<variant>[/...]' with <variant> one of this converter's
 * variants.
 *
 * @param Title $titleobj Title to inspect
 */
public function updateConversionTable( Title $titleobj ) {
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return;
	}

	$parts = explode( '/', $titleobj->getDBkey(), 3 );
	if ( count( $parts ) <= 1 || $parts[0] != 'Conversiontable' ) {
		return;
	}

	if ( $this->validateVariant( $parts[1] ) ) {
		$this->reloadTables();
	}
}
1130 
/**
 * Get the regex used to split a rule into per-variant parts.
 *
 * Text is split on ";" only when a valid variant name follows, so that
 * semicolons inside markup survive. For example
 *   -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
 *   <span style="font-size:120%;">yyy</span>;}-
 * is split as
 *   [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
 *   [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
 *   [2] => ''
 *
 * The pattern is built once and cached in $mVarSeparatorPattern.
 *
 * @return string PCRE pattern suitable for preg_split()
 */
function getVarSeparatorPattern() {
	if ( $this->mVarSeparatorPattern === null ) {
		$alternatives = [];
		foreach ( $this->mVariants as $variant ) {
			// Variant codes are data, not regex: quote them so that a code
			// containing a metacharacter cannot alter or break the pattern.
			$quoted = preg_quote( $variant, '/' );
			// zh-hans:xxx;zh-hant:yyy
			$alternatives[] = $quoted . '\s*:';
			// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
			$alternatives[] = '[^;]*?=>\s*' . $quoted . '\s*:';
		}
		// A trailing ";" followed only by whitespace also splits.
		$alternatives[] = '\s*$';
		$this->mVarSeparatorPattern = '/;\s*(?=' . implode( '|', $alternatives ) . ')/';
	}
	return $this->mVarSeparatorPattern;
}
1160 }
ConverterRule
Parser for rules of language conversion; parses rules in -{ }- tags.
Definition: ConverterRule.php:27
$wgUser
$wgUser
Definition: Setup.php:809
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:268
$wgDisabledVariants
$wgDisabledVariants
Disabled variants array of language variant conversion.
Definition: DefaultSettings.php:3059
LinkBatch
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:34
captcha-old.count
count
Definition: captcha-old.py:249
$languages
switch( $options['output']) $languages
Definition: transstat.php:76
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
$req
this hook is for auditing only $req
Definition: hooks.txt:988
Html\expandAttributes
static expandAttributes(array $attribs)
Given an associative array of element attributes, generate a string to stick after the element name i...
Definition: Html.php:474
$wgDisableTitleConversion
$wgDisableTitleConversion
Whether to enable language variant conversion for links.
Definition: DefaultSettings.php:3037
$s
$s
Definition: mergeMessageFileList.php:188
CONTENT_MODEL_WIKITEXT
const CONTENT_MODEL_WIKITEXT
Definition: Defines.php:236
$output
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title after the basic globals have been set but before ordinary actions take place $output
Definition: hooks.txt:2198
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
NS_MAIN
const NS_MAIN
Definition: Defines.php:65
Revision\newFromTitle
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target.
Definition: Revision.php:134
Title\getDBkey
getDBkey()
Get the main part with underscores.
Definition: Title.php:955
MWException
MediaWiki exception.
Definition: MWException.php:26
$title
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:932
$titles
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition: linkcache.txt:17
Language\fetchLanguageNames
static fetchLanguageNames( $inLanguage=null, $include='mw')
Get an array of language names, indexed by code.
Definition: Language.php:803
Title\getNamespace
getNamespace()
Get the namespace index, i.e.
Definition: Title.php:978
$wgLanguageConverterCacheType
$wgLanguageConverterCacheType
The cache type for storing language conversion tables, which are used when parsing certain text and i...
Definition: DefaultSettings.php:2279
$wgDefaultLanguageVariant
$wgDefaultLanguageVariant
Default variant code, if false, the default will be the language code.
Definition: DefaultSettings.php:3042
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:335
ObjectCache\getInstance
static getInstance( $id)
Get a cached instance of the specified type of cache object.
Definition: ObjectCache.php:92
global
when a variable name is used in a function, it is silently declared as a new local, masking the global
Definition: design.txt:93
$wgDisableLangConversion
$wgDisableLangConversion
Whether to enable language variant conversion.
Definition: DefaultSettings.php:3032
MessageCache\singleton
static singleton()
Get the singleton instance of this class.
Definition: MessageCache.php:113
Title\makeTitleSafe
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:557
$ret
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1965
Revision\RAW
const RAW
Definition: Revision.php:100
Title
Represents a title within MediaWiki.
Definition: Title.php:39
$cache
$cache
Definition: mcc.php:33
Makefile.translate
def translate(text, conv_table)
Definition: Makefile.py:235
$code
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition: hooks.txt:781
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$link
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition: hooks.txt:2981
LoggerFactory
MediaWiki\Logger\LoggerFactory implements a PSR-3 compatible message logging system. Named Psr\Log\LoggerInterface instances can be obtained from the MediaWiki\Logger\LoggerFactory::getInstance() static method. MediaWiki\Logger\LoggerFactory expects a class implementing the MediaWiki\Logger\Spi interface to act as a factory for new Psr\Log\LoggerInterface instances. The "Spi" in MediaWiki\Logger\Spi stands for "service provider interface". An SPI is an API intended to be implemented or extended by a third party. This software design pattern is intended to enable framework extension and replaceable components. It is specifically used in the MediaWiki\Logger\LoggerFactory service to allow alternate PSR-3 logging implementations to be easily integrated with MediaWiki. The service provider interface allows the backend logging library to be implemented in multiple ways. The $wgMWLoggerDefaultSpi global provides the classname of the default MediaWiki\Logger\Spi implementation to be loaded at runtime. This can either be the name of a class implementing the MediaWiki\Logger\Spi with a zero-argument constructor or a callable that will return a MediaWiki\Logger\Spi instance. Alternately the MediaWiki\Logger\LoggerFactory MediaWiki Logger LoggerFactory
Definition: logger.txt:5
wfMessage
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:73
class
you have access to all of the normal MediaWiki so you can get a DB use the etc For full docs on the Maintenance class
Definition: maintenance.txt:52
$t
$t
Definition: testCompression.php:67
$wgRequest
if(! $wgDBerrorLogTZ) $wgRequest
Definition: Setup.php:662
MediaWikiServices
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
$flags
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2801
array
the array() calling protocol came about after MediaWiki 1.4rc1.
$wgContLang
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the content language as $wgContLang
Definition: design.txt:56
$out
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:781