MediaWiki  1.32.5
LanguageConverter.php
Go to the documentation of this file.
1 <?php
22 
24 
/**
 * Base class for language variant conversion.
 *
 * Converts text between the variants of one language by running it through
 * per-variant replacement tables, while leaving HTML markup, entities and
 * parser markers untouched and honouring manual "-{ ... }-" conversion rules.
 *
 * Concrete converters must override loadDefaultTables(); the implementation
 * here only throws.
 */
class LanguageConverter {
	/**
	 * Language codes listed as having variants.
	 *
	 * @var string[]
	 */
	public static $languagesWithVariants = [
		'en',
		'crh',
		'gan',
		'iu',
		'kk',
		'ku',
		'shi',
		'sr',
		'tg',
		'uz',
		'zh',
	];

	/** @var string Main language code; used as the last-resort variant */
	public $mMainLanguageCode;

	/** @var string[] Enabled variant codes ($variants minus $wgDisabledVariants) */
	public $mVariants;
	/** @var array Map of variant code => fallback variant(s), string or array */
	public $mVariantFallbacks;
	/** @var array Result of Language::fetchLanguageNames() */
	public $mVariantNames;
	/** @var bool Whether loadTables() has already run */
	public $mTablesLoaded = false;
	/** @var array|bool Per-variant conversion tables, plus a CACHE_VERSION_KEY entry */
	public $mTables;
	/** @var array 'bidirectional' 'unidirectional' 'disable' for each variant */
	public $mManualLevel;

	/** @var Language Language object this converter belongs to */
	public $mLangObj;
	/** @var array Recognised rule flags; every enabled variant code is also a flag */
	public $mFlags;
	public $mDescCodeSep = ':';
	public $mDescVarSep = ';';
	/** @var bool When true, parseCachedTable() also adds ucfirst'ed copies of each pair */
	public $mUcfirst = false;
	/** @var string|bool Title set by a manual rule, or false when none was seen */
	public $mConvRuleTitle = false;
	/** @var string|null Memoized result of getURLVariant() */
	public $mURLVariant;
	/** @var string|null Last result of getUserVariant() (not used for memoization) */
	public $mUserVariant;
	/** @var string|null Memoized result of getHeaderVariant() */
	public $mHeaderVariant;
	/** @var int Maximum nesting depth of -{ }- rules */
	public $mMaxDepth = 10;
	/** @var string|null Memoized result of getVarSeparatorPattern() */
	public $mVarSeparatorPattern;

	/** Key that must be present in a cached table set for it to be accepted */
	const CACHE_VERSION_KEY = 'VERSION 7';

	/**
	 * @param Language $langobj Language object the converter is attached to
	 * @param string $maincode Main language code
	 * @param array $variants Supported variant codes; disabled ones are removed
	 * @param array $variantfallbacks Map of variant => fallback variant(s)
	 * @param array $flags Extra flags, merged over the defaults
	 * @param array $manualLevel Map of variant => manual conversion level;
	 *   variants not listed default to 'bidirectional'
	 */
	public function __construct( Language $langobj, $maincode, $variants = [],
		$variantfallbacks = [], $flags = [],
		$manualLevel = []
	) {
		global $wgDisabledVariants;
		$this->mLangObj = $langobj;
		$this->mMainLanguageCode = $maincode;
		$this->mVariants = array_diff( $variants, $wgDisabledVariants );
		$this->mVariantFallbacks = $variantfallbacks;
		$this->mVariantNames = Language::fetchLanguageNames();
		$defaultflags = [
			// 'S' show converted text
			// '+' add rules for alltext
			// 'E' the given flags are erroneous
			// these flags above are reserved for program
			'A' => 'A', // add rule for convert code (all text convert)
			'T' => 'T', // title convert
			'R' => 'R', // raw content
			'D' => 'D', // convert description (subclass implement)
			'-' => '-', // remove convert (not implement)
			'H' => 'H', // add rule for convert code (but no display in placed code)
			'N' => 'N', // current variant name
		];
		$this->mFlags = array_merge( $defaultflags, $flags );
		foreach ( $this->mVariants as $v ) {
			if ( array_key_exists( $v, $manualLevel ) ) {
				$this->mManualLevel[$v] = $manualLevel[$v];
			} else {
				$this->mManualLevel[$v] = 'bidirectional';
			}
			// Every variant code is itself a valid flag.
			$this->mFlags[$v] = $v;
		}
	}

	/**
	 * Get all enabled variants of this language.
	 *
	 * @return string[]
	 */
	public function getVariants() {
		return $this->mVariants;
	}

	/**
	 * Get the fallback(s) configured for a variant.
	 *
	 * @param string $variant Variant code
	 * @return string|array The configured fallback entry, or the main
	 *   language code when nothing is configured for $variant
	 */
	public function getVariantFallbacks( $variant ) {
		if ( isset( $this->mVariantFallbacks[$variant] ) ) {
			return $this->mVariantFallbacks[$variant];
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Get the title produced by the conversion rule.
	 *
	 * @return string|bool The converted title, or false if no title rule was applied
	 */
	public function getConvRuleTitle() {
		return $this->mConvRuleTitle;
	}

	/**
	 * Get the preferred language variant.
	 *
	 * Sources are consulted in this order: the request URL, the
	 * 'GetLangPreferredVariant' hook, the logged-in user's preference (or,
	 * when that branch is not taken, the Accept-Language header), and finally
	 * $wgDefaultLanguageVariant.
	 *
	 * @return string The validated variant, or the main language code
	 */
	public function getPreferredVariant() {
		global $wgDefaultLanguageVariant, $wgUser;

		$req = $this->getURLVariant();

		Hooks::run( 'GetLangPreferredVariant', [ &$req ] );

		if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
			$req = $this->getUserVariant();
		} elseif ( !$req ) {
			$req = $this->getHeaderVariant();
		}

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		$req = $this->validateVariant( $req );

		// This function, unlike the other get*Variant functions, is
		// not memoized (i.e. its return value is not cached) since
		// new information might appear during processing after this
		// is first called.
		if ( $req ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Get the default variant, ignoring any user preference.
	 *
	 * Consults the request URL, then the Accept-Language header, then
	 * $wgDefaultLanguageVariant.
	 *
	 * @return string The variant found, or the main language code
	 */
	public function getDefaultVariant() {
		global $wgDefaultLanguageVariant;

		$req = $this->getURLVariant();

		if ( !$req ) {
			$req = $this->getHeaderVariant();
		}

		if ( $wgDefaultLanguageVariant && !$req ) {
			$req = $this->validateVariant( $wgDefaultLanguageVariant );
		}

		if ( $req ) {
			return $req;
		}
		return $this->mMainLanguageCode;
	}

	/**
	 * Validate a variant code against the enabled variants.
	 *
	 * @param string|null $variant The variant to validate
	 * @return string|null The matching internal variant code, or null if
	 *   $variant is null or matches no enabled variant
	 */
	public function validateVariant( $variant = null ) {
		if ( $variant === null ) {
			return null;
		}
		// Our internal variants are always lower-case; the variant we
		// are validating may have mixed case.
		$variant = LanguageCode::replaceDeprecatedCodes( strtolower( $variant ) );
		if ( in_array( $variant, $this->mVariants, true ) ) {
			return $variant;
		}
		// Browsers are supposed to use BCP 47 standard in the
		// Accept-Language header, but not all of our internal
		// mediawiki variant codes are BCP 47. Map BCP 47 code
		// to our internal code.
		foreach ( $this->mVariants as $v ) {
			// Case-insensitive match (BCP 47 is mixed case)
			if ( strtolower( LanguageCode::bcp47( $v ) ) === $variant ) {
				return $v;
			}
		}
		return null;
	}

	/**
	 * Get the variant specified in the request URL.
	 *
	 * Reads the 'variant' request parameter and falls back to 'uselang'.
	 * A truthy result is memoized in $mURLVariant.
	 *
	 * @return string|null Validated variant, or null if none was given
	 */
	public function getURLVariant() {
		global $wgRequest;

		if ( $this->mURLVariant ) {
			return $this->mURLVariant;
		}

		// see if the preference is set in the request
		$ret = $wgRequest->getText( 'variant' );

		if ( !$ret ) {
			$ret = $wgRequest->getVal( 'uselang' );
		}

		$this->mURLVariant = $this->validateVariant( $ret );
		return $this->mURLVariant;
	}

	/**
	 * Determine the language variant from the user's preferences.
	 *
	 * Deliberately NOT memoized: the result is stored in $mUserVariant but
	 * never read back, because memoizing this function wreaks havoc on
	 * parserTest.php.
	 *
	 * @return string|null|bool Validated variant, null if the preference is
	 *   not a valid variant, or false if $wgUser is not safe to load
	 */
	protected function getUserVariant() {
		global $wgUser;

		// Get language variant preference from logged in users
		// Don't call this on stub objects because that causes infinite
		// recursion during initialisation
		if ( !$wgUser->isSafeToLoad() ) {
			return false;
		}
		if ( $wgUser->isLoggedIn() ) {
			if (
				$this->mMainLanguageCode ==
				MediaWikiServices::getInstance()->getContentLanguage()->getCode()
			) {
				$ret = $wgUser->getOption( 'variant' );
			} else {
				$ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
			}
		} else {
			// figure out user lang without constructing wgLang to avoid
			// infinite recursion
			$ret = $wgUser->getOption( 'language' );
		}

		$this->mUserVariant = $this->validateVariant( $ret );
		return $this->mUserVariant;
	}

	/**
	 * Determine the language variant from the Accept-Language header.
	 *
	 * Each accepted language is validated in turn; if none is a valid variant,
	 * the configured fallbacks of those languages are tried next. A truthy
	 * result is memoized in $mHeaderVariant.
	 *
	 * @return string|null Validated variant, or null if none matched
	 */
	protected function getHeaderVariant() {
		global $wgRequest;

		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}

		// See if some supported language variant is set in the
		// HTTP header.
		$languages = array_keys( $wgRequest->getAcceptLang() );
		if ( !$languages ) {
			return null;
		}

		$fallbackLanguages = [];
		foreach ( $languages as $language ) {
			$this->mHeaderVariant = $this->validateVariant( $language );
			if ( $this->mHeaderVariant ) {
				break;
			}

			// To see if there are fallbacks of current language.
			// We record these fallback variants, and process
			// them later.
			$fallbacks = $this->getVariantFallbacks( $language );
			if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
				$fallbackLanguages[] = $fallbacks;
			} elseif ( is_array( $fallbacks ) ) {
				$fallbackLanguages =
					array_merge( $fallbackLanguages, $fallbacks );
			}
		}

		if ( !$this->mHeaderVariant ) {
			// process fallback languages now
			$fallback_languages = array_unique( $fallbackLanguages );
			foreach ( $fallback_languages as $language ) {
				$this->mHeaderVariant = $this->validateVariant( $language );
				if ( $this->mHeaderVariant ) {
					break;
				}
			}
		}

		return $this->mHeaderVariant;
	}

	/**
	 * Dictionary-based conversion.
	 *
	 * Translates the plain-text parts of $text and leaves HTML tags,
	 * entities and parser markers as they are; 'title' and 'alt' attribute
	 * values inside tags are converted recursively.
	 *
	 * @param string $text The text to be converted
	 * @param bool|string $toVariant The target variant; the preferred
	 *   variant is used when falsy
	 * @return string Converted text, or $text unchanged when no target
	 *   variant is available, guessVariant() reports it is already in the
	 *   target variant, or the tokenizing regex fails
	 */
	public function autoConvert( $text, $toVariant = false ) {
		$this->loadTables();

		if ( !$toVariant ) {
			$toVariant = $this->getPreferredVariant();
			if ( !$toVariant ) {
				return $text;
			}
		}

		if ( $this->guessVariant( $text, $toVariant ) ) {
			return $text;
		}
		/* we convert everything except:
		   1. HTML markups (anything between < and >)
		   2. HTML entities
		   3. placeholders created by the parser
		   IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
		   Minimize use of backtracking where possible.
		*/
		$marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';

		// this one is needed when the text is inside an HTML markup
		$htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';

		// Optimize for the common case where these tags have
		// few or no children. Thus try and possessively get as much as
		// possible, and only engage in backtracking when we hit a '<'.

		// disable convert to variants between <code> tags
		$codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
		// disable conversion of <script> tags
		$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
		// disable conversion of <pre> tags
		$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
		// The "|.*+)" at the end, is in case we missed some part of html syntax,
		// we will fail securely (hopefully) by matching the rest of the string.
		$htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';

		$reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
			'&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
		$startPos = 0;
		$sourceBlob = '';
		$literalBlob = '';

		// Guard against delimiter nulls in the input
		// (should never happen: see T159174)
		$text = str_replace( [ "\000", "\004" ], '', $text );

		$markupMatches = null;
		$elementMatches = null;

		// $text is not modified inside the loop, so its length and the
		// marker-terminated subject can be computed once.
		$textLength = strlen( $text );
		// We add a marker (\004) at the end of text, to ensure we always match the
		// entire text (Otherwise, pcre.backtrack_limit might cause silent failure)
		$subject = $text . "\004";

		while ( $startPos < $textLength ) {
			if ( preg_match( $reg, $subject, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
				$elementPos = $markupMatches[0][1];
				$element = $markupMatches[0][0];
				if ( $element === "\004" ) {
					// We hit the end.
					$elementPos = $textLength;
					$element = '';
				} elseif ( substr( $element, -1 ) === "\004" ) {
					// This can sometimes happen if we have
					// unclosed html tags (For example
					// when converting a title attribute
					// during a recursive call that contains
					// a &lt; e.g. <div title="&lt;">.
					$element = substr( $element, 0, -1 );
				}
			} else {
				// If we hit here, then Language Converter could be tricked
				// into doing an XSS, so we refuse to translate.
				// If non-crazy input manages to reach this code path,
				// we should consider it a bug.
				$log = LoggerFactory::getInstance( 'languageconverter' );
				$log->error( "Hit pcre.backtrack_limit in " . __METHOD__
					. ". Disabling language conversion for this page.",
					[
						"method" => __METHOD__,
						"variant" => $toVariant,
						"startOfText" => substr( $text, 0, 500 )
					]
				);
				return $text;
			}
			// Queue the part before the markup for translation in a batch
			$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

			// Advance to the next position
			$startPos = $elementPos + strlen( $element );

			// Translate any alt or title attributes inside the matched element
			if ( $element !== ''
				&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
			) {
				// FIXME, this decodes entities, so if you have something
				// like <div title="foo&lt;bar"> the bar won't get
				// translated since after entity decoding it looks like
				// unclosed html and we call this method recursively
				// on attributes.
				$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
				// Ensure self-closing tags stay self-closing.
				$close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
				$changed = false;
				foreach ( [ 'title', 'alt' ] as $attrName ) {
					if ( !isset( $attrs[$attrName] ) ) {
						continue;
					}
					$attr = $attrs[$attrName];
					// Don't convert URLs. Note the truthiness test: a value with
					// '://' at offset 0 (no scheme) is still converted.
					if ( !strpos( $attr, '://' ) ) {
						$attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
					}

					if ( $attr !== $attrs[$attrName] ) {
						$attrs[$attrName] = $attr;
						$changed = true;
					}
				}
				if ( $changed ) {
					$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
						$close . $elementMatches[3];
				}
			}
			$literalBlob .= $element . "\000";
		}

		// Do the main translation batch
		$translatedBlob = $this->translate( $sourceBlob, $toVariant );

		// Put the output back together
		$translatedIter = StringUtils::explode( "\000", $translatedBlob );
		$literalIter = StringUtils::explode( "\000", $literalBlob );
		$output = '';
		while ( $translatedIter->valid() && $literalIter->valid() ) {
			$output .= $translatedIter->current();
			$output .= $literalIter->current();
			$translatedIter->next();
			$literalIter->next();
		}

		return $output;
	}

	/**
	 * Translate a string to a variant.
	 *
	 * Does no markup parsing: the whole string is passed through the
	 * variant's conversion table.
	 *
	 * @param string $text Text to convert
	 * @param string $variant Variant language code
	 * @return string Translated text
	 */
	public function translate( $text, $variant ) {
		// If $text is empty or only includes spaces, do nothing
		// Otherwise translate it
		if ( trim( $text ) ) {
			$this->loadTables();
			$text = $this->mTables[$variant]->replace( $text );
		}
		return $text;
	}

	/**
	 * Translate a string into every enabled variant.
	 *
	 * @param string $text The text to be converted
	 * @return array Map of variant code => converted text
	 */
	public function autoConvertToAllVariants( $text ) {
		$this->loadTables();

		$ret = [];
		foreach ( $this->mVariants as $variant ) {
			$ret[$variant] = $this->translate( $text, $variant );
		}

		return $ret;
	}

	/**
	 * Apply a manual conversion rule to the converter state.
	 *
	 * @param ConverterRule $convRule Parsed rule
	 */
	protected function applyManualConv( $convRule ) {
		// Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
		// title conversion.
		// T26072: $mConvRuleTitle was overwritten by other manual
		// rule(s) not for title, this breaks the title conversion.
		$newConvRuleTitle = $convRule->getTitle();
		if ( $newConvRuleTitle ) {
			// Only overwrite when the rule actually carries a title.
			$this->mConvRuleTitle = $newConvRuleTitle;
		}

		// merge/remove manual conversion rules to/from global table
		$convTable = $convRule->getConvTable();
		$action = $convRule->getRulesAction();
		foreach ( $convTable as $variant => $pair ) {
			$v = $this->validateVariant( $variant );
			if ( !$v ) {
				continue;
			}

			if ( $action == 'add' ) {
				// More efficient than array_merge(), about 2.5 times.
				foreach ( $pair as $from => $to ) {
					$this->mTables[$v]->setPair( $from, $to );
				}
			} elseif ( $action == 'remove' ) {
				$this->mTables[$v]->removeArray( $pair );
			}
		}
	}

	/**
	 * Auto-convert a Title object to a readable string in the preferred variant.
	 *
	 * @param Title $title A object of Title
	 * @return string Converted title text, prefixed with the converted
	 *   namespace name and ':' unless the title is in the main namespace
	 */
	public function convertTitle( $title ) {
		$variant = $this->getPreferredVariant();
		$index = $title->getNamespace();
		if ( $index !== NS_MAIN ) {
			$text = $this->convertNamespace( $index, $variant ) . ':';
		} else {
			$text = '';
		}
		$text .= $this->translate( $title->getText(), $variant );
		return $text;
	}

	/**
	 * Get the namespace display name in a variant.
	 *
	 * Lookup order: local server cache, the "conversion-ns<index>" message
	 * in the target variant, the same message in the content language
	 * (translated to the variant), and finally the namespace name of the
	 * variant's own Language object. The result is cached for 60 seconds.
	 *
	 * @param int $index Namespace id
	 * @param string|null $variant Variant code; preferred variant when null
	 * @return string Namespace name for display ('' for the main namespace)
	 */
	public function convertNamespace( $index, $variant = null ) {
		if ( $index === NS_MAIN ) {
			return '';
		}

		if ( $variant === null ) {
			$variant = $this->getPreferredVariant();
		}

		$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
		$key = $cache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
		$nsVariantText = $cache->get( $key );
		if ( $nsVariantText !== false ) {
			return $nsVariantText;
		}

		// First check if a message gives a converted name in the target variant.
		$nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
		if ( $nsConvMsg->exists() ) {
			$nsVariantText = $nsConvMsg->plain();
		}

		// Then check if a message gives a converted name in content language
		// which needs extra translation to the target variant.
		if ( $nsVariantText === false ) {
			$nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
			if ( $nsConvMsg->exists() ) {
				$nsVariantText = $this->translate( $nsConvMsg->plain(), $variant );
			}
		}

		if ( $nsVariantText === false ) {
			// No message exists, retrieve it from the target variant's namespace names.
			$langObj = $this->mLangObj->factory( $variant );
			$nsVariantText = $langObj->getFormattedNsText( $index );
		}

		$cache->set( $key, $nsVariantText, 60 );

		return $nsVariantText;
	}

	/**
	 * Convert text to the preferred variant, processing -{ }- rules.
	 *
	 * @param string $text Text to be converted
	 * @return string Converted text
	 */
	public function convert( $text ) {
		$variant = $this->getPreferredVariant();
		return $this->convertTo( $text, $variant );
	}

	/**
	 * Same as convert() except a target variant is given explicitly.
	 *
	 * @param string $text Text to convert
	 * @param string $variant The target variant code
	 * @return string Converted text, or $text unchanged when
	 *   $wgDisableLangConversion is set
	 */
	public function convertTo( $text, $variant ) {
		global $wgDisableLangConversion;
		if ( $wgDisableLangConversion ) {
			return $text;
		}
		// Reset converter state for a new converter run.
		$this->mConvRuleTitle = false;
		return $this->recursiveConvertTopLevel( $text, $variant );
	}

	/**
	 * Recursively convert text at the "top level", i.e. outside any
	 * -{ }- rule.
	 *
	 * Text between rules goes through autoConvert() (unless guessVariant()
	 * says the text is already in the target variant); each "-{" found
	 * outside HTML is handed to recursiveConvertRule().
	 *
	 * @param string $text Text to be converted
	 * @param string $variant The target variant code
	 * @param int $depth Depth of recursion
	 * @return string Converted text
	 */
	protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
		$startPos = 0;
		$out = '';
		$length = strlen( $text );
		$shouldConvert = !$this->guessVariant( $text, $variant );
		$continue = 1;

		$noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
		$noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
		// phpcs:ignore Generic.Files.LineLength
		$noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
		while ( $startPos < $length && $continue ) {
			$continue = preg_match(
				// Only match -{ outside of html.
				"/$noScript|$noStyle|$noHtml|-\{/",
				$text,
				$m,
				PREG_OFFSET_CAPTURE,
				$startPos
			);

			if ( !$continue ) {
				// No more markup, append final segment
				$fragment = substr( $text, $startPos );
				$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
				return $out;
			}

			// Offset of the match of the regex pattern.
			$pos = $m[0][1];

			// Append initial segment
			$fragment = substr( $text, $startPos, $pos - $startPos );
			$out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
			// -{ marker found, not in attribute
			// Advance position up to -{ marker.
			$startPos = $pos;
			// Do recursive conversion
			// Note: This passes $startPos by reference, and advances it.
			$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
		}
		return $out;
	}

	/**
	 * Recursively convert text inside a -{ }- rule.
	 *
	 * @param string $text Text to be converted
	 * @param string $variant The target variant code
	 * @param int &$startPos Offset of the opening "-{"; advanced past the
	 *   consumed rule (or to the end of $text if the rule is unclosed)
	 * @param int $depth Depth of recursion
	 * @throws MWException If $text does not have "-{" at $startPos
	 * @return string Display text of the rule, or "-{" followed by the
	 *   auto-converted remainder when the rule is never closed
	 */
	protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
		// Quick sanity check (no function calls)
		if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
			throw new MWException( __METHOD__ . ': invalid input string' );
		}

		$startPos += 2;
		$inner = '';
		$warningDone = false;
		$length = strlen( $text );

		while ( $startPos < $length ) {
			$m = false;
			preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
			if ( !$m ) {
				// Unclosed rule
				break;
			}

			$token = $m[0][0];
			$pos = $m[0][1];

			// Markup found
			// Append initial segment
			$inner .= substr( $text, $startPos, $pos - $startPos );

			// Advance position
			$startPos = $pos;

			switch ( $token ) {
				case '-{':
					// Check max depth
					if ( $depth >= $this->mMaxDepth ) {
						$inner .= '-{';
						if ( !$warningDone ) {
							$inner .= '<span class="error">' .
								wfMessage( 'language-converter-depth-warning' )
									->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
								'</span>';
							$warningDone = true;
						}
						$startPos += 2;
						break;
					}
					// Recursively parse another rule
					$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
					break;
				case '}-':
					// Apply the rule
					$startPos += 2;
					$rule = new ConverterRule( $inner, $this );
					$rule->parse( $variant );
					$this->applyManualConv( $rule );
					return $rule->getDisplay();
				default:
					throw new MWException( __METHOD__ . ': invalid regex match' );
			}
		}

		// Unclosed rule
		if ( $startPos < $length ) {
			$inner .= substr( $text, $startPos );
		}
		$startPos = $length;
		return '-{' . $this->autoConvert( $inner, $variant );
	}

	/**
	 * If a link target does not exist, look for an existing page under one
	 * of its variant spellings.
	 *
	 * On success both by-reference arguments are replaced with the first
	 * existing variant title found; otherwise they are left untouched.
	 *
	 * @param string &$link The name of the link
	 * @param Title &$nt The title object of the link
	 * @param bool $ignoreOtherCond When true, skip the request-based
	 *   conditions (redirect=no, action=edit/submit, linkconvert=no)
	 */
	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
		# If the article has already existed, there is no need to
		# check it again, otherwise it may cause a fault.
		if ( is_object( $nt ) && $nt->exists() ) {
			return;
		}

		global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;
		$isredir = $wgRequest->getText( 'redirect', 'yes' );
		$action = $wgRequest->getText( 'action' );
		if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
			$action = 'view';
		}
		$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
		$disableLinkConversion = $wgDisableLangConversion
			|| $wgDisableTitleConversion;
		$linkBatch = new LinkBatch();

		$ns = NS_MAIN;

		if ( $disableLinkConversion ||
			( !$ignoreOtherCond &&
				( $isredir == 'no'
					|| $action == 'edit'
					|| $action == 'submit'
					|| $linkconvert == 'no' ) ) ) {
			return;
		}

		if ( is_object( $nt ) ) {
			$ns = $nt->getNamespace();
		}

		$variants = $this->autoConvertToAllVariants( $link );
		if ( !$variants ) { // give up
			return;
		}

		$titles = [];

		foreach ( $variants as $v ) {
			if ( $v != $link ) {
				$varnt = Title::newFromText( $v, $ns );
				if ( $varnt !== null ) {
					$linkBatch->addObj( $varnt );
					$titles[] = $varnt;
				}
			}
		}

		// fetch all variants in single query
		$linkBatch->execute();

		foreach ( $titles as $varnt ) {
			if ( $varnt->getArticleID() > 0 ) {
				$nt = $varnt;
				$link = $varnt->getText();
				break;
			}
		}
	}

	/**
	 * Returns a language-specific hash option: '!' followed by the
	 * preferred variant.
	 *
	 * @return string
	 */
	public function getExtraHashOptions() {
		$variant = $this->getPreferredVariant();

		return '!' . $variant;
	}

	/**
	 * Guess whether a text is already written in a variant.
	 *
	 * This base implementation always answers false; subclasses may override.
	 *
	 * @param string $text The text to be checked
	 * @param string $variant Language code of the variant to be checked for
	 * @return bool True if $text appears to be written in $variant
	 */
	public function guessVariant( $text, $variant ) {
		return false;
	}

	/**
	 * Load the default conversion tables into $this->mTables.
	 *
	 * Must be overridden by subclasses.
	 *
	 * @throws MWException Always, in this base implementation
	 */
	public function loadDefaultTables() {
		$class = static::class;
		throw new MWException( "Must implement loadDefaultTables() method in class $class" );
	}

	/**
	 * Load the conversion tables, either from the cache or by rebuilding
	 * them from the default tables plus the Conversiontable/* pages.
	 *
	 * Does nothing when the tables are already loaded. Rebuilt tables are
	 * written back to the cache with a TTL of 43200 seconds.
	 *
	 * @param bool $fromCache Whether to try the cache before rebuilding
	 */
	public function loadTables( $fromCache = true ) {
		global $wgLanguageConverterCacheType;

		if ( $this->mTablesLoaded ) {
			return;
		}

		$this->mTablesLoaded = true;
		$this->mTables = false;
		$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
		$cacheKey = $cache->makeKey( 'conversiontables', $this->mMainLanguageCode );
		if ( $fromCache ) {
			$this->mTables = $cache->get( $cacheKey );
		}
		if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
			// not in cache, or we need a fresh reload.
			// We will first load the default tables
			// then update them using things in MediaWiki:Conversiontable/*
			$this->loadDefaultTables();
			foreach ( $this->mVariants as $var ) {
				$cached = $this->parseCachedTable( $var );
				$this->mTables[$var]->mergeArray( $cached );
			}

			$this->postLoadTables();
			$this->mTables[self::CACHE_VERSION_KEY] = true;

			$cache->set( $cacheKey, $this->mTables, 43200 );
		}
	}

	/**
	 * Hook for post processing after the conversion tables are rebuilt.
	 * No-op in this base class.
	 */
	public function postLoadTables() {
	}

	/**
	 * Discard the loaded tables and rebuild them, bypassing the cache read.
	 */
	private function reloadTables() {
		if ( $this->mTables ) {
			unset( $this->mTables );
		}

		$this->mTablesLoaded = false;
		$this->loadTables( false );
	}

	/**
	 * Parse a conversion table stored on a "Conversiontable/<code>" page.
	 *
	 * Mappings are read from "-{ from => to ; ... }-" blocks. Links of the
	 * form [[MediaWiki:Conversiontable/<code>/<subpage>|...]] are collected
	 * and, when $recursive is true, parsed too. Each page is parsed at most
	 * once per request; repeat calls for the same page return [].
	 *
	 * @param string $code Language (variant) code
	 * @param string $subpage Subpage name, '' for the base table
	 * @param bool $recursive Whether to follow links to subpages
	 * @return array Map of source text => replacement text
	 */
	public function parseCachedTable( $code, $subpage = '', $recursive = true ) {
		static $parsed = [];

		$key = 'Conversiontable/' . $code;
		if ( $subpage ) {
			$key .= '/' . $subpage;
		}
		if ( array_key_exists( $key, $parsed ) ) {
			return [];
		}

		$parsed[$key] = true;

		if ( $subpage === '' ) {
			$txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
		} else {
			$txt = false;
			$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
			if ( $title && $title->exists() ) {
				$revision = Revision::newFromTitle( $title );
				if ( $revision ) {
					if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
						$txt = $revision->getContent( Revision::RAW )->getNativeData();
					}

					// @todo in the future, use a specialized content model, perhaps based on json!
				}
			}
		}

		# Nothing to parse if there's no text
		if ( $txt === false || $txt === null || $txt === '' ) {
			return [];
		}

		// get all subpage links of the form
		// [[MediaWiki:Conversiontable/zh-xx/...|...]]
		$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
			':Conversiontable';
		$subs = StringUtils::explode( '[[', $txt );
		$sublinks = [];
		foreach ( $subs as $sub ) {
			$link = explode( ']]', $sub, 2 );
			if ( count( $link ) != 2 ) {
				continue;
			}
			$b = explode( '|', $link[0], 2 );
			$b = explode( '/', trim( $b[0] ), 3 );
			if ( count( $b ) == 3 ) {
				$sublink = $b[2];
			} else {
				$sublink = '';
			}

			// A link without any '/' yields a single element; guard the
			// $b[1] read so it cannot raise an undefined-offset notice.
			if ( count( $b ) >= 2 && $b[0] == $linkhead && $b[1] == $code ) {
				$sublinks[] = $sublink;
			}
		}

		// parse the mappings in this page
		$blocks = StringUtils::explode( '-{', $txt );
		$ret = [];
		$first = true;
		foreach ( $blocks as $block ) {
			if ( $first ) {
				// Skip the part before the first -{
				$first = false;
				continue;
			}
			$mappings = explode( '}-', $block, 2 )[0];
			$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
			$table = StringUtils::explode( ';', $stripped );
			foreach ( $table as $t ) {
				$m = explode( '=>', $t, 3 );
				if ( count( $m ) != 2 ) {
					continue;
				}
				// trim any trailing comments starting with '//'
				$tt = explode( '//', $m[1], 2 );
				$ret[trim( $m[0] )] = trim( $tt[0] );
			}
		}

		// recursively parse the subpages
		if ( $recursive ) {
			foreach ( $sublinks as $link ) {
				$s = $this->parseCachedTable( $code, $link, $recursive );
				// Array union: subpage entries win over this page's entries.
				$ret = $s + $ret;
			}
		}

		if ( $this->mUcfirst ) {
			foreach ( $ret as $k => $v ) {
				$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
			}
		}
		return $ret;
	}

	/**
	 * Enclose a string in "-{R| ... }-" so the variant converter leaves it
	 * alone.
	 *
	 * @param string $text Text to be tagged for no conversion
	 * @param bool $noParse Unused
	 * @return string The tagged text, or $text unchanged if it already
	 *   contains "-{" or "}-" anywhere (including at offset 0)
	 */
	public function markNoConversion( $text, $noParse = false ) {
		# don't mark if already marked
		# strpos() returns 0 for a match at the very start, so compare
		# against false rather than relying on truthiness.
		if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
			return $text;
		}

		return "-{R|$text}-";
	}

	/**
	 * Convert the sorting key for category links. The base implementation
	 * returns the key unchanged.
	 *
	 * @param string $key
	 * @return string
	 */
	public function convertCategoryKey( $key ) {
		return $key;
	}

	/**
	 * Rebuild the conversion tables when a page named
	 * "Conversiontable/<valid variant>[/...]" in the MediaWiki namespace
	 * is the given title.
	 *
	 * @param Title $titleobj The title that was updated
	 */
	public function updateConversionTable( Title $titleobj ) {
		if ( $titleobj->getNamespace() == NS_MEDIAWIKI ) {
			$title = $titleobj->getDBkey();
			$t = explode( '/', $title, 3 );
			$c = count( $t );
			if ( $c > 1 && $t[0] == 'Conversiontable' ) {
				if ( $this->validateVariant( $t[1] ) ) {
					$this->reloadTables();
				}
			}
		}
	}

	/**
	 * Get the cached separator pattern for ConverterRule::parseRules().
	 *
	 * @return string Regex for use with preg_split()
	 */
	public function getVarSeparatorPattern() {
		if ( $this->mVarSeparatorPattern === null ) {
			// varsep_pattern for preg_split:
			// text should be split by ";" only if a valid variant
			// name exists after the markup, for example:
			//  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
			//  <span style="font-size:120%;">yyy</span>;}-
			// we should split it as:
			//  [
			//    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
			//    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
			//    [2] => ''
			//  ]
			$pat = '/;\s*(?=';
			foreach ( $this->mVariants as $variant ) {
				// zh-hans:xxx;zh-hant:yyy
				$pat .= $variant . '\s*:|';
				// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
				$pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
			}
			$pat .= '\s*$)/';
			$this->mVarSeparatorPattern = $pat;
		}
		return $this->mVarSeparatorPattern;
	}
}
LanguageCode\replaceDeprecatedCodes
static replaceDeprecatedCodes( $code)
Replace deprecated language codes that were used in previous versions of MediaWiki to up-to-date,...
Definition: LanguageCode.php:165
ConverterRule
Parser for rules of language conversion , parse rules in -{ }- tag.
Definition: ConverterRule.php:27
Title\newFromText
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:280
$wgDisabledVariants
$wgDisabledVariants
Disabled variants array of language variant conversion.
Definition: DefaultSettings.php:3138
LinkBatch
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:34
captcha-old.count
count
Definition: captcha-old.py:249
$languages
switch( $options['output']) $languages
Definition: transstat.php:76
$req
this hook is for auditing only $req
Definition: hooks.txt:1018
Html\expandAttributes
static expandAttributes(array $attribs)
Given an associative array of element attributes, generate a string to stick after the element name i...
Definition: Html.php:475
$wgDisableTitleConversion
$wgDisableTitleConversion
Whether to enable language variant conversion for links.
Definition: DefaultSettings.php:3116
$s
$s
Definition: mergeMessageFileList.php:187
CONTENT_MODEL_WIKITEXT
const CONTENT_MODEL_WIKITEXT
Definition: Defines.php:235
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
NS_MAIN
const NS_MAIN
Definition: Defines.php:64
Revision\newFromTitle
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target.
Definition: Revision.php:133
Title\getDBkey
getDBkey()
Get the main part with underscores.
Definition: Title.php:951
MWException
MediaWiki exception.
Definition: MWException.php:26
$title
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:964
$titles
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition: linkcache.txt:17
Title\getNamespace
getNamespace()
Get the namespace index, i.e. one of the NS_xxxx constants.
Definition: Title.php:974
$wgLanguageConverterCacheType
$wgLanguageConverterCacheType
The cache type for storing language conversion tables, which are used when parsing certain text and i...
Definition: DefaultSettings.php:2369
$wgDefaultLanguageVariant
$wgDefaultLanguageVariant
Default variant code, if false, the default will be the language code.
Definition: DefaultSettings.php:3121
StringUtils\explode
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Definition: StringUtils.php:336
ObjectCache\getInstance
static getInstance( $id)
Get a cached instance of the specified type of cache object.
Definition: ObjectCache.php:92
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
$code
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition: hooks.txt:813
$output
$output
Definition: SyntaxHighlight.php:334
array
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
$wgDisableLangConversion
$wgDisableLangConversion
Whether to enable language variant conversion.
Definition: DefaultSettings.php:3111
MessageCache\singleton
static singleton()
Get the singleton instance of this class.
Definition: MessageCache.php:120
Title\makeTitleSafe
static makeTitleSafe( $ns, $title, $fragment='', $interwiki='')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:573
$ret
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:2044
Revision\RAW
const RAW
Definition: Revision.php:57
Title
Represents a title within MediaWiki.
Definition: Title.php:39
$cache
$cache
Definition: mcc.php:33
Makefile.translate
def translate(text, conv_table)
Definition: Makefile.py:235
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
LanguageCode\bcp47
static bcp47( $code)
Get the normalised IETF language tag See unit test for examples.
Definition: LanguageCode.php:182
$link
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition: hooks.txt:3098
LoggerFactory
MediaWiki Logger LoggerFactory implements a PSR-3 compatible message logging system Named Psr Log LoggerInterface instances can be obtained from the MediaWiki Logger LoggerFactory::getInstance() static method. MediaWiki\Logger\LoggerFactory expects a class implementing the MediaWiki\Logger\Spi interface to act as a factory for new Psr\Log\LoggerInterface instances. The "Spi" in MediaWiki\Logger\Spi stands for "service provider interface". An SPI is an API intended to be implemented or extended by a third party. This software design pattern is intended to enable framework extension and replaceable components. It is specifically used in the MediaWiki\Logger\LoggerFactory service to allow alternate PSR-3 logging implementations to be easily integrated with MediaWiki. The service provider interface allows the backend logging library to be implemented in multiple ways. The $wgMWLoggerDefaultSpi global provides the classname of the default MediaWiki\Logger\Spi implementation to be loaded at runtime. This can either be the name of a class implementing the MediaWiki\Logger\Spi with a zero argument constructor or a callable that will return a MediaWiki\Logger\Spi instance. Alternately the MediaWiki\Logger\LoggerFactory MediaWiki Logger LoggerFactory
Definition: logger.txt:5
NS_MEDIAWIKI
const NS_MEDIAWIKI
Definition: Defines.php:72
class
you have access to all of the normal MediaWiki so you can get a DB use the etc For full docs on the Maintenance class
Definition: maintenance.txt:52
$t
$t
Definition: testCompression.php:69
$wgRequest
if(! $wgDBerrorLogTZ) $wgRequest
Definition: Setup.php:747
MediaWikiServices
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
Language\fetchLanguageNames
static fetchLanguageNames( $inLanguage=self::AS_AUTONYMS, $include='mw')
Get an array of language names, indexed by code.
Definition: Language.php:843
wfMessage
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt
Hooks\run
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:200
Language
Internationalisation code.
Definition: Language.php:35
$out
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:813