MediaWiki  1.28.3
LanguageConverter.php
Go to the documentation of this file.
1 <?php
22 
24 
/**
 * Language codes for which variant conversion is available.
 * @var string[]
 */
public static $languagesWithVariants = [
	'gan',
	'iu',
	'kk',
	'ku',
	'shi',
	'sr',
	'tg',
	'uz',
	'zh',
];

/** @var string Main language code, set by the constructor */
public $mMainLanguageCode;

/** @var string[] Enabled variant codes (constructor input minus $wgDisabledVariants) */
public $mVariants;

/** @var array Fallback table keyed by variant code, set by the constructor */
public $mVariantFallbacks;

/** @var array Result of Language::fetchLanguageNames(), set by the constructor */
public $mVariantNames;

/** @var bool Whether loadTables() has already run */
public $mTablesLoaded = false;

/** @var array|bool Conversion tables keyed by variant code; false until loaded */
public $mTables;

// 'bidirectional' 'unidirectional' 'disable' for each variant
public $mManualLevel;

/** @var string Cache key under which the conversion tables are stored */
public $mCacheKey;

public $mLangObj;
public $mFlags;
public $mDescCodeSep = ':';
public $mDescVarSep = ';';
public $mUcfirst = false;
public $mConvRuleTitle = false;

/** @var string|null Memoized result of getURLVariant() */
public $mURLVariant;

/** @var string|null Last result of getUserVariant() */
public $mUserVariant;

/** @var string|null Memoized result of getHeaderVariant() */
public $mHeaderVariant;

/** @var int Maximum nesting depth of -{ }- rules handled by recursiveConvertRule() */
public $mMaxDepth = 10;

/** @var string|null Memoized pattern built by getVarSeparatorPattern() */
public $mVarSeparatorPattern;

/** Key stored inside the cached tables; loadTables() rebuilds tables lacking it */
const CACHE_VERSION_KEY = 'VERSION 7';
78 
/**
 * Initialise the converter for one language and its set of variants.
 *
 * @param object $langobj Language object; stored in $mLangObj
 * @param string $maincode Main language code; also used to build the cache key
 * @param array $variants Variant codes; entries listed in $wgDisabledVariants are dropped
 * @param array $variantfallbacks Fallback table keyed by variant code
 * @param array $flags Extra conversion flags, merged over the defaults
 * @param array $manualLevel Manual conversion level per variant
 *   ('bidirectional', 'unidirectional' or 'disable'); variants without an
 *   entry default to 'bidirectional'
 */
public function __construct( $langobj, $maincode, $variants = [],
	$variantfallbacks = [], $flags = [],
	$manualLevel = []
) {
	// Without this declaration $wgDisabledVariants would be an undefined local.
	global $wgDisabledVariants;

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;
	$this->mVariants = array_diff( $variants, $wgDisabledVariants );
	$this->mVariantFallbacks = $variantfallbacks;
	$this->mVariantNames = Language::fetchLanguageNames();
	$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
	$defaultflags = [
		// 'S' show converted text
		// '+' add rules for alltext
		// 'E' the given flags are in error
		// the flags above are reserved for the program
		'A' => 'A', // add rule for convert code (all text convert)
		'T' => 'T', // title convert
		'R' => 'R', // raw content
		'D' => 'D', // convert description (subclass implement)
		'-' => '-', // remove convert (not implemented)
		'H' => 'H', // add rule for convert code (but no display in placed code)
		'N' => 'N' // current variant name
	];
	$this->mFlags = array_merge( $defaultflags, $flags );
	foreach ( $this->mVariants as $v ) {
		$this->mManualLevel[$v] = array_key_exists( $v, $manualLevel )
			? $manualLevel[$v]
			: 'bidirectional';
		// Every enabled variant code is itself a valid flag.
		$this->mFlags[$v] = $v;
	}
}
122 
/**
 * Get all valid variants, i.e. the variant codes kept by the constructor.
 *
 * @return string[] Contents of $mVariants
 */
public function getVariants() {
	return $this->mVariants;
}
132 
/**
 * In case some variant is not defined in the markup, we need to have
 * some fallback.
 *
 * @param string $variant The language code of the variant
 * @return string|array The configured fallback(s) for $variant, or the main
 *   language code when the fallback table has no entry for it
 */
public function getVariantFallbacks( $variant ) {
	if ( isset( $this->mVariantFallbacks[$variant] ) ) {
		return $this->mVariantFallbacks[$variant];
	}
	// getHeaderVariant() compares the result against the main language code,
	// so that is the value to hand back when nothing specific is configured.
	return $this->mMainLanguageCode;
}
150 
/**
 * Get the title produced by the conversion rule.
 *
 * @return string|bool Current value of $mConvRuleTitle; false when no
 *   title rule has been applied since the last convertTo() reset
 */
public function getConvRuleTitle() {
	return $this->mConvRuleTitle;
}
158 
/**
 * Get the preferred language variant.
 *
 * Sources are tried in this order: URL parameter, then the user preference
 * (logged-in users that are safe to load) or the Accept-Language header
 * (everyone else), then $wgDefaultLanguageVariant, and finally the main
 * language code.
 *
 * @return string The preferred language code
 */
public function getPreferredVariant() {
	global $wgDefaultLanguageVariant, $wgUser;

	$req = $this->getURLVariant();

	if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
		$req = $this->getUserVariant();
	} elseif ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	// This function, unlike the other get*Variant functions, is
	// not memoized (i.e. its return value is not cached) since
	// new information might appear during processing after this
	// is first called.
	if ( $this->validateVariant( $req ) ) {
		return $req;
	}
	// Nothing usable was requested: fall back to the main language code
	// instead of implicitly returning null.
	return $this->mMainLanguageCode;
}
187 
/**
 * Get the default variant, ignoring the user's own preference.
 *
 * Sources are tried in this order: URL parameter, Accept-Language header,
 * $wgDefaultLanguageVariant, and finally the main language code.
 *
 * @return string The default variant code
 */
public function getDefaultVariant() {
	global $wgDefaultLanguageVariant;

	$req = $this->getURLVariant();

	if ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	if ( $req ) {
		return $req;
	}
	// Fall back to the main language code instead of implicitly returning null.
	return $this->mMainLanguageCode;
}
211 
/**
 * Validate a variant code.
 *
 * @param string|null $variant The variant to validate
 * @return string|null $variant itself if it is one of the enabled
 *   variants, otherwise null
 */
public function validateVariant( $variant = null ) {
	// Strict comparison: with loose matching, non-string input such as
	// true (or 0 on PHP < 8) would "match" any variant code.
	if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
		return $variant;
	}
	return null;
}
223 
/**
 * Get the variant specified in the URL.
 *
 * Reads the "variant" request parameter and falls back to "uselang".
 * A valid result is memoized in $mURLVariant.
 *
 * @return string|null Validated variant code, or null if none was requested
 */
public function getURLVariant() {
	global $wgRequest;

	if ( $this->mURLVariant ) {
		return $this->mURLVariant;
	}

	// see if the preference is set in the request
	$ret = $wgRequest->getText( 'variant' );

	if ( !$ret ) {
		$ret = $wgRequest->getVal( 'uselang' );
	}

	$this->mURLVariant = $this->validateVariant( $ret );
	return $this->mURLVariant;
}
246 
/**
 * Determine if the user has a variant set.
 *
 * Deliberately not memoized: caching the result wreaks havoc on
 * parserTest.php. $mUserVariant is still updated on every call.
 *
 * @return string|null|bool Validated variant code, null if the stored
 *   value is not an enabled variant, false if $wgUser is not safe to load
 */
protected function getUserVariant() {
	global $wgUser, $wgContLang;

	// Get language variant preference from logged in users
	// Don't call this on stub objects because that causes infinite
	// recursion during initialisation
	if ( !$wgUser->isSafeToLoad() ) {
		return false;
	}
	if ( $wgUser->isLoggedIn() ) {
		if ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
			$ret = $wgUser->getOption( 'variant' );
		} else {
			// Converters for non-content languages use a per-language option.
			$ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
		}
	} else {
		// figure out user lang without constructing wgLang to avoid
		// infinite recursion
		$ret = $wgUser->getOption( 'language' );
	}

	$this->mUserVariant = $this->validateVariant( $ret );
	return $this->mUserVariant;
}
283 
/**
 * Determine the language variant from the Accept-Language header.
 *
 * The first accepted language that is itself an enabled variant wins.
 * Failing that, the fallbacks of the accepted languages are tried in
 * order. A valid result is memoized in $mHeaderVariant.
 *
 * @return string|null Variant code, or null if the header yields none
 */
protected function getHeaderVariant() {
	global $wgRequest;

	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	// see if some supported language variant is set in the
	// HTTP header.
	$languages = array_keys( $wgRequest->getAcceptLang() );
	if ( empty( $languages ) ) {
		return null;
	}

	$fallbackLanguages = [];
	foreach ( $languages as $language ) {
		$this->mHeaderVariant = $this->validateVariant( $language );
		if ( $this->mHeaderVariant ) {
			break;
		}

		// To see if there are fallbacks of current language.
		// We record these fallback variants, and process
		// them later.
		$fallbacks = $this->getVariantFallbacks( $language );
		if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
			$fallbackLanguages[] = $fallbacks;
		} elseif ( is_array( $fallbacks ) ) {
			$fallbackLanguages =
				array_merge( $fallbackLanguages, $fallbacks );
		}
	}

	if ( !$this->mHeaderVariant ) {
		// process fallback languages now
		foreach ( array_unique( $fallbackLanguages ) as $language ) {
			$this->mHeaderVariant = $this->validateVariant( $language );
			if ( $this->mHeaderVariant ) {
				break;
			}
		}
	}

	return $this->mHeaderVariant;
}
335 
/**
 * Dictionary-based conversion.
 *
 * Everything is converted except HTML tags, HTML entities and parser
 * placeholders. Inside a matched tag, only the "title" and "alt" attribute
 * values are converted. If the splitting regex fails to match, the text is
 * returned unconverted and an error is logged.
 *
 * @param string $text The text to be converted
 * @param string|bool $toVariant The target variant code; a falsy value
 *   means "use the preferred variant"
 * @return string Converted text
 */
public function autoConvert( $text, $toVariant = false ) {
	$this->loadTables();

	if ( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
		if ( !$toVariant ) {
			return $text;
		}
	}

	if ( $this->guessVariant( $text, $toVariant ) ) {
		return $text;
	}

	/* we convert everything except:
	   1. HTML markups (anything between < and >)
	   2. HTML entities
	   3. placeholders created by the parser
	*/
	$marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';

	// this one is needed when the text is inside an HTML markup
	$htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';

	// Optimize for the common case where these tags have
	// few or no children. Thus try and possessively get as much as
	// possible, and only engage in backtracking when we hit a '<'.

	// disable convert to variants between <code> tags
	$codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
	// disable conversion of <script> tags
	$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
	// disable conversion of <pre> tags
	$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
	// The "|.*+)" at the end, is in case we missed some part of html syntax,
	// we will fail securely (hopefully) by matching the rest of the string.
	$htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';

	$reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
		'&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
	$startPos = 0;
	$sourceBlob = '';
	$literalBlob = '';

	// Guard against delimiter nulls in the input: "\000" is used below as
	// the separator between batched segments.
	$text = str_replace( "\000", '', $text );

	// $text is not modified below, so its length and the sentinel-terminated
	// copy are computed once instead of on every iteration.
	$textLength = strlen( $text );
	// We add a marker (\004) at the end of text, to ensure we always match the
	// entire text (Otherwise, pcre.backtrack_limit might cause silent failure)
	$subject = $text . "\004";

	$markupMatches = null;
	$elementMatches = null;

	while ( $startPos < $textLength ) {
		if ( preg_match( $reg, $subject, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
			$elementPos = $markupMatches[0][1];
			$element = $markupMatches[0][0];
			if ( $element === "\004" ) {
				// We hit the end.
				$elementPos = $textLength;
				$element = '';
			}
		} else {
			// If we hit here, then Language Converter could be tricked
			// into doing an XSS, so we refuse to translate.
			// If non-crazy input manages to reach this code path,
			// we should consider it a bug.
			$log = LoggerFactory::getInstance( 'languageconverter' );
			$log->error( "Hit pcre.backtrack_limit in " . __METHOD__
				. ". Disabling language conversion for this page.",
				[
					"method" => __METHOD__,
					"variant" => $toVariant,
					"startOfText" => substr( $text, 0, 500 )
				]
			);
			return $text;
		}
		// Queue the part before the markup for translation in a batch
		$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

		// Advance to the next position
		$startPos = $elementPos + strlen( $element );

		// Translate any alt or title attributes inside the matched element
		if ( $element !== ''
			&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
		) {
			$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
			$changed = false;
			foreach ( [ 'title', 'alt' ] as $attrName ) {
				if ( !isset( $attrs[$attrName] ) ) {
					continue;
				}
				$attr = $attrs[$attrName];
				// Don't convert URLs
				if ( !strpos( $attr, '://' ) ) {
					$attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
				}

				// Remove HTML tags to avoid disrupting the layout
				$attr = preg_replace( '/<[^>]+>/', '', $attr );
				if ( $attr !== $attrs[$attrName] ) {
					$attrs[$attrName] = $attr;
					$changed = true;
				}
			}
			if ( $changed ) {
				$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
					$elementMatches[3];
			}
		}
		// Literal (non-translated) pieces are kept in a parallel blob so they
		// can be interleaved with the translated segments afterwards.
		$literalBlob .= $element . "\000";
	}

	// Do the main translation batch
	$translatedBlob = $this->translate( $sourceBlob, $toVariant );

	// Put the output back together
	$translatedIter = StringUtils::explode( "\000", $translatedBlob );
	$literalIter = StringUtils::explode( "\000", $literalBlob );
	$output = '';
	while ( $translatedIter->valid() && $literalIter->valid() ) {
		$output .= $translatedIter->current();
		$output .= $literalIter->current();
		$translatedIter->next();
		$literalIter->next();
	}

	return $output;
}
477 
/**
 * Translate a string to a variant.
 * Doesn't parse rules or do any of that other stuff; for that, see convert().
 *
 * @param string $text Text to convert
 * @param string $variant Variant language code; must be a key of $mTables
 * @return string Translated text, or $text unchanged when trim( $text )
 *   is falsy (empty or whitespace-only input)
 */
public function translate( $text, $variant ) {
	// Nothing worth translating: hand the input back untouched.
	if ( !trim( $text ) ) {
		return $text;
	}

	$this->loadTables();
	return $this->mTables[$variant]->replace( $text );
}
496 
/**
 * Call translate() to convert text to all valid variants.
 *
 * @param string $text The text to be converted
 * @return array Converted text keyed by variant code
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$converted = [];
	foreach ( $this->mVariants as $code ) {
		$converted[$code] = $this->translate( $text, $code );
	}

	return $converted;
}
513 
/**
 * Apply a manual conversion rule: remember its title (if any) and merge
 * its pairs into, or remove them from, the loaded conversion tables.
 *
 * @param ConverterRule $convRule Parsed rule (see recursiveConvertRule())
 */
protected function applyManualConv( $convRule ) {
	// Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to customise title
	// conversion. Bug 24072: $mConvRuleTitle used to be overwritten by
	// manual rules that carry no title, which broke title conversion,
	// hence the emptiness check.
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// merge/remove manual conversion rules to/from global table
	$action = $convRule->getRulesAction();
	foreach ( $convRule->getConvTable() as $variant => $pair ) {
		if ( !$this->validateVariant( $variant ) ) {
			continue;
		}

		switch ( $action ) {
			case 'add':
				// More efficient than array_merge(), about 2.5 times.
				foreach ( $pair as $from => $to ) {
					$this->mTables[$variant]->setPair( $from, $to );
				}
				break;
			case 'remove':
				$this->mTables[$variant]->removeArray( $pair );
				break;
		}
	}
}
548 
/**
 * Auto convert a Title object to a readable string in the
 * preferred variant.
 *
 * @param Title $title A object of Title
 * @return string Converted title text, prefixed with the converted
 *   namespace name and ':' outside the main namespace
 */
public function convertTitle( $title ) {
	$variant = $this->getPreferredVariant();
	$nsIndex = $title->getNamespace();

	$prefix = $nsIndex !== NS_MAIN
		? $this->convertNamespace( $nsIndex, $variant ) . ':'
		: '';

	return $prefix . $this->translate( $title->getText(), $variant );
}
567 
/**
 * Get the namespace display name in the given (or preferred) variant.
 *
 * Lookup order: local server cache; the "conversion-ns<index>" message in
 * the target variant; the same message in the content language, run through
 * translate(); finally the namespace name of the variant's own language
 * object. The result is cached for 60 seconds.
 *
 * @param int $index Namespace id
 * @param string|null $variant Variant code or null for preferred variant
 * @return string Namespace name for display ('' for the main namespace)
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}

	if ( $variant === null ) {
		$variant = $this->getPreferredVariant();
	}

	$srvCache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
	$cacheKey = $srvCache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
	$nsText = $srvCache->get( $cacheKey );
	if ( $nsText !== false ) {
		// Cache hit.
		return $nsText;
	}

	$msgKey = 'conversion-ns' . $index;

	// First check if a message gives a converted name in the target variant.
	$variantMsg = wfMessage( $msgKey )->inLanguage( $variant );
	if ( $variantMsg->exists() ) {
		$nsText = $variantMsg->plain();
	}

	// Then check if a message gives a converted name in content language
	// which needs extra translation to the target variant.
	if ( $nsText === false ) {
		$contentMsg = wfMessage( $msgKey )->inContentLanguage();
		if ( $contentMsg->exists() ) {
			$nsText = $this->translate( $contentMsg->plain(), $variant );
		}
	}

	// No message exists, retrieve it from the target variant's namespace names.
	if ( $nsText === false ) {
		$nsText = $this->mLangObj->factory( $variant )->getFormattedNsText( $index );
	}

	$srvCache->set( $cacheKey, $nsText, 60 );

	return $nsText;
}
616 
/**
 * Convert text to the preferred variant, applying the -{ }- rule markup.
 * Shorthand for convertTo() with getPreferredVariant() as the target.
 *
 * @param string $text Text to be converted
 * @return string Converted text
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
635 
/**
 * Same as convert() except a extra parameter to custom variant.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @return string Converted text, or $text unchanged when
 *   $wgDisableLangConversion is set
 */
public function convertTo( $text, $variant ) {
	global $wgDisableLangConversion;

	if ( $wgDisableLangConversion ) {
		return $text;
	}
	// Reset converter state for a new converter run.
	$this->mConvRuleTitle = false;
	return $this->recursiveConvertTopLevel( $text, $variant );
}
652 
/**
 * Recursively convert text on the outside. Allow to use nested
 * markups to custom rules.
 *
 * Text between -{ markers goes through autoConvert() (unless
 * guessVariant() says the text is already in the target variant); each
 * -{ marker found outside HTML tags, <script> and <style> is handed to
 * recursiveConvertRule().
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int $depth Depth of recursion
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$startPos = 0;
	$out = '';
	$length = strlen( $text );
	$shouldConvert = !$this->guessVariant( $text, $variant );

	$noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
	$noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
	$noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
	// Only match -{ outside of html.
	$markerRegex = "/$noScript|$noStyle|$noHtml|-\{/";

	while ( $startPos < $length ) {
		$found = preg_match( $markerRegex, $text, $m, PREG_OFFSET_CAPTURE, $startPos );

		if ( !$found ) {
			// No more markup, append final segment
			$tail = substr( $text, $startPos );
			if ( $shouldConvert ) {
				$tail = $this->autoConvert( $tail, $variant );
			}
			return $out . $tail;
		}

		// Offset of the match of the regex pattern.
		$markerPos = $m[0][1];

		// Append initial segment
		$segment = substr( $text, $startPos, $markerPos - $startPos );
		if ( $shouldConvert ) {
			$segment = $this->autoConvert( $segment, $variant );
		}
		$out .= $segment;

		// -{ marker found, not in attribute
		// Advance position up to -{ marker.
		$startPos = $markerPos;

		// Do recursive conversion
		// Note: This passes $startPos by reference, and advances it.
		$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
	}

	return $out;
}
704 
/**
 * Recursively convert text on the inside.
 *
 * Parses one -{ ... }- rule starting at $startPos. Nested rules are
 * resolved recursively up to $mMaxDepth levels; beyond that the nested
 * "-{" is kept literally and a single warning span is emitted.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int $startPos Offset of the opening "-{"; advanced by reference
 *   to just past the consumed rule
 * @param int $depth Depth of recursion
 *
 * @throws MWException If $text does not have "-{" at $startPos
 * @return string Display text of the rule, or "-{" plus the auto-converted
 *   remainder if the rule is never closed
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick sanity check (no function calls)
	if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
		throw new MWException( __METHOD__ . ': invalid input string' );
	}

	$startPos += 2;
	$inner = '';
	$warningDone = false;
	$length = strlen( $text );

	while ( $startPos < $length ) {
		$m = false;
		preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
		if ( !$m ) {
			// Unclosed rule
			break;
		}

		$token = $m[0][0];
		$pos = $m[0][1];

		// Markup found
		// Append initial segment
		$inner .= substr( $text, $startPos, $pos - $startPos );

		// Advance position
		$startPos = $pos;

		switch ( $token ) {
			case '-{':
				// Check max depth
				if ( $depth >= $this->mMaxDepth ) {
					$inner .= '-{';
					if ( !$warningDone ) {
						$inner .= '<span class="error">' .
							wfMessage( 'language-converter-depth-warning' )
								->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
							'</span>';
						$warningDone = true;
					}
					$startPos += 2;
					// A bare "continue" inside a switch behaves like "break"
					// and raises a warning on PHP >= 7.3; target the
					// enclosing while loop explicitly.
					continue 2;
				}
				// Recursively parse another rule
				$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
				break;
			case '}-':
				// Apply the rule
				$startPos += 2;
				$rule = new ConverterRule( $inner, $this );
				$rule->parse( $variant );
				$this->applyManualConv( $rule );
				return $rule->getDisplay();
			default:
				throw new MWException( __METHOD__ . ': invalid regex match' );
		}
	}

	// Unclosed rule
	if ( $startPos < $length ) {
		$inner .= substr( $text, $startPos );
	}
	$startPos = $length;
	return '-{' . $this->autoConvert( $inner, $variant );
}
782 
/**
 * If a language supports multiple variants, it is possible that a
 * non-existing link in one variant actually exists in another variant.
 * This function tries to find it.
 *
 * On success both by-reference arguments are replaced; otherwise they are
 * left untouched. Nothing is done when conversion is disabled globally,
 * or (unless $ignoreOtherCond) when the request has redirect=no,
 * linkconvert=no, or action=edit/submit.
 *
 * @param string &$link The name of the link
 * @param Title &$nt The title object of the link
 * @param bool $ignoreOtherCond To disable other conditions when
 *   we need to transclude a template or update a category's link
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	# If the article has already existed, there is no need to
	# check it again, otherwise it may cause a fault.
	if ( is_object( $nt ) && $nt->exists() ) {
		return;
	}

	global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;
	$isredir = $wgRequest->getText( 'redirect', 'yes' );
	$action = $wgRequest->getText( 'action' );
	if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
		// Following a red link is treated like viewing the page.
		$action = 'view';
	}
	$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
	$disableLinkConversion = $wgDisableLangConversion
		|| $wgDisableTitleConversion;
	$linkBatch = new LinkBatch();

	$ns = NS_MAIN;

	if ( $disableLinkConversion ||
		( !$ignoreOtherCond &&
			( $isredir == 'no'
				|| $action == 'edit'
				|| $action == 'submit'
				|| $linkconvert == 'no' ) ) ) {
		return;
	}

	if ( is_object( $nt ) ) {
		$ns = $nt->getNamespace();
	}

	$variants = $this->autoConvertToAllVariants( $link );
	if ( !$variants ) { // give up
		return;
	}

	$titles = [];

	foreach ( $variants as $v ) {
		// Only variants that differ from the original link text are candidates.
		if ( $v != $link ) {
			$varnt = Title::newFromText( $v, $ns );
			if ( !is_null( $varnt ) ) {
				$linkBatch->addObj( $varnt );
				$titles[] = $varnt;
			}
		}
	}

	// fetch all variants in single query
	$linkBatch->execute();

	// The first candidate that exists replaces the caller's link and title.
	foreach ( $titles as $varnt ) {
		if ( $varnt->getArticleID() > 0 ) {
			$nt = $varnt;
			$link = $varnt->getText();
			break;
		}
	}
}
855 
/**
 * Returns language specific hash options.
 *
 * @return string '!' followed by the preferred variant code
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
866 
/**
 * Guess if a text is written in a given variant.
 *
 * This base implementation never recognises anything and always answers
 * false, so autoConvert() and recursiveConvertTopLevel() always convert.
 *
 * @param string $text The text to be checked
 * @param string $variant Language code of the variant to be checked for
 * @return bool Always false here
 */
public function guessVariant( $text, $variant ) {
	return false;
}
880 
/**
 * Load default conversion tables.
 * This method must be implemented in derived classes; the base version
 * only throws.
 *
 * @throws MWException Always, naming the concrete class
 */
function loadDefaultTables() {
	$class = get_class( $this );

	throw new MWException( "Must implement loadDefaultTables() method in class $class" );
}
893 
/**
 * Load conversion tables either from the cache or from the defaults.
 *
 * Runs at most once per instance unless reloadTables() resets the flag.
 * Tables are rebuilt when the cache is bypassed, empty, or lacks the
 * CACHE_VERSION_KEY entry; rebuilt tables are cached for 43200 seconds.
 *
 * @param bool $fromCache Load from the cache if possible
 */
function loadTables( $fromCache = true ) {
	global $wgLanguageConverterCacheType;

	if ( $this->mTablesLoaded ) {
		return;
	}

	// Set the flag first so nested calls made while loading return early.
	$this->mTablesLoaded = true;
	$this->mTables = false;
	$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
	if ( $fromCache ) {
		wfProfileIn( __METHOD__ . '-cache' );
		$this->mTables = $cache->get( $this->mCacheKey );
		wfProfileOut( __METHOD__ . '-cache' );
	}
	if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
		wfProfileIn( __METHOD__ . '-recache' );
		// not in cache, or we need a fresh reload.
		// We will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$this->loadDefaultTables();
		foreach ( $this->mVariants as $var ) {
			$cached = $this->parseCachedTable( $var );
			$this->mTables[$var]->mergeArray( $cached );
		}

		$this->postLoadTables();
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		$cache->set( $this->mCacheKey, $this->mTables, 43200 );
		wfProfileOut( __METHOD__ . '-recache' );
	}
}
932 
/**
 * Hook for post processing after conversion tables are loaded.
 * Called by loadTables() once the tables have been rebuilt; does nothing
 * in this base class.
 */
function postLoadTables() {
}
938 
/**
 * Reload the conversion tables, bypassing the cache.
 *
 * Drops any tables currently held, clears the "loaded" flag and calls
 * loadTables( false ) so that the tables are rebuilt and re-cached.
 */
function reloadTables() {
	// Also sets tables to empty if they were already loaded.
	if ( $this->mTables ) {
		unset( $this->mTables );
	}

	$this->mTablesLoaded = false;
	$this->loadTables( false );
}
952 
/**
 * Parse the conversion table stored in MediaWiki:Conversiontable/<code>
 * (and, recursively, the subpages it links to).
 *
 * The table text consists of -{ ... }- blocks holding ';'-separated
 * "from => to" pairs; quotes, '*' and '#' are stripped and anything after
 * "//" in the target is treated as a comment. Links of the form
 * [[MediaWiki:Conversiontable/<code>/<subpage>|...]] name subpages to
 * include. Each page is parsed at most once per request (tracked in a
 * static array); a repeated request for the same page yields [].
 *
 * @param string $code Language code
 * @param string $subpage Subpage name; '' for the top-level table
 * @param bool $recursive Parse subpages recursively? Defaults to true.
 *
 * @return array Map of source string to replacement
 */
function parseCachedTable( $code, $subpage = '', $recursive = true ) {
	static $parsed = [];

	$key = 'Conversiontable/' . $code;
	if ( $subpage ) {
		$key .= '/' . $subpage;
	}
	if ( array_key_exists( $key, $parsed ) ) {
		return [];
	}

	$parsed[$key] = true;

	if ( $subpage === '' ) {
		$txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
	} else {
		$txt = false;
		// Subpages are read straight from their latest revision.
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
		if ( $title && $title->exists() ) {
			$revision = Revision::newFromTitle( $title );
			if ( $revision ) {
				if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
					$txt = $revision->getContent( Revision::RAW )->getNativeData();
				}

				// @todo in the future, use a specialized content model, perhaps based on json!
			}
		}
	}

	# Nothing to parse if there's no text
	if ( $txt === false || $txt === null || $txt === '' ) {
		return [];
	}

	// get all subpage links of the form
	// [[MediaWiki:Conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
		':Conversiontable';
	$subs = StringUtils::explode( '[[', $txt );
	$sublinks = [];
	foreach ( $subs as $sub ) {
		$link = explode( ']]', $sub, 2 );
		if ( count( $link ) != 2 ) {
			continue;
		}
		$b = explode( '|', $link[0], 2 );
		$b = explode( '/', trim( $b[0] ), 3 );
		if ( count( $b ) == 3 ) {
			$sublink = $b[2];
		} else {
			$sublink = '';
		}

		// A link without any '/' has no $b[1]; check before reading it.
		if ( count( $b ) >= 2 && $b[0] == $linkhead && $b[1] == $code ) {
			$sublinks[] = $sublink;
		}
	}

	// parse the mappings in this page
	$blocks = StringUtils::explode( '-{', $txt );
	$ret = [];
	$first = true;
	foreach ( $blocks as $block ) {
		if ( $first ) {
			// Skip the part before the first -{
			$first = false;
			continue;
		}
		$mappings = explode( '}-', $block, 2 )[0];
		$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
		$table = StringUtils::explode( ';', $stripped );
		foreach ( $table as $t ) {
			// Entries with no "=>" or with more than one are ignored.
			$m = explode( '=>', $t, 3 );
			if ( count( $m ) != 2 ) {
				continue;
			}
			// trim any trailing comments starting with '//'
			$tt = explode( '//', $m[1], 2 );
			$ret[trim( $m[0] )] = trim( $tt[0] );
		}
	}

	// recursively parse the subpages
	if ( $recursive ) {
		foreach ( $sublinks as $link ) {
			$s = $this->parseCachedTable( $code, $link, $recursive );
			// Array union: entries from the subpage win over existing ones.
			$ret = $s + $ret;
		}
	}

	if ( $this->mUcfirst ) {
		// Add an upper-case-first twin for every pair.
		foreach ( $ret as $k => $v ) {
			$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
		}
	}
	return $ret;
}
1070 
/**
 * Enclose a string with the "no conversion" tag. This is used by
 * various functions in the Parser.
 *
 * @param string $text Text to be tagged for no conversion
 * @param bool $noParse Unused
 * @return string $text wrapped as "-{R|...}-", or $text unchanged if it
 *   already contains a "-{" or "}-" marker
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	// strpos() returns 0 for a marker at the very start of the string, so
	// the result must be compared against false rather than used as a bool.
	if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
		return $text;
	}

	return "-{R|$text}-";
}
1088 
/**
 * Convert the sorting key for category links.
 * The base implementation returns the key unchanged.
 *
 * @param string $key
 * @return string The same $key
 */
function convertCategoryKey( $key ) {
	return $key;
}
1100 
/**
 * Refresh the cache of conversion tables when
 * MediaWiki:Conversiontable* is updated.
 *
 * Only titles of the form MediaWiki:Conversiontable/<variant>[/...] whose
 * <variant> is an enabled variant trigger a reload.
 *
 * @param Title $titleobj The Title of the page being updated
 */
public function updateConversionTable( Title $titleobj ) {
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return;
	}

	$parts = explode( '/', $titleobj->getDBkey(), 3 );
	if ( count( $parts ) <= 1 || $parts[0] != 'Conversiontable' ) {
		return;
	}

	if ( $this->validateVariant( $parts[1] ) ) {
		$this->reloadTables();
	}
}
1119 
/**
 * Get the cached separator pattern for ConverterRule::parseRules()
 *
 * The pattern is built once from the enabled variants and memoized in
 * $mVarSeparatorPattern.
 *
 * @return string Regular expression for preg_split()
 */
public function getVarSeparatorPattern() {
	if ( is_null( $this->mVarSeparatorPattern ) ) {
		// varsep_pattern for preg_split:
		// text should be split by ";" only if a valid variant
		// name exists after the markup, for example:
		//  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
		//  <span style="font-size:120%;">yyy</span>;}-
		// we should split it as:
		//  [
		//    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
		//    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
		//    [2] => ''
		//  ]
		$pat = '/;\s*(?=';
		foreach ( $this->mVariants as $variant ) {
			// zh-hans:xxx;zh-hant:yyy
			$pat .= $variant . '\s*:|';
			// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
			$pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
		}
		$pat .= '\s*$)/';
		$this->mVarSeparatorPattern = $pat;
	}
	return $this->mVarSeparatorPattern;
}
1149 }
convertCategoryKey($key)
Convert the sorting key for category links.
const MARKER_PREFIX
Definition: Parser.php:134
updateConversionTable(Title $titleobj)
Refresh the cache of conversion tables when MediaWiki:Conversiontable* is updated.
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1287
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:806
the array() calling protocol came about after MediaWiki 1.4rc1.
const CONTENT_MODEL_WIKITEXT
Definition: Defines.php:239
__construct($langobj, $maincode, $variants=[], $variantfallbacks=[], $flags=[], $manualLevel=[])
Constructor.
const NS_MAIN
Definition: Defines.php:56
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1940
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
convertTo($text, $variant)
Same as convert() except a extra parameter to custom variant.
if(!$wgDBerrorLogTZ) $wgRequest
Definition: Setup.php:664
getVarSeparatorPattern()
Get the cached separator pattern for ConverterRule::parseRules()
static getInstance($id)
Get a cached instance of the specified type of cache object.
Definition: ObjectCache.php:92
wfProfileIn($functionname)
Begin profiling of a function.
recursiveConvertTopLevel($text, $variant, $depth=0)
Recursively convert text on the outside.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2707
Base class for language conversion.
getExtraHashOptions()
Returns language specific hash options.
markNoConversion($text, $noParse=false)
Enclose a string with the "no conversion" tag.
parseCachedTable($code, $subpage= '', $recursive=true)
Parse the conversion table stored in the cache.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:262
loadTables($fromCache=true)
Load conversion tables either from the cache or the disk.
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:128
static fetchLanguageNames($inLanguage=null, $include= 'mw')
Get an array of language names, indexed by code.
Definition: Language.php:800
postLoadTables()
Hook for post processing after conversion tables are loaded.
wfProfileOut($functionname= 'missing')
Stop profiling of a function.
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition: hooks.txt:2893
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:32
getVariantFallbacks($variant)
In case some variant is not defined in the markup, we need to have some fallback. ...
getDBkey()
Get the main part with underscores.
Definition: Title.php:898
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
switch($options['output']) $languages
Definition: transstat.php:76
getURLVariant()
Get the variant specified in the URL.
findVariantLink(&$link, &$nt, $ignoreOtherCond=false)
If a language supports multiple variants, it is possible that non-existing link in one variant actual...
recursiveConvertRule($text, $variant, &$startPos, $depth=0)
Recursively convert text on the inside.
reloadTables()
Reload the conversion tables.
$cache
Definition: mcc.php:33
Parser for rules of language conversion , parse rules in -{ }- tag.
getUserVariant()
Determine if the user has a variant set.
translate($text, $variant)
Translate a string to a variant.
$wgLanguageConverterCacheType
The cache type for storing language conversion tables, which are used when parsing certain text and i...
static makeTitleSafe($ns, $title, $fragment= '', $interwiki= '')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:535
$wgDisableTitleConversion
Whether to disable language variant conversion for links (true turns the conversion off).
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:957
$wgDisabledVariants
Array of language variants for which variant conversion is disabled.
getVariants()
Get all valid variants.
getNamespace()
Get the namespace index, i.e. one of the NS_xxxx constants.
Definition: Title.php:921
static expandAttributes(array $attribs)
Given an associative array of element attributes, generate a string to stick after the element name i...
Definition: Html.php:470
const RAW
Definition: Revision.php:94
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$wgDisableLangConversion
Whether to disable language variant conversion (true turns the conversion off).
$wgDefaultLanguageVariant
Default variant code, if false, the default will be the language code.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition: hooks.txt:806
const NS_MEDIAWIKI
Definition: Defines.php:64
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object & $output
Definition: hooks.txt:1050
$from
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
this hook is for auditing only $req
Definition: hooks.txt:1011
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition: linkcache.txt:17
getHeaderVariant()
Determine the language variant from the Accept-Language header.
guessVariant($text, $variant)
Guess if a text is written in a variant.
string $mCacheKey
Memcached key name.
getDefaultVariant()
Get default variant.
applyManualConv($convRule)
Apply manual conversion rules.
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
autoConvert($text, $toVariant=false)
Dictionary-based conversion.
getConvRuleTitle()
Get the title produced by the conversion rule.
MediaWiki Logger LoggerFactory implements a PSR[0] compatible message logging system Named Psr Log LoggerInterface instances can be obtained from the MediaWiki Logger LoggerFactory::getInstance() static method.MediaWiki\Logger\LoggerFactory expects a class implementing the MediaWiki\Logger\Spi interface to act as a factory for new Psr\Log\LoggerInterface instances.The"Spi"in MediaWiki\Logger\Spi stands for"service provider interface".An SPI is an API intended to be implemented or extended by a third party.This software design pattern is intended to enable framework extension and replaceable components.It is specifically used in the MediaWiki\Logger\LoggerFactory service to allow alternate PSR-3 logging implementations to be easily integrated with MediaWiki.The service provider interface allows the backend logging library to be implemented in multiple ways.The $wgMWLoggerDefaultSpi global provides the classname of the default MediaWiki\Logger\Spi implementation to be loaded at runtime.This can either be the name of a class implementing the MediaWiki\Logger\Spi with a zero argument const ructor or a callable that will return an MediaWiki\Logger\Spi instance.Alternately the MediaWiki\Logger\LoggerFactory MediaWiki Logger LoggerFactory
Definition: logger.txt:5
convert($text)
Convert text to different variants of a language.
wfMemcKey()
Make a cache key for the local wiki.
static explode($separator, $subject)
Workalike for explode() with limited memory usage.
validateVariant($variant=null)
Validate the variant.
convertNamespace($index, $variant=null)
Get the namespace display name in the preferred variant.
static array $languagesWithVariants
languages supporting variants
getPreferredVariant()
Get preferred language variant.
autoConvertToAllVariants($text)
Call translate() to convert text to all valid variants.
loadDefaultTables()
Load default conversion tables.
static singleton()
Get the singleton instance of this class.
$wgUser
Definition: Setup.php:806
convertTitle($title)
Auto convert a Title object to a readable string in the preferred variant.
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:304