MediaWiki  1.27.4
LanguageConverter.php
Go to the documentation of this file.
1 <?php
23 
39  static public $languagesWithVariants = [
40  'gan',
41  'iu',
42  'kk',
43  'ku',
44  'shi',
45  'sr',
46  'tg',
47  'uz',
48  'zh',
49  ];
50 
53  public $mTablesLoaded = false;
54  public $mTables;
55  // 'bidirectional' 'unidirectional' 'disable' for each variant
56  public $mManualLevel;
57 
61  public $mCacheKey;
62 
63  public $mLangObj;
64  public $mFlags;
65  public $mDescCodeSep = ':', $mDescVarSep = ';';
66  public $mUcfirst = false;
67  public $mConvRuleTitle = false;
68  public $mURLVariant;
69  public $mUserVariant;
71  public $mMaxDepth = 10;
73 
74  const CACHE_VERSION_KEY = 'VERSION 7';
75 
/**
 * Constructor.
 *
 * @param Language $langobj Language object stored as the owner of this converter
 * @param string $maincode Main language code of this language
 * @param array $variants Supported variant codes; any code listed in
 *   $wgDisabledVariants is removed
 * @param array $variantfallbacks Fallback table, keyed by variant code
 * @param array $flags Extra conversion flags, merged over the defaults below
 * @param array $manualLevel Manual conversion level per variant
 *   ('bidirectional', 'unidirectional' or 'disable'); variants not listed
 *   default to 'bidirectional'
 */
public function __construct( $langobj, $maincode, $variants = [],
	$variantfallbacks = [], $flags = [],
	$manualLevel = [] ) {
	// Without this declaration $wgDisabledVariants would be an undefined
	// local and array_diff() below would not filter anything.
	global $wgDisabledVariants;

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;
	$this->mVariants = array_diff( $variants, $wgDisabledVariants );
	$this->mVariantFallbacks = $variantfallbacks;
	$this->mVariantNames = Language::fetchLanguageNames();
	$this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
	$defaultflags = [
		// 'S' show converted text
		// '+' add rules for alltext
		// 'E' the given flags are erroneous
		// these flags above are reserved for program
		'A' => 'A', // add rule for convert code (all text convert)
		'T' => 'T', // title convert
		'R' => 'R', // raw content
		'D' => 'D', // convert description (subclass implement)
		'-' => '-', // remove convert (not implement)
		'H' => 'H', // add rule for convert code (but no display in placed code)
		'N' => 'N' // current variant name
	];
	$this->mFlags = array_merge( $defaultflags, $flags );
	foreach ( $this->mVariants as $v ) {
		if ( array_key_exists( $v, $manualLevel ) ) {
			$this->mManualLevel[$v] = $manualLevel[$v];
		} else {
			$this->mManualLevel[$v] = 'bidirectional';
		}
		// Every variant code is itself accepted as a flag.
		$this->mFlags[$v] = $v;
	}
}
119 
/**
 * Get all valid variants.
 *
 * @return array Variant codes kept by the constructor (disabled variants
 *   already removed)
 */
public function getVariants() {
	return $this->mVariants;
}
129 
141  public function getVariantFallbacks( $variant ) {
142  if ( isset( $this->mVariantFallbacks[$variant] ) ) {
143  return $this->mVariantFallbacks[$variant];
144  }
146  }
147 
/**
 * Get the title produced by the conversion rule.
 *
 * @return string|bool Current value of $mConvRuleTitle: the title recorded
 *   by applyManualConv(), or false after convertTo() has reset it
 */
public function getConvRuleTitle() {
	return $this->mConvRuleTitle;
}
155 
160  public function getPreferredVariant() {
162 
163  $req = $this->getURLVariant();
164 
165  if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
166  $req = $this->getUserVariant();
167  } elseif ( !$req ) {
168  $req = $this->getHeaderVariant();
169  }
170 
171  if ( $wgDefaultLanguageVariant && !$req ) {
172  $req = $this->validateVariant( $wgDefaultLanguageVariant );
173  }
174 
175  // This function, unlike the other get*Variant functions, is
176  // not memoized (i.e. its return value is not cached) since
177  // new information might appear during processing after this
178  // is first called.
179  if ( $this->validateVariant( $req ) ) {
180  return $req;
181  }
183  }
184 
190  public function getDefaultVariant() {
192 
193  $req = $this->getURLVariant();
194 
195  if ( !$req ) {
196  $req = $this->getHeaderVariant();
197  }
198 
199  if ( $wgDefaultLanguageVariant && !$req ) {
200  $req = $this->validateVariant( $wgDefaultLanguageVariant );
201  }
202 
203  if ( $req ) {
204  return $req;
205  }
207  }
208 
/**
 * Validate a variant code.
 *
 * The lookup is strict: a loose in_array() would accept non-string input
 * such as 0 or true as a "valid" variant and hand it back to the caller.
 *
 * @param string|null $variant The variant code to validate
 * @return string|null The same code if it is one of this converter's
 *   variants, otherwise null
 */
public function validateVariant( $variant = null ) {
	if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
		return $variant;
	}
	return null;
}
220 
/**
 * Get the variant specified in the URL.
 *
 * Reads the 'variant' request parameter, falling back to 'uselang', and
 * memoizes the validated result in $mURLVariant.
 *
 * @return string|null Validated variant code, or null if the request does
 *   not name one of this converter's variants
 */
public function getURLVariant() {
	// $wgRequest is a global; it must be imported into the method scope.
	global $wgRequest;

	if ( $this->mURLVariant ) {
		return $this->mURLVariant;
	}

	// see if the preference is set in the request
	$ret = $wgRequest->getText( 'variant' );

	if ( !$ret ) {
		$ret = $wgRequest->getVal( 'uselang' );
	}

	$this->mURLVariant = $this->validateVariant( $ret );
	return $this->mURLVariant;
}
243 
/**
 * Determine if the user has a variant set.
 *
 * The result is stored in $mUserVariant but deliberately recomputed on
 * every call: memoizing this function wreaks havoc on parserTest.php.
 *
 * @return string|null|bool Validated variant code, null if the stored
 *   option is not one of this converter's variants, or false if the user
 *   object is not yet safe to load
 */
protected function getUserVariant() {
	// Both objects are globals; they must be imported into the method scope.
	global $wgUser, $wgContLang;

	// Get language variant preference from logged in users
	// Don't call this on stub objects because that causes infinite
	// recursion during initialisation
	if ( !$wgUser->isSafeToLoad() ) {
		return false;
	}
	if ( $wgUser->isLoggedIn() ) {
		if ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
			$ret = $wgUser->getOption( 'variant' );
		} else {
			$ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
		}
	} else {
		// figure out user lang without constructing wgLang to avoid
		// infinite recursion
		$ret = $wgUser->getOption( 'language' );
	}

	$this->mUserVariant = $this->validateVariant( $ret );
	return $this->mUserVariant;
}
280 
/**
 * Determine the language variant from the request's Accept-Language list.
 *
 * The first accepted language that is itself a valid variant wins. If none
 * is, the fallbacks of the accepted languages are tried in the order they
 * were collected. The result is memoized in $mHeaderVariant.
 *
 * @return string|null Validated variant code, or null if nothing matched
 */
protected function getHeaderVariant() {
	// $wgRequest is a global; it must be imported into the method scope.
	global $wgRequest;

	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	// see if some supported language variant is set in the
	// HTTP header.
	$languages = array_keys( $wgRequest->getAcceptLang() );
	if ( empty( $languages ) ) {
		return null;
	}

	$fallbackLanguages = [];
	foreach ( $languages as $language ) {
		$this->mHeaderVariant = $this->validateVariant( $language );
		if ( $this->mHeaderVariant ) {
			break;
		}

		// To see if there are fallbacks of current language.
		// We record these fallback variants, and process
		// them later.
		$fallbacks = $this->getVariantFallbacks( $language );
		if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
			$fallbackLanguages[] = $fallbacks;
		} elseif ( is_array( $fallbacks ) ) {
			$fallbackLanguages =
				array_merge( $fallbackLanguages, $fallbacks );
		}
	}

	if ( !$this->mHeaderVariant ) {
		// process fallback languages now
		$fallback_languages = array_unique( $fallbackLanguages );
		foreach ( $fallback_languages as $language ) {
			$this->mHeaderVariant = $this->validateVariant( $language );
			if ( $this->mHeaderVariant ) {
				break;
			}
		}
	}

	return $this->mHeaderVariant;
}
332 
/**
 * Dictionary-based conversion of text, leaving markup untouched.
 *
 * The text is split into alternating "source" pieces (plain text) and
 * "literal" pieces (HTML tags, entities, parser markers, and the contents
 * of <code>, <script> and <pre>). All source pieces are translated in one
 * batch via translate(); literal pieces are copied through, except that
 * 'title' and 'alt' attributes of a matched tag are converted.
 *
 * @param string $text The text to be converted
 * @param string|bool $toVariant Target variant code; when falsy the
 *   preferred variant is used
 * @return string Converted text, or the input text when no target variant
 *   is available, guessVariant() says it is already in the target variant,
 *   or the splitting regex fails
 */
public function autoConvert( $text, $toVariant = false ) {

	$this->loadTables();

	if ( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
		if ( !$toVariant ) {
			return $text;
		}
	}

	if ( $this->guessVariant( $text, $toVariant ) ) {
		return $text;
	}

	/* we convert everything except:
	   1. HTML markups (anything between < and >)
	   2. HTML entities
	   3. placeholders created by the parser
	   IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
	   Minimize use of backtracking where possible.
	*/
	$marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';

	// this one is needed when the text is inside an HTML markup
	$htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';

	// Optimize for the common case where these tags have
	// few or no children. Thus try and possessively get as much as
	// possible, and only engage in backtracking when we hit a '<'.

	// disable convert to variants between <code> tags
	$codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
	// disable conversion of <script> tags
	$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
	// disable conversion of <pre> tags
	$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
	// The "|.*+)" at the end, is in case we missed some part of html syntax,
	// we will fail securely (hopefully) by matching the rest of the string.
	$htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';

	$reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
		'&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
	$startPos = 0;
	$sourceBlob = '';
	$literalBlob = '';

	// Guard against delimiter nulls in the input
	$text = str_replace( "\000", '', $text );

	$markupMatches = null;
	$elementMatches = null;

	// We add a marker (\004) at the end of text, to ensure we always match the
	// entire text (Otherwise, pcre.backtrack_limit might cause silent failure).
	// Both values are loop-invariant, so build them once instead of copying
	// the whole text on every iteration.
	$textLength = strlen( $text );
	$markedText = $text . "\004";

	while ( $startPos < $textLength ) {
		if ( preg_match( $reg, $markedText, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
			$elementPos = $markupMatches[0][1];
			$element = $markupMatches[0][0];
			if ( $element === "\004" ) {
				// We hit the end.
				$elementPos = $textLength;
				$element = '';
			}
		} else {
			// If we hit here, then Language Converter could be tricked
			// into doing an XSS, so we refuse to translate.
			// If non-crazy input manages to reach this code path,
			// we should consider it a bug.
			$log = LoggerFactory::getInstance( 'languageconverter' );
			$log->error( "Hit pcre.backtrack_limit in " . __METHOD__
				. ". Disabling language conversion for this page.",
				[
					"method" => __METHOD__,
					"variant" => $toVariant,
					"startOfText" => substr( $text, 0, 500 )
				]
			);
			return $text;
		}
		// Queue the part before the markup for translation in a batch
		$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

		// Advance to the next position
		$startPos = $elementPos + strlen( $element );

		// Translate any alt or title attributes inside the matched element
		if ( $element !== ''
			&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
		) {
			$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
			$changed = false;
			foreach ( [ 'title', 'alt' ] as $attrName ) {
				if ( !isset( $attrs[$attrName] ) ) {
					continue;
				}
				$attr = $attrs[$attrName];
				// Don't convert URLs. Note strpos() also yields a falsy 0
				// when the value starts with '://'; such values are converted.
				if ( !strpos( $attr, '://' ) ) {
					$attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
				}

				// Remove HTML tags to avoid disrupting the layout
				$attr = preg_replace( '/<[^>]++>/', '', $attr );
				if ( $attr !== $attrs[$attrName] ) {
					$attrs[$attrName] = $attr;
					$changed = true;
				}
			}
			if ( $changed ) {
				$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
					$elementMatches[3];
			}
		}
		$literalBlob .= $element . "\000";
	}

	// Do the main translation batch
	$translatedBlob = $this->translate( $sourceBlob, $toVariant );

	// Put the output back together
	$translatedIter = StringUtils::explode( "\000", $translatedBlob );
	$literalIter = StringUtils::explode( "\000", $literalBlob );
	$output = '';
	while ( $translatedIter->valid() && $literalIter->valid() ) {
		$output .= $translatedIter->current();
		$output .= $literalIter->current();
		$translatedIter->next();
		$literalIter->next();
	}

	return $output;
}
476 
/**
 * Translate a string to a variant.
 *
 * Text that is empty or consists only of characters stripped by trim()
 * is returned unchanged. The emptiness check compares against '' so that
 * the text "0" (falsy in PHP) is still passed to the conversion table.
 *
 * @param string $text Text to convert
 * @param string $variant Variant code; must be a key of $mTables
 * @return string Translated text
 */
public function translate( $text, $variant ) {
	if ( trim( $text ) !== '' ) {
		$this->loadTables();
		$text = $this->mTables[$variant]->replace( $text );
	}
	return $text;
}
495 
/**
 * Translate a string into every variant of this converter.
 *
 * @param string $text The text to be converted
 * @return array Map of variant code => text translated to that variant
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$codes = array_values( $this->mVariants );
	$converted = array_map(
		function ( $code ) use ( $text ) {
			return $this->translate( $text, $code );
		},
		$codes
	);

	return array_combine( $codes, $converted );
}
512 
/**
 * Apply a manual conversion rule to the converter's state.
 *
 * Records the rule's title (if it has one) and merges the rule's pairs
 * into, or removes them from, the per-variant conversion tables.
 *
 * @param ConverterRule $convRule Parsed rule to apply
 */
protected function applyManualConv( $convRule ) {
	// Syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- customises title conversion.
	// Bug 24072: only overwrite $mConvRuleTitle when this rule actually
	// carries a title, so non-title rules cannot clobber an earlier one.
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// Merge/remove the manual conversion rules to/from the global table.
	$rulesByVariant = $convRule->getConvTable();
	$action = $convRule->getRulesAction();
	foreach ( $rulesByVariant as $variant => $pairs ) {
		if ( !$this->validateVariant( $variant ) ) {
			continue;
		}

		switch ( $action ) {
			case 'add':
				// Pair-by-pair is more efficient than array_merge(), about 2.5 times.
				foreach ( $pairs as $source => $target ) {
					$this->mTables[$variant]->setPair( $source, $target );
				}
				break;
			case 'remove':
				$this->mTables[$variant]->removeArray( $pairs );
				break;
		}
	}
}
547 
/**
 * Build the display text of a title in the preferred variant.
 *
 * @param Title $title Title to convert
 * @return string Converted namespace name followed by ':' (omitted for the
 *   main namespace), then the converted title text
 */
public function convertTitle( $title ) {
	$variant = $this->getPreferredVariant();
	$ns = $title->getNamespace();

	$nsPrefix = ( $ns === NS_MAIN )
		? ''
		: $this->convertNamespace( $ns, $variant ) . ':';

	return $nsPrefix . $this->translate( $title->getText(), $variant );
}
566 
574  public function convertNamespace( $index, $variant = null ) {
575  if ( $index === NS_MAIN ) {
576  return '';
577  }
578 
579  if ( $variant === null ) {
580  $variant = $this->getPreferredVariant();
581  }
582 
584  $key = wfMemcKey( 'languageconverter', 'namespace-text', $index, $variant );
585  $nsVariantText = $cache->get( $key );
586  if ( $nsVariantText !== false ) {
587  return $nsVariantText;
588  }
589 
590  // First check if a message gives a converted name in the target variant.
591  $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
592  if ( $nsConvMsg->exists() ) {
593  $nsVariantText = $nsConvMsg->plain();
594  }
595 
596  // Then check if a message gives a converted name in content language
597  // which needs extra translation to the target variant.
598  if ( $nsVariantText === false ) {
599  $nsConvMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
600  if ( $nsConvMsg->exists() ) {
601  $nsVariantText = $this->translate( $nsConvMsg->plain(), $variant );
602  }
603  }
604 
605  if ( $nsVariantText === false ) {
606  // No message exists, retrieve it from the target variant's namespace names.
607  $langObj = $this->mLangObj->factory( $variant );
608  $nsVariantText = $langObj->getFormattedNsText( $index );
609  }
610 
611  $cache->set( $key, $nsVariantText, 60 );
612 
613  return $nsVariantText;
614  }
615 
/**
 * Convert text to the preferred variant.
 *
 * Thin wrapper around convertTo() using getPreferredVariant().
 *
 * @param string $text Text to be converted
 * @return string Converted text
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
634 
/**
 * Same as convert(), but with an explicit target variant.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @return string Converted text, or the unchanged input when
 *   $wgDisableLangConversion is set
 */
public function convertTo( $text, $variant ) {
	// Without this declaration the flag would be an undefined local and
	// the kill-switch below could never trigger.
	global $wgDisableLangConversion;

	if ( $wgDisableLangConversion ) {
		return $text;
	}
	// Reset converter state for a new converter run.
	$this->mConvRuleTitle = false;
	return $this->recursiveConvertTopLevel( $text, $variant );
}
651 
/**
 * Recursively convert text on the outside of -{ }- rules.
 *
 * Plain segments between rules go through autoConvert() (unless
 * guessVariant() reports the text is already in the target variant);
 * each "-{" found outside of HTML is handed to recursiveConvertRule().
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int $depth Depth of recursion
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$length = strlen( $text );
	$shouldConvert = !$this->guessVariant( $text, $variant );

	// Script, style and tag spans are consumed and then rejected via
	// (*SKIP)(*FAIL), so the only alternative that can match is "-{"
	// outside of html.
	$noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
	$noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
	$noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
	$pattern = "/$noScript|$noStyle|$noHtml|-\{/";

	$out = '';
	$startPos = 0;
	while ( $startPos < $length ) {
		$found = preg_match( $pattern, $text, $m, PREG_OFFSET_CAPTURE, $startPos );

		if ( !$found ) {
			// No more markup: emit the tail and stop.
			$tail = substr( $text, $startPos );
			return $out . ( $shouldConvert ? $this->autoConvert( $tail, $variant ) : $tail );
		}

		// Byte offset of the "-{" marker.
		$markerPos = $m[0][1];

		// Emit the segment in front of the marker.
		$segment = substr( $text, $startPos, $markerPos - $startPos );
		$out .= $shouldConvert ? $this->autoConvert( $segment, $variant ) : $segment;

		// Hand over at the marker; recursiveConvertRule() takes $startPos by
		// reference and advances it past the rule it consumed.
		$startPos = $markerPos;
		$out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
	}

	return $out;
}
703 
/**
 * Recursively convert text on the inside of a -{ }- rule.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int &$startPos Offset of the opening "-{"; advanced past the
 *   consumed rule (or to the end of $text for an unclosed rule)
 * @param int $depth Depth of recursion
 * @throws MWException If $startPos does not point at "-{"
 * @return string Display text of the rule, or "-{" followed by the
 *   auto-converted remainder when the rule is never closed
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick sanity check (no function calls)
	if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
		throw new MWException( __METHOD__ . ': invalid input string' );
	}

	$startPos += 2;
	$inner = '';
	$warningDone = false;
	$length = strlen( $text );

	while ( $startPos < $length ) {
		$m = false;
		preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
		if ( !$m ) {
			// Unclosed rule
			break;
		}

		$token = $m[0][0];
		$pos = $m[0][1];

		// Markup found
		// Append initial segment
		$inner .= substr( $text, $startPos, $pos - $startPos );

		// Advance position
		$startPos = $pos;

		switch ( $token ) {
			case '-{':
				// Check max depth
				if ( $depth >= $this->mMaxDepth ) {
					$inner .= '-{';
					if ( !$warningDone ) {
						$inner .= '<span class="error">' .
							wfMessage( 'language-converter-depth-warning' )
								->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
							'</span>';
						$warningDone = true;
					}
					$startPos += 2;
					// "continue" alone targets the switch (acting like
					// "break") and warns on PHP >= 7.3; name the loop.
					continue 2;
				}
				// Recursively parse another rule
				$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
				break;
			case '}-':
				// Apply the rule
				$startPos += 2;
				$rule = new ConverterRule( $inner, $this );
				$rule->parse( $variant );
				$this->applyManualConv( $rule );
				return $rule->getDisplay();
			default:
				throw new MWException( __METHOD__ . ': invalid regex match' );
		}
	}

	// Unclosed rule
	if ( $startPos < $length ) {
		$inner .= substr( $text, $startPos );
	}
	$startPos = $length;
	return '-{' . $this->autoConvert( $inner, $variant );
}
781 
793  public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
794  # If the article has already existed, there is no need to
795  # check it again, otherwise it may cause a fault.
796  if ( is_object( $nt ) && $nt->exists() ) {
797  return;
798  }
799 
801  $isredir = $wgRequest->getText( 'redirect', 'yes' );
802  $action = $wgRequest->getText( 'action' );
803  if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
804  $action = 'view';
805  }
806  $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
807  $disableLinkConversion = $wgDisableLangConversion
809  $linkBatch = new LinkBatch();
810 
811  $ns = NS_MAIN;
812 
813  if ( $disableLinkConversion ||
814  ( !$ignoreOtherCond &&
815  ( $isredir == 'no'
816  || $action == 'edit'
817  || $action == 'submit'
818  || $linkconvert == 'no' ) ) ) {
819  return;
820  }
821 
822  if ( is_object( $nt ) ) {
823  $ns = $nt->getNamespace();
824  }
825 
826  $variants = $this->autoConvertToAllVariants( $link );
827  if ( !$variants ) { // give up
828  return;
829  }
830 
831  $titles = [];
832 
833  foreach ( $variants as $v ) {
834  if ( $v != $link ) {
835  $varnt = Title::newFromText( $v, $ns );
836  if ( !is_null( $varnt ) ) {
837  $linkBatch->addObj( $varnt );
838  $titles[] = $varnt;
839  }
840  }
841  }
842 
843  // fetch all variants in single query
844  $linkBatch->execute();
845 
846  foreach ( $titles as $varnt ) {
847  if ( $varnt->getArticleID() > 0 ) {
848  $nt = $varnt;
849  $link = $varnt->getText();
850  break;
851  }
852  }
853  }
854 
/**
 * Returns language specific hash options.
 *
 * @return string '!' followed by the preferred variant code
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
865 
/**
 * Guess whether a text is already written in the given variant.
 *
 * This base implementation never makes a positive guess.
 *
 * @param string $text The text to be checked
 * @param string $variant Language code of the variant to be checked for
 * @return bool Always false here
 */
public function guessVariant( $text, $variant ) {
	return false;
}
879 
/**
 * Load the default conversion tables.
 *
 * This base implementation only throws, naming the concrete class that
 * failed to provide its own version.
 *
 * @throws MWException Always
 */
function loadDefaultTables() {
	throw new MWException(
		'Must implement loadDefaultTables() method in class ' . get_class( $this )
	);
}
892 
/**
 * Load conversion tables either from the cache or the disk.
 *
 * Does nothing when the tables are already loaded. Otherwise the tables
 * are read from the cache; if they are missing or lack the current
 * CACHE_VERSION_KEY entry they are rebuilt from loadDefaultTables() plus
 * parseCachedTable() for every variant, and written back to the cache.
 *
 * @param bool $fromCache Whether to try the cache before rebuilding
 */
function loadTables( $fromCache = true ) {
	// The cache type is a global; it must be imported into the method scope.
	global $wgLanguageConverterCacheType;

	if ( $this->mTablesLoaded ) {
		return;
	}

	$this->mTablesLoaded = true;
	$this->mTables = false;
	$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
	if ( $fromCache ) {
		wfProfileIn( __METHOD__ . '-cache' );
		$this->mTables = $cache->get( $this->mCacheKey );
		wfProfileOut( __METHOD__ . '-cache' );
	}
	if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
		wfProfileIn( __METHOD__ . '-recache' );
		// not in cache, or we need a fresh reload.
		// We will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$this->loadDefaultTables();
		foreach ( $this->mVariants as $var ) {
			$cached = $this->parseCachedTable( $var );
			$this->mTables[$var]->mergeArray( $cached );
		}

		$this->postLoadTables();
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		// Cache for 43200 seconds (12 hours).
		$cache->set( $this->mCacheKey, $this->mTables, 43200 );
		wfProfileOut( __METHOD__ . '-recache' );
	}
}
931 
/**
 * Hook for post processing after conversion tables are loaded.
 *
 * Called by loadTables() after the default and cached tables have been
 * merged; intentionally empty in this class.
 */
function postLoadTables() {
}
937 
/**
 * Reload the conversion tables.
 *
 * Drops any tables held in memory, clears the loaded flag and rebuilds
 * via loadTables( false ), i.e. without reading from the cache.
 */
function reloadTables() {
	if ( $this->mTables ) {
		unset( $this->mTables );
	}
	$this->mTablesLoaded = false;

	$this->loadTables( false );
}
951 
971  function parseCachedTable( $code, $subpage = '', $recursive = true ) {
972  static $parsed = [];
973 
974  $key = 'Conversiontable/' . $code;
975  if ( $subpage ) {
976  $key .= '/' . $subpage;
977  }
978  if ( array_key_exists( $key, $parsed ) ) {
979  return [];
980  }
981 
982  $parsed[$key] = true;
983 
984  if ( $subpage === '' ) {
985  $txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
986  } else {
987  $txt = false;
989  if ( $title && $title->exists() ) {
990  $revision = Revision::newFromTitle( $title );
991  if ( $revision ) {
992  if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
993  $txt = $revision->getContent( Revision::RAW )->getNativeData();
994  }
995 
996  // @todo in the future, use a specialized content model, perhaps based on json!
997  }
998  }
999  }
1000 
1001  # Nothing to parse if there's no text
1002  if ( $txt === false || $txt === null || $txt === '' ) {
1003  return [];
1004  }
1005 
1006  // get all subpage links of the form
1007  // [[MediaWiki:Conversiontable/zh-xx/...|...]]
1008  $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
1009  ':Conversiontable';
1010  $subs = StringUtils::explode( '[[', $txt );
1011  $sublinks = [];
1012  foreach ( $subs as $sub ) {
1013  $link = explode( ']]', $sub, 2 );
1014  if ( count( $link ) != 2 ) {
1015  continue;
1016  }
1017  $b = explode( '|', $link[0], 2 );
1018  $b = explode( '/', trim( $b[0] ), 3 );
1019  if ( count( $b ) == 3 ) {
1020  $sublink = $b[2];
1021  } else {
1022  $sublink = '';
1023  }
1024 
1025  if ( $b[0] == $linkhead && $b[1] == $code ) {
1026  $sublinks[] = $sublink;
1027  }
1028  }
1029 
1030  // parse the mappings in this page
1031  $blocks = StringUtils::explode( '-{', $txt );
1032  $ret = [];
1033  $first = true;
1034  foreach ( $blocks as $block ) {
1035  if ( $first ) {
1036  // Skip the part before the first -{
1037  $first = false;
1038  continue;
1039  }
1040  $mappings = explode( '}-', $block, 2 )[0];
1041  $stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
1042  $table = StringUtils::explode( ';', $stripped );
1043  foreach ( $table as $t ) {
1044  $m = explode( '=>', $t, 3 );
1045  if ( count( $m ) != 2 ) {
1046  continue;
1047  }
1048  // trim any trailling comments starting with '//'
1049  $tt = explode( '//', $m[1], 2 );
1050  $ret[trim( $m[0] )] = trim( $tt[0] );
1051  }
1052  }
1053 
1054  // recursively parse the subpages
1055  if ( $recursive ) {
1056  foreach ( $sublinks as $link ) {
1057  $s = $this->parseCachedTable( $code, $link, $recursive );
1058  $ret = $s + $ret;
1059  }
1060  }
1061 
1062  if ( $this->mUcfirst ) {
1063  foreach ( $ret as $k => $v ) {
1064  $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
1065  }
1066  }
1067  return $ret;
1068  }
1069 
/**
 * Enclose a string with the "no conversion" tag.
 *
 * @param string $text Text to be tagged for no conversion
 * @param bool $noParse Unused by this implementation
 * @return string The text wrapped as "-{R|...}-", or the unchanged text
 *   when it already contains a "-{" or "}-" marker
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	# strpos() returns 0 (falsy) for a marker at the very start of the
	# text, so the result must be compared against false explicitly.
	if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
		return $text;
	}

	return "-{R|$text}-";
}
1087 
/**
 * Convert the sorting key for category links.
 *
 * This base implementation returns the key unchanged.
 *
 * @param string $key Sort key
 * @return string The same key
 */
function convertCategoryKey( $key ) {
	return $key;
}
1099 
/**
 * Refresh the cache of conversion tables when
 * MediaWiki:Conversiontable* is updated.
 *
 * Tables are reloaded only for a MediaWiki-namespace page whose DB key is
 * "Conversiontable/<code>[/...]" with <code> being a valid variant.
 *
 * @param Title $titleobj The title of the page being updated
 */
public function updateConversionTable( Title $titleobj ) {
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return;
	}

	$parts = explode( '/', $titleobj->getDBkey(), 3 );
	if ( count( $parts ) <= 1 || $parts[0] != 'Conversiontable' ) {
		return;
	}

	if ( $this->validateVariant( $parts[1] ) ) {
		$this->reloadTables();
	}
}
1118 
1124  if ( is_null( $this->mVarSeparatorPattern ) ) {
1125  // varsep_pattern for preg_split:
1126  // text should be splited by ";" only if a valid variant
1127  // name exist after the markup, for example:
1128  // -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
1129  // <span style="font-size:120%;">yyy</span>;}-
1130  // we should split it as:
1131  // array(
1132  // [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
1133  // [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
1134  // [2] => ''
1135  // )
1136  $pat = '/;\s*(?=';
1137  foreach ( $this->mVariants as $variant ) {
1138  // zh-hans:xxx;zh-hant:yyy
1139  $pat .= $variant . '\s*:|';
1140  // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
1141  $pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
1142  }
1143  $pat .= '\s*$)/';
1144  $this->mVarSeparatorPattern = $pat;
1145  }
1147  }
1148 }
convertCategoryKey($key)
Convert the sorting key for category links.
const MARKER_PREFIX
Definition: Parser.php:141
updateConversionTable(Title $titleobj)
Refresh the cache of conversion tables when MediaWiki:Conversiontable* is updated.
static decodeTagAttributes($text)
Return an associative array of attribute names and values from a partial tag string.
Definition: Sanitizer.php:1249
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition: hooks.txt:766
the array() calling protocol came about after MediaWiki 1.4rc1.
const CONTENT_MODEL_WIKITEXT
Definition: Defines.php:278
magic word the default is to use $key to get the and $key value or $key value text $key value html to format the value $key
Definition: hooks.txt:2325
__construct($langobj, $maincode, $variants=[], $variantfallbacks=[], $flags=[], $manualLevel=[])
Constructor.
const NS_MAIN
Definition: Defines.php:70
getText()
Get the text form (spaces not underscores) of the main part.
Definition: Title.php:893
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
convertTo($text, $variant)
Same as convert(), except that it takes an extra parameter specifying the target variant.
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition: hooks.txt:1802
getVarSeparatorPattern()
Get the cached separator pattern for ConverterRule::parseRules()
static getInstance($id)
Get a cached instance of the specified type of cache object.
Definition: ObjectCache.php:92
wfProfileIn($functionname)
Begin profiling of a function.
recursiveConvertTopLevel($text, $variant, $depth=0)
Recursively convert text on the outside.
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition: hooks.txt:2552
Base class for language conversion.
getExtraHashOptions()
Returns language specific hash options.
markNoConversion($text, $noParse=false)
Enclose a string with the "no conversion" tag.
parseCachedTable($code, $subpage= '', $recursive=true)
Parse the conversion table stored in the cache.
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:277
Represents a title within MediaWiki.
Definition: Title.php:34
loadTables($fromCache=true)
Load conversion tables either from the cache or the disk.
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target...
Definition: Revision.php:117
static fetchLanguageNames($inLanguage=null, $include= 'mw')
Get an array of language names, indexed by code.
Definition: Language.php:798
postLoadTables()
Hook for post processing after conversion tables are loaded.
wfProfileOut($functionname= 'missing')
Stop profiling of a function.
the value to return A Title object or null for latest to be modified or replaced by the hook handler or if authentication is not possible after cache objects are set for highlighting & $link
Definition: hooks.txt:2585
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:31
getVariantFallbacks($variant)
In case some variant is not defined in the markup, we need to have some fallback. ...
getDBkey()
Get the main part with underscores.
Definition: Title.php:911
switch($options['output']) $languages
Definition: transstat.php:76
getURLVariant()
Get the variant specified in the URL.
findVariantLink(&$link, &$nt, $ignoreOtherCond=false)
If a language supports multiple variants, it is possible that non-existing link in one variant actual...
recursiveConvertRule($text, $variant, &$startPos, $depth=0)
Recursively convert text on the inside.
reloadTables()
Reload the conversion tables.
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
static newAccelerator($params=[], $fallback=null)
$cache
Definition: mcc.php:33
Parser for rules of language conversion , parse rules in -{ }- tag.
getUserVariant()
Determine if the user has a variant set.
translate($text, $variant)
Translate a string to a variant.
$wgLanguageConverterCacheType
The cache type for storing language conversion tables, which are used when parsing certain text and i...
static makeTitleSafe($ns, $title, $fragment= '', $interwiki= '')
Create a new Title from a namespace index and a DB key.
Definition: Title.php:548
$wgDisableTitleConversion
Whether to enable language variant conversion for links.
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:916
$wgDisabledVariants
Disabled variants array of language variant conversion.
getVariants()
Get all valid variants.
getNamespace()
Get the namespace index, i.e. one of the NS_xxxx constants.
Definition: Title.php:934
static expandAttributes(array $attribs)
Given an associative array of element attributes, generate a string to stick after the element name i...
Definition: Html.php:472
const RAW
Definition: Revision.php:85
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
$wgDisableLangConversion
Whether to enable language variant conversion.
$wgDefaultLanguageVariant
Default variant code, if false, the default will be the language code.
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition: hooks.txt:766
const NS_MEDIAWIKI
Definition: Defines.php:78
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object & $output
Definition: hooks.txt:1008
$from
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
this hook is for auditing only $req
Definition: hooks.txt:969
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition: linkcache.txt:17
getHeaderVariant()
Determine the language variant from the Accept-Language header.
guessVariant($text, $variant)
Guess if a text is written in a variant.
string $mCacheKey
Memcached key name.
getDefaultVariant()
Get default variant.
applyManualConv($convRule)
Apply manual conversion rules.
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
autoConvert($text, $toVariant=false)
Dictionary-based conversion.
getConvRuleTitle()
Get the title produced by the conversion rule.
MediaWiki Logger LoggerFactory implements a PSR[0] compatible message logging system Named Psr Log LoggerInterface instances can be obtained from the MediaWiki Logger LoggerFactory::getInstance() static method. MediaWiki\Logger\LoggerFactory expects a class implementing the MediaWiki\Logger\Spi interface to act as a factory for new Psr\Log\LoggerInterface instances. The "Spi" in MediaWiki\Logger\Spi stands for "service provider interface". An SPI is an API intended to be implemented or extended by a third party. This software design pattern is intended to enable framework extension and replaceable components. It is specifically used in the MediaWiki\Logger\LoggerFactory service to allow alternate PSR-3 logging implementations to be easily integrated with MediaWiki. The service provider interface allows the backend logging library to be implemented in multiple ways. The $wgMWLoggerDefaultSpi global provides the classname of the default MediaWiki\Logger\Spi implementation to be loaded at runtime. This can either be the name of a class implementing the MediaWiki\Logger\Spi with a zero argument constructor or a callable that will return a MediaWiki\Logger\Spi instance. Alternately the MediaWiki\Logger\LoggerFactory MediaWiki Logger LoggerFactory
Definition: logger.txt:5
convert($text)
Convert text to different variants of a language.
wfMemcKey()
Make a cache key for the local wiki.
static explode($separator, $subject)
Workalike for explode() with limited memory usage.
validateVariant($variant=null)
Validate the variant.
convertNamespace($index, $variant=null)
Get the namespace display name in the preferred variant.
const CACHE_NONE
Definition: Defines.php:103
static array $languagesWithVariants
languages supporting variants
getPreferredVariant()
Get preferred language variant.
autoConvertToAllVariants($text)
Call translate() to convert text to all valid variants.
loadDefaultTables()
Load default conversion tables.
if(is_null($wgLocalTZoffset)) if(!$wgDBerrorLogTZ) $wgRequest
Definition: Setup.php:657
static singleton()
Get the singleton instance of this class.
$wgUser
Definition: Setup.php:794
convertTitle($title)
Auto convert a Title object to a readable string in the preferred variant.
Allows to change the fields on the form that will be generated $name
Definition: hooks.txt:314