MediaWiki REL1_33
LanguageConverter.php
Go to the documentation of this file.
1<?php
22
24
40 public static $languagesWithVariants = [
41 'en',
42 'crh',
43 'gan',
44 'iu',
45 'kk',
46 'ku',
47 'shi',
48 'sr',
49 'tg',
50 'uz',
51 'zh',
52 ];
53
55
59 public $mVariants;
62 public $mTablesLoaded = false;
63
68 public $mTables;
69
70 // 'bidirectional' 'unidirectional' 'disable' for each variant
72
73 public $mLangObj;
74 public $mFlags;
75 public $mDescCodeSep = ':', $mDescVarSep = ';';
76 public $mUcfirst = false;
77 public $mConvRuleTitle = false;
81 public $mMaxDepth = 10;
83
84 const CACHE_VERSION_KEY = 'VERSION 7';
85
/**
 * Set up the converter for one language and its variants.
 *
 * @param Language $langobj Language object, stored in $mLangObj
 * @param string $maincode Main language code of this language
 * @param string[] $variants Supported variant codes; any code listed in
 *   $wgDisabledVariants is removed
 * @param array $variantfallbacks Fallback(s) for each variant, keyed by variant code
 * @param array $flags Extra flags, merged over the defaults below
 * @param array $manualLevel Manual conversion level per variant
 *   ('bidirectional', 'unidirectional' or 'disable'); variants without an
 *   entry default to 'bidirectional'
 */
public function __construct( Language $langobj, $maincode, $variants = [],
	$variantfallbacks = [], $flags = [],
	$manualLevel = [] ) {
	// The disabled-variant list is site configuration and must be pulled
	// into scope before it can be used in array_diff() below.
	global $wgDisabledVariants;

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;
	$this->mVariants = array_diff( $variants, $wgDisabledVariants );
	$this->mVariantFallbacks = $variantfallbacks;
	$this->mVariantNames = Language::fetchLanguageNames();
	$defaultflags = [
		// 'S' show converted text
		// '+' add rules for alltext
		// 'E' the given flags are invalid
		// these flags above are reserved for program
		'A' => 'A', // add rule for convert code (all text convert)
		'T' => 'T', // title convert
		'R' => 'R', // raw content
		'D' => 'D', // convert description (subclass implement)
		'-' => '-', // remove convert (not implement)
		'H' => 'H', // add rule for convert code (but no display in placed code)
		'N' => 'N', // current variant name
	];
	$this->mFlags = array_merge( $defaultflags, $flags );
	foreach ( $this->mVariants as $v ) {
		if ( array_key_exists( $v, $manualLevel ) ) {
			$this->mManualLevel[$v] = $manualLevel[$v];
		} else {
			$this->mManualLevel[$v] = 'bidirectional';
		}
		// Every variant code doubles as a flag of the same name.
		$this->mFlags[$v] = $v;
	}
}
126
/**
 * Get all valid variants.
 *
 * @return string[] The variant codes stored by the constructor
 */
public function getVariants() {
	$variants = $this->mVariants;

	return $variants;
}
136
/**
 * Look up the fallback(s) configured for a variant.
 *
 * In case some variant is not defined in the markup, we need to have
 * some fallback.
 *
 * @param string $variant The language code of the variant
 * @return string|array The entry stored for $variant in the fallback map,
 *   or the main language code when there is none
 */
public function getVariantFallbacks( $variant ) {
	if ( isset( $this->mVariantFallbacks[$variant] ) ) {
		return $this->mVariantFallbacks[$variant];
	}

	return $this->mMainLanguageCode;
}
151
/**
 * Get the title produced by the conversion rule.
 *
 * @return string|bool The converted title text, or false when no title
 *   rule has been applied since the last reset
 */
public function getConvRuleTitle() {
	$convertedTitle = $this->mConvRuleTitle;

	return $convertedTitle;
}
159
/**
 * Get preferred language variant.
 *
 * Sources are consulted in this order, stopping at the first that yields
 * a value: the URL, the 'GetLangPreferredVariant' hook, the user's
 * preference (logged-in users) or the Accept-Language header (everyone
 * else), and finally $wgDefaultLanguageVariant.
 *
 * @return string The preferred variant code, or the main language code
 *   when no valid variant was found
 */
public function getPreferredVariant() {
	global $wgDefaultLanguageVariant, $wgUser;

	$req = $this->getURLVariant();

	Hooks::run( 'GetLangPreferredVariant', [ &$req ] );

	if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
		$req = $this->getUserVariant();
	} elseif ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	// Last resort before the main language code: the site-wide default.
	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	// The hook may have injected an arbitrary value, so validate again.
	$req = $this->validateVariant( $req );

	// This function, unlike the other get*Variant functions, is
	// not memoized (i.e. their return value is not cached) since
	// new information might appear during processing after this
	// is first called.
	if ( $req ) {
		return $req;
	}
	return $this->mMainLanguageCode;
}
192
/**
 * Get default variant.
 *
 * Unlike getPreferredVariant(), this ignores the user's preference and
 * the hook: only the URL, the Accept-Language header and
 * $wgDefaultLanguageVariant are considered, in that order.
 *
 * @return string The default variant code, or the main language code
 *   when no valid variant was found
 */
public function getDefaultVariant() {
	global $wgDefaultLanguageVariant;

	$req = $this->getURLVariant();

	if ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	// Fall back to the site-wide default, mirroring getPreferredVariant().
	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	if ( $req ) {
		return $req;
	}
	return $this->mMainLanguageCode;
}
216
/**
 * Validate the variant and return an appropriate strict internal
 * variant code if one exists.
 *
 * @param string|null $variant The variant to validate (any letter case)
 * @return string|null The matching internal variant code, or null when
 *   $variant is null or matches no known variant
 */
public function validateVariant( $variant = null ) {
	if ( $variant === null ) {
		return null;
	}
	// Our internal variants are always lower-case; the variant we
	// are validating may have mixed case.
	$variant = LanguageCode::replaceDeprecatedCodes( strtolower( $variant ) );
	// Strict comparison: variant codes are strings, and loose comparison
	// would treat numeric-looking strings such as '1e1' and '10' as equal.
	if ( in_array( $variant, $this->mVariants, true ) ) {
		return $variant;
	}
	// Browsers are supposed to use BCP 47 standard in the
	// Accept-Language header, but not all of our internal
	// mediawiki variant codes are BCP 47. Map BCP 47 code
	// to our internal code.
	foreach ( $this->mVariants as $v ) {
		// Case-insensitive match (BCP 47 is mixed case)
		if ( strtolower( LanguageCode::bcp47( $v ) ) === $variant ) {
			return $v;
		}
	}
	return null;
}
248
/**
 * Get the variant specified in the URL.
 *
 * The 'variant' request parameter takes precedence; 'uselang' is only
 * consulted when 'variant' is empty. A successful lookup is remembered
 * in $mURLVariant and reused on later calls.
 *
 * @return string|null The validated variant code, or null if the request
 *   names no valid variant
 */
public function getURLVariant() {
	global $wgRequest;

	if ( !$this->mURLVariant ) {
		// see if the preference is set in the request
		$requested = $wgRequest->getText( 'variant' ) ?: $wgRequest->getVal( 'uselang' );
		$this->mURLVariant = $this->validateVariant( $requested );
	}

	return $this->mURLVariant;
}
271
/**
 * Determine if the user has a variant set.
 *
 * Deliberately not memoized: caching the result in $mUserVariant and
 * returning it early wreaks havoc on parserTest.php.
 *
 * @return string|null|bool The validated variant code, null when the
 *   stored option is not a valid variant, or false when the user object
 *   is not yet safe to load
 */
protected function getUserVariant() {
	global $wgUser;

	// Get language variant preference from logged in users
	// Don't call this on stub objects because that causes infinite
	// recursion during initialisation
	if ( !$wgUser->isSafeToLoad() ) {
		return false;
	}

	if ( !$wgUser->isLoggedIn() ) {
		// figure out user lang without constructing wgLang to avoid
		// infinite recursion
		$optionName = 'language';
	} else {
		$contentLangCode = MediaWikiServices::getInstance()->getContentLanguage()->getCode();
		// The plain 'variant' option belongs to the content language;
		// other languages use a per-language option name.
		$optionName = $this->mMainLanguageCode == $contentLangCode
			? 'variant'
			: 'variant-' . $this->mMainLanguageCode;
	}

	$this->mUserVariant = $this->validateVariant( $wgUser->getOption( $optionName ) );

	return $this->mUserVariant;
}
311
/**
 * Determine the language variant from the Accept-Language header.
 *
 * Each accepted language is tried directly first; if none is a valid
 * variant, the fallbacks of those languages are tried in turn. A
 * successful lookup is remembered in $mHeaderVariant.
 *
 * @return string|null The variant code, or null if the header yields none
 */
protected function getHeaderVariant() {
	global $wgRequest;

	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	// See if some supported language variant is set in the
	// HTTP header.
	$accepted = array_keys( $wgRequest->getAcceptLang() );
	if ( !$accepted ) {
		return null;
	}

	$candidates = [];
	foreach ( $accepted as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			// Direct hit: no need to look at any fallback.
			return $this->mHeaderVariant;
		}

		// Remember the fallbacks of this language; they are only
		// examined once every accepted language has failed.
		$fallbacks = $this->getVariantFallbacks( $code );
		if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
			$candidates[] = $fallbacks;
		} elseif ( is_array( $fallbacks ) ) {
			$candidates = array_merge( $candidates, $fallbacks );
		}
	}

	// process fallback languages now
	foreach ( array_unique( $candidates ) as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			break;
		}
	}

	return $this->mHeaderVariant;
}
363
/**
 * Dictionary-based conversion.
 *
 * Splits $text into translatable runs and literal elements (HTML tags,
 * entities, parser markers), translates all runs in a single batch and
 * stitches the result back together. The 'title' and 'alt' attributes of
 * matched tags are converted as well.
 *
 * @param string $text The text to be converted
 * @param bool|string $toVariant The target variant code; the preferred
 *   variant is used when falsy
 * @return string The converted text, or $text unchanged when there is no
 *   target variant, when guessVariant() reports that $text is already in
 *   that variant, or when the splitting regex fails
 */
public function autoConvert( $text, $toVariant = false ) {
	$this->loadTables();

	if ( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
		if ( !$toVariant ) {
			return $text;
		}
	}

	if ( $this->guessVariant( $text, $toVariant ) ) {
		return $text;
	}
	/* we convert everything except:
	   1. HTML markups (anything between < and >)
	   2. HTML entities
	   3. placeholders created by the parser
	   IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
	   Minimize use of backtracking where possible.
	*/
	$marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';

	// this one is needed when the text is inside an HTML markup
	$htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';

	// Optimize for the common case where these tags have
	// few or no children. Thus try and possessively get as much as
	// possible, and only engage in backtracking when we hit a '<'.

	// disable convert to variants between <code> tags
	$codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
	// disable conversion of <script> tags
	$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
	// disable conversion of <pre> tags
	$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
	// The "|.*+)" at the end, is in case we missed some part of html syntax,
	// we will fail securely (hopefully) by matching the rest of the string.
	$htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';

	$reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
		'&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
	$startPos = 0;
	$sourceBlob = '';
	$literalBlob = '';

	// Guard against delimiter nulls in the input
	// (should never happen: see T159174)
	$text = str_replace( [ "\000", "\004" ], '', $text );

	$markupMatches = null;
	$elementMatches = null;

	// We add a marker (\004) at the end of text, to ensure we always match the
	// entire text (Otherwise, pcre.backtrack_limit might cause silent failure)
	// $text is not modified inside the loop, so its length and the
	// marker-terminated copy are computed once rather than per iteration.
	$textLength = strlen( $text );
	$terminatedText = $text . "\004";
	while ( $startPos < $textLength ) {
		if ( preg_match( $reg, $terminatedText, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
			$elementPos = $markupMatches[0][1];
			$element = $markupMatches[0][0];
			if ( $element === "\004" ) {
				// We hit the end.
				$elementPos = $textLength;
				$element = '';
			} elseif ( substr( $element, -1 ) === "\004" ) {
				// This can sometimes happen if we have
				// unclosed html tags (For example
				// when converting a title attribute
				// during a recursive call that contains
				// a &lt; e.g. <div title="&lt;">.
				$element = substr( $element, 0, -1 );
			}
		} else {
			// If we hit here, then Language Converter could be tricked
			// into doing an XSS, so we refuse to translate.
			// If non-crazy input manages to reach this code path,
			// we should consider it a bug.
			$log = LoggerFactory::getInstance( 'languageconverter' );
			$log->error( "Hit pcre.backtrack_limit in " . __METHOD__
				. ". Disabling language conversion for this page.",
				[
					"method" => __METHOD__,
					"variant" => $toVariant,
					"startOfText" => substr( $text, 0, 500 )
				]
			);
			return $text;
		}
		// Queue the part before the markup for translation in a batch
		$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

		// Advance to the next position
		$startPos = $elementPos + strlen( $element );

		// Translate any alt or title attributes inside the matched element
		if ( $element !== ''
			&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
		) {
			// FIXME, this decodes entities, so if you have something
			// like <div title="foo&lt;bar"> the bar won't get
			// translated since after entity decoding it looks like
			// unclosed html and we call this method recursively
			// on attributes.
			$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
			// Ensure self-closing tags stay self-closing.
			$close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
			$changed = false;
			foreach ( [ 'title', 'alt' ] as $attrName ) {
				if ( !isset( $attrs[$attrName] ) ) {
					continue;
				}
				$attr = $attrs[$attrName];
				// Don't convert URLs
				// NOTE: strpos() yields 0 (falsy) when '://' sits at the very
				// start of the value, so such values are still converted.
				if ( !strpos( $attr, '://' ) ) {
					$attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
				}

				if ( $attr !== $attrs[$attrName] ) {
					$attrs[$attrName] = $attr;
					$changed = true;
				}
			}
			if ( $changed ) {
				$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
					$close . $elementMatches[3];
			}
		}
		$literalBlob .= $element . "\000";
	}

	// Do the main translation batch
	$translatedBlob = $this->translate( $sourceBlob, $toVariant );

	// Put the output back together
	$translatedIter = StringUtils::explode( "\000", $translatedBlob );
	$literalIter = StringUtils::explode( "\000", $literalBlob );
	$output = '';
	while ( $translatedIter->valid() && $literalIter->valid() ) {
		$output .= $translatedIter->current();
		$output .= $literalIter->current();
		$translatedIter->next();
		$literalIter->next();
	}

	return $output;
}
519
/**
 * Translate a string to a variant.
 *
 * Performs plain table replacement only; no markup is parsed here.
 *
 * @param string $text Text to convert
 * @param string $variant Variant language code, used as key into $mTables
 * @return string Translated text, or $text itself when trim( $text ) is falsy
 */
public function translate( $text, $variant ) {
	// Nothing to do for empty or whitespace-only text.
	if ( !trim( $text ) ) {
		return $text;
	}

	$this->loadTables();

	return $this->mTables[$variant]->replace( $text );
}
538
/**
 * Call translate() to convert text to all valid variants.
 *
 * @param string $text The text to be converted
 * @return string[] Converted text keyed by variant code
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$converted = [];
	foreach ( $this->mVariants as $code ) {
		$converted[$code] = $this->translate( $text, $code );
	}

	return $converted;
}
555
/**
 * Apply manual conversion rules.
 *
 * Records the rule's title (if it has one) and merges the rule's pairs
 * into, or removes them from, the per-variant tables in $mTables.
 *
 * @param ConverterRule $convRule The parsed rule to apply
 */
protected function applyManualConv( $convRule ) {
	// Use syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- to custom
	// title conversion.
	// T26072: only overwrite $mConvRuleTitle when this rule actually
	// carries a title; otherwise a later non-title rule would wipe out
	// the title set by an earlier one.
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// merge/remove manual conversion rules to/from global table
	$ruleTable = $convRule->getConvTable();
	$mode = $convRule->getRulesAction();
	foreach ( $ruleTable as $code => $pairs ) {
		$validCode = $this->validateVariant( $code );
		if ( $validCode ) {
			switch ( $mode ) {
				case 'add':
					// More efficient than array_merge(), about 2.5 times.
					foreach ( $pairs as $from => $to ) {
						$this->mTables[$validCode]->setPair( $from, $to );
					}
					break;
				case 'remove':
					$this->mTables[$validCode]->removeArray( $pairs );
					break;
			}
		}
	}
}
591
/**
 * Auto convert a Title object to a readable string in the
 * preferred variant.
 *
 * @param Title $title The title to convert
 * @return string Converted title text, prefixed with the converted
 *   namespace name and ':' unless the title is in the main namespace
 */
public function convertTitle( $title ) {
	$variant = $this->getPreferredVariant();
	$nsIndex = $title->getNamespace();

	$nsPrefix = $nsIndex !== NS_MAIN
		? $this->convertNamespace( $nsIndex, $variant ) . ':'
		: '';

	return $nsPrefix . $this->translate( $title->getText(), $variant );
}
610
/**
 * Get the namespace display name in the preferred variant.
 *
 * The name is taken from, in order: the local server cache, the
 * 'conversion-ns<index>' message in the target variant, the same message
 * in the content language (translated to the variant), and finally the
 * namespace names of the variant's own Language object.
 *
 * @param int $index Namespace id
 * @param string|null $variant Variant code; null means the preferred variant
 * @return string Namespace name for display ('' for the main namespace)
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}

	if ( $variant === null ) {
		$variant = $this->getPreferredVariant();
	}

	$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
	$cacheKey = $cache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
	$name = $cache->get( $cacheKey );
	if ( $name !== false ) {
		return $name;
	}

	$msgKey = 'conversion-ns' . $index;

	// First check if a message gives a converted name in the target variant.
	$variantMsg = wfMessage( $msgKey )->inLanguage( $variant );
	if ( $variantMsg->exists() ) {
		$name = $variantMsg->plain();
	}

	// Then check if a message gives a converted name in content language
	// which needs extra translation to the target variant.
	if ( $name === false ) {
		$contentMsg = wfMessage( $msgKey )->inContentLanguage();
		if ( $contentMsg->exists() ) {
			$name = $this->translate( $contentMsg->plain(), $variant );
		}
	}

	if ( $name === false ) {
		// No message exists, retrieve it from the target variant's namespace names.
		$name = $this->mLangObj->factory( $variant )->getFormattedNsText( $index );
	}

	// Third argument is presumably a TTL in seconds -- TODO confirm.
	$cache->set( $cacheKey, $name, 60 );

	return $name;
}
659
/**
 * Convert text to different variants of a language.
 *
 * Shorthand for convertTo() with the preferred variant as target.
 *
 * @param string $text Text to convert
 * @return string Converted text
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
682
/**
 * Same as convert(), except that the target variant is passed explicitly.
 *
 * @param string $text Text to convert
 * @param string $variant The target variant code
 * @return string Converted text, or $text unchanged when language
 *   conversion is disabled via $wgDisableLangConversion
 */
public function convertTo( $text, $variant ) {
	global $wgDisableLangConversion;

	// Conversion switched off site-wide: hand the text back untouched.
	if ( $wgDisableLangConversion ) {
		return $text;
	}
	// Reset converter state for a new converter run.
	$this->mConvRuleTitle = false;
	return $this->recursiveConvertTopLevel( $text, $variant );
}
701
/**
 * Recursively convert text on the outside.
 *
 * Text outside -{ ... }- rules is passed through autoConvert() (unless
 * guessVariant() says the text is already in the target variant); each
 * rule is handed to recursiveConvertRule().
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int $depth Current nesting depth of the recursion
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$cursor = 0;
	$result = '';
	$textLength = strlen( $text );
	$shouldConvert = !$this->guessVariant( $text, $variant );
	$found = 1;

	$noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
	$noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
	// phpcs:ignore Generic.Files.LineLength
	$noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
	// Only match -{ outside of html.
	$rulePattern = "/$noScript|$noStyle|$noHtml|-\{/";

	while ( $cursor < $textLength && $found ) {
		$found = preg_match( $rulePattern, $text, $m, PREG_OFFSET_CAPTURE, $cursor );

		if ( !$found ) {
			// No more markup, append final segment
			$tail = substr( $text, $cursor );
			return $result . ( $shouldConvert ? $this->autoConvert( $tail, $variant ) : $tail );
		}

		// Offset of the match of the regex pattern.
		$rulePos = $m[0][1];

		// Append initial segment
		$head = substr( $text, $cursor, $rulePos - $cursor );
		$result .= $shouldConvert ? $this->autoConvert( $head, $variant ) : $head;

		// -{ marker found, not in attribute
		// Advance position up to -{ marker.
		$cursor = $rulePos;

		// Do recursive conversion
		// Note: This passes $cursor by reference, and advances it.
		$result .= $this->recursiveConvertRule( $text, $variant, $cursor, $depth + 1 );
	}

	return $result;
}
754
/**
 * Recursively convert text on the inside.
 *
 * Expects $startPos to point at a '-{' marker. Consumes text up to the
 * matching '}-', recursing into nested rules, then parses and applies
 * the rule.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int &$startPos Offset of the '-{' marker on entry; on return it
 *   points just past the consumed rule (or at the end of $text for an
 *   unclosed rule)
 * @param int $depth Current nesting depth
 * @throws MWException If $startPos does not point at '-{'
 * @return string The rule's display text, or '-{' followed by the
 *   auto-converted remainder when the rule is never closed
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick sanity check (no function calls)
	if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
		throw new MWException( __METHOD__ . ': invalid input string' );
	}

	$startPos += 2;
	$inner = '';
	$warningDone = false;
	$length = strlen( $text );

	while ( $startPos < $length ) {
		$match = [];
		if ( !preg_match( '/-\{|\}-/', $text, $match, PREG_OFFSET_CAPTURE, $startPos ) ) {
			// Unclosed rule
			break;
		}

		list( $token, $pos ) = $match[0];

		// Markup found: append the text before it and move onto the marker
		$inner .= substr( $text, $startPos, $pos - $startPos );
		$startPos = $pos;

		if ( $token === '}-' ) {
			// End of this rule: apply it
			$startPos += 2;
			$rule = new ConverterRule( $inner, $this );
			$rule->parse( $variant );
			$this->applyManualConv( $rule );
			return $rule->getDisplay();
		}

		if ( $token !== '-{' ) {
			throw new MWException( __METHOD__ . ': invalid regex match' );
		}

		if ( $depth < $this->mMaxDepth ) {
			// Recursively parse another rule
			$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
			continue;
		}

		// Max depth reached: keep the marker literally and warn once
		$inner .= '-{';
		if ( !$warningDone ) {
			$inner .= '<span class="error">' .
				wfMessage( 'language-converter-depth-warning' )
					->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
				'</span>';
			$warningDone = true;
		}
		$startPos += 2;
	}

	// Unclosed rule
	if ( $startPos < $length ) {
		$inner .= substr( $text, $startPos );
	}
	$startPos = $length;

	return '-{' . $this->autoConvert( $inner, $variant );
}
832
/**
 * If a language supports multiple variants, a link that does not exist
 * in one variant may exist in another; this looks for such a page.
 *
 * @param string &$link The name of the link; replaced with the text of
 *   the first existing variant title, if any
 * @param Title &$nt The title object of the link; replaced with the
 *   first existing variant title, if any
 * @param bool $ignoreOtherCond When true, skip the request-based checks
 *   (redirect=no, action=edit/submit, linkconvert=no)
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	# If the article has already existed, there is no need to
	# check it again, otherwise it may cause a fault.
	if ( is_object( $nt ) && $nt->exists() ) {
		return;
	}

	global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;
	$isredir = $wgRequest->getText( 'redirect', 'yes' );
	$action = $wgRequest->getText( 'action' );
	// Following a red link opens the editor, but counts as viewing here.
	if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
		$action = 'view';
	}
	$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
	$disableLinkConversion = $wgDisableLangConversion
		|| $wgDisableTitleConversion;
	$linkBatch = new LinkBatch();

	$ns = NS_MAIN;

	if ( $disableLinkConversion ||
		( !$ignoreOtherCond &&
			( $isredir == 'no'
				|| $action == 'edit'
				|| $action == 'submit'
				|| $linkconvert == 'no' ) ) ) {
		return;
	}

	if ( is_object( $nt ) ) {
		$ns = $nt->getNamespace();
	}

	$variants = $this->autoConvertToAllVariants( $link );
	if ( !$variants ) { // give up
		return;
	}

	$titles = [];

	foreach ( $variants as $v ) {
		if ( $v != $link ) {
			$varnt = Title::newFromText( $v, $ns );
			if ( $varnt !== null ) {
				$linkBatch->addObj( $varnt );
				$titles[] = $varnt;
			}
		}
	}

	// fetch all variants in single query
	$linkBatch->execute();

	// The first variant title that exists wins.
	foreach ( $titles as $varnt ) {
		if ( $varnt->getArticleID() > 0 ) {
			$nt = $varnt;
			$link = $varnt->getText();
			break;
		}
	}
}
905
/**
 * Returns language specific hash options.
 *
 * @return string '!' followed by the preferred variant code
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
916
/**
 * Guess if a text is written in a variant.
 *
 * This base implementation never recognises a variant and always
 * answers false.
 *
 * @param string $text The text to be checked
 * @param string $variant Language code of the variant to be checked for
 * @return bool Always false here
 */
public function guessVariant( $text, $variant ) {
	$isInVariant = false;

	return $isInVariant;
}
930
/**
 * Load default conversion tables.
 *
 * This base implementation only throws, naming the class it was called
 * on (late static binding).
 *
 * @throws MWException Always
 */
public function loadDefaultTables() {
	$class = static::class;

	throw new MWException( "Must implement loadDefaultTables() method in class $class" );
}
942
/**
 * Load conversion tables either from the cache or the disk.
 *
 * Does nothing when the tables are already loaded. A cached copy is only
 * accepted if it carries the current CACHE_VERSION_KEY; otherwise the
 * tables are rebuilt from loadDefaultTables() plus the
 * MediaWiki:Conversiontable/* pages and written back to the cache.
 *
 * @param bool $fromCache Whether to try the cache first
 */
public function loadTables( $fromCache = true ) {
	// The cache backend is chosen by site configuration.
	global $wgLanguageConverterCacheType;

	if ( $this->mTablesLoaded ) {
		return;
	}

	$this->mTablesLoaded = true;
	$this->mTables = false;
	$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
	$cacheKey = $cache->makeKey( 'conversiontables', $this->mMainLanguageCode );
	if ( $fromCache ) {
		$this->mTables = $cache->get( $cacheKey );
	}
	if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
		// not in cache, or we need a fresh reload.
		// We will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$this->loadDefaultTables();
		foreach ( $this->mVariants as $var ) {
			$cached = $this->parseCachedTable( $var );
			$this->mTables[$var]->mergeArray( $cached );
		}

		$this->postLoadTables();
		// Stamp the tables so stale cache entries can be detected later.
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		// 43200 s = 12 h, assuming the third argument is a TTL in seconds.
		$cache->set( $cacheKey, $this->mTables, 43200 );
	}
}
978
/**
 * Hook for post processing after conversion tables are loaded.
 *
 * Intentionally empty in this base class; loadTables() calls it after
 * the tables have been rebuilt.
 */
public function postLoadTables() {
	// Nothing to do by default.
}
984
/**
 * Reload the conversion tables.
 *
 * Drops the tables held in memory and rebuilds them, bypassing the cache.
 */
private function reloadTables() {
	if ( $this->mTables ) {
		unset( $this->mTables );
	}

	// Clear the "loaded" flag so loadTables() does not return early.
	$this->mTablesLoaded = false;
	$bypassCache = false;
	$this->loadTables( $bypassCache );
}
1000
/**
 * Parse the conversion table stored in the cache.
 *
 * Reads MediaWiki:Conversiontable/<code>[/<subpage>], extracts every
 * "from => to" mapping found inside -{ ... }- blocks and, when
 * $recursive is set, follows links to further subpages of the same
 * table. Each page is parsed at most once per process: a repeated
 * request for the same page returns an empty array.
 *
 * @param string $code Language (variant) code
 * @param string $subpage Subpage name, '' for the table's base page
 * @param bool $recursive Whether to parse linked subpages as well
 * @return array Mappings as from => to
 */
public function parseCachedTable( $code, $subpage = '', $recursive = true ) {
	static $parsed = [];

	$key = 'Conversiontable/' . $code;
	if ( $subpage ) {
		$key .= '/' . $subpage;
	}
	if ( array_key_exists( $key, $parsed ) ) {
		return [];
	}

	$parsed[$key] = true;

	if ( $subpage === '' ) {
		$txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
	} else {
		$txt = false;
		$title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
		if ( $title && $title->exists() ) {
			$revision = Revision::newFromTitle( $title );
			if ( $revision ) {
				if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
					$txt = $revision->getContent( Revision::RAW )->getText();
				}

				// @todo in the future, use a specialized content model, perhaps based on json!
			}
		}
	}

	# Nothing to parse if there's no text
	if ( $txt === false || $txt === null || $txt === '' ) {
		return [];
	}

	// get all subpage links of the form
	// [[MediaWiki:Conversiontable/zh-xx/...|...]]
	$linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
		':Conversiontable';
	$subs = StringUtils::explode( '[[', $txt );
	$sublinks = [];
	foreach ( $subs as $sub ) {
		$link = explode( ']]', $sub, 2 );
		if ( count( $link ) != 2 ) {
			continue;
		}
		$b = explode( '|', $link[0], 2 );
		$b = explode( '/', trim( $b[0] ), 3 );
		if ( count( $b ) == 3 ) {
			$sublink = $b[2];
		} else {
			$sublink = '';
		}

		// A link without any '/' has no $b[1]; check before reading it so
		// that e.g. [[MediaWiki:Conversiontable]] raises no notice.
		if ( isset( $b[1] ) && $b[0] == $linkhead && $b[1] == $code ) {
			$sublinks[] = $sublink;
		}
	}

	// parse the mappings in this page
	$blocks = StringUtils::explode( '-{', $txt );
	$ret = [];
	$first = true;
	foreach ( $blocks as $block ) {
		if ( $first ) {
			// Skip the part before the first -{
			$first = false;
			continue;
		}
		$mappings = explode( '}-', $block, 2 )[0];
		$stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
		$table = StringUtils::explode( ';', $stripped );
		foreach ( $table as $t ) {
			// Limit 3 so that entries with more than one '=>' yield three
			// parts and are rejected by the count check below.
			$m = explode( '=>', $t, 3 );
			if ( count( $m ) != 2 ) {
				continue;
			}
			// trim any trailing comments starting with '//'
			$tt = explode( '//', $m[1], 2 );
			$ret[trim( $m[0] )] = trim( $tt[0] );
		}
	}

	// recursively parse the subpages
	if ( $recursive ) {
		foreach ( $sublinks as $link ) {
			$s = $this->parseCachedTable( $code, $link, $recursive );
			// Array union: on duplicate keys the subpage's mapping wins.
			$ret = $s + $ret;
		}
	}

	if ( $this->mUcfirst ) {
		foreach ( $ret as $k => $v ) {
			$ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
		}
	}
	return $ret;
}
1118
/**
 * Enclose a string with the "no conversion" tag.
 *
 * @param string $text Text to be tagged for no conversion
 * @param bool $noParse Unused
 * @return string "-{R|$text}-", or $text unchanged if it already
 *   contains a '-{' or '}-' marker anywhere
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	# strpos() returns 0 for a marker at the very start of the text, so
	# the result must be compared against false rather than tested for
	# truthiness.
	if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
		return $text;
	}

	return "-{R|$text}-";
}
1136
/**
 * Convert the sorting key for category links.
 *
 * This base implementation performs no conversion.
 *
 * @param string $key The sort key
 * @return string $key, unchanged
 */
public function convertCategoryKey( $key ) {
	$sortKey = $key;

	return $sortKey;
}
1148
/**
 * Refresh the cache of conversion tables when
 * MediaWiki:Conversiontable* is updated.
 *
 * Only acts on MediaWiki-namespace pages named
 * "Conversiontable/<variant>[/...]" where <variant> is a valid variant.
 *
 * @param Title $titleobj The Title of the page being updated
 */
public function updateConversionTable( Title $titleobj ) {
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return;
	}

	$parts = explode( '/', $titleobj->getDBkey(), 3 );
	if ( count( $parts ) <= 1 || $parts[0] != 'Conversiontable' ) {
		return;
	}

	// $parts[1] is the variant code the table belongs to.
	if ( $this->validateVariant( $parts[1] ) ) {
		$this->reloadTables();
	}
}
1167
/**
 * Get the cached separator pattern for ConverterRule::parseRules()
 *
 * The pattern is built on first use and kept in $mVarSeparatorPattern.
 *
 * @return string A PCRE pattern for preg_split()
 */
public function getVarSeparatorPattern() {
	if ( $this->mVarSeparatorPattern === null ) {
		// varsep_pattern for preg_split:
		// text should be split by ";" only if a valid variant
		// name exist after the markup, for example:
		// -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
		// <span style="font-size:120%;">yyy</span>;}-
		// we should split it as:
		// [
		// [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
		// [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
		// [2] => ''
		// ]
		$expandedVariants = [];
		foreach ( $this->mVariants as $variant ) {
			$expandedVariants[ $variant ] = 1;
			// Accept standard BCP 47 names for variants as well.
			$expandedVariants[ LanguageCode::bcp47( $variant ) ] = 1;
		}
		// Accept old deprecated names for variants
		foreach ( LanguageCode::getDeprecatedCodeMapping() as $old => $new ) {
			if ( isset( $expandedVariants[ $new ] ) ) {
				$expandedVariants[ $old ] = 1;
			}
		}

		// Lookahead: a ";" only separates when followed by one of the
		// alternatives below, or by the end of the string.
		$pat = '/;\s*(?=';
		foreach ( $expandedVariants as $variant => $ignore ) {
			// zh-hans:xxx;zh-hant:yyy
			$pat .= $variant . '\s*:|';
			// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
			$pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
		}
		$pat .= '\s*$)/';
		$this->mVarSeparatorPattern = $pat;
	}
	return $this->mVarSeparatorPattern;
}
1210}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
$wgDefaultLanguageVariant
Default variant code, if false, the default will be the language code.
$wgDisableLangConversion
Whether to disable language variant conversion (set to true to turn conversion off).
$wgDisabledVariants
Disabled variants array of language variant conversion.
$wgLanguageConverterCacheType
The cache type for storing language conversion tables, which are used when parsing certain text and i...
$wgDisableTitleConversion
Whether to disable language variant conversion for links (set to true to turn link conversion off).
if(! $wgDBerrorLogTZ) $wgRequest
Definition Setup.php:728
Parser for rules of language conversion, parse rules in -{ }- tag.
Base class for language conversion.
getPreferredVariant()
Get preferred language variant.
convertTitle( $title)
Auto convert a Title object to a readable string in the preferred variant.
validateVariant( $variant=null)
Validate the variant and return an appropriate strict internal variant code if one exists.
getDefaultVariant()
Get default variant.
recursiveConvertTopLevel( $text, $variant, $depth=0)
Recursively convert text on the outside.
loadTables( $fromCache=true)
Load conversion tables either from the cache or the disk.
getHeaderVariant()
Determine the language variant from the Accept-Language header.
static array $languagesWithVariants
languages supporting variants
autoConvert( $text, $toVariant=false)
Dictionary-based conversion.
recursiveConvertRule( $text, $variant, &$startPos, $depth=0)
Recursively convert text on the inside.
__construct(Language $langobj, $maincode, $variants=[], $variantfallbacks=[], $flags=[], $manualLevel=[])
parseCachedTable( $code, $subpage='', $recursive=true)
Parse the conversion table stored in the cache.
getVarSeparatorPattern()
Get the cached separator pattern for ConverterRule::parseRules()
convertNamespace( $index, $variant=null)
Get the namespace display name in the preferred variant.
getExtraHashOptions()
Returns language specific hash options.
getVariantFallbacks( $variant)
In case some variant is not defined in the markup, we need to have some fallback.
updateConversionTable(Title $titleobj)
Refresh the cache of conversion tables when MediaWiki:Conversiontable* is updated.
markNoConversion( $text, $noParse=false)
Enclose a string with the "no conversion" tag.
applyManualConv( $convRule)
Apply manual conversion rules.
translate( $text, $variant)
Translate a string to a variant.
getVariants()
Get all valid variants.
findVariantLink(&$link, &$nt, $ignoreOtherCond=false)
If a language supports multiple variants, it is possible that non-existing link in one variant actual...
convert( $text)
Convert text to different variants of a language.
postLoadTables()
Hook for post processing after conversion tables are loaded.
getURLVariant()
Get the variant specified in the URL.
ReplacementArray[] $mTables
@phan-var array<string,ReplacementArray>
loadDefaultTables()
Load default conversion tables.
autoConvertToAllVariants( $text)
Call translate() to convert text to all valid variants.
guessVariant( $text, $variant)
Guess if a text is written in a variant.
getUserVariant()
Determine if the user has a variant set.
convertTo( $text, $variant)
Same as convert(), except that an extra parameter specifies the target variant.
convertCategoryKey( $key)
Convert the sorting key for category links.
getConvRuleTitle()
Get the title produced by the conversion rule.
reloadTables()
Reload the conversion tables.
Internationalisation code.
Definition Language.php:36
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:34
MediaWiki exception.
PSR-3 logger instance factory.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Wrapper around strtr() that holds replacements.
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target.
Definition Revision.php:137
const RAW
Definition Revision.php:56
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Represents a title within MediaWiki.
Definition Title.php:40
getNamespace()
Get the namespace index, i.e.
Definition Title.php:994
getDBkey()
Get the main part with underscores.
Definition Title.php:970
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
const NS_MAIN
Definition Defines.php:73
const NS_MEDIAWIKI
Definition Defines.php:81
const CONTENT_MODEL_WIKITEXT
Definition Defines.php:244
this hook is for auditing only $req
Definition hooks.txt:979
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition hooks.txt:855
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:955
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition hooks.txt:856
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition hooks.txt:2003
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation use $formDescriptor instead default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition hooks.txt:3069
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title e g db for database replication lag or jobqueue for job queue size converted to pseudo seconds It is possible to add more fields and they will be returned to the user in the API response after the basic globals have been set but before ordinary actions take place $output
Definition hooks.txt:2272
injection.txt: This is an overview of how MediaWiki makes use of dependency injection. The design described here grew from the discussion of RFC T384. The term "dependency injection" […]; in practical terms, this means that anything an object needs to operate should be injected from the outside. The object itself should only know narrow interfaces, no concrete implementation of the logic it relies on. The requirement to inject everything typically results in an architecture that is based on two main types of objects: simple value objects, and essentially stateless service objects that use other service objects to operate on the value objects. As of the beginning of 2016, MediaWiki is only starting to use the DI approach. Much of the code still relies on global state or direct instantiation, resulting in a highly cyclical dependency network. […] MediaWikiServices, which acts as the top-level factory for services in MediaWiki, which can be used to gain access to default instances of various services. MediaWikiServices, however, also allows new services to be defined and default services to be redefined. Services are defined or redefined by providing a callback function, the "instantiator", that will return a new instance of the service. When MediaWikiServices::getInstance() is first called, it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by $wgServiceWiringFiles, thereby bootstrapping the DI framework. Per default, $wgServiceWiringFiles lists includes/ServiceWiring.php
Definition injection.txt:37
linkcache.txt: The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database. This is used to mark up links when displaying a page. If the same link appears more than once on any page, then it only has to be looked up once. In most cases, link lookups are done in batches with the LinkBatch class, or the equivalent in Parser::replaceLinkHolders(), so the link cache is mostly useful for short snippets of parsed text, and for links in the navigation areas of the skin. The link cache was formerly used to track links used in a document for the purposes of updating the link tables. This application is now deprecated. To create a batch, you can use the following code: $titles
Definition linkcache.txt:17
$cache
Definition mcc.php:33
The wiki should then use memcached to cache various data. To use multiple servers, just add more items to the array. To increase the weight of a server, make its entry a subarray, e.g. array("192.168.0.1:11211", 2).
switch( $options['output']) $languages
Definition transstat.php:76