MediaWiki REL1_31
LanguageConverter.php
Go to the documentation of this file.
1<?php
22
24
40 static public $languagesWithVariants = [
41 'en',
42 'crh',
43 'gan',
44 'iu',
45 'kk',
46 'ku',
47 'shi',
48 'sr',
49 'tg',
50 'uz',
51 'zh',
52 ];
53
55
59 public $mVariants;
62 public $mTablesLoaded = false;
63 public $mTables;
64 // 'bidirectional' 'unidirectional' 'disable' for each variant
66
67 public $mLangObj;
68 public $mFlags;
69 public $mDescCodeSep = ':', $mDescVarSep = ';';
70 public $mUcfirst = false;
71 public $mConvRuleTitle = false;
75 public $mMaxDepth = 10;
77
78 const CACHE_VERSION_KEY = 'VERSION 7';
79
/**
 * Set up a converter for one language and its variants.
 *
 * @param object $langobj Language object; stored as mLangObj and used
 *  elsewhere in this class for factory(), getNsText() and ucfirst()
 * @param string $maincode Main language code, used as the final fallback
 * @param array $variants Supported variant codes; codes listed in
 *  $wgDisabledVariants are removed
 * @param array $variantfallbacks Map of variant code => fallback
 *  (string or array of strings)
 * @param array $flags Extra flags, merged over the defaults below
 * @param array $manualLevel Map of variant code => 'bidirectional',
 *  'unidirectional' or 'disable'; missing variants get 'bidirectional'
 */
public function __construct( $langobj, $maincode, $variants = [],
	$variantfallbacks = [], $flags = [],
	$manualLevel = [] ) {
	// Must be declared global: otherwise array_diff() below receives an
	// undefined local variable instead of the configured list.
	global $wgDisabledVariants;

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;
	$this->mVariants = array_diff( $variants, $wgDisabledVariants );
	$this->mVariantFallbacks = $variantfallbacks;
	$this->mVariantNames = Language::fetchLanguageNames();
	$defaultflags = [
		// 'S' show converted text
		// '+' add rules for all text
		// 'E' the given flags are erroneous
		// the flags above are reserved for the program
		'A' => 'A', // add rule for convert code (all text convert)
		'T' => 'T', // title convert
		'R' => 'R', // raw content
		'D' => 'D', // convert description (subclass implement)
		'-' => '-', // remove convert (not implemented)
		'H' => 'H', // add rule for convert code (but no display in placed code)
		'N' => 'N', // current variant name
	];
	$this->mFlags = array_merge( $defaultflags, $flags );
	foreach ( $this->mVariants as $v ) {
		if ( array_key_exists( $v, $manualLevel ) ) {
			$this->mManualLevel[$v] = $manualLevel[$v];
		} else {
			$this->mManualLevel[$v] = 'bidirectional';
		}
		// Every variant code is also accepted as a flag.
		$this->mFlags[$v] = $v;
	}
}
120
/**
 * Get the variant codes this converter was configured with
 * (after disabled variants were removed in the constructor).
 *
 * @return array Variant codes
 */
public function getVariants() {
	$variantCodes = $this->mVariants;

	return $variantCodes;
}
130
/**
 * Look up the fallback(s) registered for a variant.
 *
 * @param string $variant Variant code to look up
 * @return string|array The configured fallback entry, or the main
 *  language code when none is set for $variant
 */
public function getVariantFallbacks( $variant ) {
	return isset( $this->mVariantFallbacks[$variant] )
		? $this->mVariantFallbacks[$variant]
		: $this->mMainLanguageCode;
}
148
/**
 * Get the title produced by the most recent title conversion rule.
 *
 * @return string|bool The stored rule title; false when none has been
 *  set since the converter state was last reset
 */
public function getConvRuleTitle() {
	$ruleTitle = $this->mConvRuleTitle;

	return $ruleTitle;
}
156
/**
 * Get the preferred language variant for the current request.
 *
 * Sources are tried in this order: URL parameter, the
 * GetLangPreferredVariant hook (which may rewrite the value), the user
 * preference (logged-in users that are safe to load) or otherwise the
 * Accept-Language header, and finally $wgDefaultLanguageVariant.
 *
 * @return string The chosen variant if it validates, otherwise the
 *  main language code
 */
public function getPreferredVariant() {
	// Both globals are read below and must be imported into scope.
	global $wgDefaultLanguageVariant, $wgUser;

	$req = $this->getURLVariant();

	Hooks::run( 'GetLangPreferredVariant', [ &$req ] );

	if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
		$req = $this->getUserVariant();
	} elseif ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	// NOTE(review): this condition was missing from the listing (only its
	// closing brace survived); reconstructed from the file's reference to
	// $wgDefaultLanguageVariant. Confirm against upstream.
	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	// This function, unlike the other get*Variant functions, is
	// not memoized (i.e. its return value is not cached) since
	// new information might appear during processing after this
	// is first called.
	if ( $this->validateVariant( $req ) ) {
		return $req;
	}
	return $this->mMainLanguageCode;
}
187
/**
 * Get the default variant, ignoring any per-user preference.
 *
 * Sources are tried in this order: URL parameter, Accept-Language
 * header, then $wgDefaultLanguageVariant.
 *
 * @return string The first variant found, otherwise the main language code
 */
public function getDefaultVariant() {
	global $wgDefaultLanguageVariant;

	$req = $this->getURLVariant();

	if ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	// NOTE(review): this condition was missing from the listing (only its
	// closing brace survived); reconstructed from the file's reference to
	// $wgDefaultLanguageVariant. Confirm against upstream.
	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $this->validateVariant( $wgDefaultLanguageVariant );
	}

	if ( $req ) {
		return $req;
	}
	return $this->mMainLanguageCode;
}
211
/**
 * Validate a variant code against the configured variants.
 *
 * @param string|null $variant The variant to validate
 * @return string|null $variant itself if it is one of the configured
 *  variants, otherwise null
 */
public function validateVariant( $variant = null ) {
	// Strict comparison: with loose matching, non-string input such as
	// 0 or true compares equal to variant codes on PHP 7 and would be
	// returned as if it were a valid variant.
	if ( $variant !== null && in_array( $variant, $this->mVariants, true ) ) {
		return $variant;
	}
	return null;
}
223
/**
 * Get the variant requested through the URL.
 *
 * Reads the 'variant' request parameter and, when that is empty,
 * 'uselang'. A truthy result is remembered in mURLVariant and reused
 * on later calls.
 *
 * @return string|null The validated variant, or null if none is valid
 */
public function getURLVariant() {
	global $wgRequest;

	if ( !$this->mURLVariant ) {
		// Prefer the explicit 'variant' parameter over 'uselang'.
		$requested = $wgRequest->getText( 'variant' );
		if ( !$requested ) {
			$requested = $wgRequest->getVal( 'uselang' );
		}

		$this->mURLVariant = $this->validateVariant( $requested );
	}

	return $this->mURLVariant;
}
246
/**
 * Determine the variant from the user's options.
 *
 * Deliberately not memoized: the original author notes that caching
 * the result wreaks havoc on parserTest.php.
 *
 * @return string|null|bool The validated variant, null if the option
 *  is not a valid variant, or false if $wgUser is not safe to load
 */
protected function getUserVariant() {
	global $wgUser, $wgContLang;

	// Don't touch stub user objects: that causes infinite recursion
	// during initialisation.
	if ( !$wgUser->isSafeToLoad() ) {
		return false;
	}

	if ( !$wgUser->isLoggedIn() ) {
		// Figure out the user language without constructing wgLang,
		// to avoid infinite recursion.
		$optionName = 'language';
	} elseif ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
		$optionName = 'variant';
	} else {
		$optionName = 'variant-' . $this->mMainLanguageCode;
	}

	$this->mUserVariant = $this->validateVariant( $wgUser->getOption( $optionName ) );

	return $this->mUserVariant;
}
283
/**
 * Determine the variant from the Accept-Language header.
 *
 * The accepted languages are checked in order; the first one that is
 * a valid variant wins. If none is, the fallbacks of those languages
 * are checked in the order they were collected. A truthy result is
 * remembered in mHeaderVariant.
 *
 * @return string|null The variant found, or null
 */
protected function getHeaderVariant() {
	global $wgRequest;

	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	$acceptedCodes = array_keys( $wgRequest->getAcceptLang() );
	if ( !$acceptedCodes ) {
		return null;
	}

	// Fallbacks of languages that were not valid variants themselves;
	// only consulted when no direct match is found.
	$deferred = [];
	foreach ( $acceptedCodes as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			break;
		}

		$fallback = $this->getVariantFallbacks( $code );
		if ( is_array( $fallback ) ) {
			$deferred = array_merge( $deferred, $fallback );
		} elseif ( is_string( $fallback ) && $fallback !== $this->mMainLanguageCode ) {
			$deferred[] = $fallback;
		}
	}

	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	foreach ( array_unique( $deferred ) as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			break;
		}
	}

	return $this->mHeaderVariant;
}
335
/**
 * Dictionary-based conversion.
 *
 * Splits $text into markup (HTML tags, entities, parser markers, and the
 * contents of code/script/pre elements) and plain text. Plain text is
 * translated in a single batch; markup is kept literally, except that
 * title and alt attributes are converted recursively.
 *
 * @param string $text The text to be converted
 * @param bool|string $toVariant Target variant; when falsy the preferred
 *  variant is used
 * @return string Converted text, or $text unchanged when there is no
 *  target variant, guessVariant() reports the text is already in the
 *  variant, or the splitting regex fails
 */
public function autoConvert( $text, $toVariant = false ) {
	$this->loadTables();

	if ( !$toVariant ) {
		$toVariant = $this->getPreferredVariant();
		if ( !$toVariant ) {
			return $text;
		}
	}

	if ( $this->guessVariant( $text, $toVariant ) ) {
		return $text;
	}
	/* we convert everything except:
	   1. HTML markups (anything between < and >)
	   2. HTML entities
	   3. placeholders created by the parser
	   IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
	   Minimize use of backtracking where possible.
	*/
	$marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';

	// this one is needed when the text is inside an HTML markup
	$htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';

	// Optimize for the common case where these tags have
	// few or no children. Thus try and possessively get as much as
	// possible, and only engage in backtracking when we hit a '<'.

	// disable convert to variants between <code> tags
	$codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
	// disable conversion of <script> tags
	$scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
	// disable conversion of <pre> tags
	$prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
	// The "|.*+)" at the end, is in case we missed some part of html syntax,
	// we will fail securely (hopefully) by matching the rest of the string.
	$htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';

	$reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
		'&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
	$startPos = 0;
	$sourceBlob = '';
	$literalBlob = '';

	// Guard against delimiter nulls in the input
	// (should never happen: see T159174)
	$text = str_replace( "\000", '', $text );
	$text = str_replace( "\004", '', $text );

	$markupMatches = null;
	$elementMatches = null;

	// We add a marker (\004) at the end of text, to ensure we always match the
	// entire text (Otherwise, pcre.backtrack_limit might cause silent failure).
	// $text is not modified inside the loop, so the marked copy and the
	// length are computed once instead of on every iteration.
	$textWithMarker = $text . "\004";
	$textLength = strlen( $text );

	while ( $startPos < $textLength ) {
		if ( preg_match( $reg, $textWithMarker, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
			$elementPos = $markupMatches[0][1];
			$element = $markupMatches[0][0];
			if ( $element === "\004" ) {
				// We hit the end.
				$elementPos = $textLength;
				$element = '';
			} elseif ( substr( $element, -1 ) === "\004" ) {
				// This can sometimes happen if we have
				// unclosed html tags (For example
				// when converting a title attribute
				// during a recursive call that contains
				// a &lt; e.g. <div title="&lt;">.
				$element = substr( $element, 0, -1 );
			}
		} else {
			// If we hit here, then Language Converter could be tricked
			// into doing an XSS, so we refuse to translate.
			// If non-crazy input manages to reach this code path,
			// we should consider it a bug.
			$log = LoggerFactory::getInstance( 'languageconverter' );
			$log->error( "Hit pcre.backtrack_limit in " . __METHOD__
				. ". Disabling language conversion for this page.",
				[
					"method" => __METHOD__,
					"variant" => $toVariant,
					"startOfText" => substr( $text, 0, 500 )
				]
			);
			return $text;
		}
		// Queue the part before the markup for translation in a batch
		$sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";

		// Advance to the next position
		$startPos = $elementPos + strlen( $element );

		// Translate any alt or title attributes inside the matched element
		if ( $element !== ''
			&& preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
		) {
			// FIXME, this decodes entities, so if you have something
			// like <div title="foo&lt;bar"> the bar won't get
			// translated since after entity decoding it looks like
			// unclosed html and we call this method recursively
			// on attributes.
			$attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
			// Ensure self-closing tags stay self-closing.
			$close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
			$changed = false;
			foreach ( [ 'title', 'alt' ] as $attrName ) {
				if ( !isset( $attrs[$attrName] ) ) {
					continue;
				}
				$attr = $attrs[$attrName];
				// Don't convert URLs. Compare against false explicitly:
				// strpos() returns 0 (falsy) for a match at the very start.
				if ( strpos( $attr, '://' ) === false ) {
					$attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
				}

				if ( $attr !== $attrs[$attrName] ) {
					$attrs[$attrName] = $attr;
					$changed = true;
				}
			}
			if ( $changed ) {
				$element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
					$close . $elementMatches[3];
			}
		}
		$literalBlob .= $element . "\000";
	}

	// Do the main translation batch
	$translatedBlob = $this->translate( $sourceBlob, $toVariant );

	// Put the output back together by interleaving the translated
	// segments with the literal markup segments.
	$translatedIter = StringUtils::explode( "\000", $translatedBlob );
	$literalIter = StringUtils::explode( "\000", $literalBlob );
	$output = '';
	while ( $translatedIter->valid() && $literalIter->valid() ) {
		$output .= $translatedIter->current();
		$output .= $literalIter->current();
		$translatedIter->next();
		$literalIter->next();
	}

	return $output;
}
491
/**
 * Translate a string to a variant using that variant's conversion table.
 *
 * @param string $text Text to convert
 * @param string $variant Variant code; indexes mTables
 * @return string Translated text; $text is returned untouched when
 *  trim( $text ) is falsy (empty, whitespace-only, or "0")
 */
public function translate( $text, $variant ) {
	if ( !trim( $text ) ) {
		// Nothing worth translating.
		return $text;
	}

	$this->loadTables();

	return $this->mTables[$variant]->replace( $text );
}
510
/**
 * Run translate() on a text for every configured variant.
 *
 * @param string $text The text to be converted
 * @return array Map of variant code => converted text
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$byVariant = [];
	foreach ( $this->mVariants as $code ) {
		$byVariant[$code] = $this->translate( $text, $code );
	}

	return $byVariant;
}
527
/**
 * Apply a manual conversion rule to the converter state.
 *
 * Updates the stored rule title when the rule supplies one, then adds
 * the rule's pairs to, or removes them from, the per-variant tables
 * according to the rule's action.
 *
 * @param ConverterRule $convRule Parsed rule; must provide getTitle(),
 *  getConvTable() and getRulesAction()
 */
protected function applyManualConv( $convRule ) {
	// Syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- sets a custom title.
	// T26072: only overwrite the stored title when this rule actually
	// has one, so that non-title rules don't clobber it.
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// Merge/remove manual conversion rules to/from the global table
	$tablesByVariant = $convRule->getConvTable();
	$action = $convRule->getRulesAction();
	foreach ( $tablesByVariant as $code => $pairs ) {
		if ( !$this->validateVariant( $code ) ) {
			continue;
		}

		switch ( $action ) {
			case 'add':
				// Setting pairs one by one is, per the original author,
				// about 2.5 times more efficient than array_merge().
				foreach ( $pairs as $source => $target ) {
					$this->mTables[$code]->setPair( $source, $target );
				}
				break;
			case 'remove':
				$this->mTables[$code]->removeArray( $pairs );
				break;
		}
	}
}
562
/**
 * Convert a Title object to a readable string in the preferred variant.
 *
 * @param Title $title Title to convert; getNamespace() and getText() are used
 * @return string Converted namespace name plus ':' (omitted for the
 *  main namespace) followed by the converted title text
 */
public function convertTitle( $title ) {
	$variant = $this->getPreferredVariant();
	$nsIndex = $title->getNamespace();

	$nsPrefix = $nsIndex !== NS_MAIN
		? $this->convertNamespace( $nsIndex, $variant ) . ':'
		: '';

	return $nsPrefix . $this->translate( $title->getText(), $variant );
}
581
/**
 * Get the display name of a namespace in a variant.
 *
 * Lookup order: local server cache, the 'conversion-ns<index>' message
 * in the target variant, the same message in the content language
 * (translated to the variant), and finally the namespace name of the
 * variant's own language object. The result is cached for 60 seconds.
 *
 * @param int $index Namespace id
 * @param string|null $variant Variant code; null means the preferred variant
 * @return string Namespace name ('' for the main namespace)
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}

	$variant = $variant === null ? $this->getPreferredVariant() : $variant;

	$localCache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
	$cacheKey = $localCache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
	$nsName = $localCache->get( $cacheKey );
	if ( $nsName !== false ) {
		return $nsName;
	}

	$messageKey = 'conversion-ns' . $index;

	// First choice: a message giving the name directly in the target variant.
	$variantMsg = wfMessage( $messageKey )->inLanguage( $variant );
	if ( $variantMsg->exists() ) {
		$nsName = $variantMsg->plain();
	}

	// Second choice: the content-language message, which still needs
	// to be translated to the target variant.
	if ( $nsName === false ) {
		$contentMsg = wfMessage( $messageKey )->inContentLanguage();
		if ( $contentMsg->exists() ) {
			$nsName = $this->translate( $contentMsg->plain(), $variant );
		}
	}

	// Last resort: the namespace names of the target variant's language.
	if ( $nsName === false ) {
		$nsName = $this->mLangObj->factory( $variant )->getFormattedNsText( $index );
	}

	$localCache->set( $cacheKey, $nsName, 60 );

	return $nsName;
}
630
/**
 * Convert text to the preferred variant.
 *
 * @param string $text Text to be converted
 * @return string Result of convertTo() for the preferred variant
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
649
/**
 * Same as convert(), but with an explicit target variant.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @return string Converted text, or $text unchanged when language
 *  conversion is disabled
 */
public function convertTo( $text, $variant ) {
	// NOTE(review): the guard around the early return was missing from
	// the listing (only "return $text; }" survived); reconstructed from
	// the file's use of $wgDisableLangConversion. Confirm against upstream.
	global $wgDisableLangConversion;
	if ( $wgDisableLangConversion ) {
		return $text;
	}
	// Reset converter state for a new converter run.
	$this->mConvRuleTitle = false;
	return $this->recursiveConvertTopLevel( $text, $variant );
}
666
/**
 * Convert text outside of -{ }- rules.
 *
 * Scans for "-{" markers that are not inside script/style elements or
 * HTML tags. Text between markers goes through autoConvert() (unless
 * guessVariant() reports the whole text is already in the variant);
 * each marker is handed to recursiveConvertRule().
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int $depth Depth of recursion
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$cursor = 0;
	$converted = '';
	$textLength = strlen( $text );
	$needsConversion = !$this->guessVariant( $text, $variant );

	$noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
	$noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
	// phpcs:ignore Generic.Files.LineLength
	$noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
	// Only match -{ outside of html.
	$rulePattern = '/' . $noScript . '|' . $noStyle . '|' . $noHtml . '|-\{/';

	while ( $cursor < $textLength ) {
		$found = preg_match( $rulePattern, $text, $match, PREG_OFFSET_CAPTURE, $cursor );

		if ( !$found ) {
			// No more markup: emit the final segment and stop.
			$tail = substr( $text, $cursor );
			return $converted . ( $needsConversion ? $this->autoConvert( $tail, $variant ) : $tail );
		}

		$markerPos = $match[0][1];

		// Emit the segment in front of the marker.
		$segment = substr( $text, $cursor, $markerPos - $cursor );
		$converted .= $needsConversion ? $this->autoConvert( $segment, $variant ) : $segment;

		// Position on the -{ marker; recursiveConvertRule() takes the
		// position by reference and advances it past the rule.
		$cursor = $markerPos;
		$converted .= $this->recursiveConvertRule( $text, $variant, $cursor, $depth + 1 );
	}

	return $converted;
}
719
/**
 * Convert the inside of a -{ }- rule, handling nested rules.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int &$startPos Must point at a "-{" on entry; on return it
 *  points just past the closing "}-", or at the end of $text when the
 *  rule is unclosed
 * @param int $depth Depth of recursion
 * @throws MWException If $startPos does not point at "-{"
 * @return string Display text of the rule, or "-{" followed by the
 *  auto-converted remainder when the rule is unclosed
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick sanity check (no function calls)
	if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
		throw new MWException( __METHOD__ . ': invalid input string' );
	}

	$startPos += 2;
	$ruleText = '';
	$depthWarningShown = false;
	$textLength = strlen( $text );

	while ( $startPos < $textLength ) {
		$match = [];
		if ( !preg_match( '/-\{|\}-/', $text, $match, PREG_OFFSET_CAPTURE, $startPos ) ) {
			// Unclosed rule
			break;
		}

		list( $token, $tokenPos ) = $match[0];

		// Collect the text in front of the token and move onto the token.
		$ruleText .= substr( $text, $startPos, $tokenPos - $startPos );
		$startPos = $tokenPos;

		if ( $token === '}-' ) {
			// End of this rule: parse and apply it.
			$startPos += 2;
			$rule = new ConverterRule( $ruleText, $this );
			$rule->parse( $variant );
			$this->applyManualConv( $rule );
			return $rule->getDisplay();
		}

		if ( $token !== '-{' ) {
			throw new MWException( __METHOD__ . ': invalid regex match' );
		}

		if ( $depth < $this->mMaxDepth ) {
			// Nested rule: parse it recursively (advances $startPos).
			$ruleText .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
			continue;
		}

		// Too deep: keep the marker literally and warn once per rule.
		$ruleText .= '-{';
		if ( !$depthWarningShown ) {
			$ruleText .= '<span class="error">' .
				wfMessage( 'language-converter-depth-warning' )
					->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
				'</span>';
			$depthWarningShown = true;
		}
		$startPos += 2;
	}

	// Unclosed rule: take whatever is left and convert it as plain text.
	if ( $startPos < $textLength ) {
		$ruleText .= substr( $text, $startPos );
	}
	$startPos = $textLength;

	return '-{' . $this->autoConvert( $ruleText, $variant );
}
797
/**
 * Try to replace a link to a non-existing page with a link to an
 * existing page whose title is a variant conversion of it.
 *
 * @param string &$link The name of the link; replaced with the text of
 *  the first existing variant title, if any
 * @param Title &$nt The title object of the link; replaced likewise
 * @param bool $ignoreOtherCond When true, the request-based conditions
 *  (redirect=no, action=edit/submit, linkconvert=no) are not checked
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	# If the article has already existed, there is no need to
	# check it again, otherwise it may cause a fault.
	if ( is_object( $nt ) && $nt->exists() ) {
		return;
	}

	// NOTE(review): the global declaration and the second operand of
	// $disableLinkConversion were missing from the listing; reconstructed
	// from the globals this file references. Confirm against upstream.
	global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;
	$isredir = $wgRequest->getText( 'redirect', 'yes' );
	$action = $wgRequest->getText( 'action' );
	// Editing a red link is treated like viewing it.
	if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
		$action = 'view';
	}
	$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
	$disableLinkConversion = $wgDisableLangConversion
		|| $wgDisableTitleConversion;
	$linkBatch = new LinkBatch();

	$ns = NS_MAIN;

	if ( $disableLinkConversion ||
		( !$ignoreOtherCond &&
			( $isredir == 'no'
				|| $action == 'edit'
				|| $action == 'submit'
				|| $linkconvert == 'no' ) ) ) {
		return;
	}

	if ( is_object( $nt ) ) {
		$ns = $nt->getNamespace();
	}

	$variants = $this->autoConvertToAllVariants( $link );
	if ( !$variants ) { // give up
		return;
	}

	$titles = [];

	// Queue every conversion that differs from the original link text.
	foreach ( $variants as $v ) {
		if ( $v != $link ) {
			$varnt = Title::newFromText( $v, $ns );
			if ( !is_null( $varnt ) ) {
				$linkBatch->addObj( $varnt );
				$titles[] = $varnt;
			}
		}
	}

	// fetch all variants in single query
	$linkBatch->execute();

	// The first variant title that exists wins.
	foreach ( $titles as $varnt ) {
		if ( $varnt->getArticleID() > 0 ) {
			$nt = $varnt;
			$link = $varnt->getText();
			break;
		}
	}
}
870
/**
 * Get the language-specific hash options.
 *
 * @return string '!' followed by the preferred variant code
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
881
/**
 * Guess whether a text is already written in a variant.
 *
 * This base implementation never recognises a variant.
 *
 * @param string $text The text to be checked (unused here)
 * @param string $variant Variant code to check for (unused here)
 * @return bool Always false
 */
public function guessVariant( $text, $variant ) {
	$alreadyInVariant = false;

	return $alreadyInVariant;
}
895
/**
 * Load the default conversion tables.
 *
 * This base implementation only throws; the message names the class
 * it was called on.
 *
 * @throws MWException Always
 */
function loadDefaultTables() {
	throw new MWException(
		'Must implement loadDefaultTables() method in class ' . static::class
	);
}
907
/**
 * Load the conversion tables, from the cache when possible.
 *
 * Does nothing if the tables were already loaded. On a cache miss (or
 * a cached value without the current CACHE_VERSION_KEY) the default
 * tables are loaded, merged with the parsed conversion table of each
 * variant, post-processed, and written back to the cache for
 * 43200 seconds.
 *
 * @param bool $fromCache Whether to try the cache first
 */
function loadTables( $fromCache = true ) {
	// Must be declared global: otherwise ObjectCache::getInstance()
	// below receives an undefined local variable.
	global $wgLanguageConverterCacheType;

	if ( $this->mTablesLoaded ) {
		return;
	}

	$this->mTablesLoaded = true;
	$this->mTables = false;
	$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
	$cacheKey = $cache->makeKey( 'conversiontables', $this->mMainLanguageCode );
	if ( $fromCache ) {
		$this->mTables = $cache->get( $cacheKey );
	}
	if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
		// not in cache, or we need a fresh reload.
		// We will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$this->loadDefaultTables();
		foreach ( $this->mVariants as $var ) {
			$cached = $this->parseCachedTable( $var );
			$this->mTables[$var]->mergeArray( $cached );
		}

		$this->postLoadTables();
		// Stamp the tables so that values cached under a different
		// version key are rejected by the check above.
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		$cache->set( $cacheKey, $this->mTables, 43200 );
	}
}
943
/**
 * Hook for post-processing after the conversion tables are loaded.
 * Called by loadTables() before the tables are cached; a no-op here.
 */
function postLoadTables() {
	// Intentionally empty.
}
949
/**
 * Discard the loaded conversion tables and rebuild them,
 * bypassing the cache read.
 */
private function reloadTables() {
	$this->mTablesLoaded = false;

	if ( $this->mTables ) {
		unset( $this->mTables );
	}

	$this->loadTables( false );
}
965
/**
 * Parse the conversion table stored in MediaWiki:Conversiontable/<code>
 * (and optionally its linked subpages).
 *
 * Mappings are written inside -{ }- blocks as "from => to" entries
 * separated by ';'. Quotes, '*' and '#' are stripped, and anything
 * after '//' on the target side is dropped as a comment.
 *
 * Each page key is parsed at most once per process (tracked in a
 * static); later calls for the same key return an empty array.
 *
 * @param string $code Variant code
 * @param string $subpage Subpage name relative to the table page, or ''
 * @param bool $recursive Whether to also parse linked subpages
 * @return array Map of source string => target string
 */
function parseCachedTable( $code, $subpage = '', $recursive = true ) {
	static $parsed = [];

	$pageKey = 'Conversiontable/' . $code;
	if ( $subpage ) {
		$pageKey .= '/' . $subpage;
	}
	if ( array_key_exists( $pageKey, $parsed ) ) {
		return [];
	}
	$parsed[$pageKey] = true;

	// Fetch the page text: the top-level table through the message
	// cache, subpages straight from their latest revision.
	if ( $subpage === '' ) {
		$pageText = MessageCache::singleton()->getMsgFromNamespace( $pageKey, $code );
	} else {
		$pageText = false;
		$pageTitle = Title::makeTitleSafe( NS_MEDIAWIKI, $pageKey );
		if ( $pageTitle && $pageTitle->exists() ) {
			$revision = Revision::newFromTitle( $pageTitle );
			// @todo in the future, use a specialized content model, perhaps based on json!
			if ( $revision && $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
				$pageText = $revision->getContent( Revision::RAW )->getNativeData();
			}
		}
	}

	# Nothing to parse if there's no text
	if ( $pageText === false || $pageText === null || $pageText === '' ) {
		return [];
	}

	// Collect all subpage links of the form
	// [[MediaWiki:Conversiontable/zh-xx/...|...]]
	$tablePrefix = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
		':Conversiontable';
	$subpageNames = [];
	foreach ( StringUtils::explode( '[[', $pageText ) as $chunk ) {
		$linkParts = explode( ']]', $chunk, 2 );
		if ( count( $linkParts ) != 2 ) {
			continue;
		}
		$target = explode( '|', $linkParts[0], 2 );
		$b = explode( '/', trim( $target[0] ), 3 );
		$subpageName = count( $b ) == 3 ? $b[2] : '';

		if ( $b[0] == $tablePrefix && $b[1] == $code ) {
			$subpageNames[] = $subpageName;
		}
	}

	// Parse the mappings in this page
	$mappings = [];
	$beforeFirstBlock = true;
	foreach ( StringUtils::explode( '-{', $pageText ) as $block ) {
		if ( $beforeFirstBlock ) {
			// Skip the part before the first -{
			$beforeFirstBlock = false;
			continue;
		}
		$blockBody = explode( '}-', $block, 2 )[0];
		$cleaned = str_replace( [ "'", '"', '*', '#' ], '', $blockBody );
		foreach ( StringUtils::explode( ';', $cleaned ) as $entry ) {
			$sides = explode( '=>', $entry, 3 );
			if ( count( $sides ) != 2 ) {
				continue;
			}
			// Trim any trailing comment starting with '//'
			$targetSide = explode( '//', $sides[1], 2 );
			$mappings[trim( $sides[0] )] = trim( $targetSide[0] );
		}
	}

	// Recursively parse the subpages; their entries take precedence
	// over same-key entries collected so far.
	if ( $recursive ) {
		foreach ( $subpageNames as $name ) {
			$subMappings = $this->parseCachedTable( $code, $name, $recursive );
			$mappings = $subMappings + $mappings;
		}
	}

	if ( $this->mUcfirst ) {
		foreach ( $mappings as $source => $target ) {
			$mappings[$this->mLangObj->ucfirst( $source )] = $this->mLangObj->ucfirst( $target );
		}
	}

	return $mappings;
}
1083
/**
 * Enclose a string with the "no conversion" tag.
 *
 * @param string $text Text to be tagged for no conversion
 * @param bool $noParse Unused by this implementation
 * @return string "-{R|$text}-", or $text unchanged when it already
 *  contains "-{" or "}-"
 */
public function markNoConversion( $text, $noParse = false ) {
	# Don't mark if already marked. Compare against false explicitly:
	# strpos() returns 0 (falsy) when the marker is at the very start.
	if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
		return $text;
	}

	$ret = "-{R|$text}-";
	return $ret;
}
1101
/**
 * Convert the sorting key for category links.
 * This base implementation returns the key unchanged.
 *
 * @param string $key Sort key
 * @return string The same key
 */
function convertCategoryKey( $key ) {
	$sortKey = $key;

	return $sortKey;
}
1113
/**
 * Reload the conversion tables when a MediaWiki:Conversiontable/<variant>
 * page (or one of its subpages) for a valid variant is updated.
 *
 * @param Title $titleobj The title of the page being updated
 */
public function updateConversionTable( Title $titleobj ) {
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return;
	}

	// Expected shape: Conversiontable/<variant>[/<subpage>]
	$segments = explode( '/', $titleobj->getDBkey(), 3 );
	if ( count( $segments ) <= 1 || $segments[0] != 'Conversiontable' ) {
		return;
	}

	if ( $this->validateVariant( $segments[1] ) ) {
		$this->reloadTables();
	}
}
1132
/**
 * Get the separator pattern used to split conversion rules,
 * building and storing it in mVarSeparatorPattern on first use.
 *
 * NOTE(review): the method header was missing from the listing; name
 * and visibility are taken from the file's member index and should be
 * confirmed against upstream.
 *
 * @return string Regular expression for preg_split()
 */
public function getVarSeparatorPattern() {
	if ( is_null( $this->mVarSeparatorPattern ) ) {
		// varsep_pattern for preg_split:
		// text should be split by ";" only if a valid variant
		// name exists after the markup, for example:
		//  -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
		//  <span style="font-size:120%;">yyy</span>;}-
		// we should split it as:
		//  [
		//    [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
		//    [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
		//    [2] => ''
		//  ]
		$pat = '/;\s*(?=';
		foreach ( $this->mVariants as $variant ) {
			// Escape the code so it is always matched literally, even if
			// a variant code ever contains a regex metacharacter.
			$quoted = preg_quote( $variant, '/' );
			// zh-hans:xxx;zh-hant:yyy
			$pat .= $quoted . '\s*:|';
			// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
			$pat .= '[^;]*?=>\s*' . $quoted . '\s*:|';
		}
		$pat .= '\s*$)/';
		$this->mVarSeparatorPattern = $pat;
	}
	return $this->mVarSeparatorPattern;
}
1162}
$wgDefaultLanguageVariant
Default variant code, if false, the default will be the language code.
$wgDisableLangConversion
Whether to enable language variant conversion.
$wgDisabledVariants
Disabled variants array of language variant conversion.
$wgLanguageConverterCacheType
The cache type for storing language conversion tables, which are used when parsing certain text and i...
$wgDisableTitleConversion
Whether to enable language variant conversion for links.
$wgUser
Definition Setup.php:902
if(! $wgDBerrorLogTZ) $wgRequest
Definition Setup.php:737
Parser for rules of language conversion , parse rules in -{ }- tag.
Base class for language conversion.
getPreferredVariant()
Get preferred language variant.
convertTitle( $title)
Auto convert a Title object to a readable string in the preferred variant.
validateVariant( $variant=null)
Validate the variant.
getDefaultVariant()
Get default variant.
__construct( $langobj, $maincode, $variants=[], $variantfallbacks=[], $flags=[], $manualLevel=[])
recursiveConvertTopLevel( $text, $variant, $depth=0)
Recursively convert text on the outside.
loadTables( $fromCache=true)
Load conversion tables either from the cache or the disk.
getHeaderVariant()
Determine the language variant from the Accept-Language header.
static array $languagesWithVariants
languages supporting variants
autoConvert( $text, $toVariant=false)
Dictionary-based conversion.
recursiveConvertRule( $text, $variant, &$startPos, $depth=0)
Recursively convert text on the inside.
parseCachedTable( $code, $subpage='', $recursive=true)
Parse the conversion table stored in the cache.
getVarSeparatorPattern()
Get the cached separator pattern for ConverterRule::parseRules()
convertNamespace( $index, $variant=null)
Get the namespace display name in the preferred variant.
getExtraHashOptions()
Returns language specific hash options.
getVariantFallbacks( $variant)
In case some variant is not defined in the markup, we need to have some fallback.
updateConversionTable(Title $titleobj)
Refresh the cache of conversion tables when MediaWiki:Conversiontable* is updated.
markNoConversion( $text, $noParse=false)
Enclose a string with the "no conversion" tag.
applyManualConv( $convRule)
Apply manual conversion rules.
translate( $text, $variant)
Translate a string to a variant.
getVariants()
Get all valid variants.
findVariantLink(&$link, &$nt, $ignoreOtherCond=false)
If a language supports multiple variants, it is possible that non-existing link in one variant actual...
convert( $text)
Convert text to different variants of a language.
postLoadTables()
Hook for post processing after conversion tables are loaded.
getURLVariant()
Get the variant specified in the URL.
loadDefaultTables()
Load default conversion tables.
autoConvertToAllVariants( $text)
Call translate() to convert text to all valid variants.
guessVariant( $text, $variant)
Guess if a text is written in a variant.
getUserVariant()
Determine if the user has a variant set.
convertTo( $text, $variant)
Same as convert(), except that it takes an extra parameter specifying the target variant.
convertCategoryKey( $key)
Convert the sorting key for category links.
getConvRuleTitle()
Get the title produced by the conversion rule.
reloadTables()
Reload the conversion tables.
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:34
MediaWiki exception.
PSR-3 logger instance factory.
MediaWikiServices is the service locator for the application scope of MediaWiki.
static singleton()
Get the singleton instance of this class.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Represents a title within MediaWiki.
Definition Title.php:39
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition design.txt:57
this hook is for auditing only $req
Definition hooks.txt:990
static configuration should be added through ResourceLoaderGetConfigVars instead can be used to get the real title after the basic globals have been set but before ordinary actions take place $output
Definition hooks.txt:2255
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition hooks.txt:865
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition hooks.txt:2005
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition hooks.txt:864
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition hooks.txt:3021
const NS_MAIN
Definition Defines.php:74
const CONTENT_MODEL_WIKITEXT
Definition Defines.php:245
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition linkcache.txt:17
$cache
Definition mcc.php:33
switch( $options['output']) $languages
Definition transstat.php:76