MediaWiki REL1_28
LanguageConverter.php
Go to the documentation of this file.
1<?php
22
24
/**
 * Language codes that support variant conversion.
 *
 * @var string[]
 */
public static $languagesWithVariants = [
	'gan', 'iu', 'kk',
	'ku', 'shi', 'sr',
	'tg', 'uz', 'zh',
];
51
/**
 * Variant codes handled by this converter; the constructor stores the
 * requested variants minus $wgDisabledVariants here.
 * @var string[]
 */
public $mVariants;

/**
 * Set to true by loadTables() once it has run; reloadTables() resets it.
 * @var bool
 */
public $mTablesLoaded = false;

/**
 * Conversion tables indexed by variant code. loadTables() additionally
 * stores a self::CACHE_VERSION_KEY marker entry in this array.
 * @var array|bool
 */
public $mTables;

// 'bidirectional' 'unidirectional' 'disable' for each variant

/**
 * Key under which the conversion tables are stored in the cache.
 * @var string
 */
public $mCacheKey;

/**
 * Language object handed to the constructor.
 */
public $mLangObj;

/**
 * Rule flags: the built-in defaults merged with the flags passed to the
 * constructor, plus one entry per variant code.
 * @var array
 */
public $mFlags;

/**
 * Separators; not referenced by the methods visible here.
 * NOTE(review): presumably used when rendering variant descriptions - confirm.
 * @var string
 */
public $mDescCodeSep = ':';
public $mDescVarSep = ';';

/**
 * When true, parseCachedTable() also registers an ucfirst()ed copy of
 * every mapping it reads.
 * @var bool
 */
public $mUcfirst = false;

/**
 * Title supplied by the most recent manual rule that carried one, or false.
 * @var string|bool
 */
public $mConvRuleTitle = false;

/**
 * Maximum nesting depth of -{ }- rules accepted by recursiveConvertRule().
 * @var int
 */
public $mMaxDepth = 10;

/**
 * Array key whose presence marks a cached table set as current.
 */
const CACHE_VERSION_KEY = 'VERSION 7';
78
89 public function __construct( $langobj, $maincode, $variants = [],
90 $variantfallbacks = [], $flags = [],
91 $manualLevel = [] ) {
93 $this->mLangObj = $langobj;
94 $this->mMainLanguageCode = $maincode;
95 $this->mVariants = array_diff( $variants, $wgDisabledVariants );
96 $this->mVariantFallbacks = $variantfallbacks;
97 $this->mVariantNames = Language::fetchLanguageNames();
98 $this->mCacheKey = wfMemcKey( 'conversiontables', $maincode );
99 $defaultflags = [
100 // 'S' show converted text
101 // '+' add rules for alltext
102 // 'E' the given flags are erroneous
103 // these flags above are reserved for program
104 'A' => 'A', // add rule for convert code (all text convert)
105 'T' => 'T', // title convert
106 'R' => 'R', // raw content
107 'D' => 'D', // convert description (subclass implement)
108 '-' => '-', // remove convert (not implement)
109 'H' => 'H', // add rule for convert code (but no display in placed code)
110 'N' => 'N' // current variant name
111 ];
112 $this->mFlags = array_merge( $defaultflags, $flags );
113 foreach ( $this->mVariants as $v ) {
114 if ( array_key_exists( $v, $manualLevel ) ) {
115 $this->mManualLevel[$v] = $manualLevel[$v];
116 } else {
117 $this->mManualLevel[$v] = 'bidirectional';
118 }
119 $this->mFlags[$v] = $v;
120 }
121 }
122
/**
 * Get all variant codes this converter accepts.
 *
 * @return string[] The contents of $mVariants
 */
public function getVariants() {
	return $this->mVariants;
}
132
144 public function getVariantFallbacks( $variant ) {
145 if ( isset( $this->mVariantFallbacks[$variant] ) ) {
146 return $this->mVariantFallbacks[$variant];
147 }
149 }
150
155 public function getConvRuleTitle() {
157 }
158
163 public function getPreferredVariant() {
165
166 $req = $this->getURLVariant();
167
168 if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
169 $req = $this->getUserVariant();
170 } elseif ( !$req ) {
171 $req = $this->getHeaderVariant();
172 }
173
174 if ( $wgDefaultLanguageVariant && !$req ) {
175 $req = $this->validateVariant( $wgDefaultLanguageVariant );
176 }
177
178 // This function, unlike the other get*Variant functions, is
179 // not memoized (i.e. its return value is not cached) since
180 // new information might appear during processing after this
181 // is first called.
182 if ( $this->validateVariant( $req ) ) {
183 return $req;
184 }
186 }
187
193 public function getDefaultVariant() {
195
196 $req = $this->getURLVariant();
197
198 if ( !$req ) {
199 $req = $this->getHeaderVariant();
200 }
201
202 if ( $wgDefaultLanguageVariant && !$req ) {
203 $req = $this->validateVariant( $wgDefaultLanguageVariant );
204 }
205
206 if ( $req ) {
207 return $req;
208 }
210 }
211
/**
 * Validate a variant code against the variants this converter supports.
 *
 * @param string|null $variant Variant code to check
 * @return mixed The value passed in when it is one of $mVariants,
 *   otherwise null
 */
public function validateVariant( $variant = null ) {
	if ( $variant === null ) {
		return null;
	}

	// Compare strictly: with loose comparison non-string input such as
	// true (or 0 on PHP < 8) equals every variant code and would be
	// reported as valid.
	return in_array( $variant, $this->mVariants, true ) ? $variant : null;
}
223
229 public function getURLVariant() {
231
232 if ( $this->mURLVariant ) {
233 return $this->mURLVariant;
234 }
235
236 // see if the preference is set in the request
237 $ret = $wgRequest->getText( 'variant' );
238
239 if ( !$ret ) {
240 $ret = $wgRequest->getVal( 'uselang' );
241 }
242
243 $this->mURLVariant = $this->validateVariant( $ret );
244 return $this->mURLVariant;
245 }
246
252 protected function getUserVariant() {
254
255 // memoizing this function wreaks havoc on parserTest.php
256 /*
257 if ( $this->mUserVariant ) {
258 return $this->mUserVariant;
259 }
260 */
261
262 // Get language variant preference from logged in users
263 // Don't call this on stub objects because that causes infinite
264 // recursion during initialisation
265 if ( !$wgUser->isSafeToLoad() ) {
266 return false;
267 }
268 if ( $wgUser->isLoggedIn() ) {
269 if ( $this->mMainLanguageCode == $wgContLang->getCode() ) {
270 $ret = $wgUser->getOption( 'variant' );
271 } else {
272 $ret = $wgUser->getOption( 'variant-' . $this->mMainLanguageCode );
273 }
274 } else {
275 // figure out user lang without constructing wgLang to avoid
276 // infinite recursion
277 $ret = $wgUser->getOption( 'language' );
278 }
279
280 $this->mUserVariant = $this->validateVariant( $ret );
281 return $this->mUserVariant;
282 }
283
289 protected function getHeaderVariant() {
291
292 if ( $this->mHeaderVariant ) {
294 }
295
296 // see if some supported language variant is set in the
297 // HTTP header.
298 $languages = array_keys( $wgRequest->getAcceptLang() );
299 if ( empty( $languages ) ) {
300 return null;
301 }
302
303 $fallbackLanguages = [];
304 foreach ( $languages as $language ) {
305 $this->mHeaderVariant = $this->validateVariant( $language );
306 if ( $this->mHeaderVariant ) {
307 break;
308 }
309
310 // To see if there are fallbacks of current language.
311 // We record these fallback variants, and process
312 // them later.
313 $fallbacks = $this->getVariantFallbacks( $language );
314 if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
315 $fallbackLanguages[] = $fallbacks;
316 } elseif ( is_array( $fallbacks ) ) {
317 $fallbackLanguages =
318 array_merge( $fallbackLanguages, $fallbacks );
319 }
320 }
321
322 if ( !$this->mHeaderVariant ) {
323 // process fallback languages now
324 $fallback_languages = array_unique( $fallbackLanguages );
325 foreach ( $fallback_languages as $language ) {
326 $this->mHeaderVariant = $this->validateVariant( $language );
327 if ( $this->mHeaderVariant ) {
328 break;
329 }
330 }
331 }
332
334 }
335
/**
 * Dictionary-based conversion of a piece of text into one variant.
 *
 * The text is cut into alternating "translatable" and "literal" pieces.
 * Literal pieces are whatever the main regex matches: <code>, <script> and
 * <pre> elements, HTML tags, HTML entities and parser markers. The
 * translatable pieces are joined with NUL bytes and run through translate()
 * in one call; the result is then interleaved with the literal pieces again.
 * The title and alt attributes of matched tags are converted through
 * recursiveConvertTopLevel().
 *
 * @param string $text Text to convert
 * @param string|bool $toVariant Target variant code; when falsy the
 *   preferred variant is used
 * @return string Converted text. The input is returned without conversion
 *   when no variant can be determined, when guessVariant() reports the text
 *   already is in the target variant, or when the regex fails to match
 *   (in that last case NUL and \004 bytes have already been stripped).
 */
346 public function autoConvert( $text, $toVariant = false ) {
347
348 $this->loadTables();
349
350 if ( !$toVariant ) {
351 $toVariant = $this->getPreferredVariant();
352 if ( !$toVariant ) {
353 return $text;
354 }
355 }
356
357 if ( $this->guessVariant( $text, $toVariant ) ) {
358 return $text;
359 }
360 /* we convert everything except:
361 1. HTML markups (anything between < and >)
362 2. HTML entities
363 3. placeholders created by the parser
364 */
365 $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';
366
367 // this one is needed when the text is inside an HTML markup
368 $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';
369
370 // Optimize for the common case where these tags have
371 // few or no children. Thus try and possessively get as much as
372 // possible, and only engage in backtracking when we hit a '<'.
373
374 // disable convert to variants between <code> tags
375 $codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
376 // disable conversion of <script> tags
377 $scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
378 // disable conversion of <pre> tags
379 $prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
380 // The "|.*+)" at the end, is in case we missed some part of html syntax,
381 // we will fail securely (hopefully) by matching the rest of the string.
382 $htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
383
384 $reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
385 '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
386 $startPos = 0;
// $sourceBlob collects the translatable pieces and $literalBlob the
// untouched elements; both use "\000" as piece terminator.
387 $sourceBlob = '';
388 $literalBlob = '';
389
390 // Guard against delimiter nulls in the input
391 $text = str_replace( "\000", '', $text );
392 $text = str_replace( "\004", '', $text );
393
394 $markupMatches = null;
395 $elementMatches = null;
396
397 // We add a marker (\004) at the end of text, to ensure we always match the
398 // entire text (Otherwise, pcre.backtrack_limit might cause silent failure)
399 while ( $startPos < strlen( $text ) ) {
400 if ( preg_match( $reg, $text . "\004", $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
401 $elementPos = $markupMatches[0][1];
402 $element = $markupMatches[0][0];
403 if ( $element === "\004" ) {
404 // We hit the end.
405 $elementPos = strlen( $text );
406 $element = '';
407 } elseif ( substr( $element, -1 ) === "\004" ) {
408 // This can sometimes happen if we have
409 // unclosed html tags (For example
410 // when converting a title attribute
411 // during a recursive call that contains
412 // a &lt; e.g. <div title="&lt;">.
413 $element = substr( $element, 0, -1 );
414 }
415 } else {
416 // If we hit here, then Language Converter could be tricked
417 // into doing an XSS, so we refuse to translate.
418 // If non-crazy input manages to reach this code path,
419 // we should consider it a bug.
420 $log = LoggerFactory::getInstance( 'languageconverter' );
421 $log->error( "Hit pcre.backtrack_limit in " . __METHOD__
422 . ". Disabling language conversion for this page.",
423 [
424 "method" => __METHOD__,
425 "variant" => $toVariant,
426 "startOfText" => substr( $text, 0, 500 )
427 ]
428 );
429 return $text;
430 }
431 // Queue the part before the markup for translation in a batch
432 $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";
433
434 // Advance to the next position
435 $startPos = $elementPos + strlen( $element );
436
437 // Translate any alt or title attributes inside the matched element
438 if ( $element !== ''
439 && preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
440 ) {
441 // FIXME, this decodes entities, so if you have something
442 // like <div title="foo&lt;bar"> the bar won't get
443 // translated since after entity decoding it looks like
444 // unclosed html and we call this method recursively
445 // on attributes.
446 $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
447 // Ensure self-closing tags stay self-closing.
448 $close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
449 $changed = false;
450 foreach ( [ 'title', 'alt' ] as $attrName ) {
451 if ( !isset( $attrs[$attrName] ) ) {
452 continue;
453 }
454 $attr = $attrs[$attrName];
455 // Don't convert URLs
// NOTE(review): strpos() is tested for truthiness, so a value that
// starts with '://' (offset 0) is still converted.
456 if ( !strpos( $attr, '://' ) ) {
457 $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
458 }
459
460 // Remove HTML tags to avoid disrupting the layout
461 $attr = preg_replace( '/<[^>]+>/', '', $attr );
462 if ( $attr !== $attrs[$attrName] ) {
463 $attrs[$attrName] = $attr;
464 $changed = true;
465 }
466 }
467 if ( $changed ) {
468 $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
469 $close . $elementMatches[3];
470 }
471 }
472 $literalBlob .= $element . "\000";
473 }
474
475 // Do the main translation batch
476 $translatedBlob = $this->translate( $sourceBlob, $toVariant );
477
478 // Put the output back together
479 $translatedIter = StringUtils::explode( "\000", $translatedBlob );
480 $literalIter = StringUtils::explode( "\000", $literalBlob );
481 $output = '';
482 while ( $translatedIter->valid() && $literalIter->valid() ) {
483 $output .= $translatedIter->current();
484 $output .= $literalIter->current();
485 $translatedIter->next();
486 $literalIter->next();
487 }
488
489 return $output;
490 }
491
/**
 * Translate a string to a variant using that variant's conversion table.
 *
 * @param string $text Text to convert
 * @param string $variant Variant code; used as index into $mTables
 * @return string Translated text
 */
public function translate( $text, $variant ) {
	// The trimmed text is tested for truthiness, so empty or
	// whitespace-only text (and text that trims to "0") is handed
	// back untouched without loading the tables.
	if ( !trim( $text ) ) {
		return $text;
	}

	$this->loadTables();
	return $this->mTables[$variant]->replace( $text );
}
510
/**
 * Call translate() to convert text to every variant in $mVariants.
 *
 * @param string $text Text to convert
 * @return array Translated text indexed by variant code
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$converted = [];
	foreach ( $this->mVariants as $code ) {
		$converted[$code] = $this->translate( $text, $code );
	}
	return $converted;
}
527
/**
 * Apply the manual conversion rules carried by a parsed rule object.
 *
 * @param object $convRule Rule object providing getTitle(),
 *   getConvTable() and getRulesAction()
 */
protected function applyManualConv( $convRule ) {
	// -{T|zh-cn:TitleCN; zh-tw:TitleTw}- customises the title conversion.
	// Only a non-empty title may replace $mConvRuleTitle; otherwise rules
	// that are not about the title would wipe it (bug 24072).
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// Merge the rule's pairs into, or remove them from, the global tables.
	$ruleTable = $convRule->getConvTable();
	$ruleAction = $convRule->getRulesAction();
	foreach ( $ruleTable as $variant => $pairs ) {
		if ( $this->validateVariant( $variant ) ) {
			if ( $ruleAction == 'add' ) {
				// Setting pairs one by one is about 2.5 times faster
				// than array_merge().
				foreach ( $pairs as $source => $target ) {
					$this->mTables[$variant]->setPair( $source, $target );
				}
			} elseif ( $ruleAction == 'remove' ) {
				$this->mTables[$variant]->removeArray( $pairs );
			}
		}
	}
}
562
/**
 * Convert a title object to a readable string in the preferred variant.
 *
 * @param object $title Object providing getNamespace() and getText()
 * @return string Converted namespace name and ":" (omitted for NS_MAIN)
 *   followed by the converted title text
 */
public function convertTitle( $title ) {
	$variant = $this->getPreferredVariant();
	$nsIndex = $title->getNamespace();

	$nsPrefix = $nsIndex !== NS_MAIN
		? $this->convertNamespace( $nsIndex, $variant ) . ':'
		: '';

	return $nsPrefix . $this->translate( $title->getText(), $variant );
}
581
/**
 * Get the display name of a namespace in a variant.
 *
 * Sources are tried in this order: the local server cache, the
 * "conversion-ns<index>" message in the variant, the same message in the
 * content language (translated to the variant), and finally the namespace
 * names of the variant's language object. The result is cached for
 * 60 seconds.
 *
 * @param int $index Namespace index
 * @param string|null $variant Variant code; null selects the preferred variant
 * @return string Namespace name, '' for NS_MAIN
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}

	if ( $variant === null ) {
		$variant = $this->getPreferredVariant();
	}

	$srvCache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
	$cacheKey = $srvCache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
	$nsName = $srvCache->get( $cacheKey );
	if ( $nsName !== false ) {
		// Cache hit
		return $nsName;
	}

	// 1. A message that already holds the name in the target variant.
	$variantMsg = wfMessage( 'conversion-ns' . $index )->inLanguage( $variant );
	if ( $variantMsg->exists() ) {
		$nsName = $variantMsg->plain();
	}

	// 2. A content-language message, which still has to be translated.
	if ( $nsName === false ) {
		$contentMsg = wfMessage( 'conversion-ns' . $index )->inContentLanguage();
		if ( $contentMsg->exists() ) {
			$nsName = $this->translate( $contentMsg->plain(), $variant );
		}
	}

	// 3. No message at all: use the variant language's namespace names.
	if ( $nsName === false ) {
		$variantLang = $this->mLangObj->factory( $variant );
		$nsName = $variantLang->getFormattedNsText( $index );
	}

	$srvCache->set( $cacheKey, $nsName, 60 );

	return $nsName;
}
630
/**
 * Convert text to the preferred variant.
 *
 * @param string $text Text to convert
 * @return string Result of convertTo() for the preferred variant
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
649
657 public function convertTo( $text, $variant ) {
660 return $text;
661 }
662 // Reset converter state for a new converter run.
663 $this->mConvRuleTitle = false;
664 return $this->recursiveConvertTopLevel( $text, $variant );
665 }
666
/**
 * Recursively convert text on the outside of -{ }- rules.
 *
 * Text between rules goes through autoConvert() unless guessVariant()
 * reports that the whole text already is in the target variant. Each "-{"
 * found outside of HTML is handed to recursiveConvertRule().
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @param int $depth Current nesting depth
 * @return string Converted text
 */
protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
	$offset = 0;
	$result = '';
	$textLength = strlen( $text );
	$needsConversion = !$this->guessVariant( $text, $variant );

	$noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
	$noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
	// @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
	$noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
	// @codingStandardsIgnoreEnd

	// Only match -{ outside of html.
	$rulePattern = "/$noScript|$noStyle|$noHtml|-\{/";

	while ( $offset < $textLength ) {
		$found = preg_match( $rulePattern, $text, $match, PREG_OFFSET_CAPTURE, $offset );

		if ( !$found ) {
			// No more markup: convert the remainder and finish.
			$tail = substr( $text, $offset );
			return $result . ( $needsConversion ? $this->autoConvert( $tail, $variant ) : $tail );
		}

		// Offset at which the "-{" marker was matched.
		$markerPos = $match[0][1];

		// Emit the text in front of the marker.
		$before = substr( $text, $offset, $markerPos - $offset );
		$result .= $needsConversion ? $this->autoConvert( $before, $variant ) : $before;

		// Convert the rule itself; $offset is passed by reference and is
		// advanced past the rule by the callee.
		$offset = $markerPos;
		$result .= $this->recursiveConvertRule( $text, $variant, $offset, $depth + 1 );
	}

	return $result;
}
720
/**
 * Recursively convert text on the inside of a -{ }- rule.
 *
 * Starting at the "-{" located at $startPos, the text is scanned for the
 * matching "}-". Nested rules are converted first and their output becomes
 * part of this rule's text. Once the closing marker is found the collected
 * text is parsed as a ConverterRule, its manual rules are applied and its
 * display text is returned.
 *
 * @param string $text Text being converted
 * @param string $variant Target variant code
 * @param int &$startPos Offset of the opening "-{"; on return it points
 *   past the consumed rule, or at the end of $text for an unclosed rule
 * @param int $depth Current nesting depth
 * @throws MWException If $startPos does not point at "-{"
 * @return string Converted text
 */
protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
	// Quick sanity check (no function calls)
	if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
		throw new MWException( __METHOD__ . ': invalid input string' );
	}

	$startPos += 2;
	$inner = '';
	$warningDone = false;
	$length = strlen( $text );

	while ( $startPos < $length ) {
		$m = false;
		preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
		if ( !$m ) {
			// Unclosed rule
			break;
		}

		$token = $m[0][0];
		$pos = $m[0][1];

		// Markup found
		// Append initial segment
		$inner .= substr( $text, $startPos, $pos - $startPos );

		// Advance position
		$startPos = $pos;

		switch ( $token ) {
			case '-{':
				// Check max depth
				if ( $depth >= $this->mMaxDepth ) {
					// Too deep: keep the marker as literal text and show
					// the depth warning once per rule.
					$inner .= '-{';
					if ( !$warningDone ) {
						$inner .= '<span class="error">' .
							wfMessage( 'language-converter-depth-warning' )
								->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
							'</span>';
						$warningDone = true;
					}
					$startPos += 2;
					// Target the while loop explicitly. A bare "continue"
					// inside switch acts like "break" and raises a warning
					// on PHP 7.3+; control flow is the same either way.
					continue 2;
				}
				// Recursively parse another rule
				$inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
				break;
			case '}-':
				// Apply the rule
				$startPos += 2;
				$rule = new ConverterRule( $inner, $this );
				$rule->parse( $variant );
				$this->applyManualConv( $rule );
				return $rule->getDisplay();
			default:
				throw new MWException( __METHOD__ . ': invalid regex match' );
		}
	}

	// Unclosed rule: output the opening marker literally and convert the
	// rest as ordinary text.
	if ( $startPos < $length ) {
		$inner .= substr( $text, $startPos );
	}
	$startPos = $length;
	return '-{' . $this->autoConvert( $inner, $variant );
}
798
810 public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
811 # If the article has already existed, there is no need to
812 # check it again, otherwise it may cause a fault.
813 if ( is_object( $nt ) && $nt->exists() ) {
814 return;
815 }
816
818 $isredir = $wgRequest->getText( 'redirect', 'yes' );
819 $action = $wgRequest->getText( 'action' );
820 if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
821 $action = 'view';
822 }
823 $linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
824 $disableLinkConversion = $wgDisableLangConversion
826 $linkBatch = new LinkBatch();
827
828 $ns = NS_MAIN;
829
830 if ( $disableLinkConversion ||
831 ( !$ignoreOtherCond &&
832 ( $isredir == 'no'
833 || $action == 'edit'
834 || $action == 'submit'
835 || $linkconvert == 'no' ) ) ) {
836 return;
837 }
838
839 if ( is_object( $nt ) ) {
840 $ns = $nt->getNamespace();
841 }
842
843 $variants = $this->autoConvertToAllVariants( $link );
844 if ( !$variants ) { // give up
845 return;
846 }
847
848 $titles = [];
849
850 foreach ( $variants as $v ) {
851 if ( $v != $link ) {
852 $varnt = Title::newFromText( $v, $ns );
853 if ( !is_null( $varnt ) ) {
854 $linkBatch->addObj( $varnt );
855 $titles[] = $varnt;
856 }
857 }
858 }
859
860 // fetch all variants in single query
861 $linkBatch->execute();
862
863 foreach ( $titles as $varnt ) {
864 if ( $varnt->getArticleID() > 0 ) {
865 $nt = $varnt;
866 $link = $varnt->getText();
867 break;
868 }
869 }
870 }
871
/**
 * Returns language specific hash options.
 *
 * @return string "!" followed by the preferred variant
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
882
/**
 * Guess if a text is written in a variant.
 *
 * This implementation ignores both arguments and never recognises a
 * variant.
 *
 * @param string $text Text to inspect
 * @param string $variant Variant code to test for
 * @return bool Always false here
 */
public function guessVariant( $text, $variant ) {
	return false;
}
896
/**
 * Load default conversion tables.
 *
 * This implementation only throws, naming the concrete class that failed
 * to provide its own version.
 *
 * @throws MWException Always
 */
function loadDefaultTables() {
	$name = get_class( $this );

	throw new MWException( "Must implement loadDefaultTables() method in class $name" );
}
909
915 function loadTables( $fromCache = true ) {
917
918 if ( $this->mTablesLoaded ) {
919 return;
920 }
921
922 $this->mTablesLoaded = true;
923 $this->mTables = false;
924 $cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
925 if ( $fromCache ) {
926 wfProfileIn( __METHOD__ . '-cache' );
927 $this->mTables = $cache->get( $this->mCacheKey );
928 wfProfileOut( __METHOD__ . '-cache' );
929 }
930 if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
931 wfProfileIn( __METHOD__ . '-recache' );
932 // not in cache, or we need a fresh reload.
933 // We will first load the default tables
934 // then update them using things in MediaWiki:Conversiontable/*
935 $this->loadDefaultTables();
936 foreach ( $this->mVariants as $var ) {
937 $cached = $this->parseCachedTable( $var );
938 $this->mTables[$var]->mergeArray( $cached );
939 }
940
941 $this->postLoadTables();
942 $this->mTables[self::CACHE_VERSION_KEY] = true;
943
944 $cache->set( $this->mCacheKey, $this->mTables, 43200 );
945 wfProfileOut( __METHOD__ . '-recache' );
946 }
947 }
948
/**
 * Hook for post processing after conversion tables are loaded.
 * loadTables() calls it before the tables are written to the cache.
 */
function postLoadTables() {
	// Nothing to do in this implementation.
}
954
/**
 * Reload the conversion tables, bypassing the cache.
 *
 * Drops any tables currently held, clears the "loaded" flag and runs
 * loadTables() with cache reads disabled.
 */
function reloadTables() {
	if ( $this->mTables ) {
		unset( $this->mTables );
	}
	$this->mTablesLoaded = false;

	$this->loadTables( false );
}
968
/**
 * Parse the conversion table kept on MediaWiki:Conversiontable/<code>
 * (optionally a subpage of it) into an array of mappings.
 *
 * Mappings are read from -{ ... }- blocks whose entries are separated by
 * ";" and have the form "from => to"; a trailing "//" comment on an entry
 * is ignored. Links to subpages of the same table are followed when
 * $recursive is set.
 *
 * Every page is parsed at most once per process: the static $parsed list
 * remembers the keys already seen, and a repeated call for the same key
 * returns an empty array.
 *
 * @param string $code Variant code
 * @param string $subpage Subpage name below Conversiontable/<code>, or ''
 * @param bool $recursive Whether to parse linked subpages as well
 * @return array Mappings, source text => replacement text
 */
988 function parseCachedTable( $code, $subpage = '', $recursive = true ) {
989 static $parsed = [];
990
991 $key = 'Conversiontable/' . $code;
992 if ( $subpage ) {
993 $key .= '/' . $subpage;
994 }
// Already handled earlier (this also stops loops between subpages).
995 if ( array_key_exists( $key, $parsed ) ) {
996 return [];
997 }
998
999 $parsed[$key] = true;
1000
// The top-level table is read through the message cache; subpages are
// read from their current revision, and only when that is wikitext.
1001 if ( $subpage === '' ) {
1002 $txt = MessageCache::singleton()->getMsgFromNamespace( $key, $code );
1003 } else {
1004 $txt = false;
1005 $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
1006 if ( $title && $title->exists() ) {
1007 $revision = Revision::newFromTitle( $title );
1008 if ( $revision ) {
1009 if ( $revision->getContentModel() == CONTENT_MODEL_WIKITEXT ) {
1010 $txt = $revision->getContent( Revision::RAW )->getNativeData();
1011 }
1012
1013 // @todo in the future, use a specialized content model, perhaps based on json!
1014 }
1015 }
1016 }
1017
1018 # Nothing to parse if there's no text
1019 if ( $txt === false || $txt === null || $txt === '' ) {
1020 return [];
1021 }
1022
1023 // get all subpage links of the form
1024 // [[MediaWiki:Conversiontable/zh-xx/...|...]]
1025 $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
1026 ':Conversiontable';
1027 $subs = StringUtils::explode( '[[', $txt );
1028 $sublinks = [];
1029 foreach ( $subs as $sub ) {
1030 $link = explode( ']]', $sub, 2 );
1031 if ( count( $link ) != 2 ) {
1032 continue;
1033 }
// Drop the link label, then split the target into
// "<namespace>:Conversiontable" / "<code>" / "<subpage>".
1034 $b = explode( '|', $link[0], 2 );
1035 $b = explode( '/', trim( $b[0] ), 3 );
1036 if ( count( $b ) == 3 ) {
1037 $sublink = $b[2];
1038 } else {
1039 $sublink = '';
1040 }
1041
1042 if ( $b[0] == $linkhead && $b[1] == $code ) {
1043 $sublinks[] = $sublink;
1044 }
1045 }
1046
1047 // parse the mappings in this page
1048 $blocks = StringUtils::explode( '-{', $txt );
1049 $ret = [];
1050 $first = true;
1051 foreach ( $blocks as $block ) {
1052 if ( $first ) {
1053 // Skip the part before the first -{
1054 $first = false;
1055 continue;
1056 }
1057 $mappings = explode( '}-', $block, 2 )[0];
// Quotes and wiki list markers are removed before the entries are split.
1058 $stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
1059 $table = StringUtils::explode( ';', $stripped );
1060 foreach ( $table as $t ) {
1061 $m = explode( '=>', $t, 3 );
1062 if ( count( $m ) != 2 ) {
1063 continue;
1064 }
1065 // trim any trailing comments starting with '//'
1066 $tt = explode( '//', $m[1], 2 );
1067 $ret[trim( $m[0] )] = trim( $tt[0] );
1068 }
1069 }
1070
1071 // recursively parse the subpages
1072 if ( $recursive ) {
1073 foreach ( $sublinks as $link ) {
1074 $s = $this->parseCachedTable( $code, $link, $recursive );
// Array union keeps the left operand's value for duplicate keys, so a
// mapping from the subpage takes precedence over one collected so far.
1075 $ret = $s + $ret;
1076 }
1077 }
1078
1079 if ( $this->mUcfirst ) {
1080 foreach ( $ret as $k => $v ) {
1081 $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
1082 }
1083 }
1084 return $ret;
1085 }
1086
/**
 * Enclose a string with the "no conversion" tag.
 *
 * Text that already contains a conversion marker ("-{" or "}-") anywhere,
 * including at its very start, is returned unchanged.
 *
 * @param string $text Text to be wrapped
 * @param bool $noParse Not used by this implementation
 * @return string The text wrapped as "-{R|...}-", or the original text
 *   when it was already marked
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	// strpos() returns 0 for a marker at the start of the text, so the
	// result has to be compared with false instead of tested for truth.
	if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
		return $text;
	}

	return "-{R|$text}-";
}
1104
/**
 * Convert the sorting key for category links.
 *
 * This implementation returns the key as given.
 *
 * @param string $key Sort key
 * @return string The same key
 */
function convertCategoryKey( $key ) {
	return $key;
}
1116
/**
 * Refresh the conversion tables when a MediaWiki:Conversiontable/<variant>
 * page (or one of its subpages) is updated.
 *
 * @param Title $titleobj Title of the page that changed
 */
public function updateConversionTable( Title $titleobj ) {
	if ( $titleobj->getNamespace() != NS_MEDIAWIKI ) {
		return;
	}

	// Expected shape of the key: Conversiontable/<variant>[/<subpage>]
	$parts = explode( '/', $titleobj->getDBkey(), 3 );
	if ( count( $parts ) <= 1 || $parts[0] != 'Conversiontable' ) {
		return;
	}

	if ( $this->validateVariant( $parts[1] ) ) {
		$this->reloadTables();
	}
}
1135
1141 if ( is_null( $this->mVarSeparatorPattern ) ) {
1142 // varsep_pattern for preg_split:
1143 // text should be splited by ";" only if a valid variant
1144 // name exist after the markup, for example:
1145 // -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
1146 // <span style="font-size:120%;">yyy</span>;}-
1147 // we should split it as:
1148 // [
1149 // [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
1150 // [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
1151 // [2] => ''
1152 // ]
1153 $pat = '/;\s*(?=';
1154 foreach ( $this->mVariants as $variant ) {
1155 // zh-hans:xxx;zh-hant:yyy
1156 $pat .= $variant . '\s*:|';
1157 // xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
1158 $pat .= '[^;]*?=>\s*' . $variant . '\s*:|';
1159 }
1160 $pat .= '\s*$)/';
1161 $this->mVarSeparatorPattern = $pat;
1162 }
1164 }
1165}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
$wgDefaultLanguageVariant
Default variant code, if false, the default will be the language code.
$wgDisableLangConversion
Whether to enable language variant conversion.
$wgDisabledVariants
Disabled variants array of language variant conversion.
$wgLanguageConverterCacheType
The cache type for storing language conversion tables, which are used when parsing certain text and i...
$wgDisableTitleConversion
Whether to enable language variant conversion for links.
wfMemcKey()
Make a cache key for the local wiki.
wfProfileOut( $functionname='missing')
Stop profiling of a function.
wfProfileIn( $functionname)
Begin profiling of a function.
$wgUser
Definition Setup.php:806
if(! $wgDBerrorLogTZ) $wgRequest
Definition Setup.php:664
Parser for rules of language conversion , parse rules in -{ }- tag.
Base class for language conversion.
getPreferredVariant()
Get preferred language variant.
convertTitle( $title)
Auto convert a Title object to a readable string in the preferred variant.
validateVariant( $variant=null)
Validate the variant.
getDefaultVariant()
Get default variant.
__construct( $langobj, $maincode, $variants=[], $variantfallbacks=[], $flags=[], $manualLevel=[])
Constructor.
recursiveConvertTopLevel( $text, $variant, $depth=0)
Recursively convert text on the outside.
loadTables( $fromCache=true)
Load conversion tables either from the cache or the disk.
getHeaderVariant()
Determine the language variant from the Accept-Language header.
static array $languagesWithVariants
languages supporting variants
autoConvert( $text, $toVariant=false)
Dictionary-based conversion.
recursiveConvertRule( $text, $variant, &$startPos, $depth=0)
Recursively convert text on the inside.
parseCachedTable( $code, $subpage='', $recursive=true)
Parse the conversion table stored in the cache.
getVarSeparatorPattern()
Get the cached separator pattern for ConverterRule::parseRules()
convertNamespace( $index, $variant=null)
Get the namespace display name in the preferred variant.
string $mCacheKey
Memcached key name.
getExtraHashOptions()
Returns language specific hash options.
getVariantFallbacks( $variant)
In case some variant is not defined in the markup, we need to have some fallback.
updateConversionTable(Title $titleobj)
Refresh the cache of conversion tables when MediaWiki:Conversiontable* is updated.
markNoConversion( $text, $noParse=false)
Enclose a string with the "no conversion" tag.
applyManualConv( $convRule)
Apply manual conversion rules.
translate( $text, $variant)
Translate a string to a variant.
getVariants()
Get all valid variants.
findVariantLink(&$link, &$nt, $ignoreOtherCond=false)
If a language supports multiple variants, it is possible that non-existing link in one variant actual...
convert( $text)
Convert text to different variants of a language.
postLoadTables()
Hook for post processing after conversion tables are loaded.
getURLVariant()
Get the variant specified in the URL.
loadDefaultTables()
Load default conversion tables.
autoConvertToAllVariants( $text)
Call translate() to convert text to all valid variants.
guessVariant( $text, $variant)
Guess if a text is written in a variant.
getUserVariant()
Determine if the user has a variant set.
convertTo( $text, $variant)
Same as convert() except a extra parameter to custom variant.
convertCategoryKey( $key)
Convert the sorting key for category links.
getConvRuleTitle()
Get the title produced by the conversion rule.
reloadTables()
Reload the conversion tables.
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:32
MediaWiki exception.
PSR-3 logger instance factory.
MediaWikiServices is the service locator for the application scope of MediaWiki.
static singleton()
Get the singleton instance of this class.
static newFromTitle(LinkTarget $linkTarget, $id=0, $flags=0)
Load either the current, or a specified, revision that's attached to a given link target.
Definition Revision.php:128
const RAW
Definition Revision.php:94
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
Represents a title within MediaWiki.
Definition Title.php:36
getNamespace()
Get the namespace index, i.e.
Definition Title.php:921
getDBkey()
Get the main part with underscores.
Definition Title.php:898
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition design.txt:57
when a variable name is used in a it is silently declared as a new local masking the global
Definition design.txt:95
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
const NS_MAIN
Definition Defines.php:56
const NS_MEDIAWIKI
Definition Defines.php:64
const CONTENT_MODEL_WIKITEXT
Definition Defines.php:239
this hook is for auditing only $req
Definition hooks.txt:1010
the array() calling protocol came about after MediaWiki 1.4rc1.
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object & $output
Definition hooks.txt:1102
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:986
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
it s the revision text itself In either if gzip is the revision text is gzipped $flags
Definition hooks.txt:2710
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition hooks.txt:1949
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output $out
Definition hooks.txt:886
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition hooks.txt:2900
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:304
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition hooks.txt:887
$from
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition linkcache.txt:17
$cache
Definition mcc.php:33
switch( $options['output']) $languages
Definition transstat.php:76