MediaWiki REL1_35
LanguageConverter.php
Go to the documentation of this file.
1<?php
29
35abstract class LanguageConverter implements ILanguageConverter {
37
/**
 * Codes of the languages supporting variants.
 *
 * @var string[]
 */
public static $languagesWithVariants = [
	'en', 'crh', 'gan', 'iu', 'kk', 'ku', 'shi', 'sr', 'tg', 'uz', 'zh',
];
56
58
/**
 * Main language code, set by the constructor; used as the last-resort
 * variant and as part of the conversion-table cache key.
 */
public $mMainLanguageCode;

/** Enabled variant codes (constructor argument minus $wgDisabledVariants). */
public $mVariants;

/** Fallback map as passed to the constructor; read by getVariantFallbacks(). */
private $mVariantFallbacks;

/** Language names fetched from LanguageNameUtils in the constructor. */
public $mVariantNames;

/** Whether loadTables() has already run for this instance. */
private $mTablesLoaded = false;

/** Conversion tables keyed by variant code, plus the cache version marker. */
protected $mTables;

/** 'bidirectional' 'unidirectional' 'disable' for each variant */
public $mManualLevel;

/** Language object handed to the constructor. */
private $mLangObj;

/** Conversion flags: the defaults merged with the constructor's $flags, plus one entry per variant. */
public $mFlags;
public $mDescCodeSep = ':';
public $mDescVarSep = ';';
private $mUcfirst = false;

/** Title set by the most recent manual rule that provided one; false if none. */
private $mConvRuleTitle = false;

/** Memoized result of getURLVariant(). */
private $mURLVariant;

/** Last result computed by getUserVariant(). */
private $mUserVariant;

/** Memoized result of getHeaderVariant(). */
private $mHeaderVariant;

/** Maximum nesting depth of -{ }- rules before a warning is emitted. */
private $mMaxDepth = 10;

/** Lazily built pattern returned by getVarSeparatorPattern(). */
private $mVarSeparatorPattern;

/** Key stored inside the cached tables to detect stale cache entries. */
private const CACHE_VERSION_KEY = 'VERSION 7';
91
/**
 * Set up the converter for one language.
 *
 * @param Language $langobj Language object kept for namespace names and ucfirst()
 * @param string $maincode Main language code
 * @param string[] $variants Supported variants; entries listed in
 *  $wgDisabledVariants are removed
 * @param array $variantfallbacks Fallback variant(s) for each variant
 * @param array $flags Extra conversion flags, merged over the defaults
 * @param array $manualLevel Manual conversion level per variant; variants
 *  without an entry default to 'bidirectional'
 */
public function __construct(
	$langobj,
	$maincode,
	$variants = [],
	$variantfallbacks = [],
	$flags = [],
	$manualLevel = []
) {
	global $wgDisabledVariants;

	$this->deprecatePublicProperty( 'mURLVariant', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mUcfirst', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mConvRuleTitle', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mUserVariant', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mHeaderVariant', '1.35', __CLASS__ );
	// The property name must be given without its default value; the former
	// 'mMaxDepth = 10' never matched the real property.
	$this->deprecatePublicProperty( 'mMaxDepth', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mVarSeparatorPattern', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mLangObj', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mVariantFallbacks', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mTablesLoaded', '1.35', __CLASS__ );
	$this->deprecatePublicProperty( 'mTables', '1.35', __CLASS__ );

	$this->mLangObj = $langobj;
	$this->mMainLanguageCode = $maincode;
	$this->mVariants = array_diff( $variants, $wgDisabledVariants );
	$this->mVariantFallbacks = $variantfallbacks;
	$this->mVariantNames = MediaWikiServices::getInstance()
		->getLanguageNameUtils()
		->getLanguageNames();
	$defaultflags = [
		// 'S' show converted text
		// '+' add rules for all text
		// 'E' the given flags are in error
		// The flags above are reserved for internal use.
		'A' => 'A', // add rule for convert code (all text convert)
		'T' => 'T', // title convert
		'R' => 'R', // raw content
		'D' => 'D', // convert description (subclass implement)
		'-' => '-', // remove convert (not implemented)
		'H' => 'H', // add rule for convert code (but no display in placed code)
		'N' => 'N', // current variant name
	];
	$this->mFlags = array_merge( $defaultflags, $flags );
	foreach ( $this->mVariants as $v ) {
		if ( array_key_exists( $v, $manualLevel ) ) {
			$this->mManualLevel[$v] = $manualLevel[$v];
		} else {
			$this->mManualLevel[$v] = 'bidirectional';
		}
		// Every variant code is also accepted as a flag.
		$this->mFlags[$v] = $v;
	}
}
152
/**
 * Get all valid variants, i.e. the constructor's list with the
 * disabled variants removed.
 *
 * @return string[]
 */
public function getVariants() {
	$enabledVariants = $this->mVariants;

	return $enabledVariants;
}
162
/**
 * Look up the fallback(s) configured for a variant.
 *
 * @param string $variant
 * @return string|array The configured fallback entry, or the main
 *  language code when none is configured for $variant
 */
public function getVariantFallbacks( $variant ) {
	if ( isset( $this->mVariantFallbacks[$variant] ) ) {
		return $this->mVariantFallbacks[$variant];
	}

	return $this->mMainLanguageCode;
}
177
/**
 * Get the title produced by the conversion rule.
 *
 * @return string|bool The title recorded by applyManualConv(), or false
 *  if no rule has set one since the last convertTo() run
 */
public function getConvRuleTitle() {
	$ruleTitle = $this->mConvRuleTitle;

	return $ruleTitle;
}
185
/**
 * Get preferred language variant.
 *
 * Sources are consulted in this order: the URL, the GetLangPreferredVariant
 * hook, the logged-in user's preference (or, failing that, the
 * Accept-Language header), and finally $wgDefaultLanguageVariant.
 *
 * @return string The validated variant, or the main language code when
 *  no source yields a valid variant
 */
public function getPreferredVariant() {
	global $wgDefaultLanguageVariant, $wgUser;

	$req = $this->getURLVariant();

	Hooks::runner()->onGetLangPreferredVariant( $req );

	// NOTE: For calls from Setup.php, wgUser or the session might not be set yet (T235360)
	// Use case: During autocreation, User::isUsableName is called which uses interface
	// messages for reserved usernames.
	if ( $wgUser && $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !$req ) {
		$req = $this->getUserVariant( $wgUser );
	} elseif ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	// Fall back to the site-wide default; without this assignment the
	// configured default variant was silently ignored.
	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $wgDefaultLanguageVariant;
	}

	$req = $this->validateVariant( $req );

	// This function, unlike the other get*Variant functions, is
	// not memoized (i.e. the return value is not cached) since
	// new information might appear during processing after this
	// is first called.
	if ( $req ) {
		return $req;
	}
	return $this->mMainLanguageCode;
}
221
/**
 * Get default variant.
 *
 * Unlike getPreferredVariant(), this does not consult the user's
 * preference or the hook, and the result is not passed through
 * validateVariant() again.
 *
 * @return string The variant from the URL, the Accept-Language header or
 *  $wgDefaultLanguageVariant (in that order), else the main language code
 */
public function getDefaultVariant() {
	// The global must be imported; otherwise the check below reads an
	// undefined local and the configured default is never used.
	global $wgDefaultLanguageVariant;

	$req = $this->getURLVariant();

	if ( !$req ) {
		$req = $this->getHeaderVariant();
	}

	if ( $wgDefaultLanguageVariant && !$req ) {
		$req = $wgDefaultLanguageVariant;
	}

	if ( $req ) {
		return $req;
	}
	return $this->mMainLanguageCode;
}
245
/**
 * Validate the variant and return an appropriate strict internal
 * variant code if one exists.
 *
 * @param string|null $variant Variant code to check; may be mixed case
 *  and may be given in BCP 47 form
 * @return string|null The matching internal variant code, or null when
 *  $variant is null or matches no enabled variant
 */
public function validateVariant( $variant = null ) {
	if ( $variant === null ) {
		return null;
	}

	// Internal codes are lower-case, whereas the incoming code may not be.
	$normalized = LanguageCode::replaceDeprecatedCodes( strtolower( $variant ) );
	if ( in_array( $normalized, $this->mVariants ) ) {
		return $normalized;
	}

	// Accept-Language headers are supposed to carry BCP 47 codes, but not
	// every internal code is BCP 47, so compare against the BCP 47 form
	// of each internal code (lower-cased, as BCP 47 is mixed case).
	foreach ( $this->mVariants as $internalCode ) {
		$bcp47Code = strtolower( LanguageCode::bcp47( $internalCode ) );
		if ( $bcp47Code === $normalized ) {
			return $internalCode;
		}
	}

	return null;
}
277
/**
 * Get the variant specified in the URL.
 *
 * Reads the 'variant' request parameter, falling back to 'uselang',
 * and memoizes a valid result on the instance.
 *
 * @return string|null Validated variant, or null if the request names none
 */
public function getURLVariant() {
	global $wgRequest;

	if ( !$this->mURLVariant ) {
		// see if the preference is set in the request
		$requested = $wgRequest->getText( 'variant' );
		if ( !$requested ) {
			$requested = $wgRequest->getVal( 'uselang' );
		}
		$this->mURLVariant = $this->validateVariant( $requested );
	}

	return $this->mURLVariant;
}
300
/**
 * Determine if the user has a variant set.
 *
 * @param User $user
 * @return string|null|false The validated variant from the user's
 *  options (null if invalid), or false when the user is not safe to load
 */
protected function getUserVariant( User $user ) {
	// Callers should already have checked this, but check just in case.
	if ( !$user->isSafeToLoad() ) {
		return false;
	}

	if ( $user->isLoggedIn() ) {
		// Logged-in users: the plain 'variant' option applies when this
		// converter's language is the content language, otherwise the
		// per-language 'variant-<code>' option is used.
		$isContentLanguage = $this->mMainLanguageCode ==
			MediaWikiServices::getInstance()->getContentLanguage()->getCode();
		$optionName = $isContentLanguage
			? 'variant'
			: 'variant-' . $this->mMainLanguageCode;
	} else {
		// figure out user lang without constructing wgLang to avoid
		// infinite recursion
		$optionName = 'language';
	}

	$this->mUserVariant = $this->validateVariant( $user->getOption( $optionName ) );

	return $this->mUserVariant;
}
333
/**
 * Determine the language variant from the Accept-Language header.
 *
 * The first header language that validates wins. If none does, the
 * fallbacks of the header languages are tried in the order collected.
 * A match is memoized on the instance.
 *
 * @return string|null Variant code, or null if nothing matched
 */
protected function getHeaderVariant() {
	global $wgRequest;

	if ( $this->mHeaderVariant ) {
		return $this->mHeaderVariant;
	}

	// See if some supported language variant is set in the HTTP header.
	$acceptedLanguages = array_keys( $wgRequest->getAcceptLang() );
	if ( !$acceptedLanguages ) {
		return null;
	}

	$candidates = [];
	foreach ( $acceptedLanguages as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			return $this->mHeaderVariant;
		}

		// Remember the fallbacks of this language so they can be
		// tried once all header languages have been exhausted.
		$fallbacks = $this->getVariantFallbacks( $code );
		if ( is_array( $fallbacks ) ) {
			$candidates = array_merge( $candidates, $fallbacks );
		} elseif ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
			$candidates[] = $fallbacks;
		}
	}

	// No header language matched directly; process the fallbacks now.
	foreach ( array_unique( $candidates ) as $code ) {
		$this->mHeaderVariant = $this->validateVariant( $code );
		if ( $this->mHeaderVariant ) {
			break;
		}
	}

	return $this->mHeaderVariant;
}
385
/**
 * Dictionary-based conversion.
 *
 * Splits the text into convertible runs and literal elements (HTML tags,
 * entities, parser markers, and whole <code>/<script>/<pre> blocks),
 * translates all convertible runs in one batch via translate(), and
 * stitches the result back together. The title and alt attributes of
 * matched tags are converted through recursiveConvertTopLevel().
 *
 * @param string $text Text to convert
 * @param string|bool $toVariant Target variant; when falsy the preferred
 *  variant is used
 * @return string Converted text; the input is returned unchanged when
 *  guessVariant() reports it is already in the target variant or when
 *  the splitting regex fails
 */
396 public function autoConvert( $text, $toVariant = false ) {
397 $this->loadTables();
398
399 if ( !$toVariant ) {
400 $toVariant = $this->getPreferredVariant();
401 if ( !$toVariant ) {
402 return $text;
403 }
404 }
405
406 if ( $this->guessVariant( $text, $toVariant ) ) {
407 return $text;
408 }
409 /* we convert everything except:
410 1. HTML markups (anything between < and >)
411 2. HTML entities
412 3. placeholders created by the parser
413 IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404).
414 Minimize use of backtracking where possible.
415 */
// The pattern is built once per request and shared by all calls.
416 static $reg;
417 if ( $reg === null ) {
418 $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f';
419
420 // this one is needed when the text is inside an HTML markup
421 $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>';
422
423 // Optimize for the common case where these tags have
424 // few or no children. Thus try to possessively get as much as
425 // possible, and only engage in backtracking when we hit a '<'.
426
427 // disable convert to variants between <code> tags
428 $codefix = '<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
429 // disable conversion of <script> tags
430 $scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
431 // disable conversion of <pre> tags
432 $prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
433 // The "|.*+)" at the end, is in case we missed some part of html syntax,
434 // we will fail securely (hopefully) by matching the rest of the string.
435 $htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
436
437 $reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
438 '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s';
439 }
440 $startPos = 0;
// $sourceBlob collects the convertible runs and $literalBlob the untouched
// elements; both use "\000" as the separator so they can be re-interleaved.
441 $sourceBlob = '';
442 $literalBlob = '';
443
444 // Guard against delimiter nulls in the input
445 // (should never happen: see T159174)
446 $text = str_replace( "\000", '', $text );
447 $text = str_replace( "\004", '', $text );
448
449 $markupMatches = null;
450 $elementMatches = null;
451
452 // We add a marker (\004) at the end of text, to ensure we always match the
453 // entire text (Otherwise, pcre.backtrack_limit might cause silent failure)
454 $textWithMarker = $text . "\004";
455 while ( $startPos < strlen( $text ) ) {
456 if ( preg_match( $reg, $textWithMarker, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
457 $elementPos = $markupMatches[0][1];
458 $element = $markupMatches[0][0];
459 if ( $element === "\004" ) {
460 // We hit the end.
461 $elementPos = strlen( $text );
462 $element = '';
463 } elseif ( substr( $element, -1 ) === "\004" ) {
464 // This can sometimes happen if we have
465 // unclosed html tags (For example
466 // when converting a title attribute
467 // during a recursive call that contains
468 // a &lt; e.g. <div title="&lt;">.
469 $element = substr( $element, 0, -1 );
470 }
471 } else {
472 // If we hit here, then Language Converter could be tricked
473 // into doing an XSS, so we refuse to translate.
474 // If non-crazy input manages to reach this code path,
475 // we should consider it a bug.
476 $log = LoggerFactory::getInstance( 'languageconverter' );
477 $log->error( "Hit pcre.backtrack_limit in " . __METHOD__
478 . ". Disabling language conversion for this page.",
479 [
480 "method" => __METHOD__,
481 "variant" => $toVariant,
482 "startOfText" => substr( $text, 0, 500 )
483 ]
484 );
485 return $text;
486 }
487 // Queue the part before the markup for translation in a batch
488 $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) . "\000";
489
490 // Advance to the next position
491 $startPos = $elementPos + strlen( $element );
492
493 // Translate any alt or title attributes inside the matched element
494 if ( $element !== ''
495 && preg_match( '/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
496 ) {
497 // FIXME, this decodes entities, so if you have something
498 // like <div title="foo&lt;bar"> the bar won't get
499 // translated since after entity decoding it looks like
500 // unclosed html and we call this method recursively
501 // on attributes.
502 $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
503 // Ensure self-closing tags stay self-closing.
504 $close = substr( $elementMatches[2], -1 ) === '/' ? ' /' : '';
505 $changed = false;
506 foreach ( [ 'title', 'alt' ] as $attrName ) {
507 if ( !isset( $attrs[$attrName] ) ) {
508 continue;
509 }
510 $attr = $attrs[$attrName];
511 // Don't convert URLs
// NOTE(review): strpos() returning 0 is also falsy here, so a value that
// starts with '://' is still converted.
512 if ( !strpos( $attr, '://' ) ) {
513 $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
514 }
515
516 if ( $attr !== $attrs[$attrName] ) {
517 $attrs[$attrName] = $attr;
518 $changed = true;
519 }
520 }
// The tag is only rebuilt when an attribute actually changed.
521 if ( $changed ) {
522 $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
523 $close . $elementMatches[3];
524 }
525 }
526 $literalBlob .= $element . "\000";
527 }
528
529 // Do the main translation batch
530 $translatedBlob = $this->translate( $sourceBlob, $toVariant );
531
532 // Put the output back together
533 $translatedIter = StringUtils::explode( "\000", $translatedBlob );
534 $literalIter = StringUtils::explode( "\000", $literalBlob );
535 $output = '';
536 while ( $translatedIter->valid() && $literalIter->valid() ) {
537 $output .= $translatedIter->current();
538 $output .= $literalIter->current();
539 $translatedIter->next();
540 $literalIter->next();
541 }
542
543 return $output;
544 }
545
/**
 * Translate a string to a variant.
 *
 * Performs the table replacement directly, without parsing -{ }- rules.
 *
 * @param string $text Text to convert
 * @param string $variant Variant language code
 * @return string Translated text; the input is returned as-is when the
 *  trimmed text is empty (or otherwise falsy)
 */
public function translate( $text, $variant ) {
	// Nothing to do for empty or whitespace-only text.
	if ( !trim( $text ) ) {
		return $text;
	}

	$this->loadTables();

	return $this->mTables[$variant]->replace( $text );
}
564
/**
 * Call translate() to convert text to all valid variants.
 *
 * @param string $text Text to convert
 * @return string[] Converted text keyed by variant code
 */
public function autoConvertToAllVariants( $text ) {
	$this->loadTables();

	$converted = [];
	foreach ( $this->mVariants as $code ) {
		$converted[$code] = $this->translate( $text, $code );
	}

	return $converted;
}
581
/**
 * Apply manual conversion rules.
 *
 * Records the rule's title (if it has one) and merges the rule's pairs
 * into, or removes them from, the loaded conversion tables.
 *
 * @param ConverterRule $convRule
 */
protected function applyManualConv( ConverterRule $convRule ) {
	// Syntax -{T|zh-cn:TitleCN; zh-tw:TitleTw}- customises title conversion.
	// T26072: only overwrite the stored title when this rule really
	// carries one, so that later non-title rules do not wipe it.
	$ruleTitle = $convRule->getTitle();
	if ( $ruleTitle ) {
		$this->mConvRuleTitle = $ruleTitle;
	}

	// merge/remove manual conversion rules to/from global table
	$rulesByVariant = $convRule->getConvTable();
	$action = $convRule->getRulesAction();
	foreach ( $rulesByVariant as $code => $pairs ) {
		$validCode = $this->validateVariant( $code );
		if ( !$validCode ) {
			continue;
		}

		$table = $this->mTables[$validCode];
		if ( $action == 'add' ) {
			// More efficient than array_merge(), about 2.5 times.
			foreach ( $pairs as $from => $to ) {
				$table->setPair( $from, $to );
			}
		} elseif ( $action == 'remove' ) {
			$table->removeArray( $pairs );
		}
	}
}
617
/**
 * Auto convert a LinkTarget object to a readable string in the
 * preferred variant.
 *
 * @param LinkTarget $linkTarget
 * @return string Converted namespace prefix (omitted for NS_MAIN)
 *  followed by the converted title text
 */
public function convertTitle( LinkTarget $linkTarget ) {
	$variant = $this->getPreferredVariant();
	$namespace = $linkTarget->getNamespace();

	$prefix = $namespace !== NS_MAIN
		? $this->convertNamespace( $namespace, $variant ) . ':'
		: '';

	return $prefix . $this->translate( $linkTarget->getText(), $variant );
}
637
/**
 * Get the namespace display name in the preferred variant.
 *
 * The result is kept in the local server object cache for 60 seconds.
 *
 * @param int $index Namespace id
 * @param string|null $variant Variant code, or null for the preferred variant
 * @return string Namespace name for display; empty for NS_MAIN
 */
public function convertNamespace( $index, $variant = null ) {
	if ( $index === NS_MAIN ) {
		return '';
	}

	$variant = $variant ?? $this->getPreferredVariant();

	$cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
	$key = $cache->makeKey( 'languageconverter', 'namespace-text', $index, $variant );
	$name = $cache->get( $key );
	if ( $name !== false ) {
		return $name;
	}

	$messageKey = 'conversion-ns' . $index;

	// First check if a message gives a converted name in the target variant.
	$variantMsg = wfMessage( $messageKey )->inLanguage( $variant );
	if ( $variantMsg->exists() ) {
		$name = $variantMsg->plain();
	}

	// Then check if a message gives a converted name in content language
	// which needs extra translation to the target variant.
	if ( $name === false ) {
		$contentMsg = wfMessage( $messageKey )->inContentLanguage();
		if ( $contentMsg->exists() ) {
			$name = $this->translate( $contentMsg->plain(), $variant );
		}
	}

	// No message exists, retrieve it from the target variant's namespace names.
	if ( $name === false ) {
		$variantLanguage = MediaWikiServices::getInstance()
			->getLanguageFactory()
			->getLanguage( $variant );
		$name = $variantLanguage->getFormattedNsText( $index );
	}

	$cache->set( $key, $name, 60 );

	return $name;
}
688
/**
 * Convert text to the preferred variant of the language.
 *
 * @param string $text Text to be converted
 * @return string Converted text
 */
public function convert( $text ) {
	return $this->convertTo( $text, $this->getPreferredVariant() );
}
711
/**
 * Same as convert(), except that the target variant is passed explicitly.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @return string Converted text, or $text unchanged when language
 *  conversion is disabled site-wide
 */
public function convertTo( $text, $variant ) {
	global $wgDisableLangConversion;

	// Skip all processing when conversion is switched off.
	if ( $wgDisableLangConversion ) {
		return $text;
	}
	// Reset converter state for a new converter run.
	$this->mConvRuleTitle = false;
	return $this->recursiveConvertTopLevel( $text, $variant );
}
730
/**
 * Recursively convert text on the outside.
 *
 * Scans for "-{" markers that are outside of script, style and HTML tag
 * syntax. Text between markers goes through autoConvert() unless
 * guessVariant() reports that the whole text is already in the target
 * variant; each marked rule is handed to recursiveConvertRule().
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int $depth Depth of recursion
 * @return string Converted text
 */
740 protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
741 $startPos = 0;
742 $out = '';
743 $length = strlen( $text );
744 $shouldConvert = !$this->guessVariant( $text, $variant );
745 $continue = true;
746
// Each alternative below consumes markup and then discards the match via
// (*SKIP)(*FAIL), so only a "-{" outside of that markup can match.
747 $noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
748 $noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
749 // phpcs:ignore Generic.Files.LineLength
750 $noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
751 while ( $startPos < $length && $continue ) {
752 $continue = preg_match(
753 // Only match -{ outside of html.
754 "/$noScript|$noStyle|$noHtml|-\{/",
755 $text,
756 $m,
757 PREG_OFFSET_CAPTURE,
758 $startPos
759 );
760
761 if ( !$continue ) {
762 // No more markup, append final segment
763 $fragment = substr( $text, $startPos );
764 $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
765 return $out;
766 }
767
768 // Offset of the match of the regex pattern.
769 $pos = $m[0][1];
770
771 // Append initial segment
772 $fragment = substr( $text, $startPos, $pos - $startPos );
773 $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
774 // -{ marker found, not in attribute
775 // Advance position up to -{ marker.
776 $startPos = $pos;
777 // Do recursive conversion
778 // Note: This passes $startPos by reference, and advances it.
779 $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
780 }
781 return $out;
782 }
783
/**
 * Recursively convert text on the inside.
 *
 * Parses one rule starting at the "-{" found at $startPos, recursing into
 * nested rules, and advances $startPos past the consumed text.
 *
 * @param string $text Text to be converted
 * @param string $variant The target variant code
 * @param int &$startPos Offset of the opening "-{"; updated to the offset
 *  just after the rule, or to the text length for an unclosed rule
 * @param int $depth Depth of recursion
 *
 * @throws MWException If $startPos does not point at "-{"
 * @return string Display text of the rule; for an unclosed rule, "-{"
 *  followed by the auto-converted remainder
 */
795 protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
796 // Quick sanity check (no function calls)
797 if ( $text[$startPos] !== '-' || $text[$startPos + 1] !== '{' ) {
798 throw new MWException( __METHOD__ . ': invalid input string' );
799 }
800
801 $startPos += 2;
802 $inner = '';
803 $warningDone = false;
804 $length = strlen( $text );
805
806 while ( $startPos < $length ) {
807 $m = false;
808 preg_match( '/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
809 if ( !$m ) {
810 // Unclosed rule
811 break;
812 }
813
814 $token = $m[0][0];
815 $pos = $m[0][1];
816
817 // Markup found
818 // Append initial segment
819 $inner .= substr( $text, $startPos, $pos - $startPos );
820
821 // Advance position
822 $startPos = $pos;
823
824 switch ( $token ) {
825 case '-{':
826 // Check max depth
827 if ( $depth >= $this->mMaxDepth ) {
// Too deep: keep the marker as literal text and emit the warning only once
// per rule.
828 $inner .= '-{';
829 if ( !$warningDone ) {
830 $inner .= '<span class="error">' .
831 wfMessage( 'language-converter-depth-warning' )
832 ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
833 '</span>';
834 $warningDone = true;
835 }
836 $startPos += 2;
837 break;
838 }
839 // Recursively parse another rule
840 $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
841 break;
842 case '}-':
843 // Apply the rule
844 $startPos += 2;
845 $rule = new ConverterRule( $inner, $this );
846 $rule->parse( $variant );
847 $this->applyManualConv( $rule );
848 return $rule->getDisplay();
849 default:
850 throw new MWException( __METHOD__ . ': invalid regex match' );
851 }
852 }
853
854 // Unclosed rule
855 if ( $startPos < $length ) {
856 $inner .= substr( $text, $startPos );
857 }
858 $startPos = $length;
859 return '-{' . $this->autoConvert( $inner, $variant );
860 }
861
/**
 * If a language supports multiple variants, a link that does not exist in
 * one variant may exist in another. This looks for an existing page
 * under each variant form of the link.
 *
 * @param string &$link The name of the link; replaced by the text of the
 *  first existing variant title
 * @param Title|mixed &$nt The title object of the link; replaced by the
 *  first existing variant title
 * @param bool $ignoreOtherCond When true, the request parameters
 *  (redirect=no, action=edit/submit, linkconvert=no) do not suppress the
 *  lookup; the site-wide disable switches still apply
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	# If the article has already existed, there is no need to
	# check it again, otherwise it may cause a fault.
	if ( is_object( $nt ) && $nt->exists() ) {
		return;
	}

	global $wgDisableLangConversion, $wgDisableTitleConversion, $wgRequest;
	$isredir = $wgRequest->getText( 'redirect', 'yes' );
	$action = $wgRequest->getText( 'action' );
	// Following a red link to the edit form counts as a view.
	if ( $action == 'edit' && $wgRequest->getBool( 'redlink' ) ) {
		$action = 'view';
	}
	$linkconvert = $wgRequest->getText( 'linkconvert', 'yes' );
	$disableLinkConversion = $wgDisableLangConversion
		|| $wgDisableTitleConversion;
	$linkBatch = new LinkBatch();

	$ns = NS_MAIN;

	if ( $disableLinkConversion ||
		( !$ignoreOtherCond &&
			( $isredir == 'no'
				|| $action == 'edit'
				|| $action == 'submit'
				|| $linkconvert == 'no' ) ) ) {
		return;
	}

	if ( is_object( $nt ) ) {
		$ns = $nt->getNamespace();
	}

	$variants = $this->autoConvertToAllVariants( $link );
	if ( !$variants ) { // give up
		return;
	}

	$titles = [];

	foreach ( $variants as $v ) {
		// Only variant forms that differ from the original need checking.
		if ( $v != $link ) {
			$varnt = Title::newFromText( $v, $ns );
			if ( $varnt !== null ) {
				$linkBatch->addObj( $varnt );
				$titles[] = $varnt;
			}
		}
	}

	// fetch all variants in single query
	$linkBatch->execute();

	foreach ( $titles as $varnt ) {
		if ( $varnt->getArticleID() > 0 ) {
			$nt = $varnt;
			$link = $varnt->getText();
			break;
		}
	}
}
934
/**
 * Returns language specific hash options.
 *
 * @return string '!' followed by the preferred variant
 */
public function getExtraHashOptions() {
	return '!' . $this->getPreferredVariant();
}
945
/**
 * Guess if a text is written in a variant.
 *
 * This base implementation always answers false, so callers in this
 * class never skip conversion on its account.
 *
 * @param string $text The text to be checked
 * @param string $variant Language code of the variant to be checked for
 * @return bool Always false here
 */
956 public function guessVariant( $text, $variant ) {
957 return false;
958 }
959
/**
 * Load default conversion tables.
 *
 * This base implementation only throws, naming the concrete class
 * that was expected to provide the method.
 *
 * @throws MWException Always
 */
protected function loadDefaultTables() {
	$class = static::class;
	$message = "Must implement loadDefaultTables() method in class $class";

	throw new MWException( $message );
}
970
/**
 * Load conversion tables either from the cache or from the default
 * tables plus the MediaWiki:Conversiontable/* pages.
 *
 * Runs at most once per instance unless reloadTables() resets the flag.
 *
 * @param bool $fromCache Whether to try the cache first
 */
protected function loadTables( $fromCache = true ) {
	// The cache type setting must be imported before it can be read.
	global $wgLanguageConverterCacheType;

	if ( $this->mTablesLoaded ) {
		return;
	}

	$this->mTablesLoaded = true;
	// Do not use null as starting value, as that would confuse phan a lot.
	$this->mTables = [];
	$cache = ObjectCache::getInstance( $wgLanguageConverterCacheType );
	$cacheKey = $cache->makeKey( 'conversiontables', $this->mMainLanguageCode );
	if ( $fromCache ) {
		$this->mTables = $cache->get( $cacheKey );
	}
	// A cached value lacking the version key is treated as stale.
	if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
		// not in cache, or we need a fresh reload.
		// We will first load the default tables
		// then update them using things in MediaWiki:Conversiontable/*
		$this->loadDefaultTables();
		foreach ( $this->mVariants as $var ) {
			$cached = $this->parseCachedTable( $var );
			// @phan-suppress-next-next-line PhanTypeArraySuspiciousNullable
			// FIXME: $this->mTables could theoretically be null here
			$this->mTables[$var]->mergeArray( $cached );
		}

		$this->postLoadTables();
		$this->mTables[self::CACHE_VERSION_KEY] = true;

		// Keep the assembled tables for 43200 seconds (12 hours).
		$cache->set( $cacheKey, $this->mTables, 43200 );
	}
}
1009
/**
 * Hook for post processing after conversion tables are loaded.
 *
 * Called by loadTables() after the Conversiontable pages have been merged
 * and before the tables are written to the cache. Does nothing here.
 */
1013 protected function postLoadTables() {
1014 }
1015
/**
 * Reload the conversion tables.
 *
 * Drops the loaded tables, clears the loaded flag and rebuilds the
 * tables without reading them from the cache.
 */
1023 private function reloadTables() {
1024 if ( $this->mTables ) {
1025 // @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
1026 unset( $this->mTables );
1027 }
1028
1029 $this->mTablesLoaded = false;
// Passing false bypasses the cache read, so the tables are rebuilt.
1030 $this->loadTables( false );
1031 }
1032
/**
 * Parse the conversion table stored in the cache.
 *
 * Reads MediaWiki:Conversiontable/<code>[/<subpage>], extracts the
 * "from => to" mappings found inside -{ }- blocks, and optionally follows
 * links to further subpages of the same table.
 *
 * Each page is parsed at most once per request: a static list of
 * already-visited keys makes repeat calls return an empty array.
 *
 * @param string $code Language code
 * @param string $subpage Subpage name
 * @param bool $recursive Parse subpages recursively? Defaults to true.
 *
 * @return array Mappings keyed by source string; empty when the page was
 *  already parsed or has no text
 */
1052 private function parseCachedTable( $code, $subpage = '', $recursive = true ) {
1053 static $parsed = [];
1054
1055 $key = 'Conversiontable/' . $code;
1056 if ( $subpage ) {
1057 $key .= '/' . $subpage;
1058 }
1059 if ( array_key_exists( $key, $parsed ) ) {
1060 return [];
1061 }
1062
1063 $parsed[$key] = true;
1064
// The top-level page is fetched through the message cache; subpages are
// read from their latest revision, and only when they are wikitext.
1065 if ( $subpage === '' ) {
1066 $messageCache = MediaWikiServices::getInstance()->getMessageCache();
1067 $txt = $messageCache->getMsgFromNamespace( $key, $code );
1068 } else {
1069 $txt = false;
1070 $title = Title::makeTitleSafe( NS_MEDIAWIKI, $key );
1071 if ( $title && $title->exists() ) {
1072 $revision = MediaWikiServices::getInstance()
1073 ->getRevisionLookup()
1074 ->getRevisionByTitle( $title );
1075 if ( $revision ) {
1076 $model = $revision->getSlot(
1077 SlotRecord::MAIN,
1078 RevisionRecord::RAW
1079 )->getModel();
1080 if ( $model == CONTENT_MODEL_WIKITEXT ) {
1081 // @phan-suppress-next-line PhanUndeclaredMethod
1082 $txt = $revision->getContent(
1083 SlotRecord::MAIN,
1084 RevisionRecord::RAW
1085 )->getText();
1086 }
1087
1088 // @todo in the future, use a specialized content model, perhaps based on json!
1089 }
1090 }
1091 }
1092
1093 # Nothing to parse if there's no text
1094 if ( $txt === false || $txt === null || $txt === '' ) {
1095 return [];
1096 }
1097
1098 // get all subpage links of the form
1099 // [[MediaWiki:Conversiontable/zh-xx/...|...]]
1100 $linkhead = $this->mLangObj->getNsText( NS_MEDIAWIKI ) .
1101 ':Conversiontable';
1102 $subs = StringUtils::explode( '[[', $txt );
1103 $sublinks = [];
1104 foreach ( $subs as $sub ) {
1105 $link = explode( ']]', $sub, 2 );
1106 if ( count( $link ) != 2 ) {
1107 continue;
1108 }
// Drop the link label, then split the target into
// <namespace:Conversiontable> / <code> / <subpage>.
1109 $b = explode( '|', $link[0], 2 );
1110 $b = explode( '/', trim( $b[0] ), 3 );
1111 if ( count( $b ) == 3 ) {
1112 $sublink = $b[2];
1113 } else {
1114 $sublink = '';
1115 }
1116
1117 if ( $b[0] == $linkhead && $b[1] == $code ) {
1118 $sublinks[] = $sublink;
1119 }
1120 }
1121
1122 // parse the mappings in this page
1123 $blocks = StringUtils::explode( '-{', $txt );
1124 $ret = [];
1125 $first = true;
1126 foreach ( $blocks as $block ) {
1127 if ( $first ) {
1128 // Skip the part before the first -{
1129 $first = false;
1130 continue;
1131 }
1132 $mappings = explode( '}-', $block, 2 )[0];
1133 $stripped = str_replace( [ "'", '"', '*', '#' ], '', $mappings );
1134 $table = StringUtils::explode( ';', $stripped );
1135 foreach ( $table as $t ) {
// Entries without exactly one '=>' are ignored.
1136 $m = explode( '=>', $t, 3 );
1137 if ( count( $m ) != 2 ) {
1138 continue;
1139 }
1140 // trim any trailing comments starting with '//'
1141 $tt = explode( '//', $m[1], 2 );
1142 $ret[trim( $m[0] )] = trim( $tt[0] );
1143 }
1144 }
1145
1146 // recursively parse the subpages
1147 if ( $recursive ) {
1148 foreach ( $sublinks as $link ) {
1149 $s = $this->parseCachedTable( $code, $link, $recursive );
// Array union: on duplicate keys the subpage's entry wins.
1150 $ret = $s + $ret;
1151 }
1152 }
1153
1154 if ( $this->mUcfirst ) {
1155 foreach ( $ret as $k => $v ) {
1156 $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
1157 }
1158 }
1159 return $ret;
1160 }
1161
/**
 * Enclose a string with the "no conversion" tag.
 *
 * @param string $text Text to be tagged for no conversion
 * @param bool $noParse Unused
 * @return string "-{R|$text}-", or $text unchanged if it already
 *  contains a "-{" or "}-" marker anywhere, including at offset 0
 */
public function markNoConversion( $text, $noParse = false ) {
	# don't mark if already marked
	// strpos() returns 0 for a marker at the very start of the string,
	// so the result must be compared against false explicitly.
	if ( strpos( $text, '-{' ) !== false || strpos( $text, '}-' ) !== false ) {
		return $text;
	}

	return "-{R|$text}-";
}
1179
/**
 * Convert the sorting key for category links.
 *
 * This base implementation returns the key unchanged.
 *
 * @param string $key
 * @return string
 */
1188 public function convertCategoryKey( $key ) {
1189 return $key;
1190 }
1191
/**
 * Refresh the cache of conversion tables when
 * MediaWiki:Conversiontable* is updated.
 *
 * @param LinkTarget $linkTarget The page that was updated
 */
public function updateConversionTable( LinkTarget $linkTarget ) {
	if ( $linkTarget->getNamespace() != NS_MEDIAWIKI ) {
		return;
	}

	// Expect "Conversiontable/<variant>[/<subpage>]".
	$parts = explode( '/', $linkTarget->getDBkey(), 3 );
	if ( count( $parts ) <= 1 || $parts[0] != 'Conversiontable' ) {
		return;
	}

	if ( $this->validateVariant( $parts[1] ) ) {
		$this->reloadTables();
	}
}
1209
/**
 * Get the cached separator pattern for ConverterRule::parseRules().
 *
 * The pattern is for preg_split: text is split at ";" only when a valid
 * variant name follows the separator, for example
 *   -{zh-hans:<span style="font-size:120%;">xxx</span>;zh-hant:\
 *   <span style="font-size:120%;">yyy</span>;}-
 * is split as
 *   [0] => 'zh-hans:<span style="font-size:120%;">xxx</span>'
 *   [1] => 'zh-hant:<span style="font-size:120%;">yyy</span>'
 *   [2] => ''
 *
 * @return string Regular expression, built on first use
 */
public function getVarSeparatorPattern() {
	if ( $this->mVarSeparatorPattern !== null ) {
		return $this->mVarSeparatorPattern;
	}

	// Collect every accepted spelling: the internal code and its
	// standard BCP 47 name.
	$acceptedNames = [];
	foreach ( $this->mVariants as $code ) {
		$acceptedNames[$code] = true;
		$acceptedNames[LanguageCode::bcp47( $code )] = true;
	}
	// Accept old deprecated names for variants
	foreach ( LanguageCode::getDeprecatedCodeMapping() as $deprecated => $current ) {
		if ( isset( $acceptedNames[$current] ) ) {
			$acceptedNames[$deprecated] = true;
		}
	}

	$pattern = '/;\s*(?=';
	foreach ( array_keys( $acceptedNames ) as $name ) {
		// zh-hans:xxx;zh-hant:yyy
		$pattern .= $name . '\s*:|';
		// xxx=>zh-hans:yyy; xxx=>zh-hant:zzz
		$pattern .= '[^;]*?=>\s*' . $name . '\s*:|';
	}
	$pattern .= '\s*$)/';

	$this->mVarSeparatorPattern = $pattern;

	return $this->mVarSeparatorPattern;
}
1252
/**
 * Check if this is a language with variants.
 *
 * @return bool True when more than one variant is enabled
 */
public function hasVariants() {
	$variantCount = count( $this->getVariants() );

	return $variantCount > 1;
}
1263
/**
 * Strict check if the language has the specific variant.
 *
 * Unlike validateVariant(), the code must already be in its exact
 * internal form; no case folding or BCP 47 mapping is accepted.
 *
 * @param string $variant Variant code to check
 * @return bool
 */
public function hasVariant( $variant ) {
	if ( !$variant ) {
		return false;
	}

	return $variant === $this->validateVariant( $variant );
}
1277
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * @param string $text Text to be converted
 * @return string Converted text with HTML special characters escaped
 */
public function convertHtml( $text ) {
	$converted = $this->convert( $text );

	return htmlspecialchars( $converted );
}
1289}
$wgDefaultLanguageVariant
Default variant code, if false, the default will be the language code.
$wgDisableLangConversion
Set to true to disable language variant conversion.
$wgDisabledVariants
Disabled variants array of language variant conversion.
$wgLanguageConverterCacheType
The cache type for storing language conversion tables, which are used when parsing certain text and i...
$wgDisableTitleConversion
Set to true to disable language variant conversion for links.
deprecatePublicProperty( $property, $version, $class=null, $component=null)
Mark a property as deprecated.
trait DeprecationHelper
Use this trait in classes which have properties for which public access is deprecated.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
if(! $wgDBerrorLogTZ) $wgRequest
Definition Setup.php:643
The rules used for language conversion, this processes the rules extracted by Parser from the -{ }- w...
getRulesAction()
Return how deal with conversion rules.
getConvTable()
Get conversion table.
getTitle()
Get converted title.
Base class for multi-variant language conversion.
getPreferredVariant()
Get preferred language variant.
validateVariant( $variant=null)
Validate the variant and return an appropriate strict internal variant code if one exists.
getDefaultVariant()
Get default variant.
__construct( $langobj, $maincode, $variants=[], $variantfallbacks=[], $flags=[], $manualLevel=[])
recursiveConvertTopLevel( $text, $variant, $depth=0)
Recursively convert text on the outside.
loadTables( $fromCache=true)
Load conversion tables either from the cache or the disk.
getHeaderVariant()
Determine the language variant from the Accept-Language header.
static array $languagesWithVariants
languages supporting variants
hasVariant( $variant)
Strict check if the language has the specific variant.
autoConvert( $text, $toVariant=false)
Dictionary-based conversion.
recursiveConvertRule( $text, $variant, &$startPos, $depth=0)
Recursively convert text on the inside.
parseCachedTable( $code, $subpage='', $recursive=true)
Parse the conversion table stored in the cache.
getVarSeparatorPattern()
Get the cached separator pattern for ConverterRule::parseRules()
convertNamespace( $index, $variant=null)
Get the namespace display name in the preferred variant.
getExtraHashOptions()
Returns language specific hash options.
getVariantFallbacks( $variant)
In case some variant is not defined in the markup, we need to have some fallback.
markNoConversion( $text, $noParse=false)
Enclose a string with the "no conversion" tag.
translate( $text, $variant)
Translate a string to a variant.
getVariants()
Get all valid variants.
findVariantLink(&$link, &$nt, $ignoreOtherCond=false)
If a language supports multiple variants, it is possible that non-existing link in one variant actual...
convert( $text)
Convert text to different variants of a language.
convertTitle(LinkTarget $linkTarget)
Auto convert a LinkTarget object to a readable string in the preferred variant.
postLoadTables()
Hook for post processing after conversion tables are loaded.
getURLVariant()
Get the variant specified in the URL.
loadDefaultTables()
Load default conversion tables.
getUserVariant(User $user)
Determine if the user has a variant set.
autoConvertToAllVariants( $text)
Call translate() to convert text to all valid variants.
guessVariant( $text, $variant)
Guess if a text is written in a variant.
convertHtml( $text)
Perform output conversion on a string, and encode for safe HTML output.
hasVariants()
Check if this is a language with variants.
applyManualConv(ConverterRule $convRule)
Apply manual conversion rules.
convertTo( $text, $variant)
Same as convert(), except that the target variant is passed explicitly as an extra parameter.
updateConversionTable(LinkTarget $linkTarget)
Refresh the cache of conversion tables when MediaWiki:Conversiontable* is updated.
convertCategoryKey( $key)
Convert the sorting key for category links.
getConvRuleTitle()
Get the title produced by the conversion rule.
ReplacementArray[] bool[] $mTables
reloadTables()
Reload the conversion tables.
Internationalisation code See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more...
Definition Language.php:41
getFormattedNsText( $index)
A convenience function that returns the same thing as getNsText() except with '_' changed to ' ',...
Definition Language.php:681
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:35
MediaWiki exception.
PSR-3 logger instance factory.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Page revision base class.
Value object representing a content slot associated with a page revision.
Wrapper around strtr() that holds replacements.
static explode( $separator, $subject)
Workalike for explode() with limited memory usage.
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition User.php:60
isSafeToLoad()
Test if it's safe to load this User object.
Definition User.php:304
getOption( $oname, $defaultOverride=null, $ignoreHidden=false)
Get the user's current setting for a given option.
Definition User.php:2665
isLoggedIn()
Get whether the user is registered.
Definition User.php:3079
const NS_MAIN
Definition Defines.php:70
const NS_MEDIAWIKI
Definition Defines.php:78
const CONTENT_MODEL_WIKITEXT
Definition Defines.php:225
The shared interface for all language converters.
getNamespace()
Get the namespace index.
getDBkey()
Get the main part with underscores.
getText()
Returns the link in text form, without namespace prefix or fragment.
$cache
Definition mcc.php:33