/**
 * Converts text between the written variants of a language (see the
 * convert*(), autoConvert() and translate() methods of this class).
 *
 * NOTE(review): this listing carries embedded line numbers and omits some
 * source lines; the comments here describe only what is visible.
 */
34 class LanguageConverter {
# Static array whose entries are not visible in this listing; presumably the
# codes of the languages that have variants - TODO confirm.
40 public static $languagesWithVariants = [
# Code of the main language; assigned from the constructor's $maincode and
# returned as the fallback by several getters below.
54 public $mMainLanguageCode;
# Per-variant fallbacks; assigned from the constructor's $variantfallbacks and
# read by getVariantFallbacks().
60 public $mVariantFallbacks;
# NOTE(review): not referenced in the visible code; presumably display names
# keyed by variant code - confirm.
61 public $mVariantNames;
# Set to true by loadTables() and back to false by reloadTables().
62 public $mTablesLoaded =
false;
# NOTE(review): separators not referenced in the visible code; presumably used
# when parsing conversion rule descriptions - confirm.
75 public $mDescCodeSep =
':', $mDescVarSep =
';';
# When true, parseCachedTable() also stores ucfirst() versions of its pairs.
76 public $mUcfirst =
false;
# Title taken from the most recent manual conversion rule that supplied one;
# set by applyManualConv() and reset to false by convertTo().
77 public $mConvRuleTitle =
false;
# Cached result of getHeaderVariant().
80 public $mHeaderVariant;
# Nesting depth at which recursiveConvertRule() stops recursing and emits a warning.
81 public $mMaxDepth = 10;
# Lazily built and cached by getVarSeparatorPattern().
82 public $mVarSeparatorPattern;
# Key that must exist in the cached table array for loadTables() to accept it.
84 const CACHE_VERSION_KEY =
'VERSION 7';
/**
 * Store the language object, main language code and variant fallbacks, and
 * set up the per-variant flags and manual conversion levels.
 *
 * NOTE(review): part of the parameter list and of the body is not visible in
 * this listing ($manualLevel and $defaultflags are defined on omitted lines).
 *
 * @param Language $langobj Stored as $this->mLangObj
 * @param string $maincode Stored as $this->mMainLanguageCode
 * @param array $variants Variant codes (its assignment is on an omitted line)
 * @param array $variantfallbacks Stored as $this->mVariantFallbacks
 * @param array $flags Merged over $defaultflags into $this->mFlags
 */
94 public function __construct(
Language $langobj, $maincode, $variants = [],
95 $variantfallbacks = [], $flags = [],
98 $this->mLangObj = $langobj;
99 $this->mMainLanguageCode = $maincode;
101 $this->mVariantFallbacks = $variantfallbacks;
# Caller-supplied flags win over the defaults when string keys collide.
116 $this->mFlags = array_merge( $defaultflags, $flags );
117 foreach ( $this->mVariants
as $v ) {
# Use the explicitly supplied manual level for this variant when there is one ...
118 if ( array_key_exists( $v, $manualLevel ) ) {
119 $this->mManualLevel[$v] = $manualLevel[$v];
# ... presumably otherwise (the else line is omitted here) default to 'bidirectional'.
121 $this->mManualLevel[$v] =
'bidirectional';
# Every variant code is also registered as a flag that maps to itself.
123 $this->mFlags[$v] = $v;
/**
 * Get the variants handled by this converter.
 *
 * @return array The value of $this->mVariants
 */
133 public function getVariants() {
134 return $this->mVariants;
/**
 * Get the fallback(s) configured for a variant.
 *
 * @param string $variant Variant code used as key into $this->mVariantFallbacks
 * @return array|string The configured entry (getHeaderVariant() handles both
 *  string and array values), or the main language code when the entry is
 *  unset or null
 */
148 public function getVariantFallbacks( $variant ) {
149 return $this->mVariantFallbacks[$variant] ?? $this->mMainLanguageCode;
/**
 * Get the title produced by the conversion rule, if any.
 *
 * @return string|bool Current value of $this->mConvRuleTitle; this is false
 *  after convertTo() resets it and until applyManualConv() stores a title
 */
156 public function getConvRuleTitle() {
157 return $this->mConvRuleTitle;
/**
 * Get the preferred variant, consulting in order the URL, the user's
 * preference and the request header.
 *
 * NOTE(review): several lines (including the condition guarding the header
 * lookup and the return of a validated $req) are omitted from this listing.
 *
 * @return string The main language code on the visible final path;
 *  presumably the validated variant when one was found - confirm
 */
164 public function getPreferredVariant() {
# The variant requested through the URL takes precedence.
167 $req = $this->getURLVariant();
# Only consult the user preference when the user object can be loaded safely,
# the user is logged in, and the URL did not already yield a variant.
171 if ( $wgUser->isSafeToLoad() && $wgUser->isLoggedIn() && !
$req ) {
172 $req = $this->getUserVariant();
174 $req = $this->getHeaderVariant();
181 $req = $this->validateVariant(
$req );
# Fall back to the main language code.
190 return $this->mMainLanguageCode;
/**
 * Get the default variant, consulting the URL and the request header but
 * (in the visible code) not the user's preference.
 *
 * NOTE(review): the conditions between these lookups are omitted from this
 * listing.
 *
 * @return string The main language code on the visible final path
 */
198 public function getDefaultVariant() {
201 $req = $this->getURLVariant();
204 $req = $this->getHeaderVariant();
214 return $this->mMainLanguageCode;
/**
 * Check a variant code against the variants in $this->mVariants.
 *
 * NOTE(review): the return statements are omitted from this listing. Callers
 * in this class test the result for truthiness and store it as the selected
 * variant, so it presumably returns the matching variant code or a falsy
 * value - confirm.
 *
 * @param string|null $variant Variant code to validate
 */
226 public function validateVariant( $variant =
null ) {
# A null argument is handled separately from the membership tests below.
227 if ( $variant ===
null ) {
# Direct membership test (loose comparison: in_array() without the strict flag).
233 if ( in_array( $variant, $this->mVariants ) ) {
# Second pass over every variant; the comparison used is on omitted lines.
240 foreach ( $this->mVariants
as $v ) {
/**
 * Get the variant requested through the URL, caching it in $this->mURLVariant.
 *
 * NOTE(review): the lines that read the raw value into $ret are omitted from
 * this listing.
 *
 * @return mixed The result of validateVariant() for the requested value
 */
254 public function getURLVariant() {
# Reuse the cached value; a falsy cached result is recomputed on every call.
257 if ( $this->mURLVariant ) {
258 return $this->mURLVariant;
268 $this->mURLVariant = $this->validateVariant(
$ret );
269 return $this->mURLVariant;
/**
 * Determine the variant from the current user's options and cache it in
 * $this->mUserVariant.
 *
 * NOTE(review): the branch structure is partly omitted from this listing.
 *
 * @return mixed The result of validateVariant() for the chosen option value
 */
277 protected function getUserVariant() {
# Bail out when the user object cannot be loaded safely (the action taken is
# on an omitted line).
290 if ( !$wgUser->isSafeToLoad() ) {
# Logged-in users: the option name depends on whether this converter's main
# language is the content language ('variant') or not ('variant-<code>').
293 if ( $wgUser->isLoggedIn() ) {
295 $this->mMainLanguageCode ==
296 MediaWikiServices::getInstance()->getContentLanguage()->getCode()
298 $ret = $wgUser->getOption(
'variant' );
300 $ret = $wgUser->getOption(
'variant-' . $this->mMainLanguageCode );
# Presumably the branch for users who are not logged in (the else line is
# omitted): use the 'language' option instead - confirm.
305 $ret = $wgUser->getOption(
'language' );
308 $this->mUserVariant = $this->validateVariant(
$ret );
309 return $this->mUserVariant;
/**
 * Determine the variant from a list of candidate languages, caching the
 * result in $this->mHeaderVariant.
 *
 * NOTE(review): the source of the candidate languages and the loop header are
 * omitted from this listing; judging by the method name they presumably come
 * from a request header - confirm.
 *
 * @return mixed The first candidate (or, failing that, the first fallback)
 *  accepted by validateVariant()
 */
317 protected function getHeaderVariant() {
# Reuse the cached value when it is truthy.
320 if ( $this->mHeaderVariant ) {
321 return $this->mHeaderVariant;
331 $fallbackLanguages = [];
# First pass: accept a candidate language that is itself a valid variant.
333 $this->mHeaderVariant = $this->validateVariant( $language );
334 if ( $this->mHeaderVariant ) {
# Otherwise remember its fallbacks. A string fallback equal to the main
# language code is skipped.
341 $fallbacks = $this->getVariantFallbacks( $language );
342 if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
343 $fallbackLanguages[] = $fallbacks;
344 } elseif ( is_array( $fallbacks ) ) {
# NOTE(review): as shown the array_merge() result is discarded; the assignment
# target is presumably on the omitted preceding line - confirm.
346 array_merge( $fallbackLanguages, $fallbacks );
# Second pass: no candidate matched directly, so try the collected fallbacks
# (duplicates removed) in order.
350 if ( !$this->mHeaderVariant ) {
352 $fallback_languages = array_unique( $fallbackLanguages );
353 foreach ( $fallback_languages
as $language ) {
354 $this->mHeaderVariant = $this->validateVariant( $language );
355 if ( $this->mHeaderVariant ) {
361 return $this->mHeaderVariant;
/**
 * Convert text to a variant using the conversion tables, leaving markup
 * (tags, entities, parser markers, <code>/<script>/<pre> content) untouched
 * except for the 'title' and 'alt' attributes of tags.
 *
 * The text is split into translatable runs and literal elements; the
 * translatable runs are joined with "\000", translated in one translate()
 * call, and then interleaved again with the literal elements.
 *
 * NOTE(review): a number of lines (initialisation of $startPos, $sourceBlob,
 * $literalBlob and $output, iterator construction, and the return) are
 * omitted from this listing.
 *
 * @param string $text Text to convert
 * @param string|bool $toVariant Target variant; when not given the preferred
 *  variant is used (the guarding condition is on an omitted line)
 */
374 public function autoConvert( $text, $toVariant =
false ) {
378 $toVariant = $this->getPreferredVariant();
# guessVariant() is consulted before converting; the action taken when it
# returns true is on an omitted line.
384 if ( $this->guessVariant( $text, $toVariant ) ) {
# Alternative matching a parser marker: the marker prefix up to the next \x7f.
394 $marker =
'|' . Parser::MARKER_PREFIX .
'[^\x7f]++\x7f';
# Alternatives for a tag cut off by the end of the text (the \004 sentinel
# appended below) or by the start of the text.
397 $htmlfix =
'|<[^>\004]++(?=\004$)|^[^<>]*+>';
# Whole <code>...</code> elements are treated as literal.
404 $codefix =
'<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
# Whole <script>...</script> elements are treated as literal.
406 $scriptfix =
'<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
# Whole <pre>...</pre> elements are treated as literal.
408 $prefix =
'<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
# A complete tag including quoted attribute values, or - failing that -
# everything from '<' to the end of the subject.
411 $htmlFullTag =
'<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
# Combined pattern: the alternatives above, HTML entities, markers, and the
# end-of-text sentinel.
413 $reg =
'/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
414 '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix .
'|\004$/s';
# \000 and \004 are used as separator and sentinel below, so strip any that
# occur in the input.
421 $text = str_replace(
"\000",
'', $text );
422 $text = str_replace(
"\004",
'', $text );
424 $markupMatches =
null;
425 $elementMatches =
null;
429 while ( $startPos < strlen( $text ) ) {
# Search from $startPos in the text with the \004 sentinel appended.
430 if ( preg_match( $reg, $text .
"\004", $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
431 $elementPos = $markupMatches[0][1];
432 $element = $markupMatches[0][0];
# Only the sentinel matched: no more markup, the rest of the text is translatable.
433 if ( $element ===
"\004" ) {
435 $elementPos = strlen( $text );
# The match ran into the sentinel: drop the sentinel from the element.
437 } elseif ( substr( $element, -1 ) ===
"\004" ) {
443 $element = substr( $element, 0, -1 );
# Presumably the branch taken when preg_match() did not succeed (the else
# line is omitted): log an error carrying the first 500 bytes of the text.
450 $log = LoggerFactory::getInstance(
'languageconverter' );
451 $log->error(
"Hit pcre.backtrack_limit in " . __METHOD__
452 .
". Disabling language conversion for this page.",
454 "method" => __METHOD__,
455 "variant" => $toVariant,
456 "startOfText" => substr( $text, 0, 500 )
# Queue the text preceding the element for translation, \000-terminated.
462 $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) .
"\000";
# Continue scanning after the element.
465 $startPos = $elementPos + strlen( $element );
# For a tag with attributes, split it into tag name, attribute text and rest.
469 && preg_match(
'/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
476 $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
# Remember whether the attribute text ended in '/' so that it can be re-added.
478 $close = substr( $elementMatches[2], -1 ) ===
'/' ?
' /' :
'';
# Only the 'title' and 'alt' attributes are candidates for conversion.
480 foreach ( [
'title',
'alt' ]
as $attrName ) {
481 if ( !isset( $attrs[$attrName] ) ) {
484 $attr = $attrs[$attrName];
# Skip values containing '://'. Note that strpos() returns 0 for a match at
# offset 0, which is also falsy, so a value that starts with '://' is still
# converted.
486 if ( !strpos( $attr,
'://' ) ) {
487 $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
# Store the attribute back only when conversion changed it.
490 if ( $attr !== $attrs[$attrName] ) {
491 $attrs[$attrName] = $attr;
# Rebuild the element from the tag name, the attributes and the remainder.
496 $element = $elementMatches[1] . Html::expandAttributes( $attrs ) .
497 $close . $elementMatches[3];
# Queue the (possibly rebuilt) element as a literal, \000-terminated.
500 $literalBlob .= $element .
"\000";
# Translate all queued text runs in a single call.
504 $translatedBlob = $this->
translate( $sourceBlob, $toVariant );
# Interleave translated runs and literal elements in their original order.
510 while ( $translatedIter->valid() && $literalIter->valid() ) {
511 $output .= $translatedIter->current();
512 $output .= $literalIter->current();
513 $translatedIter->next();
514 $literalIter->next();
/**
 * Translate a string to a variant by applying that variant's replacement
 * table.
 *
 * NOTE(review): the return statement is omitted from this listing; callers
 * use the return value as the translated text.
 *
 * @param string $text Text to translate
 * @param string $variant Variant code used as key into $this->mTables
 */
529 public function translate( $text, $variant ) {
# Skip the table lookup for text that trims to a falsy string (empty, or "0").
532 if ( trim( $text ) ) {
534 $text = $this->mTables[$variant]->replace( $text );
/**
 * Process a text for every variant in $this->mVariants.
 *
 * NOTE(review): the loop body and the return are omitted from this listing.
 * findVariantLink() iterates over the result, so it is presumably an array
 * with one converted text per variant - confirm.
 *
 * @param string $text Text to convert
 */
545 public function autoConvertToAllVariants( $text ) {
549 foreach ( $this->mVariants
as $variant ) {
/**
 * Apply a parsed manual conversion rule: take over its title (if any) and
 * add its pairs to, or remove them from, the conversion tables.
 *
 * @param object $convRule Rule object providing getTitle(), getConvTable()
 *  and getRulesAction() (its class is not visible in this listing)
 */
561 protected function applyManualConv( $convRule ) {
# A truthy rule title replaces the current conversion-rule title.
566 $newConvRuleTitle = $convRule->getTitle();
567 if ( $newConvRuleTitle ) {
569 $this->mConvRuleTitle = $newConvRuleTitle;
573 $convTable = $convRule->getConvTable();
574 $action = $convRule->getRulesAction();
575 foreach ( $convTable
as $variant => $pair ) {
# Use the validated variant code as the table key (the handling of an invalid
# variant is on omitted lines).
576 $v = $this->validateVariant( $variant );
581 if ( $action ==
'add' ) {
# 'add': register each from => to pair individually.
583 foreach ( $pair
as $from => $to ) {
584 $this->mTables[$v]->setPair( $from, $to );
586 } elseif ( $action ==
'remove' ) {
# 'remove': drop all pairs of this rule in one call.
587 $this->mTables[$v]->removeArray( $pair );
/**
 * Convert a title for display in the preferred variant.
 *
 * NOTE(review): only the namespace-prefix part is visible in this listing;
 * the handling of the main title text and the return are omitted.
 *
 * @param object $title Object providing getNamespace()
 */
599 public function convertTitle(
$title ) {
600 $variant = $this->getPreferredVariant();
601 $index =
$title->getNamespace();
# Namespace name in the target variant, followed by the ':' separator.
603 $text = $this->convertNamespace( $index, $variant ) .
':';
/**
 * Get the name of a namespace in a variant, using a short-lived cache.
 *
 * Lookup order: the cache; the 'conversion-ns<index>' message in the variant
 * language; that message in the content language run through translate();
 * finally the formatted namespace text of the variant's language object.
 *
 * NOTE(review): some lines between the lookups are omitted from this listing.
 *
 * @param int $index Namespace index
 * @param string|null $variant Variant code; null means the preferred variant
 * @return string Namespace name in the variant
 */
618 public function convertNamespace( $index, $variant =
null ) {
623 if ( $variant ===
null ) {
624 $variant = $this->getPreferredVariant();
# The cache key depends on both the namespace index and the variant.
627 $cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
628 $key =
$cache->makeKey(
'languageconverter',
'namespace-text', $index, $variant );
629 $nsVariantText =
$cache->get( $key );
# Anything other than false is treated as a cache hit.
630 if ( $nsVariantText !==
false ) {
631 return $nsVariantText;
# First try: the per-namespace message in the variant's own language.
635 $nsConvMsg =
wfMessage(
'conversion-ns' . $index )->inLanguage( $variant );
636 if ( $nsConvMsg->exists() ) {
637 $nsVariantText = $nsConvMsg->plain();
# Second try: the content-language message, converted to the variant.
642 if ( $nsVariantText ===
false ) {
643 $nsConvMsg =
wfMessage(
'conversion-ns' . $index )->inContentLanguage();
644 if ( $nsConvMsg->exists() ) {
645 $nsVariantText = $this->
translate( $nsConvMsg->plain(), $variant );
# Last resort: ask the language object of the variant for the namespace text.
649 if ( $nsVariantText ===
false ) {
651 $langObj = $this->mLangObj->factory( $variant );
652 $nsVariantText = $langObj->getFormattedNsText( $index );
# Store the result; 60 is presumably the expiry in seconds - confirm against
# the cache API.
655 $cache->set( $key, $nsVariantText, 60 );
657 return $nsVariantText;
/**
 * Convert text to the preferred variant.
 *
 * @param string $text Text to convert
 * @return mixed Whatever convertTo() returns for the preferred variant
 */
678 public function convert( $text ) {
679 $variant = $this->getPreferredVariant();
680 return $this->convertTo( $text, $variant );
/**
 * Convert text to the given variant.
 *
 * NOTE(review): lines between the signature and the visible statements are
 * omitted from this listing.
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @return mixed Result of recursiveConvertTopLevel()
 */
692 public function convertTo( $text, $variant ) {
# Forget any title left over from an earlier conversion.
698 $this->mConvRuleTitle =
false;
699 return $this->recursiveConvertTopLevel( $text, $variant );
/**
 * Convert text at the top level: plain stretches go through autoConvert(),
 * and each "-{" rule start is handed to recursiveConvertRule().
 *
 * NOTE(review): initialisation of $startPos, $out and $continue, parts of the
 * preg_match() call, and the return are omitted from this listing.
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @param int $depth Current nesting depth
 */
711 protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
714 $length = strlen( $text );
# Automatic conversion is applied only when guessVariant() returns a falsy value.
715 $shouldConvert = !$this->guessVariant( $text, $variant );
# The three alternatives below consume <script> elements, <style> elements and
# tags and then force a failure with (*SKIP)(*FAIL), so a "-{" inside them is
# never reported as a match.
718 $noScript =
'<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
719 $noStyle =
'<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
721 $noHtml =
'<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
722 while ( $startPos < $length && $continue ) {
# Look for the next rule start "-{" outside of the skipped constructs.
723 $continue = preg_match(
725 "/$noScript|$noStyle|$noHtml|-\{/",
# Take the remainder of the text from $startPos (the guarding condition is
# on an omitted line).
734 $fragment = substr( $text, $startPos );
735 $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
# Take the text between $startPos and $pos ($pos is set on an omitted line).
743 $fragment = substr( $text, $startPos, $pos - $startPos );
744 $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
# Process the rule; $startPos is passed by reference and advanced by the callee.
750 $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
/**
 * Convert a manual conversion rule starting at "-{" in the text, handling
 * nested rules recursively up to $this->mMaxDepth.
 *
 * NOTE(review): several lines (initialisation of $inner, branch selection on
 * the matched token, construction of $rule) are omitted from this listing.
 *
 * @param string $text Full text being converted
 * @param string $variant Target variant code
 * @param int &$startPos Offset of the "-{" to process
 * @param int $depth Current nesting depth
 * @throws MWException When $text does not have "-{" at $startPos, or on an
 *  unexpected regex match
 * @return string Display text of the rule, or, for an unterminated rule,
 *  "-{" followed by the auto-converted inner text
 */
766 protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
# The caller must position $startPos exactly on a "-{".
768 if ( $text[$startPos] !==
'-' || $text[$startPos + 1] !==
'{' ) {
769 throw new MWException( __METHOD__ .
': invalid input string' );
# Ensures the depth warning is emitted at most once per call.
774 $warningDone =
false;
775 $length = strlen( $text );
777 while ( $startPos < $length ) {
# Find the next rule opener or closer at or after $startPos.
779 preg_match(
'/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
# Collect the text up to the matched token ($pos is set on an omitted line).
790 $inner .= substr( $text, $startPos, $pos - $startPos );
# Nested rule at or beyond the depth limit: emit an error message instead of
# recursing.
798 if ( $depth >= $this->mMaxDepth ) {
800 if ( !$warningDone ) {
801 $inner .=
'<span class="error">' .
802 wfMessage(
'language-converter-depth-warning' )
803 ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
# Within the limit: recurse into the nested rule.
811 $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
# Parse the collected rule, apply its table changes and return its display text.
817 $rule->parse( $variant );
818 $this->applyManualConv( $rule );
819 return $rule->getDisplay();
821 throw new MWException( __METHOD__ .
': invalid regex match' );
# End of text reached without returning from a completed rule: keep the rest
# as inner text.
826 if ( $startPos < $length ) {
827 $inner .= substr( $text, $startPos );
# Re-emit the opener literally and auto-convert the inner text.
830 return '-{' . $this->autoConvert( $inner, $variant );
/**
 * If a link target does not exist, look for an existing page under one of
 * the variant forms of the link text and update $link accordingly.
 *
 * NOTE(review): many lines (global declarations, definition of $action and
 * $disableLinkConversion, creation of $varnt and $linkBatch, any update of
 * $nt) are omitted from this listing.
 *
 * @param string &$link Link text; overwritten with the text of the variant
 *  title that exists
 * @param object &$nt Title-like object for the link (providing exists() and
 *  getNamespace())
 * @param bool $ignoreOtherCond When true, the request-based conditions that
 *  suppress link conversion are not evaluated
 */
844 public function findVariantLink( &
$link, &$nt, $ignoreOtherCond =
false ) {
845 # If the article has already existed, there is no need to
846 # check it again, otherwise it may cause a fault.
847 if ( is_object( $nt ) && $nt->exists() ) {
# Request parameter 'redirect', defaulting to 'yes'.
852 $isredir =
$wgRequest->getText(
'redirect',
'yes' );
# Special handling for edit requests that carry the 'redlink' parameter
# (the action taken is on an omitted line).
854 if ( $action ==
'edit' &&
$wgRequest->getBool(
'redlink' ) ) {
# Request parameter 'linkconvert', defaulting to 'yes'.
857 $linkconvert =
$wgRequest->getText(
'linkconvert',
'yes' );
# Link conversion is suppressed when disabled outright, or - unless
# $ignoreOtherCond is set - by request conditions such as action=submit or
# linkconvert=no (further conditions are on omitted lines).
864 if ( $disableLinkConversion ||
865 ( !$ignoreOtherCond &&
868 || $action ==
'submit'
869 || $linkconvert ==
'no' ) ) ) {
873 if ( is_object( $nt ) ) {
874 $ns = $nt->getNamespace();
# Candidate link texts derived from $link for all variants.
877 $variants = $this->autoConvertToAllVariants(
$link );
# Add every candidate title to a link batch so that they are looked up together.
884 foreach ( $variants
as $v ) {
887 if ( !is_null( $varnt ) ) {
888 $linkBatch->addObj( $varnt );
895 $linkBatch->execute();
# A candidate with a positive article ID exists: use its text as the link.
898 if ( $varnt->getArticleID() > 0 ) {
900 $link = $varnt->getText();
/**
 * Get a string that depends on the preferred variant; judging by the name it
 * is meant to be appended to a hash/cache key - confirm with callers.
 *
 * @return string '!' followed by the preferred variant code
 */
911 public function getExtraHashOptions() {
912 $variant = $this->getPreferredVariant();
914 return '!' . $variant;
/**
 * Guess whether a text is already written in the given variant.
 *
 * NOTE(review): the body is omitted from this listing. autoConvert() and
 * recursiveConvertTopLevel() test the result for truthiness;
 * recursiveConvertTopLevel() skips automatic conversion when it is truthy.
 *
 * @param string $text Text to examine
 * @param string $variant Variant code
 */
927 public function guessVariant( $text, $variant ) {
/**
 * Load the default conversion tables. This implementation only throws,
 * telling the developer that the class named in $class must implement the
 * method ($class is set on an omitted line).
 *
 * @throws MWException Always, in the visible code
 */
938 function loadDefaultTables() {
940 throw new MWException(
"Must implement loadDefaultTables() method in class $class" );
/**
 * Load the conversion tables once per instance, from the cache when a valid
 * cached copy exists and otherwise by rebuilding them.
 *
 * NOTE(review): the lines that obtain $cache and that use $fromCache are
 * omitted from this listing; reloadTables() passes false, presumably to skip
 * the cache read - confirm.
 *
 * @param bool $fromCache See the note above
 */
948 function loadTables( $fromCache =
true ) {
# Already loaded: nothing to do (the early exit itself is on an omitted line).
951 if ( $this->mTablesLoaded ) {
# Mark as loaded before doing the work.
955 $this->mTablesLoaded =
true;
956 $this->mTables =
false;
# The cache key is specific to the main language code.
958 $cacheKey =
$cache->makeKey(
'conversiontables', $this->mMainLanguageCode );
960 $this->mTables =
$cache->get( $cacheKey );
# Rebuild when nothing usable was loaded or the loaded copy lacks the current
# version key.
962 if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
966 $this->loadDefaultTables();
# Merge the table parsed by parseCachedTable() into each variant's default table.
967 foreach ( $this->mVariants
as $var ) {
968 $cached = $this->parseCachedTable( $var );
969 $this->mTables[$var]->mergeArray( $cached );
# Post-processing hook, then tag the tables with the version key and store them.
972 $this->postLoadTables();
973 $this->mTables[self::CACHE_VERSION_KEY] =
true;
# 43200 is presumably the expiry in seconds (12 hours) - confirm against the
# cache API.
975 $cache->set( $cacheKey, $this->mTables, 43200 );
/**
 * Hook called by loadTables() after the default and parsed tables have been
 * merged and before they are cached.
 *
 * NOTE(review): the body is omitted from this listing.
 */
982 function postLoadTables() {
/**
 * Discard the loaded conversion tables and load them again via
 * loadTables( false ).
 */
992 private function reloadTables() {
# Drop the current tables, if any.
993 if ( $this->mTables ) {
994 unset( $this->mTables );
# Clear the loaded flag so that loadTables() does not return early.
997 $this->mTablesLoaded =
false;
998 $this->loadTables(
false );
/**
 * Parse the conversion table text for a variant (and optionally a subpage)
 * into an array of from => to pairs, following links to further subpages.
 *
 * NOTE(review): many lines (retrieval of the text into $txt, construction of
 * $subs, $blocks, $table and $sublink, initialisation of $ret and $sublinks,
 * the use of $recursive, merging of $s, the returns) are omitted from this
 * listing.
 *
 * @param string $code Variant code
 * @param string $subpage Subpage name; '' for the top-level table
 * @param bool $recursive Passed on unchanged to nested calls
 */
1020 function parseCachedTable(
$code, $subpage =
'', $recursive =
true ) {
# Keys already handled during this request, shared across all calls.
1021 static $parsed = [];
1023 $key =
'Conversiontable/' .
$code;
1025 $key .=
'/' . $subpage;
# Each key is processed at most once (the early exit is on an omitted line).
1027 if ( array_key_exists( $key, $parsed ) ) {
1031 $parsed[$key] =
true;
# The top-level table and subpages are handled differently (details omitted).
1033 if ( $subpage ===
'' ) {
1050 # Nothing to parse if there's no text
1051 if ( $txt ===
false || $txt ===
null || $txt ===
'' ) {
# Expected first path segment of links to sub-tables: starts with the
# namespace text for NS_MEDIAWIKI (the rest of the expression is omitted).
1057 $linkhead = $this->mLangObj->getNsText(
NS_MEDIAWIKI ) .
1061 foreach ( $subs
as $sub ) {
# Cut the link target off at the closing ']]'.
1062 $link = explode(
']]', $sub, 2 );
# Drop any '|label' part, then split the target into at most three
# '/'-separated segments.
1066 $b = explode(
'|',
$link[0], 2 );
1067 $b = explode(
'/', trim( $b[0] ), 3 );
1068 if (
count( $b ) == 3 ) {
# Only links under this variant's own conversion table are followed.
1074 if ( $b[0] == $linkhead && $b[1] ==
$code ) {
1075 $sublinks[] = $sublink;
1083 foreach ( $blocks
as $block ) {
# Keep only the part of the block before the closing '}-'.
1089 $mappings = explode(
'}-', $block, 2 )[0];
# Remove quote characters and the '*' and '#' characters from the mappings.
1090 $stripped = str_replace( [
"'",
'"',
'*',
'#' ],
'', $mappings );
1092 foreach ( $table
as $t ) {
# A valid entry has exactly one '=>'; anything else is not a two-part entry.
1093 $m = explode(
'=>',
$t, 3 );
1094 if (
count( $m ) != 2 ) {
# Everything after '//' on the right-hand side is discarded.
1098 $tt = explode(
'//', $m[1], 2 );
1099 $ret[trim( $m[0] )] = trim( $tt[0] );
# Parse the linked subpages with the same $code and $recursive.
1105 foreach ( $sublinks
as $link ) {
1106 $s = $this->parseCachedTable(
$code,
$link, $recursive );
# Optionally add a first-letter-uppercased copy of every pair.
1111 if ( $this->mUcfirst ) {
1112 foreach (
$ret as $k => $v ) {
1113 $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
/**
 * Wrap text in "-{R|...}-" markup unless it already contains conversion
 * markup.
 *
 * NOTE(review): the use of $noParse and the returns are omitted from this
 * listing.
 *
 * @param string $text Text to mark
 * @param bool $noParse Not used in the visible lines
 */
1127 public function markNoConversion( $text, $noParse =
false ) {
1128 # don't mark if already marked
# NOTE(review): strpos() returns 0 for a match at offset 0, which is falsy, so
# text that begins with '-{' (or '}-') is not detected by this test unless the
# marker also occurs later in the text.
1129 if ( strpos( $text,
'-{' ) || strpos( $text,
'}-' ) ) {
1133 $ret =
"-{R|$text}-";
/**
 * Convert a category sort key.
 *
 * NOTE(review): the body is omitted from this listing, so the behaviour is
 * not visible here.
 *
 * @param string $key Key to convert
 */
1145 function convertCategoryKey( $key ) {
/**
 * Reload the conversion tables when the given title is a conversion table
 * page of a valid variant.
 *
 * NOTE(review): the lines that derive $t and $c from the title are omitted
 * from this listing; presumably $t holds the title's '/'-separated parts and
 * $c their count - confirm.
 *
 * @param Title $titleobj Title of the page concerned
 */
1155 public function updateConversionTable(
Title $titleobj ) {
# Needs at least two parts, the first being 'Conversiontable'.
1160 if ( $c > 1 &&
$t[0] ==
'Conversiontable' ) {
# The second part must be a variant accepted by validateVariant().
1161 if ( $this->validateVariant(
$t[1] ) ) {
1162 $this->reloadTables();
/**
 * Get the pattern used to separate variant entries, building it on first
 * use and caching it in $this->mVarSeparatorPattern.
 *
 * NOTE(review): the start and end of the pattern string, and the loop that
 * supplies $old and $new, are omitted from this listing.
 *
 * @return string The cached pattern
 */
1172 function getVarSeparatorPattern() {
# Build the pattern only once.
1173 if ( is_null( $this->mVarSeparatorPattern ) ) {
# Set of variant codes to recognise, starting with the configured variants.
1185 $expandedVariants = [];
1186 foreach ( $this->mVariants
as $variant ) {
1187 $expandedVariants[ $variant ] = 1;
# Also accept $old whenever the code it maps to ($new) is a known variant.
1193 if ( isset( $expandedVariants[ $new ] ) ) {
1194 $expandedVariants[ $old ] = 1;
1199 foreach ( $expandedVariants
as $variant => $ignore ) {
# Alternative for the "variant:" form.
1201 $pat .= $variant .
'\s*:|';
# Alternative for the "text => variant:" form.
1203 $pat .=
'[^;]*?=>\s*' . $variant .
'\s*:|';
1206 $this->mVarSeparatorPattern = $pat;
1208 return $this->mVarSeparatorPattern;