/**
 * Converts text between the written variants of one language.
 *
 * Holds the variant list, the per-variant fallbacks and the per-variant
 * replacement tables that translate() applies.
 * NOTE(review): this listing shows only part of the class; many lines are absent.
 */
34 class LanguageConverter {
// Static array property; its entries are not visible in this listing.
40 static public $languagesWithVariants = [
// Main language code; set by the constructor and returned by several getters
// as the last-resort variant.
53 public $mMainLanguageCode;
// Fallback(s) per variant code; set by the constructor, read by getVariantFallbacks().
59 public $mVariantFallbacks;
// Not referenced in the visible code.
// NOTE(review): presumably display names keyed by variant - confirm.
60 public $mVariantNames;
// Set to true by loadTables() and back to false by reloadTables().
61 public $mTablesLoaded =
false;
// Two separator characters, a colon and a semicolon. Their use is not visible here.
// NOTE(review): presumably separators used when describing rules - confirm.
68 public $mDescCodeSep =
':', $mDescVarSep =
';';
// When truthy, parseCachedTable() also stores ucfirst-ed copies of each mapping.
69 public $mUcfirst =
false;
// Title supplied by a manual conversion rule (see applyManualConv());
// convertTo() resets it to false before converting.
70 public $mConvRuleTitle =
false;
// Cache for the result of getHeaderVariant().
73 public $mHeaderVariant;
// Depth limit checked by recursiveConvertRule() before recursing into a nested rule.
74 public $mMaxDepth = 10;
// Built once and then reused by getVarSeparatorPattern().
75 public $mVarSeparatorPattern;
// Marker key that loadTables() stores inside the cached tables array. A cached
// array lacking this exact key is treated as unusable and the tables are rebuilt.
77 const CACHE_VERSION_KEY =
'VERSION 7';
/**
 * Stores the language object, main code and fallbacks, merges the caller flags
 * with the defaults, and records a manual-conversion level for every variant.
 *
 * NOTE(review): the end of the signature and the lines that define
 * $defaultflags, $manualLevel and mVariants are not visible in this listing.
 *
 * @param object $langobj Language object, kept in mLangObj
 * @param string $maincode Main language code
 * @param array $variants Variant codes (the assignment is not visible here)
 * @param array $variantfallbacks Fallback(s) per variant code
 * @param array $flags Flags combined with $defaultflags through array_merge
 */
87 public function __construct( $langobj, $maincode, $variants = [],
88 $variantfallbacks = [],
$flags = [],
91 $this->mLangObj = $langobj;
92 $this->mMainLanguageCode = $maincode;
94 $this->mVariantFallbacks = $variantfallbacks;
109 $this->mFlags = array_merge( $defaultflags,
$flags );
110 foreach ( $this->mVariants
as $v ) {
// A level listed in $manualLevel is copied for this variant.
111 if ( array_key_exists( $v, $manualLevel ) ) {
112 $this->mManualLevel[$v] = $manualLevel[$v];
// NOTE(review): the line between 112 and 114 is absent; presumably an else,
// making bidirectional the default level.
114 $this->mManualLevel[$v] =
'bidirectional';
// Every variant code is also registered in mFlags under its own name.
116 $this->mFlags[$v] = $v;
/**
 * Returns the variant codes held in mVariants.
 *
 * @return mixed Whatever mVariants holds; other methods iterate it with foreach
 */
126 public function getVariants() {
127 return $this->mVariants;
/**
 * Returns the fallback entry configured for a variant.
 *
 * @param string $variant Variant code used as the key into mVariantFallbacks
 * @return mixed The configured entry when one is set (getHeaderVariant() handles
 *  both string and array results); the main language code is returned further down
 */
141 public function getVariantFallbacks( $variant ) {
142 if ( isset( $this->mVariantFallbacks[$variant] ) ) {
143 return $this->mVariantFallbacks[$variant];
// Reached when the return above did not run.
145 return $this->mMainLanguageCode;
/**
 * Returns mConvRuleTitle, which applyManualConv() sets from a rule title and
 * convertTo() resets to false.
 *
 * @return mixed The stored title, or false when none is set
 */
152 public function getConvRuleTitle() {
153 return $this->mConvRuleTitle;
/**
 * Determines the variant to convert to.
 *
 * The visible lines read a candidate from the URL, the user and the request
 * header, validate it, and return the main language code as the final fallback.
 * NOTE(review): the conditions that choose between the three sources and the
 * return of the validated candidate are not visible in this listing.
 *
 * @return string Variant code
 */
160 public function getPreferredVariant() {
163 $req = $this->getURLVariant();
166 $req = $this->getUserVariant();
168 $req = $this->getHeaderVariant();
179 if ( $this->validateVariant(
$req ) ) {
// Last resort: the main language code.
182 return $this->mMainLanguageCode;
/**
 * Determines a default variant without consulting the user.
 *
 * The visible lines read a candidate from the URL and from the request header
 * (getUserVariant() is not called here) and return the main language code as
 * the final fallback.
 * NOTE(review): the guarding conditions and earlier returns are not visible.
 *
 * @return string Variant code
 */
190 public function getDefaultVariant() {
193 $req = $this->getURLVariant();
196 $req = $this->getHeaderVariant();
206 return $this->mMainLanguageCode;
/**
 * Checks a candidate variant code against the known variants.
 *
 * The visible test passes when $variant is not null and is found in mVariants
 * (in_array is called without its strict flag, so the comparison is loose).
 * NOTE(review): the return statements are not visible. Callers store the result
 * as the variant and test it for truthiness, so presumably the code itself is
 * returned on success and a falsy value otherwise - confirm.
 *
 * @param string|null $variant Candidate variant code
 */
214 public function validateVariant( $variant =
null ) {
215 if ( $variant !==
null && in_array( $variant, $this->mVariants ) ) {
/**
 * Returns the variant requested through the URL, caching it in mURLVariant.
 *
 * NOTE(review): the lines that fill $ret are not visible in this listing.
 *
 * @return mixed The validated variant as returned by validateVariant()
 */
226 public function getURLVariant() {
// A truthy cached value is returned directly.
229 if ( $this->mURLVariant ) {
230 return $this->mURLVariant;
// The candidate is validated, and the outcome is cached even when validation fails.
240 $this->mURLVariant = $this->validateVariant(
$ret );
241 return $this->mURLVariant;
/**
 * Returns the variant taken from the user options, stored in mUserVariant.
 *
 * Uses the $wgUser and $wgContLang objects (their declaration is not visible).
 * NOTE(review): several lines are absent, so the exact branch structure cannot
 * be read from this listing.
 *
 * @return mixed The validated variant as returned by validateVariant()
 */
249 protected function getUserVariant() {
// Guard for a user object that is not yet safe to load; the guarded
// statement is not visible.
262 if ( !
$wgUser->isSafeToLoad() ) {
// Compares the converter language with the content language. The statement
// run when they are equal (line 267) is absent.
266 if ( $this->mMainLanguageCode ==
$wgContLang->getCode() ) {
// Reads the option named variant-<main code>.
// NOTE(review): presumably the else branch of the test above - confirm.
269 $ret =
$wgUser->getOption(
'variant-' . $this->mMainLanguageCode );
277 $this->mUserVariant = $this->validateVariant(
$ret );
278 return $this->mUserVariant;
/**
 * Returns the variant derived from request header languages, cached in
 * mHeaderVariant.
 *
 * Each candidate $language is validated directly. While doing so, the
 * fallbacks of the candidates are collected and tried in a second pass when
 * no candidate matched.
 * NOTE(review): the lines that obtain the candidate languages and open the
 * first loop are not visible in this listing.
 *
 * @return mixed The validated variant as returned by validateVariant()
 */
286 protected function getHeaderVariant() {
// A truthy cached value is returned directly.
289 if ( $this->mHeaderVariant ) {
290 return $this->mHeaderVariant;
300 $fallbackLanguages = [];
302 $this->mHeaderVariant = $this->validateVariant( $language );
303 if ( $this->mHeaderVariant ) {
310 $fallbacks = $this->getVariantFallbacks( $language );
// A single string fallback is kept unless it is just the main language code.
311 if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
312 $fallbackLanguages[] = $fallbacks;
313 } elseif ( is_array( $fallbacks ) ) {
// NOTE(review): array_merge returns a new array and does not modify its
// arguments. Line 314, absent from this listing, presumably holds the
// assignment target; confirm against the full file, because as shown the
// merged result would be discarded.
315 array_merge( $fallbackLanguages, $fallbacks );
// Second pass: no direct match, so try the collected fallbacks without duplicates.
319 if ( !$this->mHeaderVariant ) {
321 $fallback_languages = array_unique( $fallbackLanguages );
322 foreach ( $fallback_languages
as $language ) {
323 $this->mHeaderVariant = $this->validateVariant( $language );
324 if ( $this->mHeaderVariant ) {
330 return $this->mHeaderVariant;
/**
 * Converts a piece of text to a variant using the automatic tables while
 * leaving markup untouched.
 *
 * Visible outline: the text is scanned with one regular expression that
 * matches pieces to keep literally. The plain text between matches is gathered
 * in $sourceBlob, the literal pieces in $literalBlob, each piece terminated by
 * a NUL byte. $sourceBlob is translated in a single translate() call and the
 * two sequences are then interleaved into $output.
 * NOTE(review): many lines are absent from this listing (initialisation of
 * $startPos, $sourceBlob, $literalBlob, the iterators and the returns).
 *
 * @param string $text Text to convert
 * @param string|bool $toVariant Target variant; the default false leads to
 *  getPreferredVariant() being used (the guarding condition is not visible)
 */
343 public function autoConvert( $text, $toVariant =
false ) {
347 $toVariant = $this->getPreferredVariant();
// guessVariant() is consulted first; the guarded statement is not visible.
353 if ( $this->guessVariant( $text, $toVariant ) ) {
// Alternative matching a parser marker: the marker prefix up to a 0x7f byte.
363 $marker =
'|' . Parser::MARKER_PREFIX .
'[^\x7f]++\x7f';
// Alternatives for a tag cut off at the end of the text (the 0x04 sentinel
// follows it) or cut off at the start of the text.
366 $htmlfix =
'|<[^>\004]++(?=\004$)|^[^<>]*+>';
// Whole code, script and pre elements are matched as one literal piece each.
373 $codefix =
'<code>[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|';
375 $scriptfix =
'<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|';
377 $prefix =
'<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|';
// A complete tag including quoted attribute values, or everything after an
// opening angle bracket when the tag never closes.
380 $htmlFullTag =
'<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|';
// Full pattern: the pieces above plus character entities and the end sentinel.
382 $reg =
'/' . $codefix . $scriptfix . $prefix . $htmlFullTag .
383 '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix .
'|\004$/s';
// Bytes 0x00 and 0x04 are removed from the input because they serve as the
// piece separator and the end sentinel below.
390 $text = str_replace(
"\000",
'', $text );
391 $text = str_replace(
"\004",
'', $text );
393 $markupMatches =
null;
394 $elementMatches =
null;
398 while ( $startPos < strlen( $text ) ) {
// The sentinel is appended only for matching; $text itself stays unchanged.
399 if ( preg_match( $reg, $text .
"\004", $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
400 $elementPos = $markupMatches[0][1];
401 $element = $markupMatches[0][0];
// Only the sentinel matched: position the element at the end of the text.
402 if ( $element ===
"\004" ) {
404 $elementPos = strlen( $text );
// The match ends with the sentinel: drop that trailing byte from the element.
406 } elseif ( substr( $element, -1 ) ===
"\004" ) {
412 $element = substr( $element, 0, -1 );
// Error report attributing the failure to pcre.backtrack_limit, with the
// first 500 bytes of the text as context.
// NOTE(review): presumably the branch taken when preg_match fails - confirm.
419 $log = LoggerFactory::getInstance(
'languageconverter' );
420 $log->error(
"Hit pcre.backtrack_limit in " . __METHOD__
421 .
". Disabling language conversion for this page.",
423 "method" => __METHOD__,
424 "variant" => $toVariant,
425 "startOfText" => substr( $text, 0, 500 )
// Plain text preceding the element, terminated by a NUL byte.
431 $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) .
"\000";
// Continue scanning after the element.
434 $startPos = $elementPos + strlen( $element );
// Splits a tag into its name, its attribute text and the remainder.
438 && preg_match(
'/^(<[^>\s]*+)\s([^>]*+)(.*+)$/', $element, $elementMatches )
445 $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
// Remember whether the attribute text ended with a slash.
447 $close = substr( $elementMatches[2], -1 ) ===
'/' ?
' /' :
'';
// Only the title and alt attributes are candidates for conversion.
449 foreach ( [
'title',
'alt' ]
as $attrName ) {
450 if ( !isset( $attrs[$attrName] ) ) {
453 $attr = $attrs[$attrName];
// Values containing :// are left unconverted.
// NOTE(review): strpos returns 0 for a match at the very start, which this
// negation treats like no match, so a value beginning with :// is converted.
455 if ( !strpos( $attr,
'://' ) ) {
456 $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
// Store the value back only when conversion changed it.
459 if ( $attr !== $attrs[$attrName] ) {
460 $attrs[$attrName] = $attr;
466 $close . $elementMatches[3];
// Literal piece, terminated by a NUL byte.
469 $literalBlob .= $element .
"\000";
// One translate() call covers all plain-text pieces at once.
473 $translatedBlob = $this->
translate( $sourceBlob, $toVariant );
// Interleave: translated piece, then literal piece, until either runs out.
479 while ( $translatedIter->valid() && $literalIter->valid() ) {
480 $output .= $translatedIter->current();
481 $output .= $literalIter->current();
482 $translatedIter->next();
483 $literalIter->next();
/**
 * Applies the replacement table of one variant to a text.
 *
 * NOTE(review): the line between the test and the replacement (502) and the
 * return are not visible in this listing.
 *
 * @param string $text Text to translate
 * @param string $variant Variant code used as the key into mTables
 */
498 public function translate( $text, $variant ) {
// The test relies on truthiness, so text that trims to an empty string or to
// the single character 0 fails it.
501 if ( trim( $text ) ) {
503 $text = $this->mTables[$variant]->replace( $text );
/**
 * Iterates over every variant in mVariants for the given text.
 *
 * NOTE(review): the loop body and the return are not visible in this listing.
 * findVariantLink() iterates over the result with foreach, so presumably an
 * array of converted texts is returned - confirm.
 *
 * @param string $text Text to convert
 */
514 public function autoConvertToAllVariants( $text ) {
518 foreach ( $this->mVariants
as $variant ) {
/**
 * Applies the effects of one parsed manual conversion rule to this converter:
 * the rule title and additions to or removals from the per-variant tables.
 *
 * @param object $convRule Rule object providing getTitle(), getConvTable()
 *  and getRulesAction()
 */
530 protected function applyManualConv( $convRule ) {
// A truthy rule title replaces the stored one.
535 $newConvRuleTitle = $convRule->getTitle();
536 if ( $newConvRuleTitle ) {
538 $this->mConvRuleTitle = $newConvRuleTitle;
542 $convTable = $convRule->getConvTable();
543 $action = $convRule->getRulesAction();
544 foreach ( $convTable
as $variant => $pair ) {
// Guard for variants that fail validation; the guarded statement is not visible.
545 if ( !$this->validateVariant( $variant ) ) {
// The add action stores every from/to pair in the table of that variant.
549 if ( $action ==
'add' ) {
551 foreach ( $pair
as $from => $to ) {
552 $this->mTables[$variant]->setPair( $from, $to );
// The remove action hands the pairs to removeArray on the same table.
554 } elseif ( $action ==
'remove' ) {
555 $this->mTables[$variant]->removeArray( $pair );
/**
 * Builds the converted text form of a title for the preferred variant.
 *
 * The visible lines convert the namespace through convertNamespace() and
 * append a colon.
 * NOTE(review): the condition around that step, the handling of the rest of
 * the title and the return are not visible in this listing.
 *
 * @param object $title Title object providing getNamespace()
 */
567 public function convertTitle(
$title ) {
568 $variant = $this->getPreferredVariant();
569 $index =
$title->getNamespace();
571 $text = $this->convertNamespace( $index, $variant ) .
':';
/**
 * Returns the name of a namespace in a given variant.
 *
 * Sources tried, each only while the text is still false: the local server
 * cache, the message conversion-ns<index> in the variant language, the same
 * message in the content language passed through translate(), and finally the
 * formatted namespace text of a language object created for the variant.
 * The result is written to the cache before it is returned.
 *
 * @param int $index Namespace index, appended to the message key
 * @param string|null $variant Variant code; null selects the preferred variant
 * @return mixed The namespace text
 */
586 public function convertNamespace( $index, $variant =
null ) {
591 if ( $variant ===
null ) {
592 $variant = $this->getPreferredVariant();
595 $cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
596 $key =
$cache->makeKey(
'languageconverter',
'namespace-text', $index, $variant );
597 $nsVariantText =
$cache->get( $key );
// Anything other than false counts as a cache hit.
598 if ( $nsVariantText !==
false ) {
599 return $nsVariantText;
// Message defined for the variant language.
603 $nsConvMsg =
wfMessage(
'conversion-ns' . $index )->inLanguage( $variant );
604 if ( $nsConvMsg->exists() ) {
605 $nsVariantText = $nsConvMsg->plain();
// Content-language message, converted with the automatic tables.
610 if ( $nsVariantText ===
false ) {
611 $nsConvMsg =
wfMessage(
'conversion-ns' . $index )->inContentLanguage();
612 if ( $nsConvMsg->exists() ) {
613 $nsVariantText = $this->
translate( $nsConvMsg->plain(), $variant );
// Last resort: namespace text from a language object for the variant.
617 if ( $nsVariantText ===
false ) {
619 $langObj = $this->mLangObj->factory( $variant );
620 $nsVariantText = $langObj->getFormattedNsText( $index );
// Third argument 60: NOTE(review) presumably a lifetime in seconds - confirm.
623 $cache->set( $key, $nsVariantText, 60 );
625 return $nsVariantText;
/**
 * Converts text to the preferred variant; shorthand for convertTo() with the
 * result of getPreferredVariant().
 *
 * @param string $text Text to convert
 * @return mixed Result of convertTo()
 */
642 public function convert( $text ) {
643 $variant = $this->getPreferredVariant();
644 return $this->convertTo( $text, $variant );
/**
 * Converts text to the given variant, processing manual rules on the way.
 *
 * NOTE(review): lines 655 to 659 are absent from this listing; an early
 * return may exist there.
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @return mixed Result of recursiveConvertTopLevel()
 */
654 public function convertTo( $text, $variant ) {
// Forget any title left over from a previous conversion.
660 $this->mConvRuleTitle =
false;
661 return $this->recursiveConvertTopLevel( $text, $variant );
/**
 * Converts text that may contain manual rules, working through it from
 * $startPos in a loop.
 *
 * Text outside rules goes through autoConvert() unless guessVariant() reported
 * a truthy result for the whole input. Each rule opener is handed to
 * recursiveConvertRule().
 * NOTE(review): the initialisation of $startPos, $out and $continue, the
 * remaining preg_match arguments and the return are not visible in this listing.
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @param int $depth Current nesting depth, passed on incremented by one
 */
673 protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
676 $length = strlen( $text );
677 $shouldConvert = !$this->guessVariant( $text, $variant );
// The three alternatives below consume script elements, style elements and
// tags and then force a failure, so a rule opener inside them is never
// reported as a match.
680 $noScript =
'<script.*?>.*?<\/script>(*SKIP)(*FAIL)';
681 $noStyle =
'<style.*?>.*?<\/style>(*SKIP)(*FAIL)';
683 $noHtml =
'<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)';
685 while ( $startPos < $length && $continue ) {
// Search for the next rule opener outside the skipped regions.
686 $continue = preg_match(
688 "/$noScript|$noStyle|$noHtml|-\{/",
// Remainder of the text from $startPos.
// NOTE(review): presumably the case where no further opener is found - confirm.
697 $fragment = substr( $text, $startPos );
698 $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
// Text between $startPos and the position $pos.
706 $fragment = substr( $text, $startPos, $pos - $startPos );
707 $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
// The rule is processed one level deeper. $startPos is a reference parameter
// of recursiveConvertRule().
713 $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
/**
 * Processes one manual rule that starts at $startPos, including nested rules.
 *
 * The text is scanned for the next opener or closer. Text in between is
 * gathered in $inner, nested openers recurse one level deeper up to mMaxDepth,
 * and at the closer the rule is parsed, applied through applyManualConv() and
 * its display text returned. When the input ends before a closer is found, the
 * rest is appended and the opener plus the auto-converted inner text is returned.
 * NOTE(review): the branch structure inside the loop, the creation of $rule
 * and the initialisation of $inner are not visible in this listing.
 *
 * @param string $text Full text being converted
 * @param string $variant Target variant code
 * @param int &$startPos Position of the rule opener; passed by reference
 * @param int $depth Current nesting depth
 * @throws MWException When the text at $startPos is not a rule opener, or with
 *  the message about an invalid regex match
 */
729 protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
// The caller must point $startPos at the two opener characters.
731 if ( $text[$startPos] !==
'-' || $text[$startPos + 1] !==
'{' ) {
732 throw new MWException( __METHOD__ .
': invalid input string' );
737 $warningDone =
false;
738 $length = strlen( $text );
740 while ( $startPos < $length ) {
// Find the next opener or closer at or after $startPos.
742 preg_match(
'/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
// Text up to the match position belongs to the current rule.
753 $inner .= substr( $text, $startPos, $pos - $startPos );
// Depth limit reached: an error span with the depth warning message is
// added, at most once per call.
761 if ( $depth >= $this->mMaxDepth ) {
763 if ( !$warningDone ) {
764 $inner .=
'<span class="error">' .
765 wfMessage(
'language-converter-depth-warning' )
766 ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
// Nested rule: recurse one level deeper.
774 $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
// Rule complete: parse it, apply its side effects and return its display text.
780 $rule->parse( $variant );
781 $this->applyManualConv( $rule );
782 return $rule->getDisplay();
784 throw new MWException( __METHOD__ .
': invalid regex match' );
// Reached after the loop; the rule was never closed.
789 if ( $startPos < $length ) {
790 $inner .= substr( $text, $startPos );
793 return '-{' . $this->autoConvert( $inner, $variant );
/**
 * Looks for an existing page under one of the variant forms of a link and, when
 * one is found, rewrites $link to the text of that title.
 *
 * NOTE(review): many lines are absent from this listing, including the global
 * declarations, the early returns, the creation of $varnt and $linkBatch and
 * any assignment to $nt.
 *
 * @param string &$link Link text; overwritten when a variant title has an
 *  article id above zero
 * @param object &$nt Title object, passed by reference; the visible code only
 *  reads it
 * @param bool $ignoreOtherCond When true, the request-based conditions in the
 *  second part of the disabling test are skipped
 */
807 public function findVariantLink( &
$link, &$nt, $ignoreOtherCond =
false ) {
808 # If the article has already existed, there is no need to
809 # check it again, otherwise it may cause a fault.
810 if ( is_object( $nt ) && $nt->exists() ) {
// Request parameter redirect; defaults to yes.
815 $isredir =
$wgRequest->getText(
'redirect',
'yes' );
// Edit action combined with the redlink request flag.
817 if ( $action ==
'edit' &&
$wgRequest->getBool(
'redlink' ) ) {
// Request parameter linkconvert; defaults to yes.
820 $linkconvert =
$wgRequest->getText(
'linkconvert',
'yes' );
// Conversion is disabled either globally or, unless $ignoreOtherCond is set,
// by one of the request conditions (only partly visible here).
827 if ( $disableLinkConversion ||
828 ( !$ignoreOtherCond &&
831 || $action ==
'submit'
832 || $linkconvert ==
'no' ) ) ) {
836 if ( is_object( $nt ) ) {
837 $ns = $nt->getNamespace();
// All variant forms of the link text.
840 $variants = $this->autoConvertToAllVariants(
$link );
847 foreach ( $variants
as $v ) {
// Candidate titles that are not null are added to the batch.
850 if ( !is_null( $varnt ) ) {
851 $linkBatch->addObj( $varnt );
858 $linkBatch->execute();
// A candidate with an article id above zero supplies the new link text.
861 if ( $varnt->getArticleID() > 0 ) {
863 $link = $varnt->getText();
/**
 * Returns an exclamation mark followed by the preferred variant code.
 * NOTE(review): per the method name, presumably appended to a cache hash key - confirm.
 *
 * @return string
 */
874 public function getExtraHashOptions() {
875 $variant = $this->getPreferredVariant();
877 return '!' . $variant;
/**
 * Judges whether $text is already written in $variant.
 *
 * NOTE(review): the body is not visible in this listing. The callers use the
 * result as a boolean; recursiveConvertTopLevel() skips automatic conversion
 * when it is truthy.
 *
 * @param string $text Text to inspect
 * @param string $variant Variant code
 */
890 public function guessVariant( $text, $variant ) {
/**
 * Placeholder that throws, telling the reader that the class named in $class
 * must implement this method. loadTables() calls it when building the tables.
 * NOTE(review): the line that sets $class is not visible in this listing.
 *
 * @throws MWException Always, in the visible code
 */
901 function loadDefaultTables() {
903 throw new MWException(
"Must implement loadDefaultTables() method in class $class" );
/**
 * Loads the conversion tables into mTables, from the cache when possible.
 *
 * When the cached value is missing or lacks the CACHE_VERSION_KEY marker, the
 * tables are rebuilt: the defaults are loaded, the parsed table of each variant
 * is merged in, postLoadTables() runs, the marker is added and the result is
 * written back to the cache.
 * NOTE(review): the use of $fromCache, the creation of $cache and the early
 * return are not visible in this listing.
 *
 * @param bool $fromCache Defaults to true; reloadTables() passes false
 */
911 function loadTables( $fromCache =
true ) {
// Guard against loading twice; the guarded statement is not visible.
914 if ( $this->mTablesLoaded ) {
918 $this->mTablesLoaded =
true;
919 $this->mTables =
false;
// The cache key depends on the main language code.
921 $cacheKey =
$cache->makeKey(
'conversiontables', $this->mMainLanguageCode );
923 $this->mTables =
$cache->get( $cacheKey );
// Missing entry, or an entry written under a different version marker.
925 if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
929 $this->loadDefaultTables();
930 foreach ( $this->mVariants
as $var ) {
931 $cached = $this->parseCachedTable( $var );
932 $this->mTables[$var]->mergeArray( $cached );
935 $this->postLoadTables();
936 $this->mTables[self::CACHE_VERSION_KEY] =
true;
// Third argument 43200: NOTE(review) presumably seconds, twelve hours - confirm.
938 $cache->set( $cacheKey, $this->mTables, 43200 );
/**
 * Hook called by loadTables() after the parsed tables have been merged and
 * before the version marker is added.
 * NOTE(review): the body is not visible in this listing.
 */
945 function postLoadTables() {
/**
 * Discards the loaded tables and loads them again, passing false for the
 * $fromCache parameter of loadTables().
 */
955 private function reloadTables() {
956 if ( $this->mTables ) {
957 unset( $this->mTables );
// Clear the flag so that the guard in loadTables() does not stop the reload.
960 $this->mTablesLoaded =
false;
961 $this->loadTables(
false );
/**
 * Parses the conversion table page of a variant into an array of mappings.
 *
 * Visible outline: a page key is built from the code and optional subpage, a
 * key already recorded in $parsed is handled by an early guard, the raw page
 * text is read, links to subpages of the same code are collected, each block
 * of mappings is split into from/to entries, subpages are parsed through
 * recursive calls, and ucfirst-ed copies are added when mUcfirst is set.
 * NOTE(review): many lines are absent from this listing, including the
 * declaration of $parsed, the lookup of the revision, the construction of
 * $subs, $blocks, $table and $sublink, the use of $recursive and $s, and all
 * return statements.
 *
 * @param string $code Variant code
 * @param string $subpage Subpage name; the empty string gets separate handling
 * @param bool $recursive Passed through unchanged to the recursive calls
 */
983 function parseCachedTable(
$code, $subpage =
'', $recursive =
true ) {
986 $key =
'Conversiontable/' .
$code;
// NOTE(review): presumably appended only when $subpage is set; the guarding
// line is absent.
988 $key .=
'/' . $subpage;
// A page already recorded is not parsed again (the guarded statement is absent).
990 if ( array_key_exists( $key, $parsed ) ) {
994 $parsed[$key] =
true;
996 if ( $subpage ===
'' ) {
// Raw content of the revision.
1005 $txt = $revision->getContent(
Revision::RAW )->getNativeData();
1013 # Nothing to parse if there's no text
1014 if ( $txt ===
false || $txt ===
null || $txt ===
'' ) {
// Start of the expected link head: the namespace text for NS_MEDIAWIKI.
1020 $linkhead = $this->mLangObj->getNsText(
NS_MEDIAWIKI ) .
1024 foreach ( $subs
as $sub ) {
// Text before the closing double bracket is the link target.
1025 $link = explode(
']]', $sub, 2 );
// Drop any label after a pipe, then split the target into at most three
// parts at slashes.
1029 $b = explode(
'|',
$link[0], 2 );
1030 $b = explode(
'/', trim( $b[0] ), 3 );
1031 if (
count( $b ) == 3 ) {
// Keep only links whose head and code match the page being parsed.
1037 if ( $b[0] == $linkhead && $b[1] ==
$code ) {
1038 $sublinks[] = $sublink;
1046 foreach ( $blocks
as $block ) {
// A block ends at the first rule closer; quotes, asterisks and hash signs
// are stripped from it.
1052 $mappings = explode(
'}-', $block, 2 )[0];
1053 $stripped = str_replace( [
"'",
'"',
'*',
'#' ],
'', $mappings );
1055 foreach ( $table
as $t ) {
// An entry must split into exactly two parts at the arrow.
1056 $m = explode(
'=>',
$t, 3 );
1057 if (
count( $m ) != 2 ) {
// Anything after a double slash on the right-hand side is dropped before
// the trimmed pair is stored.
1061 $tt = explode(
'//', $m[1], 2 );
1062 $ret[trim( $m[0] )] = trim( $tt[0] );
// Subpages are parsed with the same code.
1068 foreach ( $sublinks
as $link ) {
1069 $s = $this->parseCachedTable(
$code,
$link, $recursive );
// Optional copies with the first letter of key and value upper-cased.
1074 if ( $this->mUcfirst ) {
1075 foreach (
$ret as $k => $v ) {
1076 $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
/**
 * Wraps text in a raw rule (opener, R flag, pipe, text, closer) so that the
 * converter leaves it alone.
 *
 * NOTE(review): the use of $noParse and the return statements are not visible
 * in this listing.
 *
 * @param string $text Text to protect
 * @param bool $noParse Not used in the visible lines
 */
1090 public function markNoConversion( $text, $noParse =
false ) {
1091 # don't mark if already marked
// NOTE(review): strpos returns 0 for a match at the very start of the string,
// which this truthiness test treats like no match; text that begins with the
// opener is then only detected through its closer.
1092 if ( strpos( $text,
'-{' ) || strpos( $text,
'}-' ) ) {
1096 $ret =
"-{R|$text}-";
/**
 * Takes a category key.
 * NOTE(review): the body is not visible in this listing, so the result cannot
 * be described from here.
 *
 * @param string $key Category key
 */
1108 function convertCategoryKey( $key ) {
/**
 * Reloads the conversion tables when the given title is a conversion table
 * page of a valid variant.
 *
 * NOTE(review): the lines that derive $t and $c from the title are not visible
 * in this listing; $t presumably holds the title text split at slashes and $c
 * its part count - confirm.
 *
 * @param Title $titleobj Title to inspect
 */
1118 public function updateConversionTable(
Title $titleobj ) {
// Requires more than one part, the first being Conversiontable.
1123 if ( $c > 1 &&
$t[0] ==
'Conversiontable' ) {
// The second part must be a known variant code.
1124 if ( $this->validateVariant(
$t[1] ) ) {
1125 $this->reloadTables();
/**
 * Returns the regular expression stored in mVarSeparatorPattern, building it
 * on first use.
 *
 * For every variant two alternatives are appended: the variant code followed
 * by optional whitespace and a colon, and the same preceded by text without
 * semicolons and an arrow.
 * NOTE(review): the start and end of the pattern and any escaping of the
 * variant code happen on lines absent from this listing.
 *
 * @return mixed The stored pattern
 */
1135 function getVarSeparatorPattern() {
// Build only once; later calls reuse the stored value.
1136 if ( is_null( $this->mVarSeparatorPattern ) ) {
1149 foreach ( $this->mVariants
as $variant ) {
1151 $pat .= $variant .
'\s*:|';
1153 $pat .=
'[^;]*?=>\s*' . $variant .
'\s*:|';
1156 $this->mVarSeparatorPattern = $pat;
1158 return $this->mVarSeparatorPattern;