/**
 * Language variant converter: holds per-variant conversion tables and the
 * logic that selects a target variant and converts text to it.
 */
32 class LanguageConverter {
// Static array; its entries are not visible in this listing.
38 static public $languagesWithVariants = [
// Code of the main language; assigned from the constructor's $maincode.
50 public $mMainLanguageCode;
// Per-variant fallback map; assigned from the constructor's $variantfallbacks
// and read by getVariantFallbacks().
56 public $mVariantFallbacks;
57 public $mVariantNames;
// Set to true by loadTables(); reloadTables() resets it to false.
58 public $mTablesLoaded =
false;
// Presumably the separators used inside conversion-rule descriptions
// (':' between code and text, ';' between entries) - TODO confirm.
70 public $mDescCodeSep =
':', $mDescVarSep =
';';
// When truthy, parseCachedTable() also stores a ucfirst() copy of each pair.
71 public $mUcfirst =
false;
// Title taken from the last applied conversion rule that had one, or false;
// convertTo() resets it to false before converting.
72 public $mConvRuleTitle =
false;
// Memoized result of getHeaderVariant().
75 public $mHeaderVariant;
// Depth limit checked by recursiveConvertRule(), which appends a depth
// warning to its output when the limit is reached.
76 public $mMaxDepth = 10;
// Built on first call of getVarSeparatorPattern(); null until then.
77 public $mVarSeparatorPattern;
// Key that loadTables() requires in the cached tables before accepting them.
79 const CACHE_VERSION_KEY =
'VERSION 7';
/**
 * Stores the language object, main code, enabled variants, fallbacks and
 * flags, and derives the cache key and the per-variant manual level.
 *
 * @param object $langobj Language object, kept in $this->mLangObj
 * @param string $maincode Main language code
 * @param array $variants Variant codes; those listed in $wgDisabledVariants are dropped
 * @param array $variantfallbacks Fallback(s) keyed by variant code
 * @param array $flags Merged over $defaultflags (defined in lines not shown here)
 */
89 public function __construct( $langobj, $maincode, $variants = [],
90 $variantfallbacks = [],
$flags = [],
92 global $wgDisabledVariants;
93 $this->mLangObj = $langobj;
94 $this->mMainLanguageCode = $maincode;
// array_diff() keeps the original keys, so the result may have gaps.
95 $this->mVariants = array_diff( $variants, $wgDisabledVariants );
96 $this->mVariantFallbacks = $variantfallbacks;
// Cache key under which loadTables() reads and stores the tables.
98 $this->mCacheKey =
wfMemcKey(
'conversiontables', $maincode );
// For string keys, values in $flags override those in $defaultflags.
112 $this->mFlags = array_merge( $defaultflags,
$flags );
113 foreach ( $this->mVariants
as $v ) {
// Manual level per variant: taken from $manualLevel when it has an entry;
// 'bidirectional' appears to be the fallback (branch line not visible).
114 if ( array_key_exists( $v, $manualLevel ) ) {
115 $this->mManualLevel[$v] = $manualLevel[$v];
117 $this->mManualLevel[$v] =
'bidirectional';
// Every variant code is also registered as a flag mapping to itself.
119 $this->mFlags[$v] = $v;
/**
 * Returns the variant list stored by the constructor, i.e. the supplied
 * variants minus those in $wgDisabledVariants.
 *
 * @return array
 */
129 public function getVariants() {
130 return $this->mVariants;
/**
 * Returns the fallback entry configured for $variant, or the main language
 * code when $this->mVariantFallbacks has no (non-null) entry for it.
 *
 * @param string $variant Variant code to look up
 * @return mixed The configured entry (getHeaderVariant() handles both string
 *  and array values) or the main language code
 */
144 public function getVariantFallbacks( $variant ) {
145 if ( isset( $this->mVariantFallbacks[$variant] ) ) {
146 return $this->mVariantFallbacks[$variant];
148 return $this->mMainLanguageCode;
/**
 * Returns $this->mConvRuleTitle: false by default and after convertTo()
 * resets it, otherwise the title set by applyManualConv().
 */
155 public function getConvRuleTitle() {
156 return $this->mConvRuleTitle;
/**
 * Picks the variant to convert to.
 *
 * Candidates consulted in the visible code, in this order: URL variant,
 * user variant, header variant, then $wgDefaultLanguageVariant when no
 * candidate has been found. The conditions guarding the first three
 * lookups are not visible here.
 *
 * @return string The main language code is the final return value shown;
 *  presumably the validated candidate is returned before it - confirm
 */
163 public function getPreferredVariant() {
166 $req = $this->getURLVariant();
169 $req = $this->getUserVariant();
171 $req = $this->getHeaderVariant();
// Site-wide default is only used when nothing was requested.
174 if ( $wgDefaultLanguageVariant && !
$req ) {
175 $req = $this->validateVariant( $wgDefaultLanguageVariant );
182 if ( $this->validateVariant(
$req ) ) {
185 return $this->mMainLanguageCode;
/**
 * Picks a default variant without calling getUserVariant().
 *
 * Candidates consulted in the visible code, in this order: URL variant,
 * header variant, then $wgDefaultLanguageVariant when no candidate has been
 * found. The guarding conditions between the lookups are not visible here.
 *
 * @return string The main language code is the final return value shown
 */
193 public function getDefaultVariant() {
194 global $wgDefaultLanguageVariant;
196 $req = $this->getURLVariant();
199 $req = $this->getHeaderVariant();
// Site-wide default is only used when nothing was requested.
202 if ( $wgDefaultLanguageVariant && !
$req ) {
203 $req = $this->validateVariant( $wgDefaultLanguageVariant );
209 return $this->mMainLanguageCode;
/**
 * Checks that $variant is non-null and one of $this->mVariants.
 *
 * The return statements are not visible here; callers in this class store
 * the result as a variant code and test it for truthiness.
 *
 * @param string|null $variant Variant code to check
 */
217 public function validateVariant( $variant =
null ) {
// in_array() is used without the strict flag, so comparison is loose.
218 if ( $variant !==
null && in_array( $variant, $this->mVariants ) ) {
/**
 * Returns the variant requested through the URL, memoized in
 * $this->mURLVariant.
 *
 * The lookup that produces $ret is in lines not shown here.
 */
229 public function getURLVariant() {
// Only a truthy cached value short-circuits; a failed lookup is redone.
232 if ( $this->mURLVariant ) {
233 return $this->mURLVariant;
243 $this->mURLVariant = $this->validateVariant(
$ret );
244 return $this->mURLVariant;
/**
 * Returns the variant chosen in the user's preferences, validated and
 * stored in $this->mUserVariant.
 *
 * What happens when the user is not safe to load, and what is read when the
 * main language equals the content language, is in lines not shown here.
 */
252 protected function getUserVariant() {
265 if ( !
$wgUser->isSafeToLoad() ) {
// Loose comparison of the main language code with the content language code.
269 if ( $this->mMainLanguageCode ==
$wgContLang->getCode() ) {
// Option name is 'variant-' followed by the main language code.
272 $ret =
$wgUser->getOption(
'variant-' . $this->mMainLanguageCode );
280 $this->mUserVariant = $this->validateVariant(
$ret );
281 return $this->mUserVariant;
/**
 * Returns the variant derived from a list of requested languages, memoized
 * in $this->mHeaderVariant.
 *
 * Each $language is validated directly; its fallbacks are collected and are
 * only tried when no language validated. The source of the language list
 * and the loop header are in lines not shown here.
 */
289 protected function getHeaderVariant() {
// Only a truthy cached value short-circuits; a failed lookup is redone.
292 if ( $this->mHeaderVariant ) {
293 return $this->mHeaderVariant;
// Fallback candidates gathered while scanning the requested languages.
303 $fallbackLanguages = [];
305 $this->mHeaderVariant = $this->validateVariant( $language );
306 if ( $this->mHeaderVariant ) {
313 $fallbacks = $this->getVariantFallbacks( $language );
// A string fallback equal to the main language code is not collected.
314 if ( is_string( $fallbacks ) && $fallbacks !== $this->mMainLanguageCode ) {
315 $fallbackLanguages[] = $fallbacks;
// NOTE(review): array_merge() returns a new array and changes nothing in
// place; the assignment back to $fallbackLanguages is presumably on the
// line just before the call, which is not visible here - confirm.
316 } elseif ( is_array( $fallbacks ) ) {
318 array_merge( $fallbackLanguages, $fallbacks );
// No direct match: try the de-duplicated fallbacks in collection order.
322 if ( !$this->mHeaderVariant ) {
324 $fallback_languages = array_unique( $fallbackLanguages );
325 foreach ( $fallback_languages
as $language ) {
326 $this->mHeaderVariant = $this->validateVariant( $language );
327 if ( $this->mHeaderVariant ) {
333 return $this->mHeaderVariant;
/**
 * Converts the plain-text parts of $text to $toVariant while leaving markup
 * untouched.
 *
 * Visible steps: build a regex matching the pieces that must not be
 * converted; split $text into text runs ($sourceBlob) and markup
 * ($literalBlob), both NUL-separated; convert the title and alt attributes
 * of tags separately; translate the whole text blob in one call; then
 * interleave translated runs with the literal markup.
 *
 * @param string $text Text to convert
 * @param string|bool $toVariant Target variant; the preferred variant is
 *  looked up when none is given (the guarding condition is not visible)
 */
346 public function autoConvert( $text, $toVariant =
false ) {
351 $toVariant = $this->getPreferredVariant();
// guessVariant() is consulted first; the action taken on a truthy result is
// not visible here.
357 if ( $this->guessVariant( $text, $toVariant ) ) {
// Alternatives for the "do not convert" regex: parser markers, ...
366 $marker =
'|' . Parser::MARKER_PREFIX .
'[\-a-zA-Z0-9]+';
// ... a tag left open at the end or closed at the start of the text, ...
369 $htmlfix =
'|<[^>]+$|^[^<>]*>';
// ... whole code elements, ...
372 $codefix =
'<code>.+?<\/code>|';
// ... whole script elements, ...
374 $scriptfix =
'<script.*?>.*?<\/script>|';
// ... and whole pre elements.
376 $prefix =
'<pre.*?>.*?<\/pre>|';
// Combined pattern also covers any single tag and character entities; the
// s modifier lets the dot match newlines.
378 $reg =
'/' . $codefix . $scriptfix . $prefix .
379 '<[^>]+>|&[a-zA-Z#][a-z0-9]+;' . $marker . $htmlfix .
'/s';
// NUL is used below as the separator in both blobs, so strip it from input.
386 $text = str_replace(
"\000",
'', $text );
388 $markupMatches =
null;
389 $elementMatches =
null;
390 while ( $startPos < strlen( $text ) ) {
// Find the next markup element at or after $startPos.
391 if ( preg_match( $reg, $text, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
392 $elementPos = $markupMatches[0][1];
393 $element = $markupMatches[0][0];
// No further markup: the text run extends to the end of the string.
395 $elementPos = strlen( $text );
// Queue the text before the element for translation.
400 $sourceBlob .= substr( $text, $startPos, $elementPos - $startPos ) .
"\000";
// Continue scanning right after the element.
403 $startPos = $elementPos + strlen( $element );
// Split a tag into its name, attribute string and remainder.
407 && preg_match(
'/^(<[^>\s]*)\s([^>]*)(.*)$/', $element, $elementMatches )
409 $attrs = Sanitizer::decodeTagAttributes( $elementMatches[2] );
// Only these two attributes carry human-readable text that gets converted.
411 foreach ( [
'title',
'alt' ]
as $attrName ) {
412 if ( !isset( $attrs[$attrName] ) ) {
415 $attr = $attrs[$attrName];
// Values containing '://' are left alone.
// NOTE(review): strpos() returns 0 for a value that starts with '://', which
// this test treats the same as "not found", so such a value is converted.
417 if ( !strpos( $attr,
'://' ) ) {
418 $attr = $this->recursiveConvertTopLevel( $attr, $toVariant );
// Write the attribute back only when conversion changed it.
421 if ( $attr !== $attrs[$attrName] ) {
422 $attrs[$attrName] = $attr;
// Queue the (possibly rewritten) element to be emitted verbatim.
431 $literalBlob .= $element .
"\000";
// Translate all text runs in a single call.
435 $translatedBlob = $this->
translate( $sourceBlob, $toVariant );
// Re-interleave: one translated run, then the markup that followed it.
441 while ( $translatedIter->valid() && $literalIter->valid() ) {
442 $output .= $translatedIter->current();
443 $output .= $literalIter->current();
444 $translatedIter->next();
445 $literalIter->next();
/**
 * Applies the conversion table of $variant to $text.
 *
 * @param string $text Text to convert
 * @param string $variant Variant code used as index into $this->mTables
 */
460 public function translate( $text, $variant ) {
// Skip text that trims to a falsy string (empty, or the string "0").
463 if ( trim( $text ) ) {
465 $text = $this->mTables[$variant]->replace( $text );
/**
 * Iterates over every variant in $this->mVariants for $text.
 *
 * The loop body and return value are not visible here; findVariantLink()
 * iterates over the result.
 *
 * @param string $text Text to convert
 */
476 public function autoConvertToAllVariants( $text ) {
480 foreach ( $this->mVariants
as $variant ) {
/**
 * Applies a parsed conversion rule to the converter state: records the
 * rule's title and adds to or removes from the per-variant tables.
 *
 * @param object $convRule Rule object providing getTitle(), getConvTable()
 *  and getRulesAction()
 */
492 protected function applyManualConv( $convRule ) {
// A truthy rule title replaces the current one; otherwise it is kept.
497 $newConvRuleTitle = $convRule->getTitle();
498 if ( $newConvRuleTitle ) {
500 $this->mConvRuleTitle = $newConvRuleTitle;
504 $convTable = $convRule->getConvTable();
505 $action = $convRule->getRulesAction();
506 foreach ( $convTable
as $variant => $pair ) {
// Entries for variants that do not validate are handled separately (the
// statement inside this branch is not visible here).
507 if ( !$this->validateVariant( $variant ) ) {
// 'add' sets each from/to pair individually.
511 if ( $action ==
'add' ) {
513 foreach ( $pair
as $from => $to ) {
514 $this->mTables[$variant]->setPair( $from, $to );
// 'remove' drops the whole array of pairs in one call.
516 } elseif ( $action ==
'remove' ) {
517 $this->mTables[$variant]->removeArray( $pair );
/**
 * Converts a title for display in the preferred variant.
 *
 * Visible part: the namespace is converted with convertNamespace() and
 * followed by ':'. The condition for adding that prefix and the handling of
 * the rest of the title are not visible here.
 *
 * @param object $title Title object providing getNamespace()
 */
529 public function convertTitle(
$title ) {
530 $variant = $this->getPreferredVariant();
531 $index =
$title->getNamespace();
533 $text = $this->convertNamespace( $index, $variant ) .
':';
/**
 * Returns the name of namespace $index in $variant.
 *
 * The result is cached in the local server object cache for 60 seconds.
 * On a cache miss the visible code tries, in order: the 'conversion-ns<index>'
 * message in the variant, the same message in the content language passed
 * through translate(), and finally the formatted namespace text of a
 * language object created for the variant.
 *
 * @param int $index Namespace index
 * @param string|null $variant Variant code; the preferred variant when null
 * @return string
 */
548 public function convertNamespace( $index, $variant =
null ) {
553 if ( $variant ===
null ) {
554 $variant = $this->getPreferredVariant();
557 $cache = MediaWikiServices::getInstance()->getLocalServerObjectCache();
558 $key =
$cache->makeKey(
'languageconverter',
'namespace-text', $index, $variant );
559 $nsVariantText =
$cache->get( $key );
// Anything other than false is treated as a cache hit.
560 if ( $nsVariantText !==
false ) {
561 return $nsVariantText;
// First choice: a message defined for the variant itself.
565 $nsConvMsg =
wfMessage(
'conversion-ns' . $index )->inLanguage( $variant );
566 if ( $nsConvMsg->exists() ) {
567 $nsVariantText = $nsConvMsg->plain();
// Second choice: the content-language message, machine-converted.
572 if ( $nsVariantText ===
false ) {
573 $nsConvMsg =
wfMessage(
'conversion-ns' . $index )->inContentLanguage();
574 if ( $nsConvMsg->exists() ) {
575 $nsVariantText = $this->
translate( $nsConvMsg->plain(), $variant );
// Last resort: the namespace name known to the variant's language object.
579 if ( $nsVariantText ===
false ) {
581 $langObj = $this->mLangObj->factory( $variant );
582 $nsVariantText = $langObj->getFormattedNsText( $index );
// Third argument is the expiry passed to the cache (60).
585 $cache->set( $key, $nsVariantText, 60 );
587 return $nsVariantText;
/**
 * Converts $text to the preferred variant; shorthand for convertTo() with
 * the result of getPreferredVariant().
 *
 * @param string $text Text to convert
 * @return string
 */
604 public function convert( $text ) {
605 $variant = $this->getPreferredVariant();
606 return $this->convertTo( $text, $variant );
/**
 * Converts $text to the given variant.
 *
 * $wgDisableLangConversion is checked first (the statement inside that
 * branch is not visible here). Otherwise the rule title is reset and the
 * text is handed to recursiveConvertTopLevel().
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @return string
 */
616 public function convertTo( $text, $variant ) {
617 global $wgDisableLangConversion;
618 if ( $wgDisableLangConversion ) {
// Forget any title left over from a previous conversion.
622 $this->mConvRuleTitle =
false;
623 return $this->recursiveConvertTopLevel( $text, $variant );
/**
 * Converts text that is outside any conversion rule, delegating each
 * '-{' rule it meets to recursiveConvertRule().
 *
 * Text between rules goes through autoConvert() unless guessVariant()
 * reports a truthy result for the whole input, in which case it is copied
 * unchanged.
 *
 * @param string $text Text to convert
 * @param string $variant Target variant code
 * @param int $depth Current nesting depth; rules are parsed at $depth + 1
 */
635 protected function recursiveConvertTopLevel( $text, $variant, $depth = 0 ) {
638 $length = strlen( $text );
639 $shouldConvert = !$this->guessVariant( $text, $variant );
641 while ( $startPos < $length ) {
// Locate the next rule opener.
642 $pos = strpos( $text,
'-{', $startPos );
644 if ( $pos ===
false ) {
// No more rules: handle the remainder of the text.
646 $fragment = substr( $text, $startPos );
647 $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
// Handle the text preceding the rule.
653 $fragment = substr( $text, $startPos, $pos - $startPos );
654 $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment;
// $startPos is passed by reference and advanced by the callee.
660 $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
/**
 * Parses and applies one conversion rule starting at $startPos, including
 * rules nested inside it.
 *
 * @param string $text Full text being converted
 * @param string $variant Target variant code
 * @param int &$startPos Offset of the '-{' opening the rule; taken by
 *  reference so the caller sees how far parsing advanced
 * @param int $depth Current nesting depth, compared against $this->mMaxDepth
 * @return string Display text of the rule, or for a rule that is never
 *  closed, '-{' followed by the auto-converted inner text
 * @throws MWException When $startPos does not point at '-{', or on an
 *  unexpected regex match
 */
677 protected function recursiveConvertRule( $text, $variant, &$startPos, $depth = 0 ) {
// The caller must position $startPos exactly on a rule opener.
679 if ( $text[$startPos] !==
'-' || $text[$startPos + 1] !==
'{' ) {
680 throw new MWException( __METHOD__ .
': invalid input string' );
// Ensures the depth warning is emitted at most once per rule.
685 $warningDone =
false;
686 $length = strlen( $text );
688 while ( $startPos < $length ) {
// Find the next opener or closer, whichever comes first.
690 preg_match(
'/-\{|\}-/', $text, $m, PREG_OFFSET_CAPTURE, $startPos );
// Collect the text up to that delimiter.
701 $inner .= substr( $text, $startPos, $pos - $startPos );
// Nesting limit reached: append an error message instead of descending.
709 if ( $depth >= $this->mMaxDepth ) {
711 if ( !$warningDone ) {
712 $inner .=
'<span class="error">' .
713 wfMessage(
'language-converter-depth-warning' )
714 ->numParams( $this->mMaxDepth )->inContentLanguage()->text() .
// Nested rule: convert it recursively and splice in its output.
722 $inner .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 );
// Parse the collected rule, apply it to the tables, return what it displays.
728 $rule->parse( $variant );
729 $this->applyManualConv( $rule );
730 return $rule->getDisplay();
732 throw new MWException( __METHOD__ .
': invalid regex match' );
// Reached only when the rule was never closed: keep the opener literally
// and auto-convert whatever was collected.
737 if ( $startPos < $length ) {
738 $inner .= substr( $text, $startPos );
741 return '-{' . $this->autoConvert( $inner, $variant );
/**
 * Looks for an existing page under any variant spelling of $link and, when
 * one is found, rewrites $link to that page's text.
 *
 * @param string &$link Link text; overwritten with the text of a variant
 *  title that has an article ID greater than 0
 * @param object|null &$nt Title object for the link; when it already exists
 *  nothing needs checking
 * @param bool $ignoreOtherCond When true, the request-based conditions
 *  (those visible include action 'submit' and linkconvert=no) are skipped;
 *  the global disable switches still apply
 */
755 public function findVariantLink( &
$link, &$nt, $ignoreOtherCond =
false ) {
756 # If the article has already existed, there is no need to
757 # check it again, otherwise it may cause a fault.
758 if ( is_object( $nt ) && $nt->exists() ) {
// 'redirect' request parameter, defaulting to 'yes'.
763 $isredir =
$wgRequest->getText(
'redirect',
'yes' );
// Special case for edit requests that carry the 'redlink' parameter.
765 if ( $action ==
'edit' &&
$wgRequest->getBool(
'redlink' ) ) {
// 'linkconvert' request parameter, defaulting to 'yes'.
768 $linkconvert =
$wgRequest->getText(
'linkconvert',
'yes' );
// Either global switch turns link conversion off.
769 $disableLinkConversion = $wgDisableLangConversion
770 || $wgDisableTitleConversion;
775 if ( $disableLinkConversion ||
776 ( !$ignoreOtherCond &&
779 || $action ==
'submit'
780 || $linkconvert ==
'no' ) ) ) {
784 if ( is_object( $nt ) ) {
785 $ns = $nt->getNamespace();
// Candidate spellings of the link, one per variant.
788 $variants = $this->autoConvertToAllVariants(
$link );
// Queue every candidate title so existence is resolved in one batch.
795 foreach ( $variants
as $v ) {
798 if ( !is_null( $varnt ) ) {
799 $linkBatch->addObj( $varnt );
806 $linkBatch->execute();
// A candidate with a positive article ID exists; use its text as the link.
809 if ( $varnt->getArticleID() > 0 ) {
811 $link = $varnt->getText();
/**
 * Returns '!' followed by the preferred variant code.
 *
 * Presumably appended to a cache hash so that output is kept separately per
 * variant - confirm against callers.
 *
 * @return string
 */
822 public function getExtraHashOptions() {
823 $variant = $this->getPreferredVariant();
825 return '!' . $variant;
/**
 * Reports whether $text already appears to be in $variant.
 *
 * The body is not visible here. recursiveConvertTopLevel() skips automatic
 * conversion when this returns a truthy value.
 *
 * @param string $text Text to examine
 * @param string $variant Variant code to test for
 */
838 public function guessVariant( $text, $variant ) {
/**
 * Placeholder for loading the built-in conversion tables; this version only
 * throws, and the message says subclasses must implement the method.
 * loadTables() calls it when the cache has no usable tables.
 *
 * @throws MWException Always, in the visible code
 */
849 function loadDefaultTables() {
851 throw new MWException(
"Must implement loadDefaultTables() method in class $class" );
/**
 * Loads the conversion tables into $this->mTables, from cache when possible.
 *
 * When the cached value is missing or lacks CACHE_VERSION_KEY, the tables
 * are rebuilt from loadDefaultTables() plus the parsed on-wiki tables of
 * every variant, then written back to the cache.
 *
 * @param bool $fromCache Presumably controls whether the cache is read; the
 *  line using it is not visible here (reloadTables() passes false)
 */
859 function loadTables( $fromCache =
true ) {
// Already loaded during this request (the statement inside is not visible).
862 if ( $this->mTablesLoaded ) {
866 $this->mTablesLoaded =
true;
867 $this->mTables =
false;
870 $this->mTables =
$cache->get( $this->mCacheKey );
// Rebuild when nothing was cached or the cached copy has no version marker.
872 if ( !$this->mTables || !array_key_exists( self::CACHE_VERSION_KEY, $this->mTables ) ) {
876 $this->loadDefaultTables();
// Merge the user-maintained table of each variant over the defaults.
877 foreach ( $this->mVariants
as $var ) {
878 $cached = $this->parseCachedTable( $var );
879 $this->mTables[$var]->mergeArray( $cached );
882 $this->postLoadTables();
// Mark the tables with the current cache version.
883 $this->mTables[self::CACHE_VERSION_KEY] =
true;
// Third argument is the expiry passed to the cache (43200).
885 $cache->set( $this->mCacheKey, $this->mTables, 43200 );
/**
 * Called by loadTables() after the default and parsed tables have been
 * merged and before the result is cached. The body is not visible here.
 */
892 function postLoadTables() {
/**
 * Discards the tables held in memory and loads them again with
 * loadTables( false ).
 */
900 function reloadTables() {
901 if ( $this->mTables ) {
902 unset( $this->mTables );
// Clear the flag so loadTables() does not return early.
905 $this->mTablesLoaded =
false;
906 $this->loadTables(
false );
/**
 * Parses the conversion table page 'Conversiontable/<code>[/<subpage>]'
 * into from/to pairs, following links to further subpages.
 *
 * @param string $code Variant code
 * @param string $subpage Subpage name; empty for the variant's main table
 * @param bool $recursive Passed on unchanged to the recursive calls; the
 *  line that tests it is not visible here
 * @return array Presumably the $ret map of from-text to to-text built below
 *  (the return statement is not visible here)
 */
928 function parseCachedTable(
$code, $subpage =
'', $recursive =
true ) {
// Page key: 'Conversiontable/<code>', plus '/<subpage>' when applicable.
931 $key =
'Conversiontable/' .
$code;
933 $key .=
'/' . $subpage;
// $parsed remembers keys already handled, so a page is not parsed twice.
935 if ( array_key_exists( $key, $parsed ) ) {
939 $parsed[$key] =
true;
941 if ( $subpage ===
'' ) {
// Raw page text taken from the revision's content.
950 $txt = $revision->getContent(
Revision::RAW )->getNativeData();
958 # Nothing to parse if there's no text
959 if ( $txt ===
false || $txt ===
null || $txt ===
'' ) {
// Prefix that a link must carry to count as a conversion table subpage;
// it starts with the MediaWiki namespace name.
965 $linkhead = $this->mLangObj->getNsText(
NS_MEDIAWIKI ) .
// Scan the link candidates in $subs for subpage links.
969 foreach ( $subs
as $sub ) {
// Keep the part before the closing brackets ...
970 $link = explode(
']]', $sub, 2 );
// ... drop any text after a pipe ...
974 $b = explode(
'|',
$link[0], 2 );
// ... and split the target into at most three slash-separated parts.
975 $b = explode(
'/', trim( $b[0] ), 3 );
976 if (
count( $b ) == 3 ) {
// Only links whose first two parts match $linkhead and $code are followed.
982 if ( $b[0] == $linkhead && $b[1] ==
$code ) {
983 $sublinks[] = $sublink;
// Parse each rule block found in the page.
991 foreach ( $blocks
as $block ) {
// Keep only what precedes the block terminator.
997 $mappings = explode(
'}-', $block, 2 )[0];
// Remove quote characters and wiki list markers.
998 $stripped = str_replace( [
"'",
'"',
'*',
'#' ],
'', $mappings );
1000 foreach ( $table
as $t ) {
// Split into at most three parts; an entry is usable only when it yields
// exactly two, i.e. contains a single arrow.
1001 $m = explode(
'=>',
$t, 3 );
1002 if (
count( $m ) != 2 ) {
// Anything after '//' on the target side is discarded.
1006 $tt = explode(
'//', $m[1], 2 );
1007 $ret[trim( $m[0] )] = trim( $tt[0] );
// Parse the linked subpages as well.
1013 foreach ( $sublinks
as $link ) {
1014 $s = $this->parseCachedTable(
$code,
$link, $recursive );
// Optionally add a ucfirst() copy of every pair.
1019 if ( $this->mUcfirst ) {
1020 foreach (
$ret as $k => $v ) {
1021 $ret[$this->mLangObj->ucfirst( $k )] = $this->mLangObj->ucfirst( $v );
/**
 * Wraps $text in a raw rule of the form -{R|...}- unless it already
 * contains rule markers.
 *
 * @param string $text Text to protect from conversion
 * @param bool $noParse Not used in the visible lines
 */
1035 public function markNoConversion( $text, $noParse =
false ) {
1036 # don't mark if already marked
// NOTE(review): strpos() returns 0 when the marker is at the very start of
// $text, and this truthiness test treats 0 the same as "not found", so text
// beginning with '-{' is not recognized as already marked.
1037 if ( strpos( $text,
'-{' ) || strpos( $text,
'}-' ) ) {
1041 $ret =
"-{R|$text}-";
/**
 * Converts a category sort key. The body is not visible here.
 *
 * @param string $key Sort key
 */
1053 function convertCategoryKey( $key ) {
/**
 * Reloads the conversion tables when the given title is a conversion table
 * page of a valid variant.
 *
 * $t and $c are derived from the title in lines not shown here; presumably
 * $t holds the slash-separated title parts and $c their count - confirm.
 *
 * @param Title $titleobj Title to check
 */
1063 public function updateConversionTable(
Title $titleobj ) {
// Needs at least 'Conversiontable' plus one more part.
1068 if ( $c > 1 &&
$t[0] ==
'Conversiontable' ) {
// The second part must be one of this converter's variants.
1069 if ( $this->validateVariant(
$t[1] ) ) {
1070 $this->reloadTables();
/**
 * Returns the regex used to split a rule into its per-variant parts,
 * building it on first use and memoizing it in $this->mVarSeparatorPattern.
 *
 * The start and end of the pattern are assembled in lines not shown here.
 *
 * @return string
 */
1080 function getVarSeparatorPattern() {
1081 if ( is_null( $this->mVarSeparatorPattern ) ) {
1094 foreach ( $this->mVariants
as $variant ) {
// Alternative 1: the variant code followed by optional whitespace and ':'.
1096 $pat .= $variant .
'\s*:|';
// Alternative 2: a run without ';', an arrow, then the variant code and ':'.
1098 $pat .=
'[^;]*?=>\s*' . $variant .
'\s*:|';
1101 $this->mVarSeparatorPattern = $pat;
1103 return $this->mVarSeparatorPattern;