# Character-class strings for the Cyrillic side of the converter.
# NOTE(review): the trailing C/c and Ñ/ñ in the consonant sets below appear to
# be Latin letters, presumably so the sets still match text in which the
# digraphs дж and нъ have already been converted to c and ñ — confirm.
35 const C_UC =
'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'; # Crimean Tatar Cyrillic uppercase
36 const C_LC =
'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'; # Crimean Tatar Cyrillic lowercase
37 const C_CONS_UC =
'БВГДЖЗЙКЛМНПРСТФХЦЧШЩCÑ'; # Crimean Tatar Cyrillic + CÑ uppercase consonants
38 const C_CONS_LC =
'бвгджзйклмнпрстфхцчшщcñ'; # Crimean Tatar Cyrillic + CÑ lowercase consonants
39 const C_M_CONS =
'бгкмшcБГКМШC'; # Crimean Tatar Cyrillic M-type consonants
42 const C_CONS =
'бвгджзйклмнпрстфхцчшщcñБВГДЖЗЙКЛМНПРСТФХЦЧШЩCÑ'; # Crimean Tatar Cyrillic + CÑ consonants (lowercase set followed by uppercase set)
45 const L_UC =
'AÂBCÇDEFGĞHIİJKLMNÑOÖPQRSŞTUÜVYZ'; # Crimean Tatar Latin uppercase
46 const L_LC =
'aâbcçdefgğhıijklmnñoöpqrsştuüvyz'; # Crimean Tatar Latin lowercase
47 const L_N_CONS_UC =
'ÇNRSTZ'; # Crimean Tatar Latin N-type upper case consonants
48 const L_N_CONS_LC =
'çnrstz'; # Crimean Tatar Latin N-type lower case consonants
49 const L_N_CONS =
'çnrstzÇNRSTZ'; # Crimean Tatar Latin N-type consonants
50 const L_M_CONS =
'bcgkmpşBCGKMPŞ'; # Crimean Tatar Latin M-type consonants
51 const L_CONS_UC =
'BCÇDFGĞHJKLMNÑPQRSŞTVZ'; # Crimean Tatar Latin uppercase consonants
52 const L_CONS_LC =
'bcçdfgğhjklmnñpqrsştvz'; # Crimean Tatar Latin lowercase consonants
53 const L_CONS =
'bcçdfgğhjklmnñpqrsştvzBCÇDFGĞHJKLMNÑPQRSŞTVZ'; # Crimean Tatar Latin consonants
54 const L_VOW_UC =
'AÂEIİOÖUÜ'; # Crimean Tatar Latin uppercase vowels
55 const L_VOW =
'aâeıioöuüAÂEIİOÖUÜ'; # Crimean Tatar Latin vowels
56 const L_F_UC =
'EİÖÜ'; # Crimean Tatar Latin uppercase front vowels
57 const L_F =
'eiöüEİÖÜ'; # Crimean Tatar Latin front vowels
68 $variantfallbacks = [],
71 parent::__construct( $langobj, $maincode,
72 $variants, $variantfallbacks, $flags );
77 $this->loadExceptions();
80 public $mCyrillicToLatin = [
82 ## these are independent of location in the word, but have
83 ## to go first so other transforms don't bleed them
84 'гъ' =>
'ğ',
'Гъ' =>
'Ğ',
'ГЪ' =>
'Ğ',
85 'къ' =>
'q',
'Къ' =>
'Q',
'КЪ' =>
'Q',
86 'нъ' =>
'ñ',
'Нъ' =>
'Ñ',
'НЪ' =>
'Ñ',
87 'дж' =>
'c',
'Дж' =>
'C',
'ДЖ' =>
'C',
89 'А' =>
'A',
'а' =>
'a',
'Б' =>
'B',
'б' =>
'b',
90 'В' =>
'V',
'в' =>
'v',
'Г' =>
'G',
'г' =>
'g',
91 'Д' =>
'D',
'д' =>
'd',
'Ж' =>
'J',
'ж' =>
'j',
92 'З' =>
'Z',
'з' =>
'z',
'И' =>
'İ',
'и' =>
'i',
93 'Й' =>
'Y',
'й' =>
'y',
'К' =>
'K',
'к' =>
'k',
94 'Л' =>
'L',
'л' =>
'l',
'М' =>
'M',
'м' =>
'm',
95 'Н' =>
'N',
'н' =>
'n',
'П' =>
'P',
'п' =>
'p',
96 'Р' =>
'R',
'р' =>
'r',
'С' =>
'S',
'с' =>
's',
97 'Т' =>
'T',
'т' =>
't',
'Ф' =>
'F',
'ф' =>
'f',
98 'Х' =>
'H',
'х' =>
'h',
'Ч' =>
'Ç',
'ч' =>
'ç',
99 'Ш' =>
'Ş',
'ш' =>
'ş',
'Ы' =>
'I',
'ы' =>
'ı',
100 'Э' =>
'E',
'э' =>
'e',
'Е' =>
'E',
'е' =>
'e',
101 'Я' =>
'Â',
'я' =>
'â',
'У' =>
'U',
'у' =>
'u',
102 'О' =>
'O',
'о' =>
'o',
104 'Ё' =>
'Yo',
'ё' =>
'yo',
'Ю' =>
'Yu',
'ю' =>
'yu',
105 'Ц' =>
'Ts',
'ц' =>
'ts',
'Щ' =>
'Şç',
'щ' =>
'şç',
106 'Ь' =>
'',
'ь' =>
'',
'Ъ' =>
'',
'ъ' =>
'',
110 public $mLatinToCyrillic = [
111 'Â' =>
'Я',
'â' =>
'я',
'B' =>
'Б',
'b' =>
'б',
112 'Ç' =>
'Ч',
'ç' =>
'ч',
'D' =>
'Д',
'd' =>
'д',
113 'F' =>
'Ф',
'f' =>
'ф',
'G' =>
'Г',
'g' =>
'г',
114 'H' =>
'Х',
'h' =>
'х',
'I' =>
'Ы',
'ı' =>
'ы',
115 'İ' =>
'И',
'i' =>
'и',
'J' =>
'Ж',
'j' =>
'ж',
116 'K' =>
'К',
'k' =>
'к',
'L' =>
'Л',
'l' =>
'л',
117 'M' =>
'М',
'm' =>
'м',
'N' =>
'Н',
'n' =>
'н',
118 'O' =>
'О',
'o' =>
'о',
'P' =>
'П',
'p' =>
'п',
119 'R' =>
'Р',
'r' =>
'р',
'S' =>
'С',
's' =>
'с',
120 'Ş' =>
'Ш',
'ş' =>
'ш',
'T' =>
'Т',
't' =>
'т',
121 'V' =>
'В',
'v' =>
'в',
'Z' =>
'З',
'z' =>
'з',
123 'ya' =>
'я',
'Ya' =>
'Я',
'YA' =>
'Я',
124 'ye' =>
'е',
'YE' =>
'Е',
'Ye' =>
'Е',
127 'A' =>
'А',
'a' =>
'а',
'E' =>
'Е',
'e' =>
'е',
128 'Ö' =>
'Ё',
'ö' =>
'ё',
'U' =>
'У',
'u' =>
'у',
129 'Ü' =>
'Ю',
'ü' =>
'ю',
'Y' =>
'Й',
'y' =>
'й',
130 'C' =>
'Дж',
'c' =>
'дж',
'Ğ' =>
'Гъ',
'ğ' =>
'гъ',
131 'Ñ' =>
'Нъ',
'ñ' =>
'нъ',
'Q' =>
'Къ',
'q' =>
'къ',
# Exception maps, one per conversion direction. Both are filled by
# loadExceptions() and applied with strtr() in regsConverter() before the
# pattern pass and before parent::translate().
135 public $mCyrl2LatnExceptions = [];
136 public $mLatn2CyrlExceptions = [];
# Regex pattern => replacement maps, one per conversion direction. Each pair
# is applied with preg_replace() after the exception map for that direction.
138 public $mCyrl2LatnPatterns = [];
139 public $mLatn2CyrlPatterns = [];
# Regex pattern => replacement map applied with preg_replace() as the last
# visible step of the branch that uses the Latn2Cyrl tables, after
# parent::translate() and the quotation-mark substitution.
141 public $mCyrlCleanUpRegexes = [];
# Set to true by loadExceptions(); checked at the top of that method,
# presumably so the exception data is only loaded once — confirm.
143 public $mExceptionsLoaded =
false;
153 function loadExceptions() {
154 if ( $this->mExceptionsLoaded ) {
158 $this->mExceptionsLoaded =
true;
160 list( $this->mCyrl2LatnExceptions, $this->mLatn2CyrlExceptions,
161 $this->mCyrl2LatnPatterns, $this->mLatn2CyrlPatterns, $this->mCyrlCleanUpRegexes ) =
162 $crhExceptions->loadExceptions( self::L_LC . self::C_LC, self::L_UC . self::C_UC );
177 if ( is_object( $nt ) ) {
178 $ns = $nt->getNamespace();
185 parent::findVariantLink( $link, $nt, $ignoreOtherCond );
201 function translate( $text, $toVariant ) {
202 switch ( $toVariant ) {
210 if ( !$this->mTablesLoaded ) {
214 if ( !isset( $this->mTables[$toVariant] ) ) {
215 throw new MWException(
"Broken variant table: " . implode(
',', array_keys( $this->mTables ) ) );
218 switch ( $toVariant ) {
225 $roman =
'(?=[MDCLXVI]([^.]|$))M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})';
227 $breaks =
'([^\w\x80-\xff])';
230 $romanRegex =
'/^' . $roman .
'$|^(' . $roman . $breaks .
')+|(' . $breaks . $roman .
')+$|' .
231 $breaks .
'(' . $roman . $breaks .
')+/';
233 $matches = preg_split( $romanRegex, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
238 $ret .= substr( $text, $mstart, $m[1] - $mstart );
241 if ( $m[0] !==
'' ) {
242 $ret .= $this->regsConverter( $m[0], $toVariant );
245 $mstart = $m[1] + strlen( $m[0] );
251 return $this->regsConverter( $text, $toVariant );
255 private function regsConverter( $text, $toVariant ) {
256 if ( $text ==
'' )
return $text;
260 switch ( $toVariant ) {
262 $text = strtr( $text, $this->mCyrl2LatnExceptions );
263 foreach ( $this->mCyrl2LatnPatterns as $pat => $rep ) {
264 $text = preg_replace( $pat, $rep, $text );
266 $text = parent::translate( $text, $toVariant );
267 $text = strtr( $text, [
'«' =>
'"',
'»' =>
'"', ] );
270 $text = strtr( $text, $this->mLatn2CyrlExceptions );
271 foreach ( $this->mLatn2CyrlPatterns as $pat => $rep ) {
272 $text = preg_replace( $pat, $rep, $text );
274 $text = parent::translate( $text, $toVariant );
275 $text = strtr( $text, [
'“' =>
'«',
'”' =>
'»', ] );
276 foreach ( $this->mCyrlCleanUpRegexes as $pat => $rep ) {
277 $text = preg_replace( $pat, $rep, $text );