# Cyrillic alphabets as plain strings. loadExceptions() appends each one to the
# matching Latin alphabet (L_UC / L_LC) before handing them to the exceptions loader.
35 # Crimean Tatar Cyrillic uppercase
36 public const C_UC =
'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ';
37 # Crimean Tatar Cyrillic lowercase
38 public const C_LC =
'абвгдеёжзийклмнопрстуфхцчшщъыьэюя';
39 # Crimean Tatar Cyrillic + CÑ uppercase consonants
41 # Crimean Tatar Cyrillic + CÑ lowercase consonants
43 # Crimean Tatar Cyrillic M-type consonants
# Lowercase consonants followed by the uppercase ones; each half ends with
# 'cñ' / 'CÑ' after the Cyrillic letters.
47 public const C_CONS =
'бвгджзйклмнпрстфхцчшщcñБВГДЖЗЙКЛМНПРСТФХЦЧШЩCÑ';
# Latin alphabets as plain strings, position-aligned between the two cases.
# Note the Turkic I pairs: 'I' pairs with dotless 'ı', and dotted 'İ' pairs with 'i'.
50 # Crimean Tatar Latin uppercase
51 public const L_UC =
'AÂBCÇDEFGĞHIİJKLMNÑOÖPQRSŞTUÜVYZ';
52 # Crimean Tatar Latin lowercase
53 public const L_LC =
'aâbcçdefgğhıijklmnñoöpqrsştuüvyz';
54 # Crimean Tatar Latin N-type upper case consonants
56 # Crimean Tatar Latin N-type lower case consonants
58 # Crimean Tatar Latin N-type consonants
60 # Crimean Tatar Latin M-type consonants
62 # Crimean Tatar Latin uppercase consonants
64 # Crimean Tatar Latin lowercase consonants
66 # Crimean Tatar Latin consonants
# Lowercase Latin consonants followed by the same letters in uppercase.
67 public const L_CONS =
'bcçdfgğhjklmnñpqrsştvzBCÇDFGĞHJKLMNÑPQRSŞTVZ';
68 # Crimean Tatar Latin uppercase vowels
70 # Crimean Tatar Latin vowels
# Lowercase vowels followed by the uppercase ones (both dotless and dotted I included).
71 public const L_VOW =
'aâeıioöuüAÂEIİOÖUÜ';
72 # Crimean Tatar Latin uppercase front vowels
74 # Crimean Tatar Latin front vowels
# Lowercase then uppercase; the uppercase partner of 'i' here is dotted 'İ'.
75 public const L_F =
'eiöüEİÖÜ';
# Variant codes handled by this converter; passed to the parent constructor below.
81 $variants = [
'crh',
'crh-cyrl',
'crh-latn' ];
# NOTE(review): these entries presumably belong to $variantfallbacks (its opening
# line is not shown here); each script variant names the other one — confirm.
84 'crh-cyrl' =>
'crh-latn',
85 'crh-latn' =>
'crh-cyrl',
# Register with the parent converter: 'crh' is passed as the second argument,
# followed by the variant list, the fallbacks and an empty array.
88 parent::__construct( $langobj,
'crh',
89 $variants, $variantfallbacks, [] );
# Fill the exception and pattern tables.
94 $this->loadExceptions();
# Cyrillic => Latin transliteration map. Keys are one- or two-letter Cyrillic
# sequences, values are the Latin text that replaces them.
97 public $mCyrillicToLatin = [
99 ## these are independent of location in the word, but have
100 ## to go first so other transforms don't bleed them
# Digraphs in three casings (lower, Title, UPPER); both cased forms collapse
# to one uppercase Latin letter.
101 'гъ' =>
'ğ',
'Гъ' =>
'Ğ',
'ГЪ' =>
'Ğ',
102 'къ' =>
'q',
'Къ' =>
'Q',
'КЪ' =>
'Q',
103 'нъ' =>
'ñ',
'Нъ' =>
'Ñ',
'НЪ' =>
'Ñ',
104 'дж' =>
'c',
'Дж' =>
'C',
'ДЖ' =>
'C',
# Single letters with a single-letter Latin equivalent.
# 'И' becomes dotted 'İ' and 'Ы' becomes plain 'I' (lowercase dotless 'ı').
106 'А' =>
'A',
'а' =>
'a',
'Б' =>
'B',
'б' =>
'b',
107 'В' =>
'V',
'в' =>
'v',
'Г' =>
'G',
'г' =>
'g',
108 'Д' =>
'D',
'д' =>
'd',
'Ж' =>
'J',
'ж' =>
'j',
109 'З' =>
'Z',
'з' =>
'z',
'И' =>
'İ',
'и' =>
'i',
110 'Й' =>
'Y',
'й' =>
'y',
'К' =>
'K',
'к' =>
'k',
111 'Л' =>
'L',
'л' =>
'l',
'М' =>
'M',
'м' =>
'm',
112 'Н' =>
'N',
'н' =>
'n',
'П' =>
'P',
'п' =>
'p',
113 'Р' =>
'R',
'р' =>
'r',
'С' =>
'S',
'с' =>
's',
114 'Т' =>
'T',
'т' =>
't',
'Ф' =>
'F',
'ф' =>
'f',
115 'Х' =>
'H',
'х' =>
'h',
'Ч' =>
'Ç',
'ч' =>
'ç',
116 'Ш' =>
'Ş',
'ш' =>
'ş',
'Ы' =>
'I',
'ы' =>
'ı',
# Both 'Э' and 'Е' map to 'E'/'e', so this table alone is not reversible.
117 'Э' =>
'E',
'э' =>
'e',
'Е' =>
'E',
'е' =>
'e',
118 'Я' =>
'Â',
'я' =>
'â',
'У' =>
'U',
'у' =>
'u',
119 'О' =>
'O',
'о' =>
'o',
# Letters that expand to two Latin letters (uppercase gives Title case, e.g. 'Yo').
121 'Ё' =>
'Yo',
'ё' =>
'yo',
'Ю' =>
'Yu',
'ю' =>
'yu',
122 'Ц' =>
'Ts',
'ц' =>
'ts',
'Щ' =>
'Şç',
'щ' =>
'şç',
# Soft and hard signs that were not consumed by a digraph above are dropped.
123 'Ь' =>
'',
'ь' =>
'',
'Ъ' =>
'',
'ъ' =>
'',
# Latin => Cyrillic transliteration map. Keys are one- or two-letter Latin
# sequences, values are the Cyrillic text that replaces them.
127 public $mLatinToCyrillic = [
# Single letters with a single-letter Cyrillic equivalent.
# Plain 'I' / dotless 'ı' become 'Ы' / 'ы'; dotted 'İ' / 'i' become 'И' / 'и'.
128 'Â' =>
'Я',
'â' =>
'я',
'B' =>
'Б',
'b' =>
'б',
129 'Ç' =>
'Ч',
'ç' =>
'ч',
'D' =>
'Д',
'd' =>
'д',
130 'F' =>
'Ф',
'f' =>
'ф',
'G' =>
'Г',
'g' =>
'г',
131 'H' =>
'Х',
'h' =>
'х',
'I' =>
'Ы',
'ı' =>
'ы',
132 'İ' =>
'И',
'i' =>
'и',
'J' =>
'Ж',
'j' =>
'ж',
133 'K' =>
'К',
'k' =>
'к',
'L' =>
'Л',
'l' =>
'л',
134 'M' =>
'М',
'm' =>
'м',
'N' =>
'Н',
'n' =>
'н',
135 'O' =>
'О',
'o' =>
'о',
'P' =>
'П',
'p' =>
'п',
136 'R' =>
'Р',
'r' =>
'р',
'S' =>
'С',
's' =>
'с',
137 'Ş' =>
'Ш',
'ş' =>
'ш',
'T' =>
'Т',
't' =>
'т',
138 'V' =>
'В',
'v' =>
'в',
'Z' =>
'З',
'z' =>
'з',
# Two-letter Latin sequences that collapse into one Cyrillic letter
# (Title and UPPER casings both give the uppercase letter).
140 'ya' =>
'я',
'Ya' =>
'Я',
'YA' =>
'Я',
141 'ye' =>
'е',
'YE' =>
'Е',
'Ye' =>
'Е',
# Remaining vowels and Y; 'Ö' and 'Ü' map to 'Ё' and 'Ю'.
144 'A' =>
'А',
'a' =>
'а',
'E' =>
'Е',
'e' =>
'е',
145 'Ö' =>
'Ё',
'ö' =>
'ё',
'U' =>
'У',
'u' =>
'у',
146 'Ü' =>
'Ю',
'ü' =>
'ю',
'Y' =>
'Й',
'y' =>
'й',
# Latin letters that expand to a Cyrillic digraph; the uppercase forms
# produce Title case (e.g. 'Дж'), not all-caps.
147 'C' =>
'Дж',
'c' =>
'дж',
'Ğ' =>
'Гъ',
'ğ' =>
'гъ',
148 'Ñ' =>
'Нъ',
'ñ' =>
'нъ',
'Q' =>
'Къ',
'q' =>
'къ',
# Tables filled by loadExceptions(). The *Exceptions maps are applied with strtr()
# and the *Patterns maps (regex => replacement) with preg_replace() in regsConverter().
152 public $mCyrl2LatnExceptions = [];
153 public $mLatn2CyrlExceptions = [];
155 public $mCyrl2LatnPatterns = [];
156 public $mLatn2CyrlPatterns = [];
# Regex => replacement clean-ups; regsConverter() runs them last, after the
# curly-quote to guillemet swap.
158 public $mCyrlCleanUpRegexes = [];
# Guard flag checked and set by loadExceptions().
160 public $mExceptionsLoaded =
false;
# NOTE(review): presumably fills $this->mTables (read by translate()) from the
# two transliteration maps — confirm against the method body.
162 protected function loadDefaultTables() {
/**
 * Populate the exception, pattern and clean-up tables of this converter.
 *
 * Guarded by $mExceptionsLoaded, which is set to true before the tables are read.
 */
170 private function loadExceptions() {
# NOTE(review): presumably an early return when the tables are already loaded — confirm.
171 if ( $this->mExceptionsLoaded ) {
175 $this->mExceptionsLoaded =
true;
# One call yields all five tables, in this order: Cyrl->Latn exceptions,
# Latn->Cyrl exceptions, Cyrl->Latn patterns, Latn->Cyrl patterns, Cyrillic clean-ups.
# The loader receives the combined lowercase alphabet (Latin then Cyrillic)
# and the combined uppercase alphabet.
177 list( $this->mCyrl2LatnExceptions, $this->mLatn2CyrlExceptions,
178 $this->mCyrl2LatnPatterns, $this->mLatn2CyrlPatterns, $this->mCyrlCleanUpRegexes ) =
179 $crhExceptions->loadExceptions( self::L_LC . self::C_LC, self::L_UC . self::C_UC );
/**
 * Convert text to the requested variant, leaving Roman numerals untouched
 * where the Roman-numeral splitting path is taken.
 *
 * @param string $text Text to convert
 * @param string $toVariant Target variant code, used as a key into $this->mTables
 * @return string Converted text
 * @throws MWException If $this->mTables has no entry for $toVariant
 */
192 public function translate( $text, $toVariant ) {
193 switch ( $toVariant ) {
# Fail loudly, listing the variants that do have a table.
203 if ( !isset( $this->mTables[$toVariant] ) ) {
204 throw new MWException(
"Broken variant table: " . implode(
',', array_keys( $this->mTables ) ) );
207 switch ( $toVariant ) {
# Roman numeral matcher. The lookahead demands a numeral letter that is followed
# by anything except '.' (or by end of input), so the otherwise all-optional
# body cannot match the empty string or a lone letter before a period.
214 $roman =
'(?=[MDCLXVI]([^.]|$))M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})';
# A "break" is one byte that is neither a word character nor in 0x80-0xff.
# The pattern has no /u modifier, so bytes of multi-byte UTF-8 letters never count as breaks.
216 $breaks =
'([^\w\x80-\xff])';
# Four alternatives: the whole text is a numeral; numeral+break runs at the
# start; break+numeral runs at the end; or a break followed by numeral+break runs.
219 $romanRegex =
'/^' . $roman .
'$|^(' . $roman . $breaks .
')+|(' . $breaks . $roman .
')+$|' .
220 $breaks .
'(' . $roman . $breaks .
')+/';
# Split around the numerals; OFFSET_CAPTURE makes each piece a [ string, byte offset ] pair.
222 $matches = preg_split( $romanRegex, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
# Copy the text lying between the previous piece and this one (the part the
# split removed, i.e. the numerals and their breaks) verbatim.
227 $ret .= substr( $text, $mstart, $m[1] - $mstart );
# Only non-empty pieces go through the actual conversion.
230 if ( $m[0] !==
'' ) {
231 $ret .= $this->regsConverter( $m[0], $toVariant );
# Advance to the byte just past this piece.
234 $mstart = $m[1] + strlen( $m[0] );
# Path without Roman-numeral protection: convert the whole text in one go.
240 return $this->regsConverter( $text, $toVariant );
/**
 * Run the per-variant conversion pipeline on one piece of text: literal
 * exceptions, regex patterns, parent::translate(), then quote-mark fix-ups.
 *
 * @param string $text Text to convert
 * @param string $toVariant Target variant code
 * @return string Converted text; an empty input is returned unchanged
 */
244 private function regsConverter( $text, $toVariant ) {
# Nothing to do for empty input.
# NOTE(review): loose == also treats null and false as equal to ''.
245 if ( $text ==
'' )
return $text;
249 switch ( $toVariant ) {
# Branch using the Cyrl2Latn tables (NOTE(review): presumably the 'crh-latn' case — confirm).
# Literal exceptions first, then regex patterns, then the parent's translation.
251 $text = strtr( $text, $this->mCyrl2LatnExceptions );
252 foreach ( $this->mCyrl2LatnPatterns as $pat => $rep ) {
253 $text = preg_replace( $pat, $rep, $text );
255 $text = parent::translate( $text, $toVariant );
# Guillemets become straight double quotes.
256 $text = strtr( $text, [
'«' =>
'"',
'»' =>
'"', ] );
# Branch using the Latn2Cyrl tables (NOTE(review): presumably the 'crh-cyrl' case — confirm).
# Same order: literal exceptions, regex patterns, parent's translation.
259 $text = strtr( $text, $this->mLatn2CyrlExceptions );
260 foreach ( $this->mLatn2CyrlPatterns as $pat => $rep ) {
261 $text = preg_replace( $pat, $rep, $text );
263 $text = parent::translate( $text, $toVariant );
# Curly double quotes become guillemets.
264 $text = strtr( $text, [
'“' =>
'«',
'”' =>
'»', ] );
# Final regex clean-ups on the Cyrillic output.
265 foreach ( $this->mCyrlCleanUpRegexes as $pat => $rep ) {
266 $text = preg_replace( $pat, $rep, $text );