MediaWiki REL1_32
LanguageCrh.php
Go to the documentation of this file.
1<?php
// Character-class strings for the Crimean Tatar converter. Within this listing
// only C_UC, C_LC, L_UC and L_LC are referenced (by loadExceptions()); the other
// classes are presumably consumed by the exception/pattern data - TODO confirm.
32 // Defines working character ranges
33
34 // Cyrillic
35 const C_UC = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'; # Crimean Tatar Cyrillic uppercase
36 const C_LC = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'; # Crimean Tatar Cyrillic lowercase
// NOTE(review): per the trailing comments, the final "C" and "Ñ" in the two
// consonant classes below are additions to the Cyrillic set - confirm they are
// meant to be the Latin letters.
37 const C_CONS_UC = 'БВГДЖЗЙКЛМНПРСТФХЦЧШЩCÑ'; # Crimean Tatar Cyrillic + CÑ uppercase consonants
38 const C_CONS_LC = 'бвгджзйклмнпрстфхцчшщcñ'; # Crimean Tatar Cyrillic + CÑ lowercase consonants
39 const C_M_CONS = 'бгкмшcБГКМШC'; # Crimean Tatar Cyrillic M-type consonants
40
41 // Crimean Tatar Cyrillic + CÑ consonants
42 const C_CONS = 'бвгджзйклмнпрстфхцчшщcñБВГДЖЗЙКЛМНПРСТФХЦЧШЩCÑ';
43
44 // Latin
45 const L_UC = 'AÂBCÇDEFGĞHIİJKLMNÑOÖPQRSŞTUÜVYZ'; # Crimean Tatar Latin uppercase
46 const L_LC = 'aâbcçdefgğhıijklmnñoöpqrsştuüvyz'; # Crimean Tatar Latin lowercase
47 const L_N_CONS_UC = 'ÇNRSTZ'; # Crimean Tatar Latin N-type upper case consonants
48 const L_N_CONS_LC = 'çnrstz'; # Crimean Tatar Latin N-type lower case consonants
49 const L_N_CONS = 'çnrstzÇNRSTZ'; # Crimean Tatar Latin N-type consonants
50 const L_M_CONS = 'bcgkmpşBCGKMPŞ'; # Crimean Tatar Latin M-type consonants
51 const L_CONS_UC = 'BCÇDFGĞHJKLMNÑPQRSŞTVZ'; # Crimean Tatar Latin uppercase consonants
52 const L_CONS_LC = 'bcçdfgğhjklmnñpqrsştvz'; # Crimean Tatar Latin lowercase consonants
53 const L_CONS = 'bcçdfgğhjklmnñpqrsştvzBCÇDFGĞHJKLMNÑPQRSŞTVZ'; # Crimean Tatar Latin consonants
54 const L_VOW_UC = 'AÂEIİOÖUÜ'; # Crimean Tatar Latin uppercase vowels
55 const L_VOW = 'aâeıioöuüAÂEIİOÖUÜ'; # Crimean Tatar Latin vowels
56 const L_F_UC = 'EİÖÜ'; # Crimean Tatar Latin uppercase front vowels
57 const L_F = 'eiöüEİÖÜ'; # Crimean Tatar Latin front vowels
58
/**
 * Build the converter and load the exception tables straight away.
 *
 * All arguments are forwarded unchanged to the parent constructor.
 *
 * @param Language $langobj
 * @param mixed $maincode
 * @param array $variants
 * @param array $variantfallbacks
 * @param array $flags
 */
function __construct(
	Language $langobj,
	$maincode,
	$variants = [],
	$variantfallbacks = [],
	$flags = []
) {
	parent::__construct( $langobj, $maincode, $variants, $variantfallbacks, $flags );

	// The exceptions are defined in code, so deferring them buys nothing.
	// Were this left to loadDefaultTables(), they would never be loaded
	// when the conversion tables themselves come from cache.
	$this->loadExceptions();
}
78
/**
 * Cyrillic-to-Latin replacement pairs. loadDefaultTables() wraps this array
 * in the ReplacementArray registered for the 'crh-latn' variant.
 */
79 public $mCyrillicToLatin = [
80
81 ## these are independent of location in the word, but have
82 ## to go first so other transforms don't bleed them
83 'гъ' => 'ğ', 'Гъ' => 'Ğ', 'ГЪ' => 'Ğ',
84 'къ' => 'q', 'Къ' => 'Q', 'КЪ' => 'Q',
85 'нъ' => 'ñ', 'Нъ' => 'Ñ', 'НЪ' => 'Ñ',
86 'дж' => 'c', 'Дж' => 'C', 'ДЖ' => 'C',
87
// one Cyrillic letter to one Latin letter
88 'А' => 'A', 'а' => 'a', 'Б' => 'B', 'б' => 'b',
89 'В' => 'V', 'в' => 'v', 'Г' => 'G', 'г' => 'g',
90 'Д' => 'D', 'д' => 'd', 'Ж' => 'J', 'ж' => 'j',
91 'З' => 'Z', 'з' => 'z', 'И' => 'İ', 'и' => 'i',
92 'Й' => 'Y', 'й' => 'y', 'К' => 'K', 'к' => 'k',
93 'Л' => 'L', 'л' => 'l', 'М' => 'M', 'м' => 'm',
94 'Н' => 'N', 'н' => 'n', 'П' => 'P', 'п' => 'p',
95 'Р' => 'R', 'р' => 'r', 'С' => 'S', 'с' => 's',
96 'Т' => 'T', 'т' => 't', 'Ф' => 'F', 'ф' => 'f',
97 'Х' => 'H', 'х' => 'h', 'Ч' => 'Ç', 'ч' => 'ç',
98 'Ш' => 'Ş', 'ш' => 'ş', 'Ы' => 'I', 'ы' => 'ı',
99 'Э' => 'E', 'э' => 'e', 'Е' => 'E', 'е' => 'e',
100 'Я' => 'Â', 'я' => 'â', 'У' => 'U', 'у' => 'u',
101 'О' => 'O', 'о' => 'o',
102
// letters that expand to two Latin characters
103 'Ё' => 'Yo', 'ё' => 'yo', 'Ю' => 'Yu', 'ю' => 'yu',
104 'Ц' => 'Ts', 'ц' => 'ts', 'Щ' => 'Şç', 'щ' => 'şç',
// soft and hard signs are dropped
105 'Ь' => '', 'ь' => '', 'Ъ' => '', 'ъ' => '',
106
107 ];
108
/**
 * Latin-to-Cyrillic replacement pairs. loadDefaultTables() wraps this array
 * in the ReplacementArray registered for the 'crh-cyrl' variant.
 */
109 public $mLatinToCyrillic = [
110 'Â' => 'Я', 'â' => 'я', 'B' => 'Б', 'b' => 'б',
111 'Ç' => 'Ч', 'ç' => 'ч', 'D' => 'Д', 'd' => 'д',
112 'F' => 'Ф', 'f' => 'ф', 'G' => 'Г', 'g' => 'г',
113 'H' => 'Х', 'h' => 'х', 'I' => 'Ы', 'ı' => 'ы',
114 'İ' => 'И', 'i' => 'и', 'J' => 'Ж', 'j' => 'ж',
115 'K' => 'К', 'k' => 'к', 'L' => 'Л', 'l' => 'л',
116 'M' => 'М', 'm' => 'м', 'N' => 'Н', 'n' => 'н',
117 'O' => 'О', 'o' => 'о', 'P' => 'П', 'p' => 'п',
118 'R' => 'Р', 'r' => 'р', 'S' => 'С', 's' => 'с',
119 'Ş' => 'Ш', 'ş' => 'ш', 'T' => 'Т', 't' => 'т',
120 'V' => 'В', 'v' => 'в', 'Z' => 'З', 'z' => 'з',
121
// two-letter Latin sequences that collapse to a single Cyrillic letter
122 'ya' => 'я', 'Ya' => 'Я', 'YA' => 'Я',
123 'ye' => 'е', 'YE' => 'Е', 'Ye' => 'Е',
124
125 // hack, hack, hack
126 'A' => 'А', 'a' => 'а', 'E' => 'Е', 'e' => 'е',
127 'Ö' => 'Ё', 'ö' => 'ё', 'U' => 'У', 'u' => 'у',
128 'Ü' => 'Ю', 'ü' => 'ю', 'Y' => 'Й', 'y' => 'й',
// single Latin letters that expand to Cyrillic digraphs
129 'C' => 'Дж', 'c' => 'дж', 'Ğ' => 'Гъ', 'ğ' => 'гъ',
130 'Ñ' => 'Нъ', 'ñ' => 'нъ', 'Q' => 'Къ', 'q' => 'къ',
131
132 ];
133
// Exception tables; both are filled by loadExceptions() and applied with
// strtr() in regsConverter() before the main table conversion.
134 public $mCyrl2LatnExceptions = [];
135 public $mLatn2CyrlExceptions = [];
136
// Regex pattern => replacement maps; filled by loadExceptions() and applied
// with preg_replace() in regsConverter() before the main table conversion.
137 public $mCyrl2LatnPatterns = [];
138 public $mLatn2CyrlPatterns = [];
139
// Regex pattern => replacement map run as the last step of the conversion
// to Cyrillic in regsConverter().
140 public $mCyrlCleanUpRegexes = [];
141
// Set to true by loadExceptions() so that later calls return immediately.
142 public $mExceptionsLoaded = false;
143
/**
 * Populate $this->mTables with one ReplacementArray per variant code.
 */
function loadDefaultTables() {
	$toLatin = new ReplacementArray( $this->mCyrillicToLatin );
	$toCyrillic = new ReplacementArray( $this->mLatinToCyrillic );
	// The bare 'crh' code gets a ReplacementArray built without any pairs.
	$plain = new ReplacementArray();

	$this->mTables = [
		'crh-latn' => $toLatin,
		'crh-cyrl' => $toCyrillic,
		'crh' => $plain,
	];
}
151
/**
 * Fill the exception, pattern and clean-up tables from CrhExceptions.
 *
 * Does the work only on the first call; later calls do nothing.
 */
function loadExceptions() {
	if ( !$this->mExceptionsLoaded ) {
		$this->mExceptionsLoaded = true;

		$provider = new MediaWiki\Languages\Data\CrhExceptions();
		// Lower-case and upper-case alphabets of both scripts, Latin first.
		$lowerCaseLetters = self::L_LC . self::C_LC;
		$upperCaseLetters = self::L_UC . self::C_UC;

		list(
			$this->mCyrl2LatnExceptions,
			$this->mLatn2CyrlExceptions,
			$this->mCyrl2LatnPatterns,
			$this->mLatn2CyrlPatterns,
			$this->mCyrlCleanUpRegexes
		) = $provider->loadExceptions( $lowerCaseLetters, $upperCaseLetters );
	}
}
163
/**
 * Variant-aware link lookup with two Crimean Tatar specific restrictions:
 * targets in the user namespaces are left alone, and the link text is
 * restored when the preferred variant is the main language code.
 *
 * @param string &$link
 * @param mixed &$nt Only inspected when it is an object (getNamespace() is called on it)
 * @param bool $ignoreOtherCond Forwarded to the parent implementation
 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	// Never touch links into the user / user talk namespaces.
	if ( is_object( $nt ) ) {
		$namespace = $nt->getNamespace();
		$inUserSpace = ( $namespace == NS_USER || $namespace == NS_USER_TALK );
		if ( $inUserSpace ) {
			return;
		}
	}

	$untouchedLink = $link;
	parent::findVariantLink( $link, $nt, $ignoreOtherCond );

	// With the main code as preferred variant, undo whatever the parent did to $link.
	$prefersMainCode = ( $this->getPreferredVariant() == $this->mMainLanguageCode );
	if ( $prefersMainCode ) {
		$link = $untouchedLink;
	}
}
189
/**
 * Convert $text to the given Crimean Tatar variant.
 *
 * Anything other than 'crh-cyrl' or 'crh-latn' returns $text unchanged.
 * When converting to Cyrillic, Roman numerals (VII, XIX, ...) are copied
 * over verbatim and only the text between them is converted.
 *
 * @param string $text
 * @param string $toVariant
 * @return string
 * @throws MWException If no conversion table exists for $toVariant
 */
function translate( $text, $toVariant ) {
	switch ( $toVariant ) {
		case 'crh-cyrl':
		case 'crh-latn':
			break;
		default:
			return $text;
	}

	if ( !$this->mTablesLoaded ) {
		$this->loadTables();
	}

	if ( !isset( $this->mTables[$toVariant] ) ) {
		throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) );
	}

	if ( $toVariant != 'crh-cyrl' ) {
		// Just process the whole string in one go
		return $this->regsConverter( $text, $toVariant );
	}

	/* Check for roman numbers like VII, XIX...
	 * Only need to split on Roman numerals when converting to Cyrillic
	 * Lookahead assertion ensures $roman doesn't match the empty string, and
	 * non-period after first "Roman" character allows initials to be converted
	 */
	$roman = '(?=[MDCLXVI]([^.]|$))M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})';

	$breaks = '([^\w\x80-\xff])';

	// allow for multiple Roman numerals in a row; rare but it happens
	$romanRegex = '/^' . $roman . '$|^(' . $roman . $breaks . ')+|(' . $breaks . $roman . ')+$|' .
		$breaks . '(' . $roman . $breaks . ')+/';

	$matches = preg_split( $romanRegex, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
	if ( $matches === false ) {
		// preg_split() reports a PCRE failure (e.g. an exhausted backtrack
		// limit) by returning false. Converting without Roman numeral
		// protection is better than silently returning an empty string.
		return $this->regsConverter( $text, $toVariant );
	}

	$mstart = 0;
	$ret = '';
	foreach ( $matches as $m ) {
		// copy over Roman numerals: the bytes between the end of the
		// previous piece and the start of this one are the delimiter
		$ret .= substr( $text, $mstart, $m[1] - $mstart );

		// process everything else
		if ( $m[0] !== '' ) {
			$ret .= $this->regsConverter( $m[0], $toVariant );
		}

		$mstart = $m[1] + strlen( $m[0] );
	}

	return $ret;
}
253
/**
 * Convert one piece of text to $toVariant: exception words first, then the
 * regex patterns, then the parent's translate(), then quote-mark fixes
 * (plus the clean-up regexes when converting to Cyrillic).
 *
 * A preg_replace() call that fails returns null; such a pattern is skipped
 * so that the text converted so far is kept instead of being discarded.
 *
 * @param string $text
 * @param string $toVariant 'crh-latn' or 'crh-cyrl'; anything else returns $text unchanged
 * @return string
 */
private function regsConverter( $text, $toVariant ) {
	if ( $text == '' ) {
		return $text;
	}

	switch ( $toVariant ) {
		case 'crh-latn':
			$text = strtr( $text, $this->mCyrl2LatnExceptions );
			foreach ( $this->mCyrl2LatnPatterns as $pat => $rep ) {
				$replaced = preg_replace( $pat, $rep, $text );
				if ( $replaced !== null ) {
					$text = $replaced;
				}
			}
			$text = parent::translate( $text, $toVariant );
			$text = strtr( $text, [ '«' => '"', '»' => '"', ] );
			return $text;
		case 'crh-cyrl':
			$text = strtr( $text, $this->mLatn2CyrlExceptions );
			foreach ( $this->mLatn2CyrlPatterns as $pat => $rep ) {
				$replaced = preg_replace( $pat, $rep, $text );
				if ( $replaced !== null ) {
					$text = $replaced;
				}
			}
			$text = parent::translate( $text, $toVariant );
			$text = strtr( $text, [ '“' => '«', '”' => '»', ] );
			// Clean-up pass that only exists for the Cyrillic direction.
			foreach ( $this->mCyrlCleanUpRegexes as $pat => $rep ) {
				$replaced = preg_replace( $pat, $rep, $text );
				if ( $replaced !== null ) {
					$text = $replaced;
				}
			}
			return $text;
		default:
			return $text;
	}
}
283
284}
285
/**
 * Crimean Tatar (Qırımtatarca) language class; installs a CrhConverter as
 * its converter.
 */
class LanguageCrh extends Language {

	function __construct() {
		parent::__construct();

		// Variant to try when the key variant cannot be used.
		$fallbackFor = [
			'crh' => 'crh-latn',
			'crh-cyrl' => 'crh-latn',
			'crh-latn' => 'crh-cyrl',
		];
		$knownVariants = [ 'crh', 'crh-cyrl', 'crh-latn' ];

		$this->mConverter = new CrhConverter( $this, 'crh', $knownVariants, $fallbackFor );
	}
}
Crimean Tatar (Qırımtatarca) converter routines.
const L_N_CONS_UC
const L_N_CONS_LC
__construct(Language $langobj, $maincode, $variants=[], $variantfallbacks=[], $flags=[])
Base class for language conversion.
getPreferredVariant()
Get preferred language variant.
loadTables( $fromCache=true)
Load conversion tables either from the cache or the disk.
translate( $text, $variant)
Translate a string to a variant.
findVariantLink(&$link, &$nt, $ignoreOtherCond=false)
If a language supports multiple variants, it is possible that a non-existing link in one variant actually exists in another variant.
loadDefaultTables()
Load default conversion tables.
Crimean Tatar (Qırımtatarca)
Internationalisation code.
Definition Language.php:35
MediaWiki exception.
Wrapper around strtr() that holds replacements.
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user, for example updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list.
Definition deferred.txt:11
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition hooks.txt:2054
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition hooks.txt:3106
const NS_USER_TALK
Definition Defines.php:67
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the but these can almost always be programmed around *Although Postgres has a true BOOLEAN type
Definition postgres.txt:36