MediaWiki REL1_31
LanguageCrh.php
Go to the documentation of this file.
1<?php
// Working character ranges.
// WORD_BEGINS / WORD_ENDS list the characters treated as word delimiters.
// They are pre-escaped, presumably so they can be dropped straight into a
// PCRE character class -- they are not used in this part of the file, so
// confirm against the consumer. They must contain only the escapes shown
// here: no invisible characters such as U+200D may sit between a backslash
// and the parenthesis it escapes.
const WORD_BEGINS = '\r\s\"\'\(\)\-<>\[\]\/.,:;!?';
const WORD_ENDS = '\r\s\"\'\(\)\-<>\[\]\/.,:;!?';

// Cyrillic
const C_UC = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'; # Crimean Tatar Cyrillic uppercase
const C_LC = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'; # Crimean Tatar Cyrillic lowercase
const C_CONS_UC = 'БВГДЖЗЙКЛМНПРСТФХЦЧШЩCÑ'; # Crimean Tatar Cyrillic + CÑ uppercase consonants
const C_CONS_LC = 'бвгджзйклмнпрстфхцчшщcñ'; # Crimean Tatar Cyrillic + CÑ lowercase consonants
const C_M_CONS = 'бгкмпшcБГКМПШC'; # Crimean Tatar Cyrillic M-type consonants

# Crimean Tatar Cyrillic + CÑ consonants (lowercase followed by uppercase)
const C_CONS = 'бвгджзйклмнпрстфхцчшщcñБВГДЖЗЙКЛМНПРСТФХЦЧШЩCÑ';

// Latin
const L_UC = 'AÂBCÇDEFGĞHIİJKLMNÑOÖPQRSŞTUÜVYZ'; # Crimean Tatar Latin uppercase
const L_LC = 'aâbcçdefgğhıijklmnñoöpqrsştuüvyz'; # Crimean Tatar Latin lowercase
const L_N_CONS_UC = 'ÇNRSTZ'; # Crimean Tatar Latin N-type upper case consonants
const L_N_CONS_LC = 'çnrstz'; # Crimean Tatar Latin N-type lower case consonants
const L_N_CONS = 'çnrstzÇNRSTZ'; # Crimean Tatar Latin N-type consonants
const L_M_CONS = 'bcgkmpşBCGKMPŞ'; # Crimean Tatar Latin M-type consonants
const L_CONS_UC = 'BCÇDFGHJKLMNÑPRSŞTVZ'; # Crimean Tatar Latin uppercase consonants
const L_CONS_LC = 'bcçdfghjklmnñprsştvz'; # Crimean Tatar Latin lowercase consonants
const L_CONS = 'bcçdfghjklmnñprsştvzBCÇDFGHJKLMNÑPRSŞTVZ'; # Crimean Tatar Latin consonants
const L_VOW_UC = 'AÂEIİOÖUÜ'; # Crimean Tatar Latin uppercase vowels
const L_VOW = 'aâeıioöuüAÂEIİOÖUÜ'; # Crimean Tatar Latin vowels
const L_F_UC = 'EİÖÜ'; # Crimean Tatar Latin uppercase front vowels
const L_F = 'eiöüEİÖÜ'; # Crimean Tatar Latin front vowels
60
/**
 * Build the converter and load the exception tables straight away.
 *
 * All arguments are handed to the parent constructor unchanged.
 *
 * @param mixed $langobj Owning language object (LanguageCrh passes itself)
 * @param string $maincode Main language code (LanguageCrh passes 'crh')
 * @param array $variants Variant codes
 * @param array $variantfallbacks Map of variant code => fallback variant code
 * @param array $flags Passed through to the parent; semantics not visible here
 */
function __construct(
	$langobj, $maincode, $variants = [], $variantfallbacks = [], $flags = []
) {
	parent::__construct( $langobj, $maincode, $variants, $variantfallbacks, $flags );

	// The exceptions live in code, so there is nothing to gain by deferring
	// them. If loading were left to loadDefaultTables(), they would never be
	// loaded whenever the tables themselves come out of the cache.
	$this->loadExceptions();
}
80
/**
 * Cyrillic => Latin replacement pairs.
 *
 * loadDefaultTables() wraps this array in the ReplacementArray used for the
 * 'crh-latn' variant.
 */
public $mCyrillicToLatin = [

	## Digraphs: these do not depend on the position in the word, but
	## they have to come first so other transforms don't bleed them.
	'гъ' => 'ğ', 'Гъ' => 'Ğ', 'ГЪ' => 'Ğ',
	'къ' => 'q', 'Къ' => 'Q', 'КЪ' => 'Q',
	'нъ' => 'ñ', 'Нъ' => 'Ñ', 'НЪ' => 'Ñ',
	'дж' => 'c', 'Дж' => 'C', 'ДЖ' => 'C',

	## Single letters that map to a single Latin letter.
	'А' => 'A', 'а' => 'a',
	'Б' => 'B', 'б' => 'b',
	'В' => 'V', 'в' => 'v',
	'Г' => 'G', 'г' => 'g',
	'Д' => 'D', 'д' => 'd',
	'Ж' => 'J', 'ж' => 'j',
	'З' => 'Z', 'з' => 'z',
	'И' => 'İ', 'и' => 'i',
	'Й' => 'Y', 'й' => 'y',
	'К' => 'K', 'к' => 'k',
	'Л' => 'L', 'л' => 'l',
	'М' => 'M', 'м' => 'm',
	'Н' => 'N', 'н' => 'n',
	'П' => 'P', 'п' => 'p',
	'Р' => 'R', 'р' => 'r',
	'С' => 'S', 'с' => 's',
	'Т' => 'T', 'т' => 't',
	'Ф' => 'F', 'ф' => 'f',
	'Х' => 'H', 'х' => 'h',
	'Ч' => 'Ç', 'ч' => 'ç',
	'Ш' => 'Ş', 'ш' => 'ş',
	'Ы' => 'I', 'ы' => 'ı',
	'Э' => 'E', 'э' => 'e',
	'Е' => 'E', 'е' => 'e',
	'Я' => 'Â', 'я' => 'â',
	'У' => 'U', 'у' => 'u',
	'О' => 'O', 'о' => 'o',

	## Letters that expand to two Latin letters.
	'Ё' => 'Yo', 'ё' => 'yo',
	'Ю' => 'Yu', 'ю' => 'yu',
	'Ц' => 'Ts', 'ц' => 'ts',
	'Щ' => 'Şç', 'щ' => 'şç',

	## Soft and hard signs are dropped.
	'Ь' => '', 'ь' => '',
	'Ъ' => '', 'ъ' => '',

];
110
/**
 * Latin => Cyrillic replacement pairs.
 *
 * loadDefaultTables() wraps this array in the ReplacementArray used for the
 * 'crh-cyrl' variant.
 */
public $mLatinToCyrillic = [
	// Single letters with a direct Cyrillic counterpart.
	'Â' => 'Я', 'â' => 'я',
	'B' => 'Б', 'b' => 'б',
	'Ç' => 'Ч', 'ç' => 'ч',
	'D' => 'Д', 'd' => 'д',
	'F' => 'Ф', 'f' => 'ф',
	'G' => 'Г', 'g' => 'г',
	'H' => 'Х', 'h' => 'х',
	'I' => 'Ы', 'ı' => 'ы',
	'İ' => 'И', 'i' => 'и',
	'J' => 'Ж', 'j' => 'ж',
	'K' => 'К', 'k' => 'к',
	'L' => 'Л', 'l' => 'л',
	'M' => 'М', 'm' => 'м',
	'N' => 'Н', 'n' => 'н',
	'O' => 'О', 'o' => 'о',
	'P' => 'П', 'p' => 'п',
	'R' => 'Р', 'r' => 'р',
	'S' => 'С', 's' => 'с',
	'Ş' => 'Ш', 'ş' => 'ш',
	'T' => 'Т', 't' => 'т',
	'V' => 'В', 'v' => 'в',
	'Z' => 'З', 'z' => 'з',

	// Two-letter Latin sequences that collapse to one Cyrillic letter.
	'ya' => 'я', 'Ya' => 'Я', 'YA' => 'Я',
	'ye' => 'е', 'YE' => 'Е', 'Ye' => 'Е',

	// hack, hack, hack
	'A' => 'А', 'a' => 'а',
	'E' => 'Е', 'e' => 'е',
	'Ö' => 'Ё', 'ö' => 'ё',
	'U' => 'У', 'u' => 'у',
	'Ü' => 'Ю', 'ü' => 'ю',
	'Y' => 'Й', 'y' => 'й',
	'C' => 'Дж', 'c' => 'дж',
	'Ğ' => 'Гъ', 'ğ' => 'гъ',
	'Ñ' => 'Нъ', 'ñ' => 'нъ',
	'Q' => 'Къ', 'q' => 'къ',

];
135
/** @var array Whole-token exceptions: source token => ready-made replacement */
public $mExceptions = [];

/** @var array Regex => replacement pairs applied when converting to 'crh-latn' */
public $mCyrl2LatnPatterns = [];

/** @var array Regex => replacement pairs applied when converting to 'crh-cyrl' */
public $mLatn2CyrlPatterns = [];

/** @var array Regex => replacement pairs applied as the final 'cyrl-cleanup' pass */
public $mCyrlCleanUpRegexes = [];

/** @var bool Set once loadExceptions() has run, so the four arrays above are filled only once */
public $mExceptionsLoaded = false;
142
/**
 * Install the default conversion tables, one ReplacementArray per variant.
 *
 * The plain 'crh' variant gets an empty replacement table.
 */
function loadDefaultTables() {
	$toLatin = new ReplacementArray( $this->mCyrillicToLatin );
	$toCyrillic = new ReplacementArray( $this->mLatinToCyrillic );
	$noChange = new ReplacementArray();

	$this->mTables = [
		'crh-latn' => $toLatin,
		'crh-cyrl' => $toCyrillic,
		'crh' => $noChange,
	];
}
150
/**
 * Fill the exception map and the three regex tables from CrhExceptions.
 *
 * Runs at most once per converter instance; later calls do nothing.
 */
function loadExceptions() {
	if ( !$this->mExceptionsLoaded ) {
		// Flag first, exactly as before, so the guard is set before loading.
		$this->mExceptionsLoaded = true;

		$loader = new MediaWiki\Languages\Data\CrhExceptions();

		// Both scripts' alphabets are passed together: lowercase, then uppercase.
		$lowerCase = self::L_LC . self::C_LC;
		$upperCase = self::L_UC . self::C_UC;
		$loaded = $loader->loadExceptions( $lowerCase, $upperCase );

		list(
			$this->mExceptions,
			$this->mCyrl2LatnPatterns,
			$this->mLatn2CyrlPatterns,
			$this->mCyrlCleanUpRegexes
		) = $loaded;
	}
}
162
/**
 * Variant-aware link lookup that leaves user-namespace titles alone.
 *
 * Titles in NS_USER and NS_USER_TALK are skipped entirely. For all other
 * titles the parent implementation runs, but its change to $link is undone
 * when the preferred variant is the main language code.
 *
 * @param string &$link Link text; may be rewritten by the parent
 * @param mixed &$nt Title object (only inspected when it is an object)
 * @param bool $ignoreOtherCond Passed through to the parent unchanged
 */
function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	// check for user namespace
	$isUserSpace = is_object( $nt )
		&& in_array( $nt->getNamespace(), [ NS_USER, NS_USER_TALK ] );
	if ( $isUserSpace ) {
		return;
	}

	$originalLink = $link;
	parent::findVariantLink( $link, $nt, $ignoreOtherCond );

	$wantsMainCode = $this->getPreferredVariant() == $this->mMainLanguageCode;
	if ( $wantsMainCode ) {
		$link = $originalLink;
	}
}
188
/**
 * Convert text to the given variant, one token at a time.
 *
 * The text is split into runs of source-script letters. Each run is either
 * replaced from the exception map, copied verbatim (empty tokens, Roman
 * numerals, tokens mixing in characters outside the source script), or run
 * through the regex rules followed by the parent's table-based translation.
 * Everything between the runs is copied through untouched.
 *
 * @param string $text Text to convert; expected to be valid UTF-8
 * @param string $toVariant Target variant code
 * @return string Converted text; $text unchanged for any variant other than
 *   'crh-cyrl' / 'crh-latn', or when $text cannot be tokenised
 * @throws MWException If no conversion table exists for $toVariant
 */
function translate( $text, $toVariant ) {
	// $letters holds the alphabet of the script we are converting FROM.
	switch ( $toVariant ) {
		case 'crh-cyrl':
			$letters = self::L_UC . self::L_LC . "\'";
			break;
		case 'crh-latn':
			$letters = self::C_UC . self::C_LC;
			break;
		default:
			return $text;
	}

	if ( !$this->mTablesLoaded ) {
		$this->loadTables();
	}

	if ( !isset( $this->mTables[$toVariant] ) ) {
		throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) );
	}

	// check for roman numbers like VII, XIX...
	// Lookahead assertion ensures $roman doesn't match the empty string
	$roman = '/^(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})$/u';

	// Any character outside the source alphabet marks a token as mixed
	// orthography. The "u" modifier is required: $letters contains multibyte
	// characters, and without it the class is matched byte by byte, so
	// foreign letters built from the same UTF-8 bytes slip through.
	$mixed = '/[^' . $letters . ']/u';

	# match any sub-string of the relevant letters and convert it
	$matches = preg_split( '/(\b|^)[^' . $letters . ']+(\b|$)/u',
		$text, -1, PREG_SPLIT_OFFSET_CAPTURE );
	if ( $matches === false ) {
		// preg_split() failed (e.g. $text is not valid UTF-8). Return the
		// input untouched rather than iterating over false and losing it.
		return $text;
	}

	$mstart = 0;
	$ret = '';
	foreach ( $matches as $m ) {
		// $m[0] is the token, $m[1] its byte offset in $text; the byte-based
		// substr()/strlen() calls below are therefore the right ones.

		# copy over the non-matching bit
		$ret .= substr( $text, $mstart, $m[1] - $mstart );

		# skip certain classes of strings
		if ( array_key_exists( $m[0], $this->mExceptions ) ) {
			# if it's an exception, just copy down the right answer
			$ret .= $this->mExceptions[$m[0]];
		} elseif ( !$m[0] || # empty strings
			preg_match( $roman, $m[0] ) || # roman numerals
			preg_match( $mixed, $m[0] ) # mixed orthography
		) {
			$ret .= $m[0];
		} else {
			# convert according to the rules
			$token = $this->regsConverter( $m[0], $toVariant );
			$ret .= parent::translate( $token, $toVariant );
		}
		$mstart = $m[1] + strlen( $m[0] );
	}

	# pick up stray quote marks
	switch ( $toVariant ) {
		case 'crh-cyrl':
			$ret = strtr( $ret, [ '“' => '«', '”' => '»', ] );
			$ret = $this->regsConverter( $ret, 'cyrl-cleanup' );
			break;
		case 'crh-latn':
			$ret = strtr( $ret, [ '«' => '"', '»' => '"', ] );
			break;
	}

	return $ret;
}
264
/**
 * Apply one of the regex rule tables to a piece of text.
 *
 * @param string $text Text to transform
 * @param string $toVariant 'crh-latn', 'crh-cyrl', or the pseudo-variant
 *   'cyrl-cleanup'; any other value leaves $text unchanged
 * @return string Text after every rule of the chosen table has been applied
 *   in table order
 */
private function regsConverter( $text, $toVariant ) {
	if ( $text == '' ) {
		return $text;
	}

	// Choose the rule table for the requested conversion.
	switch ( $toVariant ) {
		case 'crh-latn':
			$rules = $this->mCyrl2LatnPatterns;
			break;
		case 'crh-cyrl':
			$rules = $this->mLatn2CyrlPatterns;
			break;
		case 'cyrl-cleanup':
			$rules = $this->mCyrlCleanUpRegexes;
			break;
		default:
			return $text;
	}

	// Each rule operates on the output of the previous one.
	foreach ( $rules as $regex => $replacement ) {
		$text = preg_replace( $regex, $replacement, $text );
	}

	return $text;
}
290
291}
292
/**
 * Crimean Tatar (Qırımtatarca) language class.
 *
 * Attaches a CrhConverter that handles the 'crh', 'crh-cyrl' and 'crh-latn'
 * variants.
 */
class LanguageCrh extends Language {

	function __construct() {
		parent::__construct();

		// Plain 'crh' and Cyrillic both fall back to Latin; Latin falls back
		// to Cyrillic.
		$this->mConverter = new CrhConverter(
			$this,
			'crh',
			[ 'crh', 'crh-cyrl', 'crh-latn' ],
			[
				'crh' => 'crh-latn',
				'crh-cyrl' => 'crh-latn',
				'crh-latn' => 'crh-cyrl',
			]
		);
	}
}
Crimean Tatar (Qırımtatarca) converter routines.
__construct( $langobj, $maincode, $variants=[], $variantfallbacks=[], $flags=[])
const L_N_CONS_UC
const WORD_BEGINS
const L_N_CONS_LC
Base class for language conversion.
getPreferredVariant()
Get preferred language variant.
loadTables( $fromCache=true)
Load conversion tables either from the cache or the disk.
translate( $text, $variant)
Translate a string to a variant.
findVariantLink(&$link, &$nt, $ignoreOtherCond=false)
If a language supports multiple variants, it is possible that a non-existing link in one variant actually exists in another variant.
loadDefaultTables()
Load default conversion tables.
Crimean Tatar (Qırımtatarca)
Internationalisation code.
Definition Language.php:35
MediaWiki exception.
Wrapper around strtr() that holds replacements.
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user. For example, updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list.
Definition deferred.txt:11
it sets a lot of them automatically from query strings
Definition design.txt:93
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses & $ret
Definition hooks.txt:2005
usually copyright or history_copyright This message must be in HTML not wikitext & $link
Definition hooks.txt:3021
const NS_USER_TALK
Definition Defines.php:77
This document describes the state of Postgres support in MediaWiki, and is fairly well maintained. The main code is very well supported, while extensions are very hit and miss. Overall, it is probably the most supported database after MySQL. Much of the work in making MediaWiki database-agnostic came about through the work of creating Postgres support, but without copying over all the usage comments. General notes on the conversion: there are differences between the databases, but these can almost always be programmed around. * Although Postgres has a true BOOLEAN type, ...
Definition postgres.txt:30