MediaWiki REL1_35
KuConverter.php
Go to the documentation of this file.
1<?php
30
/**
 * Replacement map from Arabic-script characters to Latin-script strings.
 *
 * Wrapped in a ReplacementArray and installed as the 'ku-latn' table by
 * loadDefaultTables(). Several Arabic-script keys share one Latin value
 * (e.g. four different letters all map to 'h'), so the map is many-to-one.
 *
 * @var string[]
 */
34 public $mArabicToLatin = [
35 'ب' => 'b', 'ج' => 'c', 'چ' => 'ç', 'د' => 'd', 'ف' => 'f', 'گ' => 'g', 'ھ' => 'h',
36 'ہ' => 'h', 'ه' => 'h', 'ح' => 'h', 'ژ' => 'j', 'ك' => 'k', 'ک' => 'k', 'ل' => 'l',
37 'م' => 'm', 'ن' => 'n', 'پ' => 'p', 'ق' => 'q', 'ر' => 'r', 'س' => 's', 'ش' => 'ş',
38 'ت' => 't', 'ڤ' => 'v', 'خ' => 'x', 'غ' => 'x', 'ز' => 'z',
39
40// ک و => ku -- is correct
41// و ك=> ku -- is also correct
42
43 /* Double vowels and semivowels */
44 'ڵ' => 'll', # ll
45 'ڕ' => 'rr', # rr
46 'ا' => 'a',
47 # 'ئێ' => 'ê', # initial e
48 'ە' => 'e',
49 'ه‌' => 'e', # with one non-joiner
50 'ه‌‌' => 'e', # with two non-joiner
51 'ة' => 'e',
52 'ێ' => 'ê',
53 'ي' => 'î',
54 'ی' => 'î', # U+06CC db 8c ARABIC LETTER FARSI YEH
55 'ى' => 'î', # U+0649 d9 89 ARABIC LETTER ALEF MAKSURA
56 'ۆ' => 'o',
57 'و' => 'w',
58 'ئ' => '', # initial hemze should not be shown
59 '،' => ',',
60 'ع' => '\'', # ayn
61 '؟' => '?',
62
63 # digits
64 '٠' => '0', # U+0660
65 '١' => '1', # U+0661
66 '٢' => '2', # U+0662
67 '٣' => '3', # U+0663
68 '٤' => '4', # U+0664
69 '٥' => '5', # U+0665
70 '٦' => '6', # U+0666
71 '٧' => '7', # U+0667
72 '٨' => '8', # U+0668
73 '٩' => '9', # U+0669
74 ];
75
/**
 * Replacement map from Latin-script strings to Arabic-script strings.
 *
 * Wrapped in a ReplacementArray and installed as the 'ku-arab' table by
 * loadDefaultTables(). Upper- and lower-case Latin consonants are listed
 * separately; 'i' maps to the empty string, so it produces no output.
 * The Latin-to-Arabic digit entries are commented out and therefore inactive.
 *
 * @var string[]
 */
79 public $mLatinToArabic = [
80 'b' => 'ب', 'c' => 'ج', 'ç' => 'چ', 'd' => 'د', 'f' => 'ف', 'g' => 'گ',
81 'h' => 'ه', 'j' => 'ژ', 'k' => 'ک', 'l' => 'ل',
82 'm' => 'م', 'n' => 'ن', 'p' => 'پ', 'q' => 'ق', 'r' => 'ر', 's' => 'س', 'ş' => 'ش',
83 't' => 'ت', 'v' => 'ڤ',
84 'x' => 'خ', 'y' => 'ی', 'z' => 'ز',
85
86 'B' => 'ب', 'C' => 'ج', 'Ç' => 'چ', 'D' => 'د', 'F' => 'ف', 'G' => 'گ',
87 'H' => 'ح', 'J' => 'ژ', 'K' => 'ک', 'L' => 'ل',
88 'M' => 'م', 'N' => 'ن', 'P' => 'پ', 'Q' => 'ق', 'R' => 'ر', 'S' => 'س', 'Ş' => 'ش',
89 'T' => 'ت', 'V' => 'ڤ', 'W' => 'و', 'X' => 'خ',
90 'Y' => 'ی', 'Z' => 'ز',
91
92 /* Double consonants */
93 # 'll' => 'ڵ', # if it becomes possible to handle double-l and l separately
94 # 'rr' => 'ڕ', # same for double-r
95
96 /* Single capital letters */
97 // ' C' => 'ج',
98
99 /* Vowels */
100 'a' => 'ا',
101 'e' => 'ە',
102 'ê' => 'ێ',
103 'i' => '',
104 'î' => 'ی',
105 'o' => 'ۆ',
106 'u' => 'و',
107 'û' => 'وو',
108 'w' => 'و',
109 ',' => '،',
110 '?' => '؟',
111
112 # Try to replace the leading vowel
# NOTE(review): the space-prefixed keys below have values that end with a
# space instead of starting with one - confirm this placement is intended.
113 ' a' => 'ئا ',
114 ' e' => 'ئە ',
115 ' ê' => 'ئێ ',
116 ' î' => 'ئی ',
117 ' o' => 'ئۆ ',
118 ' u' => 'ئو ',
119 ' û' => 'ئوو ',
120 'A' => 'ئا',
121 'E' => 'ئە',
122 'Ê' => 'ئێ',
123 'Î' => 'ئی',
124 'O' => 'ئۆ',
125 'U' => 'ئو',
126 'Û' => 'ئوو',
127 ' A' => 'ئا ',
128 ' E' => 'ئە ',
129 ' Ê' => 'ئێ ',
130 ' Î' => 'ئی ',
131 ' O' => 'ئۆ ',
132 ' U' => 'ئو ',
133 ' Û' => 'ئوو ',
134 # Keep eyn deactivated for now: plain single quotes are simply too
135 # common to be interpreted as eyn.
136 # '\'' => 'ع',
137
138/* # deactivated for now, breaks links i.e. in header of Special:Recentchanges :-(
139 # digits
140 '0' => '٠', # U+0660
141 '1' => '١', # U+0661
142 '2' => '٢', # U+0662
143 '3' => '٣', # U+0663
144 '4' => '٤', # U+0664
145 '5' => '٥', # U+0665
146 '6' => '٦', # U+0666
147 '7' => '٧', # U+0667
148 '8' => '٨', # U+0668
149 '9' => '٩', # U+0669
150*/
151 ];
152
/**
 * Set up the converter for the code 'ku'.
 *
 * Builds the list of variant codes and the per-variant fallback map, then
 * hands both to the parent constructor together with $langobj.
 *
 * @param mixed $langobj Passed through unchanged to the parent constructor
 *   (presumably the Language object being converted - TODO confirm).
 */
156 public function __construct( $langobj ) {
157 $variants = [ 'ku', 'ku-arab', 'ku-latn' ];
// Each key is a variant code; its value is the variant named as its fallback.
158 $variantfallbacks = [
159 'ku' => 'ku-latn',
160 'ku-arab' => 'ku-latn',
161 'ku-latn' => 'ku-arab',
162 ];
163
164 parent::__construct( $langobj, 'ku', $variants, $variantfallbacks );
165 }
166
/**
 * Load default conversion tables.
 *
 * Overwrites $this->mTables with one ReplacementArray per variant:
 * 'ku-latn' is built from $mArabicToLatin, 'ku-arab' from $mLatinToArabic,
 * and 'ku' gets an empty ReplacementArray (no replacements).
 */
167 protected function loadDefaultTables() {
168 $this->mTables = [
169 'ku-latn' => new ReplacementArray( $this->mArabicToLatin ),
170 'ku-arab' => new ReplacementArray( $this->mLatinToArabic ),
171 'ku' => new ReplacementArray()
172 ];
173 }
174
/**
 * Translate text into the given variant.
 *
 * Loads the conversion tables, verifies that a table exists for
 * $toVariant, and then delegates the conversion to parent::translate().
 * The roman-numeral handling taken from the Kazakh converter is kept
 * below only as commented-out code and is not executed.
 *
 * @param mixed $text Text to convert; passed unchanged to parent::translate().
 * @param mixed $toVariant Variant code used as the key into $this->mTables.
 *
 * @throws MWException If $this->mTables has no entry for $toVariant.
 * @return mixed Whatever parent::translate() returns for the same arguments.
 */
185 public function translate( $text, $toVariant ) {
186 $this->loadTables();
187 /* From Kazakh interface, maybe we need it later
188 $breaks = '[^\w\x80-\xff]';
189 // regexp for roman numbers
190 // Lookahead assertion ensures $roman doesn't match the empty string
191 $roman = '(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})';
192 $roman = '';
193
194 $reg = '/^'.$roman.'$|^'.$roman.$breaks.'|'.$breaks.$roman.'$|'.$breaks.$roman.$breaks.'/';
195
196 $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE);
197
198 $m = array_shift($matches);
199 if( !isset( $this->mTables[$toVariant] ) ) {
200 throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) );
201 }
202 $ret = $this->mTables[$toVariant]->replace( $m[0] );
203 $mstart = $m[1]+strlen($m[0]);
204 foreach($matches as $m) {
205 $ret .= substr($text, $mstart, $m[1]-$mstart);
206 $ret .= parent::translate($m[0], $toVariant);
207 $mstart = $m[1] + strlen($m[0]);
208 }
209
210 return $ret;
211 */
212
// Fail loudly on an unknown variant; the message lists the table keys that do exist.
213 if ( !isset( $this->mTables[$toVariant] ) ) {
214 throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) );
215 }
216
217 return parent::translate( $text, $toVariant );
218 }
219}
Kurdish converter routines.
string[] $mArabicToLatin
loadDefaultTables()
Load default conversion tables.
string[] $mLatinToArabic
__construct( $langobj)
translate( $text, $toVariant)
Translates text into the given variant.
A class that extends LanguageConverter with specific behaviour.
loadTables( $fromCache=true)
Load conversion tables either from the cache or the disk.
MediaWiki exception.
Wrapper around strtr() that holds replacements.