MediaWiki  master
KuConverter.php
Go to the documentation of this file.
1 <?php
30 
/**
 * Replacement table from Arabic-script characters to Latin-script text.
 *
 * Keys are Arabic-script letters, Arabic punctuation and Arabic-Indic
 * digits; values are their Latin replacements. loadDefaultTables() wraps
 * this array in a ReplacementArray and registers it as the 'ku-latn' table.
 *
 * @var string[]
 */
34  public $mArabicToLatin = [
 # Consonants. The mapping is many-to-one: four distinct letters map to 'h',
 # two to 'k' and two to 'x', so it cannot be inverted without loss.
35  'ب' => 'b', 'ج' => 'c', 'چ' => 'ç', 'د' => 'd', 'ف' => 'f', 'گ' => 'g', 'ھ' => 'h',
36  'ہ' => 'h', 'ه' => 'h', 'ح' => 'h', 'ژ' => 'j', 'ك' => 'k', 'ک' => 'k', 'ل' => 'l',
37  'م' => 'm', 'ن' => 'n', 'پ' => 'p', 'ق' => 'q', 'ر' => 'r', 'س' => 's', 'ش' => 'ş',
38  'ت' => 't', 'ڤ' => 'v', 'خ' => 'x', 'غ' => 'x', 'ز' => 'z',
39 
40 // ک و => ku -- is correct
41 // و ك=> ku -- is also correct
42 
43  /* Double vowels and semivowels */
44  'ڵ' => 'll', # ll
45  'ڕ' => 'rr', # rr
46  'ا' => 'a',
47  # 'ئێ' => 'ê', # initial e
48  'ە' => 'e',
 # The next two keys differ only in the number of invisible zero-width
 # non-joiner characters following the letter; keep them intact when editing.
49  'ه‌' => 'e', # with one non-joiner
50  'ه‌‌' => 'e', # with two non-joiner
51  'ة' => 'e',
52  'ێ' => 'ê',
53  'ي' => 'î',
54  'ی' => 'î', # U+06CC db 8c ARABIC LETTER FARSI YEH
55  'ى' => 'î', # U+0649 d9 89 ARABIC LETTER ALEF MAKSURA
56  'ۆ' => 'o',
57  'و' => 'w',
58  'ئ' => '', # initial hemze should not be shown
59  '،' => ',',
60  'ع' => '\'', # ayn
61  '؟' => '?',
62 
63  # digits
64  '٠' => '0', # U+0660
65  '١' => '1', # U+0661
66  '٢' => '2', # U+0662
67  '٣' => '3', # U+0663
68  '٤' => '4', # U+0664
69  '٥' => '5', # U+0665
70  '٦' => '6', # U+0666
71  '٧' => '7', # U+0667
72  '٨' => '8', # U+0668
73  '٩' => '9', # U+0669
74  ];
75 
/**
 * Replacement table from Latin-script text to Arabic-script characters.
 *
 * Keys are Latin letters (lower and upper case), a few punctuation marks
 * and space-prefixed vowels; values are their Arabic-script replacements.
 * loadDefaultTables() wraps this array in a ReplacementArray and registers
 * it as the 'ku-arab' table.
 *
 * @var string[]
 */
79  public $mLatinToArabic = [
 # Lower-case consonants
80  'b' => 'ب', 'c' => 'ج', 'ç' => 'چ', 'd' => 'د', 'f' => 'ف', 'g' => 'گ',
81  'h' => 'ه', 'j' => 'ژ', 'k' => 'ک', 'l' => 'ل',
82  'm' => 'م', 'n' => 'ن', 'p' => 'پ', 'q' => 'ق', 'r' => 'ر', 's' => 'س', 'ş' => 'ش',
83  't' => 'ت', 'v' => 'ڤ',
84  'x' => 'خ', 'y' => 'ی', 'z' => 'ز',
85 
 # Upper-case consonants map to the same letters as their lower-case forms.
 # NOTE(review): 'H' is the exception, it maps to a different letter than 'h'
 # above; confirm whether that is intended.
86  'B' => 'ب', 'C' => 'ج', 'Ç' => 'چ', 'D' => 'د', 'F' => 'ف', 'G' => 'گ',
87  'H' => 'ح', 'J' => 'ژ', 'K' => 'ک', 'L' => 'ل',
88  'M' => 'م', 'N' => 'ن', 'P' => 'پ', 'Q' => 'ق', 'R' => 'ر', 'S' => 'س', 'Ş' => 'ش',
89  'T' => 'ت', 'V' => 'ڤ', 'W' => 'و', 'X' => 'خ',
90  'Y' => 'ی', 'Z' => 'ز',
91 
92  /* Double consonants */
93  # 'll' => 'ڵ', # if it is possible to treat double-l and l separately
94  # 'rr' => 'ڕ', # same for double-r
95 
96  /* Single capital letters */
97  // ' C' => 'ج',
98 
99  /* Vowels */
100  'a' => 'ا',
101  'e' => 'ە',
102  'ê' => 'ێ',
 # Short 'i' is dropped: it is replaced by the empty string.
 # NOTE(review): there is no entry for upper-case 'I' anywhere in this table.
103  'i' => '',
104  'î' => 'ی',
105  'o' => 'ۆ',
106  'u' => 'و',
107  'û' => 'وو',
108  'w' => 'و',
109  ',' => '،',
110  '?' => '؟',
111 
112  # Try to replace the leading vowel
 # NOTE(review): each space-prefixed key below consumes the space before the
 # vowel while its replacement ends with a space, so in logical order the
 # space moves from before the vowel to after it; confirm this is intended.
113  ' a' => 'ئا ',
114  ' e' => 'ئە ',
115  ' ê' => 'ئێ ',
116  ' î' => 'ئی ',
117  ' o' => 'ئۆ ',
118  ' u' => 'ئو ',
119  ' û' => 'ئوو ',
120  'A' => 'ئا',
121  'E' => 'ئە',
122  'Ê' => 'ئێ',
123  'Î' => 'ئی',
124  'O' => 'ئۆ',
125  'U' => 'ئو',
126  'Û' => 'ئوو',
127  ' A' => 'ئا ',
128  ' E' => 'ئە ',
129  ' Ê' => 'ئێ ',
130  ' Î' => 'ئی ',
131  ' O' => 'ئۆ ',
132  ' U' => 'ئو ',
133  ' Û' => 'ئوو ',
134  # Disable eyn for now; single quotes are simply too
135  # common to be interpreted as eyn.
136  # '\'' => 'ع',
137 
138 /* # deactivated for now, breaks links i.e. in header of Special:Recentchanges :-(
139  # digits
140  '0' => '٠', # U+0660
141  '1' => '١', # U+0661
142  '2' => '٢', # U+0662
143  '3' => '٣', # U+0663
144  '4' => '٤', # U+0664
145  '5' => '٥', # U+0665
146  '6' => '٦', # U+0666
147  '7' => '٧', # U+0667
148  '8' => '٨', # U+0668
149  '9' => '٩', # U+0669
150 */
151  ];
152 
/**
 * Set up the Kurdish converter with its variants and fallbacks.
 *
 * Registers 'ku' as the main code with the variants 'ku', 'ku-arab' and
 * 'ku-latn'. 'ku' and 'ku-arab' fall back to 'ku-latn', while 'ku-latn'
 * falls back to 'ku-arab'.
 *
 * @param mixed $langobj Forwarded unchanged to the parent constructor
 *  (presumably the Language object; confirm against the parent class)
 */
public function __construct( $langobj ) {
	parent::__construct(
		$langobj,
		'ku',
		// Supported variants
		[ 'ku', 'ku-arab', 'ku-latn' ],
		// Fallback for each variant
		[
			'ku' => 'ku-latn',
			'ku-arab' => 'ku-latn',
			'ku-latn' => 'ku-arab',
		]
	);
}
166 
/**
 * Build the default conversion tables, one per variant.
 *
 * 'ku-latn' uses the Arabic-to-Latin table, 'ku-arab' uses the
 * Latin-to-Arabic table, and 'ku' gets an empty ReplacementArray.
 */
protected function loadDefaultTables() {
	$toLatin = new ReplacementArray( $this->mArabicToLatin );
	$toArabic = new ReplacementArray( $this->mLatinToArabic );
	$noReplacements = new ReplacementArray();

	$this->mTables = [
		'ku-latn' => $toLatin,
		'ku-arab' => $toArabic,
		'ku' => $noReplacements
	];
}
174 
/**
 * Convert text to the requested variant.
 *
 * The conversion tables are loaded first. If a table is registered for the
 * requested variant, the conversion is delegated to the parent class;
 * otherwise an exception listing the available table keys is thrown.
 *
 * A draft borrowed from the Kazakh converter, which split the text with a
 * regular expression before converting the pieces, used to sit here as
 * commented-out code; it was never active.
 *
 * @param string $text Text to convert, passed through to parent::translate()
 * @param string $toVariant Variant code, used as the key into the tables
 * @throws MWException If no conversion table exists for $toVariant
 * @return string Result of parent::translate()
 */
public function translate( $text, $toVariant ) {
	$this->loadTables();

	$hasTable = isset( $this->mTables[$toVariant] );
	if ( $hasTable ) {
		return parent::translate( $text, $toVariant );
	}

	$availableTables = implode( ',', array_keys( $this->mTables ) );
	throw new MWException( "Broken variant table: " . $availableTables );
}
219 }
KuConverter\loadDefaultTables
loadDefaultTables()
Load default conversion tables.
Definition: KuConverter.php:167
KuConverter\$mArabicToLatin
string[] $mArabicToLatin
Definition: KuConverter.php:34
MWException
MediaWiki exception.
Definition: MWException.php:29
ReplacementArray
Wrapper around strtr() that holds replacements.
Definition: ReplacementArray.php:24
KuConverter\translate
translate( $text, $toVariant)
Translates text into the given variant.
Definition: KuConverter.php:185
KuConverter
Kurdish converter routines.
Definition: KuConverter.php:29
LanguageConverter\loadTables
loadTables( $fromCache=true)
Load conversion tables either from the cache or the disk.
Definition: LanguageConverter.php:979
LanguageConverterSpecific
A class that extends LanguageConverter with specific behaviour.
Definition: LanguageConverterSpecific.php:26
KuConverter\$mLatinToArabic
string[] $mLatinToArabic
Definition: KuConverter.php:79
KuConverter\__construct
__construct( $langobj)
Definition: KuConverter.php:156