Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
78.21% |
61 / 78 |
|
50.00% |
5 / 10 |
CRAP | |
0.00% |
0 / 1 |
MniConverter | |
78.21% |
61 / 78 |
|
50.00% |
5 / 10 |
77.93 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
isBeginning | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isEndOfWord | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
mteiToBengali | |
80.39% |
41 / 51 |
|
0.00% |
0 / 1 |
48.89 | |||
transliterate | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getMainCode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLanguageVariants | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getVariantsFallbacks | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
loadDefaultTables | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
translate | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file MniConverter.php |
19 | * @author Nokib Sarkar |
20 | * @author Haoreima |
21 | */ |
/**
 * Meitei specific converter routines.
 *
 * Converts Meitei text written in the Meetei Mayek script ('mni') into the
 * Bengali script ('mni-beng') with a character-by-character transliteration
 * that applies a handful of context-dependent spelling rules.
 *
 * @ingroup Languages
 */
class MniConverter extends LanguageConverterSpecific {
	/** @var string Letter that merges with a following sign from $DIACRITICS_WITH_O */
	private $O = 'ꯑ';
	/** @var string Key of the ও entry; used for a non-initial ꯎ */
	private $OO = 'ꯑꯣ';
	private $U = 'ꯎ';
	private $YA = 'ꯌ';
	/** @var string Bengali output for ꯌ when it directly follows a halanta */
	private $Y_ = 'য';
	private $WA = 'ꯋ';
	private $BA = 'ꯕ';
	private $NA_ = 'ꯟ';
	private $NA = 'ꯅ';
	private $DIACRITIC_AA = 'ꯥ';
	private $HALANTA = '꯭';
	/** @var string Emitted when an input character produces no output */
	private $SKIP = '';
	private $PERIOD = '꯫';
	private $PA_ = 'ꯞ';

	/** @var string[] Vowel signs that can follow ꯑ, mapped to Bengali vowel signs */
	private $DIACRITICS_WITH_O = [
		'ꯣ' => 'ো',
		'ꯤ' => 'ী',
		'ꯥ' => 'া',
		'ꯦ' => 'ে',
		'ꯧ' => 'ৌ',
		'ꯩ' => 'ৈ',
		'ꯪ' => 'ং',
	];

	/** @var string[] ꯑ + vowel sign pairs, mapped to the Bengali output for the pair */
	private $CONJUGATE_WITH_O = [
		'ꯑꯣ' => 'ও',
		'ꯑꯤ' => 'ঈ',
		'ꯑꯥ' => 'আ',
		'ꯑꯦ' => 'এ',
		'ꯑꯧ' => 'ঔ',
		'ꯑꯩ' => 'ঐ',
		'ꯑꯪ' => 'অং',
	];

	/** @var string[] Letters before which ꯟ keeps its default (halanta) mapping */
	private $NOT_WEIRD_AFTER_NA_ = [ 'ꯇ', 'ꯊ', 'ꯗ', 'ꯙ', 'ꯟ', 'ꯕ', 'ꯌ', 'ꯁ' ];

	private $NUMERALS = [
		'꯰' => '০',
		'꯱' => '১',
		'꯲' => '২',
		'꯳' => '৩',
		'꯴' => '৪',
		'꯵' => '৫',
		'꯶' => '৬',
		'꯷' => '৭',
		'꯸' => '৮',
		'꯹' => '৯',
	];

	/** @var string[] Default mapping of the letters treated as already carrying a halanta */
	private $HALANTA_CONSONANTS = [
		'ꯟ' => 'ন্',
		'ꯛ' => 'ক্',
		'ꯝ' => 'ম্',
		'ꯡ' => 'ং',
		'ꯜ' => 'ল্',
		'ꯠ' => 'ৎ',
		'ꯞ' => 'প্',
	];

	/** @var string[] Same letters as $HALANTA_CONSONANTS, mapped without the halanta */
	private $HALANTA_CONSONANTS_TO_NORMAL = [
		'ꯟ' => 'ন',
		'ꯛ' => 'ক',
		'ꯝ' => 'ম',
		'ꯡ' => 'ং',
		'ꯜ' => 'ল',
		'ꯠ' => 'ৎ',
		'ꯞ' => 'প',
	];

	/** @var string Regex matching a single character that separates words */
	private $NON_WORD_CHARACTER_PATTERN = "/[\s꯫\p{P}<>=\-\|$+^~]+?/u";

	/** @var string[] Consonants; the constructor also adds $HALANTA_CONSONANTS */
	private $CONSONANTS = [
		'ꯀ' => 'ক',
		'ꯈ' => 'খ',
		'ꯒ' => 'গ',
		'ꯘ' => 'ঘ',
		'ꯉ' => 'ঙ',
		'ꯆ' => 'চ',
		'ꯖ' => 'জ',
		'ꯓ' => 'ঝ',
		'ꯇ' => 'ত',
		'ꯊ' => 'থ',
		'ꯗ' => 'দ',
		'ꯙ' => 'ধ',
		'ꯅ' => 'ন',
		'ꯄ' => 'প',
		'ꯐ' => 'ফ',
		'ꯕ' => 'ব',
		'ꯚ' => 'ভ',
		'ꯃ' => 'ম',
		'ꯌ' => 'য়',
		'ꯔ' => 'র',
		'ꯂ' => 'ল',
		'ꯋ' => 'ৱ',
		'ꫩ' => 'শ',
		'ꫪ' => 'ষ',
		'ꯁ' => 'স',
		'ꯍ' => 'হ',
	];

	/** @var string[] Vowels; the constructor also adds both *_WITH_O tables */
	private $VOWELS = [
		'ꯑ' => 'অ',
		'ꯏ' => 'ই',
		'ꯎ' => 'উ',
		'ꯢ' => 'ই',
		'ꯨ' => 'ু',
	];

	/** @var string[] Full lookup table; the constructor adds vowels, consonants and numerals */
	private $MTEI_TO_BENG_MAP = [
		'꯫' => '।',
		'꯭' => '্',
	];

	/**
	 * Builds the merged lookup tables.
	 *
	 * The array union operator keeps the left-hand entry when a key exists on
	 * both sides, so entries declared directly on a table are never overridden.
	 *
	 * @param mixed $_ Passed through unchanged to the parent constructor
	 */
	public function __construct( $_ ) {
		parent::__construct( $_ );
		$this->VOWELS += $this->DIACRITICS_WITH_O + $this->CONJUGATE_WITH_O;
		$this->CONSONANTS += $this->HALANTA_CONSONANTS;
		$this->MTEI_TO_BENG_MAP += $this->VOWELS + $this->CONSONANTS;
		$this->MTEI_TO_BENG_MAP += $this->NUMERALS;
	}

	/**
	 * Tells whether the character at $position starts a word, i.e. it is the
	 * first character of the text or follows a non-word character.
	 *
	 * The lookup is done on the list of characters, not on the raw string:
	 * indexing a UTF-8 string yields single bytes, which never line up with
	 * character positions in multi-byte text.
	 *
	 * @param int $position Index into $chars
	 * @param string[] $chars The text split into characters
	 * @return bool
	 */
	private function isBeginning( $position, array $chars ): bool {
		return $position === 0
			|| preg_match( $this->NON_WORD_CHARACTER_PATTERN, $chars[$position - 1] ) === 1;
	}

	/**
	 * Tells whether $char terminates the word that precedes it.
	 *
	 * @param string $char A single character
	 * @return bool
	 */
	private function isEndOfWord( $char ): bool {
		return $char === $this->PERIOD
			|| preg_match( $this->NON_WORD_CHARACTER_PATTERN, $char ) === 1;
	}

	/**
	 * Lazily transliterates Meetei Mayek text to Bengali script.
	 *
	 * Each step yields the output for one input character (two when ꯑ is merged
	 * with a following vowel sign); the output may be an empty string. The order
	 * of the checks matters: the first matching rule wins.
	 *
	 * @param string $text
	 * @return Generator Yields string fragments to be concatenated in order
	 */
	private function mteiToBengali( $text ) {
		$chars = mb_str_split( $text );
		$l = count( $chars );
		$i = 0;
		while ( $i < $l ) {
			$char = $chars[$i];
			$prev = $i > 0 ? $chars[$i - 1] : null;
			$next = $i + 1 < $l ? $chars[$i + 1] : null;

			if (
				$char === $this->O &&
				$next !== null &&
				array_key_exists( $next, $this->DIACRITICS_WITH_O )
			) {
				/**
				 * We have only 3 true vowels,
				 * ꯑ(a), ꯏ(i), ꯎ (u)
				 * Others are just extension from "a" by mixing with diacritics
				 */
				yield $this->CONJUGATE_WITH_O[$char . $next];
				// The vowel sign has been consumed together with ꯑ.
				$i += 1;
			} elseif (
				$char === $this->HALANTA &&
				$prev !== null &&
				array_key_exists( $prev, $this->HALANTA_CONSONANTS )
			) {
				// Remove halanta if the consonant has halanta already
				yield $this->SKIP;
			} elseif (
				array_key_exists( $char, $this->HALANTA_CONSONANTS ) &&
				( $next === null || $this->isEndOfWord( $next ) )
			) {
				// Remove halanta if this is the last character of the word
				yield $this->HALANTA_CONSONANTS_TO_NORMAL[$char];
			} elseif ( $char === $this->YA && $prev === $this->HALANTA ) {
				// Directly after a halanta, ꯌ is written য instead of its default য়
				yield $this->Y_;
			} elseif (
				$char === $this->WA &&
				$i >= 2 && $prev === $this->HALANTA &&
				array_key_exists( $chars[$i - 2], $this->CONSONANTS )
			) {
				// Consonant + halanta + ꯋ: write ꯋ as ব instead of its default ৱ
				yield $this->CONSONANTS[$this->BA];
			} elseif ( $char === $this->PA_ && $next === 'ꯀ' ) {
				// do not conjugate with halanta if it's followed by "ক"
				yield $this->HALANTA_CONSONANTS_TO_NORMAL[$char];
			} elseif (
				$char === $this->NA_ &&
				$next !== null &&
				!in_array( $next, $this->NOT_WEIRD_AFTER_NA_, true ) &&
				array_key_exists( $next, $this->CONSONANTS )
			) {
				/**
				 * ন্ followed by a consonant looks weird unless that consonant
				 * is one of NOT_WEIRD_AFTER_NA_, so write a plain ন instead.
				 */
				yield $this->MTEI_TO_BENG_MAP[$this->NA];
			} elseif ( $char === $this->U && !$this->isBeginning( $i, $chars ) ) {
				// উ/ঊ in the middle of words are often replaced by ও
				yield $this->MTEI_TO_BENG_MAP[$this->OO];
			} elseif (
				!array_key_exists( $char, $this->HALANTA_CONSONANTS ) &&
				array_key_exists( $char, $this->CONSONANTS ) &&
				( $next === null || $this->isEndOfWord( $next ) )
			) {
				// Consonants without halantas should end with diacritics of aa sound everytime.
				yield $this->MTEI_TO_BENG_MAP[$char] . $this->MTEI_TO_BENG_MAP[$this->DIACRITIC_AA];
			} else {
				// NOTE(review): a rule that was meant to write a word-medial "ꯑꯤ" as য়
				// (হায়বা instead of হাঈবা) used to sit before the previous branch. It
				// compared whole characters with single bytes of a multi-byte string,
				// so it could never match; it was dropped rather than guessed at.
				// Characters without a mapping are passed through unchanged.
				yield $this->MTEI_TO_BENG_MAP[$char] ?? $char;
			}
			$i += 1;
		}
	}

	/**
	 * Transliterates Meetei Mayek text to Bengali script.
	 *
	 * @param string $text
	 * @return string
	 */
	public function transliterate( $text ) {
		return implode( '', iterator_to_array( $this->mteiToBengali( $text ), false ) );
	}

	public function getMainCode(): string {
		return 'mni';
	}

	public function getLanguageVariants(): array {
		return [ 'mni', 'mni-beng' ];
	}

	public function getVariantsFallbacks(): array {
		return [
			'mni-beng' => 'mni'
		];
	}

	protected function loadDefaultTables(): array {
		return [
			'mni' => new ReplacementArray(),
			'mni-beng' => new ReplacementArray(),
		];
	}

	/**
	 * Converts text to the requested variant.
	 *
	 * Text is transliterated to Bengali script for 'mni-beng'; for any other
	 * variant it is returned unchanged.
	 *
	 * @param string $text
	 * @param string $toVariant
	 * @return string
	 */
	public function translate( $text, $toVariant ) {
		if ( $toVariant === 'mni-beng' ) {
			return $this->transliterate( $text );
		}
		return $text;
	}
}