Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
76.71% |
56 / 73 |
|
44.44% |
4 / 9 |
CRAP | |
0.00% |
0 / 1 |
MniConverter | |
76.71% |
56 / 73 |
|
44.44% |
4 / 9 |
81.57 | |
0.00% |
0 / 1 |
isBeginning | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isEndOfWord | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
mteiToBengali | |
80.39% |
41 / 51 |
|
0.00% |
0 / 1 |
48.89 | |||
transliterate | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getMainCode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLanguageVariants | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getVariantsFallbacks | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
loadDefaultTables | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
translate | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file MniConverter.php |
19 | * @author Nokib Sarkar |
20 | * @author Haoreima |
21 | */ |
22 | /** |
23 | * Meitei specific converter routines. |
24 | * |
25 | * @ingroup Languages |
26 | */ |
class MniConverter extends LanguageConverterSpecific {
	// Individual source-script characters (and two-character sequences) that the
	// conversion rules in mteiToBengali() test for explicitly.
	private const O = 'ꯑ';
	private const OO = 'ꯑꯣ';
	private const U = 'ꯎ';
	private const EE = 'ꯑꯤ';
	private const YA = 'ꯌ';
	// Bengali output used for YA when it directly follows a halanta.
	private const Y_ = 'য';
	private const WA = 'ꯋ';
	private const BA = 'ꯕ';
	private const NA_ = 'ꯟ';
	private const NA = 'ꯅ';
	private const DIACRITIC_AA = 'ꯥ';
	private const HALANTA = '꯭';
	// Yielded when an input character must produce no output at all.
	private const SKIP = '';
	private const PERIOD = '꯫';
	private const PA_ = 'ꯞ';

	// Diacritics that may follow O; each key appended to O is a key of CONJUGATE_WITH_O.
	private const DIACRITICS_WITH_O = [
		'ꯣ' => 'ো',
		'ꯤ' => 'ী',
		'ꯥ' => 'া',
		'ꯦ' => 'ে',
		'ꯧ' => 'ৌ',
		'ꯩ' => 'ৈ',
		'ꯪ' => 'ং',
	];

	// O followed by a diacritic, mapped to a single Bengali output.
	private const CONJUGATE_WITH_O = [
		'ꯑꯣ' => 'ও',
		'ꯑꯤ' => 'ঈ',
		'ꯑꯥ' => 'আ',
		'ꯑꯦ' => 'এ',
		'ꯑꯧ' => 'ঔ',
		'ꯑꯩ' => 'ঐ',
		'ꯑꯪ' => 'অং',
	];

	// Characters after which NA_ keeps its regular (halanta) mapping.
	private const NOT_WEIRD_AFTER_NA_ = [ 'ꯇ', 'ꯊ', 'ꯗ', 'ꯙ', 'ꯟ', 'ꯕ', 'ꯌ', 'ꯁ' ];

	private const NUMERALS = [
		'꯰' => '০',
		'꯱' => '১',
		'꯲' => '২',
		'꯳' => '৩',
		'꯴' => '৪',
		'꯵' => '৫',
		'꯶' => '৬',
		'꯷' => '৭',
		'꯸' => '৮',
		'꯹' => '৯',
	];

	// Consonants whose default Bengali mapping already ends the syllable
	// (most of them carry an explicit Bengali halanta).
	private const HALANTA_CONSONANTS = [
		'ꯟ' => 'ন্',
		'ꯛ' => 'ক্',
		'ꯝ' => 'ম্',
		'ꯡ' => 'ং',
		'ꯜ' => 'ল্',
		'ꯠ' => 'ৎ',
		'ꯞ' => 'প্',
	];

	// Same keys as HALANTA_CONSONANTS, mapped without the trailing Bengali halanta.
	private const HALANTA_CONSONANTS_TO_NORMAL = [
		'ꯟ' => 'ন',
		'ꯛ' => 'ক',
		'ꯝ' => 'ম',
		'ꯡ' => 'ং',
		'ꯜ' => 'ল',
		'ꯠ' => 'ৎ',
		'ꯞ' => 'প',
	];

	// A single character matching this pattern is treated as a word boundary.
	private const NON_WORD_CHARACTER_PATTERN = "/[\s꯫\p{P}<>=\-\|$+^~]+?/u";

	private const CONSONANTS = self::HALANTA_CONSONANTS + [
		'ꯀ' => 'ক',
		'ꯈ' => 'খ',
		'ꯒ' => 'গ',
		'ꯘ' => 'ঘ',
		'ꯉ' => 'ঙ',
		'ꯆ' => 'চ',
		'ꯖ' => 'জ',
		'ꯓ' => 'ঝ',
		'ꯇ' => 'ত',
		'ꯊ' => 'থ',
		'ꯗ' => 'দ',
		'ꯙ' => 'ধ',
		'ꯅ' => 'ন',
		'ꯄ' => 'প',
		'ꯐ' => 'ফ',
		'ꯕ' => 'ব',
		'ꯚ' => 'ভ',
		'ꯃ' => 'ম',
		'ꯌ' => 'য়',
		'ꯔ' => 'র',
		'ꯂ' => 'ল',
		'ꯋ' => 'ৱ',
		'ꫩ' => 'শ',
		'ꫪ' => 'ষ',
		'ꯁ' => 'স',
		'ꯍ' => 'হ',
	];

	private const VOWELS = [
		'ꯑ' => 'অ',
		'ꯏ' => 'ই',
		'ꯎ' => 'উ',
		'ꯢ' => 'ই',
		'ꯨ' => 'ু',
	];

	private const MTEI_TO_BENG_MAP_EXTRA = [
		'꯫' => '।',
		'꯭' => '্',
	];

	// Default one-to-one mapping, used whenever no contextual rule applies.
	private const MTEI_TO_BENG_MAP =
		self::VOWELS +
		self::DIACRITICS_WITH_O +
		self::CONJUGATE_WITH_O +
		self::CONSONANTS +
		self::NUMERALS +
		self::MTEI_TO_BENG_MAP_EXTRA;

	/**
	 * Tell whether the character at $position is the first character of a word, i.e.
	 * it is the first character of the text or follows a non-word character.
	 *
	 * $position is a character index, not a byte offset. The preceding character is
	 * therefore looked up character-wise; indexing the raw UTF-8 string with [] would
	 * return a single byte and make the check fail after any multi-byte character.
	 *
	 * @param int $position Zero-based character index
	 * @param string|string[] $text The text, as a string or already split into characters
	 * @return bool
	 */
	private function isBeginning( $position, $text ): bool {
		if ( $position === 0 ) {
			return true;
		}
		$previous = is_array( $text )
			? $text[$position - 1]
			: mb_substr( $text, $position - 1, 1 );
		return preg_match( self::NON_WORD_CHARACTER_PATTERN, $previous ) === 1;
	}

	/**
	 * Tell whether $char terminates a word: it is PERIOD or matches
	 * NON_WORD_CHARACTER_PATTERN.
	 *
	 * @param string $char A single character
	 * @return bool
	 */
	private function isEndOfWord( $char ): bool {
		return $char === self::PERIOD
			|| preg_match( self::NON_WORD_CHARACTER_PATTERN, $char ) === 1;
	}

	/**
	 * Convert $text character by character, yielding the output for each input
	 * character (possibly an empty string). Contextual rules are tried in order;
	 * the first one that matches wins, and the plain MTEI_TO_BENG_MAP lookup is
	 * the fallback. Characters without a mapping are yielded unchanged.
	 *
	 * @param string $text
	 * @return Generator Yields strings to be concatenated by the caller
	 */
	private function mteiToBengali( $text ): Generator {
		$chars = mb_str_split( $text );
		$l = count( $chars );
		for ( $i = 0; $i < $l; $i++ ) {
			$char = $chars[$i];
			// null when there is no neighbour on that side
			$prev = $chars[$i - 1] ?? null;
			$next = $chars[$i + 1] ?? null;

			if (
				$char === self::O &&
				$next !== null &&
				array_key_exists( $next, self::DIACRITICS_WITH_O )
			) {
				/**
				 * We have only 3 true vowels,
				 * ꯑ(a), ꯏ(i), ꯎ (u)
				 * Others are just extension from "a" by mixing with diacritics
				 */
				yield self::CONJUGATE_WITH_O[$char . $next];
				// The diacritic has been consumed together with the vowel.
				$i++;
			} elseif (
				$char === self::HALANTA &&
				$prev !== null &&
				array_key_exists( $prev, self::HALANTA_CONSONANTS )
			) {
				// Remove halanta if the consonant has halanta already
				yield self::SKIP;
			} elseif (
				array_key_exists( $char, self::HALANTA_CONSONANTS ) &&
				( $next === null || $this->isEndOfWord( $next ) )
			) {
				// Remove halanta if this is the last character of the word
				yield self::HALANTA_CONSONANTS_TO_NORMAL[$char];
			} elseif ( $char === self::YA && $prev === self::HALANTA ) {
				// ꯌ right after a halanta becomes য rather than the default য়
				yield self::Y_;
			} elseif (
				$char === self::WA &&
				$i >= 2 &&
				$prev === self::HALANTA &&
				array_key_exists( $chars[$i - 2], self::CONSONANTS )
			) {
				// consonant + halanta + ꯋ: write ব (the mapping of ꯕ) instead of ৱ
				yield self::CONSONANTS[self::BA];
			} elseif ( $char === self::PA_ && $next === 'ꯀ' ) {
				// do not conjugate with halanta if it's followed by "ক"
				yield self::HALANTA_CONSONANTS_TO_NORMAL[$char];
			} elseif (
				$char === self::NA_ &&
				$next !== null &&
				!in_array( $next, self::NOT_WEIRD_AFTER_NA_, true ) &&
				array_key_exists( $next, self::CONSONANTS )
			) {
				/**
				 * ন্ / ণ্ + any consonant
				 * (except, ট, ঠ, ড, ঢ, , ত, থ, দ, ধ, ন, ব, য, য়) = weird
				 * Any consonant + ্ + ন = maybe ok
				 * So emit the plain ন (mapping of ꯅ) without halanta here.
				 */
				yield self::MTEI_TO_BENG_MAP[self::NA];
			} elseif ( $char === self::U && !$this->isBeginning( $i, $chars ) ) {
				// উ/ঊ in the middle of words are often replaced by ও
				yield self::MTEI_TO_BENG_MAP[self::OO];
			} elseif (
				$char === self::O &&
				$i + 2 < $l &&
				$chars[$i + 1] === self::EE[0] &&
				$chars[$i + 2] === self::EE[1]
			) {
				/**
				 * Instead of হাঈবা, people love to use হায়বা.
				 * But this is only in the case when ee or ya is
				 * in the middle of the words,
				 * never to do it if it's in the beginning.
				 *
				 * NOTE(review): self::EE[0] and self::EE[1] are single bytes of a
				 * multi-byte string, while $chars holds whole characters, so for
				 * well-formed UTF-8 input this condition cannot be true. The intended
				 * rule is unclear (ꯑ + ꯤ is already handled by the first branch),
				 * so the condition is left unchanged - confirm and fix separately.
				 */
				yield self::MTEI_TO_BENG_MAP[self::YA];
			} elseif (
				!array_key_exists( $char, self::HALANTA_CONSONANTS ) &&
				array_key_exists( $char, self::CONSONANTS ) &&
				( $next === null || $this->isEndOfWord( $next ) )
			) {
				// Consonants without halantas should end with diacritics of aa sound everytime.
				yield self::MTEI_TO_BENG_MAP[$char] . self::MTEI_TO_BENG_MAP[self::DIACRITIC_AA];
			} else {
				yield array_key_exists( $char, self::MTEI_TO_BENG_MAP )
					? self::MTEI_TO_BENG_MAP[$char]
					: $char;
			}
		}
	}

	/**
	 * Convert $text using the rules of mteiToBengali() and return the result
	 * as a single string.
	 *
	 * @param string $text
	 * @return string
	 */
	public function transliterate( $text ) {
		return implode( '', iterator_to_array( $this->mteiToBengali( $text ), false ) );
	}

	/**
	 * @return string The main language code handled by this converter
	 */
	public function getMainCode(): string {
		return 'mni';
	}

	/**
	 * @return string[] Codes of the supported variants
	 */
	public function getLanguageVariants(): array {
		return [ 'mni', 'mni-beng' ];
	}

	/**
	 * @return array Map of variant code to its fallback variant code
	 */
	public function getVariantsFallbacks(): array {
		return [
			'mni-beng' => 'mni'
		];
	}

	/**
	 * @return array One (initially empty) ReplacementArray per variant code
	 */
	protected function loadDefaultTables(): array {
		return [
			'mni' => new ReplacementArray(),
			'mni-beng' => new ReplacementArray(),
		];
	}

	/**
	 * Convert text to the requested variant.
	 *
	 * For 'mni-beng' the text is passed through transliterate(); for any other
	 * variant it is returned unchanged.
	 *
	 * @param string $text
	 * @param string $toVariant
	 * @return string
	 */
	public function translate( $text, $toVariant ) {
		if ( $toVariant === 'mni-beng' ) {
			return $this->transliterate( $text );
		}
		return $text;
	}
}