Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
76.71% |
56 / 73 |
|
44.44% |
4 / 9 |
CRAP | |
0.00% |
0 / 1 |
MniConverter | |
76.71% |
56 / 73 |
|
44.44% |
4 / 9 |
81.57 | |
0.00% |
0 / 1 |
isBeginning | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
isEndOfWord | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
mteiToBengali | |
80.39% |
41 / 51 |
|
0.00% |
0 / 1 |
48.89 | |||
transliterate | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getMainCode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLanguageVariants | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getVariantsFallbacks | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
loadDefaultTables | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
translate | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file MniConverter.php |
19 | * @author Nokib Sarkar |
20 | * @author Haoreima |
21 | */ |
22 | |
23 | use MediaWiki\Language\ReplacementArray; |
24 | |
25 | /** |
26 | * Meitei specific converter routines. |
27 | * |
28 | * @ingroup Languages |
29 | */ |
/**
 * Meitei specific converter routines.
 *
 * Provides the 'mni' and 'mni-beng' variants. Text is converted to the
 * 'mni-beng' variant character by character using the lookup tables below
 * plus a handful of context-sensitive rules, see mteiToBengali().
 *
 * @ingroup Languages
 */
class MniConverter extends LanguageConverterSpecific {
	// Source characters that the context-sensitive rules test for by name.
	private const O = 'ꯑ';
	private const OO = 'ꯑꯣ';
	private const U = 'ꯎ';
	private const EE = 'ꯑꯤ';
	private const YA = 'ꯌ';
	// Emitted instead of the default mapping of YA when YA follows a halanta.
	private const Y_ = 'য';
	private const WA = 'ꯋ';
	private const BA = 'ꯕ';
	private const NA_ = 'ꯟ';
	private const NA = 'ꯅ';
	private const DIACRITIC_AA = 'ꯥ';
	private const HALANTA = '꯭';
	// Yielded when a source character must produce no output at all.
	private const SKIP = '';
	private const PERIOD = '꯫';
	private const PA_ = 'ꯞ';
	// Diacritics that combine with a preceding O into one of the
	// independent vowels listed in CONJUGATE_WITH_O.
	private const DIACRITICS_WITH_O = [
		'ꯣ' => 'ো',
		'ꯤ' => 'ী',
		'ꯥ' => 'া',
		'ꯦ' => 'ে',
		'ꯧ' => 'ৌ',
		'ꯩ' => 'ৈ',
		'ꯪ' => 'ং',
	];
	// O followed by each key of DIACRITICS_WITH_O.
	private const CONJUGATE_WITH_O = [
		'ꯑꯣ' => 'ও',
		'ꯑꯤ' => 'ঈ',
		'ꯑꯥ' => 'আ',
		'ꯑꯦ' => 'এ',
		'ꯑꯧ' => 'ঔ',
		'ꯑꯩ' => 'ঐ',
		'ꯑꯪ' => 'অং',
	];
	// Characters after which NA_ keeps its default (halanta) mapping.
	private const NOT_WEIRD_AFTER_NA_ = [ 'ꯇ', 'ꯊ', 'ꯗ', 'ꯙ', 'ꯟ', 'ꯕ', 'ꯌ', 'ꯁ' ];
	private const NUMERALS = [
		'꯰' => '০',
		'꯱' => '১',
		'꯲' => '২',
		'꯳' => '৩',
		'꯴' => '৪',
		'꯵' => '৫',
		'꯶' => '৬',
		'꯷' => '৭',
		'꯸' => '৮',
		'꯹' => '৯',
	];
	// Consonants whose default output already carries a halanta.
	private const HALANTA_CONSONANTS = [
		'ꯟ' => 'ন্',
		'ꯛ' => 'ক্',
		'ꯝ' => 'ম্',
		'ꯡ' => 'ং',
		'ꯜ' => 'ল্',
		'ꯠ' => 'ৎ',
		'ꯞ' => 'প্',
	];
	// Same keys as HALANTA_CONSONANTS, output without the halanta.
	private const HALANTA_CONSONANTS_TO_NORMAL = [
		'ꯟ' => 'ন',
		'ꯛ' => 'ক',
		'ꯝ' => 'ম',
		'ꯡ' => 'ং',
		'ꯜ' => 'ল',
		'ꯠ' => 'ৎ',
		'ꯞ' => 'প',
	];
	// A single character matching this pattern is treated as a word boundary.
	private const NON_WORD_CHARACTER_PATTERN = "/[\s꯫\p{P}<>=\-\|$+^~]+?/u";
	private const CONSONANTS = self::HALANTA_CONSONANTS + [
		'ꯀ' => 'ক',
		'ꯈ' => 'খ',
		'ꯒ' => 'গ',
		'ꯘ' => 'ঘ',
		'ꯉ' => 'ঙ',
		'ꯆ' => 'চ',
		'ꯖ' => 'জ',
		'ꯓ' => 'ঝ',
		'ꯇ' => 'ত',
		'ꯊ' => 'থ',
		'ꯗ' => 'দ',
		'ꯙ' => 'ধ',
		'ꯅ' => 'ন',
		'ꯄ' => 'প',
		'ꯐ' => 'ফ',
		'ꯕ' => 'ব',
		'ꯚ' => 'ভ',
		'ꯃ' => 'ম',
		'ꯌ' => 'য়',
		'ꯔ' => 'র',
		'ꯂ' => 'ল',
		'ꯋ' => 'ৱ',
		'ꫩ' => 'শ',
		'ꫪ' => 'ষ',
		'ꯁ' => 'স',
		'ꯍ' => 'হ',
	];
	private const VOWELS = [
		'ꯑ' => 'অ',
		'ꯏ' => 'ই',
		'ꯎ' => 'উ',
		'ꯢ' => 'ই',
		'ꯨ' => 'ু',
	];
	private const MTEI_TO_BENG_MAP_EXTRA = [
		'꯫' => '।',
		'꯭' => '্',
	];
	// Default one-to-one mapping, used whenever no contextual rule applies.
	private const MTEI_TO_BENG_MAP =
		self::VOWELS +
		self::DIACRITICS_WITH_O +
		self::CONJUGATE_WITH_O +
		self::CONSONANTS +
		self::NUMERALS +
		self::MTEI_TO_BENG_MAP_EXTRA;

	/**
	 * Tell whether the character at $position starts a word.
	 *
	 * The lookup is done on the already split character list: indexing the
	 * raw UTF-8 string with a character position would read an unrelated
	 * byte, which can never match the /u pattern.
	 *
	 * @param int $position Index into $chars
	 * @param string[] $chars Text split into characters, as produced by mb_str_split()
	 * @return bool True at index 0 or directly after a non-word character
	 */
	private function isBeginning( int $position, array $chars ): bool {
		return $position === 0 || $this->isEndOfWord( $chars[$position - 1] );
	}

	/**
	 * Tell whether $char is a word boundary.
	 *
	 * @param string $char A single character
	 * @return bool True for PERIOD or anything matching NON_WORD_CHARACTER_PATTERN
	 */
	private function isEndOfWord( string $char ): bool {
		return $char === self::PERIOD
			|| preg_match( self::NON_WORD_CHARACTER_PATTERN, $char ) === 1;
	}

	/**
	 * Lazily convert $text, yielding the replacement for one source
	 * character (or one O + diacritic pair) at a time.
	 *
	 * The rules are tried in order and the first match wins, so their
	 * order is significant.
	 *
	 * @param string $text
	 * @return iterable<string> Output fragments; may include empty strings
	 */
	private function mteiToBengali( string $text ): iterable {
		$chars = mb_str_split( $text );
		$l = count( $chars );
		for ( $i = 0; $i < $l; $i++ ) {
			$char = $chars[$i];
			$prev = $i > 0 ? $chars[$i - 1] : null;
			$next = $i + 1 < $l ? $chars[$i + 1] : null;

			if (
				$char === self::O &&
				$next !== null &&
				array_key_exists( $next, self::DIACRITICS_WITH_O )
			) {
				/**
				 * We have only 3 true vowels,
				 * ꯑ(a), ꯏ(i), ꯎ (u)
				 * Others are just extension from "a" by mixing with diacritics
				 */
				yield self::CONJUGATE_WITH_O[$char . $next];
				// The diacritic has been consumed together with the vowel.
				$i++;
			} elseif (
				$char === self::HALANTA &&
				$prev !== null &&
				array_key_exists( $prev, self::HALANTA_CONSONANTS )
			) {
				// Remove halanta if the consonant has halanta already
				yield self::SKIP;
			} elseif (
				array_key_exists( $char, self::HALANTA_CONSONANTS ) &&
				( $next === null || $this->isEndOfWord( $next ) )
			) {
				// Remove halanta if this is the last character of the word
				yield self::HALANTA_CONSONANTS_TO_NORMAL[$char];
			} elseif ( $char === self::YA && $prev === self::HALANTA ) {
				// Right after a halanta, ꯌ is written as plain য instead of its default য়
				yield self::Y_;
			} elseif (
				$char === self::WA &&
				$i - 2 >= 0 &&
				$prev === self::HALANTA &&
				array_key_exists( $chars[$i - 2], self::CONSONANTS )
			) {
				// After consonant + halanta, ꯋ is written as ব instead of its default ৱ
				yield self::CONSONANTS[self::BA];
			} elseif ( $char === self::PA_ && $next === 'ꯀ' ) {
				// do not conjugate with halanta if it's followed by "ক"
				yield self::HALANTA_CONSONANTS_TO_NORMAL[$char];
			} elseif (
				$char === self::NA_ &&
				$next !== null &&
				!in_array( $next, self::NOT_WEIRD_AFTER_NA_, true ) &&
				array_key_exists( $next, self::CONSONANTS )
			) {
				/**
				 * ꯟ followed by a consonant that is not listed in
				 * NOT_WEIRD_AFTER_NA_ would give a weird looking conjunct,
				 * so it is written as plain ন without halanta.
				 * The following consonant is handled by the next iteration.
				 */
				yield self::MTEI_TO_BENG_MAP[self::NA];
			} elseif ( $char === self::U && !$this->isBeginning( $i, $chars ) ) {
				// উ/ঊ in the middle of words are often replaced by ও
				yield self::MTEI_TO_BENG_MAP[self::OO];
			} elseif (
				$char === self::O &&
				$i + 2 < $l &&
				$chars[$i + 1] === self::EE[0] &&
				$chars[$i + 2] === self::EE[1]
			) {
				/**
				 * Instead of হাঈবা, people love to use হায়বা.
				 * But this is only in the case when ee or ya is
				 * in the middle of the words,
				 * never to do it if it's in the beginning.
				 *
				 * NOTE(review): self::EE[0] and self::EE[1] are single bytes of
				 * a multibyte string while $chars holds whole characters, so
				 * this condition does not match for well-formed text. Left
				 * untouched because the intended rule is not clear from this
				 * file; confirm with the authors before enabling it.
				 */
				yield self::MTEI_TO_BENG_MAP[self::YA];
			} elseif (
				!array_key_exists( $char, self::HALANTA_CONSONANTS ) &&
				array_key_exists( $char, self::CONSONANTS ) &&
				( $next === null || $this->isEndOfWord( $next ) )
			) {
				// Consonants without halantas should end with diacritics of aa sound everytime.
				yield self::MTEI_TO_BENG_MAP[$char] . self::MTEI_TO_BENG_MAP[self::DIACRITIC_AA];
			} else {
				// No contextual rule applies: use the default map, or pass the
				// character through unchanged when it is not in the map.
				yield self::MTEI_TO_BENG_MAP[$char] ?? $char;
			}
		}
	}

	/**
	 * Convert $text using mteiToBengali() and return the joined result.
	 *
	 * @param string $text
	 * @return string
	 */
	public function transliterate( $text ) {
		return implode( '', iterator_to_array( $this->mteiToBengali( $text ), false ) );
	}

	public function getMainCode(): string {
		return 'mni';
	}

	public function getLanguageVariants(): array {
		return [ 'mni', 'mni-beng' ];
	}

	public function getVariantsFallbacks(): array {
		return [
			'mni-beng' => 'mni'
		];
	}

	/**
	 * Both variants start with an empty replacement table; the conversion
	 * itself is done in translate().
	 *
	 * @return ReplacementArray[] Keyed by variant code
	 */
	protected function loadDefaultTables(): array {
		return [
			'mni' => new ReplacementArray(),
			'mni-beng' => new ReplacementArray(),
		];
	}

	/**
	 * Convert text to the requested variant.
	 *
	 * Only 'mni-beng' triggers a conversion, through transliterate();
	 * for any other variant the text is returned unchanged.
	 *
	 * @param string $text
	 * @param string $toVariant
	 * @return string
	 */
	public function translate( $text, $toVariant ) {
		if ( $toVariant === 'mni-beng' ) {
			return $this->transliterate( $text );
		}
		return $text;
	}
}