Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 204
0.00% covered (danger)
0.00%
0 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
GrammarKk_cyrl
0.00% covered (danger)
0.00%
0 / 204
0.00% covered (danger)
0.00%
0 / 5
14042
0.00% covered (danger)
0.00%
0 / 1
 process
0.00% covered (danger)
0.00%
0 / 175
0.00% covered (danger)
0.00%
0 / 1
11556
 lastLetter
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
12
 lc
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
12
 lcfirst
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
20
 isMultibyte
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2/**
3 * @license GPL-2.0-or-later
4 * @file
5 */
6
7// phpcs:ignoreFile Squiz.Classes.ValidClassName.NotCamelCaps
8namespace Wikimedia\Leximorph\Handler\Overrides\Grammar;
9
10use Wikimedia\Leximorph\Handler\Overrides\IGrammarTransformer;
11
/**
 * GrammarKk_cyrl
 *
 * Implements grammar transformations for Kazakh using the Cyrillic script.
 *
 * These rules don't cover the whole grammar of the language.
 * This logic was originally taken from MediaWiki Core.
 * Thanks to all contributors.
 *
 * @since     1.45
 * @author    Doğu Abaris (abaris@null.net)
 * @license   https://www.gnu.org/copyleft/gpl.html GPL-2.0-or-later
 */
class GrammarKk_cyrl implements IGrammarTransformer {
    // Vowels of the last syllable; they decide which variant of a suffix is used.
    private const FRONT_VOWELS = [ "е", "ө", "ү", "і", "ә", "э", "я", "ё", "и" ];
    private const BACK_VOWELS = [ "а", "о", "ұ", "ы" ];
    private const ALL_VOWELS = [ "е", "ө", "ү", "і", "ә", "э", "а", "о", "ұ", "ы", "я", "ё", "и" ];

    // Classes of the word-final letter.
    private const NASALS = [ "м", "н", "ң" ];
    private const SONANTS = [ "и", "й", "л", "р", "у", "ю" ];
    private const CONSONANTS = [ "п", "ф", "к", "қ", "т", "ш", "с", "х", "ц", "ч", "щ", "б", "в", "г", "д" ];
    private const SIBILANTS = [ "ж", "з" ];
    private const SONORANTS = [ "и", "й", "л", "р", "у", "ю", "м", "н", "ң", "ж", "з" ];

    // Word-final letters of possessive forms.
    // 1st singular, 2nd informal
    private const FIRST_PERSON = [ "м", "ң" ];
    // 1st plural, 2nd formal
    private const SECOND_PERSON = [ "з" ];
    // 3rd
    private const THIRD_PERSON = [ "ы", "і" ];

    /**
     * Applies Cyrillic Kazakh-specific grammatical transformations.
     *
     * Convert from the nominative form of a noun to some other case
     * Invoked with {{GRAMMAR:case|word}}
     *
     * Cases: genitive, dative, accusative, locative, ablative, comitative + possessive forms.
     * Every case is accepted under its English name or its "dcN" code. The suffix is
     * chosen from the last letter of the word and, for most cases, from the last vowel
     * found in the word. When no rule matches (unknown case, unlisted final letter, or
     * no listed vowel where one is needed) the word is returned unchanged.
     *
     * @param string $word The word to process.
     * @param string $case The grammatical case.
     *
     * @since 1.45
     * @return string The processed word.
     */
    public function process( string $word, string $case ): string {
        [ $wordEnding, $wordLastVowel ] = $this->lastLetter( $word, self::ALL_VOWELS );

        // Within each case the branch order matters: some letters (e.g. "и") belong to
        // more than one class and must be caught by the first matching branch.
        switch ( $case ) {
            // Genitive ("ilik")
            case "dc1":
            case "genitive":
                if ( $this->isOneOf( $wordEnding, self::CONSONANTS ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "тің", "тың" );
                } elseif ( $this->isOneOf( $wordEnding, self::ALL_VOWELS, self::NASALS ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "нің", "ның" );
                } elseif ( $this->isOneOf( $wordEnding, self::SONANTS, self::SIBILANTS ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "дің", "дың" );
                }
                break;

            // Dative ("barıs")
            case "dc2":
            case "dative":
                if ( $this->isOneOf( $wordEnding, self::CONSONANTS ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "ке", "қа" );
                } elseif ( $this->isOneOf( $wordEnding, self::ALL_VOWELS, self::SONORANTS ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "ге", "ға" );
                }
                break;

            // Possessive + dative ("täweldık + barıs")
            case "dc21":
            case "possessive dative":
                if ( $this->isOneOf( $wordEnding, self::FIRST_PERSON ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "е", "а" );
                } elseif ( $this->isOneOf( $wordEnding, self::SECOND_PERSON ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "ге", "ға" );
                } elseif ( $this->isOneOf( $wordEnding, self::THIRD_PERSON ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "не", "на" );
                }
                break;

            // Accusative ("tabıs")
            case "dc3":
            case "accusative":
                if ( $this->isOneOf( $wordEnding, self::CONSONANTS ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "ті", "ты" );
                } elseif ( $this->isOneOf( $wordEnding, self::ALL_VOWELS ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "ні", "ны" );
                } elseif ( $this->isOneOf( $wordEnding, self::SONORANTS ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "ді", "ды" );
                }
                break;

            // Possessive + accusative ("täweldık + tabıs")
            case "dc31":
            case "possessive accusative":
                if ( $this->isOneOf( $wordEnding, self::FIRST_PERSON, self::SECOND_PERSON ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "ді", "ды" );
                } elseif ( $this->isOneOf( $wordEnding, self::THIRD_PERSON ) ) {
                    // The only possessive suffix that does not depend on the last vowel.
                    $word .= "н";
                }
                break;

            // Locative ("jatıs")
            case "dc4":
            case "locative":
                if ( $this->isOneOf( $wordEnding, self::CONSONANTS ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "те", "та" );
                } elseif ( $this->isOneOf( $wordEnding, self::ALL_VOWELS, self::SONORANTS ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "де", "да" );
                }
                break;

            // Possessive + locative ("täweldık + jatıs")
            case "dc41":
            case "possessive locative":
                if ( $this->isOneOf( $wordEnding, self::FIRST_PERSON, self::SECOND_PERSON ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "де", "да" );
                } elseif ( $this->isOneOf( $wordEnding, self::THIRD_PERSON ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "нде", "нда" );
                }
                break;

            // Ablative ("şığıs")
            case "dc5":
            case "ablative":
                if ( $this->isOneOf( $wordEnding, self::CONSONANTS ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "тен", "тан" );
                } elseif ( $this->isOneOf( $wordEnding, self::ALL_VOWELS, self::SONANTS, self::SIBILANTS ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "ден", "дан" );
                } elseif ( $this->isOneOf( $wordEnding, self::NASALS ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "нен", "нан" );
                }
                break;

            // Possessive + ablative ("täweldık + şığıs")
            case "dc51":
            case "possessive ablative":
                if ( $this->isOneOf( $wordEnding, self::FIRST_PERSON, self::THIRD_PERSON ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "нен", "нан" );
                } elseif ( $this->isOneOf( $wordEnding, self::SECOND_PERSON ) ) {
                    $word .= $this->harmonize( $wordLastVowel, "ден", "дан" );
                }
                break;

            // Comitative ("kömektes"); the suffix depends on the final letter only.
            case "dc6":
            case "comitative":
                if ( $this->isOneOf( $wordEnding, self::CONSONANTS ) ) {
                    $word .= "пен";
                } elseif ( $this->isOneOf( $wordEnding, self::ALL_VOWELS, self::NASALS, self::SONANTS ) ) {
                    $word .= "мен";
                } elseif ( $this->isOneOf( $wordEnding, self::SIBILANTS ) ) {
                    $word .= "бен";
                }
                break;

            // Possessive + comitative ("täweldık + kömektes"); final letter only as well.
            case "dc61":
            case "possessive comitative":
                if ( $this->isOneOf( $wordEnding, self::CONSONANTS ) ) {
                    $word .= "пенен";
                } elseif ( $this->isOneOf( $wordEnding, self::ALL_VOWELS, self::NASALS, self::SONANTS ) ) {
                    $word .= "менен";
                } elseif ( $this->isOneOf( $wordEnding, self::SIBILANTS ) ) {
                    $word .= "бенен";
                }
                break;

            // dc0 / nominative ("ataw") and anything unknown: leave the word as is.
            default:
                break;
        }

        return $word;
    }

    /**
     * Tells whether a letter belongs to at least one of the given letter classes.
     *
     * @param string $letter Single (lowercased) letter; may be empty.
     * @param string[] ...$letterClasses One or more lists of letters.
     *
     * @return bool
     */
    private function isOneOf( string $letter, array ...$letterClasses ): bool {
        foreach ( $letterClasses as $letterClass ) {
            if ( in_array( $letter, $letterClass, true ) ) {
                return true;
            }
        }

        return false;
    }

    /**
     * Picks the suffix variant that matches the last vowel of the word.
     *
     * @param string|null $lastVowel Last vowel of the word, or null when it has none.
     * @param string $frontSuffix Suffix used after a front vowel.
     * @param string $backSuffix Suffix used after a back vowel.
     *
     * @return string The chosen suffix, or an empty string when the vowel is in neither list.
     */
    private function harmonize( ?string $lastVowel, string $frontSuffix, string $backSuffix ): string {
        if ( in_array( $lastVowel, self::FRONT_VOWELS, true ) ) {
            return $frontSuffix;
        }
        if ( in_array( $lastVowel, self::BACK_VOWELS, true ) ) {
            return $backSuffix;
        }

        return '';
    }

    /**
     * Finds the last letter and the last vowel of a word, both lowercased.
     *
     * @param string $word
     * @param string[] $allVowels Letters that count as vowels.
     *
     * @return array{0: string, 1: string|null} Last letter ('' for an empty word) and
     *  last vowel (null when the word contains none of $allVowels).
     */
    private function lastLetter( string $word, array $allVowels ): array {
        // Lowercase first so that the comparison with the (lowercase) letter lists works.
        $letters = mb_str_split( $this->lc( $word ), 1 );
        $letterCount = count( $letters );

        // Empty word: nothing to inspect, and no array access with a missing key.
        if ( $letterCount === 0 ) {
            return [ '', null ];
        }

        $finalLetter = $letters[$letterCount - 1];

        // Walk backwards until the first vowel is met.
        for ( $i = $letterCount - 1; $i >= 0; $i-- ) {
            if ( in_array( $letters[$i], $allVowels, true ) ) {
                return [ $finalLetter, $letters[$i] ];
            }
        }

        return [ $finalLetter, null ];
    }

    /**
     * Lowercases a string, or only its first character.
     *
     * @param string $str
     * @param bool $first Whether to lowercase only the first character
     *
     * @return string The string with lowercase conversion applied
     */
    public function lc( string $str, bool $first = false ): string {
        if ( $first ) {
            return $this->lcfirst( $str );
        }

        // Only pay for the multibyte conversion when the string needs it.
        return $this->isMultibyte( $str ) ? mb_strtolower( $str ) : strtolower( $str );
    }

    /**
     * Lowercases the first character of a string.
     *
     * @param string $str
     *
     * @return string The string with lowercase conversion applied to the first character
     */
    public function lcfirst( string $str ): string {
        // Code of the first byte (0 for an empty string).
        $octetCode = ord( $str );
        // See https://en.wikipedia.org/wiki/ASCII#Printable_characters
        if ( $octetCode < 96 ) {
            // Assume this is an uppercase/uncased ASCII character
            return lcfirst( $str );
        }
        if ( $octetCode < 128 ) {
            // Assume this is a lowercase/uncased ASCII character
            return $str;
        }

        if ( $this->isMultibyte( $str ) ) {
            // Assume this is a multibyte character and mb_internal_encoding() is appropriate
            return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
        }

        // Assume this is a non-multibyte character and LC_CASE is appropriate
        return lcfirst( $str );
    }

    /**
     * Tells whether the byte length and the character length of a string differ.
     *
     * @param string $str
     *
     * @return bool
     */
    private function isMultibyte( string $str ): bool {
        return strlen( $str ) !== mb_strlen( $str );
    }
}