Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
42.62% covered (danger)
42.62%
26 / 61
50.00% covered (danger)
50.00%
1 / 2
CRAP
0.00% covered (danger)
0.00%
0 / 1
Grammar
42.62% covered (danger)
42.62%
26 / 61
50.00% covered (danger)
50.00%
1 / 2
71.59
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 process
41.67% covered (danger)
41.67%
25 / 60
0.00% covered (danger)
0.00%
0 / 1
66.81
1<?php
2/**
3 * @license GPL-2.0-or-later
4 * @file
5 */
6
7namespace Wikimedia\Leximorph\Handler;
8
9use Psr\Log\LoggerInterface;
10use Wikimedia\Leximorph\Handler\Overrides\GrammarFallbackRegistry;
11use Wikimedia\Leximorph\Provider;
12
13/**
14 * Grammar
15 *
16 * The Grammar class performs language-specific grammatical transformations on a given word.
17 * It uses transformation rules (loaded from JSON files) to convert the input word into
18 * the specified grammatical case.
19 *
20 * Usage Example:
21 * <code>
22 *            echo $grammar->process( 'Википедия', 'genitive' );
23 * </code>
24 *
25 * @since     1.45
26 * @author    Doğu Abaris (abaris@null.net)
27 * @license   https://www.gnu.org/copyleft/gpl.html GPL-2.0-or-later
28 */
class Grammar {

    /**
     * Initializes the Grammar handler with the given transformations provider and a logger.
     *
     * @param Provider $provider Provider instance; supplies the language code and the
     *    grammar transformation rules used by process().
     * @param GrammarFallbackRegistry $postProcessor Registry applied by process() when
     *    no transformation rules exist for the requested case.
     * @param LoggerInterface $logger The logger instance to use.
     *
     * @since 1.45
     */
    public function __construct(
        protected readonly Provider $provider,
        private readonly GrammarFallbackRegistry $postProcessor,
        private readonly LoggerInterface $logger,
    ) {
    }

    /**
     * Transforms the given word into the specified grammatical case.
     *
     * Resolution order:
     *  1. An exact match in $overrideForms for the current language, case and word.
     *  2. The first matching rule among the provider's transformations for the case
     *     (following one level of aliasing when the case maps to another case name).
     *  3. The fallback registry, when the provider has no entry for the case at all.
     *
     * @param string $word The word to transform.
     * @param string $case The target grammatical case.
     * @param ?array<string, array<string, array<string, string>>> $overrideForms Override rules:
     *    $overrideForms[langCode][case][word] = result
     *
     * @since 1.45
     * @return string The transformed word in the specified case. The input word is
     *    returned unchanged when the rule set is invalid or no rule matches.
     */
    public function process( string $word, string $case, ?array $overrideForms = [] ): string {
        $langCode = $this->provider->getLanguageCode();

        // Explicit overrides take precedence over any rule-based transformation.
        if ( isset( $overrideForms[$langCode][$case][$word] ) ) {
            return $overrideForms[$langCode][$case][$word];
        }

        $grammarTransformations = $this->provider->getGrammarTransformationsProvider()->getTransformations();

        // No entry for this case: delegate to the fallback registry.
        if ( !array_key_exists( $case, $grammarTransformations ) ) {
            return $this->postProcessor->apply( $langCode, $word, $case );
        }

        $forms = $this->resolveForms( $grammarTransformations, $case );

        // A null result means the rule set was unusable; the problem has already been logged.
        return $forms === null ? $word : $this->applyRules( $forms, $word, $case );
    }

    /**
     * Looks up the rule list for a case, following a single alias if needed.
     *
     * Some names of grammar rules are aliases for other rules. In such cases the
     * value is a string (the name of the target case) rather than a rule list,
     * so the rules of the target case are loaded instead.
     *
     * @param array<string, mixed> $grammarTransformations All transformations, keyed by case.
     *    Must contain the key $case.
     * @param string $case The requested grammatical case.
     *
     * @return ?array<mixed> The rule list, or null (after logging an error) when the entry
     *    is neither an array nor an alias that resolves to an array.
     */
    private function resolveForms( array $grammarTransformations, string $case ): ?array {
        $forms = $grammarTransformations[$case];

        if ( is_string( $forms ) ) {
            $alias = $forms;
            if ( isset( $grammarTransformations[$alias] ) && is_array( $grammarTransformations[$alias] ) ) {
                return $grammarTransformations[$alias];
            }

            $this->logger->error(
                'Expected alias {alias} to resolve to an array in grammar transformations.',
                [ 'alias' => $alias ]
            );

            return null;
        }

        if ( !is_array( $forms ) ) {
            $this->logger->error(
                'Invalid type for grammar forms. Expected array, got {type}.',
                [ 'type' => gettype( $forms ) ]
            );

            return null;
        }

        return $forms;
    }

    /**
     * Applies the first rule whose pattern matches the word.
     *
     * Each rule is expected to be a [pattern, replacement] pair. Malformed rules are
     * logged and skipped, '@metadata' entries are skipped silently, and a pattern that
     * makes preg_match() fail is logged and skipped as well.
     *
     * @param array<mixed> $forms Rule list for the case.
     * @param string $word The word to transform.
     * @param string $case The grammatical case; used for log context only.
     *
     * @return string The transformed word, or the input word when no rule matches.
     */
    private function applyRules( array $forms, string $word, string $case ): string {
        foreach ( $forms as $rule ) {
            if ( !is_array( $rule ) || !isset( $rule[0], $rule[1] ) ) {
                $this->logger->warning(
                    'Skipping malformed grammar rule. Expected [pattern, replacement]. Case: {case}, Rule: {rule}',
                    [
                        'case' => $case,
                        'rule' => json_encode( $rule ),
                    ]
                );
                continue;
            }

            if ( !is_string( $rule[0] ) ) {
                $this->logger->warning(
                    'Invalid grammar rule format: first element must be string. Case: {case}, Rule: {rule}',
                    [
                        'case' => $case,
                        'rule' => json_encode( $rule ),
                    ]
                );
                continue;
            }

            $form = $rule[0];

            if ( $form === '@metadata' ) {
                continue;
            }

            // A non-string replacement is treated as an empty replacement.
            $replacement = is_string( $rule[1] ) ? $rule[1] : '';

            // Escape the delimiter so a literal '/' in the pattern cannot end the regex early.
            $regex = '/' . addcslashes( $form, '/' ) . '/u';
            $patternMatches = preg_match( $regex, $word );

            if ( $patternMatches === false ) {
                $this->logger->error(
                    'An error occurred while processing grammar: {error}. Word: {word}. Regex: /{form}/.',
                    [
                        'error' => preg_last_error_msg(),
                        'word' => $word,
                        'form' => $form,
                    ]
                );
                continue;
            }

            if ( $patternMatches === 1 ) {
                // Only the first matching rule is applied; keep the word if the replacement itself fails.
                return preg_replace( $regex, $replacement, $word ) ?? $word;
            }
        }

        return $word;
    }
}