Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
42.62% |
26 / 61 |
|
50.00% |
1 / 2 |
CRAP | |
0.00% |
0 / 1 |
| Grammar | |
42.62% |
26 / 61 |
|
50.00% |
1 / 2 |
71.59 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| process | |
41.67% |
25 / 60 |
|
0.00% |
0 / 1 |
66.81 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * @license GPL-2.0-or-later |
| 4 | * @file |
| 5 | */ |
| 6 | |
| 7 | namespace Wikimedia\Leximorph\Handler; |
| 8 | |
| 9 | use Psr\Log\LoggerInterface; |
| 10 | use Wikimedia\Leximorph\Handler\Overrides\GrammarFallbackRegistry; |
| 11 | use Wikimedia\Leximorph\Provider; |
| 12 | |
| 13 | /** |
| 14 | * Grammar |
| 15 | * |
| 16 | * The Grammar class performs language-specific grammatical transformations on a given word. |
| 17 | * It uses transformation rules (loaded from JSON files) to convert the input word into |
| 18 | * the specified grammatical case. |
| 19 | * |
| 20 | * Usage Example: |
| 21 | * <code> |
| 22 | * echo $grammar->process( 'Википедия', 'genitive' ); |
| 23 | * </code> |
| 24 | * |
| 25 | * @since 1.45 |
| 26 | * @author Doğu Abaris (abaris@null.net) |
| 27 | * @license https://www.gnu.org/copyleft/gpl.html GPL-2.0-or-later |
| 28 | */ |
class Grammar {

	/**
	 * Creates the grammar handler.
	 *
	 * @param Provider $provider Source of the language code and of the grammar transformation rules.
	 * @param GrammarFallbackRegistry $postProcessor Applied when no rules exist for the requested case.
	 * @param LoggerInterface $logger Receives diagnostics about malformed or failing rules.
	 *
	 * @since 1.45
	 */
	public function __construct(
		protected readonly Provider $provider,
		private readonly GrammarFallbackRegistry $postProcessor,
		private readonly LoggerInterface $logger,
	) {
	}

	/**
	 * Converts a word into the requested grammatical case.
	 *
	 * Resolution order:
	 *  1. An entry in $overrideForms for the current language, case and word is returned as is.
	 *  2. If the provider's transformations contain no entry for the case, the result of the
	 *     fallback registry is returned.
	 *  3. Otherwise the rules for the case are tried in order and the first rule whose pattern
	 *     matches the word is applied; later rules are not evaluated.
	 *
	 * The word is returned unchanged when an alias cannot be resolved to a rule list, when the
	 * rules are not an array, or when no rule matches. Malformed rules are logged and skipped.
	 *
	 * @param string $word The word to transform.
	 * @param string $case The target grammatical case.
	 * @param ?array<string, array<string, array<string, string>>> $overrideForms Override rules:
	 *   $overrideForms[langCode][case][word] = result
	 *
	 * @since 1.45
	 * @return string The transformed word in the specified case.
	 */
	public function process( string $word, string $case, ?array $overrideForms = [] ): string {
		$langCode = $this->provider->getLanguageCode();

		// A caller-supplied override for this exact word takes precedence over every rule.
		$override = $overrideForms[$langCode][$case][$word] ?? null;
		if ( $override !== null ) {
			return $override;
		}

		$transformations = $this->provider->getGrammarTransformationsProvider()->getTransformations();

		// No entry for this case: hand the word over to the fallback registry.
		if ( !array_key_exists( $case, $transformations ) ) {
			return $this->postProcessor->apply( $this->provider->getLanguageCode(), $word, $case );
		}

		$rules = $transformations[$case];

		// A string value is an alias naming another case; use that case's rules instead.
		if ( is_string( $rules ) ) {
			$aliased = $transformations[$rules] ?? null;
			if ( !is_array( $aliased ) ) {
				$this->logger->error(
					'Expected alias {alias} to resolve to an array in grammar transformations.',
					[ 'alias' => $rules ]
				);

				return $word;
			}
			$rules = $aliased;
		}

		if ( !is_array( $rules ) ) {
			$this->logger->error(
				'Invalid type for grammar forms. Expected array, got {type}.',
				[ 'type' => gettype( $rules ) ]
			);

			return $word;
		}

		foreach ( $rules as $rule ) {
			// Each rule must be a [pattern, replacement] pair.
			if ( !is_array( $rule ) || !isset( $rule[0], $rule[1] ) ) {
				$this->logger->warning(
					'Skipping malformed grammar rule. Expected [pattern, replacement]. Case: {case}, Rule: {rule}',
					[
						'case' => $case,
						'rule' => json_encode( $rule ),
					]
				);
				continue;
			}

			$pattern = $rule[0];

			if ( !is_string( $pattern ) ) {
				$this->logger->warning(
					'Invalid grammar rule format: first element must be string. Case: {case}, Rule: {rule}',
					[
						'case' => $case,
						'rule' => json_encode( $rule ),
					]
				);
				continue;
			}

			// Entries whose pattern is the literal '@metadata' are not rules.
			if ( $pattern === '@metadata' ) {
				continue;
			}

			// A non-string replacement is treated as the empty string.
			$substitute = is_string( $rule[1] ) ? $rule[1] : '';

			// Rules are stored without delimiters; escape '/' so it can serve as the delimiter.
			$delimited = '/' . addcslashes( $pattern, '/' ) . '/u';
			$matched = preg_match( $delimited, $word );

			if ( $matched === false ) {
				$this->logger->error(
					'An error occurred while processing grammar: {error}. Word: {word}. Regex: /{form}/.',
					[
						'error' => preg_last_error_msg(),
						'word' => $word,
						'form' => $pattern,
					]
				);
				continue;
			}

			if ( $matched === 1 ) {
				// First matching rule wins; keep the original word if the replacement itself fails.
				return preg_replace( $delimited, $substitute, $word ) ?? $word;
			}
		}

		return $word;
	}
}