Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
41.38% |
24 / 58 |
|
50.00% |
1 / 2 |
CRAP | |
0.00% |
0 / 1 |
| Grammar | |
41.38% |
24 / 58 |
|
50.00% |
1 / 2 |
67.57 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| process | |
40.35% |
23 / 57 |
|
0.00% |
0 / 1 |
62.75 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * @license GPL-2.0-or-later |
| 4 | * @file |
| 5 | */ |
| 6 | |
| 7 | namespace Wikimedia\Leximorph\Handler; |
| 8 | |
| 9 | use Psr\Log\LoggerInterface; |
| 10 | use Wikimedia\Leximorph\Handler\Overrides\GrammarFallbackRegistry; |
| 11 | use Wikimedia\Leximorph\Provider; |
| 12 | |
| 13 | /** |
| 14 | * Grammar |
| 15 | * |
| 16 | * The Grammar class performs language-specific grammatical transformations on a given word. |
| 17 | * It uses transformation rules (loaded from JSON files) to convert the input word into |
| 18 | * the specified grammatical case. |
| 19 | * |
| 20 | * Usage Example: |
| 21 | * <code> |
| 22 | * echo $grammar->process( 'Википедия', 'genitive' ); |
| 23 | * </code> |
| 24 | * |
| 25 | * @since 1.45 |
| 26 | * @author Doğu Abaris (abaris@null.net) |
| 27 | * @license https://www.gnu.org/copyleft/gpl.html GPL-2.0-or-later |
| 28 | */ |
class Grammar {

	/**
	 * Stores the collaborators used by process().
	 *
	 * @param Provider $provider Supplies the grammar transformations and the language code.
	 * @param GrammarFallbackRegistry $postProcessor Consulted when no transformation
	 *  entry exists for the requested case.
	 * @param LoggerInterface $logger Receives diagnostics about malformed rule data
	 *  and regular-expression failures.
	 *
	 * @since 1.45
	 */
	public function __construct(
		protected readonly Provider $provider,
		private readonly GrammarFallbackRegistry $postProcessor,
		private readonly LoggerInterface $logger,
	) {
	}

	/**
	 * Converts a word into the requested grammatical case.
	 *
	 * The transformations obtained from the provider are looked up by case name. An entry
	 * is either a list of [pattern, replacement] rules or a string naming another entry
	 * (an alias, followed one level deep). Rules are tried in order and only the first rule
	 * whose pattern matches the word is applied. If no entry exists for the case at all,
	 * the word is handed to the fallback registry instead.
	 *
	 * Malformed data never raises: the problem is logged and the offending rule is skipped,
	 * or, when the whole entry is unusable, the word is returned unchanged.
	 *
	 * @param string $word The word to transform.
	 * @param string $case The target grammatical case.
	 *
	 * @since 1.45
	 * @return string The transformed word, or the input word if nothing applied.
	 */
	public function process( string $word, string $case ): string {
		$transformations = $this->provider->getGrammarTransformationsProvider()->getTransformations();

		// Nothing registered for this case: defer to the fallback registry.
		if ( !array_key_exists( $case, $transformations ) ) {
			return $this->postProcessor->apply( $this->provider->getLanguageCode(), $word, $case );
		}

		$rules = $transformations[$case];

		// A string entry is an alias that names the entry holding the actual rules.
		if ( is_string( $rules ) ) {
			$aliasTarget = $transformations[$rules] ?? null;
			if ( !is_array( $aliasTarget ) ) {
				$this->logger->error(
					'Expected alias {alias} to resolve to an array in grammar transformations.',
					[ 'alias' => $rules ]
				);

				return $word;
			}
			$rules = $aliasTarget;
		}

		if ( !is_array( $rules ) ) {
			$this->logger->error(
				'Invalid type for grammar forms. Expected array, got {type}.',
				[ 'type' => gettype( $rules ) ]
			);

			return $word;
		}

		foreach ( $rules as $entry ) {
			// Every rule has to be an array with both index 0 and index 1 set.
			if ( !( is_array( $entry ) && isset( $entry[0], $entry[1] ) ) ) {
				$this->logger->warning(
					'Skipping malformed grammar rule. Expected [pattern, replacement]. Case: {case}, Rule: {rule}',
					[
						'case' => $case,
						'rule' => json_encode( $entry ),
					]
				);
				continue;
			}

			$pattern = $entry[0];

			if ( !is_string( $pattern ) ) {
				$this->logger->warning(
					'Invalid grammar rule format: first element must be string. Case: {case}, Rule: {rule}',
					[
						'case' => $case,
						'rule' => json_encode( $entry ),
					]
				);
				continue;
			}

			// Entries whose first element is the metadata marker are not rules.
			if ( $pattern === '@metadata' ) {
				continue;
			}

			// A non-string replacement is treated as the empty string.
			if ( is_string( $entry[1] ) ) {
				$substitute = $entry[1];
			} else {
				$substitute = '';
			}

			// Slashes in the pattern are escaped because "/" is the delimiter.
			$delimited = '/' . addcslashes( $pattern, '/' ) . '/u';
			$matchResult = preg_match( $delimited, $word );

			if ( $matchResult === 1 ) {
				// First matching rule wins; keep the word as-is if the replacement itself fails.
				return preg_replace( $delimited, $substitute, $word ) ?? $word;
			}

			if ( $matchResult === false ) {
				// The pattern could not be evaluated: report it and try the next rule.
				$this->logger->error(
					'An error occurred while processing grammar: {error}. Word: {word}. Regex: /{form}/.',
					[
						'error' => preg_last_error_msg(),
						'word' => $word,
						'form' => $pattern,
					]
				);
			}
		}

		return $word;
	}
}