Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 92 |
|
0.00% |
0 / 8 |
CRAP | |
0.00% |
0 / 1 |
Converter | |
0.00% |
0 / 92 |
|
0.00% |
0 / 8 |
1482 | |
0.00% |
0 / 1 |
convert | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
doConvert | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
132 | |||
nextToken | |
0.00% |
0 / 50 |
|
0.00% |
0 / 1 |
420 | |||
doOperation | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
newNumber | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
newOperator | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
error | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * @author Tim Starling |
4 | * @author Niklas Laxström |
5 | * @license GPL-2.0-or-later |
6 | * @file |
7 | */ |
8 | |
9 | namespace CLDRPluralRuleParser; |
10 | |
11 | use CLDRPluralRuleParser\Converter\Expression; |
12 | use CLDRPluralRuleParser\Converter\Fragment; |
13 | use CLDRPluralRuleParser\Converter\Operator; |
14 | |
/**
 * Helper class for converting rules to reverse polish notation (RPN).
 *
 * The rule string is tokenised by nextToken() and the tokens are combined by
 * doConvert() using an operator stack and an operand stack (Dijkstra's
 * shunting yard algorithm). Callers use the static convert() entry point.
 */
class Converter {
	/**
	 * The input string
	 *
	 * @var string
	 */
	public $rule;

	/**
	 * The current position (byte offset into $rule of the next unread character)
	 *
	 * @var int
	 */
	public $pos;

	/**
	 * The past-the-end position (strlen() of $rule)
	 *
	 * @var int
	 */
	public $end;

	/**
	 * The operator stack
	 *
	 * @var array
	 */
	public $operators = [];

	/**
	 * The operand stack
	 *
	 * @var array
	 */
	public $operands = [];

	/**
	 * Precedence levels. Note that there's no need to worry about associativity
	 * for the level 4 operators, since they return boolean and don't accept
	 * boolean inputs.
	 *
	 * A higher number binds more tightly. The symbolic tokens "=", "!=" and "%"
	 * produced by nextToken() have no entry here; presumably Operator maps them
	 * to one of the names below -- TODO confirm against Operator.
	 *
	 * @var array
	 */
	private const PRECEDENCE_LEVELS = [
		'or' => 2,
		'and' => 3,
		'is' => 4,
		'is-not' => 4,
		'in' => 4,
		'not-in' => 4,
		'within' => 4,
		'not-within' => 4,
		'mod' => 5,
		',' => 6,
		'..' => 7,
	];

	/**
	 * A character list defining whitespace, for use in strspn() etc.
	 */
	private const WHITESPACE_CLASS = " \t\r\n";

	/**
	 * Same for digits. Note that the grammar given in UTS #35 doesn't allow
	 * negative numbers or decimal separators.
	 */
	private const NUMBER_CLASS = '0123456789';

	/**
	 * A character list of symbolic operands. Each operand is exactly one
	 * character long.
	 */
	private const OPERAND_SYMBOLS = 'nivwft';

	/**
	 * An anchored regular expression which matches a word at the current offset.
	 * The "A" modifier anchors the match at the offset passed to preg_match().
	 */
	private const WORD_REGEX = '/[a-zA-Z@]+/A';

	/**
	 * Convert a rule to RPN. This is the only public entry point.
	 *
	 * @param string $rule The rule to convert
	 * @return string The RPN representation of the rule
	 * @throws Error If the rule cannot be parsed
	 */
	public static function convert( $rule ): string {
		$parser = new self( $rule );

		return $parser->doConvert();
	}

	/**
	 * Protected constructor; instances are created through convert().
	 *
	 * @param string $rule The rule to convert
	 */
	protected function __construct( string $rule ) {
		$this->rule = $rule;
		$this->pos = 0;
		$this->end = strlen( $rule );
	}

	/**
	 * Do the operation.
	 *
	 * @return string The RPN representation of the rule (e.g. "5 3 mod n is")
	 * @throws Error On a misplaced token, an empty rule, or a non-boolean result
	 */
	protected function doConvert(): string {
		// Starts out true so that the toggle at the top of the loop makes the
		// first token an expected operand.
		$expectOperator = true;

		// Iterate through all tokens, saving the operators and operands to a
		// stack per Dijkstra's shunting yard algorithm.
		/** @var Operator $token */
		$token = $this->nextToken();
		while ( $token !== false ) {
			// In this grammar, there are only binary operators, so every valid
			// rule string will alternate between operator and operand tokens.
			$expectOperator = !$expectOperator;

			if ( $token instanceof Expression ) {
				// Operand
				if ( $expectOperator ) {
					$token->error( 'unexpected operand' );
				}
				$this->operands[] = $token;
			} else {
				// Operator
				if ( !$expectOperator ) {
					$token->error( 'unexpected operator' );
				}
				// Resolve higher precedence levels: anything already on the
				// stack that binds at least as tightly is applied first.
				/** @var Operator $lastOp */
				$lastOp = end( $this->operators );
				// @phan-suppress-next-next-line PhanUndeclaredProperty
				while ( $lastOp &&
					self::PRECEDENCE_LEVELS[$token->name] <= self::PRECEDENCE_LEVELS[$lastOp->name]
				) {
					$this->doOperation( $lastOp );
					array_pop( $this->operators );
					$lastOp = end( $this->operators );
				}
				$this->operators[] = $token;
			}

			$token = $this->nextToken();
		}

		// Finish off the stack
		while ( $this->operators ) {
			$this->doOperation( array_pop( $this->operators ) );
		}

		// Make sure the result is sensible. The first case is possible for an empty
		// string input, the second should be unreachable.
		if ( !count( $this->operands ) ) {
			$this->error( 'condition expected' );
		} elseif ( count( $this->operands ) > 1 ) {
			$this->error( 'missing operator or too many operands' );
		}

		$value = $this->operands[0];
		if ( $value->type !== 'boolean' ) {
			$this->error( 'the result must have a boolean type' );
		}

		return $value->rpn;
	}

	/**
	 * Fetch the next token from the input string.
	 *
	 * Advances $this->pos past the returned token. Returns false at the end of
	 * the input, or when a sample section ("@integer" / "@decimal") starts.
	 *
	 * @return Fragment|false The next token
	 * @throws Error On an unexpected character or an unrecognised word
	 */
	protected function nextToken() {
		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Whitespace
		$length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
		$this->pos += $length;

		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Number
		$length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
		if ( $length !== 0 ) {
			$token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
			$this->pos += $length;

			return $token;
		}

		// Two-character operators
		$op2 = substr( $this->rule, $this->pos, 2 );
		if ( $op2 === '..' || $op2 === '!=' ) {
			$token = $this->newOperator( $op2, $this->pos, 2 );
			$this->pos += 2;

			return $token;
		}

		// Single-character operators
		$op1 = $this->rule[$this->pos];
		if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
			$token = $this->newOperator( $op1, $this->pos, 1 );
			$this->pos++;

			return $token;
		}

		// Word
		if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
			$this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
		}
		$word1 = strtolower( $m[0] );
		$word2 = '';
		$nextTokenPos = $this->pos + strlen( $word1 );
		if ( $word1 === 'not' || $word1 === 'is' ) {
			// Look ahead one word
			$nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
			if ( $nextTokenPos < $this->end
				&& preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
			) {
				$word2 = strtolower( $m[0] );
				$nextTokenPos += strlen( $word2 );
			}
		}

		// Two-word operators like "is not" take precedence over single-word operators like "is"
		if ( $word2 !== '' ) {
			$bothWords = "{$word1}-{$word2}";
			if ( isset( self::PRECEDENCE_LEVELS[$bothWords] ) ) {
				$token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
				$this->pos = $nextTokenPos;

				return $token;
			}
		}

		// Single-word operators
		if ( isset( self::PRECEDENCE_LEVELS[$word1] ) ) {
			$token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
			$this->pos += strlen( $word1 );

			return $token;
		}

		// The single-character operand symbols. The length check matters because
		// strpos() is a substring search: without it a word such as "ni" or "ft"
		// would be accepted as one operand while the position advances by only
		// one byte.
		// @phan-suppress-next-line PhanParamSuspiciousOrder
		if ( strlen( $word1 ) === 1 && strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
			$token = $this->newNumber( $word1, $this->pos );
			$this->pos++;

			return $token;
		}

		// Samples
		if ( $word1 === '@integer' || $word1 === '@decimal' ) {
			// Samples are like comments, they have no effect on rule evaluation.
			// They run from the first sample indicator to the end of the string.
			$this->pos = $this->end;

			return false;
		}

		// error() always throws, so no return value is needed here.
		$this->error( 'unrecognised word' );
	}

	/**
	 * For the binary operator $op, pop its operands off the stack and push
	 * a fragment with rpn and type members describing the result of that
	 * operation.
	 *
	 * @param Operator $op
	 * @throws Error If fewer than two operands are available
	 */
	protected function doOperation( Operator $op ) {
		if ( count( $this->operands ) < 2 ) {
			$op->error( 'missing operand' );
		}
		// The most recently pushed operand is the right-hand side.
		$right = array_pop( $this->operands );
		$left = array_pop( $this->operands );
		$result = $op->operate( $left, $right );
		$this->operands[] = $result;
	}

	/**
	 * Create a numerical expression object
	 *
	 * @param string $text The token text (a digit string or an operand symbol)
	 * @param int $pos The offset of the token in the rule string
	 * @return Expression The numerical expression
	 */
	protected function newNumber( string $text, int $pos ): Expression {
		return new Expression( $this, 'number', $text, $pos, strlen( $text ) );
	}

	/**
	 * Create a binary operator
	 *
	 * @param string $type The operator name or symbol as tokenised
	 * @param int $pos The offset of the token in the rule string
	 * @param int $length The length of the token in bytes
	 * @return Operator The operator
	 */
	protected function newOperator( string $type, int $pos, int $length ): Operator {
		return new Operator( $this, $type, $pos, $length );
	}

	/**
	 * Throw an error
	 * @param string $message
	 * @throws Error
	 * @return never
	 */
	protected function error( string $message ) {
		throw new Error( $message );
	}
}