Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
77.63% |
170 / 219 |
|
14.29% |
1 / 7 |
CRAP | |
0.00% |
0 / 1 |
JavaScriptMinifier | |
77.63% |
170 / 219 |
|
14.29% |
1 / 7 |
281.29 | |
0.00% |
0 / 1 |
ensureExpandedStates | |
7.69% |
2 / 26 |
|
0.00% |
0 / 1 |
244.31 | |||
minify | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
createMinifier | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
createSourceMapState | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
createIdentityMinifier | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
minifyInternal | |
97.09% |
167 / 172 |
|
0.00% |
0 / 1 |
93 | |||
debug | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | /** |
3 | * Copyright 2011 Paul Copperman <paul.copperman@gmail.com> |
4 | * Copyright 2018 Timo Tijhof |
5 | * Copyright 2021 Roan Kattouw <roan.kattouw@gmail.com> |
6 | * |
7 | * Licensed under the Apache License, Version 2.0 (the "License"); |
8 | * you may not use this file except in compliance with the License. |
9 | * You may obtain a copy of the License at |
10 | * |
11 | * http://www.apache.org/licenses/LICENSE-2.0 |
12 | * |
13 | * Unless required by applicable law or agreed to in writing, software |
14 | * distributed under the License is distributed on an "AS IS" BASIS, |
15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | * See the License for the specific language governing permissions and |
17 | * limitations under the License. |
18 | * |
19 | * @file |
20 | * @license Apache-2.0 |
21 | * @license MIT |
22 | * @license GPL-2.0-or-later |
23 | * @license LGPL-2.1-or-later |
24 | */ |
25 | |
26 | namespace Wikimedia\Minify; |
27 | |
28 | use ReflectionClass; |
29 | |
30 | /** |
31 | * JavaScript Minifier |
32 | * |
33 | * This class is meant to safely minify JavaScript code, while leaving syntactically correct |
34 | * programs intact. Other libraries, such as JSMin, require a certain coding style to work |
35 | * correctly. On the other hand, libraries like jsminplus that do parse the code correctly are |
36 | * rather slow, because they construct a complete parse tree before outputting the minified code. |
37 | * So this class is meant to allow arbitrary (but syntactically correct) input, while being |
38 | * fast enough to be used for on-the-fly minifying. |
39 | * |
40 | * This class was written with ECMA-262 7th Edition in mind ("ECMAScript 2016"). Parsing features |
41 | * new to later editions of ECMAScript might not be supported. It's assumed that the input is |
42 | * syntactically correct; if it's not, this class may not detect that, and may produce incorrect |
43 | * output. |
44 | * |
45 | * This class has limited support for the 8.0 spec ("ECMAScript 2017"): specifically, the await |
46 | * keyword and most kinds of async functions are implemented. Other new parsing features of ES2017 |
47 | * are not yet supported. |
48 | * |
49 | * See also: |
50 | * - <https://262.ecma-international.org/11.0/> |
51 | * - <https://262.ecma-international.org/8.0/> |
52 | * - <https://262.ecma-international.org/7.0/> |
53 | * - <https://262.ecma-international.org/6.0/> |
54 | */ |
55 | class JavaScriptMinifier { |
56 | |
57 | /* Parsing states. |
58 | * The state machine is necessary to decide whether to parse a slash as division |
59 | * operator or as regexp literal, and to know where semicolon insertion is possible. |
60 | * States are generally named after the next expected item. We only distinguish states when the |
61 | * distinction is relevant for our purpose. The meaning of these states is documented |
62 | * in $model below. |
63 | * |
64 | * Negated state values (e.g. -self::STATEMENT) are used to indicate that the state is inside |
65 | * a generator function, which changes the behavior of 'yield'. |
66 | */ |
67 | private const STATEMENT = 1; |
68 | private const CONDITION = 2; |
69 | private const FUNC = 3; |
70 | private const GENFUNC = 4; |
71 | private const PROPERTY_ASSIGNMENT = 5; |
72 | private const EXPRESSION = 6; |
73 | private const EXPRESSION_NO_NL = 7; |
74 | private const EXPRESSION_OP = 8; |
75 | private const EXPRESSION_DOT = 9; |
76 | private const EXPRESSION_END = 10; |
77 | private const EXPRESSION_ARROWFUNC = 11; |
78 | private const EXPRESSION_TERNARY = 12; |
79 | private const EXPRESSION_TERNARY_OP = 13; |
80 | private const EXPRESSION_TERNARY_DOT = 14; |
81 | private const EXPRESSION_TERNARY_ARROWFUNC = 15; |
82 | private const PAREN_EXPRESSION = 16; |
83 | private const PAREN_EXPRESSION_OP = 17; |
84 | private const PAREN_EXPRESSION_DOT = 18; |
85 | private const PAREN_EXPRESSION_ARROWFUNC = 19; |
86 | private const PROPERTY_EXPRESSION = 20; |
87 | private const PROPERTY_EXPRESSION_OP = 21; |
88 | private const PROPERTY_EXPRESSION_DOT = 22; |
89 | private const PROPERTY_EXPRESSION_ARROWFUNC = 23; |
90 | private const CLASS_DEF = 24; |
91 | private const IMPORT_EXPORT = 25; |
92 | private const TEMPLATE_STRING_HEAD = 26; |
93 | private const TEMPLATE_STRING_TAIL = 27; |
94 | private const PAREN_EXPRESSION_OP_NO_NL = 28; |
95 | |
96 | /* Token types */ |
97 | |
98 | /** @var int unary operators */ |
99 | private const TYPE_UN_OP = 101; |
100 | |
101 | /** @var int ++ and -- */ |
102 | private const TYPE_INCR_OP = 102; |
103 | |
104 | /** @var int binary operators (except .) */ |
105 | private const TYPE_BIN_OP = 103; |
106 | |
107 | /** @var int + and - which can be either unary or binary ops */ |
108 | private const TYPE_ADD_OP = 104; |
109 | |
110 | /** @var int . */ |
111 | private const TYPE_DOT = 105; |
112 | |
113 | /** @var int ? */ |
114 | private const TYPE_HOOK = 106; |
115 | |
116 | /** @var int : */ |
117 | private const TYPE_COLON = 107; |
118 | |
119 | /** @var int , */ |
120 | private const TYPE_COMMA = 108; |
121 | |
122 | /** @var int ; */ |
123 | private const TYPE_SEMICOLON = 109; |
124 | |
125 | /** @var int { */ |
126 | private const TYPE_BRACE_OPEN = 110; |
127 | |
128 | /** @var int } */ |
129 | private const TYPE_BRACE_CLOSE = 111; |
130 | |
131 | /** @var int ( and [ */ |
132 | private const TYPE_PAREN_OPEN = 112; |
133 | |
134 | /** @var int ) and ] */ |
135 | private const TYPE_PAREN_CLOSE = 113; |
136 | |
137 | /** @var int => */ |
138 | private const TYPE_ARROW = 114; |
139 | |
140 | /** @var int keywords: break, continue, return, throw */ |
141 | private const TYPE_RETURN = 115; |
142 | |
143 | /** @var int keywords: catch, for, with, switch, while, if */ |
144 | private const TYPE_IF = 116; |
145 | |
146 | /** @var int keywords: case, finally, else, do, try */ |
147 | private const TYPE_DO = 117; |
148 | |
149 | /** @var int keywords: var, let, const */ |
150 | private const TYPE_VAR = 118; |
151 | |
152 | /** @var int keywords: yield */ |
153 | private const TYPE_YIELD = 119; |
154 | |
155 | /** @var int keywords: function */ |
156 | private const TYPE_FUNC = 120; |
157 | |
158 | /** @var int keywords: class */ |
159 | private const TYPE_CLASS = 121; |
160 | |
161 | /** @var int all literals, identifiers, unrecognised tokens, and other keywords */ |
162 | private const TYPE_LITERAL = 122; |
163 | |
164 | /** @var int For special treatment of tokens that usually mean something else */ |
165 | private const TYPE_SPECIAL = 123; |
166 | |
167 | /** @var int keywords: async */ |
168 | private const TYPE_ASYNC = 124; |
169 | |
170 | /** @var int keywords: await */ |
171 | private const TYPE_AWAIT = 125; |
172 | |
173 | /** @var int Go to another state */ |
174 | private const ACTION_GOTO = 201; |
175 | |
176 | /** @var int Push a state to the stack */ |
177 | private const ACTION_PUSH = 202; |
178 | |
179 | /** @var int Pop the state from the top of the stack, and go to that state */ |
180 | private const ACTION_POP = 203; |
181 | |
182 | /** @var int Limit to avoid excessive memory usage */ |
183 | private const STACK_LIMIT = 1000; |
184 | |
185 | /** Length of the longest token in $tokenTypes made of punctuation characters, |
186 | * as defined in $opChars. Update this if you add longer tokens to $tokenTypes. |
187 | * |
188 | * Currently, the longest punctuation token is `>>>=`, which is 4 characters. |
189 | */ |
190 | private const LONGEST_PUNCTUATION_TOKEN = 4; |
191 | |
192 | /** |
193 | * @var int $maxLineLength |
194 | * |
195 | * Maximum line length |
196 | * |
197 | * This is not a strict maximum, but a guideline. Longer lines will be |
198 | * produced when literals (e.g. quoted strings) longer than this are |
199 | * encountered, or when required to guard against semicolon insertion. |
200 | * |
201 | * This is a private member (instead of constant) to allow tests to |
202 | * set it to 1, to verify ASI and line-breaking behaviour. |
203 | */ |
204 | private static $maxLineLength = 1000; |
205 | |
206 | private static bool $expandedStates = false; |
207 | |
208 | /** |
209 | * @var array $opChars |
210 | * |
211 | * Characters which can be combined without whitespace between them. |
212 | */ |
213 | private static $opChars = [ |
214 | // ECMAScript 6.0 § 11.7 Punctuators |
215 | // Unlike the spec, these are individual symbols, not sequences. |
216 | '{' => true, |
217 | '}' => true, |
218 | '(' => true, |
219 | ')' => true, |
220 | '[' => true, |
221 | ']' => true, |
222 | // Dots have a special case after $dotlessNum, which requires whitespace |
223 | '.' => true, |
224 | ';' => true, |
225 | ',' => true, |
226 | '<' => true, |
227 | '>' => true, |
228 | '=' => true, |
229 | '!' => true, |
230 | '+' => true, |
231 | '-' => true, |
232 | '*' => true, |
233 | '%' => true, |
234 | '&' => true, |
235 | '|' => true, |
236 | '^' => true, |
237 | '~' => true, |
238 | '?' => true, |
239 | ':' => true, |
240 | '/' => true, |
241 | // ECMAScript 6.0 § 11.8.4 String Literals |
242 | '"' => true, |
243 | "'" => true, |
244 | // ECMAScript 6.0 § 11.8.6 Template Literal Lexical Components |
245 | '`' => true, |
246 | ]; |
247 | |
248 | /** |
249 | * @var array $tokenTypes |
250 | * |
251 | * Tokens and their types. |
252 | */ |
253 | private static $tokenTypes = [ |
254 | // ECMAScript 6.0 § 12.5 Unary Operators |
255 | // UnaryExpression includes PostfixExpression, which includes 'new'. |
256 | 'new' => self::TYPE_UN_OP, |
257 | 'delete' => self::TYPE_UN_OP, |
258 | 'void' => self::TYPE_UN_OP, |
259 | 'typeof' => self::TYPE_UN_OP, |
260 | '~' => self::TYPE_UN_OP, |
261 | '!' => self::TYPE_UN_OP, |
262 | // ECMAScript 8.0 § 14.6 AwaitExpression |
263 | // |
264 | // await UnaryExpression |
265 | // |
266 | 'await' => self::TYPE_AWAIT, |
267 | // ECMAScript 6.0 § 12.2 Primary Expression, among others |
268 | '...' => self::TYPE_UN_OP, |
269 | // ECMAScript 6.0 § 12.7 Additive Operators |
270 | '++' => self::TYPE_INCR_OP, |
271 | '--' => self::TYPE_INCR_OP, |
272 | '+' => self::TYPE_ADD_OP, |
273 | '-' => self::TYPE_ADD_OP, |
274 | // ECMAScript 6.0 § 12.6 Multiplicative Operators |
275 | '*' => self::TYPE_BIN_OP, |
276 | '/' => self::TYPE_BIN_OP, |
277 | '%' => self::TYPE_BIN_OP, |
278 | // ECMAScript 7.0 § 12.6 Exponentiation Operator |
279 | '**' => self::TYPE_BIN_OP, |
280 | // ECMAScript 6.0 § 12.8 Bitwise Shift Operators |
281 | '<<' => self::TYPE_BIN_OP, |
282 | '>>' => self::TYPE_BIN_OP, |
283 | '>>>' => self::TYPE_BIN_OP, |
284 | // ECMAScript 6.0 § 12.9 Relational Operators |
285 | '<' => self::TYPE_BIN_OP, |
286 | '>' => self::TYPE_BIN_OP, |
287 | '<=' => self::TYPE_BIN_OP, |
288 | '>=' => self::TYPE_BIN_OP, |
289 | 'instanceof' => self::TYPE_BIN_OP, |
290 | 'in' => self::TYPE_BIN_OP, |
291 | // ECMAScript 6.0 § 12.10 Equality Operators |
292 | '==' => self::TYPE_BIN_OP, |
293 | '!=' => self::TYPE_BIN_OP, |
294 | '===' => self::TYPE_BIN_OP, |
295 | '!==' => self::TYPE_BIN_OP, |
296 | // ECMAScript 6.0 § 12.11 Binary Bitwise Operators |
297 | '&' => self::TYPE_BIN_OP, |
298 | '^' => self::TYPE_BIN_OP, |
299 | '|' => self::TYPE_BIN_OP, |
300 | // ECMAScript 6.0 § 12.12 Binary Logical Operators |
301 | '&&' => self::TYPE_BIN_OP, |
302 | '||' => self::TYPE_BIN_OP, |
303 | // ECMAScript 11.0 § 12.13 Binary Logical Operators |
304 | '??' => self::TYPE_BIN_OP, |
305 | // ECMAScript 6.0 § 12.13 Conditional Operator |
306 | // Also known as ternary. |
307 | '?' => self::TYPE_HOOK, |
308 | ':' => self::TYPE_COLON, |
309 | // ECMAScript 6.0 § 12.14 Assignment Operators |
310 | '=' => self::TYPE_BIN_OP, |
311 | '*=' => self::TYPE_BIN_OP, |
312 | '/=' => self::TYPE_BIN_OP, |
313 | '%=' => self::TYPE_BIN_OP, |
314 | '+=' => self::TYPE_BIN_OP, |
315 | '-=' => self::TYPE_BIN_OP, |
316 | '<<=' => self::TYPE_BIN_OP, |
317 | '>>=' => self::TYPE_BIN_OP, |
318 | '>>>=' => self::TYPE_BIN_OP, |
319 | '&=' => self::TYPE_BIN_OP, |
320 | '^=' => self::TYPE_BIN_OP, |
321 | '|=' => self::TYPE_BIN_OP, |
322 | // ECMAScript 6.0 § 12.15 Comma Operator |
323 | ',' => self::TYPE_COMMA, |
324 | |
325 | // The keywords that disallow LineTerminator before their |
326 | // (sometimes optional) Expression or Identifier. |
327 | // |
328 | // keyword ; |
329 | // keyword [no LineTerminator here] Identifier ; |
330 | // keyword [no LineTerminator here] Expression ; |
331 | // |
332 | // See also ECMAScript 6.0 § 11.9.1 Rules of Automatic Semicolon Insertion |
333 | 'continue' => self::TYPE_RETURN, |
334 | 'break' => self::TYPE_RETURN, |
335 | 'return' => self::TYPE_RETURN, |
336 | 'throw' => self::TYPE_RETURN, |
337 | // yield is only a keyword inside generator functions; otherwise it's an identifier. |
338 | // This is handled with the negative states hack: if the state is negative, TYPE_YIELD |
339 | // is treated as TYPE_RETURN; if it's positive, it's treated as TYPE_LITERAL. |
340 | 'yield' => self::TYPE_YIELD, |
341 | |
342 | // The keywords that require a parenthesised Expression or Identifier |
343 | // before the next Statement. |
344 | // |
345 | // keyword ( Expression ) Statement |
346 | // keyword ( Identifier ) Statement |
347 | // |
348 | // See also ECMAScript 6.0: |
349 | // - § 13.6 The if Statement |
350 | // - § 13.7 Iteration Statements (do, while, for) |
351 | // - § 13.11 The with Statement |
352 | // - § 13.12 The switch Statement |
353 | // - § 13.15 The try Statement (catch) |
354 | 'if' => self::TYPE_IF, |
355 | 'catch' => self::TYPE_IF, |
356 | 'while' => self::TYPE_IF, |
357 | 'for' => self::TYPE_IF, |
358 | 'switch' => self::TYPE_IF, |
359 | 'with' => self::TYPE_IF, |
360 | |
361 | // The keywords followed by a Statement, Expression, or Block. |
362 | // |
363 | // else Statement |
364 | // do Statement |
365 | // case Expression |
366 | // try Block |
367 | // finally Block |
368 | // |
369 | // See also ECMAScript 6.0: |
370 | // - § 13.6 The if Statement (else) |
371 | // - § 13.7 Iteration Statements (do, while, for) |
372 | // - § 13.12 The switch Statement (case) |
373 | // - § 13.15 The try Statement |
374 | 'else' => self::TYPE_DO, |
375 | 'do' => self::TYPE_DO, |
376 | 'case' => self::TYPE_DO, |
377 | 'try' => self::TYPE_DO, |
378 | 'finally' => self::TYPE_DO, |
379 | |
380 | // Keywords followed by a variable declaration |
381 | // This is different from the group above, because a { begins |
382 | // object destructuring, rather than a block |
383 | 'var' => self::TYPE_VAR, |
384 | 'let' => self::TYPE_VAR, |
385 | 'const' => self::TYPE_VAR, |
386 | |
387 | // ECMAScript 6.0 § 14.1 Function Definitions |
388 | 'function' => self::TYPE_FUNC, |
389 | // ECMAScript 6.0 § 14.2 Arrow Function Definitions |
390 | '=>' => self::TYPE_ARROW, |
391 | |
392 | // Class declaration or expression: |
393 | // class Identifier { ClassBody } |
394 | // class { ClassBody } |
395 | // class Identifier extends Expression { ClassBody } |
396 | // class extends Expression { ClassBody } |
397 | 'class' => self::TYPE_CLASS, |
398 | |
399 | // ECMAScript 6.0 § 12.3 Left-Hand-Side Expressions (MemberExpression) |
400 | // A dot can also be part of a DecimalLiteral, but in that case we handle the entire |
401 | // DecimalLiteral as one token. A separate '.' token is always part of a MemberExpression. |
402 | '.' => self::TYPE_DOT, |
403 | |
404 | // Can be one of: |
405 | // - Block (ECMAScript 6.0 § 13.2 Block) |
406 | // - ObjectLiteral (ECMAScript 6.0 § 12.2 Primary Expression) |
407 | '{' => self::TYPE_BRACE_OPEN, |
408 | '}' => self::TYPE_BRACE_CLOSE, |
409 | |
410 | // Can be one of: |
411 | // - Parenthesised Identifier or Expression after a |
412 | // TYPE_IF or TYPE_FUNC keyword. |
413 | // - PrimaryExpression (ECMAScript 6.0 § 12.2 Primary Expression) |
414 | // - CallExpression (ECMAScript 6.0 § 12.3 Left-Hand-Side Expressions) |
415 | // - Beginning of an ArrowFunction (ECMAScript 6.0 § 14.2 Arrow Function Definitions) |
416 | '(' => self::TYPE_PAREN_OPEN, |
417 | ')' => self::TYPE_PAREN_CLOSE, |
418 | |
419 | // Can be one of: |
420 | // - ArrayLiteral (ECMAScript 6.0 § 12.2 Primary Expressions) |
421 | // - ComputedPropertyName (ECMAScript 6.0 § 12.2.6 Object Initializer) |
422 | '[' => self::TYPE_PAREN_OPEN, |
423 | ']' => self::TYPE_PAREN_CLOSE, |
424 | |
425 | // Can be one of: |
426 | // - End of any statement |
427 | // - EmptyStatement (ECMAScript 6.0 § 13.4 Empty Statement) |
428 | ';' => self::TYPE_SEMICOLON, |
429 | |
430 | // ECMAScript 8.0 § 14.6 Async Function Definitions |
431 | // async [no LineTerminator here] function ... |
432 | // async [no LineTerminator here] propertyName() ... |
433 | 'async' => self::TYPE_ASYNC, |
434 | |
435 | ]; |
436 | |
437 | /** |
438 | * @var array $model |
439 | * |
440 | * The main table for the state machine. Defines the desired action for every state/token pair. |
441 | * |
442 | * The state pushed onto the stack by ACTION_PUSH will be returned to by ACTION_POP. |
443 | * A state/token pair may not specify both ACTION_POP and ACTION_GOTO. If that does happen, |
444 | * ACTION_POP takes precedence. |
445 | * |
446 | * This table is augmented by self::ensureExpandedStates(). |
447 | */ |
448 | private static $model = [ |
449 | // Statement - This is the initial state. |
450 | self::STATEMENT => [ |
451 | self::TYPE_UN_OP => [ |
452 | self::ACTION_GOTO => self::EXPRESSION, |
453 | ], |
454 | self::TYPE_INCR_OP => [ |
455 | self::ACTION_GOTO => self::EXPRESSION, |
456 | ], |
457 | self::TYPE_ADD_OP => [ |
458 | self::ACTION_GOTO => self::EXPRESSION, |
459 | ], |
460 | self::TYPE_BRACE_OPEN => [ |
461 | // Use of '{' in statement context, creates a Block. |
462 | self::ACTION_PUSH => self::STATEMENT, |
463 | ], |
464 | self::TYPE_BRACE_CLOSE => [ |
465 | // Ends a Block |
466 | self::ACTION_POP => true, |
467 | ], |
468 | self::TYPE_PAREN_OPEN => [ |
469 | self::ACTION_PUSH => self::EXPRESSION_OP, |
470 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
471 | ], |
472 | self::TYPE_RETURN => [ |
473 | self::ACTION_GOTO => self::EXPRESSION_NO_NL, |
474 | ], |
475 | self::TYPE_IF => [ |
476 | self::ACTION_GOTO => self::CONDITION, |
477 | ], |
478 | self::TYPE_VAR => [ |
479 | self::ACTION_GOTO => self::EXPRESSION, |
480 | ], |
481 | self::TYPE_FUNC => [ |
482 | self::ACTION_PUSH => self::STATEMENT, |
483 | self::ACTION_GOTO => self::FUNC, |
484 | ], |
485 | self::TYPE_CLASS => [ |
486 | self::ACTION_PUSH => self::STATEMENT, |
487 | self::ACTION_GOTO => self::CLASS_DEF, |
488 | ], |
489 | self::TYPE_SPECIAL => [ |
490 | 'import' => [ |
491 | self::ACTION_GOTO => self::IMPORT_EXPORT, |
492 | ], |
493 | 'export' => [ |
494 | self::ACTION_GOTO => self::IMPORT_EXPORT, |
495 | ], |
496 | ], |
497 | self::TYPE_LITERAL => [ |
498 | self::ACTION_GOTO => self::EXPRESSION_OP, |
499 | ], |
500 | self::TYPE_ASYNC => [ |
501 | self::ACTION_GOTO => self::EXPRESSION_OP, |
502 | ], |
503 | self::TYPE_AWAIT => [ |
504 | self::ACTION_GOTO => self::EXPRESSION, |
505 | ], |
506 | ], |
507 | // The state after if/catch/while/for/switch/with |
508 | // Waits for an expression in parentheses, then goes to STATEMENT |
509 | self::CONDITION => [ |
510 | self::TYPE_PAREN_OPEN => [ |
511 | self::ACTION_PUSH => self::STATEMENT, |
512 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
513 | ], |
514 | ], |
515 | // The state after the function keyword. Waits for {, then goes to STATEMENT. |
516 | // The function body's closing } will pop the stack, so the state to return to |
517 | // after the function should be pushed to the stack first |
518 | self::FUNC => [ |
519 | // Needed to prevent * in an expression in the argument list from improperly |
520 | // triggering GENFUNC |
521 | self::TYPE_PAREN_OPEN => [ |
522 | self::ACTION_PUSH => self::FUNC, |
523 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
524 | ], |
525 | self::TYPE_BRACE_OPEN => [ |
526 | self::ACTION_GOTO => self::STATEMENT, |
527 | ], |
528 | self::TYPE_SPECIAL => [ |
529 | '*' => [ |
530 | self::ACTION_GOTO => self::GENFUNC, |
531 | ], |
532 | ], |
533 | ], |
534 | // After function*. Waits for { , then goes to a generator function statement. |
535 | self::GENFUNC => [ |
536 | self::TYPE_BRACE_OPEN => [ |
537 | // Note negative value: generator function states are negative |
538 | self::ACTION_GOTO => -self::STATEMENT |
539 | ], |
540 | ], |
541 | // Property assignment - This is an object literal declaration. |
542 | // For example: `{ key: value, key2, [computedKey3]: value3, method4() { ... } }` |
543 | self::PROPERTY_ASSIGNMENT => [ |
544 | // Note that keywords like if, class, var, delete, instanceof etc. can be used as keys, |
545 | // and should be treated as literals here, as they are in EXPRESSION_DOT. In this state, |
546 | // that is implicitly true because TYPE_LITERAL has no action, so it stays in this state. |
547 | // If we later add a state transition for TYPE_LITERAL, that same transition should |
548 | // also be applied to TYPE_RETURN, TYPE_IF, TYPE_DO, TYPE_VAR, TYPE_FUNC and TYPE_CLASS. |
549 | self::TYPE_COLON => [ |
550 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION, |
551 | ], |
552 | // For {, which begins a method |
553 | self::TYPE_BRACE_OPEN => [ |
554 | self::ACTION_PUSH => self::PROPERTY_ASSIGNMENT, |
555 | // This is not flipped, see "Special cases" below |
556 | self::ACTION_GOTO => self::STATEMENT, |
557 | ], |
558 | self::TYPE_BRACE_CLOSE => [ |
559 | self::ACTION_POP => true, |
560 | ], |
561 | // For [, which begins a computed key |
562 | self::TYPE_PAREN_OPEN => [ |
563 | self::ACTION_PUSH => self::PROPERTY_ASSIGNMENT, |
564 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
565 | ], |
566 | self::TYPE_SPECIAL => [ |
567 | '*' => [ |
568 | self::ACTION_PUSH => self::PROPERTY_ASSIGNMENT, |
569 | self::ACTION_GOTO => self::GENFUNC, |
570 | ], |
571 | ], |
572 | ], |
573 | // Place in an expression where we expect an operand or a unary operator: the start |
574 | // of an expression or after an operator. Note that unary operators (including INCR_OP |
575 | // and ADD_OP) cause us to stay in this state, while operands take us to EXPRESSION_OP |
576 | self::EXPRESSION => [ |
577 | self::TYPE_SEMICOLON => [ |
578 | self::ACTION_GOTO => self::STATEMENT, |
579 | ], |
580 | self::TYPE_BRACE_OPEN => [ |
581 | self::ACTION_PUSH => self::EXPRESSION_OP, |
582 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
583 | ], |
584 | self::TYPE_BRACE_CLOSE => [ |
585 | self::ACTION_POP => true, |
586 | ], |
587 | self::TYPE_PAREN_OPEN => [ |
588 | self::ACTION_PUSH => self::EXPRESSION_OP, |
589 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
590 | ], |
591 | self::TYPE_FUNC => [ |
592 | self::ACTION_PUSH => self::EXPRESSION_OP, |
593 | self::ACTION_GOTO => self::FUNC, |
594 | ], |
595 | self::TYPE_CLASS => [ |
596 | self::ACTION_PUSH => self::EXPRESSION_OP, |
597 | self::ACTION_GOTO => self::CLASS_DEF, |
598 | ], |
599 | self::TYPE_LITERAL => [ |
600 | self::ACTION_GOTO => self::EXPRESSION_OP, |
601 | ], |
602 | self::TYPE_ASYNC => [ |
603 | self::ACTION_GOTO => self::EXPRESSION_OP, |
604 | ], |
605 | ], |
606 | // An expression immediately after return/throw/break/continue, where a newline |
607 | // is not allowed. This state is identical to EXPRESSION, except that semicolon |
608 | // insertion can happen here, and we never stay here: in cases where EXPRESSION would |
609 | // do nothing, we go to EXPRESSION. |
610 | self::EXPRESSION_NO_NL => [ |
611 | self::TYPE_UN_OP => [ |
612 | self::ACTION_GOTO => self::EXPRESSION, |
613 | ], |
614 | self::TYPE_INCR_OP => [ |
615 | self::ACTION_GOTO => self::EXPRESSION, |
616 | ], |
617 | // BIN_OP seems impossible at the start of an expression, but it can happen in |
618 | // yield *foo |
619 | self::TYPE_BIN_OP => [ |
620 | self::ACTION_GOTO => self::EXPRESSION, |
621 | ], |
622 | self::TYPE_ADD_OP => [ |
623 | self::ACTION_GOTO => self::EXPRESSION, |
624 | ], |
625 | self::TYPE_SEMICOLON => [ |
626 | self::ACTION_GOTO => self::STATEMENT, |
627 | ], |
628 | self::TYPE_BRACE_OPEN => [ |
629 | self::ACTION_PUSH => self::EXPRESSION_OP, |
630 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
631 | ], |
632 | self::TYPE_BRACE_CLOSE => [ |
633 | self::ACTION_POP => true, |
634 | ], |
635 | self::TYPE_PAREN_OPEN => [ |
636 | self::ACTION_PUSH => self::EXPRESSION_OP, |
637 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
638 | ], |
639 | self::TYPE_FUNC => [ |
640 | self::ACTION_PUSH => self::EXPRESSION_OP, |
641 | self::ACTION_GOTO => self::FUNC, |
642 | ], |
643 | self::TYPE_CLASS => [ |
644 | self::ACTION_PUSH => self::EXPRESSION_OP, |
645 | self::ACTION_GOTO => self::CLASS_DEF, |
646 | ], |
647 | self::TYPE_LITERAL => [ |
648 | self::ACTION_GOTO => self::EXPRESSION_OP, |
649 | ], |
650 | self::TYPE_ASYNC => [ |
651 | self::ACTION_GOTO => self::EXPRESSION_OP, |
652 | ], |
653 | self::TYPE_AWAIT => [ |
654 | self::ACTION_GOTO => self::EXPRESSION, |
655 | ], |
656 | ], |
657 | // Place in an expression after an operand, where we expect an operator |
658 | self::EXPRESSION_OP => [ |
659 | self::TYPE_BIN_OP => [ |
660 | self::ACTION_GOTO => self::EXPRESSION, |
661 | ], |
662 | self::TYPE_ADD_OP => [ |
663 | self::ACTION_GOTO => self::EXPRESSION, |
664 | ], |
665 | self::TYPE_DOT => [ |
666 | self::ACTION_GOTO => self::EXPRESSION_DOT, |
667 | ], |
668 | self::TYPE_HOOK => [ |
669 | self::ACTION_PUSH => self::EXPRESSION, |
670 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
671 | ], |
672 | self::TYPE_COLON => [ |
673 | self::ACTION_GOTO => self::STATEMENT, |
674 | ], |
675 | self::TYPE_COMMA => [ |
676 | self::ACTION_GOTO => self::EXPRESSION, |
677 | ], |
678 | self::TYPE_SEMICOLON => [ |
679 | self::ACTION_GOTO => self::STATEMENT, |
680 | ], |
681 | self::TYPE_ARROW => [ |
682 | self::ACTION_GOTO => self::EXPRESSION_ARROWFUNC, |
683 | ], |
684 | self::TYPE_PAREN_OPEN => [ |
685 | self::ACTION_PUSH => self::EXPRESSION_OP, |
686 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
687 | ], |
688 | self::TYPE_BRACE_CLOSE => [ |
689 | self::ACTION_POP => true, |
690 | ], |
691 | self::TYPE_FUNC => [ |
692 | self::ACTION_PUSH => self::EXPRESSION_OP, |
693 | self::ACTION_GOTO => self::FUNC, |
694 | ], |
695 | ], |
696 | // State after a dot (.). Like EXPRESSION, except that many keywords behave like literals |
697 | // (e.g. class, if, else, var, function) because they're not valid as identifiers but are |
698 | // valid as property names. |
699 | self::EXPRESSION_DOT => [ |
700 | self::TYPE_LITERAL => [ |
701 | self::ACTION_GOTO => self::EXPRESSION_OP, |
702 | ], |
703 | // The following are keywords behaving as literals |
704 | self::TYPE_RETURN => [ |
705 | self::ACTION_GOTO => self::EXPRESSION_OP, |
706 | ], |
707 | self::TYPE_IF => [ |
708 | self::ACTION_GOTO => self::EXPRESSION_OP, |
709 | ], |
710 | self::TYPE_DO => [ |
711 | self::ACTION_GOTO => self::EXPRESSION_OP, |
712 | ], |
713 | self::TYPE_VAR => [ |
714 | self::ACTION_GOTO => self::EXPRESSION_OP, |
715 | ], |
716 | self::TYPE_FUNC => [ |
717 | self::ACTION_GOTO => self::EXPRESSION_OP, |
718 | ], |
719 | self::TYPE_CLASS => [ |
720 | self::ACTION_GOTO => self::EXPRESSION_OP, |
721 | ], |
722 | // We don't expect real unary/binary operators here, but some keywords |
723 | // (new, delete, void, typeof, instanceof, in) are classified as such, and they can be |
724 | // used as property names |
725 | self::TYPE_UN_OP => [ |
726 | self::ACTION_GOTO => self::EXPRESSION_OP, |
727 | ], |
728 | self::TYPE_BIN_OP => [ |
729 | self::ACTION_GOTO => self::EXPRESSION_OP, |
730 | ], |
731 | ], |
732 | // State after the } closing an arrow function body: like STATEMENT except |
733 | // that it has semicolon insertion, COMMA can continue the expression, and after |
734 | // a function we go to STATEMENT instead of EXPRESSION_OP |
735 | self::EXPRESSION_END => [ |
736 | self::TYPE_UN_OP => [ |
737 | self::ACTION_GOTO => self::EXPRESSION, |
738 | ], |
739 | self::TYPE_INCR_OP => [ |
740 | self::ACTION_GOTO => self::EXPRESSION, |
741 | ], |
742 | self::TYPE_ADD_OP => [ |
743 | self::ACTION_GOTO => self::EXPRESSION, |
744 | ], |
745 | self::TYPE_COMMA => [ |
746 | self::ACTION_GOTO => self::EXPRESSION, |
747 | ], |
748 | self::TYPE_SEMICOLON => [ |
749 | self::ACTION_GOTO => self::STATEMENT, |
750 | ], |
751 | self::TYPE_BRACE_OPEN => [ |
752 | self::ACTION_PUSH => self::STATEMENT, |
753 | self::ACTION_GOTO => self::STATEMENT, |
754 | ], |
755 | self::TYPE_BRACE_CLOSE => [ |
756 | self::ACTION_POP => true, |
757 | ], |
758 | self::TYPE_PAREN_OPEN => [ |
759 | self::ACTION_PUSH => self::EXPRESSION_OP, |
760 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
761 | ], |
762 | self::TYPE_RETURN => [ |
763 | self::ACTION_GOTO => self::EXPRESSION_NO_NL, |
764 | ], |
765 | self::TYPE_IF => [ |
766 | self::ACTION_GOTO => self::CONDITION, |
767 | ], |
768 | self::TYPE_VAR => [ |
769 | self::ACTION_GOTO => self::EXPRESSION, |
770 | ], |
771 | self::TYPE_FUNC => [ |
772 | self::ACTION_PUSH => self::STATEMENT, |
773 | self::ACTION_GOTO => self::FUNC, |
774 | ], |
775 | self::TYPE_CLASS => [ |
776 | self::ACTION_PUSH => self::STATEMENT, |
777 | self::ACTION_GOTO => self::CLASS_DEF, |
778 | ], |
779 | self::TYPE_LITERAL => [ |
780 | self::ACTION_GOTO => self::EXPRESSION_OP, |
781 | ], |
782 | self::TYPE_ASYNC => [ |
783 | self::ACTION_GOTO => self::EXPRESSION_OP, |
784 | ], |
785 | ], |
786 | // State after =>. Like EXPRESSION, except that { begins an arrow function body |
787 | // rather than an object literal. |
788 | self::EXPRESSION_ARROWFUNC => [ |
789 | self::TYPE_UN_OP => [ |
790 | self::ACTION_GOTO => self::EXPRESSION, |
791 | ], |
792 | self::TYPE_INCR_OP => [ |
793 | self::ACTION_GOTO => self::EXPRESSION, |
794 | ], |
795 | self::TYPE_ADD_OP => [ |
796 | self::ACTION_GOTO => self::EXPRESSION, |
797 | ], |
798 | self::TYPE_BRACE_OPEN => [ |
799 | self::ACTION_PUSH => self::EXPRESSION_END, |
800 | self::ACTION_GOTO => self::STATEMENT, |
801 | ], |
802 | self::TYPE_PAREN_OPEN => [ |
803 | self::ACTION_PUSH => self::EXPRESSION_OP, |
804 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
805 | ], |
806 | self::TYPE_FUNC => [ |
807 | self::ACTION_PUSH => self::EXPRESSION_OP, |
808 | self::ACTION_GOTO => self::FUNC, |
809 | ], |
810 | self::TYPE_CLASS => [ |
811 | self::ACTION_PUSH => self::EXPRESSION_OP, |
812 | self::ACTION_GOTO => self::CLASS_DEF, |
813 | ], |
814 | self::TYPE_LITERAL => [ |
815 | self::ACTION_GOTO => self::EXPRESSION_OP, |
816 | ], |
817 | ], |
818 | // Expression after a ? . This differs from EXPRESSION because a : ends the ternary |
819 | // rather than starting STATEMENT (outside a ternary, : comes after a goto label) |
820 | // The actual rule for : ending the ternary is in EXPRESSION_TERNARY_OP. |
821 | self::EXPRESSION_TERNARY => [ |
822 | self::TYPE_BRACE_OPEN => [ |
823 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
824 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
825 | ], |
826 | self::TYPE_PAREN_OPEN => [ |
827 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
828 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
829 | ], |
830 | self::TYPE_FUNC => [ |
831 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
832 | self::ACTION_GOTO => self::FUNC, |
833 | ], |
834 | self::TYPE_CLASS => [ |
835 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
836 | self::ACTION_GOTO => self::CLASS_DEF, |
837 | ], |
838 | self::TYPE_LITERAL => [ |
839 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
840 | ], |
841 | ], |
842 | // Like EXPRESSION_OP, but for ternaries, see EXPRESSION_TERNARY |
843 | self::EXPRESSION_TERNARY_OP => [ |
844 | self::TYPE_BIN_OP => [ |
845 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
846 | ], |
847 | self::TYPE_ADD_OP => [ |
848 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
849 | ], |
850 | self::TYPE_DOT => [ |
851 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_DOT, |
852 | ], |
853 | self::TYPE_HOOK => [ |
854 | self::ACTION_PUSH => self::EXPRESSION_TERNARY, |
855 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
856 | ], |
857 | self::TYPE_COMMA => [ |
858 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
859 | ], |
860 | self::TYPE_ARROW => [ |
861 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_ARROWFUNC, |
862 | ], |
863 | self::TYPE_PAREN_OPEN => [ |
864 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
865 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
866 | ], |
867 | self::TYPE_COLON => [ |
868 | self::ACTION_POP => true, |
869 | ], |
870 | ], |
871 | // Like EXPRESSION_DOT, but for ternaries, see EXPRESSION_TERNARY |
872 | self::EXPRESSION_TERNARY_DOT => [ |
873 | self::TYPE_LITERAL => [ |
874 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
875 | ], |
876 | // The following are keywords behaving as literals |
877 | self::TYPE_RETURN => [ |
878 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
879 | ], |
880 | self::TYPE_IF => [ |
881 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
882 | ], |
883 | self::TYPE_DO => [ |
884 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
885 | ], |
886 | self::TYPE_VAR => [ |
887 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
888 | ], |
889 | self::TYPE_FUNC => [ |
890 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
891 | ], |
892 | self::TYPE_CLASS => [ |
893 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
894 | ], |
895 | // We don't expect real unary/binary operators here, but some keywords |
896 | // (new, delete, void, typeof, instanceof, in) are classified as such, and they can be |
897 | // used as property names |
898 | self::TYPE_UN_OP => [ |
899 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
900 | ], |
901 | self::TYPE_BIN_OP => [ |
902 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
903 | ], |
904 | ], |
905 | // Like EXPRESSION_ARROWFUNC, but for ternaries, see EXPRESSION_TERNARY |
906 | self::EXPRESSION_TERNARY_ARROWFUNC => [ |
907 | self::TYPE_UN_OP => [ |
908 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
909 | ], |
910 | self::TYPE_INCR_OP => [ |
911 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
912 | ], |
913 | self::TYPE_ADD_OP => [ |
914 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
915 | ], |
916 | self::TYPE_BRACE_OPEN => [ |
917 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
918 | self::ACTION_GOTO => self::STATEMENT, |
919 | ], |
920 | self::TYPE_PAREN_OPEN => [ |
921 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
922 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
923 | ], |
924 | self::TYPE_FUNC => [ |
925 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
926 | self::ACTION_GOTO => self::FUNC, |
927 | ], |
928 | self::TYPE_CLASS => [ |
929 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
930 | self::ACTION_GOTO => self::CLASS_DEF, |
931 | ], |
932 | self::TYPE_LITERAL => [ |
933 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
934 | ], |
935 | ], |
936 | // Expression inside parentheses. Like EXPRESSION, except that ) ends this state |
937 | // This differs from EXPRESSION because semicolon insertion can't happen here |
938 | self::PAREN_EXPRESSION => [ |
939 | self::TYPE_BRACE_OPEN => [ |
940 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
941 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
942 | ], |
943 | self::TYPE_PAREN_OPEN => [ |
944 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
945 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
946 | ], |
947 | self::TYPE_PAREN_CLOSE => [ |
948 | self::ACTION_POP => true, |
949 | ], |
950 | self::TYPE_FUNC => [ |
951 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
952 | self::ACTION_GOTO => self::FUNC, |
953 | ], |
954 | self::TYPE_CLASS => [ |
955 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
956 | self::ACTION_GOTO => self::CLASS_DEF, |
957 | ], |
958 | self::TYPE_LITERAL => [ |
959 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
960 | ], |
961 | self::TYPE_ASYNC => [ |
962 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP_NO_NL, |
963 | ], |
964 | ], |
965 | // Like EXPRESSION_OP, but in parentheses, see PAREN_EXPRESSION |
966 | self::PAREN_EXPRESSION_OP => [ |
967 | self::TYPE_BIN_OP => [ |
968 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
969 | ], |
970 | self::TYPE_ADD_OP => [ |
971 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
972 | ], |
973 | self::TYPE_DOT => [ |
974 | self::ACTION_GOTO => self::PAREN_EXPRESSION_DOT, |
975 | ], |
976 | self::TYPE_HOOK => [ |
977 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
978 | ], |
979 | self::TYPE_COLON => [ |
980 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
981 | ], |
982 | self::TYPE_COMMA => [ |
983 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
984 | ], |
985 | self::TYPE_SEMICOLON => [ |
986 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
987 | ], |
988 | self::TYPE_ARROW => [ |
989 | self::ACTION_GOTO => self::PAREN_EXPRESSION_ARROWFUNC, |
990 | ], |
991 | self::TYPE_PAREN_OPEN => [ |
992 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
993 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
994 | ], |
995 | self::TYPE_PAREN_CLOSE => [ |
996 | self::ACTION_POP => true, |
997 | ], |
998 | ], |
999 | // Like EXPRESSION_DOT, but in parentheses, see PAREN_EXPRESSION |
1000 | self::PAREN_EXPRESSION_DOT => [ |
1001 | self::TYPE_LITERAL => [ |
1002 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
1003 | ], |
1004 | // The following are keywords behaving as literals |
1005 | self::TYPE_RETURN => [ |
1006 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
1007 | ], |
1008 | self::TYPE_IF => [ |
1009 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
1010 | ], |
1011 | self::TYPE_DO => [ |
1012 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
1013 | ], |
1014 | self::TYPE_VAR => [ |
1015 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
1016 | ], |
1017 | self::TYPE_FUNC => [ |
1018 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
1019 | ], |
1020 | self::TYPE_CLASS => [ |
1021 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
1022 | ], |
1023 | // We don't expect real unary/binary operators here, but some keywords |
1024 | // (new, delete, void, typeof, instanceof, in) are classified as such, and they can be |
1025 | // used as property names |
1026 | self::TYPE_UN_OP => [ |
1027 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
1028 | ], |
1029 | self::TYPE_BIN_OP => [ |
1030 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
1031 | ], |
1032 | ], |
1033 | // Like EXPRESSION_ARROWFUNC, but in parentheses, see PAREN_EXPRESSION |
1034 | self::PAREN_EXPRESSION_ARROWFUNC => [ |
1035 | self::TYPE_UN_OP => [ |
1036 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1037 | ], |
1038 | self::TYPE_INCR_OP => [ |
1039 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1040 | ], |
1041 | self::TYPE_ADD_OP => [ |
1042 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1043 | ], |
1044 | self::TYPE_BRACE_OPEN => [ |
1045 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
1046 | self::ACTION_GOTO => self::STATEMENT, |
1047 | ], |
1048 | self::TYPE_PAREN_OPEN => [ |
1049 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
1050 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1051 | ], |
1052 | self::TYPE_FUNC => [ |
1053 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
1054 | self::ACTION_GOTO => self::FUNC, |
1055 | ], |
1056 | self::TYPE_CLASS => [ |
1057 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
1058 | self::ACTION_GOTO => self::CLASS_DEF, |
1059 | ], |
1060 | self::TYPE_LITERAL => [ |
1061 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
1062 | ], |
1063 | ], |
1064 | |
1065 | // Like PAREN_EXPRESSION_OP, for the state after "async" in a PAREN_EXPRESSION, |
1066 | // for use by the $semicolon model. |
1067 | self::PAREN_EXPRESSION_OP_NO_NL => [ |
1068 | self::TYPE_BIN_OP => [ |
1069 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1070 | ], |
1071 | self::TYPE_ADD_OP => [ |
1072 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1073 | ], |
1074 | self::TYPE_DOT => [ |
1075 | self::ACTION_GOTO => self::PAREN_EXPRESSION_DOT, |
1076 | ], |
1077 | self::TYPE_HOOK => [ |
1078 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1079 | ], |
1080 | self::TYPE_COLON => [ |
1081 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1082 | ], |
1083 | self::TYPE_COMMA => [ |
1084 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1085 | ], |
1086 | self::TYPE_SEMICOLON => [ |
1087 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1088 | ], |
1089 | self::TYPE_ARROW => [ |
1090 | self::ACTION_GOTO => self::PAREN_EXPRESSION_ARROWFUNC, |
1091 | ], |
1092 | self::TYPE_PAREN_OPEN => [ |
1093 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
1094 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1095 | ], |
1096 | self::TYPE_PAREN_CLOSE => [ |
1097 | self::ACTION_POP => true, |
1098 | ], |
1099 | ], |
1100 | // Expression as the value of a key in an object literal. Like EXPRESSION, except that |
1101 | // a comma (in PROPERTY_EXPRESSION_OP) goes to PROPERTY_ASSIGNMENT instead |
1102 | self::PROPERTY_EXPRESSION => [ |
1103 | self::TYPE_BRACE_OPEN => [ |
1104 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1105 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
1106 | ], |
1107 | self::TYPE_BRACE_CLOSE => [ |
1108 | self::ACTION_POP => true, |
1109 | ], |
1110 | self::TYPE_PAREN_OPEN => [ |
1111 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1112 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1113 | ], |
1114 | self::TYPE_FUNC => [ |
1115 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1116 | self::ACTION_GOTO => self::FUNC, |
1117 | ], |
1118 | self::TYPE_CLASS => [ |
1119 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1120 | self::ACTION_GOTO => self::CLASS_DEF, |
1121 | ], |
1122 | self::TYPE_LITERAL => [ |
1123 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1124 | ], |
1125 | ], |
1126 | // Like EXPRESSION_OP, but in a property expression, see PROPERTY_EXPRESSION |
1127 | self::PROPERTY_EXPRESSION_OP => [ |
1128 | self::TYPE_BIN_OP => [ |
1129 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION, |
1130 | ], |
1131 | self::TYPE_ADD_OP => [ |
1132 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION, |
1133 | ], |
1134 | self::TYPE_DOT => [ |
1135 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_DOT, |
1136 | ], |
1137 | self::TYPE_HOOK => [ |
1138 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION, |
1139 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
1140 | ], |
1141 | self::TYPE_COMMA => [ |
1142 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
1143 | ], |
1144 | self::TYPE_ARROW => [ |
1145 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_ARROWFUNC, |
1146 | ], |
1147 | self::TYPE_BRACE_OPEN => [ |
1148 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1149 | ], |
1150 | self::TYPE_BRACE_CLOSE => [ |
1151 | self::ACTION_POP => true, |
1152 | ], |
1153 | self::TYPE_PAREN_OPEN => [ |
1154 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1155 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1156 | ], |
1157 | ], |
1158 | // Like EXPRESSION_DOT, but in a property expression, see PROPERTY_EXPRESSION |
1159 | self::PROPERTY_EXPRESSION_DOT => [ |
1160 | self::TYPE_LITERAL => [ |
1161 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1162 | ], |
1163 | // The following are keywords behaving as literals |
1164 | self::TYPE_RETURN => [ |
1165 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1166 | ], |
1167 | self::TYPE_IF => [ |
1168 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1169 | ], |
1170 | self::TYPE_DO => [ |
1171 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1172 | ], |
1173 | self::TYPE_VAR => [ |
1174 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1175 | ], |
1176 | self::TYPE_FUNC => [ |
1177 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1178 | ], |
1179 | self::TYPE_CLASS => [ |
1180 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1181 | ], |
1182 | // We don't expect real unary/binary operators here, but some keywords |
1183 | // (new, delete, void, typeof, instanceof, in) are classified as such, and they can be |
1184 | // used as property names |
1185 | self::TYPE_UN_OP => [ |
1186 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1187 | ], |
1188 | self::TYPE_BIN_OP => [ |
1189 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1190 | ], |
1191 | ], |
1192 | // Like EXPRESSION_ARROWFUNC, but in a property expression, see PROPERTY_EXPRESSION |
1193 | self::PROPERTY_EXPRESSION_ARROWFUNC => [ |
1194 | self::TYPE_UN_OP => [ |
1195 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION, |
1196 | ], |
1197 | self::TYPE_INCR_OP => [ |
1198 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION, |
1199 | ], |
1200 | self::TYPE_ADD_OP => [ |
1201 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION, |
1202 | ], |
1203 | self::TYPE_BRACE_OPEN => [ |
1204 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1205 | self::ACTION_GOTO => self::STATEMENT, |
1206 | ], |
1207 | self::TYPE_PAREN_OPEN => [ |
1208 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1209 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1210 | ], |
1211 | self::TYPE_FUNC => [ |
1212 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1213 | self::ACTION_GOTO => self::FUNC, |
1214 | ], |
1215 | self::TYPE_CLASS => [ |
1216 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1217 | self::ACTION_GOTO => self::CLASS_DEF, |
1218 | ], |
1219 | self::TYPE_LITERAL => [ |
1220 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1221 | ], |
1222 | ], |
1223 | // Class definition (after the class keyword). Expects an identifier, or the extends |
1224 | // keyword followed by an expression (or both), followed by {, which starts an object |
1225 | // literal. The object literal's closing } will pop the stack, so the state to return |
1226 | // to after the class definition should be pushed to the stack first. |
1227 | self::CLASS_DEF => [ |
1228 | self::TYPE_BRACE_OPEN => [ |
1229 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
1230 | ], |
1231 | self::TYPE_PAREN_OPEN => [ |
1232 | self::ACTION_PUSH => self::CLASS_DEF, |
1233 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1234 | ], |
1235 | self::TYPE_FUNC => [ |
1236 | self::ACTION_PUSH => self::CLASS_DEF, |
1237 | self::ACTION_GOTO => self::FUNC, |
1238 | ], |
1239 | ], |
1240 | // Import or export declaration |
1241 | self::IMPORT_EXPORT => [ |
1242 | self::TYPE_SEMICOLON => [ |
1243 | self::ACTION_GOTO => self::STATEMENT, |
1244 | ], |
1245 | self::TYPE_VAR => [ |
1246 | self::ACTION_GOTO => self::EXPRESSION, |
1247 | ], |
1248 | self::TYPE_FUNC => [ |
1249 | self::ACTION_PUSH => self::EXPRESSION_OP, |
1250 | self::ACTION_GOTO => self::FUNC, |
1251 | ], |
1252 | self::TYPE_CLASS => [ |
1253 | self::ACTION_PUSH => self::EXPRESSION_OP, |
1254 | self::ACTION_GOTO => self::CLASS_DEF, |
1255 | ], |
1256 | self::TYPE_SPECIAL => [ |
1257 | 'default' => [ |
1258 | self::ACTION_GOTO => self::EXPRESSION, |
1259 | ], |
1260 | // Stay in this state for *, as, from |
1261 | '*' => [], |
1262 | 'as' => [], |
1263 | 'from' => [], |
1264 | ], |
1265 | ], |
1266 | // Used in template string-specific code below |
1267 | self::TEMPLATE_STRING_HEAD => [ |
1268 | self::TYPE_LITERAL => [ |
1269 | self::ACTION_PUSH => self::TEMPLATE_STRING_TAIL, |
1270 | self::ACTION_GOTO => self::EXPRESSION, |
1271 | ], |
1272 | ], |
1273 | ]; |
1274 | |
1275 | /** |
1276 | * @var array $semicolon Map of state => [ token type => true ] |
1277 | * |
1278 | * Rules for when semicolon insertion is appropriate. Semicolon insertion happens if we are |
1279 | * in one of these states, and encounter one of these tokens preceded by a newline. |
1280 | * |
1281 | * ensureExpandedStates() augments this array by copying each entry to the negated state number as well. |
1282 | */ |
1283 | private static $semicolon = [ |
1284 | self::EXPRESSION_NO_NL => [ |
1285 | self::TYPE_UN_OP => true, |
1286 | // BIN_OP seems impossible at the start of an expression, but it can happen in |
1287 | // yield *foo |
1288 | self::TYPE_BIN_OP => true, |
1289 | self::TYPE_INCR_OP => true, |
1290 | self::TYPE_ADD_OP => true, |
1291 | self::TYPE_BRACE_OPEN => true, |
1292 | self::TYPE_PAREN_OPEN => true, |
1293 | self::TYPE_RETURN => true, |
1294 | self::TYPE_IF => true, |
1295 | self::TYPE_DO => true, |
1296 | self::TYPE_VAR => true, |
1297 | self::TYPE_FUNC => true, |
1298 | self::TYPE_CLASS => true, |
1299 | self::TYPE_LITERAL => true, |
1300 | self::TYPE_ASYNC => true, |
1301 | ], |
1302 | self::EXPRESSION_OP => [ |
1303 | self::TYPE_UN_OP => true, |
1304 | self::TYPE_INCR_OP => true, |
1305 | self::TYPE_BRACE_OPEN => true, |
1306 | self::TYPE_RETURN => true, |
1307 | self::TYPE_IF => true, |
1308 | self::TYPE_DO => true, |
1309 | self::TYPE_VAR => true, |
1310 | self::TYPE_FUNC => true, |
1311 | self::TYPE_CLASS => true, |
1312 | self::TYPE_LITERAL => true, |
1313 | self::TYPE_ASYNC => true, |
1314 | ], |
1315 | self::EXPRESSION_END => [ |
1316 | self::TYPE_UN_OP => true, |
1317 | self::TYPE_INCR_OP => true, |
1318 | self::TYPE_ADD_OP => true, |
1319 | self::TYPE_BRACE_OPEN => true, |
1320 | self::TYPE_PAREN_OPEN => true, |
1321 | self::TYPE_RETURN => true, |
1322 | self::TYPE_IF => true, |
1323 | self::TYPE_DO => true, |
1324 | self::TYPE_VAR => true, |
1325 | self::TYPE_FUNC => true, |
1326 | self::TYPE_CLASS => true, |
1327 | self::TYPE_LITERAL => true, |
1328 | self::TYPE_ASYNC => true, |
1329 | ], |
1330 | self::PAREN_EXPRESSION_OP_NO_NL => [ |
1331 | self::TYPE_FUNC => true, |
1332 | ] |
1333 | ]; |
1334 | |
1335 | /** |
1336 | * @var array $divStates Map of state => true, looked up with isset() by the tokenizer loop |
1337 | * |
1338 | * States in which a / is a division operator. In all other states, it's the start of a regex. |
1339 | * |
1340 | * self::ensureExpandedStates() augments this array by adding the negated number of each state as a key. |
1341 | */ |
1342 | private static $divStates = [ |
1343 | self::EXPRESSION_OP => true, |
1344 | self::EXPRESSION_TERNARY_OP => true, |
1345 | self::PAREN_EXPRESSION_OP => true, |
1346 | self::PROPERTY_EXPRESSION_OP => true |
1347 | ]; |
1348 | |
1349 | /** |
1350 | * Add copies of all states, keyed by the negated state number, to self::$model (skipping FUNC and GENFUNC), |
1351 | * self::$semicolon and self::$divStates, to represent generator function states. Only the first call does work. |
1352 | */ |
1353 | private static function ensureExpandedStates() { |
1354 | // Already done? self::$expandedStates is set by the first call, so later calls return here. |
1355 | if ( self::$expandedStates ) { |
1356 | return; |
1357 | } |
1358 | self::$expandedStates = true; |
1359 | |
1360 | // Add copies of all states (except FUNC and GENFUNC) with negative numbers. These negative states represent |
1361 | // states inside generator functions, where TYPE_YIELD is treated as TYPE_RETURN (otherwise as TYPE_LITERAL). |
1362 | // Targets are negated too, except FUNC, GENFUNC and true. NOTE(review): an empty array target (like '*' => []) gets no entry in the copy; confirm this is intended. |
1363 | foreach ( self::$model as $state => $transitions ) { |
1364 | if ( $state === self::FUNC || $state === self::GENFUNC ) { |
1365 | continue; |
1366 | } |
1367 | foreach ( $transitions as $tokenType => $actions ) { |
1368 | foreach ( $actions as $action => $target ) { |
1369 | if ( !is_array( $target ) ) { |
1370 | self::$model[-$state][$tokenType][$action] = ( |
1371 | $target === self::FUNC || |
1372 | $target === true || |
1373 | $target === self::GENFUNC |
1374 | ) ? $target : -$target; |
1375 | continue; |
1376 | } |
1377 | |
1378 | foreach ( $target as $subaction => $subtarget ) { |
1379 | self::$model[-$state][$tokenType][$action][$subaction] = ( |
1380 | $subtarget === self::FUNC || |
1381 | $subtarget === true || |
1382 | $subtarget === self::GENFUNC |
1383 | ) ? $subtarget : -$subtarget; |
1384 | } |
1385 | } |
1386 | } |
1387 | } |
1388 | // Special cases: |
1389 | // '{' in a property assignment starts a method, so its target stays the positive STATEMENT state instead of being flipped |
1390 | self::$model[-self::PROPERTY_ASSIGNMENT][self::TYPE_BRACE_OPEN][self::ACTION_GOTO] = self::STATEMENT; |
1391 | |
1392 | // Also add negative versions of states to the other arrays, with the same values as the positive states |
1393 | foreach ( self::$semicolon as $state => $value ) { |
1394 | self::$semicolon[-$state] = $value; |
1395 | } |
1396 | foreach ( self::$divStates as $state => $value ) { |
1397 | self::$divStates[-$state] = $value; |
1398 | } |
1399 | } |
1400 | |
1401 | /** |
1402 | * Returns minified JavaScript code. Delegates to minifyInternal() without a source map generator. |
1403 | * |
1404 | * @see MinifierState::setErrorHandler |
1405 | * @param string $s JavaScript code to minify |
1406 | * @param callable|null $onError Called with a ParseError object |
1407 | * @return string Minified code |
1408 | */ |
1409 | public static function minify( $s, $onError = null ) { |
1410 | return self::minifyInternal( $s, null, $onError ); |
1411 | } |
1412 | |
1413 | /** |
1414 | * Create a new JavaScriptMinifierState, a minifier state object without source map capabilities |
1415 | * |
1416 | * Example: |
1417 | * |
1418 | * JavaScriptMinifier::createMinifier() |
1419 | * ->addSourceFile( 'file.js', $source ) |
1420 | * ->getMinifiedOutput(); |
1421 | * |
1422 | * @return JavaScriptMinifierState A new instance on every call |
1423 | */ |
1424 | public static function createMinifier() { |
1425 | return new JavaScriptMinifierState; |
1426 | } |
1427 | |
1428 | /** |
1429 | * Create a new JavaScriptMapperState, a minifier state object with source map capabilities |
1430 | * |
1431 | * Example: |
1432 | * |
1433 | * $mapper = JavaScriptMinifier::createSourceMapState() |
1434 | * ->addSourceFile( 'file1.js', $source1 ) |
1435 | * ->addOutput( "\n\n" ) |
1436 | * ->addSourceFile( 'file2.js', $source2 ); |
1437 | * $out = $mapper->getMinifiedOutput(); |
1438 | * $map = $mapper->getSourceMap(); |
1439 | * |
1440 | * @return JavaScriptMapperState A new instance on every call |
1441 | */ |
1442 | public static function createSourceMapState() { |
1443 | return new JavaScriptMapperState; |
1444 | } |
1445 | |
1446 | /** |
1447 | * Create a new IdentityMinifierState, a MinifierState that doesn't actually minify |
1448 | * |
1449 | * @return IdentityMinifierState A new instance on every call |
1450 | */ |
1451 | public static function createIdentityMinifier() { |
1452 | return new IdentityMinifierState; |
1453 | } |
1454 | |
1455 | /** |
1456 | * Minify with optional source map. |
1457 | * |
1458 | * @internal |
1459 | * |
1460 | * @param string $s |
1461 | * @param MappingsGenerator|null $mapGenerator |
1462 | * @param callable|null $onError |
1463 | * @return string |
1464 | */ |
1465 | public static function minifyInternal( $s, $mapGenerator = null, $onError = null ) { |
1466 | self::ensureExpandedStates(); |
1467 | |
1468 | // Here's where the minifying takes place: Loop through the input, looking for tokens |
1469 | // and output them to $out, taking actions to the above defined rules when appropriate. |
1470 | $error = null; |
1471 | $out = ''; |
1472 | $pos = 0; |
1473 | $length = strlen( $s ); |
1474 | $lineLength = 0; |
1475 | $dotlessNum = false; |
1476 | $lastDotlessNum = false; |
1477 | $newlineFound = true; |
1478 | $state = self::STATEMENT; |
1479 | $stack = []; |
1480 | // Optimization: calling end( $stack ) repeatedly is expensive |
1481 | $topOfStack = null; |
1482 | // Pretend that we have already seen a semicolon |
1483 | $last = ';'; |
1484 | while ( $pos < $length ) { |
1485 | // First, skip over any whitespace and multiline comments, recording whether we |
1486 | // found any newline character |
1487 | $skip = strspn( $s, " \t\n\r\xb\xc", $pos ); |
1488 | if ( !$skip ) { |
1489 | $ch = $s[$pos]; |
1490 | if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) { |
1491 | // Multiline comment. Search for the end token or EOT. |
1492 | $end = strpos( $s, '*/', $pos + 2 ); |
1493 | $skip = $end === false ? $length - $pos : $end - $pos + 2; |
1494 | } |
1495 | } |
1496 | if ( $skip ) { |
1497 | // The semicolon insertion mechanism needs to know whether there was a newline |
1498 | // between two tokens, so record it now. |
1499 | if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) { |
1500 | $newlineFound = true; |
1501 | } |
1502 | if ( $mapGenerator ) { |
1503 | $mapGenerator->consumeSource( $skip ); |
1504 | } |
1505 | $pos += $skip; |
1506 | continue; |
1507 | } |
1508 | // Handle C++-style comments and html comments, which are treated as single line |
1509 | // comments by the browser, regardless of whether the end tag is on the same line. |
1510 | // Handle --> the same way, but only if it's at the beginning of the line |
1511 | // @phan-suppress-next-line PhanPossiblyUndeclaredVariable |
1512 | if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' ) |
1513 | || ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' ) |
1514 | || ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' ) |
1515 | ) { |
1516 | $skip = strcspn( $s, "\r\n", $pos ); |
1517 | if ( $mapGenerator ) { |
1518 | $mapGenerator->consumeSource( $skip ); |
1519 | } |
1520 | $pos += $skip; |
1521 | continue; |
1522 | } |
1523 | |
1524 | // Find out which kind of token we're handling. |
1525 | // Note: $end must point past the end of the current token |
1526 | // so that `substr($s, $pos, $end - $pos)` would be the entire token. |
1527 | // In other words, $end will be the offset of the last relevant character |
1528 | // in the stream + 1, or simply put: The offset of the first character |
1529 | // of any next token in the stream. |
1530 | $end = $pos + 1; |
1531 | // Handle string literals |
1532 | if ( $ch === "'" || $ch === '"' ) { |
1533 | // Search to the end of the string literal, skipping over backslash escapes |
1534 | $search = $ch . '\\'; |
1535 | do { |
1536 | // Speculatively add 2 to the end so that if we see a backslash, |
1537 | // the next iteration will start 2 characters further (one for the |
1538 | // backslash, one for the escaped character). |
1539 | // We'll correct this outside the loop. |
1540 | $end += strcspn( $s, $search, $end ) + 2; |
1541 | // If the last character in our search for a quote or a backslash |
1542 | // matched a backslash and we haven't reached the end, keep searching.. |
1543 | } while ( $end - 2 < $length && $s[$end - 2] === '\\' ); |
1544 | // Correction (1): Undo speculative add, keep only one (end of string literal) |
1545 | $end--; |
1546 | if ( $end > $length ) { |
1547 | // Correction (2): Loop wrongly assumed an end quote ended the search, |
1548 | // but search ended because we've reached the end. Correct $end. |
1549 | // TODO: This is invalid and should throw. |
1550 | $end--; |
1551 | } |
1552 | |
1553 | // Handle template strings, either from "`" to begin a new string, |
1554 | // or continuation after the "}" that ends a "${"-expression. |
1555 | } elseif ( $ch === '`' || ( $ch === '}' && $topOfStack === self::TEMPLATE_STRING_TAIL ) ) { |
1556 | if ( $ch === '}' ) { |
1557 | // Pop the TEMPLATE_STRING_TAIL state off the stack |
1558 | // We don't let it get popped off the stack the normal way, to avoid the newline |
1559 | // and comment stripping code above running on the continuation of the literal |
1560 | array_pop( $stack ); |
1561 | // Also pop the previous state off the stack |
1562 | $state = array_pop( $stack ); |
1563 | $topOfStack = end( $stack ); |
1564 | } |
1565 | // Search until we reach either a closing ` or a ${, skipping over backslash escapes |
1566 | // and $ characters followed by something other than { or ` |
1567 | do { |
1568 | $end += strcspn( $s, '`$\\', $end ) + 1; |
1569 | if ( $end - 1 < $length && $s[$end - 1] === '`' ) { |
1570 | // End of the string, stop |
1571 | // We don't do this in the while() condition because the $end++ in the |
1572 | // backslash escape branch makes it difficult to do so without incorrectly |
1573 | // considering an escaped backtick (\`) the end of the string |
1574 | break; |
1575 | } |
1576 | if ( $end - 1 < $length && $s[$end - 1] === '\\' ) { |
1577 | // Backslash escape. Skip the next character, and keep going |
1578 | $end++; |
1579 | continue; |
1580 | } |
1581 | if ( $end < $length && $s[$end - 1] === '$' && $s[$end] === '{' ) { |
1582 | // Beginning of an expression in ${ ... }. Skip the {, and stop |
1583 | $end++; |
1584 | // Push the current state to the stack. We'll pop this off later when hitting |
1585 | // the end of this template string |
1586 | $stack[] = $state; |
1587 | $topOfStack = $state; |
1588 | // Change the state to TEMPLATE_STRING_HEAD. The token type will be detected |
1589 | // as TYPE_LITERAL, and this will cause the state machine to expect an |
1590 | // expression, then go to the TEMPLATE_STRING_TAIL state when it hits the } |
1591 | $state = self::TEMPLATE_STRING_HEAD; |
1592 | break; |
1593 | } |
1594 | } while ( $end - 1 < $length ); |
1595 | if ( $end > $length ) { |
1596 | // Loop wrongly assumed an end quote or ${ ended the search, |
1597 | // but search ended because we've reached the end. Correct $end. |
1598 | // TODO: This is invalid and should throw. |
1599 | $end--; |
1600 | } |
1601 | |
1602 | // We have to distinguish between regexp literals and division operators |
1603 | // A division operator is only possible in certain states |
1604 | } elseif ( $ch === '/' && !isset( self::$divStates[$state] ) ) { |
1605 | // Regexp literal |
1606 | for ( ; ; ) { |
1607 | // Search until we find "/" (end of regexp), "\" (backslash escapes), |
1608 | // or "[" (start of character classes). |
1609 | do { |
1610 | // Speculatively add 2 to ensure next iteration skips |
1611 | // over backslash and escaped character. |
1612 | // We'll correct this outside the loop. |
1613 | $end += strcspn( $s, '/[\\', $end ) + 2; |
1614 | // If backslash escape, keep searching... |
1615 | } while ( $end - 2 < $length && $s[$end - 2] === '\\' ); |
1616 | // Correction (1): Undo speculative add, keep only one (end of regexp) |
1617 | $end--; |
1618 | if ( $end > $length ) { |
1619 | // Correction (2): Loop wrongly assumed end slash was seen |
1620 | // String ended without end of regexp. Correct $end. |
1621 | // TODO: This is invalid and should throw. |
1622 | $end--; |
1623 | break; |
1624 | } |
1625 | if ( $s[$end - 1] === '/' ) { |
1626 | break; |
1627 | } |
1628 | // (Implicit else), we must've found the start of a char class, |
1629 | // skip until we find "]" (end of char class), or "\" (backslash escape) |
1630 | do { |
1631 | // Speculatively add 2 for backslash escape. |
1632 | // We'll subtract one outside the loop. |
1633 | $end += strcspn( $s, ']\\', $end ) + 2; |
1634 | // If backslash escape, keep searching... |
1635 | } while ( $end - 2 < $length && $s[$end - 2] === '\\' ); |
1636 | // Correction (1): Undo speculative add, keep only one (end of regexp) |
1637 | $end--; |
1638 | if ( $end > $length ) { |
1639 | // Correction (2): Loop wrongly assumed "]" was seen |
1640 | // String ended without ending char class or regexp. Correct $end. |
1641 | // TODO: This is invalid and should throw. |
1642 | $end--; |
1643 | break; |
1644 | } |
1645 | } |
1646 | // Search past the regexp modifiers (gi) |
1647 | while ( $end < $length && ctype_alpha( $s[$end] ) ) { |
1648 | $end++; |
1649 | } |
1650 | } elseif ( |
1651 | $ch === '0' |
1652 | && ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' ) |
1653 | ) { |
1654 | // Hex numeric literal |
1655 | // x or X |
1656 | $end++; |
1657 | $len = strspn( $s, '0123456789ABCDEFabcdef', $end ); |
1658 | if ( !$len && !$error ) { |
1659 | $error = new ParseError( |
1660 | 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ), |
1661 | $pos, |
1662 | ); |
1663 | } |
1664 | $end += $len; |
1665 | } elseif ( |
1666 | // Optimisation: This check must accept only ASCII digits 0-9. |
1667 | // Avoid ctype_digit() because it is slower and also accepts locale-specific digits. |
1668 | // Using is_numeric() might seem wrong also as it accepts negative numbers, decimal |
1669 | // numbers, and exponents (e.g. strings like "+012.34e6"). But, it is fine here |
1670 | // because we know $ch is a single character, and we believe the only single |
1671 | // characters that is_numeric() accepts are ASCII digits 0-9. |
1672 | is_numeric( $ch ) |
1673 | || ( $ch === '.' && $pos + 1 < $length && is_numeric( $s[$pos + 1] ) ) |
1674 | ) { |
1675 | $end += strspn( $s, '0123456789', $end ); |
1676 | $decimal = strspn( $s, '.', $end ); |
1677 | if ( $decimal ) { |
1678 | if ( $decimal > 2 && !$error ) { |
1679 | $error = new ParseError( 'Too many decimal points', $end ); |
1680 | } |
1681 | $end += strspn( $s, '0123456789', $end + 1 ) + $decimal; |
1682 | } else { |
1683 | $dotlessNum = true; |
1684 | } |
1685 | $exponent = strspn( $s, 'eE', $end ); |
1686 | if ( $exponent ) { |
1687 | if ( $exponent > 1 && !$error ) { |
1688 | $error = new ParseError( 'Number with several E', $end ); |
1689 | } |
1690 | $end += $exponent; |
1691 | |
1692 | // + sign is optional; - sign is required. |
1693 | $end += strspn( $s, '-+', $end ); |
1694 | $len = strspn( $s, '0123456789', $end ); |
1695 | if ( !$len && !$error ) { |
1696 | $error = new ParseError( |
1697 | 'Missing decimal digits after exponent', |
1698 | $pos |
1699 | ); |
1700 | } |
1701 | $end += $len; |
1702 | } |
1703 | } elseif ( isset( self::$opChars[$ch] ) ) { |
1704 | // Punctuation character. Search for the longest matching operator. |
1705 | for ( $tokenLength = self::LONGEST_PUNCTUATION_TOKEN; $tokenLength > 1; $tokenLength-- ) { |
1706 | if ( |
1707 | $pos + $tokenLength <= $length && |
1708 | isset( self::$tokenTypes[ substr( $s, $pos, $tokenLength ) ] ) |
1709 | ) { |
1710 | $end = $pos + $tokenLength; |
1711 | break; |
1712 | } |
1713 | } |
1714 | } else { |
1715 | // Identifier or reserved word. Search for the end by excluding whitespace and |
1716 | // punctuation. |
1717 | $end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"`!&|^~\xb\xc\r", $end ); |
1718 | } |
1719 | |
1720 | // Now get the token type from our type array |
1721 | // so $end - $pos == strlen( $token ) |
1722 | $token = substr( $s, $pos, $end - $pos ); |
1723 | $type = isset( self::$model[$state][self::TYPE_SPECIAL][$token] ) |
1724 | ? self::TYPE_SPECIAL |
1725 | : self::$tokenTypes[$token] ?? self::TYPE_LITERAL; |
1726 | if ( $type === self::TYPE_YIELD ) { |
1727 | // yield is treated as TYPE_RETURN inside a generator function (negative state) |
1728 | // but as TYPE_LITERAL when not in a generator function (positive state) |
1729 | $type = $state < 0 ? self::TYPE_RETURN : self::TYPE_LITERAL; |
1730 | } |
1731 | |
1732 | $pad = ''; |
1733 | if ( $newlineFound && isset( self::$semicolon[$state][$type] ) ) { |
1734 | // This token triggers the semicolon insertion mechanism of javascript. While we |
1735 | // could add the ; token here ourselves, keeping the newline has a few advantages. |
1736 | $pad = "\n"; |
1737 | $state = $state < 0 ? -self::STATEMENT : self::STATEMENT; |
1738 | $lineLength = 0; |
1739 | } elseif ( $lineLength + $end - $pos > self::$maxLineLength && |
1740 | !isset( self::$semicolon[$state][$type] ) && |
1741 | $type !== self::TYPE_INCR_OP && |
1742 | $type !== self::TYPE_ARROW |
1743 | ) { |
1744 | // This line would get too long if we added $token, so add a newline first. |
1745 | // Only do this if it won't trigger semicolon insertion and if it won't |
1746 | // put a postfix increment operator or an arrow on its own line, |
1747 | // which is illegal in js. |
1748 | $pad = "\n"; |
1749 | $lineLength = 0; |
1750 | // Check, whether we have to separate the token from the last one with whitespace |
1751 | } elseif ( !isset( self::$opChars[$last] ) && !isset( self::$opChars[$ch] ) ) { |
1752 | $pad = ' '; |
1753 | $lineLength++; |
1754 | // Don't accidentally create ++, -- or // tokens |
1755 | } elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) { |
1756 | $pad = ' '; |
1757 | $lineLength++; |
1758 | // Don't create invalid dot notation after number literal (T303827). |
1759 | // Keep whitespace in "42. foo". |
1760 | // But keep minifying "foo.bar", "42..foo", and "42.0.foo" per $opChars. |
1761 | } elseif ( $lastDotlessNum && $type === self::TYPE_DOT ) { |
1762 | $pad = ' '; |
1763 | $lineLength++; |
1764 | } |
1765 | |
1766 | // self::debug( $topOfStack, $last, $state, $ch, $token, $type ); |
1767 | |
1768 | if ( $mapGenerator ) { |
1769 | $mapGenerator->outputSpace( $pad ); |
1770 | $mapGenerator->outputToken( $token ); |
1771 | $mapGenerator->consumeSource( $end - $pos ); |
1772 | } |
1773 | $out .= $pad; |
1774 | $out .= $token; |
1775 | $lineLength += $end - $pos; |
1776 | $last = $s[$end - 1]; |
1777 | $pos = $end; |
1778 | $newlineFound = false; |
1779 | $lastDotlessNum = $dotlessNum; |
1780 | $dotlessNum = false; |
1781 | |
1782 | // Now that we have output our token, transition into the new state. |
1783 | $actions = $type === self::TYPE_SPECIAL ? |
1784 | self::$model[$state][$type][$token] : |
1785 | self::$model[$state][$type] ?? []; |
1786 | if ( isset( $actions[self::ACTION_PUSH] ) && |
1787 | count( $stack ) < self::STACK_LIMIT |
1788 | ) { |
1789 | $topOfStack = $actions[self::ACTION_PUSH]; |
1790 | $stack[] = $topOfStack; |
1791 | } |
1792 | if ( $stack && isset( $actions[self::ACTION_POP] ) ) { |
1793 | $state = array_pop( $stack ); |
1794 | $topOfStack = end( $stack ); |
1795 | } elseif ( isset( $actions[self::ACTION_GOTO] ) ) { |
1796 | $state = $actions[self::ACTION_GOTO]; |
1797 | } |
1798 | } |
1799 | if ( $onError && $error ) { |
1800 | $onError( $error ); |
1801 | } |
1802 | return $out; |
1803 | } |
1804 | |
/**
 * Print one row of a plain-text trace table for the current tokenizer step.
 *
 * Meant for manual debugging only; the call in the main loop is normally commented out.
 * The first call also prints the table header and a separator row before its own row.
 *
 * Integer arguments are replaced by the name of the class constant that has the same
 * value, when there is one. If several constants share a value, the first one returned
 * by ReflectionClass::getConstants() is used. States are negated while inside a generator
 * function, so a negative value that has no constant of its own is shown as "-" followed
 * by the name of the constant matching its absolute value. Values that match nothing are
 * printed unchanged.
 *
 * @param null|false|int $top Top of the state stack as tracked by the caller
 * @param string $last Last character of the previously emitted token
 * @param int $state State the token is being processed in (before the transition)
 * @param string $ch The caller's current character ($ch)
 * @param string $token Text of the current token
 * @param int $type Token type
 */
private static function debug(
	$top, string $last,
	int $state, string $ch, string $token, int $type
) {
	static $first = true;
	// Map of constant value => constant name. Built on the first call and reused afterwards,
	// so that reflection is not repeated for every token.
	static $names = null;

	if ( $names === null ) {
		$names = [];
		$self = new ReflectionClass( self::class );
		foreach ( $self->getConstants() as $name => $value ) {
			// Only integer constants can equal a state or type. Keep the first name seen
			// for each value.
			if ( is_int( $value ) && !isset( $names[$value] ) ) {
				$names[$value] = $name;
			}
		}
	}

	$describe = static function ( $value ) use ( $names ) {
		if ( !is_int( $value ) ) {
			// null or false (no top of stack): nothing to look up.
			return $value;
		}
		if ( isset( $names[$value] ) ) {
			return $names[$value];
		}
		if ( $value < 0 && isset( $names[-$value] ) ) {
			// Negated state, as used inside generator functions.
			return '-' . $names[-$value];
		}
		return $value;
	};

	$rowFormat = "| %-29s | %-4s | %-29s | %-2s | %-10s | %-29s\n";

	if ( $first ) {
		printf( $rowFormat, 'topOfStack', 'last', 'state', 'ch', 'token', 'type' );
		// The "'-" padding specifier fills each empty column with dashes.
		printf( "| %'-29s | %'-4s | %'-29s | %'-2s | %'-10s | %'-29s\n",
			'', '', '', '', '', '' );
		$first = false;
	}
	printf( $rowFormat,
		(string)$describe( $top ), $last, $describe( $state ), $ch, $token, $describe( $type ) );
}
1842 | } |