Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
79.44% |
170 / 214 |
|
14.29% |
1 / 7 |
CRAP | |
0.00% |
0 / 1 |
JavaScriptMinifier | |
79.44% |
170 / 214 |
|
14.29% |
1 / 7 |
245.16 | |
0.00% |
0 / 1 |
ensureExpandedStates | |
10.00% |
2 / 20 |
|
0.00% |
0 / 1 |
227.68 | |||
minify | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
createMinifier | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
createSourceMapState | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
createIdentityMinifier | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
minifyInternal | |
97.09% |
167 / 172 |
|
0.00% |
0 / 1 |
93 | |||
debug | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | /** |
3 | * Copyright 2011 Paul Copperman <paul.copperman@gmail.com> |
4 | * Copyright 2018 Timo Tijhof |
5 | * Copyright 2021 Roan Kattouw <roan.kattouw@gmail.com> |
6 | * |
7 | * Licensed under the Apache License, Version 2.0 (the "License"); |
8 | * you may not use this file except in compliance with the License. |
9 | * You may obtain a copy of the License at |
10 | * |
11 | * http://www.apache.org/licenses/LICENSE-2.0 |
12 | * |
13 | * Unless required by applicable law or agreed to in writing, software |
14 | * distributed under the License is distributed on an "AS IS" BASIS, |
15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
16 | * See the License for the specific language governing permissions and |
17 | * limitations under the License. |
18 | * |
19 | * @file |
20 | * @license Apache-2.0 |
21 | * @license MIT |
22 | * @license GPL-2.0-or-later |
23 | * @license LGPL-2.1-or-later |
24 | */ |
25 | |
26 | namespace Wikimedia\Minify; |
27 | |
28 | /** |
29 | * JavaScript Minifier |
30 | * |
31 | * This class is meant to safely minify JavaScript code, while leaving syntactically correct |
32 | * programs intact. Other libraries, such as JSMin, require a certain coding style to work |
33 | * correctly. On the other hand, libraries like jsminplus that do parse the code correctly are |
34 | * rather slow, because they construct a complete parse tree before outputting the code minified. |
35 | * So this class is meant to allow arbitrary (but syntactically correct) input, while being |
36 | * fast enough to be used for on-the-fly minifying. |
37 | * |
38 | * This class was written with ECMA-262 7th Edition in mind ("ECMAScript 2016"). Parsing features |
39 | * new to later editions of ECMAScript might not be supported. It's assumed that the input is |
40 | * syntactically correct; if it's not, this class may not detect that, and may produce incorrect |
41 | * output. |
42 | * |
43 | * This class has limited support for 8.0 spec ("ECMAScript 2017"), specifically, the await |
44 | * keyword and most kinds of async functions are implemented. Other new parsing features of ES2017 |
45 | * are not yet supported. |
46 | * |
47 | * See also: |
48 | * - <https://262.ecma-international.org/8.0/> |
49 | * - <https://262.ecma-international.org/7.0/> |
50 | * - <https://262.ecma-international.org/6.0/> |
51 | */ |
52 | class JavaScriptMinifier { |
53 | |
54 | /* Parsing states. |
55 | * The state machine is necessary to decide whether to parse a slash as division |
56 | * operator or as regexp literal, and to know where semicolon insertion is possible. |
57 | * States are generally named after the next expected item. We only distinguish states when the |
58 | * distinction is relevant for our purpose. The meaning of these states is documented |
59 | * in $model below. |
60 | * |
61 | * Negative numbers are used to indicate that the state is inside a generator function, |
62 | * which changes the behavior of 'yield' |
63 | */ |
64 | private const STATEMENT = 1; |
65 | private const CONDITION = 2; |
66 | private const FUNC = 3; |
67 | private const GENFUNC = 4; |
68 | private const PROPERTY_ASSIGNMENT = 5; |
69 | private const EXPRESSION = 6; |
70 | private const EXPRESSION_NO_NL = 7; |
71 | private const EXPRESSION_OP = 8; |
72 | private const EXPRESSION_DOT = 9; |
73 | private const EXPRESSION_END = 10; |
74 | private const EXPRESSION_ARROWFUNC = 11; |
75 | private const EXPRESSION_TERNARY = 12; |
76 | private const EXPRESSION_TERNARY_OP = 13; |
77 | private const EXPRESSION_TERNARY_DOT = 14; |
78 | private const EXPRESSION_TERNARY_ARROWFUNC = 15; |
79 | private const PAREN_EXPRESSION = 16; |
80 | private const PAREN_EXPRESSION_OP = 17; |
81 | private const PAREN_EXPRESSION_DOT = 18; |
82 | private const PAREN_EXPRESSION_ARROWFUNC = 19; |
83 | private const PROPERTY_EXPRESSION = 20; |
84 | private const PROPERTY_EXPRESSION_OP = 21; |
85 | private const PROPERTY_EXPRESSION_DOT = 22; |
86 | private const PROPERTY_EXPRESSION_ARROWFUNC = 23; |
87 | private const CLASS_DEF = 24; |
88 | private const IMPORT_EXPORT = 25; |
89 | private const TEMPLATE_STRING_HEAD = 26; |
90 | private const TEMPLATE_STRING_TAIL = 27; |
91 | private const PAREN_EXPRESSION_OP_NO_NL = 28; |
92 | |
93 | /* Token types */ |
94 | private const TYPE_UN_OP = 101; // unary operators |
95 | private const TYPE_INCR_OP = 102; // ++ and -- |
96 | private const TYPE_BIN_OP = 103; // binary operators (except .) |
97 | private const TYPE_ADD_OP = 104; // + and - which can be either unary or binary ops |
98 | private const TYPE_DOT = 105; // . |
99 | private const TYPE_HOOK = 106; // ? |
100 | private const TYPE_COLON = 107; // : |
101 | private const TYPE_COMMA = 108; // , |
102 | private const TYPE_SEMICOLON = 109; // ; |
103 | private const TYPE_BRACE_OPEN = 110; // { |
104 | private const TYPE_BRACE_CLOSE = 111; // } |
105 | private const TYPE_PAREN_OPEN = 112; // ( and [ |
106 | private const TYPE_PAREN_CLOSE = 113; // ) and ] |
107 | private const TYPE_ARROW = 114; // => |
108 | private const TYPE_RETURN = 115; // keywords: break, continue, return, throw |
109 | private const TYPE_IF = 116; // keywords: catch, for, with, switch, while, if |
110 | private const TYPE_DO = 117; // keywords: case, finally, else, do, try |
111 | private const TYPE_VAR = 118; // keywords: var, let, const |
112 | private const TYPE_YIELD = 119; // keywords: yield |
113 | private const TYPE_FUNC = 120; // keywords: function |
114 | private const TYPE_CLASS = 121; // keywords: class |
115 | private const TYPE_LITERAL = 122; // all literals, identifiers, unrecognised tokens, and other keywords |
116 | private const TYPE_SPECIAL = 123; // For special treatment of tokens that usually mean something else |
117 | private const TYPE_ASYNC = 124; // keywords: async |
118 | private const TYPE_AWAIT = 125; // keywords: await |
119 | |
120 | private const ACTION_GOTO = 201; // Go to another state |
121 | private const ACTION_PUSH = 202; // Push a state to the stack |
122 | private const ACTION_POP = 203; // Pop the state from the top of the stack, and go to that state |
123 | |
124 | // Limit to avoid excessive memory usage |
125 | private const STACK_LIMIT = 1000; |
126 | |
127 | // Length of the longest token in $tokenTypes made of punctuation characters, |
128 | // as defined in $opChars. Update this if you add longer tokens to $tokenTypes. |
129 | // |
130 | // Currently the longest punctuation token is `>>>=`, which is 4 characters. |
131 | private const LONGEST_PUNCTUATION_TOKEN = 4; |
132 | |
133 | /** |
134 | * @var int $maxLineLength |
135 | * |
136 | * Maximum line length |
137 | * |
138 | * This is not a strict maximum, but a guideline. Longer lines will be |
139 | * produced when literals (e.g. quoted strings) longer than this are |
140 | * encountered, or when required to guard against semicolon insertion. |
141 | * |
142 | * This is a private member (instead of constant) to allow tests to |
143 | * set it to 1, to verify ASI and line-breaking behaviour. |
144 | */ |
145 | private static $maxLineLength = 1000; |
146 | |
147 | private static $expandedStates = false; |
148 | |
149 | /** |
150 | * @var array $opChars |
151 | * |
152 | * Characters which can be combined without whitespace between them. |
153 | */ |
154 | private static $opChars = [ |
155 | // ECMAScript 6.0 § 11.7 Punctuators |
156 | // Unlike the spec, these are individual symbols, not sequences. |
157 | '{' => true, |
158 | '}' => true, |
159 | '(' => true, |
160 | ')' => true, |
161 | '[' => true, |
162 | ']' => true, |
163 | // Dots have a special case after $dotlessNum which requires whitespace |
164 | '.' => true, |
165 | ';' => true, |
166 | ',' => true, |
167 | '<' => true, |
168 | '>' => true, |
169 | '=' => true, |
170 | '!' => true, |
171 | '+' => true, |
172 | '-' => true, |
173 | '*' => true, |
174 | '%' => true, |
175 | '&' => true, |
176 | '|' => true, |
177 | '^' => true, |
178 | '~' => true, |
179 | '?' => true, |
180 | ':' => true, |
181 | '/' => true, |
182 | // ECMAScript 6.0 § 11.8.4 String Literals |
183 | '"' => true, |
184 | "'" => true, |
185 | // ECMAScript 6.0 § 11.8.6 Template Literal Lexical Components |
186 | '`' => true, |
187 | ]; |
188 | |
189 | /** |
190 | * @var array $tokenTypes |
191 | * |
192 | * Tokens and their types. |
193 | */ |
194 | private static $tokenTypes = [ |
195 | // ECMAScript 6.0 § 12.5 Unary Operators |
196 | // UnaryExpression includes PostfixExpression, which includes 'new'. |
197 | 'new' => self::TYPE_UN_OP, |
198 | 'delete' => self::TYPE_UN_OP, |
199 | 'void' => self::TYPE_UN_OP, |
200 | 'typeof' => self::TYPE_UN_OP, |
201 | '~' => self::TYPE_UN_OP, |
202 | '!' => self::TYPE_UN_OP, |
203 | // ECMAScript 8.0 § 14.6 AwaitExpression |
204 | // |
205 | // await UnaryExpression |
206 | // |
207 | 'await' => self::TYPE_AWAIT, |
208 | // ECMAScript 6.0 § 12.2 Primary Expression, among others |
209 | '...' => self::TYPE_UN_OP, |
210 | // ECMAScript 6.0 § 12.7 Additive Operators |
211 | '++' => self::TYPE_INCR_OP, |
212 | '--' => self::TYPE_INCR_OP, |
213 | '+' => self::TYPE_ADD_OP, |
214 | '-' => self::TYPE_ADD_OP, |
215 | // ECMAScript 6.0 § 12.6 Multiplicative Operators |
216 | '*' => self::TYPE_BIN_OP, |
217 | '/' => self::TYPE_BIN_OP, |
218 | '%' => self::TYPE_BIN_OP, |
219 | // ECMAScript 7.0 § 12.6 Exponentiation Operator |
220 | '**' => self::TYPE_BIN_OP, |
221 | // ECMAScript 6.0 § 12.8 Bitwise Shift Operators |
222 | '<<' => self::TYPE_BIN_OP, |
223 | '>>' => self::TYPE_BIN_OP, |
224 | '>>>' => self::TYPE_BIN_OP, |
225 | // ECMAScript 6.0 § 12.9 Relational Operators |
226 | '<' => self::TYPE_BIN_OP, |
227 | '>' => self::TYPE_BIN_OP, |
228 | '<=' => self::TYPE_BIN_OP, |
229 | '>=' => self::TYPE_BIN_OP, |
230 | 'instanceof' => self::TYPE_BIN_OP, |
231 | 'in' => self::TYPE_BIN_OP, |
232 | // ECMAScript 6.0 § 12.10 Equality Operators |
233 | '==' => self::TYPE_BIN_OP, |
234 | '!=' => self::TYPE_BIN_OP, |
235 | '===' => self::TYPE_BIN_OP, |
236 | '!==' => self::TYPE_BIN_OP, |
237 | // ECMAScript 6.0 § 12.11 Binary Bitwise Operators |
238 | '&' => self::TYPE_BIN_OP, |
239 | '^' => self::TYPE_BIN_OP, |
240 | '|' => self::TYPE_BIN_OP, |
241 | // ECMAScript 6.0 § 12.12 Binary Logical Operators |
242 | '&&' => self::TYPE_BIN_OP, |
243 | '||' => self::TYPE_BIN_OP, |
244 | // ECMAScript 6.0 § 12.13 Conditional Operator |
245 | // Also known as ternary. |
246 | '?' => self::TYPE_HOOK, |
247 | ':' => self::TYPE_COLON, |
248 | // ECMAScript 6.0 § 12.14 Assignment Operators |
249 | '=' => self::TYPE_BIN_OP, |
250 | '*=' => self::TYPE_BIN_OP, |
251 | '/=' => self::TYPE_BIN_OP, |
252 | '%=' => self::TYPE_BIN_OP, |
253 | '+=' => self::TYPE_BIN_OP, |
254 | '-=' => self::TYPE_BIN_OP, |
255 | '<<=' => self::TYPE_BIN_OP, |
256 | '>>=' => self::TYPE_BIN_OP, |
257 | '>>>=' => self::TYPE_BIN_OP, |
258 | '&=' => self::TYPE_BIN_OP, |
259 | '^=' => self::TYPE_BIN_OP, |
260 | '|=' => self::TYPE_BIN_OP, |
261 | // ECMAScript 6.0 § 12.15 Comma Operator |
262 | ',' => self::TYPE_COMMA, |
263 | |
264 | // The keywords that disallow LineTerminator before their |
265 | // (sometimes optional) Expression or Identifier. |
266 | // |
267 | // keyword ; |
268 | // keyword [no LineTerminator here] Identifier ; |
269 | // keyword [no LineTerminator here] Expression ; |
270 | // |
271 | // See also ECMAScript 6.0 § 11.9.1 Rules of Automatic Semicolon Insertion |
272 | 'continue' => self::TYPE_RETURN, |
273 | 'break' => self::TYPE_RETURN, |
274 | 'return' => self::TYPE_RETURN, |
275 | 'throw' => self::TYPE_RETURN, |
276 | // yield is only a keyword inside generator functions, otherwise it's an identifier |
277 | // This is handled with the negative states hack: if the state is negative, TYPE_YIELD |
278 | // is treated as TYPE_RETURN, if it's positive it's treated as TYPE_LITERAL |
279 | 'yield' => self::TYPE_YIELD, |
280 | |
281 | // The keywords that require a parenthesised Expression or Identifier |
282 | // before the next Statement. |
283 | // |
284 | // keyword ( Expression ) Statement |
285 | // keyword ( Identifier ) Statement |
286 | // |
287 | // See also ECMAScript 6.0: |
288 | // - § 13.6 The if Statement |
289 | // - § 13.7 Iteration Statements (do, while, for) |
290 | // - § 13.11 The with Statement |
291 | // - § 13.12 The switch Statement |
292 | // - § 13.15 The try Statement (catch) |
293 | 'if' => self::TYPE_IF, |
294 | 'catch' => self::TYPE_IF, |
295 | 'while' => self::TYPE_IF, |
296 | 'for' => self::TYPE_IF, |
297 | 'switch' => self::TYPE_IF, |
298 | 'with' => self::TYPE_IF, |
299 | |
300 | // The keywords followed by a Statement, Expression, or Block. |
301 | // |
302 | // else Statement |
303 | // do Statement |
304 | // case Expression |
305 | // try Block |
306 | // finally Block |
307 | // |
308 | // See also ECMAScript 6.0: |
309 | // - § 13.6 The if Statement (else) |
310 | // - § 13.7 Iteration Statements (do, while, for) |
311 | // - § 13.12 The switch Statement (case) |
312 | // - § 13.15 The try Statement |
313 | 'else' => self::TYPE_DO, |
314 | 'do' => self::TYPE_DO, |
315 | 'case' => self::TYPE_DO, |
316 | 'try' => self::TYPE_DO, |
317 | 'finally' => self::TYPE_DO, |
318 | |
319 | // Keywords followed by a variable declaration |
320 | // This is different from the group above, because a { begins |
321 | // object destructuring, rather than a block |
322 | 'var' => self::TYPE_VAR, |
323 | 'let' => self::TYPE_VAR, |
324 | 'const' => self::TYPE_VAR, |
325 | |
326 | // ECMAScript 6.0 § 14.1 Function Definitions |
327 | 'function' => self::TYPE_FUNC, |
328 | // ECMAScript 6.0 § 14.2 Arrow Function Definitions |
329 | '=>' => self::TYPE_ARROW, |
330 | |
331 | // Class declaration or expression: |
332 | // class Identifier { ClassBody } |
333 | // class { ClassBody } |
334 | // class Identifier extends Expression { ClassBody } |
335 | // class extends Expression { ClassBody } |
336 | 'class' => self::TYPE_CLASS, |
337 | |
338 | // ECMAScript 6.0 § 12.3 Left-Hand-Side Expressions (MemberExpression) |
339 | // A dot can also be part of a DecimalLiteral, but in that case we handle the entire |
340 | // DecimalLiteral as one token. A separate '.' token is always part of a MemberExpression. |
341 | '.' => self::TYPE_DOT, |
342 | |
343 | // Can be one of: |
344 | // - Block (ECMAScript 6.0 § 13.2 Block) |
345 | // - ObjectLiteral (ECMAScript 6.0 § 12.2 Primary Expression) |
346 | '{' => self::TYPE_BRACE_OPEN, |
347 | '}' => self::TYPE_BRACE_CLOSE, |
348 | |
349 | // Can be one of: |
350 | // - Parenthesised Identifier or Expression after a |
351 | // TYPE_IF or TYPE_FUNC keyword. |
352 | // - PrimaryExpression (ECMAScript 6.0 § 12.2 Primary Expression) |
353 | // - CallExpression (ECMAScript 6.0 § 12.3 Left-Hand-Side Expressions) |
354 | // - Beginning of an ArrowFunction (ECMAScript 6.0 § 14.2 Arrow Function Definitions) |
355 | '(' => self::TYPE_PAREN_OPEN, |
356 | ')' => self::TYPE_PAREN_CLOSE, |
357 | |
358 | // Can be one of: |
359 | // - ArrayLiteral (ECMAScript 6.0 § 12.2 Primary Expressions) |
360 | // - ComputedPropertyName (ECMAScript 6.0 § 12.2.6 Object Initializer) |
361 | '[' => self::TYPE_PAREN_OPEN, |
362 | ']' => self::TYPE_PAREN_CLOSE, |
363 | |
364 | // Can be one of: |
365 | // - End of any statement |
366 | // - EmptyStatement (ECMAScript 6.0 § 13.4 Empty Statement) |
367 | ';' => self::TYPE_SEMICOLON, |
368 | |
369 | // ECMAScript 8.0 § 14.6 Async Function Definitions |
370 | // async [no LineTerminator here] function ... |
371 | // async [no LineTerminator here] propertyName() ... |
372 | 'async' => self::TYPE_ASYNC, |
373 | ]; |
374 | |
375 | /** |
376 | * @var array $model |
377 | * |
378 | * The main table for the state machine. Defines the desired action for every state/token pair. |
379 | * |
380 | * The state pushed onto the stack by ACTION_PUSH will be returned to by ACTION_POP. |
381 | * A state/token pair may not specify both ACTION_POP and ACTION_GOTO. If that does happen, |
382 | * ACTION_POP takes precedence. |
383 | * |
384 | * This table is augmented by self::ensureExpandedStates(). |
385 | */ |
386 | private static $model = [ |
387 | // Statement - This is the initial state. |
388 | self::STATEMENT => [ |
389 | self::TYPE_UN_OP => [ |
390 | self::ACTION_GOTO => self::EXPRESSION, |
391 | ], |
392 | self::TYPE_INCR_OP => [ |
393 | self::ACTION_GOTO => self::EXPRESSION, |
394 | ], |
395 | self::TYPE_ADD_OP => [ |
396 | self::ACTION_GOTO => self::EXPRESSION, |
397 | ], |
398 | self::TYPE_BRACE_OPEN => [ |
399 | // Use of '{' in statement context, creates a Block. |
400 | self::ACTION_PUSH => self::STATEMENT, |
401 | ], |
402 | self::TYPE_BRACE_CLOSE => [ |
403 | // Ends a Block |
404 | self::ACTION_POP => true, |
405 | ], |
406 | self::TYPE_PAREN_OPEN => [ |
407 | self::ACTION_PUSH => self::EXPRESSION_OP, |
408 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
409 | ], |
410 | self::TYPE_RETURN => [ |
411 | self::ACTION_GOTO => self::EXPRESSION_NO_NL, |
412 | ], |
413 | self::TYPE_IF => [ |
414 | self::ACTION_GOTO => self::CONDITION, |
415 | ], |
416 | self::TYPE_VAR => [ |
417 | self::ACTION_GOTO => self::EXPRESSION, |
418 | ], |
419 | self::TYPE_FUNC => [ |
420 | self::ACTION_PUSH => self::STATEMENT, |
421 | self::ACTION_GOTO => self::FUNC, |
422 | ], |
423 | self::TYPE_CLASS => [ |
424 | self::ACTION_PUSH => self::STATEMENT, |
425 | self::ACTION_GOTO => self::CLASS_DEF, |
426 | ], |
427 | self::TYPE_SPECIAL => [ |
428 | 'import' => [ |
429 | self::ACTION_GOTO => self::IMPORT_EXPORT, |
430 | ], |
431 | 'export' => [ |
432 | self::ACTION_GOTO => self::IMPORT_EXPORT, |
433 | ], |
434 | ], |
435 | self::TYPE_LITERAL => [ |
436 | self::ACTION_GOTO => self::EXPRESSION_OP, |
437 | ], |
438 | self::TYPE_ASYNC => [ |
439 | self::ACTION_GOTO => self::EXPRESSION_OP, |
440 | ], |
441 | self::TYPE_AWAIT => [ |
442 | self::ACTION_GOTO => self::EXPRESSION, |
443 | ], |
444 | ], |
445 | // The state after if/catch/while/for/switch/with |
446 | // Waits for an expression in parentheses, then goes to STATEMENT |
447 | self::CONDITION => [ |
448 | self::TYPE_PAREN_OPEN => [ |
449 | self::ACTION_PUSH => self::STATEMENT, |
450 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
451 | ], |
452 | ], |
453 | // The state after the function keyword. Waits for {, then goes to STATEMENT. |
454 | // The function body's closing } will pop the stack, so the state to return to |
455 | // after the function should be pushed to the stack first |
456 | self::FUNC => [ |
457 | // Needed to prevent * in an expression in the argument list from improperly |
458 | // triggering GENFUNC |
459 | self::TYPE_PAREN_OPEN => [ |
460 | self::ACTION_PUSH => self::FUNC, |
461 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
462 | ], |
463 | self::TYPE_BRACE_OPEN => [ |
464 | self::ACTION_GOTO => self::STATEMENT, |
465 | ], |
466 | self::TYPE_SPECIAL => [ |
467 | '*' => [ |
468 | self::ACTION_GOTO => self::GENFUNC, |
469 | ], |
470 | ], |
471 | ], |
472 | // After function*. Waits for { , then goes to a generator function statement. |
473 | self::GENFUNC => [ |
474 | self::TYPE_BRACE_OPEN => [ |
475 | // Note negative value: generator function states are negative |
476 | self::ACTION_GOTO => -self::STATEMENT |
477 | ], |
478 | ], |
479 | // Property assignment - This is an object literal declaration. |
480 | // For example: `{ key: value, key2, [computedKey3]: value3, method4() { ... } }` |
481 | self::PROPERTY_ASSIGNMENT => [ |
482 | // Note that keywords like if, class, var, delete, instanceof etc. can be used as keys, |
483 | // and should be treated as literals here, as they are in EXPRESSION_DOT. In this state, |
484 | // that is implicitly true because TYPE_LITERAL has no action, so it stays in this state. |
485 | // If we later add a state transition for TYPE_LITERAL, that same transition should |
486 | // also be applied to TYPE_RETURN, TYPE_IF, TYPE_DO, TYPE_VAR, TYPE_FUNC and TYPE_CLASS. |
487 | self::TYPE_COLON => [ |
488 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION, |
489 | ], |
490 | // For {, which begins a method |
491 | self::TYPE_BRACE_OPEN => [ |
492 | self::ACTION_PUSH => self::PROPERTY_ASSIGNMENT, |
493 | // This is not flipped, see "Special cases" below |
494 | self::ACTION_GOTO => self::STATEMENT, |
495 | ], |
496 | self::TYPE_BRACE_CLOSE => [ |
497 | self::ACTION_POP => true, |
498 | ], |
499 | // For [, which begins a computed key |
500 | self::TYPE_PAREN_OPEN => [ |
501 | self::ACTION_PUSH => self::PROPERTY_ASSIGNMENT, |
502 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
503 | ], |
504 | self::TYPE_SPECIAL => [ |
505 | '*' => [ |
506 | self::ACTION_PUSH => self::PROPERTY_ASSIGNMENT, |
507 | self::ACTION_GOTO => self::GENFUNC, |
508 | ], |
509 | ], |
510 | ], |
511 | // Place in an expression where we expect an operand or a unary operator: the start |
512 | // of an expression or after an operator. Note that unary operators (including INCR_OP |
513 | // and ADD_OP) cause us to stay in this state, while operands take us to EXPRESSION_OP |
514 | self::EXPRESSION => [ |
515 | self::TYPE_SEMICOLON => [ |
516 | self::ACTION_GOTO => self::STATEMENT, |
517 | ], |
518 | self::TYPE_BRACE_OPEN => [ |
519 | self::ACTION_PUSH => self::EXPRESSION_OP, |
520 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
521 | ], |
522 | self::TYPE_BRACE_CLOSE => [ |
523 | self::ACTION_POP => true, |
524 | ], |
525 | self::TYPE_PAREN_OPEN => [ |
526 | self::ACTION_PUSH => self::EXPRESSION_OP, |
527 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
528 | ], |
529 | self::TYPE_FUNC => [ |
530 | self::ACTION_PUSH => self::EXPRESSION_OP, |
531 | self::ACTION_GOTO => self::FUNC, |
532 | ], |
533 | self::TYPE_CLASS => [ |
534 | self::ACTION_PUSH => self::EXPRESSION_OP, |
535 | self::ACTION_GOTO => self::CLASS_DEF, |
536 | ], |
537 | self::TYPE_LITERAL => [ |
538 | self::ACTION_GOTO => self::EXPRESSION_OP, |
539 | ], |
540 | self::TYPE_ASYNC => [ |
541 | self::ACTION_GOTO => self::EXPRESSION_OP, |
542 | ], |
543 | ], |
544 | // An expression immediately after return/throw/break/continue, where a newline |
545 | // is not allowed. This state is identical to EXPRESSION, except that semicolon |
546 | // insertion can happen here, and we never stay here: in cases where EXPRESSION would |
547 | // do nothing, we go to EXPRESSION. |
548 | self::EXPRESSION_NO_NL => [ |
549 | self::TYPE_UN_OP => [ |
550 | self::ACTION_GOTO => self::EXPRESSION, |
551 | ], |
552 | self::TYPE_INCR_OP => [ |
553 | self::ACTION_GOTO => self::EXPRESSION, |
554 | ], |
555 | // BIN_OP seems impossible at the start of an expression, but it can happen in |
556 | // yield *foo |
557 | self::TYPE_BIN_OP => [ |
558 | self::ACTION_GOTO => self::EXPRESSION, |
559 | ], |
560 | self::TYPE_ADD_OP => [ |
561 | self::ACTION_GOTO => self::EXPRESSION, |
562 | ], |
563 | self::TYPE_SEMICOLON => [ |
564 | self::ACTION_GOTO => self::STATEMENT, |
565 | ], |
566 | self::TYPE_BRACE_OPEN => [ |
567 | self::ACTION_PUSH => self::EXPRESSION_OP, |
568 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
569 | ], |
570 | self::TYPE_BRACE_CLOSE => [ |
571 | self::ACTION_POP => true, |
572 | ], |
573 | self::TYPE_PAREN_OPEN => [ |
574 | self::ACTION_PUSH => self::EXPRESSION_OP, |
575 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
576 | ], |
577 | self::TYPE_FUNC => [ |
578 | self::ACTION_PUSH => self::EXPRESSION_OP, |
579 | self::ACTION_GOTO => self::FUNC, |
580 | ], |
581 | self::TYPE_CLASS => [ |
582 | self::ACTION_PUSH => self::EXPRESSION_OP, |
583 | self::ACTION_GOTO => self::CLASS_DEF, |
584 | ], |
585 | self::TYPE_LITERAL => [ |
586 | self::ACTION_GOTO => self::EXPRESSION_OP, |
587 | ], |
588 | self::TYPE_ASYNC => [ |
589 | self::ACTION_GOTO => self::EXPRESSION_OP, |
590 | ], |
591 | self::TYPE_AWAIT => [ |
592 | self::ACTION_GOTO => self::EXPRESSION, |
593 | ], |
594 | ], |
595 | // Place in an expression after an operand, where we expect an operator |
596 | self::EXPRESSION_OP => [ |
597 | self::TYPE_BIN_OP => [ |
598 | self::ACTION_GOTO => self::EXPRESSION, |
599 | ], |
600 | self::TYPE_ADD_OP => [ |
601 | self::ACTION_GOTO => self::EXPRESSION, |
602 | ], |
603 | self::TYPE_DOT => [ |
604 | self::ACTION_GOTO => self::EXPRESSION_DOT, |
605 | ], |
606 | self::TYPE_HOOK => [ |
607 | self::ACTION_PUSH => self::EXPRESSION, |
608 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
609 | ], |
610 | self::TYPE_COLON => [ |
611 | self::ACTION_GOTO => self::STATEMENT, |
612 | ], |
613 | self::TYPE_COMMA => [ |
614 | self::ACTION_GOTO => self::EXPRESSION, |
615 | ], |
616 | self::TYPE_SEMICOLON => [ |
617 | self::ACTION_GOTO => self::STATEMENT, |
618 | ], |
619 | self::TYPE_ARROW => [ |
620 | self::ACTION_GOTO => self::EXPRESSION_ARROWFUNC, |
621 | ], |
622 | self::TYPE_PAREN_OPEN => [ |
623 | self::ACTION_PUSH => self::EXPRESSION_OP, |
624 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
625 | ], |
626 | self::TYPE_BRACE_CLOSE => [ |
627 | self::ACTION_POP => true, |
628 | ], |
629 | self::TYPE_FUNC => [ |
630 | self::ACTION_PUSH => self::EXPRESSION_OP, |
631 | self::ACTION_GOTO => self::FUNC, |
632 | ], |
633 | ], |
634 | // State after a dot (.). Like EXPRESSION, except that many keywords behave like literals |
635 | // (e.g. class, if, else, var, function) because they're not valid as identifiers but are |
636 | // valid as property names. |
637 | self::EXPRESSION_DOT => [ |
638 | self::TYPE_LITERAL => [ |
639 | self::ACTION_GOTO => self::EXPRESSION_OP, |
640 | ], |
641 | // The following are keywords behaving as literals |
642 | self::TYPE_RETURN => [ |
643 | self::ACTION_GOTO => self::EXPRESSION_OP, |
644 | ], |
645 | self::TYPE_IF => [ |
646 | self::ACTION_GOTO => self::EXPRESSION_OP, |
647 | ], |
648 | self::TYPE_DO => [ |
649 | self::ACTION_GOTO => self::EXPRESSION_OP, |
650 | ], |
651 | self::TYPE_VAR => [ |
652 | self::ACTION_GOTO => self::EXPRESSION_OP, |
653 | ], |
654 | self::TYPE_FUNC => [ |
655 | self::ACTION_GOTO => self::EXPRESSION_OP, |
656 | ], |
657 | self::TYPE_CLASS => [ |
658 | self::ACTION_GOTO => self::EXPRESSION_OP, |
659 | ], |
660 | // We don't expect real unary/binary operators here, but some keywords |
661 | // (new, delete, void, typeof, instanceof, in) are classified as such, and they can be |
662 | // used as property names |
663 | self::TYPE_UN_OP => [ |
664 | self::ACTION_GOTO => self::EXPRESSION_OP, |
665 | ], |
666 | self::TYPE_BIN_OP => [ |
667 | self::ACTION_GOTO => self::EXPRESSION_OP, |
668 | ], |
669 | ], |
670 | // State after the } closing an arrow function body: like STATEMENT except |
671 | // that it has semicolon insertion, COMMA can continue the expression, and after |
672 | // a function we go to STATEMENT instead of EXPRESSION_OP |
673 | self::EXPRESSION_END => [ |
674 | self::TYPE_UN_OP => [ |
675 | self::ACTION_GOTO => self::EXPRESSION, |
676 | ], |
677 | self::TYPE_INCR_OP => [ |
678 | self::ACTION_GOTO => self::EXPRESSION, |
679 | ], |
680 | self::TYPE_ADD_OP => [ |
681 | self::ACTION_GOTO => self::EXPRESSION, |
682 | ], |
683 | self::TYPE_COMMA => [ |
684 | self::ACTION_GOTO => self::EXPRESSION, |
685 | ], |
686 | self::TYPE_SEMICOLON => [ |
687 | self::ACTION_GOTO => self::STATEMENT, |
688 | ], |
689 | self::TYPE_BRACE_OPEN => [ |
690 | self::ACTION_PUSH => self::STATEMENT, |
691 | self::ACTION_GOTO => self::STATEMENT, |
692 | ], |
693 | self::TYPE_BRACE_CLOSE => [ |
694 | self::ACTION_POP => true, |
695 | ], |
696 | self::TYPE_PAREN_OPEN => [ |
697 | self::ACTION_PUSH => self::EXPRESSION_OP, |
698 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
699 | ], |
700 | self::TYPE_RETURN => [ |
701 | self::ACTION_GOTO => self::EXPRESSION_NO_NL, |
702 | ], |
703 | self::TYPE_IF => [ |
704 | self::ACTION_GOTO => self::CONDITION, |
705 | ], |
706 | self::TYPE_VAR => [ |
707 | self::ACTION_GOTO => self::EXPRESSION, |
708 | ], |
709 | self::TYPE_FUNC => [ |
710 | self::ACTION_PUSH => self::STATEMENT, |
711 | self::ACTION_GOTO => self::FUNC, |
712 | ], |
713 | self::TYPE_CLASS => [ |
714 | self::ACTION_PUSH => self::STATEMENT, |
715 | self::ACTION_GOTO => self::CLASS_DEF, |
716 | ], |
717 | self::TYPE_LITERAL => [ |
718 | self::ACTION_GOTO => self::EXPRESSION_OP, |
719 | ], |
720 | self::TYPE_ASYNC => [ |
721 | self::ACTION_GOTO => self::EXPRESSION_OP, |
722 | ], |
723 | ], |
724 | // State after =>. Like EXPRESSION, except that { begins an arrow function body |
725 | // rather than an object literal. |
726 | self::EXPRESSION_ARROWFUNC => [ |
727 | self::TYPE_UN_OP => [ |
728 | self::ACTION_GOTO => self::EXPRESSION, |
729 | ], |
730 | self::TYPE_INCR_OP => [ |
731 | self::ACTION_GOTO => self::EXPRESSION, |
732 | ], |
733 | self::TYPE_ADD_OP => [ |
734 | self::ACTION_GOTO => self::EXPRESSION, |
735 | ], |
736 | self::TYPE_BRACE_OPEN => [ |
737 | self::ACTION_PUSH => self::EXPRESSION_END, |
738 | self::ACTION_GOTO => self::STATEMENT, |
739 | ], |
740 | self::TYPE_PAREN_OPEN => [ |
741 | self::ACTION_PUSH => self::EXPRESSION_OP, |
742 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
743 | ], |
744 | self::TYPE_FUNC => [ |
745 | self::ACTION_PUSH => self::EXPRESSION_OP, |
746 | self::ACTION_GOTO => self::FUNC, |
747 | ], |
748 | self::TYPE_CLASS => [ |
749 | self::ACTION_PUSH => self::EXPRESSION_OP, |
750 | self::ACTION_GOTO => self::CLASS_DEF, |
751 | ], |
752 | self::TYPE_LITERAL => [ |
753 | self::ACTION_GOTO => self::EXPRESSION_OP, |
754 | ], |
755 | ], |
756 | // Expression after a ? . This differs from EXPRESSION because a : ends the ternary |
757 | // rather than starting STATEMENT (outside a ternary, : follows a statement label or a case/default clause) |
758 | // The actual rule for : ending the ternary is in EXPRESSION_TERNARY_OP. |
759 | self::EXPRESSION_TERNARY => [ |
760 | self::TYPE_BRACE_OPEN => [ |
761 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
762 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
763 | ], |
764 | self::TYPE_PAREN_OPEN => [ |
765 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
766 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
767 | ], |
768 | self::TYPE_FUNC => [ |
769 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
770 | self::ACTION_GOTO => self::FUNC, |
771 | ], |
772 | self::TYPE_CLASS => [ |
773 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
774 | self::ACTION_GOTO => self::CLASS_DEF, |
775 | ], |
776 | self::TYPE_LITERAL => [ |
777 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
778 | ], |
779 | ], |
780 | // Like EXPRESSION_OP, but for ternaries, see EXPRESSION_TERNARY |
781 | self::EXPRESSION_TERNARY_OP => [ |
782 | self::TYPE_BIN_OP => [ |
783 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
784 | ], |
785 | self::TYPE_ADD_OP => [ |
786 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
787 | ], |
788 | self::TYPE_DOT => [ |
789 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_DOT, |
790 | ], |
791 | self::TYPE_HOOK => [ |
792 | self::ACTION_PUSH => self::EXPRESSION_TERNARY, |
793 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
794 | ], |
795 | self::TYPE_COMMA => [ |
796 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
797 | ], |
798 | self::TYPE_ARROW => [ |
799 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_ARROWFUNC, |
800 | ], |
801 | self::TYPE_PAREN_OPEN => [ |
802 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
803 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
804 | ], |
805 | self::TYPE_COLON => [ |
806 | self::ACTION_POP => true, |
807 | ], |
808 | ], |
809 | // Like EXPRESSION_DOT, but for ternaries, see EXPRESSION_TERNARY |
810 | self::EXPRESSION_TERNARY_DOT => [ |
811 | self::TYPE_LITERAL => [ |
812 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
813 | ], |
814 | // The following are keywords behaving as literals |
815 | self::TYPE_RETURN => [ |
816 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
817 | ], |
818 | self::TYPE_IF => [ |
819 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
820 | ], |
821 | self::TYPE_DO => [ |
822 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
823 | ], |
824 | self::TYPE_VAR => [ |
825 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
826 | ], |
827 | self::TYPE_FUNC => [ |
828 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
829 | ], |
830 | self::TYPE_CLASS => [ |
831 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
832 | ], |
833 | // We don't expect real unary/binary operators here, but some keywords |
834 | // (new, delete, void, typeof, instanceof, in) are classified as such, and they can be |
835 | // used as property names |
836 | self::TYPE_UN_OP => [ |
837 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
838 | ], |
839 | self::TYPE_BIN_OP => [ |
840 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
841 | ], |
842 | ], |
843 | // Like EXPRESSION_ARROWFUNC, but for ternaries, see EXPRESSION_TERNARY |
844 | self::EXPRESSION_TERNARY_ARROWFUNC => [ |
845 | self::TYPE_UN_OP => [ |
846 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
847 | ], |
848 | self::TYPE_INCR_OP => [ |
849 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
850 | ], |
851 | self::TYPE_ADD_OP => [ |
852 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
853 | ], |
854 | self::TYPE_BRACE_OPEN => [ |
855 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
856 | self::ACTION_GOTO => self::STATEMENT, |
857 | ], |
858 | self::TYPE_PAREN_OPEN => [ |
859 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
860 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
861 | ], |
862 | self::TYPE_FUNC => [ |
863 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
864 | self::ACTION_GOTO => self::FUNC, |
865 | ], |
866 | self::TYPE_CLASS => [ |
867 | self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP, |
868 | self::ACTION_GOTO => self::CLASS_DEF, |
869 | ], |
870 | self::TYPE_LITERAL => [ |
871 | self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP, |
872 | ], |
873 | ], |
874 | // Expression inside parentheses. Like EXPRESSION, except that ) ends this state |
875 | // This differs from EXPRESSION because semicolon insertion can't happen here |
876 | self::PAREN_EXPRESSION => [ |
877 | self::TYPE_BRACE_OPEN => [ |
878 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
879 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
880 | ], |
881 | self::TYPE_PAREN_OPEN => [ |
882 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
883 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
884 | ], |
885 | self::TYPE_PAREN_CLOSE => [ |
886 | self::ACTION_POP => true, |
887 | ], |
888 | self::TYPE_FUNC => [ |
889 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
890 | self::ACTION_GOTO => self::FUNC, |
891 | ], |
892 | self::TYPE_CLASS => [ |
893 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
894 | self::ACTION_GOTO => self::CLASS_DEF, |
895 | ], |
896 | self::TYPE_LITERAL => [ |
897 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
898 | ], |
899 | self::TYPE_ASYNC => [ |
900 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP_NO_NL, |
901 | ], |
902 | ], |
903 | // Like EXPRESSION_OP, but in parentheses, see PAREN_EXPRESSION |
904 | self::PAREN_EXPRESSION_OP => [ |
905 | self::TYPE_BIN_OP => [ |
906 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
907 | ], |
908 | self::TYPE_ADD_OP => [ |
909 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
910 | ], |
911 | self::TYPE_DOT => [ |
912 | self::ACTION_GOTO => self::PAREN_EXPRESSION_DOT, |
913 | ], |
914 | self::TYPE_HOOK => [ |
915 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
916 | ], |
917 | self::TYPE_COLON => [ |
918 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
919 | ], |
920 | self::TYPE_COMMA => [ |
921 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
922 | ], |
923 | self::TYPE_SEMICOLON => [ |
924 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
925 | ], |
926 | self::TYPE_ARROW => [ |
927 | self::ACTION_GOTO => self::PAREN_EXPRESSION_ARROWFUNC, |
928 | ], |
929 | self::TYPE_PAREN_OPEN => [ |
930 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
931 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
932 | ], |
933 | self::TYPE_PAREN_CLOSE => [ |
934 | self::ACTION_POP => true, |
935 | ], |
936 | ], |
937 | // Like EXPRESSION_DOT, but in parentheses, see PAREN_EXPRESSION |
938 | self::PAREN_EXPRESSION_DOT => [ |
939 | self::TYPE_LITERAL => [ |
940 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
941 | ], |
942 | // The following are keywords behaving as literals |
943 | self::TYPE_RETURN => [ |
944 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
945 | ], |
946 | self::TYPE_IF => [ |
947 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
948 | ], |
949 | self::TYPE_DO => [ |
950 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
951 | ], |
952 | self::TYPE_VAR => [ |
953 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
954 | ], |
955 | self::TYPE_FUNC => [ |
956 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
957 | ], |
958 | self::TYPE_CLASS => [ |
959 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
960 | ], |
961 | // We don't expect real unary/binary operators here, but some keywords |
962 | // (new, delete, void, typeof, instanceof, in) are classified as such, and they can be |
963 | // used as property names |
964 | self::TYPE_UN_OP => [ |
965 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
966 | ], |
967 | self::TYPE_BIN_OP => [ |
968 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
969 | ], |
970 | ], |
971 | // Like EXPRESSION_ARROWFUNC, but in parentheses, see PAREN_EXPRESSION |
972 | self::PAREN_EXPRESSION_ARROWFUNC => [ |
973 | self::TYPE_UN_OP => [ |
974 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
975 | ], |
976 | self::TYPE_INCR_OP => [ |
977 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
978 | ], |
979 | self::TYPE_ADD_OP => [ |
980 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
981 | ], |
982 | self::TYPE_BRACE_OPEN => [ |
983 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
984 | self::ACTION_GOTO => self::STATEMENT, |
985 | ], |
986 | self::TYPE_PAREN_OPEN => [ |
987 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
988 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
989 | ], |
990 | self::TYPE_FUNC => [ |
991 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
992 | self::ACTION_GOTO => self::FUNC, |
993 | ], |
994 | self::TYPE_CLASS => [ |
995 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
996 | self::ACTION_GOTO => self::CLASS_DEF, |
997 | ], |
998 | self::TYPE_LITERAL => [ |
999 | self::ACTION_GOTO => self::PAREN_EXPRESSION_OP, |
1000 | ], |
1001 | ], |
1002 | |
1003 | // Like PAREN_EXPRESSION_OP, for the state after "async" in a PAREN_EXPRESSION, |
1004 | // for use by the $semicolon model. |
1005 | self::PAREN_EXPRESSION_OP_NO_NL => [ |
1006 | self::TYPE_BIN_OP => [ |
1007 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1008 | ], |
1009 | self::TYPE_ADD_OP => [ |
1010 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1011 | ], |
1012 | self::TYPE_DOT => [ |
1013 | self::ACTION_GOTO => self::PAREN_EXPRESSION_DOT, |
1014 | ], |
1015 | self::TYPE_HOOK => [ |
1016 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1017 | ], |
1018 | self::TYPE_COLON => [ |
1019 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1020 | ], |
1021 | self::TYPE_COMMA => [ |
1022 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1023 | ], |
1024 | self::TYPE_SEMICOLON => [ |
1025 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1026 | ], |
1027 | self::TYPE_ARROW => [ |
1028 | self::ACTION_GOTO => self::PAREN_EXPRESSION_ARROWFUNC, |
1029 | ], |
1030 | self::TYPE_PAREN_OPEN => [ |
1031 | self::ACTION_PUSH => self::PAREN_EXPRESSION_OP, |
1032 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1033 | ], |
1034 | self::TYPE_PAREN_CLOSE => [ |
1035 | self::ACTION_POP => true, |
1036 | ], |
1037 | ], |
1038 | // Expression as the value of a key in an object literal. Like EXPRESSION, except that |
1039 | // a comma (in PROPERTY_EXPRESSION_OP) goes to PROPERTY_ASSIGNMENT instead |
1040 | self::PROPERTY_EXPRESSION => [ |
1041 | self::TYPE_BRACE_OPEN => [ |
1042 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1043 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
1044 | ], |
1045 | self::TYPE_BRACE_CLOSE => [ |
1046 | self::ACTION_POP => true, |
1047 | ], |
1048 | self::TYPE_PAREN_OPEN => [ |
1049 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1050 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1051 | ], |
1052 | self::TYPE_FUNC => [ |
1053 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1054 | self::ACTION_GOTO => self::FUNC, |
1055 | ], |
1056 | self::TYPE_CLASS => [ |
1057 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1058 | self::ACTION_GOTO => self::CLASS_DEF, |
1059 | ], |
1060 | self::TYPE_LITERAL => [ |
1061 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1062 | ], |
1063 | ], |
1064 | // Like EXPRESSION_OP, but in a property expression, see PROPERTY_EXPRESSION |
1065 | self::PROPERTY_EXPRESSION_OP => [ |
1066 | self::TYPE_BIN_OP => [ |
1067 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION, |
1068 | ], |
1069 | self::TYPE_ADD_OP => [ |
1070 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION, |
1071 | ], |
1072 | self::TYPE_DOT => [ |
1073 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_DOT, |
1074 | ], |
1075 | self::TYPE_HOOK => [ |
1076 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION, |
1077 | self::ACTION_GOTO => self::EXPRESSION_TERNARY, |
1078 | ], |
1079 | self::TYPE_COMMA => [ |
1080 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
1081 | ], |
1082 | self::TYPE_ARROW => [ |
1083 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_ARROWFUNC, |
1084 | ], |
1085 | self::TYPE_BRACE_OPEN => [ |
1086 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1087 | ], |
1088 | self::TYPE_BRACE_CLOSE => [ |
1089 | self::ACTION_POP => true, |
1090 | ], |
1091 | self::TYPE_PAREN_OPEN => [ |
1092 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1093 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1094 | ], |
1095 | ], |
1096 | // Like EXPRESSION_DOT, but in a property expression, see PROPERTY_EXPRESSION |
1097 | self::PROPERTY_EXPRESSION_DOT => [ |
1098 | self::TYPE_LITERAL => [ |
1099 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1100 | ], |
1101 | // The following are keywords behaving as literals |
1102 | self::TYPE_RETURN => [ |
1103 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1104 | ], |
1105 | self::TYPE_IF => [ |
1106 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1107 | ], |
1108 | self::TYPE_DO => [ |
1109 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1110 | ], |
1111 | self::TYPE_VAR => [ |
1112 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1113 | ], |
1114 | self::TYPE_FUNC => [ |
1115 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1116 | ], |
1117 | self::TYPE_CLASS => [ |
1118 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1119 | ], |
1120 | // We don't expect real unary/binary operators here, but some keywords |
1121 | // (new, delete, void, typeof, instanceof, in) are classified as such, and they can be |
1122 | // used as property names |
1123 | self::TYPE_UN_OP => [ |
1124 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1125 | ], |
1126 | self::TYPE_BIN_OP => [ |
1127 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1128 | ], |
1129 | ], |
1130 | // Like EXPRESSION_ARROWFUNC, but in a property expression, see PROPERTY_EXPRESSION |
1131 | self::PROPERTY_EXPRESSION_ARROWFUNC => [ |
1132 | self::TYPE_UN_OP => [ |
1133 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION, |
1134 | ], |
1135 | self::TYPE_INCR_OP => [ |
1136 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION, |
1137 | ], |
1138 | self::TYPE_ADD_OP => [ |
1139 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION, |
1140 | ], |
1141 | self::TYPE_BRACE_OPEN => [ |
1142 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1143 | self::ACTION_GOTO => self::STATEMENT, |
1144 | ], |
1145 | self::TYPE_PAREN_OPEN => [ |
1146 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1147 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1148 | ], |
1149 | self::TYPE_FUNC => [ |
1150 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1151 | self::ACTION_GOTO => self::FUNC, |
1152 | ], |
1153 | self::TYPE_CLASS => [ |
1154 | self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP, |
1155 | self::ACTION_GOTO => self::CLASS_DEF, |
1156 | ], |
1157 | self::TYPE_LITERAL => [ |
1158 | self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP, |
1159 | ], |
1160 | ], |
1161 | // Class definition (after the class keyword). Expects an identifier, or the extends |
1162 | // keyword followed by an expression (or both), followed by {, which starts an object |
1163 | // literal. The object literal's closing } will pop the stack, so the state to return |
1164 | // to after the class definition should be pushed to the stack first. |
1165 | self::CLASS_DEF => [ |
1166 | self::TYPE_BRACE_OPEN => [ |
1167 | self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT, |
1168 | ], |
1169 | self::TYPE_PAREN_OPEN => [ |
1170 | self::ACTION_PUSH => self::CLASS_DEF, |
1171 | self::ACTION_GOTO => self::PAREN_EXPRESSION, |
1172 | ], |
1173 | self::TYPE_FUNC => [ |
1174 | self::ACTION_PUSH => self::CLASS_DEF, |
1175 | self::ACTION_GOTO => self::FUNC, |
1176 | ], |
1177 | ], |
1178 | // Import or export declaration |
1179 | self::IMPORT_EXPORT => [ |
1180 | self::TYPE_SEMICOLON => [ |
1181 | self::ACTION_GOTO => self::STATEMENT, |
1182 | ], |
1183 | self::TYPE_VAR => [ |
1184 | self::ACTION_GOTO => self::EXPRESSION, |
1185 | ], |
1186 | self::TYPE_FUNC => [ |
1187 | self::ACTION_PUSH => self::EXPRESSION_OP, |
1188 | self::ACTION_GOTO => self::FUNC, |
1189 | ], |
1190 | self::TYPE_CLASS => [ |
1191 | self::ACTION_PUSH => self::EXPRESSION_OP, |
1192 | self::ACTION_GOTO => self::CLASS_DEF, |
1193 | ], |
1194 | self::TYPE_SPECIAL => [ |
1195 | 'default' => [ |
1196 | self::ACTION_GOTO => self::EXPRESSION, |
1197 | ], |
1198 | // Stay in this state for *, as, from |
1199 | '*' => [], |
1200 | 'as' => [], |
1201 | 'from' => [], |
1202 | ], |
1203 | ], |
1204 | // Used in template string-specific code below |
1205 | self::TEMPLATE_STRING_HEAD => [ |
1206 | self::TYPE_LITERAL => [ |
1207 | self::ACTION_PUSH => self::TEMPLATE_STRING_TAIL, |
1208 | self::ACTION_GOTO => self::EXPRESSION, |
1209 | ], |
1210 | ], |
1211 | ]; |
1212 | |
/**
 * Semicolon insertion table.
 *
 * Keyed first by state, then by token type. When the minifier is in one of the listed
 * states and the next token has one of the listed types AND was preceded by a newline,
 * that newline is kept in the output and the state machine restarts at STATEMENT, which
 * lets the JavaScript engine apply automatic semicolon insertion exactly as it would have
 * for the unminified source.
 *
 * ensureExpandedStates() adds a negated copy of every state listed here.
 *
 * @var array
 */
private static $semicolon = [
	self::EXPRESSION_NO_NL => [
		self::TYPE_UN_OP => true,
		// A binary operator looks impossible at the start of an expression, but it does
		// occur in: yield *foo
		self::TYPE_BIN_OP => true,
		self::TYPE_INCR_OP => true,
		self::TYPE_ADD_OP => true,
		self::TYPE_BRACE_OPEN => true,
		self::TYPE_PAREN_OPEN => true,
		self::TYPE_RETURN => true,
		self::TYPE_IF => true,
		self::TYPE_DO => true,
		self::TYPE_VAR => true,
		self::TYPE_FUNC => true,
		self::TYPE_CLASS => true,
		self::TYPE_LITERAL => true,
		self::TYPE_ASYNC => true,
	],
	self::EXPRESSION_OP => [
		self::TYPE_UN_OP => true,
		self::TYPE_INCR_OP => true,
		self::TYPE_BRACE_OPEN => true,
		self::TYPE_RETURN => true,
		self::TYPE_IF => true,
		self::TYPE_DO => true,
		self::TYPE_VAR => true,
		self::TYPE_FUNC => true,
		self::TYPE_CLASS => true,
		self::TYPE_LITERAL => true,
		self::TYPE_ASYNC => true,
	],
	self::EXPRESSION_END => [
		self::TYPE_UN_OP => true,
		self::TYPE_INCR_OP => true,
		self::TYPE_ADD_OP => true,
		self::TYPE_BRACE_OPEN => true,
		self::TYPE_PAREN_OPEN => true,
		self::TYPE_RETURN => true,
		self::TYPE_IF => true,
		self::TYPE_DO => true,
		self::TYPE_VAR => true,
		self::TYPE_FUNC => true,
		self::TYPE_CLASS => true,
		self::TYPE_LITERAL => true,
		self::TYPE_ASYNC => true,
	],
	self::PAREN_EXPRESSION_OP_NO_NL => [
		self::TYPE_FUNC => true,
	],
];
1272 | |
/**
 * Set of states (as array keys) in which a "/" character is the division operator.
 *
 * In every state that is not a key of this array, "/" is taken to be the start of a
 * regular expression literal.
 *
 * self::ensureExpandedStates() adds a negated copy of every state listed here.
 *
 * NOTE(review): PAREN_EXPRESSION_OP_NO_NL (the state entered after "async" inside
 * parentheses) is not listed, so a "/" directly following "async" there would be read as
 * the start of a regex. Presumably intentional or harmless in practice; confirm.
 *
 * @var array
 */
private static $divStates = [
	self::EXPRESSION_OP => true,
	self::EXPRESSION_TERNARY_OP => true,
	self::PAREN_EXPRESSION_OP => true,
	self::PROPERTY_EXPRESSION_OP => true,
];
1286 | |
/**
 * Mirror the state tables under negated state numbers, to represent generator functions.
 *
 * For every state in self::$model except FUNC and GENFUNC, a copy is stored under the
 * negated state number, with the states it refers to negated as well. While the minifier
 * is in a negative state, TYPE_YIELD is treated as TYPE_RETURN; in a positive state it is
 * treated as TYPE_LITERAL. self::$semicolon and self::$divStates receive matching negative
 * entries.
 *
 * The work is guarded by self::$expandedStates, so repeated calls return immediately.
 */
private static function ensureExpandedStates() {
	// Already done?
	if ( self::$expandedStates ) {
		return;
	}
	self::$expandedStates = true;

	// Targets that must be copied unchanged: FUNC and GENFUNC have no negative
	// counterpart, and `true` is the marker value used by ACTION_POP.
	$mirror = static function ( $target ) {
		if ( $target === self::FUNC || $target === self::GENFUNC || $target === true ) {
			return $target;
		}
		return -$target;
	};

	foreach ( self::$model as $state => $transitions ) {
		if ( $state === self::FUNC || $state === self::GENFUNC ) {
			continue;
		}
		foreach ( $transitions as $tokenType => $actions ) {
			foreach ( $actions as $action => $target ) {
				if ( !is_array( $target ) ) {
					self::$model[-$state][$tokenType][$action] = $mirror( $target );
					continue;
				}
				// Nested form (TYPE_SPECIAL): here $action is the token text and $target
				// is its list of actions. That list may be empty (e.g. '*' => [] means
				// "recognise the token, stay in this state"), so create the entry
				// explicitly; otherwise the negative state would lose the token, because
				// the loop below never runs for an empty list.
				if ( !isset( self::$model[-$state][$tokenType][$action] ) ) {
					self::$model[-$state][$tokenType][$action] = [];
				}
				foreach ( $target as $subaction => $subtarget ) {
					self::$model[-$state][$tokenType][$action][$subaction] = $mirror( $subtarget );
				}
			}
		}
	}

	// Special case:
	// '{' in a property assignment starts a method, so it shouldn't be flipped
	self::$model[-self::PROPERTY_ASSIGNMENT][self::TYPE_BRACE_OPEN][self::ACTION_GOTO] = self::STATEMENT;

	// The lookup tables keyed by state need the negative states as well
	foreach ( self::$semicolon as $state => $tokenTypes ) {
		self::$semicolon[-$state] = $tokenTypes;
	}
	foreach ( self::$divStates as $state => $flag ) {
		self::$divStates[-$state] = $flag;
	}
}
1332 | |
/**
 * Minify a string of JavaScript code.
 *
 * Convenience wrapper around minifyInternal() that does not produce a source map.
 *
 * @see MinifierState::setErrorHandler
 * @param string $s JavaScript code to minify
 * @param callable|null $onError Called with a ParseError object
 * @return string Minified code
 */
public static function minify( $s, $onError = null ) {
	// No mappings generator: plain minification only
	$mapGenerator = null;

	return self::minifyInternal( $s, $mapGenerator, $onError );
}
1344 | |
/**
 * Factory for a minifier state object that has no source map capabilities.
 *
 * Usage:
 *
 *   JavaScriptMinifier::createMinifier()
 *     ->addSourceFile( 'file.js', $source )
 *     ->getMinifiedOutput();
 *
 * @return JavaScriptMinifierState
 */
public static function createMinifier() {
	$minifierState = new JavaScriptMinifierState();

	return $minifierState;
}
1359 | |
/**
 * Factory for a minifier state object that has source map capabilities.
 *
 * Usage:
 *
 *   $mapper = JavaScriptMinifier::createSourceMapState()
 *     ->addSourceFile( 'file1.js', $source1 )
 *     ->addOutput( "\n\n" )
 *     ->addSourceFile( 'file2.js', $source2 );
 *   $out = $mapper->getMinifiedOutput();
 *   $map = $mapper->getSourceMap();
 *
 * @return JavaScriptMapperState
 */
public static function createSourceMapState() {
	$mapperState = new JavaScriptMapperState();

	return $mapperState;
}
1377 | |
/**
 * Factory for a MinifierState that doesn't actually minify.
 *
 * @return IdentityMinifierState
 */
public static function createIdentityMinifier() {
	$identityState = new IdentityMinifierState();

	return $identityState;
}
1386 | |
1387 | /** |
1388 | * Minify with optional source map. |
1389 | * |
1390 | * @internal |
1391 | * |
1392 | * @param string $s |
1393 | * @param MappingsGenerator|null $mapGenerator |
1394 | * @param callable|null $onError |
1395 | * @return string |
1396 | */ |
1397 | public static function minifyInternal( $s, $mapGenerator = null, $onError = null ) { |
1398 | self::ensureExpandedStates(); |
1399 | |
1400 | // Here's where the minifying takes place: Loop through the input, looking for tokens |
1401 | // and output them to $out, taking actions to the above defined rules when appropriate. |
1402 | $error = null; |
1403 | $out = ''; |
1404 | $pos = 0; |
1405 | $length = strlen( $s ); |
1406 | $lineLength = 0; |
1407 | $dotlessNum = false; |
1408 | $lastDotlessNum = false; |
1409 | $newlineFound = true; |
1410 | $state = self::STATEMENT; |
1411 | $stack = []; |
1412 | $topOfStack = null; // Optimization: calling end( $stack ) repeatedly is expensive |
1413 | $last = ';'; // Pretend that we have seen a semicolon yet |
1414 | while ( $pos < $length ) { |
1415 | // First, skip over any whitespace and multiline comments, recording whether we |
1416 | // found any newline character |
1417 | $skip = strspn( $s, " \t\n\r\xb\xc", $pos ); |
1418 | if ( !$skip ) { |
1419 | $ch = $s[$pos]; |
1420 | if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) { |
1421 | // Multiline comment. Search for the end token or EOT. |
1422 | $end = strpos( $s, '*/', $pos + 2 ); |
1423 | $skip = $end === false ? $length - $pos : $end - $pos + 2; |
1424 | } |
1425 | } |
1426 | if ( $skip ) { |
1427 | // The semicolon insertion mechanism needs to know whether there was a newline |
1428 | // between two tokens, so record it now. |
1429 | if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) { |
1430 | $newlineFound = true; |
1431 | } |
1432 | if ( $mapGenerator ) { |
1433 | $mapGenerator->consumeSource( $skip ); |
1434 | } |
1435 | $pos += $skip; |
1436 | continue; |
1437 | } |
1438 | // Handle C++-style comments and html comments, which are treated as single line |
1439 | // comments by the browser, regardless of whether the end tag is on the same line. |
1440 | // Handle --> the same way, but only if it's at the beginning of the line |
1441 | // @phan-suppress-next-line PhanPossiblyUndeclaredVariable |
1442 | if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' ) |
1443 | || ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' ) |
1444 | || ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' ) |
1445 | ) { |
1446 | $skip = strcspn( $s, "\r\n", $pos ); |
1447 | if ( $mapGenerator ) { |
1448 | $mapGenerator->consumeSource( $skip ); |
1449 | } |
1450 | $pos += $skip; |
1451 | continue; |
1452 | } |
1453 | |
1454 | // Find out which kind of token we're handling. |
1455 | // Note: $end must point past the end of the current token |
1456 | // so that `substr($s, $pos, $end - $pos)` would be the entire token. |
1457 | // In other words, $end will be the offset of the last relevant character |
1458 | // in the stream + 1, or simply put: The offset of the first character |
1459 | // of any next token in the stream. |
1460 | $end = $pos + 1; |
1461 | // Handle string literals |
1462 | if ( $ch === "'" || $ch === '"' ) { |
1463 | // Search to the end of the string literal, skipping over backslash escapes |
1464 | $search = $ch . '\\'; |
1465 | do { |
1466 | // Speculatively add 2 to the end so that if we see a backslash, |
1467 | // the next iteration will start 2 characters further (one for the |
1468 | // backslash, one for the escaped character). |
1469 | // We'll correct this outside the loop. |
1470 | $end += strcspn( $s, $search, $end ) + 2; |
1471 | // If the last character in our search for a quote or a backslash |
1472 | // matched a backslash and we haven't reached the end, keep searching.. |
1473 | } while ( $end - 2 < $length && $s[$end - 2] === '\\' ); |
1474 | // Correction (1): Undo speculative add, keep only one (end of string literal) |
1475 | $end--; |
1476 | if ( $end > $length ) { |
1477 | // Correction (2): Loop wrongly assumed an end quote ended the search, |
1478 | // but search ended because we've reached the end. Correct $end. |
1479 | // TODO: This is invalid and should throw. |
1480 | $end--; |
1481 | } |
1482 | |
1483 | // Handle template strings, either from "`" to begin a new string, |
1484 | // or continuation after the "}" that ends a "${"-expression. |
1485 | } elseif ( $ch === '`' || ( $ch === '}' && $topOfStack === self::TEMPLATE_STRING_TAIL ) ) { |
1486 | if ( $ch === '}' ) { |
1487 | // Pop the TEMPLATE_STRING_TAIL state off the stack |
1488 | // We don't let it get popped off the stack the normal way, to avoid the newline |
1489 | // and comment stripping code above running on the continuation of the literal |
1490 | array_pop( $stack ); |
1491 | // Also pop the previous state off the stack |
1492 | $state = array_pop( $stack ); |
1493 | $topOfStack = end( $stack ); |
1494 | } |
1495 | // Search until we reach either a closing ` or a ${, skipping over backslash escapes |
1496 | // and $ characters followed by something other than { or ` |
1497 | do { |
1498 | $end += strcspn( $s, '`$\\', $end ) + 1; |
1499 | if ( $end - 1 < $length && $s[$end - 1] === '`' ) { |
1500 | // End of the string, stop |
1501 | // We don't do this in the while() condition because the $end++ in the |
1502 | // backslash escape branch makes it difficult to do so without incorrectly |
1503 | // considering an escaped backtick (\`) the end of the string |
1504 | break; |
1505 | } |
1506 | if ( $end - 1 < $length && $s[$end - 1] === '\\' ) { |
1507 | // Backslash escape. Skip the next character, and keep going |
1508 | $end++; |
1509 | continue; |
1510 | } |
1511 | if ( $end < $length && $s[$end - 1] === '$' && $s[$end] === '{' ) { |
1512 | // Beginning of an expression in ${ ... }. Skip the {, and stop |
1513 | $end++; |
1514 | // Push the current state to the stack. We'll pop this off later when hitting |
1515 | // the end of this template string |
1516 | $stack[] = $state; |
1517 | $topOfStack = $state; |
1518 | // Change the state to TEMPLATE_STRING_HEAD. The token type will be detected |
1519 | // as TYPE_LITERAL, and this will cause the state machine to expect an |
1520 | // expression, then go to the TEMPLATE_STRING_TAIL state when it hits the } |
1521 | $state = self::TEMPLATE_STRING_HEAD; |
1522 | break; |
1523 | } |
1524 | } while ( $end - 1 < $length ); |
1525 | if ( $end > $length ) { |
1526 | // Loop wrongly assumed an end quote or ${ ended the search, |
1527 | // but search ended because we've reached the end. Correct $end. |
1528 | // TODO: This is invalid and should throw. |
1529 | $end--; |
1530 | } |
1531 | |
1532 | // We have to distinguish between regexp literals and division operators |
1533 | // A division operator is only possible in certain states |
1534 | } elseif ( $ch === '/' && !isset( self::$divStates[$state] ) ) { |
1535 | // Regexp literal |
1536 | for ( ; ; ) { |
1537 | // Search until we find "/" (end of regexp), "\" (backslash escapes), |
1538 | // or "[" (start of character classes). |
1539 | do { |
1540 | // Speculatively add 2 to ensure next iteration skips |
1541 | // over backslash and escaped character. |
1542 | // We'll correct this outside the loop. |
1543 | $end += strcspn( $s, '/[\\', $end ) + 2; |
1544 | // If backslash escape, keep searching... |
1545 | } while ( $end - 2 < $length && $s[$end - 2] === '\\' ); |
1546 | // Correction (1): Undo speculative add, keep only one (end of regexp) |
1547 | $end--; |
1548 | if ( $end > $length ) { |
1549 | // Correction (2): Loop wrongly assumed end slash was seen |
1550 | // String ended without end of regexp. Correct $end. |
1551 | // TODO: This is invalid and should throw. |
1552 | $end--; |
1553 | break; |
1554 | } |
1555 | if ( $s[$end - 1] === '/' ) { |
1556 | break; |
1557 | } |
1558 | // (Implicit else), we must've found the start of a char class, |
1559 | // skip until we find "]" (end of char class), or "\" (backslash escape) |
1560 | do { |
1561 | // Speculatively add 2 for backslash escape. |
1562 | // We'll subtract one outside the loop. |
1563 | $end += strcspn( $s, ']\\', $end ) + 2; |
1564 | // If backslash escape, keep searching... |
1565 | } while ( $end - 2 < $length && $s[$end - 2] === '\\' ); |
1566 | // Correction (1): Undo speculative add, keep only one (end of regexp) |
1567 | $end--; |
1568 | if ( $end > $length ) { |
1569 | // Correction (2): Loop wrongly assumed "]" was seen |
1570 | // String ended without ending char class or regexp. Correct $end. |
1571 | // TODO: This is invalid and should throw. |
1572 | $end--; |
1573 | break; |
1574 | } |
1575 | } |
1576 | // Search past the regexp modifiers (gi) |
1577 | while ( $end < $length && ctype_alpha( $s[$end] ) ) { |
1578 | $end++; |
1579 | } |
1580 | } elseif ( |
1581 | $ch === '0' |
1582 | && ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' ) |
1583 | ) { |
1584 | // Hex numeric literal |
1585 | $end++; // x or X |
1586 | $len = strspn( $s, '0123456789ABCDEFabcdef', $end ); |
1587 | if ( !$len && !$error ) { |
1588 | $error = new ParseError( |
1589 | 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ), |
1590 | $pos, |
1591 | ); |
1592 | } |
1593 | $end += $len; |
1594 | } elseif ( |
1595 | // Optimisation: This check must accept only ASCII digits 0-9. |
1596 | // Avoid ctype_digit() because it is slower and also accepts locale-specific digits. |
1597 | // Using is_numeric() might seem wrong also as it accepts negative numbers, decimal |
1598 | // numbers, and exponents (e.g. strings like "+012.34e6"). But, it is fine here |
1599 | // because we know $ch is a single character, and we believe the only single |
1600 | // characters that is_numeric() accepts are ASCII digits 0-9. |
1601 | is_numeric( $ch ) |
1602 | || ( $ch === '.' && $pos + 1 < $length && is_numeric( $s[$pos + 1] ) ) |
1603 | ) { |
1604 | $end += strspn( $s, '0123456789', $end ); |
1605 | $decimal = strspn( $s, '.', $end ); |
1606 | if ( $decimal ) { |
1607 | if ( $decimal > 2 && !$error ) { |
1608 | $error = new ParseError( 'Too many decimal points', $end ); |
1609 | } |
1610 | $end += strspn( $s, '0123456789', $end + 1 ) + $decimal; |
1611 | } else { |
1612 | $dotlessNum = true; |
1613 | } |
1614 | $exponent = strspn( $s, 'eE', $end ); |
1615 | if ( $exponent ) { |
1616 | if ( $exponent > 1 && !$error ) { |
1617 | $error = new ParseError( 'Number with several E', $end ); |
1618 | } |
1619 | $end += $exponent; |
1620 | |
1621 | // + sign is optional; - sign is required. |
1622 | $end += strspn( $s, '-+', $end ); |
1623 | $len = strspn( $s, '0123456789', $end ); |
1624 | if ( !$len && !$error ) { |
1625 | $error = new ParseError( |
1626 | 'Missing decimal digits after exponent', |
1627 | $pos |
1628 | ); |
1629 | } |
1630 | $end += $len; |
1631 | } |
1632 | } elseif ( isset( self::$opChars[$ch] ) ) { |
1633 | // Punctuation character. Search for the longest matching operator. |
1634 | for ( $tokenLength = self::LONGEST_PUNCTUATION_TOKEN; $tokenLength > 1; $tokenLength-- ) { |
1635 | if ( |
1636 | $pos + $tokenLength <= $length && |
1637 | isset( self::$tokenTypes[ substr( $s, $pos, $tokenLength ) ] ) |
1638 | ) { |
1639 | $end = $pos + $tokenLength; |
1640 | break; |
1641 | } |
1642 | } |
1643 | } else { |
1644 | // Identifier or reserved word. Search for the end by excluding whitespace and |
1645 | // punctuation. |
1646 | $end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"`!&|^~\xb\xc\r", $end ); |
1647 | } |
1648 | |
1649 | // Now get the token type from our type array |
1650 | $token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token ) |
1651 | $type = isset( self::$model[$state][self::TYPE_SPECIAL][$token] ) |
1652 | ? self::TYPE_SPECIAL |
1653 | : self::$tokenTypes[$token] ?? self::TYPE_LITERAL; |
1654 | if ( $type === self::TYPE_YIELD ) { |
1655 | // yield is treated as TYPE_RETURN inside a generator function (negative state) |
1656 | // but as TYPE_LITERAL when not in a generator function (positive state) |
1657 | $type = $state < 0 ? self::TYPE_RETURN : self::TYPE_LITERAL; |
1658 | } |
1659 | |
1660 | $pad = ''; |
1661 | if ( $newlineFound && isset( self::$semicolon[$state][$type] ) ) { |
1662 | // This token triggers the semicolon insertion mechanism of javascript. While we |
1663 | // could add the ; token here ourselves, keeping the newline has a few advantages. |
1664 | $pad = "\n"; |
1665 | $state = $state < 0 ? -self::STATEMENT : self::STATEMENT; |
1666 | $lineLength = 0; |
1667 | } elseif ( $lineLength + $end - $pos > self::$maxLineLength && |
1668 | !isset( self::$semicolon[$state][$type] ) && |
1669 | $type !== self::TYPE_INCR_OP && |
1670 | $type !== self::TYPE_ARROW |
1671 | ) { |
1672 | // This line would get too long if we added $token, so add a newline first. |
1673 | // Only do this if it won't trigger semicolon insertion and if it won't |
1674 | // put a postfix increment operator or an arrow on its own line, |
1675 | // which is illegal in js. |
1676 | $pad = "\n"; |
1677 | $lineLength = 0; |
1678 | // Check, whether we have to separate the token from the last one with whitespace |
1679 | } elseif ( !isset( self::$opChars[$last] ) && !isset( self::$opChars[$ch] ) ) { |
1680 | $pad = ' '; |
1681 | $lineLength++; |
1682 | // Don't accidentally create ++, -- or // tokens |
1683 | } elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) { |
1684 | $pad = ' '; |
1685 | $lineLength++; |
1686 | // Don't create invalid dot notation after number literal (T303827). |
1687 | // Keep whitespace in "42. foo". |
1688 | // But keep minifying "foo.bar", "42..foo", and "42.0.foo" per $opChars. |
1689 | } elseif ( $lastDotlessNum && $type === self::TYPE_DOT ) { |
1690 | $pad = ' '; |
1691 | $lineLength++; |
1692 | } |
1693 | |
1694 | // self::debug( $topOfStack, $last, $state, $ch, $token, $type ); |
1695 | |
1696 | if ( $mapGenerator ) { |
1697 | $mapGenerator->outputSpace( $pad ); |
1698 | $mapGenerator->outputToken( $token ); |
1699 | $mapGenerator->consumeSource( $end - $pos ); |
1700 | } |
1701 | $out .= $pad; |
1702 | $out .= $token; |
1703 | $lineLength += $end - $pos; // += strlen( $token ) |
1704 | $last = $s[$end - 1]; |
1705 | $pos = $end; |
1706 | $newlineFound = false; |
1707 | $lastDotlessNum = $dotlessNum; |
1708 | $dotlessNum = false; |
1709 | |
1710 | // Now that we have output our token, transition into the new state. |
1711 | $actions = $type === self::TYPE_SPECIAL ? |
1712 | self::$model[$state][$type][$token] : |
1713 | self::$model[$state][$type] ?? []; |
1714 | if ( isset( $actions[self::ACTION_PUSH] ) && |
1715 | count( $stack ) < self::STACK_LIMIT |
1716 | ) { |
1717 | $topOfStack = $actions[self::ACTION_PUSH]; |
1718 | $stack[] = $topOfStack; |
1719 | } |
1720 | if ( $stack && isset( $actions[self::ACTION_POP] ) ) { |
1721 | $state = array_pop( $stack ); |
1722 | $topOfStack = end( $stack ); |
1723 | } elseif ( isset( $actions[self::ACTION_GOTO] ) ) { |
1724 | $state = $actions[self::ACTION_GOTO]; |
1725 | } |
1726 | } |
1727 | if ( $onError && $error ) { |
1728 | $onError( $error ); |
1729 | } |
1730 | return $out; |
1731 | } |
1732 | |
/**
 * Development aid: print one row of a plain-text trace table to standard output.
 *
 * Intended to be called from the tokenizer loop in minifyInternal() (the call there is
 * normally commented out). The first call also prints a header row and a dashed
 * separator row; every call prints one data row.
 *
 * Before printing, $top, $state and $type are replaced by the name of a class constant
 * with a strictly identical value, when one exists. Once a value has been replaced
 * by a name it is a string and so cannot be replaced again by a later constant,
 * unless a constant's value is that very string. A value that
 * matches no constant is printed unchanged.
 *
 * @param null|false|int $top Top of the state stack; may be false because end() returns
 *  false for an empty stack
 * @param string $last Last character of the previously emitted token
 * @param int $state Current parser state
 * @param string $ch First character of the current token
 * @param string $token The current token
 * @param int $type Token type of the current token
 */
private static function debug(
	$top, string $last,
	int $state, string $ch, string $token, int $type
): void {
	// Persists across calls so that the table header is only printed once per process.
	static $first = true;

	// Shared layout for the header row and every data row.
	$rowFormat = "| %-29s | %-4s | %-29s | %-2s | %-10s | %-29s\n";

	// Reflect once and translate raw constant values into their names for readability.
	$constants = ( new \ReflectionClass( self::class ) )->getConstants();
	foreach ( $constants as $name => $value ) {
		if ( $value === $top ) {
			$top = $name;
		}
		if ( $value === $state ) {
			$state = $name;
		}
		if ( $value === $type ) {
			$type = $name;
		}
	}

	if ( $first ) {
		printf( $rowFormat, 'topOfStack', 'last', 'state', 'ch', 'token', 'type' );
		// The '- flag pads each (empty) cell with dashes, producing the separator row.
		printf( "| %'-29s | %'-4s | %'-29s | %'-2s | %'-10s | %'-29s\n",
			'', '', '', '', '', '' );
		$first = false;
	}

	// The cast turns a null or false $top into an empty cell.
	printf( $rowFormat, (string)$top, $last, $state, $ch, $token, $type );
}
1771 | } |