MediaWiki  master
JavaScriptMinifier.php
Go to the documentation of this file.
1 <?php
29 
30  /* Parsing states.
31  * The state machine is only necessary to decide whether to parse a slash as division
32  * operator or as regexp literal.
33  * States are named after the next expected item. We only distinguish states when the
34  * distinction is relevant for our purpose.
35  */
36  const STATEMENT = 0;
37  const CONDITION = 1;
39  const EXPRESSION = 3;
40  const EXPRESSION_NO_NL = 4; // only relevant for semicolon insertion
41  const EXPRESSION_OP = 5;
42  const EXPRESSION_FUNC = 6;
43  const EXPRESSION_TERNARY = 7; // used to determine the role of a colon
46  const PAREN_EXPRESSION = 10; // expression which is not on the top level
47  const PAREN_EXPRESSION_OP = 11;
49  const PROPERTY_EXPRESSION = 13; // expression which is within an object literal
52 
53  /* Token types */
54  const TYPE_UN_OP = 101; // unary operators
55  const TYPE_INCR_OP = 102; // ++ and --
56  const TYPE_BIN_OP = 103; // binary operators
57  const TYPE_ADD_OP = 104; // + and - which can be either unary or binary ops
58  const TYPE_HOOK = 105; // ?
59  const TYPE_COLON = 106; // :
60  const TYPE_COMMA = 107; // ,
61  const TYPE_SEMICOLON = 108; // ;
62  const TYPE_BRACE_OPEN = 109; // {
63  const TYPE_BRACE_CLOSE = 110; // }
64  const TYPE_PAREN_OPEN = 111; // ( and [
65  const TYPE_PAREN_CLOSE = 112; // ) and ]
66  const TYPE_RETURN = 113; // keywords: break, continue, return, throw
67  const TYPE_IF = 114; // keywords: catch, for, with, switch, while, if
68  const TYPE_DO = 115; // keywords: case, var, finally, else, do, try
69  const TYPE_FUNC = 116; // keywords: function
70  const TYPE_LITERAL = 117; // all literals, identifiers and unrecognised tokens
71 
72  const ACTION_GOTO = 201;
73  const ACTION_PUSH = 202;
74  const ACTION_POP = 203;
75 
76  // Sanity limit to avoid excessive memory usage
77  const STACK_LIMIT = 1000;
78 
89  private static $maxLineLength = 1000;
90 
/**
 * Returns minified JavaScript code.
 *
 * Walks through $s once, token by token. Whitespace and comments are dropped,
 * a single space or newline is re-inserted only where two tokens would
 * otherwise merge or where semicolon insertion depends on the newline, and
 * output lines are wrapped once they would exceed self::$maxLineLength.
 * The state machine below exists only to tell a division slash from the
 * start of a regexp literal and to decide where a newline must be kept.
 *
 * @param string $s JavaScript code to minify
 * @return string|bool Minified code, or whatever parseError() returns
 *  (currently false) when a malformed numeric literal is encountered
 */
97  public static function minify( $s ) {
98  // First we declare a few tables that contain our parsing rules
99 
100  // $opChars : Characters which can be combined without whitespace between them.
101  $opChars = [
102  // ECMAScript 5.1 § 7.7 Punctuators
103  // Unlike the spec, these are individual symbols, not sequences.
104  '{' => true,
105  '}' => true,
106  '(' => true,
107  ')' => true,
108  '[' => true,
109  ']' => true,
110  '.' => true,
111  ';' => true,
112  ',' => true,
113  '<' => true,
114  '>' => true,
115  '=' => true,
116  '!' => true,
117  '+' => true,
118  '-' => true,
119  '*' => true,
120  '%' => true,
121  '&' => true,
122  '|' => true,
123  '^' => true,
124  '~' => true,
125  '?' => true,
126  ':' => true,
127  '/' => true,
128  // ECMAScript 5.1 § 7.8.4 String Literals
129  '"' => true,
130  "'" => true,
131  ];
132 
133  // $tokenTypes : Map keywords and operators to their corresponding token type
134  $tokenTypes = [
135  // ECMAScript 5.1 § 11.4 Unary Operators
136  // ECMAScript 5.1 § 11.6 Additive Operators
137  // UnaryExpression includes PostfixExpression, which includes 'new'.
138  'new' => self::TYPE_UN_OP,
139  'delete' => self::TYPE_UN_OP,
140  'void' => self::TYPE_UN_OP,
141  'typeof' => self::TYPE_UN_OP,
142  '++' => self::TYPE_INCR_OP,
143  '--' => self::TYPE_INCR_OP,
144  '+' => self::TYPE_ADD_OP,
145  '-' => self::TYPE_ADD_OP,
146  '~' => self::TYPE_UN_OP,
147  '!' => self::TYPE_UN_OP,
148  // ECMAScript 5.1 § 11.5 Multiplicative Operators
149  '*' => self::TYPE_BIN_OP,
150  '/' => self::TYPE_BIN_OP,
151  '%' => self::TYPE_BIN_OP,
152  // ECMAScript 5.1 § 11.7 Bitwise Shift Operators
153  '<<' => self::TYPE_BIN_OP,
154  '>>' => self::TYPE_BIN_OP,
155  '>>>' => self::TYPE_BIN_OP,
156  // ECMAScript 5.1 § 11.8 Relational Operators
157  '<' => self::TYPE_BIN_OP,
158  '>' => self::TYPE_BIN_OP,
159  '<=' => self::TYPE_BIN_OP,
160  '>=' => self::TYPE_BIN_OP,
161  // ECMAScript 5.1 § 11.9 Equality Operators
162  '==' => self::TYPE_BIN_OP,
163  '!=' => self::TYPE_BIN_OP,
164  '===' => self::TYPE_BIN_OP,
165  '!==' => self::TYPE_BIN_OP,
166  'instanceof' => self::TYPE_BIN_OP,
167  'in' => self::TYPE_BIN_OP,
168  // ECMAScript 5.1 § 11.10 Binary Bitwise Operators
169  '&' => self::TYPE_BIN_OP,
170  '^' => self::TYPE_BIN_OP,
171  '|' => self::TYPE_BIN_OP,
172  // ECMAScript 5.1 § 11.11 Binary Logical Operators
173  '&&' => self::TYPE_BIN_OP,
174  '||' => self::TYPE_BIN_OP,
175  // ECMAScript 5.1 § 11.12 Conditional Operator
176  // Also known as ternary.
177  '?' => self::TYPE_HOOK,
178  ':' => self::TYPE_COLON,
179  // ECMAScript 5.1 § 11.13 Assignment Operators
180  '=' => self::TYPE_BIN_OP,
181  '*=' => self::TYPE_BIN_OP,
182  '/=' => self::TYPE_BIN_OP,
183  '%=' => self::TYPE_BIN_OP,
184  '+=' => self::TYPE_BIN_OP,
185  '-=' => self::TYPE_BIN_OP,
186  '<<=' => self::TYPE_BIN_OP,
187  '>>=' => self::TYPE_BIN_OP,
188  '>>>=' => self::TYPE_BIN_OP,
189  '&=' => self::TYPE_BIN_OP,
190  '^=' => self::TYPE_BIN_OP,
191  '|=' => self::TYPE_BIN_OP,
192  // ECMAScript 5.1 § 11.14 Comma Operator
193  ',' => self::TYPE_COMMA,
194 
195  // The keywords that disallow LineTerminator before their
196  // (sometimes optional) Expression or Identifier.
197  //
198  // keyword ;
199  // keyword [no LineTerminator here] Identifier ;
200  // keyword [no LineTerminator here] Expression ;
201  //
202  // See also ECMAScript 5.1:
203  // - § 12.7 The continue Statement
204  // - § 12.8 The break Statement
205  // - § 12.9 The return Statement
206  // - § 12.13 The throw Statement
207  'continue' => self::TYPE_RETURN,
208  'break' => self::TYPE_RETURN,
209  'return' => self::TYPE_RETURN,
210  'throw' => self::TYPE_RETURN,
211 
212  // These keywords require a parenthesised Expression or Identifier
213  // before the next Statement.
214  //
215  // keyword ( Expression ) Statement
216  // keyword ( Identifier ) Statement
217  //
218  // See also ECMAScript 5.1:
219  // - § 12.5 The if Statement
220  // - § 12.6 Iteration Statements (do, while, for)
221  // - § 12.10 The with Statement
222  // - § 12.11 The switch Statement
223  // - § 12.14 The try Statement (catch)
224  'if' => self::TYPE_IF,
225  'catch' => self::TYPE_IF,
226  'while' => self::TYPE_IF,
227  'for' => self::TYPE_IF,
228  'switch' => self::TYPE_IF,
229  'with' => self::TYPE_IF,
230 
231  // The keywords followed by an Identifier, Statement,
232  // Expression, or Block.
233  //
234  // var Identifier
235  // else Statement
236  // do Statement
237  // case Expression
238  // try Block
239  // finally Block
240  //
241  // See also ECMAScript 5.1:
242  // - § 12.2 Variable Statement
243  // - § 12.5 The if Statement (else)
244  // - § 12.6 Iteration Statements (do, while, for)
245  // - § 12.11 The switch Statement (case)
246  // - § 12.14 The try Statement
247  'var' => self::TYPE_DO,
248  'else' => self::TYPE_DO,
249  'do' => self::TYPE_DO,
250  'case' => self::TYPE_DO,
251  'try' => self::TYPE_DO,
252  'finally' => self::TYPE_DO,
253 
254  // ECMAScript 5.1 § 13 Function Definition
255  'function' => self::TYPE_FUNC,
256 
257  // Can be one of:
258  // - DecimalLiteral (ECMAScript 5.1 § 7.8.3 Numeric Literals)
259  // - MemberExpression (ECMAScript 5.1 § 11.2 Left-Hand-Side Expressions)
260  '.' => self::TYPE_BIN_OP,
261 
262  // Can be one of:
263  // - Block (ECMAScript 5.1 § 12.1 Block)
264  // - ObjectLiteral (ECMAScript 5.1 § 11.1 Primary Expressions)
265  '{' => self::TYPE_BRACE_OPEN,
266  '}' => self::TYPE_BRACE_CLOSE,
267 
268  // Can be one of:
269  // - Parenthesised Identifier or Expression after a
270  // TYPE_IF or TYPE_FUNC keyword.
271  // - PrimaryExpression (ECMAScript 5.1 § 11.1 Primary Expressions)
272  // - CallExpression (ECMAScript 5.1 § 11.2 Left-Hand-Side Expressions)
273  '(' => self::TYPE_PAREN_OPEN,
274  ')' => self::TYPE_PAREN_CLOSE,
275 
276  // Can be one of:
277  // - ArrayLiteral (ECMAScript 5.1 § 11.1 Primary Expressions)
278  '[' => self::TYPE_PAREN_OPEN,
279  ']' => self::TYPE_PAREN_CLOSE,
280 
281  // Can be one of:
282  // - End of any statement
283  // - EmptyStatement (ECMAScript 5.1 § 12.3 Empty Statement)
284  ';' => self::TYPE_SEMICOLON,
285  ];
286 
287  // $model : This is the main table for our state machine. For every state/token pair
288  // the desired action is defined.
289  //
290  // The state pushed onto the stack by ACTION_PUSH will be returned to by ACTION_POP.
291  //
292  // A given state/token pair MAY NOT specify both ACTION_POP and ACTION_GOTO.
293  // In the event of such mistake, ACTION_POP is used instead of ACTION_GOTO.
294  $model = [
295  // Statement - This is the initial state.
296  self::STATEMENT => [
297  self::TYPE_UN_OP => [
298  self::ACTION_GOTO => self::EXPRESSION,
299  ],
300  self::TYPE_INCR_OP => [
301  self::ACTION_GOTO => self::EXPRESSION,
302  ],
303  self::TYPE_ADD_OP => [
304  self::ACTION_GOTO => self::EXPRESSION,
305  ],
306  self::TYPE_BRACE_OPEN => [
307  // Use of '{' in statement context, creates a Block.
308  self::ACTION_PUSH => self::STATEMENT,
309  ],
310  self::TYPE_BRACE_CLOSE => [
311  // Ends a Block
312  self::ACTION_POP => true,
313  ],
314  self::TYPE_PAREN_OPEN => [
315  self::ACTION_PUSH => self::EXPRESSION_OP,
316  self::ACTION_GOTO => self::PAREN_EXPRESSION,
317  ],
318  self::TYPE_RETURN => [
319  self::ACTION_GOTO => self::EXPRESSION_NO_NL,
320  ],
321  self::TYPE_IF => [
322  self::ACTION_GOTO => self::CONDITION,
323  ],
324  self::TYPE_FUNC => [
325  self::ACTION_GOTO => self::CONDITION,
326  ],
327  self::TYPE_LITERAL => [
328  self::ACTION_GOTO => self::EXPRESSION_OP,
329  ],
330  ],
331  self::CONDITION => [
332  self::TYPE_PAREN_OPEN => [
333  self::ACTION_PUSH => self::STATEMENT,
334  self::ACTION_GOTO => self::PAREN_EXPRESSION,
335  ],
336  ],
337  // Property assignment - This is an object literal declaration.
338  // For example: `{ key: value }`
339  self::PROPERTY_ASSIGNMENT => [
340  self::TYPE_COLON => [
341  self::ACTION_GOTO => self::PROPERTY_EXPRESSION,
342  ],
343  self::TYPE_BRACE_OPEN => [
344  self::ACTION_PUSH => self::PROPERTY_ASSIGNMENT,
345  self::ACTION_GOTO => self::STATEMENT,
346  ],
347  self::TYPE_BRACE_CLOSE => [
348  self::ACTION_POP => true,
349  ],
350  ],
351  self::EXPRESSION => [
352  self::TYPE_SEMICOLON => [
353  self::ACTION_GOTO => self::STATEMENT,
354  ],
355  self::TYPE_BRACE_OPEN => [
356  self::ACTION_PUSH => self::EXPRESSION_OP,
357  self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
358  ],
359  self::TYPE_BRACE_CLOSE => [
360  self::ACTION_POP => true,
361  ],
362  self::TYPE_PAREN_OPEN => [
363  self::ACTION_PUSH => self::EXPRESSION_OP,
364  self::ACTION_GOTO => self::PAREN_EXPRESSION,
365  ],
366  self::TYPE_FUNC => [
367  self::ACTION_GOTO => self::EXPRESSION_FUNC,
368  ],
369  self::TYPE_LITERAL => [
370  self::ACTION_GOTO => self::EXPRESSION_OP,
371  ],
372  ],
373  self::EXPRESSION_NO_NL => [
374  self::TYPE_SEMICOLON => [
375  self::ACTION_GOTO => self::STATEMENT,
376  ],
377  self::TYPE_BRACE_OPEN => [
378  self::ACTION_PUSH => self::EXPRESSION_OP,
379  self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
380  ],
381  self::TYPE_BRACE_CLOSE => [
382  self::ACTION_POP => true,
383  ],
384  self::TYPE_PAREN_OPEN => [
385  self::ACTION_PUSH => self::EXPRESSION_OP,
386  self::ACTION_GOTO => self::PAREN_EXPRESSION,
387  ],
388  self::TYPE_FUNC => [
389  self::ACTION_GOTO => self::EXPRESSION_FUNC,
390  ],
391  self::TYPE_LITERAL => [
392  self::ACTION_GOTO => self::EXPRESSION_OP,
393  ],
394  ],
395  self::EXPRESSION_OP => [
396  self::TYPE_BIN_OP => [
397  self::ACTION_GOTO => self::EXPRESSION,
398  ],
399  self::TYPE_ADD_OP => [
400  self::ACTION_GOTO => self::EXPRESSION,
401  ],
402  self::TYPE_HOOK => [
403  self::ACTION_PUSH => self::EXPRESSION,
404  self::ACTION_GOTO => self::EXPRESSION_TERNARY,
405  ],
406  self::TYPE_COLON => [
407  self::ACTION_GOTO => self::STATEMENT,
408  ],
409  self::TYPE_COMMA => [
410  self::ACTION_GOTO => self::EXPRESSION,
411  ],
412  self::TYPE_SEMICOLON => [
413  self::ACTION_GOTO => self::STATEMENT,
414  ],
415  self::TYPE_PAREN_OPEN => [
416  self::ACTION_PUSH => self::EXPRESSION_OP,
417  self::ACTION_GOTO => self::PAREN_EXPRESSION,
418  ],
419  self::TYPE_BRACE_CLOSE => [
420  self::ACTION_POP => true,
421  ],
422  ],
423  self::EXPRESSION_FUNC => [
424  self::TYPE_BRACE_OPEN => [
425  self::ACTION_PUSH => self::EXPRESSION_OP,
426  self::ACTION_GOTO => self::STATEMENT,
427  ],
428  ],
429  self::EXPRESSION_TERNARY => [
430  self::TYPE_BRACE_OPEN => [
431  self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP,
432  self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
433  ],
434  self::TYPE_PAREN_OPEN => [
435  self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP,
436  self::ACTION_GOTO => self::PAREN_EXPRESSION,
437  ],
438  self::TYPE_FUNC => [
439  self::ACTION_GOTO => self::EXPRESSION_TERNARY_FUNC,
440  ],
441  self::TYPE_LITERAL => [
442  self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP,
443  ],
444  ],
445  self::EXPRESSION_TERNARY_OP => [
446  self::TYPE_BIN_OP => [
447  self::ACTION_GOTO => self::EXPRESSION_TERNARY,
448  ],
449  self::TYPE_ADD_OP => [
450  self::ACTION_GOTO => self::EXPRESSION_TERNARY,
451  ],
452  self::TYPE_HOOK => [
453  self::ACTION_PUSH => self::EXPRESSION_TERNARY,
454  self::ACTION_GOTO => self::EXPRESSION_TERNARY,
455  ],
456  self::TYPE_COMMA => [
457  self::ACTION_GOTO => self::EXPRESSION_TERNARY,
458  ],
459  self::TYPE_PAREN_OPEN => [
460  self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP,
461  self::ACTION_GOTO => self::PAREN_EXPRESSION,
462  ],
463  self::TYPE_COLON => [
464  self::ACTION_POP => true,
465  ],
466  ],
467  self::EXPRESSION_TERNARY_FUNC => [
468  self::TYPE_BRACE_OPEN => [
469  self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP,
470  self::ACTION_GOTO => self::STATEMENT,
471  ],
472  ],
473  self::PAREN_EXPRESSION => [
474  self::TYPE_BRACE_OPEN => [
475  self::ACTION_PUSH => self::PAREN_EXPRESSION_OP,
476  self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
477  ],
478  self::TYPE_PAREN_OPEN => [
479  self::ACTION_PUSH => self::PAREN_EXPRESSION_OP,
480  self::ACTION_GOTO => self::PAREN_EXPRESSION,
481  ],
482  self::TYPE_PAREN_CLOSE => [
483  self::ACTION_POP => true,
484  ],
485  self::TYPE_FUNC => [
486  self::ACTION_GOTO => self::PAREN_EXPRESSION_FUNC,
487  ],
488  self::TYPE_LITERAL => [
489  self::ACTION_GOTO => self::PAREN_EXPRESSION_OP,
490  ],
491  ],
492  self::PAREN_EXPRESSION_OP => [
493  self::TYPE_BIN_OP => [
494  self::ACTION_GOTO => self::PAREN_EXPRESSION,
495  ],
496  self::TYPE_ADD_OP => [
497  self::ACTION_GOTO => self::PAREN_EXPRESSION,
498  ],
499  self::TYPE_HOOK => [
500  self::ACTION_GOTO => self::PAREN_EXPRESSION,
501  ],
502  self::TYPE_COLON => [
503  self::ACTION_GOTO => self::PAREN_EXPRESSION,
504  ],
505  self::TYPE_COMMA => [
506  self::ACTION_GOTO => self::PAREN_EXPRESSION,
507  ],
508  self::TYPE_SEMICOLON => [
509  self::ACTION_GOTO => self::PAREN_EXPRESSION,
510  ],
511  self::TYPE_PAREN_OPEN => [
512  self::ACTION_PUSH => self::PAREN_EXPRESSION_OP,
513  self::ACTION_GOTO => self::PAREN_EXPRESSION,
514  ],
515  self::TYPE_PAREN_CLOSE => [
516  self::ACTION_POP => true,
517  ],
518  ],
519  self::PAREN_EXPRESSION_FUNC => [
520  self::TYPE_BRACE_OPEN => [
521  self::ACTION_PUSH => self::PAREN_EXPRESSION_OP,
522  self::ACTION_GOTO => self::STATEMENT,
523  ],
524  ],
525  // Property expression - The value of a key in an object literal.
526  self::PROPERTY_EXPRESSION => [
527  self::TYPE_BRACE_OPEN => [
528  self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
529  self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
530  ],
531  self::TYPE_BRACE_CLOSE => [
532  self::ACTION_POP => true,
533  ],
534  self::TYPE_PAREN_OPEN => [
535  self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
536  self::ACTION_GOTO => self::PAREN_EXPRESSION,
537  ],
538  self::TYPE_FUNC => [
539  self::ACTION_GOTO => self::PROPERTY_EXPRESSION_FUNC,
540  ],
541  self::TYPE_LITERAL => [
542  self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP,
543  ],
544  ],
545  self::PROPERTY_EXPRESSION_OP => [
546  self::TYPE_BIN_OP => [
547  self::ACTION_GOTO => self::PROPERTY_EXPRESSION,
548  ],
549  self::TYPE_ADD_OP => [
550  self::ACTION_GOTO => self::PROPERTY_EXPRESSION,
551  ],
552  self::TYPE_HOOK => [
553  self::ACTION_PUSH => self::PROPERTY_EXPRESSION,
554  self::ACTION_GOTO => self::EXPRESSION_TERNARY,
555  ],
556  self::TYPE_COMMA => [
557  self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
558  ],
559  self::TYPE_BRACE_OPEN => [
560  self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
561  ],
562  self::TYPE_BRACE_CLOSE => [
563  self::ACTION_POP => true,
564  ],
565  self::TYPE_PAREN_OPEN => [
566  self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
567  self::ACTION_GOTO => self::PAREN_EXPRESSION,
568  ],
569  ],
570  self::PROPERTY_EXPRESSION_FUNC => [
571  self::TYPE_BRACE_OPEN => [
572  self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
573  self::ACTION_GOTO => self::STATEMENT,
574  ],
575  ],
576  ];
577 
578  // $semicolon : Rules for when a semicolon insertion is appropriate
579  $semicolon = [
580  self::EXPRESSION_NO_NL => [
581  self::TYPE_UN_OP => true,
582  self::TYPE_INCR_OP => true,
583  self::TYPE_ADD_OP => true,
584  self::TYPE_BRACE_OPEN => true,
585  self::TYPE_PAREN_OPEN => true,
586  self::TYPE_RETURN => true,
587  self::TYPE_IF => true,
588  self::TYPE_DO => true,
589  self::TYPE_FUNC => true,
590  self::TYPE_LITERAL => true
591  ],
592  self::EXPRESSION_OP => [
593  self::TYPE_UN_OP => true,
594  self::TYPE_INCR_OP => true,
595  self::TYPE_BRACE_OPEN => true,
596  self::TYPE_RETURN => true,
597  self::TYPE_IF => true,
598  self::TYPE_DO => true,
599  self::TYPE_FUNC => true,
600  self::TYPE_LITERAL => true
601  ]
602  ];
603 
604  // $divStates : Contains all states that can be followed by a division operator
605  $divStates = [
606  self::EXPRESSION_OP => true,
607  self::EXPRESSION_TERNARY_OP => true,
608  self::PAREN_EXPRESSION_OP => true,
609  self::PROPERTY_EXPRESSION_OP => true
610  ];
611 
612  // Here's where the minifying takes place: Loop through the input, looking for tokens
613  // and output them to $out, taking actions according to the rules defined above when appropriate.
614  $out = '';
615  $pos = 0;
616  $length = strlen( $s );
617  $lineLength = 0;
618  $newlineFound = true;
619  $state = self::STATEMENT;
620  $stack = [];
621  $last = ';'; // Pretend that we have already seen a semicolon
622  while ( $pos < $length ) {
623  // First, skip over any whitespace and multiline comments, recording whether we
624  // found any newline character
625  $skip = strspn( $s, " \t\n\r\xb\xc", $pos );
626  if ( !$skip ) {
627  $ch = $s[$pos];
628  if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
629  // Multiline comment. Search for the end token or EOT.
630  $end = strpos( $s, '*/', $pos + 2 );
631  $skip = $end === false ? $length - $pos : $end - $pos + 2;
632  }
633  }
634  if ( $skip ) {
635  // The semicolon insertion mechanism needs to know whether there was a newline
636  // between two tokens, so record it now.
637  if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
638  $newlineFound = true;
639  }
640  $pos += $skip;
641  continue;
642  }
643  // Handle C++-style comments and html comments, which are treated as single line
644  // comments by the browser, regardless of whether the end tag is on the same line.
645  // Handle --> the same way, but only if it's at the beginning of the line
646  if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
647  || ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
648  || ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
649  ) {
650  $pos += strcspn( $s, "\r\n", $pos );
651  continue;
652  }
653 
654  // Find out which kind of token we're handling.
655  // Note: $end must point past the end of the current token
656  // so that `substr($s, $pos, $end - $pos)` would be the entire token.
657  // In other words, $end will be the offset of the last relevant character
658  // in the stream + 1, or simply put: The offset of the first character
659  // of any next token in the stream.
660  $end = $pos + 1;
661  // Handle string literals
662  if ( $ch === "'" || $ch === '"' ) {
663  // Search to the end of the string literal, skipping over backslash escapes
664  $search = $ch . '\\';
665  do{
666  // Speculatively add 2 to the end so that if we see a backslash,
667  // the next iteration will start 2 characters further (one for the
668  // backslash, one for the escaped character).
669  // We'll correct this outside the loop.
670  $end += strcspn( $s, $search, $end ) + 2;
671  // If the last character in our search for a quote or a backslash
672  // matched a backslash and we haven't reached the end, keep searching..
673  } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
674  // Correction (1): Undo speculative add, keep only one (end of string literal)
675  $end--;
676  if ( $end > $length ) {
677  // Correction (2): Loop wrongly assumed an end quote ended the search,
678  // but search ended because we've reached the end. Correct $end.
679  // TODO: This is invalid and should throw.
680  $end--;
681  }
682  // We have to distinguish between regexp literals and division operators
683  // A division operator is only possible in certain states
684  } elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
685  // Regexp literal
686  for ( ; ; ) {
687  // Search until we find "/" (end of regexp), "\" (backslash escapes),
688  // or "[" (start of character classes).
689  do{
690  // Speculatively add 2 to ensure next iteration skips
691  // over backslash and escaped character.
692  // We'll correct this outside the loop.
693  $end += strcspn( $s, '/[\\', $end ) + 2;
694  // If backslash escape, keep searching...
695  } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
696  // Correction (1): Undo speculative add, keep only one (end of regexp)
697  $end--;
698  if ( $end > $length ) {
699  // Correction (2): Loop wrongly assumed end slash was seen
700  // String ended without end of regexp. Correct $end.
701  // TODO: This is invalid and should throw.
702  $end--;
703  break;
704  }
705  if ( $s[$end - 1] === '/' ) {
706  break;
707  }
708  // (Implicit else), we must've found the start of a char class,
709  // skip until we find "]" (end of char class), or "\" (backslash escape)
710  do{
711  // Speculatively add 2 for backslash escape.
712  // We'll subtract one outside the loop.
713  $end += strcspn( $s, ']\\', $end ) + 2;
714  // If backslash escape, keep searching...
715  } while ( $end - 2 < $length && $s[$end - 2] === '\\' );
716  // Correction (1): Undo speculative add, keep only one (end of char class)
717  $end--;
718  if ( $end > $length ) {
719  // Correction (2): Loop wrongly assumed "]" was seen
720  // String ended without ending char class or regexp. Correct $end.
721  // TODO: This is invalid and should throw.
722  $end--;
723  break;
724  }
725  }
726  // Search past the regexp modifiers (gi)
727  while ( $end < $length && ctype_alpha( $s[$end] ) ) {
728  $end++;
729  }
730  } elseif (
731  $ch === '0'
732  && ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
733  ) {
734  // Hex numeric literal
735  $end++; // x or X
736  $len = strspn( $s, '0123456789ABCDEFabcdef', $end );
737  if ( !$len ) {
738  return self::parseError(
739  $s,
740  $pos,
741  'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...'
742  );
743  }
744  $end += $len;
745  } elseif (
746  ctype_digit( $ch )
747  || ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
748  ) {
    // Decimal numeric literal: starts with a digit, or with a dot followed by a digit.
    // Integer part, optional fraction, optional exponent. Up to two consecutive
    // dots are accepted; three or more are reported as an error.
749  $end += strspn( $s, '0123456789', $end );
750  $decimal = strspn( $s, '.', $end );
751  if ( $decimal ) {
752  if ( $decimal > 2 ) {
753  return self::parseError( $s, $end, 'The number has too many decimal points' );
754  }
755  $end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
756  }
757  $exponent = strspn( $s, 'eE', $end );
758  if ( $exponent ) {
759  if ( $exponent > 1 ) {
760  return self::parseError( $s, $end, 'Number with several E' );
761  }
762  $end++;
763 
764  // + sign is optional; - sign is required.
765  $end += strspn( $s, '-+', $end );
766  $len = strspn( $s, '0123456789', $end );
767  if ( !$len ) {
768  return self::parseError(
769  $s,
770  $pos,
771  'No decimal digits after e, how many zeroes should be added?'
772  );
773  }
774  $end += $len;
775  }
776  } elseif ( isset( $opChars[$ch] ) ) {
777  // Punctuation character. Search for the longest matching operator.
778  while (
779  $end < $length
780  && isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
781  ) {
782  $end++;
783  }
784  } else {
785  // Identifier or reserved word. Search for the end by excluding whitespace and
786  // punctuation.
787  $end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
788  }
789 
790  // Now get the token type from our type array
791  $token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
792  $type = $tokenTypes[$token] ?? self::TYPE_LITERAL;
793 
794  if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
795  // This token triggers the semicolon insertion mechanism of javascript. While we
796  // could add the ; token here ourselves, keeping the newline has a few advantages.
797  $out .= "\n";
798  $state = self::STATEMENT;
799  $lineLength = 0;
800  } elseif ( $lineLength + $end - $pos > self::$maxLineLength &&
801  !isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP ) {
802  // This line would get too long if we added $token, so add a newline first.
803  // Only do this if it won't trigger semicolon insertion and if it won't
804  // put a postfix increment operator on its own line, which is illegal in js.
805  $out .= "\n";
806  $lineLength = 0;
807  // Check, whether we have to separate the token from the last one with whitespace
808  } elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
809  $out .= ' ';
810  $lineLength++;
811  // Don't accidentally create ++, -- or // tokens
812  } elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
813  $out .= ' ';
814  $lineLength++;
815  }
    // Shorten the boolean literals to !0 and !1. This is limited to tokens met in the
    // EXPRESSION or PROPERTY_EXPRESSION state and skipped right after a dot, where the
    // word is a property name rather than a literal.
816  if (
817  $type === self::TYPE_LITERAL
818  && ( $token === 'true' || $token === 'false' )
819  && ( $state === self::EXPRESSION || $state === self::PROPERTY_EXPRESSION )
820  && $last !== '.'
821  ) {
822  $token = ( $token === 'true' ) ? '!0' : '!1';
823  }
824 
825  $out .= $token;
826  $lineLength += $end - $pos; // += strlen( $token )
827  $last = $s[$end - 1];
828  $pos = $end;
829  $newlineFound = false;
830 
831  // Now that we have output our token, transition into the new state.
    // A push is silently dropped once the stack already holds STACK_LIMIT entries.
    // A pop only happens while the stack is non-empty; otherwise a GOTO for the
    // same state/token pair, if one is defined, is applied instead.
832  if ( isset( $model[$state][$type][self::ACTION_PUSH] ) &&
833  count( $stack ) < self::STACK_LIMIT
834  ) {
835  $stack[] = $model[$state][$type][self::ACTION_PUSH];
836  }
837  if ( $stack && isset( $model[$state][$type][self::ACTION_POP] ) ) {
838  $state = array_pop( $stack );
839  } elseif ( isset( $model[$state][$type][self::ACTION_GOTO] ) ) {
840  $state = $model[$state][$type][self::ACTION_GOTO];
841  }
842  }
843  return $out;
844  }
845 
/**
 * Report a parse error found while minifying.
 *
 * This is currently a stub: none of the arguments are used and the method
 * always returns false. minify() returns that value directly to its caller
 * when it meets a malformed hexadecimal or decimal literal.
 *
 * @param string $fullJavascript The complete input that was being minified
 * @param int $position Byte offset into $fullJavascript at which the problem was detected
 * @param string $errorMsg Human-readable description of the problem
 * @return bool Always false
 */
public static function parseError( $fullJavascript, $position, $errorMsg ) {
	// TODO: Handle the error: trigger_error, throw exception, return false...
	return false;
}
850 }
This class is meant to safely minify javascript code, while leaving syntactically correct programs in...
static parseError( $fullJavascript, $position, $errorMsg)
static $maxLineLength
Maximum line length.
return true
Definition: router.php:92
static minify( $s)
Returns minified JavaScript code.