Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
242 / 242 |
|
100.00% |
16 / 16 |
CRAP | |
100.00% |
1 / 1 |
Token | |
100.00% |
242 / 242 |
|
100.00% |
16 / 16 |
117 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
92 / 92 |
|
100.00% |
1 / 1 |
55 | |||
type | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
value | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
typeFlag | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
representation | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
unit | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
significant | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
copyWithSignificance | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
toTokenArray | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
toComponentValueArray | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
6 | |||
escapeIdent | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
escapeString | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
escapePregCallback | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
__toString | |
100.00% |
67 / 67 |
|
100.00% |
1 / 1 |
37 | |||
separate | |
100.00% |
43 / 43 |
|
100.00% |
1 / 1 |
4 | |||
urangeHack | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * @file |
4 | * @license https://opensource.org/licenses/Apache-2.0 Apache-2.0 |
5 | */ |
6 | |
7 | namespace Wikimedia\CSS\Objects; |
8 | |
9 | use InvalidArgumentException; |
10 | use UnexpectedValueException; |
11 | |
12 | /** |
13 | * Represent a CSS token |
14 | */ |
15 | class Token extends ComponentValue { |
16 | public const T_IDENT = "ident"; |
17 | public const T_FUNCTION = "function"; |
18 | public const T_AT_KEYWORD = "at-keyword"; |
19 | public const T_HASH = "hash"; |
20 | public const T_STRING = "string"; |
21 | public const T_BAD_STRING = "bad-string"; |
22 | public const T_URL = "url"; |
23 | public const T_BAD_URL = "bad-url"; |
24 | public const T_DELIM = "delim"; |
25 | public const T_NUMBER = "number"; |
26 | public const T_PERCENTAGE = "percentage"; |
27 | public const T_DIMENSION = "dimension"; |
28 | public const T_WHITESPACE = "whitespace"; |
29 | public const T_CDO = "CDO"; |
30 | public const T_CDC = "CDC"; |
31 | public const T_COLON = "colon"; |
32 | public const T_SEMICOLON = "semicolon"; |
33 | public const T_COMMA = "comma"; |
34 | public const T_LEFT_BRACKET = "["; |
35 | public const T_RIGHT_BRACKET = "]"; |
36 | public const T_LEFT_PAREN = "("; |
37 | public const T_RIGHT_PAREN = ")"; |
38 | public const T_LEFT_BRACE = "{"; |
39 | public const T_RIGHT_BRACE = "}"; |
40 | public const T_EOF = "EOF"; |
41 | |
42 | /** @var string One of the T_* constants */ |
43 | protected $type; |
44 | |
45 | /** @var string|int|float Value for various token types */ |
46 | protected $value = ''; |
47 | |
48 | /** @var string Type flag for various token types */ |
49 | protected $typeFlag = ''; |
50 | |
51 | /** @var string|null Representation for numeric tokens */ |
52 | protected $representation = null; |
53 | |
54 | /** @var string Unit for dimension tokens */ |
55 | protected $unit = ''; |
56 | |
57 | /** @var bool Whether this token is considered "significant" */ |
58 | protected $significant = true; |
59 | |
60 | /** @var int See ::urangeHack() */ |
61 | private $urangeHack = 0; |
62 | |
/**
 * Create a token, validating the data required by its type.
 *
 * @param string $type One of the T_* constants
 * @param string|array $value Value of the token, or an array with the
 *  following keys. Depending on the type, some keys may be required and
 *  some may be ignored.
 *  - value: (string|int|float) Value of the token. Required for T_IDENT,
 *    T_FUNCTION, T_AT_KEYWORD, T_STRING, T_URL, T_HASH and T_DELIM (where
 *    it must be exactly one character), and for T_NUMBER, T_PERCENTAGE
 *    and T_DIMENSION (where it must be numeric and finite).
 *  - position: (array) Token position in the input stream. Same format as
 *    returned by self::getPosition(): two integers at offsets 0 and 1.
 *  - typeFlag: (string) Flag for various token types. For T_HASH, 'id' or
 *    'unrestricted'. For T_NUMBER, T_PERCENTAGE, and T_DIMENSION, 'integer'
 *    or 'number'. For T_EOF, optionally 'recursion-depth-exceeded'.
 *  - representation: (string) String representation of the value for
 *    T_NUMBER, T_PERCENTAGE, and T_DIMENSION. Must be numerically equal to
 *    the value.
 *  - unit: (string) Unit for T_DIMENSION.
 *  - significant: (bool) Whether the token is considered "significant"
 * @throws InvalidArgumentException If the type is unknown, or if the data
 *  supplied for the type is missing or invalid
 */
public function __construct( $type, $value = [] ) {
	if ( !is_array( $value ) ) {
		$value = [ 'value' => $value ];
	}

	if ( isset( $value['position'] ) ) {
		$position = $value['position'];
		if ( !is_array( $position ) || count( $position ) !== 2 ) {
			throw new InvalidArgumentException( 'Position must be an array of two integers' );
		}
		// Read offsets 0 and 1 defensively so that a two-element array with
		// other keys is rejected by the check below rather than triggering
		// undefined-key warnings, and only store the values once validated.
		$line = $position[0] ?? null;
		$pos = $position[1] ?? null;
		if ( !is_int( $line ) || !is_int( $pos ) ) {
			throw new InvalidArgumentException( 'Position must be an array of two integers' );
		}
		$this->line = $line;
		$this->pos = $pos;
	}
	if ( isset( $value['significant'] ) ) {
		$this->significant = (bool)$value['significant'];
	}

	$this->type = $type;
	switch ( $type ) {
		case self::T_IDENT:
		case self::T_FUNCTION:
		case self::T_AT_KEYWORD:
		case self::T_STRING:
		case self::T_URL:
			if ( !isset( $value['value'] ) ) {
				throw new InvalidArgumentException( "Token type $this->type requires a value" );
			}
			$this->value = (string)$value['value'];
			break;

		case self::T_HASH:
			if ( !isset( $value['value'] ) ) {
				throw new InvalidArgumentException( "Token type $this->type requires a value" );
			}
			if ( !isset( $value['typeFlag'] ) ) {
				throw new InvalidArgumentException( "Token type $this->type requires a typeFlag" );
			}
			if ( !in_array( $value['typeFlag'], [ 'id', 'unrestricted' ], true ) ) {
				throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
			}
			$this->value = (string)$value['value'];
			$this->typeFlag = $value['typeFlag'];
			break;

		case self::T_DELIM:
			if ( !isset( $value['value'] ) ) {
				throw new InvalidArgumentException( "Token type $this->type requires a value" );
			}
			$this->value = (string)$value['value'];
			// A delimiter is one character, which may be several bytes.
			if ( mb_strlen( $this->value, 'UTF-8' ) !== 1 ) {
				throw new InvalidArgumentException(
					"Value for Token type $this->type must be a single character"
				);
			}
			break;

		case self::T_NUMBER:
		case self::T_PERCENTAGE:
		case self::T_DIMENSION:
			if ( !isset( $value['value'] ) ||
				!is_numeric( $value['value'] ) || !is_finite( $value['value'] )
			) {
				throw new InvalidArgumentException( "Token type $this->type requires a numeric value" );
			}
			if ( !isset( $value['typeFlag'] ) ) {
				throw new InvalidArgumentException( "Token type $this->type requires a typeFlag" );
			}
			$this->typeFlag = $value['typeFlag'];
			if ( $this->typeFlag === 'integer' ) {
				$this->value = (int)$value['value'];
				// Reject values that lose information when cast to int.
				if ( (float)$this->value !== (float)$value['value'] ) {
					throw new InvalidArgumentException(
						"typeFlag is 'integer', but value supplied is not an integer"
					);
				}
			} elseif ( $this->typeFlag === 'number' ) {
				$this->value = (float)$value['value'];
			} else {
				throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
			}

			if ( isset( $value['representation'] ) ) {
				if ( !is_numeric( $value['representation'] ) ) {
					throw new InvalidArgumentException( 'Representation must be numeric' );
				}
				$this->representation = $value['representation'];
				// The representation is only an alternative spelling of the
				// value; it must not denote a different number.
				if ( (float)$this->representation !== (float)$this->value ) {
					throw new InvalidArgumentException(
						"Representation \"$this->representation\" does not match value \"$this->value\""
					);
				}
			}

			if ( $type === self::T_DIMENSION ) {
				if ( !isset( $value['unit'] ) ) {
					throw new InvalidArgumentException( "Token type $this->type requires a unit" );
				}
				// Cast like the string-valued token types above, so that
				// unit() returns a string as documented.
				$this->unit = (string)$value['unit'];
			}
			break;

		case self::T_BAD_STRING:
		case self::T_BAD_URL:
		case self::T_WHITESPACE:
		case self::T_CDO:
		case self::T_CDC:
		case self::T_COLON:
		case self::T_SEMICOLON:
		case self::T_COMMA:
		case self::T_LEFT_BRACKET:
		case self::T_RIGHT_BRACKET:
		case self::T_LEFT_PAREN:
		case self::T_RIGHT_PAREN:
		case self::T_LEFT_BRACE:
		case self::T_RIGHT_BRACE:
			// These types carry no data beyond the type itself.
			break;

		case self::T_EOF:
			// Let EOF have a typeFlag of 'recursion-depth-exceeded', used
			// to avoid cascading errors when that occurs.
			if ( isset( $value['typeFlag'] ) && $value['typeFlag'] !== '' ) {
				$this->typeFlag = $value['typeFlag'];
				if ( $this->typeFlag !== 'recursion-depth-exceeded' ) {
					throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
				}
			}
			break;

		default:
			throw new InvalidArgumentException( "Unknown token type \"$this->type\"." );
	}
}
212 | |
/**
 * Report which kind of token this is.
 * @return string One of the Token::T_* constants
 */
public function type() {
	return $this->type;
}
220 | |
/**
 * Report the value carried by this token.
 *
 * The constructor stores a string for the textual token types, and an int
 * or float for the numeric ones depending on their type flag. Token types
 * without a value keep the default empty string.
 *
 * @return string|int|float
 */
public function value() {
	return $this->value;
}
228 | |
/**
 * Report the type flag of this token.
 *
 * The constructor sets it for T_HASH ('id' or 'unrestricted'), for the
 * numeric types ('integer' or 'number'), and optionally for T_EOF
 * ('recursion-depth-exceeded'). It is the empty string otherwise.
 *
 * @return string
 */
public function typeFlag() {
	return $this->typeFlag;
}
236 | |
/**
 * Report the textual representation stored for this numeric token.
 * @return string|null Null when no representation was supplied
 */
public function representation() {
	return $this->representation;
}
244 | |
/**
 * Report the unit of this T_DIMENSION token.
 * @return string Empty string for token types that have no unit
 */
public function unit() {
	return $this->unit;
}
252 | |
/**
 * Report whether this token is flagged as "significant".
 *
 * Tokens that are not "significant" may be dropped when minifying CSS:
 * most whitespace is optional, for instance, and so is the semicolon that
 * follows the final declaration of a block.
 *
 * @return bool
 */
public function significant() {
	return $this->significant;
}
265 | |
/**
 * Obtain a token equal to this one but with the given "significant" flag
 * @param bool $significant Whether the returned token is considered "significant"
 * @return Token This same instance when the flag already matches, otherwise
 *  a clone with the flag changed
 */
public function copyWithSignificance( $significant ) {
	$wanted = (bool)$significant;
	if ( $this->significant !== $wanted ) {
		$copy = clone $this;
		$copy->significant = $wanted;
		return $copy;
	}

	// Nothing to change, so no clone is needed.
	return $this;
}
280 | |
/**
 * @inheritDoc
 */
public function toTokenArray() {
	// A single token is its own one-element token list.
	return [ $this ];
}
285 | |
/**
 * @inheritDoc
 */
public function toComponentValueArray() {
	// Function tokens and opening brackets are rejected here; every other
	// token type is returned as a one-element list.
	switch ( $this->type ) {
		case self::T_FUNCTION:
		case self::T_LEFT_BRACKET:
		case self::T_LEFT_PAREN:
		case self::T_LEFT_BRACE:
			throw new UnexpectedValueException(
				"Token type \"$this->type\" is not valid in a ComponentValueList."
			);
	}

	return [ $this ];
}
301 | |
/**
 * Escape a string so it can be serialized as an identifier
 *
 * Every character matched by the pattern is passed through
 * self::escapePregCallback().
 *
 * @param string $s
 * @return string
 */
private static function escapeIdent( $s ) {
	$needsEscape = '/
		[^a-zA-Z0-9_\-\x{80}-\x{10ffff}] # Characters that are never allowed
		| (?:^|(?<=^-))[0-9] # Digits are not allowed at the start of an identifier
		| [\p{Z}\p{Cc}\p{Cf}\p{Co}\p{Cs}] # To be safe, control characters and whitespace
	/ux';

	return preg_replace_callback( $needsEscape, [ __CLASS__, 'escapePregCallback' ], $s );
}
318 | |
/**
 * Escape the contents of a double-quoted CSS string
 *
 * What gets escaped, and why:
 * - The double quote, because it delimits the string.
 * - The backslash, because it is the escape character.
 * - Newline (\n), because it isn't valid inside a string.
 * - Carriage return (\r), form feed (\f), and U+0000, because CSS's input
 *   conversion rules would otherwise change them.
 * - Other whitespace (except the plain space) and control characters,
 *   which don't strictly need it but are safer escaped.
 * - Angle brackets, numerically, to make embedding in HTML safer.
 *
 * @param string $s
 * @return string
 */
private static function escapeString( $s ) {
	$needsEscape = '/[^ \P{Z}]|[\p{Cc}\p{Cf}\p{Co}\p{Cs}"\x5c<>]/u';

	return preg_replace_callback( $needsEscape, [ __CLASS__, 'escapePregCallback' ], $s );
}
341 | |
/**
 * Callback for escaping functions
 *
 * Produces either a numeric escape (backslash, hexadecimal code point,
 * trailing space) or a simple backslash escape for the matched character.
 *
 * @param array $m Matches; $m[0] is the single character to escape
 * @return string
 */
private static function escapePregCallback( $m ) {
	// Newlines, carriage returns, form feeds, and hex digits have to be
	// escaped numerically. Other non-space whitespace and controls don't
	// have to be, but it's saner to do so. Angle brackets are escaped
	// numerically too to make it safer to embed in HTML.
	if ( preg_match( '/[^ \P{Z}]|[\p{Cc}\p{Cf}\p{Co}\p{Cs}0-9a-fA-F<>]/u', $m[0] ) ) {
		// The patterns feeding this callback all use the /u modifier, so the
		// match is UTF-8. Name the encoding explicitly rather than relying on
		// mb_internal_encoding(), which an application may have changed.
		return sprintf( '\\%x ', mb_ord( $m[0], 'UTF-8' ) );
	}
	return '\\' . $m[0];
}
357 | |
/**
 * Serialize this token to CSS text
 * @return string
 * @throws UnexpectedValueException If the token has an unknown type
 */
public function __toString() {
	switch ( $this->type ) {
		case self::T_IDENT:
			return self::escapeIdent( $this->value );

		case self::T_FUNCTION:
			return self::escapeIdent( $this->value ) . '(';

		case self::T_AT_KEYWORD:
			return '@' . self::escapeIdent( $this->value );

		case self::T_HASH:
			if ( $this->typeFlag === 'id' ) {
				return '#' . self::escapeIdent( $this->value );
			}

			// An 'unrestricted' hash may begin with a digit, so the
			// leading-digit rule of escapeIdent() is not applied here.
			return '#' . preg_replace_callback(
				'/
					[^a-zA-Z0-9_\-\x{80}-\x{10ffff}] # Characters that are never allowed
					| [\p{Z}\p{Cc}\p{Cf}\p{Co}\p{Cs}] # To be safe, control characters and whitespace
				/ux',
				[ __CLASS__, 'escapePregCallback' ],
				$this->value
			);

		case self::T_STRING:
			// We could try to decide whether single or double quote is
			// better, but it doesn't seem worth the effort.
			return '"' . self::escapeString( $this->value ) . '"';

		case self::T_URL:
			// We could try to decide whether single or double quote is
			// better, but it doesn't seem worth the effort.
			return 'url("' . self::escapeString( $this->value ) . '")';

		case self::T_BAD_STRING:
			// It's supposed to round trip, so...
			// (this is really awful because we can't close it)
			return "'badstring\n";

		case self::T_BAD_URL:
			// It's supposed to round trip, so...
			return "url(badurl'')";

		case self::T_DELIM:
			if ( $this->value === '\\' ) {
				// A backslash delimiter is emitted followed by a newline.
				return "\\\n";
			}
			return $this->value;

		case self::T_NUMBER:
		case self::T_PERCENTAGE:
		case self::T_DIMENSION:
			if ( $this->representation !== null && (float)$this->representation === (float)$this->value ) {
				$number = $this->representation;
			} elseif ( $this->typeFlag === 'integer' ) {
				$number = sprintf( '%d', $this->value );
			} else {
				$number = sprintf( '%.15g', $this->value );
				// The "g" conversion honours LC_NUMERIC, but CSS only accepts
				// "." as the decimal separator. Normalise it so the output
				// stays valid CSS under locales that use e.g. a comma.
				$decimalPoint = localeconv()['decimal_point'];
				if ( $decimalPoint !== '.' ) {
					$number = str_replace( $decimalPoint, '.', $number );
				}
			}

			if ( $this->type === self::T_PERCENTAGE ) {
				$unit = '%';
			} elseif ( $this->type === self::T_DIMENSION ) {
				$unit = self::escapeIdent( $this->unit );
				if ( strpos( $number, 'e' ) === false && strpos( $number, 'E' ) === false &&
					preg_match( '/^[eE][+-]?\d/', $unit )
				) {
					// Unit would look like exponential notation, so escape the leading "e"
					$unit = sprintf( '\\%x ', ord( $unit[0] ) ) . substr( $unit, 1 );
				}
			} else {
				$unit = '';
			}

			return $number . $unit;

		case self::T_WHITESPACE:
			return ' ';

		case self::T_CDO:
			return '<!--';

		case self::T_CDC:
			return '-->';

		case self::T_COLON:
			return ':';

		case self::T_SEMICOLON:
			return ';';

		case self::T_COMMA:
			return ',';

		case self::T_LEFT_BRACKET:
		case self::T_RIGHT_BRACKET:
		case self::T_LEFT_PAREN:
		case self::T_RIGHT_PAREN:
		case self::T_LEFT_BRACE:
		case self::T_RIGHT_BRACE:
			// The type constant of each bracket token is the bracket
			// character itself.
			return $this->type;

		case self::T_EOF:
			return '';

		default:
			throw new UnexpectedValueException( "Unknown token type \"$this->type\"." );
	}
}
468 | |
469 | /** |
470 | * Indicate whether the two tokens need to be separated |
471 | * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#serialization |
472 | * @param Token $firstToken |
473 | * @param Token $secondToken |
474 | * @return bool |
475 | */ |
476 | public static function separate( Token $firstToken, Token $secondToken ) { |
477 | // Keys are the row headings, values are the columns that have an ✗ |
478 | static $sepTable = [ |
479 | self::T_IDENT => [ |
480 | self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER, |
481 | self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC, self::T_LEFT_PAREN, |
482 | // Internet Explorer is buggy in some contexts (T191134) |
483 | self::T_HASH, |
484 | ], |
485 | self::T_AT_KEYWORD => [ |
486 | self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER, |
487 | self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC, |
488 | ], |
489 | self::T_HASH => [ |
490 | self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER, |
491 | self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC, |
492 | // Internet Explorer is buggy in some contexts (T191134) |
493 | self::T_HASH, |
494 | ], |
495 | self::T_DIMENSION => [ |