Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
189 / 189
100.00% covered (success)
100.00%
16 / 16
CRAP
100.00% covered (success)
100.00%
1 / 1
Token
100.00% covered (success)
100.00%
189 / 189
100.00% covered (success)
100.00%
16 / 16
118
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
67 / 67
100.00% covered (success)
100.00%
1 / 1
56
 type
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 value
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 typeFlag
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 representation
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 unit
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 significant
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 copyWithSignificance
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
2
 toTokenArray
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 toComponentValueArray
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
6
 escapeIdent
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 escapeString
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 escapePregCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 __toString
100.00% covered (success)
100.00%
42 / 42
100.00% covered (success)
100.00%
1 / 1
37
 separate
100.00% covered (success)
100.00%
44 / 44
100.00% covered (success)
100.00%
1 / 1
4
 urangeHack
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2declare( strict_types = 1 );
3
4/**
5 * @file
6 * @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
7 */
8
9namespace Wikimedia\CSS\Objects;
10
11use InvalidArgumentException;
12use UnexpectedValueException;
13
14/**
15 * Represent a CSS token
16 */
17class Token extends ComponentValue {
18    public const T_IDENT = "ident";
19    public const T_FUNCTION = "function";
20    public const T_AT_KEYWORD = "at-keyword";
21    public const T_HASH = "hash";
22    public const T_STRING = "string";
23    public const T_BAD_STRING = "bad-string";
24    public const T_URL = "url";
25    public const T_BAD_URL = "bad-url";
26    public const T_DELIM = "delim";
27    public const T_NUMBER = "number";
28    public const T_PERCENTAGE = "percentage";
29    public const T_DIMENSION = "dimension";
30    public const T_WHITESPACE = "whitespace";
31    public const T_CDO = "CDO";
32    public const T_CDC = "CDC";
33    public const T_COLON = "colon";
34    public const T_SEMICOLON = "semicolon";
35    public const T_COMMA = "comma";
36    public const T_LEFT_BRACKET = "[";
37    public const T_RIGHT_BRACKET = "]";
38    public const T_LEFT_PAREN = "(";
39    public const T_RIGHT_PAREN = ")";
40    public const T_LEFT_BRACE = "{";
41    public const T_RIGHT_BRACE = "}";
42    public const T_EOF = "EOF";
43
44    /** @var string One of the T_* constants */
45    protected $type;
46
47    /** @var string|int|float Value for various token types */
48    protected $value = '';
49
50    /** @var string Type flag for various token types */
51    protected $typeFlag = '';
52
53    /** @var string|null Representation for numeric tokens */
54    protected $representation = null;
55
56    /** @var string Unit for dimension tokens */
57    protected $unit = '';
58
59    /** @var bool Whether this token is considered "significant" */
60    protected $significant = true;
61
62    /** @var int See ::urangeHack() */
63    private $urangeHack = 0;
64
65    /**
66     * @param string $type One of the T_* constants
67     * @param string|array $value Value of the token, or an array with the
68     *  following keys. Depending on the type, some keys may be required and
69     *  some may be ignored.
70     *  - value: (string|int|float) Value of the token
71     *  - position: (array) Token position in the input stream. Same format as
72     *    returned by self::getPosition().
73     *  - typeFlag: (string) Flag for various token types. For T_HASH, 'id' or
74     *    'unrestricted'. For T_NUMBER, T_PERCENTAGE, and T_DIMENSION, 'integer'
75     *    or 'number'.
76     *  - representation: (string) String representation of the value for
77     *    T_NUMBER, T_PERCENTAGE, and T_DIMENSION.
78     *  - unit: (string) Unit for T_DIMENSION.
79     *  - significant: (bool) Whether the token is considered "significant"
80     */
81    public function __construct( $type, $value = [] ) {
82        if ( !is_array( $value ) ) {
83            $value = [ 'value' => $value ];
84        }
85
86        if ( isset( $value['position'] ) ) {
87            if ( !is_array( $value['position'] ) || count( $value['position'] ) !== 2 ) {
88                throw new InvalidArgumentException( 'Position must be an array of two integers' );
89            }
90            [ $this->line, $this->pos ] = $value['position'];
91            if ( !is_int( $this->line ) || !is_int( $this->pos ) ) {
92                throw new InvalidArgumentException( 'Position must be an array of two integers' );
93            }
94        }
95        if ( isset( $value['significant'] ) ) {
96            $this->significant = (bool)$value['significant'];
97        }
98
99        $this->type = $type;
100        switch ( $type ) {
101            case self::T_IDENT:
102            case self::T_FUNCTION:
103            case self::T_AT_KEYWORD:
104            case self::T_STRING:
105            case self::T_URL:
106                if ( !isset( $value['value'] ) ) {
107                    throw new InvalidArgumentException( "Token type $this->type requires a value" );
108                }
109                $this->value = (string)$value['value'];
110                break;
111
112            case self::T_HASH:
113                if ( !isset( $value['value'] ) ) {
114                    throw new InvalidArgumentException( "Token type $this->type requires a value" );
115                }
116                if ( !isset( $value['typeFlag'] ) ) {
117                    throw new InvalidArgumentException( "Token type $this->type requires a typeFlag" );
118                }
119                if ( !in_array( $value['typeFlag'], [ 'id', 'unrestricted' ], true ) ) {
120                    throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
121                }
122                $this->value = (string)$value['value'];
123                $this->typeFlag = $value['typeFlag'];
124                break;
125
126            case self::T_DELIM:
127                if ( !isset( $value['value'] ) ) {
128                    throw new InvalidArgumentException( "Token type $this->type requires a value" );
129                }
130                $this->value = (string)$value['value'];
131                if ( mb_strlen( $this->value, 'UTF-8' ) !== 1 ) {
132                    throw new InvalidArgumentException(
133                        "Value for Token type $this->type must be a single character"
134                    );
135                }
136                break;
137
138            case self::T_NUMBER:
139            case self::T_PERCENTAGE:
140            case self::T_DIMENSION:
141                if ( !isset( $value['value'] ) ||
142                    !is_numeric( $value['value'] ) || ( is_float( $value['value'] ) && !is_finite( $value['value'] ) )
143                ) {
144                    throw new InvalidArgumentException( "Token type $this->type requires a numeric value" );
145                }
146                if ( !isset( $value['typeFlag'] ) ) {
147                    throw new InvalidArgumentException( "Token type $this->type requires a typeFlag" );
148                }
149                $this->typeFlag = $value['typeFlag'];
150                if ( $this->typeFlag === 'integer' ) {
151                    $this->value = (int)$value['value'];
152                    if ( (float)$this->value !== (float)$value['value'] ) {
153                        throw new InvalidArgumentException(
154                            "typeFlag is 'integer', but value supplied is not an integer"
155                        );
156                    }
157                } elseif ( $this->typeFlag === 'number' ) {
158                    $this->value = (float)$value['value'];
159                } else {
160                    throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
161                }
162
163                if ( isset( $value['representation'] ) ) {
164                    if ( !is_numeric( $value['representation'] ) ) {
165                        throw new InvalidArgumentException( 'Representation must be numeric' );
166                    }
167                    $this->representation = $value['representation'];
168                    if ( (float)$this->representation !== (float)$this->value ) {
169                        throw new InvalidArgumentException(
170                            "Representation \"$this->representation\" does not match value \"$this->value\""
171                        );
172                    }
173                }
174
175                if ( $type === self::T_DIMENSION ) {
176                    if ( !isset( $value['unit'] ) ) {
177                        throw new InvalidArgumentException( "Token type $this->type requires a unit" );
178                    }
179                    $this->unit = $value['unit'];
180                }
181                break;
182
183            case self::T_BAD_STRING:
184            case self::T_BAD_URL:
185            case self::T_WHITESPACE:
186            case self::T_CDO:
187            case self::T_CDC:
188            case self::T_COLON:
189            case self::T_SEMICOLON:
190            case self::T_COMMA:
191            case self::T_LEFT_BRACKET:
192            case self::T_RIGHT_BRACKET:
193            case self::T_LEFT_PAREN:
194            case self::T_RIGHT_PAREN:
195            case self::T_LEFT_BRACE:
196            case self::T_RIGHT_BRACE:
197                break;
198
199            case self::T_EOF:
200                // Let EOF have a typeFlag of 'recursion-depth-exceeded', used
201                // to avoid cascading errors when that occurs.
202                if ( isset( $value['typeFlag'] ) && $value['typeFlag'] !== '' ) {
203                    $this->typeFlag = $value['typeFlag'];
204                    if ( $this->typeFlag !== 'recursion-depth-exceeded' ) {
205                        throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
206                    }
207                }
208                break;
209
210            default:
211                throw new InvalidArgumentException( "Unknown token type \"$this->type\"." );
212        }
213    }
214
    /**
     * Get the type of this token
     *
     * This is the type that was passed to the constructor.
     *
     * @return string One of the Token::T_* constants
     */
    public function type() {
        return $this->type;
    }
222
    /**
     * Get the value of this token
     *
     * The constructor stores a string for the ident-like, string, URL, hash,
     * and delimiter types, and an int or float (depending on the type flag)
     * for the numeric types. Types that take no value keep the default
     * empty string.
     *
     * @return string|int|float
     */
    public function value() {
        return $this->value;
    }
230
    /**
     * Get the type flag of this token
     *
     * The constructor sets it to 'id' or 'unrestricted' for T_HASH, to
     * 'integer' or 'number' for T_NUMBER, T_PERCENTAGE, and T_DIMENSION, and
     * optionally to 'recursion-depth-exceeded' for T_EOF. Otherwise it keeps
     * the default empty string.
     *
     * @return string
     */
    public function typeFlag() {
        return $this->typeFlag;
    }
238
    /**
     * Get the representation for this numeric token
     *
     * @return string|null Null unless a representation was supplied when
     *  constructing a T_NUMBER, T_PERCENTAGE, or T_DIMENSION token
     */
    public function representation() {
        return $this->representation;
    }
246
    /**
     * Get the unit for this T_DIMENSION token
     *
     * @return string The unit as supplied to the constructor (unescaped);
     *  the default empty string for every other token type
     */
    public function unit() {
        return $this->unit;
    }
254
    /**
     * Whether this token is considered "significant"
     *
     * A token that isn't "significant" may be removed for minification of CSS.
     * For example, most whitespace is entirely optional, as is the semicolon
     * after the last declaration in a block.
     *
     * Tokens are significant unless constructed with a falsy 'significant'
     * key, or produced by copyWithSignificance( false ).
     *
     * @return bool
     */
    public function significant() {
        return $this->significant;
    }
267
268    /**
269     * Make a copy of this token with altered "significant" flag
270     * @param bool $significant Whether the new token is considered "significant"
271     * @return Token May be the same as the current token
272     */
273    public function copyWithSignificance( $significant ) {
274        $significant = (bool)$significant;
275        if ( $significant === $this->significant ) {
276            return $this;
277        }
278        $ret = clone $this;
279        $ret->significant = $significant;
280        return $ret;
281    }
282
    /** @inheritDoc */
    public function toTokenArray() {
        // A token serializes to a token list containing just itself.
        return [ $this ];
    }
287
288    /** @inheritDoc */
289    public function toComponentValueArray() {
290        switch ( $this->type ) {
291            case self::T_FUNCTION:
292            case self::T_LEFT_BRACKET:
293            case self::T_LEFT_PAREN:
294            case self::T_LEFT_BRACE:
295                throw new UnexpectedValueException(
296                    "Token type \"$this->type\" is not valid in a ComponentValueList."
297                );
298
299            default:
300                return [ $this ];
301        }
302    }
303
304    /**
305     * Escape an ident-like string
306     * @param string $s
307     * @return string
308     */
309    private static function escapeIdent( $s ) {
310        return preg_replace_callback(
311            '/
312                [^a-zA-Z0-9_\-\x{80}-\x{10ffff}]   # Characters that are never allowed
313                | (?:^|(?<=^-))[0-9]               # Digits are not allowed at the start of an identifier
314                | [\p{Z}\p{Cc}\p{Cf}\p{Co}\p{Cs}]  # To be safe, control characters and whitespace
315            /ux',
316            [ __CLASS__, 'escapePregCallback' ],
317            $s
318        );
319    }
320
321    /**
322     * Escape characters in a string
323     *
324     * - Double quote needs escaping as the string delimiter.
325     * - Backslash needs escaping since it's the escape character.
326     * - Newline (\n) isn't valid in a string, and so needs escaping.
327     * - Carriage return (\r), form feed (\f), and U+0000 would be changed by
328     *   CSS's input conversion rules, and so need escaping.
329     * - Other non-space whitespace and controls don't need escaping, but it's
330     *   safer to do so.
331     * - Angle brackets are escaped numerically to make it safer to embed in HTML.
332     *
333     * @param string $s
334     * @return string
335     */
336    private static function escapeString( $s ) {
337        return preg_replace_callback(
338            '/[^ \P{Z}]|[\p{Cc}\p{Cf}\p{Co}\p{Cs}"\x5c<>]/u',
339            [ __CLASS__, 'escapePregCallback' ],
340            $s
341        );
342    }
343
344    /**
345     * Callback for escaping functions
346     * @param array $m Matches
347     * @return string
348     */
349    private static function escapePregCallback( $m ) {
350        // Newlines, carriage returns, form feeds, and hex digits have to be
351        // escaped numerically. Other non-space whitespace and controls don't
352        // have to be, but it's saner to do so. Angle brackets are escaped
353        // numerically too to make it safer to embed in HTML.
354        if ( preg_match( '/[^ \P{Z}]|[\p{Cc}\p{Cf}\p{Co}\p{Cs}0-9a-fA-F<>]/u', $m[0] ) ) {
355            return sprintf( '\\%x ', mb_ord( $m[0] ) );
356        }
357        return '\\' . $m[0];
358    }
359
360    public function __toString() {
361        switch ( $this->type ) {
362            case self::T_IDENT:
363                return self::escapeIdent( $this->value );
364
365            case self::T_FUNCTION:
366                return self::escapeIdent( $this->value ) . '(';
367
368            case self::T_AT_KEYWORD:
369                return '@' . self::escapeIdent( $this->value );
370
371            case self::T_HASH:
372                if ( $this->typeFlag === 'id' ) {
373                    return '#' . self::escapeIdent( $this->value );
374                }
375
376                return '#' . preg_replace_callback(
377                    '/
378                        [^a-zA-Z0-9_\-\x{80}-\x{10ffff}]   # Characters that are never allowed
379                        | [\p{Z}\p{Cc}\p{Cf}\p{Co}\p{Cs}]  # To be safe, control characters and whitespace
380                    /ux',
381                    [ __CLASS__, 'escapePregCallback' ],
382                    $this->value
383                );
384
385            case self::T_STRING:
386                // We could try to decide whether single or double quote is
387                // better, but it doesn't seem worth the effort.
388                return '"' . self::escapeString( $this->value ) . '"';
389
390            case self::T_URL:
391                // We could try to decide whether single or double quote is
392                // better, but it doesn't seem worth the effort.
393                return 'url("' . self::escapeString( $this->value ) . '")';
394
395            case self::T_BAD_STRING:
396                // It's supposed to round trip, so...
397                // (this is really awful because we can't close it)
398                return "'badstring\n";
399
400            case self::T_BAD_URL:
401                // It's supposed to round trip, so...
402                return "url(badurl'')";
403
404            case self::T_DELIM:
405                if ( $this->value === '\\' ) {
406                    return "\\\n";
407                }
408                return $this->value;
409
410            case self::T_NUMBER:
411            case self::T_PERCENTAGE:
412            case self::T_DIMENSION:
413                if ( $this->representation !== null && (float)$this->representation === (float)$this->value ) {
414                    $number = $this->representation;
415                } elseif ( $this->typeFlag === 'integer' ) {
416                    $number = sprintf( '%d', $this->value );
417                } else {
418                    $number = sprintf( '%.15g', $this->value );
419                }
420
421                if ( $this->type === self::T_PERCENTAGE ) {
422                    $unit = '%';
423                } elseif ( $this->type === self::T_DIMENSION ) {
424                    $unit = self::escapeIdent( $this->unit );
425                    if ( !str_contains( $number, 'e' ) && !str_contains( $number, 'E' ) &&
426                        preg_match( '/^[eE][+-]?\d/', $unit )
427                    ) {
428                        // Unit would look like exponential notation, so escape the leading "e"
429                        $unit = sprintf( '\\%x ', ord( $unit[0] ) ) . substr( $unit, 1 );
430                    }
431                } else {
432                    $unit = '';
433                }
434
435                return $number . $unit;
436
437            case self::T_WHITESPACE:
438                return ' ';
439
440            case self::T_CDO:
441                return '<!--';
442
443            case self::T_CDC:
444                return '-->';
445
446            case self::T_COLON:
447                return ':';
448
449            case self::T_SEMICOLON:
450                return ';';
451
452            case self::T_COMMA:
453                return ',';
454
455            case self::T_LEFT_BRACKET:
456            case self::T_RIGHT_BRACKET:
457            case self::T_LEFT_PAREN:
458            case self::T_RIGHT_PAREN:
459            case self::T_LEFT_BRACE:
460            case self::T_RIGHT_BRACE:
461                return $this->type;
462
463            case self::T_EOF:
464                return '';
465
466            default:
467                throw new UnexpectedValueException( "Unknown token type \"$this->type\"." );
468        }
469    }
470
471    /**
472     * Indicate whether the two tokens need to be separated
473     * @see https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#serialization
474     * @param Token $firstToken
475     * @param Token $secondToken
476     * @return bool
477     */
478    public static function separate( Token $firstToken, Token $secondToken ) {
479        // Keys are the row headings, values are the columns that have an âœ—
480        static $sepTable = [
481            self::T_IDENT => [
482                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
483                self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC, self::T_LEFT_PAREN,
484                // Internet Explorer is buggy in some contexts (T191134)
485                self::T_HASH,
486            ],
487            self::T_AT_KEYWORD => [
488                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
489                self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC,
490            ],
491            self::T_HASH => [
492                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
493                self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC,
494                // Internet Explorer is buggy in some contexts (T191134)
495                self::T_HASH,
496            ],
497            self::T_DIMENSION => [
498                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
499                self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC,
500                // Internet Explorer is buggy in some contexts (T191134)
501                self::T_HASH,
502            ],
503            '#' => [
504                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
505                self::T_PERCENTAGE, self::T_DIMENSION,
506            ],
507            '-' => [
508                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
509                self::T_PERCENTAGE, self::T_DIMENSION,
510            ],
511            self::T_NUMBER => [
512                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, self::T_NUMBER,
513                self::T_PERCENTAGE, self::T_DIMENSION, '%',
514                // Internet Explorer is buggy in some contexts
515                self::T_HASH,
516            ],
517            '@' => [
518                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-',
519            ],
520            '.' => [ self::T_NUMBER, self::T_PERCENTAGE, self::T_DIMENSION ],
521            '+' => [ self::T_NUMBER, self::T_PERCENTAGE, self::T_DIMENSION ],
522            '/' => [ '*' ],
523            // Not required by spec, but help prevent XSS in foreign content (T381617)
524            '<' => [ self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '!', '/' ],
525        ];
526
527        $t1 = $firstToken->type === self::T_DELIM ? $firstToken->value : $firstToken->type;
528        $t2 = $secondToken->type === self::T_DELIM ? $secondToken->value : $secondToken->type;
529
530        return isset( $sepTable[$t1] ) && in_array( $t2, $sepTable[$t1], true );
531    }
532
533    /**
534     * Allow for marking the 'U' T_IDENT beginning a <urange>, to later avoid
535     * serializing it with extraneous comments.
536     * @internal
537     * @see \Wikimedia\CSS\Util::stringify()
538     * @see \Wikimedia\CSS\Grammar\UrangeMatcher
539     * @param int|null $hack Set the hack value
540     * @return int Current/old hack value
541     */
542    public function urangeHack( $hack = null ) {
543        $ret = $this->urangeHack;
544        if ( $hack !== null ) {
545            $this->urangeHack = max( (int)$this->urangeHack, $hack );
546        }
547        return $ret;
548    }
549
550}