Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
188 / 188
100.00% covered (success)
100.00%
16 / 16
CRAP
100.00% covered (success)
100.00%
1 / 1
Token
100.00% covered (success)
100.00%
188 / 188
100.00% covered (success)
100.00%
16 / 16
117
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
67 / 67
100.00% covered (success)
100.00%
1 / 1
55
 type
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 value
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 typeFlag
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 representation
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 unit
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 significant
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 copyWithSignificance
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
2
 toTokenArray
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 toComponentValueArray
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
6
 escapeIdent
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 escapeString
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 escapePregCallback
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 __toString
100.00% covered (success)
100.00%
42 / 42
100.00% covered (success)
100.00%
1 / 1
37
 separate
100.00% covered (success)
100.00%
43 / 43
100.00% covered (success)
100.00%
1 / 1
4
 urangeHack
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2/**
3 * @file
4 * @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
5 */
6
7namespace Wikimedia\CSS\Objects;
8
9use InvalidArgumentException;
10use UnexpectedValueException;
11
12/**
13 * Represent a CSS token
14 */
15class Token extends ComponentValue {
16    public const T_IDENT = "ident";
17    public const T_FUNCTION = "function";
18    public const T_AT_KEYWORD = "at-keyword";
19    public const T_HASH = "hash";
20    public const T_STRING = "string";
21    public const T_BAD_STRING = "bad-string";
22    public const T_URL = "url";
23    public const T_BAD_URL = "bad-url";
24    public const T_DELIM = "delim";
25    public const T_NUMBER = "number";
26    public const T_PERCENTAGE = "percentage";
27    public const T_DIMENSION = "dimension";
28    public const T_WHITESPACE = "whitespace";
29    public const T_CDO = "CDO";
30    public const T_CDC = "CDC";
31    public const T_COLON = "colon";
32    public const T_SEMICOLON = "semicolon";
33    public const T_COMMA = "comma";
34    public const T_LEFT_BRACKET = "[";
35    public const T_RIGHT_BRACKET = "]";
36    public const T_LEFT_PAREN = "(";
37    public const T_RIGHT_PAREN = ")";
38    public const T_LEFT_BRACE = "{";
39    public const T_RIGHT_BRACE = "}";
40    public const T_EOF = "EOF";
41
42    /** @var string One of the T_* constants */
43    protected $type;
44
45    /** @var string|int|float Value for various token types */
46    protected $value = '';
47
48    /** @var string Type flag for various token types */
49    protected $typeFlag = '';
50
51    /** @var string|null Representation for numeric tokens */
52    protected $representation = null;
53
54    /** @var string Unit for dimension tokens */
55    protected $unit = '';
56
57    /** @var bool Whether this token is considered "significant" */
58    protected $significant = true;
59
60    /** @var int See ::urangeHack() */
61    private $urangeHack = 0;
62
63    /**
64     * @param string $type One of the T_* constants
65     * @param string|array $value Value of the token, or an array with the
66     *  following keys. Depending on the type, some keys may be required and
67     *  some may be ignored.
68     *  - value: (string|int|float) Value of the token
69     *  - position: (array) Token position in the input stream. Same format as
70     *    returned by self::getPosition().
71     *  - typeFlag: (string) Flag for various token types. For T_HASH, 'id' or
72     *    'unrestricted'. For T_NUMBER, T_PERCENTAGE, and T_DIMENSION, 'integer'
73     *    or 'number'.
74     *  - representation: (string) String representation of the value for
75     *    T_NUMBER, T_PERCENTAGE, and T_DIMENSION.
76     *  - unit: (string) Unit for T_DIMENSION.
77     *  - significant: (bool) Whether the token is considered "significant"
78     */
79    public function __construct( $type, $value = [] ) {
80        if ( !is_array( $value ) ) {
81            $value = [ 'value' => $value ];
82        }
83
84        if ( isset( $value['position'] ) ) {
85            if ( !is_array( $value['position'] ) || count( $value['position'] ) !== 2 ) {
86                throw new InvalidArgumentException( 'Position must be an array of two integers' );
87            }
88            [ $this->line, $this->pos ] = $value['position'];
89            if ( !is_int( $this->line ) || !is_int( $this->pos ) ) {
90                throw new InvalidArgumentException( 'Position must be an array of two integers' );
91            }
92        }
93        if ( isset( $value['significant'] ) ) {
94            $this->significant = (bool)$value['significant'];
95        }
96
97        $this->type = $type;
98        switch ( $type ) {
99            case self::T_IDENT:
100            case self::T_FUNCTION:
101            case self::T_AT_KEYWORD:
102            case self::T_STRING:
103            case self::T_URL:
104                if ( !isset( $value['value'] ) ) {
105                    throw new InvalidArgumentException( "Token type $this->type requires a value" );
106                }
107                $this->value = (string)$value['value'];
108                break;
109
110            case self::T_HASH:
111                if ( !isset( $value['value'] ) ) {
112                    throw new InvalidArgumentException( "Token type $this->type requires a value" );
113                }
114                if ( !isset( $value['typeFlag'] ) ) {
115                    throw new InvalidArgumentException( "Token type $this->type requires a typeFlag" );
116                }
117                if ( !in_array( $value['typeFlag'], [ 'id', 'unrestricted' ], true ) ) {
118                    throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
119                }
120                $this->value = (string)$value['value'];
121                $this->typeFlag = $value['typeFlag'];
122                break;
123
124            case self::T_DELIM:
125                if ( !isset( $value['value'] ) ) {
126                    throw new InvalidArgumentException( "Token type $this->type requires a value" );
127                }
128                $this->value = (string)$value['value'];
129                if ( mb_strlen( $this->value, 'UTF-8' ) !== 1 ) {
130                    throw new InvalidArgumentException(
131                        "Value for Token type $this->type must be a single character"
132                    );
133                }
134                break;
135
136            case self::T_NUMBER:
137            case self::T_PERCENTAGE:
138            case self::T_DIMENSION:
139                if ( !isset( $value['value'] ) ||
140                    !is_numeric( $value['value'] ) || !is_finite( $value['value'] )
141                ) {
142                    throw new InvalidArgumentException( "Token type $this->type requires a numeric value" );
143                }
144                if ( !isset( $value['typeFlag'] ) ) {
145                    throw new InvalidArgumentException( "Token type $this->type requires a typeFlag" );
146                }
147                $this->typeFlag = $value['typeFlag'];
148                if ( $this->typeFlag === 'integer' ) {
149                    $this->value = (int)$value['value'];
150                    if ( (float)$this->value !== (float)$value['value'] ) {
151                        throw new InvalidArgumentException(
152                            "typeFlag is 'integer', but value supplied is not an integer"
153                        );
154                    }
155                } elseif ( $this->typeFlag === 'number' ) {
156                    $this->value = (float)$value['value'];
157                } else {
158                    throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
159                }
160
161                if ( isset( $value['representation'] ) ) {
162                    if ( !is_numeric( $value['representation'] ) ) {
163                        throw new InvalidArgumentException( 'Representation must be numeric' );
164                    }
165                    $this->representation = $value['representation'];
166                    if ( (float)$this->representation !== (float)$this->value ) {
167                        throw new InvalidArgumentException(
168                            "Representation \"$this->representation\" does not match value \"$this->value\""
169                        );
170                    }
171                }
172
173                if ( $type === self::T_DIMENSION ) {
174                    if ( !isset( $value['unit'] ) ) {
175                        throw new InvalidArgumentException( "Token type $this->type requires a unit" );
176                    }
177                    $this->unit = $value['unit'];
178                }
179                break;
180
181            case self::T_BAD_STRING:
182            case self::T_BAD_URL:
183            case self::T_WHITESPACE:
184            case self::T_CDO:
185            case self::T_CDC:
186            case self::T_COLON:
187            case self::T_SEMICOLON:
188            case self::T_COMMA:
189            case self::T_LEFT_BRACKET:
190            case self::T_RIGHT_BRACKET:
191            case self::T_LEFT_PAREN:
192            case self::T_RIGHT_PAREN:
193            case self::T_LEFT_BRACE:
194            case self::T_RIGHT_BRACE:
195                break;
196
197            case self::T_EOF:
198                // Let EOF have a typeFlag of 'recursion-depth-exceeded', used
199                // to avoid cascading errors when that occurs.
200                if ( isset( $value['typeFlag'] ) && $value['typeFlag'] !== '' ) {
201                    $this->typeFlag = $value['typeFlag'];
202                    if ( $this->typeFlag !== 'recursion-depth-exceeded' ) {
203                        throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
204                    }
205                }
206                break;
207
208            default:
209                throw new InvalidArgumentException( "Unknown token type \"$this->type\"." );
210        }
211    }
212
213    /**
214     * Get the type of this token
215     * @return string One of the Token::T_* constants
216     */
217    public function type() {
218        return $this->type;
219    }
220
221    /**
222     * Get the value of this token
223     * @return string|int|float $value
224     */
225    public function value() {
226        return $this->value;
227    }
228
229    /**
230     * Get the type flag for this T_HASH or numeric token
231     * @return string
232     */
233    public function typeFlag() {
234        return $this->typeFlag;
235    }
236
237    /**
238     * Get the representation for this numeric token
239     * @return string|null
240     */
241    public function representation() {
242        return $this->representation;
243    }
244
245    /**
246     * Get the unit for this T_DIMENSION token
247     * @return string
248     */
249    public function unit() {
250        return $this->unit;
251    }
252
253    /**
254     * Whether this token is considered "significant"
255     *
256     * A token that isn't "significant" may be removed for minification of CSS.
257     * For example, most whitespace is entirely optional, as is the semicolon
258     * after the last declaration in a block.
259     *
260     * @return bool
261     */
262    public function significant() {
263        return $this->significant;
264    }
265
266    /**
267     * Make a copy of this token with altered "significant" flag
268     * @param bool $significant Whether the new token is considered "significant"
269     * @return Token May be the same as the current token
270     */
271    public function copyWithSignificance( $significant ) {
272        $significant = (bool)$significant;
273        if ( $significant === $this->significant ) {
274            return $this;
275        }
276        $ret = clone $this;
277        $ret->significant = $significant;
278        return $ret;
279    }
280
281    /** @inheritDoc */
282    public function toTokenArray() {
283        return [ $this ];
284    }
285
286    /** @inheritDoc */
287    public function toComponentValueArray() {
288        switch ( $this->type ) {
289            case self::T_FUNCTION:
290            case self::T_LEFT_BRACKET:
291            case self::T_LEFT_PAREN:
292            case self::T_LEFT_BRACE:
293                throw new UnexpectedValueException(
294                    "Token type \"$this->type\" is not valid in a ComponentValueList."
295                );
296
297            default:
298                return [ $this ];
299        }
300    }
301
302    /**
303     * Escape an ident-like string
304     * @param string $s
305     * @return string
306     */
307    private static function escapeIdent( $s ) {
308        return preg_replace_callback(
309            '/
310                [^a-zA-Z0-9_\-\x{80}-\x{10ffff}]   # Characters that are never allowed
311                | (?:^|(?<=^-))[0-9]               # Digits are not allowed at the start of an identifier
312                | [\p{Z}\p{Cc}\p{Cf}\p{Co}\p{Cs}]  # To be safe, control characters and whitespace
313            /ux',
314            [ __CLASS__, 'escapePregCallback' ],
315            $s
316        );
317    }
318
319    /**
320     * Escape characters in a string
321     *
322     * - Double quote needs escaping as the string delimiter.
323     * - Backslash needs escaping since it's the escape character.
324     * - Newline (\n) isn't valid in a string, and so needs escaping.
325     * - Carriage return (\r), form feed (\f), and U+0000 would be changed by
326     *   CSS's input conversion rules, and so need escaping.
327     * - Other non-space whitespace and controls don't need escaping, but it's
328     *   safer to do so.
329     * - Angle brackets are escaped numerically to make it safer to embed in HTML.
330     *
331     * @param string $s
332     * @return string
333     */
334    private static function escapeString( $s ) {
335        return preg_replace_callback(
336            '/[^ \P{Z}]|[\p{Cc}\p{Cf}\p{Co}\p{Cs}"\x5c<>]/u',
337            [ __CLASS__, 'escapePregCallback' ],
338            $s
339        );
340    }
341
342    /**
343     * Callback for escaping functions
344     * @param array $m Matches
345     * @return string
346     */
347    private static function escapePregCallback( $m ) {
348        // Newlines, carriage returns, form feeds, and hex digits have to be
349        // escaped numerically. Other non-space whitespace and controls don't
350        // have to be, but it's saner to do so. Angle brackets are escaped
351        // numerically too to make it safer to embed in HTML.
352        if ( preg_match( '/[^ \P{Z}]|[\p{Cc}\p{Cf}\p{Co}\p{Cs}0-9a-fA-F<>]/u', $m[0] ) ) {
353            return sprintf( '\\%x ', mb_ord( $m[0] ) );
354        }
355        return '\\' . $m[0];
356    }
357
358    public function __toString() {
359        switch ( $this->type ) {
360            case self::T_IDENT:
361                return self::escapeIdent( $this->value );
362
363            case self::T_FUNCTION:
364                return self::escapeIdent( $this->value ) . '(';
365
366            case self::T_AT_KEYWORD:
367                return '@' . self::escapeIdent( $this->value );
368
369            case self::T_HASH:
370                if ( $this->typeFlag === 'id' ) {
371                    return '#' . self::escapeIdent( $this->value );
372                }
373
374                return '#' . preg_replace_callback(
375                    '/
376                        [^a-zA-Z0-9_\-\x{80}-\x{10ffff}]   # Characters that are never allowed
377                        | [\p{Z}\p{Cc}\p{Cf}\p{Co}\p{Cs}]  # To be safe, control characters and whitespace
378                    /ux',
379                    [ __CLASS__, 'escapePregCallback' ],
380                    $this->value
381                );
382
383            case self::T_STRING:
384                // We could try to decide whether single or double quote is
385                // better, but it doesn't seem worth the effort.
386                return '"' . self::escapeString( $this->value ) . '"';
387
388            case self::T_URL:
389                // We could try to decide whether single or double quote is
390                // better, but it doesn't seem worth the effort.
391                return 'url("' . self::escapeString( $this->value ) . '")';
392
393            case self::T_BAD_STRING:
394                // It's supposed to round trip, so...
395                // (this is really awful because we can't close it)
396                return "'badstring\n";
397
398            case self::T_BAD_URL:
399                // It's supposed to round trip, so...
400                return "url(badurl'')";
401
402            case self::T_DELIM:
403                if ( $this->value === '\\' ) {
404                    return "\\\n";
405                }
406                return $this->value;
407
408            case self::T_NUMBER:
409            case self::T_PERCENTAGE:
410            case self::T_DIMENSION:
411                if ( $this->representation !== null && (float)$this->representation === (float)$this->value ) {
412                    $number = $this->representation;
413                } elseif ( $this->typeFlag === 'integer' ) {
414                    $number = sprintf( '%d', $this->value );
415                } else {
416                    $number = sprintf( '%.15g', $this->value );
417                }
418
419                if ( $this->type === self::T_PERCENTAGE ) {
420                    $unit = '%';
421                } elseif ( $this->type === self::T_DIMENSION ) {
422                    $unit = self::escapeIdent( $this->unit );
423                    if ( strpos( $number, 'e' ) === false && strpos( $number, 'E' ) === false &&
424                        preg_match( '/^[eE][+-]?\d/', $unit )
425                    ) {
426                        // Unit would look like exponential notation, so escape the leading "e"
427                        $unit = sprintf( '\\%x ', ord( $unit[0] ) ) . substr( $unit, 1 );
428                    }
429                } else {
430                    $unit = '';
431                }
432
433                return $number . $unit;
434
435            case self::T_WHITESPACE:
436                return ' ';
437
438            case self::T_CDO:
439                return '<!--';
440
441            case self::T_CDC:
442                return '-->';
443
444            case self::T_COLON:
445                return ':';
446
447            case self::T_SEMICOLON:
448                return ';';
449
450            case self::T_COMMA:
451                return ',';
452
453            case self::T_LEFT_BRACKET:
454            case self::T_RIGHT_BRACKET:
455            case self::T_LEFT_PAREN:
456            case self::T_RIGHT_PAREN:
457            case self::T_LEFT_BRACE:
458            case self::T_RIGHT_BRACE:
459                return $this->type;
460
461            case self::T_EOF:
462                return '';
463
464            default:
465                throw new UnexpectedValueException( "Unknown token type \"$this->type\"." );
466        }
467    }
468
469    /**
470     * Indicate whether the two tokens need to be separated
471     * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#serialization
472     * @param Token $firstToken
473     * @param Token $secondToken
474     * @return bool
475     */
476    public static function separate( Token $firstToken, Token $secondToken ) {
477        // Keys are the row headings, values are the columns that have an âœ—
478        static $sepTable = [
479            self::T_IDENT => [
480                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
481                self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC, self::T_LEFT_PAREN,
482                // Internet Explorer is buggy in some contexts (T191134)
483                self::T_HASH,
484            ],
485            self::T_AT_KEYWORD => [
486                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
487                self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC,
488            ],
489            self::T_HASH => [
490                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
491                self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC,
492                // Internet Explorer is buggy in some contexts (T191134)
493                self::T_HASH,
494            ],
495            self::T_DIMENSION => [
496                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
497                self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC,
498                // Internet Explorer is buggy in some contexts (T191134)
499                self::T_HASH,
500            ],
501            '#' => [
502                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
503                self::T_PERCENTAGE, self::T_DIMENSION,
504            ],
505            '-' => [
506                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
507                self::T_PERCENTAGE, self::T_DIMENSION,
508            ],
509            self::T_NUMBER => [
510                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, self::T_NUMBER,
511                self::T_PERCENTAGE, self::T_DIMENSION, '%',
512                // Internet Explorer is buggy in some contexts
513                self::T_HASH,
514            ],
515            '@' => [
516                self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-',
517            ],
518            '.' => [ self::T_NUMBER, self::T_PERCENTAGE, self::T_DIMENSION ],
519            '+' => [ self::T_NUMBER, self::T_PERCENTAGE, self::T_DIMENSION ],
520            '/' => [ '*' ],
521        ];
522
523        $t1 = $firstToken->type === self::T_DELIM ? $firstToken->value : $firstToken->type;
524        $t2 = $secondToken->type === self::T_DELIM ? $secondToken->value : $secondToken->type;
525
526        return isset( $sepTable[$t1] ) && in_array( $t2, $sepTable[$t1], true );
527    }
528
529    /**
530     * Allow for marking the 'U' T_IDENT beginning a <urange>, to later avoid
531     * serializing it with extraneous comments.
532     * @internal
533     * @see \Wikimedia\CSS\Util::stringify()
534     * @see \Wikimedia\CSS\Grammar\UrangeMatcher
535     * @param int|null $hack Set the hack value
536     * @return int Current/old hack value
537     */
538    public function urangeHack( $hack = null ) {
539        $ret = $this->urangeHack;
540        if ( $hack !== null ) {
541            $this->urangeHack = max( (int)$this->urangeHack, $hack );
542        }
543        return $ret;
544    }
545
546}