Code Coverage for /src/src/Parser/DataSourceTokenizer.php

	Code Coverage
	Lines			Functions and Methods				Classes and Traits
Total	100.00% covered (success)	100.00%	339 / 339	100.00% covered (success)	100.00%	26 / 26	CRAP	100.00% covered (success)	100.00%	1 / 1
DataSourceTokenizer	100.00% covered (success)	100.00%	339 / 339	100.00% covered (success)	100.00%	26 / 26	201	100.00% covered (success)	100.00%	1 / 1
__construct	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
nextChar	100.00% covered (success)	100.00%	11 / 11	100.00% covered (success)	100.00%	1 / 1	7
consumeCharacter	100.00% covered (success)	100.00%	8 / 8	100.00% covered (success)	100.00%	1 / 1	3
reconsumeCharacter	100.00% covered (success)	100.00%	6 / 6	100.00% covered (success)	100.00%	1 / 1	3
lookAhead	100.00% covered (success)	100.00%	9 / 9	100.00% covered (success)	100.00%	1 / 1	1
getParseErrors	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
clearParseErrors	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
parseError	100.00% covered (success)	100.00%	7 / 7	100.00% covered (success)	100.00%	1 / 1	6
consumeToken	100.00% covered (success)	100.00%	108 / 108	100.00% covered (success)	100.00%	1 / 1	55
consumeNumericToken	100.00% covered (success)	100.00%	8 / 8	100.00% covered (success)	100.00%	1 / 1	3
consumeIdentLikeToken	100.00% covered (success)	100.00%	14 / 14	100.00% covered (success)	100.00%	1 / 1	11
consumeStringToken	100.00% covered (success)	100.00%	22 / 22	100.00% covered (success)	100.00%	1 / 1	10
consumeUrlToken	100.00% covered (success)	100.00%	36 / 36	100.00% covered (success)	100.00%	1 / 1	18
consumeBadUrlRemnants	100.00% covered (success)	100.00%	6 / 6	100.00% covered (success)	100.00%	1 / 1	5
isWhitespace	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	3
isNameStartCharacter	100.00% covered (success)	100.00%	4 / 4	100.00% covered (success)	100.00%	1 / 1	6
isNameCharacter	100.00% covered (success)	100.00%	5 / 5	100.00% covered (success)	100.00%	1 / 1	9
isNonPrintable	100.00% covered (success)	100.00%	5 / 5	100.00% covered (success)	100.00%	1 / 1	6
isDigit	100.00% covered (success)	100.00%	2 / 2	100.00% covered (success)	100.00%	1 / 1	2
isHexDigit	100.00% covered (success)	100.00%	4 / 4	100.00% covered (success)	100.00%	1 / 1	6
isValidEscape	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	2
wouldStartIdentifier	100.00% covered (success)	100.00%	8 / 8	100.00% covered (success)	100.00%	1 / 1	6
wouldStartNumber	100.00% covered (success)	100.00%	5 / 5	100.00% covered (success)	100.00%	1 / 1	6
consumeEscape	100.00% covered (success)	100.00%	17 / 17	100.00% covered (success)	100.00%	1 / 1	10
consumeName	100.00% covered (success)	100.00%	10 / 10	100.00% covered (success)	100.00%	1 / 1	4
consumeNumber	100.00% covered (success)	100.00%	39 / 39	100.00% covered (success)	100.00%	1 / 1	16

1	<?php
2	/**
3	* @file
4	* @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
5	*/
6
7	namespace Wikimedia\CSS\Parser;
8
9	use InvalidArgumentException;
10	use UnexpectedValueException;
11	use UtfNormal\Constants;
12	use UtfNormal\Utils;
13	use Wikimedia\CSS\Objects\Token;
14
15	/**
16	* Parse CSS into tokens
17	*
18	* This implements the tokenizer from the CSS Syntax Module Level 3 candidate recommendation.
19	* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/
20	*/
21	class DataSourceTokenizer implements Tokenizer {
22
23	/** @var DataSource */
24	protected $source;
25
26	/** @var int line in the input */
27	protected $line = 1;
28
29	/** @var int position in the line in the input */
30	protected $pos = 0;
31
32	/** @var string|null|object The most recently consumed character */
33	protected $currentCharacter = null;
34
35	/** @var string|null The next character to be consumed */
36	protected $nextCharacter = null;
37
38	/** @var array Parse errors. Each error is [ string $tag, int $line, int $pos ] */
39	protected $parseErrors = [];
40
/**
 * Create a tokenizer reading from the given data source
 *
 * @param DataSource $source Source that characters are read from
 * @param array $options Configuration options.
 *  (none currently defined)
 */
public function __construct( DataSource $source, array $options = [] ) {
	// $options is accepted but not read by this constructor.
	$this->source = $source;
}
49
/**
 * Fetch one character from the data source, applying input preprocessing
 *
 * U+000C, a lone U+000D, and the pair U+000D U+000A are each returned as a
 * single U+000A. U+0000 and surrogate code points are returned as U+FFFD.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#input-preprocessing
 * @return string One UTF-8 character, or empty string on EOF
 */
protected function nextChar() {
	$raw = $this->source->readCharacter();

	if ( $raw === "\r" ) {
		// Swallow the "\n" of a CR LF pair; anything else goes back to the source.
		$following = $this->source->readCharacter();
		if ( $following !== "\n" ) {
			$this->source->putBackCharacter( $following );
		}
		return "\n";
	}

	if ( $raw === "\f" ) {
		// Form feed counts as a newline
		return "\n";
	}

	$isSurrogate = $raw >= "\u{D800}" && $raw <= "\u{DFFF}";
	if ( $raw === "\0" || $isSurrogate ) {
		return Constants::UTF8_REPLACEMENT;
	}

	return $raw;
}
82
/**
 * Advance by one character
 *
 * Updates the line/position counters based on the character being left
 * behind, then loads the new current character and peeks at the one after it.
 */
protected function consumeCharacter() {
	$leaving = $this->currentCharacter;
	if ( $leaving === "\n" ) {
		// The character being left ended a line
		$this->line++;
		$this->pos = 1;
	} elseif ( $leaving !== DataSource::EOF ) {
		$this->pos++;
	}

	$this->currentCharacter = $this->nextChar();

	// Peek at the following character, then hand it back to the source.
	$peeked = $this->nextChar();
	$this->nextCharacter = $peeked;
	$this->source->putBackCharacter( $peeked );
}
98
/**
 * Reconsume the current character
 *
 * The current character is pushed back onto the data source so that the
 * next call to self::consumeCharacter() reads it again. Does nothing at EOF.
 */
protected function reconsumeCharacter() {
	$char = $this->currentCharacter;

	// @codeCoverageIgnoreStart
	if ( !is_string( $char ) ) {
		throw new UnexpectedValueException( "[$this->line:$this->pos] Can't reconsume" );
	}
	// @codeCoverageIgnoreEnd

	if ( $char !== DataSource::EOF ) {
		$this->source->putBackCharacter( $char );
		$this->nextCharacter = $char;
		// Non-string placeholder: a second reconsume without an intervening
		// consume trips the is_string() check above.
		$this->currentCharacter = (object)[];
		$this->pos--;
	}
}
122
/**
 * Peek at the next three characters without consuming them
 *
 * The characters are read through self::nextChar() and then pushed back
 * onto the data source in reverse order.
 *
 * @return string[] Three characters
 */
protected function lookAhead() {
	$peeked = [];
	for ( $i = 0; $i < 3; $i++ ) {
		$peeked[] = $this->nextChar();
	}

	// Push back last-read first so the source yields them in the original order.
	for ( $i = 2; $i >= 0; $i-- ) {
		$this->source->putBackCharacter( $peeked[$i] );
	}

	return $peeked;
}
139
/**
 * Return the parse errors recorded so far
 * @inheritDoc
 */
public function getParseErrors() {
	return $this->parseErrors;
}
144
/**
 * Discard all recorded parse errors
 * @inheritDoc
 */
public function clearParseErrors() {
	$this->parseErrors = [];
}
149
/**
 * Record a parse error
 *
 * The error is appended to the list returned by self::getParseErrors() as
 * [ $tag, $line, $pos ] followed by the entries of $data.
 *
 * @param string $tag Error tag
 * @param array|null $position Report the error as starting at this
 *  position instead of at the current position. Either [ int $line, int $pos ]
 *  or an array holding such a pair under the key 'position'. Null or an
 *  empty array means "use the current position".
 * @param array $data Extra data about the error.
 * @throws InvalidArgumentException If $position is not a pair of integers
 */
protected function parseError( $tag, ?array $position = null, array $data = [] ) {
	// The parameter is declared explicitly nullable: the implicit form
	// (`array $position = null`) is deprecated as of PHP 8.4.
	if ( $position ) {
		// Accept token-data style arrays and unwrap the actual pair.
		if ( isset( $position['position'] ) ) {
			$position = $position['position'];
		}
		if ( count( $position ) !== 2 || !is_int( $position[0] ) || !is_int( $position[1] ) ) {
			// @codeCoverageIgnoreStart
			throw new InvalidArgumentException( 'Invalid position' );
			// @codeCoverageIgnoreEnd
		}
		$err = [ $tag, $position[0], $position[1] ];
	} else {
		$err = [ $tag, $this->line, $this->pos ];
	}
	$this->parseErrors[] = array_merge( $err, $data );
}
173
/**
 * Read a token from the data source
 *
 * Comments are skipped: after a comment has been consumed the method loops
 * and reads the next token. This is done iteratively rather than by
 * recursion so that a long run of directly adjacent comments in the input
 * cannot grow the call stack.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-token
 * @return Token
 * @suppress PhanPluginDuplicateAdjacentStatement,PhanPluginDuplicateSwitchCaseLooseEquality
 */
public function consumeToken() {
	// We "consume comments" inline below, see `case '/'`.
	// Every switch branch either returns a token or, for a comment,
	// restarts the loop with `continue 2`.
	while ( true ) {
		$this->consumeCharacter();
		$pos = [ 'position' => [ $this->line, $this->pos ] ];

		switch ( (string)$this->currentCharacter ) {
			case "\n":
			case "\t":
			case ' ':
				// Whitespace token
				while ( self::isWhitespace( $this->nextCharacter ) ) {
					$this->consumeCharacter();
				}
				return new Token( Token::T_WHITESPACE, $pos );

			case '"':
			case '\'':
				// String token
				return $this->consumeStringToken( $this->currentCharacter, $pos );

			case '#':
				[ $next, $next2, $next3 ] = $this->lookAhead();
				if ( self::isNameCharacter( $this->nextCharacter ) ||
					self::isValidEscape( $next, $next2 )
				) {
					// typeFlag is computed before consumeName() advances the input.
					return new Token( Token::T_HASH, $pos + [
						'typeFlag' => self::wouldStartIdentifier( $next, $next2, $next3 ) ? 'id' : 'unrestricted',
						'value' => $this->consumeName(),
					] );
				}

				return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );

			case '(':
				return new Token( Token::T_LEFT_PAREN, $pos );

			case ')':
				return new Token( Token::T_RIGHT_PAREN, $pos );

			case '+':
			case '.':
				[ $next, $next2, ] = $this->lookAhead();
				if ( self::wouldStartNumber( $this->currentCharacter, $next, $next2 ) ) {
					$this->reconsumeCharacter();
					return $this->consumeNumericToken( $pos );
				}

				return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );

			case ',':
				return new Token( Token::T_COMMA, $pos );

			case '-':
				[ $next, $next2, ] = $this->lookAhead();
				if ( self::wouldStartNumber( $this->currentCharacter, $next, $next2 ) ) {
					$this->reconsumeCharacter();
					return $this->consumeNumericToken( $pos );
				}

				if ( $next === '-' && $next2 === '>' ) {
					// "-->"
					$this->consumeCharacter();
					$this->consumeCharacter();
					return new Token( Token::T_CDC, $pos );
				}

				if ( self::wouldStartIdentifier( $this->currentCharacter, $next, $next2 ) ) {
					$this->reconsumeCharacter();
					return $this->consumeIdentLikeToken( $pos );
				}

				return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );

			case '/':
				if ( $this->nextCharacter === '*' ) {
					// Step over the "/*" opener onto the first character of the comment body.
					$this->consumeCharacter();
					$this->consumeCharacter();
					while ( $this->currentCharacter !== DataSource::EOF &&
						// @phan-suppress-next-line PhanSuspiciousValueComparisonInLoop
						!( $this->currentCharacter === '*' && $this->nextCharacter === '/' )
					) {
						$this->consumeCharacter();
					}
					if ( $this->currentCharacter === DataSource::EOF ) {
						$this->parseError( 'unclosed-comment', $pos );
					}
					// Consume the closing "/" (a no-op move at EOF).
					$this->consumeCharacter();
					// Comment skipped: read the token that follows it.
					continue 2;
				}

				return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );

			case ':':
				return new Token( Token::T_COLON, $pos );

			case ';':
				return new Token( Token::T_SEMICOLON, $pos );

			case '<':
				[ $next, $next2, $next3 ] = $this->lookAhead();
				if ( $next === '!' && $next2 === '-' && $next3 === '-' ) {
					// "<!--"
					$this->consumeCharacter();
					$this->consumeCharacter();
					$this->consumeCharacter();
					return new Token( Token::T_CDO, $pos );
				}

				return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );

			case '@':
				[ $next, $next2, $next3 ] = $this->lookAhead();
				if ( self::wouldStartIdentifier( $next, $next2, $next3 ) ) {
					return new Token( Token::T_AT_KEYWORD, $pos + [ 'value' => $this->consumeName() ] );
				}

				return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );

			case '[':
				return new Token( Token::T_LEFT_BRACKET, $pos );

			case '\\':
				if ( self::isValidEscape( $this->currentCharacter, $this->nextCharacter ) ) {
					$this->reconsumeCharacter();
					return $this->consumeIdentLikeToken( $pos );
				}

				$this->parseError( 'bad-escape' );
				return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );

			case ']':
				return new Token( Token::T_RIGHT_BRACKET, $pos );

			case '{':
				return new Token( Token::T_LEFT_BRACE, $pos );

			case '}':
				return new Token( Token::T_RIGHT_BRACE, $pos );

			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				$this->reconsumeCharacter();
				return $this->consumeNumericToken( $pos );

			case DataSource::EOF:
				return new Token( Token::T_EOF, $pos );

			default:
				if ( self::isNameStartCharacter( $this->currentCharacter ) ) {
					$this->reconsumeCharacter();
					return $this->consumeIdentLikeToken( $pos );
				}

				return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
		}
	}
}
344
/**
 * Consume a numeric token
 *
 * Reads a number, then decides between a dimension (number followed by an
 * identifier), a percentage (number followed by "%"), or a plain number.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-numeric-token
 * @param array $data Data for the new token (typically contains just 'position')
 * @return Token
 */
protected function consumeNumericToken( array $data ) {
	$number = $this->consumeNumber();
	$data['representation'] = $number[0];
	$data['value'] = $number[1];
	$data['typeFlag'] = $number[2];

	if ( self::wouldStartIdentifier( ...$this->lookAhead() ) ) {
		return new Token( Token::T_DIMENSION, $data + [ 'unit' => $this->consumeName() ] );
	}

	if ( $this->nextCharacter === '%' ) {
		$this->consumeCharacter();
		return new Token( Token::T_PERCENTAGE, $data );
	}

	return new Token( Token::T_NUMBER, $data );
}
364
/**
 * Consume an ident-like token
 *
 * Produces an identifier, a function token, or (for an unquoted `url(`)
 * whatever self::consumeUrlToken() returns.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-ident-like-token
 * @param array $data Data for the new token (typically contains just 'position')
 * @return Token
 */
protected function consumeIdentLikeToken( array $data ) {
	$name = $this->consumeName();

	if ( $this->nextCharacter !== '(' ) {
		return new Token( Token::T_IDENT, $data + [ 'value' => $name ] );
	}

	// Consume the "("
	$this->consumeCharacter();

	if ( strcasecmp( $name, 'url' ) === 0 ) {
		// Drop leading whitespace, but keep one whitespace character in place.
		[ $first, $second ] = $this->lookAhead();
		while ( self::isWhitespace( $first ) && self::isWhitespace( $second ) ) {
			$this->consumeCharacter();
			[ $first, $second ] = $this->lookAhead();
		}

		// A quoted argument means this is an ordinary function token.
		$quoted = $first === '"' || $first === '\'' ||
			( self::isWhitespace( $first ) && ( $second === '"' || $second === '\'' ) );
		if ( !$quoted ) {
			return $this->consumeUrlToken( $data );
		}
	}

	return new Token( Token::T_FUNCTION, $data + [ 'value' => $name ] );
}
397
/**
 * Consume a string token
 *
 * This assumes the leading quote or apostrophe has already been consumed.
 * An unescaped newline ends the string early and yields a bad-string token
 * with an empty value; EOF ends it with an 'unclosed-string' parse error.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-string-token
 * @param string $endChar Ending character of the string
 * @param array $data Data for the new token (typically contains just 'position')
 * @return Token
 */
protected function consumeStringToken( $endChar, array $data ) {
	$data['value'] = '';

	for ( ; ; ) {
		$this->consumeCharacter();
		$char = $this->currentCharacter;

		if ( $char === DataSource::EOF ) {
			$this->parseError( 'unclosed-string', $data );
			break;
		}

		if ( $char === $endChar ) {
			break;
		}

		if ( $char === "\n" ) {
			$this->parseError( 'newline-in-string' );
			// Leave the newline in the input for the next token.
			$this->reconsumeCharacter();
			return new Token( Token::T_BAD_STRING, [ 'value' => '' ] + $data );
		}

		if ( $char !== '\\' ) {
			$data['value'] .= $char;
			continue;
		}

		// Backslash handling
		if ( $this->nextCharacter === DataSource::EOF ) {
			// Do nothing
		} elseif ( $this->nextCharacter === "\n" ) {
			// Escaped newline: consume it without adding anything to the value
			$this->consumeCharacter();
		} elseif ( self::isValidEscape( $char, $this->nextCharacter ) ) {
			$data['value'] .= $this->consumeEscape();
		} else {
			// @codeCoverageIgnoreStart
			throw new UnexpectedValueException( "[$this->line:$this->pos] Unexpected state" );
			// @codeCoverageIgnoreEnd
		}
	}

	return new Token( Token::T_STRING, $data );
}
450
/**
 * Consume a URL token
 *
 * This assumes the leading "url(" has already been consumed. On a malformed
 * URL the rest of it is discarded and a bad-url token with an empty value
 * is returned.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-url-token
 * @param array $data Data for the new token (typically contains just 'position')
 * @return Token
 */
protected function consumeUrlToken( array $data ) {
	// 1.
	$data['value'] = '';

	// 2. Skip leading whitespace
	while ( self::isWhitespace( $this->nextCharacter ) ) {
		$this->consumeCharacter();
	}

	// 3.
	for ( ; ; ) {
		$this->consumeCharacter();
		$char = $this->currentCharacter;

		if ( $char === DataSource::EOF ) {
			$this->parseError( 'unclosed-url', $data );
			break;
		}

		if ( $char === ')' ) {
			break;
		}

		if ( $char === "\n" || $char === "\t" || $char === ' ' ) {
			// Whitespace is only allowed directly before the end of the URL.
			while ( self::isWhitespace( $this->nextCharacter ) ) {
				$this->consumeCharacter();
			}
			if ( $this->nextCharacter === ')' ) {
				$this->consumeCharacter();
				break;
			}
			if ( $this->nextCharacter === DataSource::EOF ) {
				$this->consumeCharacter();
				$this->parseError( 'unclosed-url', $data );
				break;
			}
			$this->consumeBadUrlRemnants();
			return new Token( Token::T_BAD_URL, [ 'value' => '' ] + $data );
		}

		if ( $char === '"' || $char === '\'' || $char === '(' ) {
			$this->parseError( 'bad-character-in-url' );
			$this->consumeBadUrlRemnants();
			return new Token( Token::T_BAD_URL, [ 'value' => '' ] + $data );
		}

		if ( $char === '\\' ) {
			if ( !self::isValidEscape( $char, $this->nextCharacter ) ) {
				$this->parseError( 'bad-escape' );
				$this->consumeBadUrlRemnants();
				return new Token( Token::T_BAD_URL, [ 'value' => '' ] + $data );
			}
			$data['value'] .= $this->consumeEscape();
			continue;
		}

		if ( self::isNonPrintable( $char ) ) {
			$this->parseError( 'bad-character-in-url' );
			$this->consumeBadUrlRemnants();
			return new Token( Token::T_BAD_URL, [ 'value' => '' ] + $data );
		}

		$data['value'] .= $char;
	}

	return new Token( Token::T_URL, $data );
}
538
/**
 * Discard the rest of a malformed URL
 *
 * Consumes characters up to and including the next ")" or until EOF,
 * stepping over valid escapes along the way.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-remnants-of-bad-url
 */
protected function consumeBadUrlRemnants() {
	do {
		$this->consumeCharacter();
		$finished = $this->currentCharacter === ')' ||
			$this->currentCharacter === DataSource::EOF;

		if ( !$finished && self::isValidEscape( $this->currentCharacter, $this->nextCharacter ) ) {
			// The escaped character is consumed and thrown away.
			$this->consumeEscape();
		}
	} while ( !$finished );
}
554
/**
 * Indicate if a character is whitespace
 *
 * Only newline, tab and space are checked.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#whitespace
 * @param string $char A single UTF-8 character
 * @return bool
 */
protected static function isWhitespace( $char ) {
	return in_array( $char, [ "\n", "\t", ' ' ], true );
}
564
/**
 * Indicate if a character is a name-start code point
 *
 * That is an ASCII letter, an underscore, or any non-ASCII character.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#name-start-code-point
 * @param string $char A single UTF-8 character
 * @return bool
 */
protected static function isNameStartCharacter( $char ) {
	// Only the first byte is inspected: every non-ASCII character
	// (first byte >= 0x80) is a name start character.
	$byte = ord( $char );

	if ( $byte >= 0x80 || $byte === 0x5f ) {
		return true;
	}
	if ( $byte >= 0x61 && $byte <= 0x7a ) {
		// a-z
		return true;
	}
	// A-Z
	return $byte >= 0x41 && $byte <= 0x5a;
}
579
/**
 * Indicate if a character is a name code point
 *
 * That is an ASCII letter or digit, an underscore, a hyphen, or any
 * non-ASCII character.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#name-code-point
 * @param string $char A single UTF-8 character
 * @return bool
 */
protected static function isNameCharacter( $char ) {
	// Only the first byte is inspected: every non-ASCII character
	// (first byte >= 0x80) is a name character.
	$byte = ord( $char );

	if ( $byte >= 0x80 || $byte === 0x5f || $byte === 0x2d ) {
		return true;
	}
	if ( $byte >= 0x30 && $byte <= 0x39 ) {
		// 0-9
		return true;
	}
	if ( $byte >= 0x61 && $byte <= 0x7a ) {
		// a-z
		return true;
	}
	// A-Z
	return $byte >= 0x41 && $byte <= 0x5a;
}
595
/**
 * Indicate if a character is non-printable
 *
 * Covers U+0000-U+0008, U+000B, U+000E-U+001F and U+007F.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#non-printable-code-point
 * @param string $char A single UTF-8 character
 * @return bool
 */
protected static function isNonPrintable( $char ) {
	// Only the first byte is inspected; no non-ASCII character is non-printable.
	$byte = ord( $char );

	if ( $byte === 0x0b || $byte === 0x7f ) {
		return true;
	}
	// ord() never returns a negative value, so "<= 0x08" covers 0x00-0x08.
	return $byte <= 0x08 || ( $byte >= 0x0e && $byte <= 0x1f );
}
611
/**
 * Indicate if a character is a digit (0-9)
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#digit
 * @param string $char A single UTF-8 character
 * @return bool
 */
protected static function isDigit( $char ) {
	// Only the first byte is inspected; no non-ASCII character is a digit.
	$byte = ord( $char );
	if ( $byte < 0x30 ) {
		return false;
	}
	return $byte <= 0x39;
}
624
/**
 * Indicate if a character is a hex digit (0-9, A-F, a-f)
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#hex-digit
 * @param string $char A single UTF-8 character
 * @return bool
 */
protected static function isHexDigit( $char ) {
	// Only the first byte is inspected; no non-ASCII character is a hex digit.
	$byte = ord( $char );

	if ( $byte >= 0x30 && $byte <= 0x39 ) {
		// 0-9
		return true;
	}
	if ( $byte >= 0x61 && $byte <= 0x66 ) {
		// a-f
		return true;
	}
	// A-F
	return $byte >= 0x41 && $byte <= 0x46;
}
639
/**
 * Determine if two characters constitute a valid escape
 *
 * True when the first character is a backslash and the second is anything
 * other than a newline.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#starts-with-a-valid-escape
 * @param string $char1
 * @param string $char2
 * @return bool
 */
protected static function isValidEscape( $char1, $char2 ) {
	if ( $char1 !== '\\' ) {
		return false;
	}
	return $char2 !== "\n";
}
650
/**
 * Determine if three characters would start an identifier
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#would-start-an-identifier
 * @param string $char1
 * @param string $char2
 * @param string $char3
 * @return bool
 */
protected static function wouldStartIdentifier( $char1, $char2, $char3 ) {
	if ( $char1 === '-' ) {
		// After a leading hyphen: a second hyphen, a name-start character,
		// or a valid escape.
		return $char2 === '-' ||
			self::isNameStartCharacter( $char2 ) ||
			self::isValidEscape( $char2, $char3 );
	}

	if ( self::isNameStartCharacter( $char1 ) ) {
		return true;
	}

	// isValidEscape() is false unless $char1 is a backslash, so this also
	// covers the "anything else" case.
	return self::isValidEscape( $char1, $char2 );
}
671
/**
 * Determine if three characters would start a number
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#starts-with-a-number
 * @param string $char1
 * @param string $char2
 * @param string $char3
 * @return bool
 */
protected static function wouldStartNumber( $char1, $char2, $char3 ) {
	if ( $char1 === '.' ) {
		return self::isDigit( $char2 );
	}

	if ( $char1 === '+' || $char1 === '-' ) {
		// A sign must be followed by a digit, or by "." and a digit.
		if ( self::isDigit( $char2 ) ) {
			return true;
		}
		return $char2 === '.' && self::isDigit( $char3 );
	}

	// @codeCoverageIgnoreStart
	// Nothing reaches this code
	return self::isDigit( $char1 );
	// @codeCoverageIgnoreEnd
}
693
/**
 * Consume a valid escape
 *
 * This assumes the leading backslash is consumed. A hex escape of one to
 * six digits (plus one optional trailing whitespace character) is decoded
 * to the character it names; any other character stands for itself.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-escaped-code-point
 * @return string Escaped character
 */
protected function consumeEscape() {
	// Remember where the escape began, for error reporting.
	$start = [ 'position' => [ $this->line, $this->pos ] ];

	$this->consumeCharacter();
	$first = $this->currentCharacter;

	if ( !self::isHexDigit( $first ) ) {
		if ( $first !== DataSource::EOF ) {
			return $first;
		}
		$this->parseError( 'bad-escape', $start );
		return Constants::UTF8_REPLACEMENT;
	}

	// 1-6 hexits, plus one optional whitespace character
	$hex = $first;
	for ( $count = 1; $count < 6 && self::isHexDigit( $this->nextCharacter ); $count++ ) {
		$this->consumeCharacter();
		$hex .= $this->currentCharacter;
	}
	if ( self::isWhitespace( $this->nextCharacter ) ) {
		$this->consumeCharacter();
	}

	$codepoint = intval( $hex, 16 );
	$isSurrogate = $codepoint >= 0xd800 && $codepoint <= 0xdfff;
	if ( $codepoint === 0 || $isSurrogate || $codepoint > 0x10ffff ) {
		return Constants::UTF8_REPLACEMENT;
	}

	return Utils::codepointToUtf8( $codepoint );
}
732
/**
 * Consume a name
 *
 * Note this does not do validation on the input stream. Call
 * self::wouldStartIdentifier() or the like before calling the method if
 * necessary.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-name
 * @return string Name
 */
protected function consumeName() {
	$name = '';

	for ( ; ; ) {
		$this->consumeCharacter();

		if ( self::isNameCharacter( $this->currentCharacter ) ) {
			$name .= $this->currentCharacter;
			continue;
		}

		if ( !self::isValidEscape( $this->currentCharacter, $this->nextCharacter ) ) {
			// Not part of the name: give the character back and stop.
			$this->reconsumeCharacter();
			return $name;
		}

		$name .= $this->consumeEscape();
	}
}
761
/**
 * Consume a number
 *
 * Note this does not do validation on the input stream. Call
 * self::wouldStartNumber() before calling the method if necessary.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-number
 * @return array [ string $value, int|float $number, string $type ('integer' or 'number') ]
 * @suppress PhanPluginDuplicateAdjacentStatement
 */
protected function consumeNumber() {
	// 1.
	$repr = '';
	$type = 'integer';

	// Appends the run of digits at the front of the input to $repr.
	$takeDigits = function () use ( &$repr ) {
		while ( self::isDigit( $this->nextCharacter ) ) {
			$this->consumeCharacter();
			$repr .= $this->currentCharacter;
		}
	};

	// 2. Optional sign
	$sign = $this->nextCharacter;
	if ( $sign === '+' || $sign === '-' ) {
		$this->consumeCharacter();
		$repr .= $this->currentCharacter;
	}

	// 3. Integer part
	$takeDigits();

	// 4. Fractional part: "." followed by at least one digit
	if ( $this->nextCharacter === '.' ) {
		$ahead = $this->lookAhead();
		if ( self::isDigit( $ahead[1] ) ) {
			$this->consumeCharacter();
			$this->consumeCharacter();
			$repr .= $ahead[0] . $ahead[1];
			$type = 'number';
			$takeDigits();
		}
	}

	// 5. Exponent: "e"/"E", an optional sign, and at least one digit
	if ( $this->nextCharacter === 'e' || $this->nextCharacter === 'E' ) {
		$ahead = $this->lookAhead();

		// How many of the looked-ahead characters belong to the exponent prefix
		if ( ( $ahead[1] === '+' || $ahead[1] === '-' ) && self::isDigit( $ahead[2] ) ) {
			$width = 3;
		} elseif ( self::isDigit( $ahead[1] ) ) {
			$width = 2;
		} else {
			$width = 0;
		}

		if ( $width > 0 ) {
			for ( $i = 0; $i < $width; $i++ ) {
				$this->consumeCharacter();
				$repr .= $ahead[$i];
			}
			$type = 'number';
			$takeDigits();
		}
	}

	// 6. We assume PHP's casting follows the same rules as
	// https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#convert-string-to-number
	if ( $type === 'integer' ) {
		$value = (int)$repr;
	} else {
		$value = (float)$repr;
	}

	// 7.
	return [ $repr, $value, $type ];
}
846	}