Code Coverage
 
Classes and Traits
Functions and Methods
Lines
Total
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
25 / 25
CRAP
100.00% covered (success)
100.00%
216 / 216
Parser
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
25 / 25
99
100.00% covered (success)
100.00%
216 / 216
 __construct
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
2 / 2
 newFromString
100.00% covered (success)
100.00%
1 / 1
2
100.00% covered (success)
100.00%
3 / 3
 newFromDataSource
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
2 / 2
 newFromTokens
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
2 / 2
 consumeToken
100.00% covered (success)
100.00%
1 / 1
4
100.00% covered (success)
100.00%
6 / 6
 consumeTokenAndWhitespace
100.00% covered (success)
100.00%
1 / 1
2
100.00% covered (success)
100.00%
3 / 3
 getParseErrors
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
1 / 1
 clearParseErrors
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
2 / 2
 parseError
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
3 / 3
 parseStylesheet
100.00% covered (success)
100.00%
1 / 1
4
100.00% covered (success)
100.00%
7 / 7
 parseRuleList
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
2 / 2
 parseRule
100.00% covered (success)
100.00%
1 / 1
5
100.00% covered (success)
100.00%
14 / 14
 parseDeclaration
100.00% covered (success)
100.00%
1 / 1
2
100.00% covered (success)
100.00%
6 / 6
 parseDeclarationList
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
2 / 2
 parseDeclarationOrAtRuleList
100.00% covered (success)
100.00%
1 / 1
1
100.00% covered (success)
100.00%
2 / 2
 parseComponentValue
100.00% covered (success)
100.00%
1 / 1
3
100.00% covered (success)
100.00%
10 / 10
 parseComponentValueList
100.00% covered (success)
100.00%
1 / 1
4
100.00% covered (success)
100.00%
8 / 8
 consumeRuleList
100.00% covered (success)
100.00%
1 / 1
9
100.00% covered (success)
100.00%
17 / 17
 consumeDeclarationOrAtRuleList
100.00% covered (success)
100.00%
1 / 1
14
100.00% covered (success)
100.00%
36 / 36
 consumeDeclaration
100.00% covered (success)
100.00%
1 / 1
13
100.00% covered (success)
100.00%
23 / 23
 consumeAtRule
100.00% covered (success)
100.00%
1 / 1
6
100.00% covered (success)
100.00%
13 / 13
 consumeQualifiedRule
100.00% covered (success)
100.00%
1 / 1
5
100.00% covered (success)
100.00%
11 / 11
 consumeComponentValue
100.00% covered (success)
100.00%
1 / 1
7
100.00% covered (success)
100.00%
17 / 17
 consumeSimpleBlock
100.00% covered (success)
100.00%
1 / 1
5
100.00% covered (success)
100.00%
13 / 13
 consumeFunction
100.00% covered (success)
100.00%
1 / 1
5
100.00% covered (success)
100.00%
11 / 11
<?php
/**
 * @file
 * @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
 */
namespace Wikimedia\CSS\Parser;
use Wikimedia\CSS\Objects\AtRule;
use Wikimedia\CSS\Objects\ComponentValue;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\CSSFunction;
use Wikimedia\CSS\Objects\Declaration;
use Wikimedia\CSS\Objects\DeclarationList;
use Wikimedia\CSS\Objects\DeclarationOrAtRuleList;
use Wikimedia\CSS\Objects\QualifiedRule;
use Wikimedia\CSS\Objects\Rule;
use Wikimedia\CSS\Objects\RuleList;
use Wikimedia\CSS\Objects\SimpleBlock;
use Wikimedia\CSS\Objects\Stylesheet;
use Wikimedia\CSS\Objects\Token;
// Note: While reading the code below, you might find that my calls to
// consumeToken() don't match what the spec says and that I never "reconsume" a
// token. The spec is overcomplicated and confused with respect to the
// "current input token" and the "next input token"; in practice things are
// pretty simple: every "consume an X" is called with the current input token
// being the first token of X, and returns with the current input token being
// the last token of X (or EOF if X ends at EOF).
// Also of note is that, since our Tokenizer can only return a stream of tokens
// rather than a stream of component values, the consume functions here only
// consider tokens. ComponentValueList::toTokenArray() may be used to convert a
// list of component values to a list of tokens if necessary.
/**
 * Parse CSS into a structure for further processing.
 *
 * This implements the CSS Syntax Module Level 3 candidate recommendation.
 * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/
 *
 * The usual entry points are:
 *  - Parser::parseStylesheet() to parse a stylesheet or the contents of a <style> tag.
 *  - Parser::parseDeclarationList() to parse an inline style attribute
 */
class Parser {
    /** Maximum depth of nested ComponentValues */
    private const CV_DEPTH_LIMIT = 100; // Arbitrary number that seems like it should be enough

    /** @var Tokenizer Source of the tokens being parsed */
    protected $tokenizer;

    /** @var Token|null The most recently consumed token */
    protected $currentToken = null;

    /** @var array Parse errors. Each error is [ string $tag, int $line, int $pos ] */
    protected $parseErrors = [];

    /** @var int Recursion depth, incremented in self::consumeComponentValue() */
    protected $cvDepth = 0;

    /**
     * @param Tokenizer $tokenizer CSS Tokenizer
     */
    public function __construct( Tokenizer $tokenizer ) {
        $this->tokenizer = $tokenizer;
    }

    /**
     * Create a Parser for a CSS string
     * @param string $source CSS to parse.
     * @param array $options Configuration options, see DataSourceTokenizer::__construct(). Also,
     *  - convert: (array) If specified, detect the encoding as defined in the
     *    CSS spec. The value is passed as the $encodings argument to
     *    Encoder::convert().
     * @return static
     */
    public static function newFromString( $source, array $options = [] ) {
        // Re-encode the input first, but only when the caller asked for it.
        if ( isset( $options['convert'] ) ) {
            $source = Encoder::convert( $source, $options['convert'] );
        }
        return static::newFromDataSource( new StringDataSource( $source ), $options );
    }

    /**
     * Create a Parser for a CSS DataSource
     * @param DataSource $source CSS to parse.
     * @param array $options Configuration options, see DataSourceTokenizer::__construct().
     * @return static
     */
    public static function newFromDataSource( DataSource $source, array $options = [] ) {
        $tokenizer = new DataSourceTokenizer( $source, $options );
        return new static( $tokenizer );
    }

    /**
     * Create a Parser for a list of Tokens
     * @param Token[] $tokens Token-stream to parse
     * @param Token|null $eof EOF-token, passed through to TokenListTokenizer
     * @return static
     */
    public static function newFromTokens( array $tokens, ?Token $eof = null ) {
        // The explicit "?Token" is equivalent to the former implicit nullable
        // "Token $eof = null", which is deprecated as of PHP 8.4.
        $tokenizer = new TokenListTokenizer( $tokens, $eof );
        return new static( $tokenizer );
    }

    /**
     * Consume a token
     *
     * Advances self::$currentToken to the tokenizer's next token, unless the
     * current token is already EOF (in which case this is a no-op). Any parse
     * errors the tokenizer recorded are moved into this parser's error list.
     */
    protected function consumeToken() {
        if ( !$this->currentToken || $this->currentToken->type() !== Token::T_EOF ) {
            $this->currentToken = $this->tokenizer->consumeToken();
            // Copy any parse errors encountered
            foreach ( $this->tokenizer->getParseErrors() as $error ) {
                $this->parseErrors[] = $error;
            }
            // Clear them on the tokenizer so they are not copied again next time
            $this->tokenizer->clearParseErrors();
        }
    }

    /**
     * Consume a token, also consuming any following whitespace (and comments)
     *
     * On return the current token is the first non-whitespace token after the
     * one that was current on entry.
     */
    protected function consumeTokenAndWhitespace() {
        do {
            $this->consumeToken();
        } while ( $this->currentToken->type() === Token::T_WHITESPACE );
    }

    /**
     * Return all parse errors seen so far
     * @return array Array of [ string $tag, int $line, int $pos, ... ]
     */
    public function getParseErrors() {
        return $this->parseErrors;
    }

    /**
     * Clear parse errors
     */
    public function clearParseErrors() {
        $this->parseErrors = [];
    }

    /**
     * Record a parse error
     * @param string $tag Error tag
     * @param Token $token Report the error as starting at this token.
     * @param array $data Extra data about the error, appended after the position.
     */
    protected function parseError( $tag, Token $token, array $data = [] ) {
        [ $line, $pos ] = $token->getPosition();
        $this->parseErrors[] = array_merge( [ $tag, $line, $pos ], $data );
    }

    /**
     * Parse a stylesheet
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-stylesheet
     * @note Per the Editor's Draft, if the first rule is an at-rule named
     *  "charset" it will be silently dropped. If you're not using the provided
     *  Sanitizer classes to further sanitize the CSS, you'll want to manually
     *  filter out any other such rules before stringifying the stylesheet
     *  and/or prepend `@charset "utf-8";` after stringifying it.
     * @return Stylesheet
     */
    public function parseStylesheet() {
        $this->consumeToken(); // Move to the first token
        $list = $this->consumeRuleList( true );
        // Drop @charset per the Editor's Draft
        if ( isset( $list[0] ) && $list[0] instanceof AtRule &&
            !strcasecmp( $list[0]->getName(), 'charset' )
        ) {
            $list->remove( 0 );
            $list->rewind();
        }
        return new Stylesheet( $list );
    }

    /**
     * Parse a list of rules
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-list-of-rules
     * @return RuleList
     */
    public function parseRuleList() {
        $this->consumeToken(); // Move to the first token
        return $this->consumeRuleList( false );
    }

    /**
     * Parse a rule
     *
     * Exactly one rule, optionally surrounded by whitespace, must make up the
     * whole input.
     *
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-rule
     * @return Rule|null Null on a syntax error; a parse error is recorded when
     *  the input is empty ('unexpected-eof') or when anything follows the rule
     *  ('expected-eof').
     */
    public function parseRule() {
        // 1. and 2.
        $this->consumeTokenAndWhitespace();
        // 3.
        if ( $this->currentToken->type() === Token::T_EOF ) {
            $this->parseError( 'unexpected-eof', $this->currentToken ); // "return a syntax error"?
            return null;
        }
        if ( $this->currentToken->type() === Token::T_AT_KEYWORD ) {
            $rule = $this->consumeAtRule();
        } else {
            $rule = $this->consumeQualifiedRule();
            if ( !$rule ) {
                // consumeQualifiedRule() hit EOF before finding a block
                return null;
            }
        }
        // 4.
        $this->consumeTokenAndWhitespace();
        // 5.
        if ( $this->currentToken->type() === Token::T_EOF ) {
            return $rule;
        } else {
            $this->parseError( 'expected-eof', $this->currentToken ); // "return a syntax error"?
            return null;
        }
    }

    /**
     * Parse a declaration
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-declaration
     * @return Declaration|null Null on a syntax error; 'expected-ident' is
     *  recorded when the first non-whitespace token is not an identifier.
     */
    public function parseDeclaration() {
        // 1. and 2.
        $this->consumeTokenAndWhitespace();
        // 3.
        if ( $this->currentToken->type() !== Token::T_IDENT ) {
            $this->parseError( 'expected-ident', $this->currentToken ); // "return a syntax error"?
            return null;
        }
        // 4.
        $declaration = $this->consumeDeclaration();
        // Declarations always run to EOF, no need to check.
        return $declaration;
    }

    /**
     * Parse a list of declarations
     * @note This is not the entry point the standard calls "parse a list of declarations",
     *  see self::parseDeclarationOrAtRuleList()
     * @return DeclarationList
     */
    public function parseDeclarationList() {
        $this->consumeToken(); // Move to the first token
        return $this->consumeDeclarationOrAtRuleList( false );
    }

    /**
     * Parse a list of declarations and at-rules
     * @note This is the entry point the standard calls "parse a list of declarations"
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-list-of-declarations
     * @return DeclarationOrAtRuleList
     */
    public function parseDeclarationOrAtRuleList() {
        $this->consumeToken(); // Move to the first token
        return $this->consumeDeclarationOrAtRuleList();
    }

    /**
     * Parse a (non-whitespace) component value
     *
     * Exactly one component value, optionally surrounded by whitespace, must
     * make up the whole input.
     *
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-component-value
     * @return ComponentValue|null Null on a syntax error; a parse error is
     *  recorded when the input is empty ('unexpected-eof') or when anything
     *  follows the value ('expected-eof').
     */
    public function parseComponentValue() {
        // 1. and 2.
        $this->consumeTokenAndWhitespace();
        // 3.
        if ( $this->currentToken->type() === Token::T_EOF ) {
            $this->parseError( 'unexpected-eof', $this->currentToken ); // "return a syntax error"?
            return null;
        }
        // 4.
        $value = $this->consumeComponentValue();
        // The spec says to return a syntax error if nothing is returned, but
        // that can never happen and the Editor's Draft removed that language.
        // 5.
        $this->consumeTokenAndWhitespace();
        // 6.
        if ( $this->currentToken->type() === Token::T_EOF ) {
            return $value;
        } else {
            $this->parseError( 'expected-eof', $this->currentToken ); // "return a syntax error"?
            return null;
        }
    }

    /**
     * Parse a list of component values
     *
     * Component values are collected until the EOF token is reached; the EOF
     * token itself is not added to the list.
     *
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-list-of-component-values
     * @return ComponentValueList
     */
    public function parseComponentValueList() {
        $list = new ComponentValueList();
        while ( true ) {
            $this->consumeToken(); // Move to the first/next token
            $value = $this->consumeComponentValue();
            if ( $value instanceof Token && $value->type() === Token::T_EOF ) {
                break;
            }
            $list->add( $value );
        }
        return $list;
    }

    /**
     * Consume a list of rules
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-list-of-rules
     * @param bool $topLevel Determines the behavior when CDO and CDC tokens are encountered:
     *  when true they are skipped, otherwise they start a qualified rule.
     * @return RuleList
     */
    protected function consumeRuleList( $topLevel ) {
        $list = new RuleList();
        // @phan-suppress-next-line PhanInfiniteLoop
        while ( true ) {
            // False/null means "nothing to add for this token"
            $rule = false;
            switch ( $this->currentToken->type() ) {
                case Token::T_WHITESPACE:
                    break;
                case Token::T_EOF:
                    // Leave the outer loop, not just the switch
                    break 2;
                case Token::T_CDO:
                case Token::T_CDC:
                    if ( $topLevel ) {
                        // Do nothing
                    } else {
                        $rule = $this->consumeQualifiedRule();
                    }
                    break;
                case Token::T_AT_KEYWORD:
                    $rule = $this->consumeAtRule();
                    break;
                default:
                    $rule = $this->consumeQualifiedRule();
                    break;
            }
            if ( $rule ) {
                $list->add( $rule );
            }
            // Step past the last token of whatever was just handled
            $this->consumeToken();
        }
        return $list;
    }

    /**
     * Consume a list of declarations and at-rules
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-list-of-declarations
     * @param bool $allowAtRules Whether to allow at-rules. This flag is not in
     *  the spec, and is used to implement the non-spec self::parseDeclarationList().
     *  When false, at-rules are consumed and discarded with an
     *  'unexpected-token-in-declaration-list' parse error.
     * @return DeclarationOrAtRuleList|DeclarationList
     */
    protected function consumeDeclarationOrAtRuleList( $allowAtRules = true ) {
        $list = $allowAtRules ? new DeclarationOrAtRuleList() : new DeclarationList();
        // @phan-suppress-next-line PhanInfiniteLoop
        while ( true ) {
            // False/null means "nothing to add for this token"
            $declaration = false;
            switch ( $this->currentToken->type() ) {
                case Token::T_WHITESPACE:
                    break;
                case Token::T_SEMICOLON:
                    $declaration = null;
                    break;
                case Token::T_EOF:
                    // Leave the outer loop, not just the switch
                    break 2;
                case Token::T_AT_KEYWORD:
                    if ( $allowAtRules ) {
                        $declaration = $this->consumeAtRule();
                    } else {
                        $this->parseError( 'unexpected-token-in-declaration-list', $this->currentToken );
                        // Still consume the at-rule so parsing resumes after it
                        $this->consumeAtRule();
                        $declaration = null;
                    }
                    break;
                case Token::T_IDENT:
                    // The draft changes this to ComponentValue instead of Token, which makes more sense.
                    // Gather everything up to the next semicolon or EOF...
                    $cvs = [];
                    do {
                        $cvs[] = $this->consumeComponentValue();
                        $this->consumeToken();
                    } while (
                        $this->currentToken->type() !== Token::T_SEMICOLON &&
                        $this->currentToken->type() !== Token::T_EOF
                    );
                    // ...and parse it as one declaration using a sub-parser over those tokens.
                    $tokens = ( new ComponentValueList( $cvs ) )->toTokenArray();
                    $parser = static::newFromTokens( $tokens, $this->currentToken );
                    $parser->consumeToken(); // Load that first token
                    $declaration = $parser->consumeDeclaration();
                    // Propagate any errors
                    $this->parseErrors = array_merge( $this->parseErrors, $parser->parseErrors );
                    break;
                default:
                    $this->parseError( 'unexpected-token-in-declaration-list', $this->currentToken );
                    // Discard everything up to the next semicolon or EOF
                    do {
                        $this->consumeComponentValue();
                        $this->consumeToken();
                    } while (
                        $this->currentToken->type() !== Token::T_SEMICOLON &&
                        $this->currentToken->type() !== Token::T_EOF
                    );
                    $declaration = null;
                    break;
            }
            if ( $declaration ) {
                $list->add( $declaration );
            }
            // Step past the last token of whatever was just handled
            $this->consumeToken();
        }
        return $list;
    }

    /**
     * Consume a declaration
     *
     * The current token is used as the declaration's first token, and
     * component values are read until EOF. A trailing "!important" is removed
     * from the value and recorded via Declaration::setImportant().
     *
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-declaration
     * @return Declaration|null Null (with an 'expected-colon' parse error) if
     *  the first token is not followed by a colon.
     */
    protected function consumeDeclaration() {
        $declaration = new Declaration( $this->currentToken );
        // 2.
        $this->consumeTokenAndWhitespace();
        // 3.
        if ( $this->currentToken->type() !== Token::T_COLON ) {
            $this->parseError( 'expected-colon', $this->currentToken );
            return null;
        }
        $this->consumeToken();
        // 4.
        $value = $declaration->getValue();
        // $l1 and $l2 are the indexes in $value of the second-to-last and last
        // non-whitespace component values seen so far (-1 if none yet).
        $l1 = $l2 = -1;
        while ( $this->currentToken->type() !== Token::T_EOF ) {
            // The draft changes this to ComponentValue instead of Token, which makes more sense.
            $value->add( $this->consumeComponentValue() );
            if ( $this->currentToken->type() !== Token::T_WHITESPACE ) {
                $l1 = $l2;
                $l2 = $value->count() - 1;
            }
            $this->consumeToken();
        }
        // 5.
        $v1 = $l1 >= 0 ? $value[$l1] : null;
        $v2 = $l2 >= 0 ? $value[$l2] : null;
        if ( $v1 instanceof Token && $v1->type() === Token::T_DELIM && $v1->value() === '!' &&
            $v2 instanceof Token && $v2->type() === Token::T_IDENT &&
            !strcasecmp( $v2->value(), 'important' )
        ) {
            // Technically it doesn't say to remove any whitespace within/after
            // the "!important" too, but it makes sense to do so.
            // This drops everything from the "!" through the end of the value.
            while ( isset( $value[$l1] ) ) {
                $value->remove( $l1 );
            }
            $declaration->setImportant( true );
        }
        // 6.
        return $declaration;
    }

    /**
     * Consume an at-rule
     *
     * The current token is used as the rule's first token. Component values
     * are added to the prelude until a semicolon, a "{" block, or EOF ends the
     * rule.
     *
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-an-at-rule
     * @return AtRule
     */
    protected function consumeAtRule() {
        $rule = new AtRule( $this->currentToken );
        $this->consumeToken();
        while ( true ) {
            switch ( $this->currentToken->type() ) {
                case Token::T_SEMICOLON:
                    return $rule;
                case Token::T_EOF:
                    // Parse error from the editor's draft as of 2017-01-11
                    // (skipped for the flagged EOF from consumeComponentValue(),
                    // which has already reported its own error)
                    if ( $this->currentToken->typeFlag() !== 'recursion-depth-exceeded' ) {
                        $this->parseError( 'unexpected-eof-in-rule', $this->currentToken );
                    }
                    return $rule;
                case Token::T_LEFT_BRACE:
                    $rule->setBlock( $this->consumeSimpleBlock() );
                    return $rule;
                default:
                    $rule->getPrelude()->add( $this->consumeComponentValue() );
                    break;
            }
            $this->consumeToken();
        }
        // @codeCoverageIgnoreStart
    }
    // @codeCoverageIgnoreEnd

    /**
     * Consume a qualified rule
     *
     * The current token is used as the rule's first token. Component values
     * are added to the prelude until a "{" block ends the rule.
     *
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-qualified-rule
     * @return QualifiedRule|null Null if EOF is reached before a block is found.
     */
    protected function consumeQualifiedRule() {
        $rule = new QualifiedRule( $this->currentToken );
        while ( true ) {
            switch ( $this->currentToken->type() ) {
                case Token::T_EOF:
                    // Skipped for the flagged EOF from consumeComponentValue(),
                    // which has already reported its own error
                    if ( $this->currentToken->typeFlag() !== 'recursion-depth-exceeded' ) {
                        $this->parseError( 'unexpected-eof-in-rule', $this->currentToken );
                    }
                    return null;
                case Token::T_LEFT_BRACE:
                    $rule->setBlock( $this->consumeSimpleBlock() );
                    return $rule;
                default:
                    $rule->getPrelude()->add( $this->consumeComponentValue() );
                    break;
            }
            $this->consumeToken();
        }
        // @codeCoverageIgnoreStart
    }
    // @codeCoverageIgnoreEnd

    /**
     * Consume a component value
     *
     * Returns a SimpleBlock for "{", "[" or "(", a CSSFunction for a function
     * token, and otherwise the current token itself. Nesting is tracked in
     * self::$cvDepth; once it exceeds self::CV_DEPTH_LIMIT the remaining input
     * is discarded and a specially-flagged EOF token is returned.
     *
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-component-value
     * @return ComponentValue
     */
    protected function consumeComponentValue() {
        // self:: rather than static::, since the constant is private and
        // therefore cannot be meaningfully overridden through late static binding.
        if ( ++$this->cvDepth > self::CV_DEPTH_LIMIT ) {
            $this->parseError( 'recursion-depth-exceeded', $this->currentToken );
            // There's no way to safely recover from this without more recursion.
            // So just eat the rest of the input, then return a
            // specially-flagged EOF so we can avoid 100 "unexpected EOF"
            // errors.
            $position = $this->currentToken->getPosition();
            while ( $this->currentToken->type() !== Token::T_EOF ) {
                $this->consumeToken();
            }
            $this->currentToken = new Token( Token::T_EOF, [
                'position' => $position,
                'typeFlag' => 'recursion-depth-exceeded'
            ] );
        }
        switch ( $this->currentToken->type() ) {
            case Token::T_LEFT_BRACE:
            case Token::T_LEFT_BRACKET:
            case Token::T_LEFT_PAREN:
                $ret = $this->consumeSimpleBlock();
                break;
            case Token::T_FUNCTION:
                $ret = $this->consumeFunction();
                break;
            default:
                $ret = $this->currentToken;
                break;
        }
        $this->cvDepth--;
        // @phan-suppress-next-line PhanTypeMismatchReturnNullable $ret always set
        return $ret;
    }

    /**
     * Consume a simple block
     *
     * The current token is used as the block's opening token. Component values
     * are added until the matching end token (as reported by
     * SimpleBlock::getEndTokenType()) or EOF is reached.
     *
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-simple-block
     * @return SimpleBlock
     */
    protected function consumeSimpleBlock() {
        $block = new SimpleBlock( $this->currentToken );
        $endTokenType = $block->getEndTokenType();
        $this->consumeToken();
        while ( true ) {
            switch ( $this->currentToken->type() ) {
                case Token::T_EOF:
                    // Parse error from the editor's draft as of 2017-01-12
                    // (skipped for the flagged EOF from consumeComponentValue(),
                    // which has already reported its own error)
                    if ( $this->currentToken->typeFlag() !== 'recursion-depth-exceeded' ) {
                        $this->parseError( 'unexpected-eof-in-block', $this->currentToken );
                    }
                    return $block;
                case $endTokenType:
                    return $block;
                default:
                    $block->getValue()->add( $this->consumeComponentValue() );
                    break;
            }
            $this->consumeToken();
        }
        // @codeCoverageIgnoreStart
    }
    // @codeCoverageIgnoreEnd

    /**
     * Consume a function
     *
     * The current token is used as the function's opening token. Component
     * values are added until a ")" token or EOF is reached.
     *
     * @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-function
     * @return CSSFunction
     */
    protected function consumeFunction() {
        $function = new CSSFunction( $this->currentToken );
        $this->consumeToken();
        while ( true ) {
            switch ( $this->currentToken->type() ) {
                case Token::T_EOF:
                    // Parse error from the editor's draft as of 2017-01-12
                    // (skipped for the flagged EOF from consumeComponentValue(),
                    // which has already reported its own error)
                    if ( $this->currentToken->typeFlag() !== 'recursion-depth-exceeded' ) {
                        $this->parseError( 'unexpected-eof-in-function', $this->currentToken );
                    }
                    return $function;
                case Token::T_RIGHT_PAREN:
                    return $function;
                default:
                    $function->getValue()->add( $this->consumeComponentValue() );
                    break;
            }
            $this->consumeToken();
        }
        // @codeCoverageIgnoreStart
    }
    // @codeCoverageIgnoreEnd
}