Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
100.00% |
52 / 52 |
|
100.00% |
9 / 9 |
CRAP | |
100.00% |
1 / 1 |
| Matcher | |
100.00% |
52 / 52 |
|
100.00% |
9 / 9 |
28 | |
100.00% |
1 / 1 |
| create | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| capture | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| matchAgainst | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
5 | |||
| collectSignificantWhitespace | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
| markSignificantWhitespace | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
7 | |||
| getDefaultOptions | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| setDefaultOptions | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| next | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
| makeMatch | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
5 | |||
| generateMatches | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
| 1 | <?php |
| 2 | /** |
| 3 | * @file |
| 4 | * @license https://opensource.org/licenses/Apache-2.0 Apache-2.0 |
| 5 | */ |
| 6 | |
| 7 | namespace Wikimedia\CSS\Grammar; |
| 8 | |
| 9 | use Iterator; |
| 10 | use Wikimedia\CSS\Objects\ComponentValueList; |
| 11 | use Wikimedia\CSS\Objects\CSSFunction; |
| 12 | use Wikimedia\CSS\Objects\SimpleBlock; |
| 13 | use Wikimedia\CSS\Objects\Token; |
| 14 | |
/**
 * Abstract foundation shared by all grammar matchers.
 *
 * Both [CSS Syntax Level 3][SYN3] and [CSS Values Level 4][VAL4] describe
 * constructs such as selectors and property values by means of a mostly
 * context-free grammar. A Matcher is an object built to represent a piece
 * of that grammar, which can then test whether a given ComponentValueList
 * actually conforms to it.
 *
 * [SYN3]: https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/
 * [VAL4]: https://www.w3.org/TR/2024/WD-css-values-4-20240312/
 */
abstract class Matcher {

	/** @var string|null Capture name handed to every GrammarMatch built by makeMatch() */
	protected $captureName = null;

	/**
	 * @var array Options self::matchAgainst() falls back to when the caller omits them
	 *  - skip-whitespace: (bool) Permit whitespace between any pair of tokens
	 *  - nonterminal: (bool) Accept a match that does not consume all of $values
	 *  - mark-significance: (bool) After a successful match, swap T_WHITESPACE
	 *    tokens where needed so that each one indicates whether it is significant.
	 */
	protected $defaultOptions = [
		'skip-whitespace' => true,
		'nonterminal' => false,
		'mark-significance' => false,
	];

	/**
	 * Static factory: instantiate the class this method is called on.
	 * @param mixed ...$args Forwarded unchanged to static::__construct()
	 * @return static
	 */
	public static function create( ...$args ) {
		// @phan-suppress-next-line PhanParamTooManyUnpack,PhanTypeInstantiateAbstractStatic
		return new static( ...$args );
	}

	/**
	 * Produce a capturing clone of this matcher
	 *
	 * The receiver is left untouched; the clone carries $captureName and
	 * passes it to each GrammarMatch it creates, so that
	 * GrammarMatch::getName() yields a value for them. Such named matches can
	 * be fetched from the top-level GrammarMatch using
	 * GrammarMatch::getCapturedMatches().
	 *
	 * This is comparable to capturing groups in PCRE and other regular
	 * expression dialects.
	 *
	 * @param string|null $captureName Name to give the captured GrammarMatch objects
	 * @return static
	 */
	public function capture( $captureName ) {
		$copy = clone $this;
		$copy->captureName = $captureName;
		return $copy;
	}

	/**
	 * Test a list of ComponentValues against this matcher
	 *
	 * Candidates come from generateMatches(), starting at the position
	 * next() reports for the beginning of the list. The first acceptable
	 * candidate is returned: any candidate when 'nonterminal' is set,
	 * otherwise only one whose getNext() equals the length of $values.
	 *
	 * @param ComponentValueList $values
	 * @param array $options Matching options, see self::$defaultOptions.
	 *  Keys that are absent are filled in from getDefaultOptions().
	 * @return GrammarMatch|null Null when no candidate is acceptable
	 */
	public function matchAgainst( ComponentValueList $values, array $options = [] ) {
		$options += $this->getDefaultOptions();
		$first = $this->next( $values, -1, $options );
		$total = count( $values );

		foreach ( $this->generateMatches( $values, $first, $options ) as $candidate ) {
			// A terminal match has to reach the very end of the input.
			if ( !$options['nonterminal'] && $candidate->getNext() !== $total ) {
				continue;
			}

			if ( $options['mark-significance'] ) {
				self::markSignificantWhitespace(
					$values,
					$candidate,
					self::collectSignificantWhitespace( $candidate ),
					$candidate->getNext()
				);
			}
			return $candidate;
		}

		return null;
	}

	/**
	 * Gather the values of every match named 'significantWhitespace'
	 *
	 * Walks $match and, recursively, everything returned by its
	 * getCapturedMatches().
	 *
	 * @param GrammarMatch $match Match to inspect
	 * @param Token[] &$ret Accumulator shared across the recursion
	 * @return Token[] The accumulated values
	 */
	private static function collectSignificantWhitespace( GrammarMatch $match, &$ret = [] ) {
		$isSignificantWS = $match->getName() === 'significantWhitespace';
		if ( $isSignificantWS ) {
			$ret = array_merge( $ret, $match->getValues() );
		}

		foreach ( $match->getCapturedMatches() as $child ) {
			self::collectSignificantWhitespace( $child, $ret );
		}

		return $ret;
	}

	/**
	 * Flag each whitespace token as significant or insignificant
	 *
	 * A T_WHITESPACE token counts as significant exactly when that same
	 * object is present in $significantWS. A token whose current flag
	 * disagrees is replaced, both in $list and (through fixWhitespace())
	 * in $match. The contents of CSSFunction and SimpleBlock values are
	 * processed recursively over their full length.
	 *
	 * @param ComponentValueList $list List to update in place
	 * @param GrammarMatch $match Match to keep in sync with $list
	 * @param Token[] $significantWS Tokens that are to be significant
	 * @param int $end Process positions 0 through $end - 1 of $list
	 */
	private static function markSignificantWhitespace( $list, $match, $significantWS, $end ) {
		for ( $pos = 0; $pos < $end; $pos++ ) {
			$item = $list[$pos];

			if ( $item instanceof Token && $item->type() === Token::T_WHITESPACE ) {
				// Identity comparison: only the very same token object counts.
				$wanted = in_array( $item, $significantWS, true );
				if ( $wanted === $item->significant() ) {
					continue;
				}
				$replacement = $item->copyWithSignificance( $wanted );
				$match->fixWhitespace( $item, $replacement );
				$list[$pos] = $replacement;
				continue;
			}

			if ( $item instanceof CSSFunction || $item instanceof SimpleBlock ) {
				$inner = $item->getValue();
				self::markSignificantWhitespace( $inner, $match, $significantWS, count( $inner ) );
			}
		}
	}

	/**
	 * Get this Matcher's default options
	 * @return array See self::$defaultOptions
	 */
	public function getDefaultOptions() {
		return $this->defaultOptions;
	}

	/**
	 * Override some or all of this Matcher's default options
	 *
	 * Keys missing from $options keep their current default.
	 *
	 * @param array $options See self::$defaultOptions
	 * @return static $this
	 */
	public function setDefaultOptions( array $options ) {
		$merged = $options + $this->defaultOptions;
		$this->defaultOptions = $merged;
		return $this;
	}

	/**
	 * Locate the ComponentValue that follows a position, optionally
	 * stepping over whitespace
	 * @param ComponentValueList $values Input values
	 * @param int $start Current position in the input. Passing -1 yields
	 *  the first position of the input.
	 * @param array $options See self::$defaultOptions
	 * @return int Index of the next token
	 */
	protected function next( ComponentValueList $values, $start, array $options ) {
		$pos = $start + 1;

		if ( $options['skip-whitespace'] ) {
			for ( $count = count( $values ); $pos < $count; $pos++ ) {
				// @phan-suppress-next-line PhanNonClassMethodCall False positive
				if ( !( $values[$pos] instanceof Token && $values[$pos]->type() === Token::T_WHITESPACE ) ) {
					break;
				}
			}
		}

		return $pos;
	}

	/**
	 * Build a GrammarMatch
	 * @param ComponentValueList $list
	 * @param int $start
	 * @param int $end First position following the match
	 * @param GrammarMatch|null $submatch Sub-match, for capturing. When
	 *  $submatch is itself named, it is kept as a capture of the returned
	 *  GrammarMatch; when it is not, whatever its getCapturedMatches()
	 *  returns (if anything) is kept as captures of the returned
	 *  GrammarMatch instead.
	 * @param array $stack Stack supplying further submatches for capturing
	 *  (see $submatch). It is expected to be an array of arrays, the first
	 *  element of each subarray being a GrammarMatch.
	 * @return GrammarMatch
	 */
	protected function makeMatch(
		ComponentValueList $list, $start, $end, ?GrammarMatch $submatch = null, array $stack = []
	) {
		// Stack entries come first, the explicit submatch last.
		$pending = array_column( $stack, 0 );
		$pending[] = $submatch;

		$captures = [];
		while ( $pending ) {
			$current = array_shift( $pending );
			if ( !( $current instanceof GrammarMatch ) ) {
				// Nothing to keep, most likely null
				continue;
			}
			if ( $current->getName() !== null ) {
				$captures[] = $current;
				continue;
			}
			// Unnamed: its own captures take its place at the front of the queue.
			$nested = $current->getCapturedMatches();
			if ( $nested ) {
				$pending = array_merge( $nested, $pending );
			}
		}

		$length = $end - $start;
		return new GrammarMatch( $list, $start, $length, $this->captureName, $captures );
	}

	/**
	 * Enumerate the matches available at a position in a list of ComponentValues
	 *
	 * A Matcher has to work out every way in which its fragment of the
	 * grammar can consume ComponentValues beginning at a given location in
	 * the ComponentValueList, and report each of them as a GrammarMatch
	 * object. As an example, a matcher implementing `IDENT*` that is started
	 * where three IDENT tokens follow one another can match 0, 1, 2, or all
	 * 3 of them, and should therefore return an iterator over that set of
	 * GrammarMatch objects.
	 *
	 * Matchers may be composed of other matchers: `IDENT*` is likely to be
	 * a matcher for `*` that applies a matcher for `IDENT` repeatedly. In
	 * that case the `*` matcher calls the generateMatches() method of the
	 * `IDENT` matcher directly.
	 *
	 * This method is implemented as a generator by most Matchers, which
	 * avoids building the complete result set when the caller is reasonably
	 * likely to stop early.
	 *
	 * @param ComponentValueList $values
	 * @param int $start Position in $values at which to start
	 * @param array $options See self::$defaultOptions.
	 *  Always rely on the options passed in; do not consult $this->defaultOptions yourself.
	 * @return Iterator<GrammarMatch> Iterates over the set of GrammarMatch
	 *  objects describing all the ways this matcher can match.
	 */
	abstract protected function generateMatches( ComponentValueList $values, $start, array $options );
}