Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
97.96% |
96 / 98 |
|
71.43% |
5 / 7 |
CRAP | |
0.00% |
0 / 1 |
| VueComponentParser | |
97.96% |
96 / 98 |
|
71.43% |
5 / 7 |
39 | |
0.00% |
0 / 1 |
| parse | |
95.00% |
19 / 20 |
|
0.00% |
0 / 1 |
8 | |||
| findUniqueTags | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
4 | |||
| validateAttributes | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
5 | |||
| getStyleAndLang | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
| getTemplateHtml | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
| newTemplateFormatter | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
9 | |||
| newFilteringDispatcher | |
94.74% |
18 / 19 |
|
0.00% |
0 / 1 |
9.01 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * @license GPL-2.0-or-later |
| 4 | * @file |
| 5 | * @author Roan Kattouw |
| 6 | */ |
| 7 | |
| 8 | namespace MediaWiki\ResourceLoader; |
| 9 | |
| 10 | use InvalidArgumentException; |
| 11 | use Wikimedia\Parsoid\DOM\Element; |
| 12 | use Wikimedia\Parsoid\Utils\DOMCompat; |
| 13 | use Wikimedia\Parsoid\Utils\DOMUtils; |
| 14 | use Wikimedia\RemexHtml\HTMLData; |
| 15 | use Wikimedia\RemexHtml\Serializer\HtmlFormatter; |
| 16 | use Wikimedia\RemexHtml\Serializer\Serializer; |
| 17 | use Wikimedia\RemexHtml\Serializer\SerializerNode; |
| 18 | use Wikimedia\RemexHtml\Tokenizer\Attributes; |
| 19 | use Wikimedia\RemexHtml\Tokenizer\Tokenizer; |
| 20 | use Wikimedia\RemexHtml\TreeBuilder\Dispatcher; |
| 21 | use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder; |
| 22 | |
| 23 | /** |
| 24 | * Parser for Vue single file components (.vue files). See parse() for usage. |
| 25 | * |
| 26 | * @ingroup ResourceLoader |
| 27 | * @internal |
| 28 | */ |
| 29 | class VueComponentParser { |
/**
 * Split a Vue single file component into its script, template and style parts.
 *
 * The returned associative array has the following keys:
 * - 'script': The trimmed text content of the <script> tag
 * - 'template': The HTML inside the <template> tag
 * - 'style': The trimmed text content of the <style> tag, or null if there is no <style> tag
 * - 'styleLang': The language of 'style' ('css' or 'less'), or null if there is no <style> tag
 *
 * Recognized keys in $options:
 * - 'minifyTemplate': Whether to minify the HTML in the template tag. This removes
 *   HTML comments and strips whitespace. Default: false
 *
 * @param string $html HTML with <script>, <template> and <style> tags at the top level
 * @param array $options Associative array of options
 * @return array
 * @throws InvalidArgumentException If the input is invalid
 */
public function parse( string $html, array $options = [] ): array {
	// Prefixing <body> ensures that <script>, <template>, etc. end up in the <body>,
	// not in the <head>
	$body = DOMCompat::getBody( DOMUtils::parseHTML( "<body>$html" ) );
	if ( !$body ) {
		throw new InvalidArgumentException( 'Parsed DOM did not contain a <body> tag' );
	}

	// The tags may appear in any order, but only at the top level and at most once each.
	$tags = $this->findUniqueTags( $body, [ 'script', 'template', 'style' ] );

	// <script> and <template> are mandatory; <style> is optional.
	foreach ( [ 'script', 'template' ] as $required ) {
		if ( !isset( $tags[ $required ] ) ) {
			throw new InvalidArgumentException( "No <$required> tag found" );
		}
	}

	// Neither <script> nor <template> may carry attributes; <style> may only carry 'lang'.
	$this->validateAttributes( $tags['script'], [] );
	$this->validateAttributes( $tags['template'], [] );

	$style = null;
	$styleLang = null;
	if ( isset( $tags['style'] ) ) {
		$this->validateAttributes( $tags['style'], [ 'lang' ] );
		[ 'style' => $style, 'lang' => $styleLang ] = $this->getStyleAndLang( $tags['style'] );
	}

	// The template is extracted from the raw input rather than from the DOM built above
	$minify = $options['minifyTemplate'] ?? false;
	$template = $this->getTemplateHtml( $html, $minify );

	return [
		'script' => trim( $tags['script']->textContent ),
		'template' => $template,
		'style' => $style,
		'styleLang' => $styleLang
	];
}
| 84 | |
/**
 * Collect the element children of a node that have one of the given tag names, requiring
 * that each tag name occurs at most once.
 *
 * Only the direct element children of $rootNode are inspected; their descendants are not.
 *
 * @param Element $rootNode Node whose children to look at
 * @param string[] $tagNames Tag names to look for (must be all lowercase)
 * @return Element[] Associative array mapping each tag name that was found to its DOM node
 * @throws InvalidArgumentException If one of the tag names occurs more than once
 */
private function findUniqueTags( Element $rootNode, array $tagNames ): array {
	$found = [];
	$child = DOMCompat::getFirstElementChild( $rootNode );
	while ( $child !== null ) {
		$name = DOMUtils::nodeName( $child );
		if ( in_array( $name, $tagNames ) ) {
			if ( isset( $found[ $name ] ) ) {
				throw new InvalidArgumentException( "More than one <$name> tag found" );
			}
			$found[ $name ] = $child;
		}
		// Move sideways only; children of $child are never visited
		$child = DOMCompat::getNextElementSibling( $child );
	}
	return $found;
}
| 108 | |
/**
 * Ensure that a node carries no attributes other than the ones explicitly allowed.
 *
 * @param Element $node Node to check
 * @param list<string> $allowedAttributes Attributes the node is allowed to have; pass an
 *   empty list to forbid attributes altogether
 * @throws InvalidArgumentException If the node has an attribute it's not allowed to have
 */
private function validateAttributes( Element $node, array $allowedAttributes ): void {
	$attributes = DOMCompat::attributes( $node );

	if ( !$allowedAttributes ) {
		// Nothing is allowed, so the mere presence of an attribute is an error
		if ( count( $attributes ) > 0 ) {
			$tag = DOMUtils::nodeName( $node );
			throw new InvalidArgumentException( "<{$tag}> may not have any attributes" );
		}
		return;
	}

	foreach ( $attributes as $attrName => $unused ) {
		if ( in_array( $attrName, $allowedAttributes ) ) {
			continue;
		}
		$tag = DOMUtils::nodeName( $node );
		throw new InvalidArgumentException( "<{$tag}> may not have the {$attrName} attribute" );
	}
}
| 129 | |
/**
 * Extract the contents and the language of a <style> tag.
 *
 * The language is taken from the 'lang' attribute, falling back to 'css' when no value
 * is returned for it. Only 'css' and 'less' are accepted.
 *
 * @param Element $styleNode The <style> tag.
 * @return array [ 'style' => string, 'lang' => string ]
 * @throws InvalidArgumentException If the language is neither 'css' nor 'less'
 */
private function getStyleAndLang( Element $styleNode ): array {
	$lang = DOMCompat::getAttribute( $styleNode, 'lang' ) ?? 'css';
	if ( !in_array( $lang, [ 'css', 'less' ], true ) ) {
		throw new InvalidArgumentException(
			"<style lang=\"$lang\"> is invalid, lang must be \"css\" or \"less\""
		);
	}

	return [
		'style' => trim( $styleNode->textContent ),
		'lang' => $lang,
	];
}
| 148 | |
/**
 * Get the HTML contents of the <template> tag, optionally minified.
 *
 * To work around a bug in PHP's DOMDocument where attributes like @click get mangled,
 * the entire file is re-parsed with a Remex parse+serialize pipeline. A custom dispatcher
 * narrows the event stream down to the contents of the <template> tag, and a custom
 * formatter takes care of minification. Because everything stays in Remex and is never
 * converted to DOM, the attribute mangling issue is avoided.
 *
 * @param string $html HTML that contains a <template> tag somewhere
 * @param bool $minify Whether to minify the output (remove comments, strip whitespace)
 * @return string HTML contents of the template tag
 */
private function getTemplateHtml( string $html, bool $minify ): string {
	// Pipeline, from the output end backwards: formatter <- serializer <- tree builder
	// <- filtering dispatcher <- tokenizer
	$formatter = $this->newTemplateFormatter( $minify );
	$serializer = new Serializer( $formatter );
	$treeBuilder = new TreeBuilder( $serializer, [ 'ignoreErrors' => true ] );
	$dispatcher = $this->newFilteringDispatcher( $treeBuilder, 'template' );
	$tokenizer = new Tokenizer( $dispatcher, $html, [ 'ignoreErrors' => true ] );

	$tokenizer->execute( [
		'fragmentNamespace' => HTMLData::NS_HTML,
		'fragmentName' => 'template',
	] );

	return trim( $serializer->getResult() );
}
| 173 | |
/**
 * Build the formatter used to serialize the contents of the template.
 *
 * The returned HtmlFormatter subclass never emits the <!doctype html> tag. With
 * $minify=false it otherwise falls through to HtmlFormatter for everything. With
 * $minify=true it additionally removes comments, collapses runs of whitespace in text
 * and strips leading/trailing whitespace from element contents, except for HTML elements
 * listed in $prefixLfElements (<pre>/<listing>/<textarea>), which are left alone.
 *
 * @param bool $minify If true, remove comments and strip whitespace
 * @return HtmlFormatter
 */
private function newTemplateFormatter( bool $minify ): HtmlFormatter {
	return new class( $minify ) extends HtmlFormatter {
		private bool $minify;

		public function __construct( bool $minify ) {
			$this->minify = $minify;
		}

		/** @inheritDoc */
		public function startDocument( $fragmentNamespace, $fragmentName ) {
			// Emitting nothing here is what removes <!doctype html>
			return '';
		}

		/** @inheritDoc */
		public function comment( SerializerNode $parent, $text ) {
			// Minified output carries no comments at all
			return $this->minify ? '' : parent::comment( $parent, $text );
		}

		/** @inheritDoc */
		public function characters( SerializerNode $parent, $text, $start, $length ) {
			$keepVerbatim = !$this->minify || (
				// Don't touch <pre>/<listing>/<textarea> nodes
				$parent->namespace === HTMLData::NS_HTML &&
				isset( $this->prefixLfElements[ $parent->name ] )
			);
			if ( $keepVerbatim ) {
				return parent::characters( $parent, $text, $start, $length );
			}

			// Collapse runs of adjacent whitespace, and convert all whitespace to spaces
			$collapsed = preg_replace( '/[ \r\n\t]+/', ' ', substr( $text, $start, $length ) );
			return parent::characters( $parent, $collapsed, 0, strlen( $collapsed ) );
		}

		/** @inheritDoc */
		public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
			// Don't touch <pre>/<listing>/<textarea> nodes
			$isVerbatimElement = $node->namespace === HTMLData::NS_HTML &&
				isset( $this->prefixLfElements[ $node->name ] );

			if ( $this->minify && $contents !== null && !$isVerbatimElement ) {
				// Remove leading and trailing whitespace
				$contents = preg_replace( '/(^[ \r\n\t]+)|([\r\n\t ]+$)/', '', $contents );
			}
			return parent::element( $parent, $node, $contents );
		}
	};
}
| 239 | |
/**
 * Create a Dispatcher that only dispatches tree events occurring inside a tag with a
 * certain name.
 *
 * The returned Dispatcher subclass tracks how deeply the tokenizer is nested inside tags
 * named $nodeName, and forwards start tags, end tags, characters and comments to the
 * parent Dispatcher only while that depth is greater than zero. The outermost opening and
 * closing tags themselves are not forwarded, which effectively filters the tree down to
 * the contents of that tag.
 *
 * While tokenizing, the dispatcher throws InvalidArgumentException if a second,
 * non-nested opening tag with that name is encountered.
 *
 * @param TreeBuilder $treeBuilder
 * @param string $nodeName Tag name to filter for
 * @return Dispatcher
 */
private function newFilteringDispatcher( TreeBuilder $treeBuilder, string $nodeName ): Dispatcher {
	return new class( $treeBuilder, $nodeName ) extends Dispatcher {
		private string $nodeName;
		/** Number of currently open tags named $nodeName; never negative */
		private int $nodeDepth = 0;
		/** Whether an opening tag named $nodeName has been seen at any point */
		private bool $seenTag = false;

		public function __construct( TreeBuilder $treeBuilder, string $nodeName ) {
			$this->nodeName = $nodeName;
			parent::__construct( $treeBuilder );
		}

		/** @inheritDoc */
		public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
			// Forward before updating the depth, so that the outermost opening tag
			// itself is not part of the output but nested ones are
			if ( $this->nodeDepth > 0 ) {
				parent::startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
			}

			if ( $name === $this->nodeName ) {
				if ( $this->nodeDepth === 0 && $this->seenTag ) {
					// This is the second opening tag, not nested in the first one
					throw new InvalidArgumentException( "More than one <{$this->nodeName}> tag found" );
				}
				$this->nodeDepth++;
				$this->seenTag = true;
			}
		}

		/** @inheritDoc */
		public function endTag( $name, $sourceStart, $sourceLength ) {
			// Only count closing tags that match an opening tag we are inside of. A stray
			// closing tag at depth 0 must not push the depth below zero: a negative depth
			// would be treated as "inside the tag" and invert the filter.
			if ( $name === $this->nodeName && $this->nodeDepth > 0 ) {
				$this->nodeDepth--;
			}
			// Forward after updating the depth, so that the outermost closing tag
			// itself is not part of the output
			if ( $this->nodeDepth > 0 ) {
				parent::endTag( $name, $sourceStart, $sourceLength );
			}
		}

		/** @inheritDoc */
		public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth > 0 ) {
				parent::characters( $text, $start, $length, $sourceStart, $sourceLength );
			}
		}

		/** @inheritDoc */
		public function comment( $text, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth > 0 ) {
				parent::comment( $text, $sourceStart, $sourceLength );
			}
		}
	};
}
| 299 | } |