MediaWiki REL1_37
VueComponentParser.php
Go to the documentation of this file.
1<?php
22use Wikimedia\RemexHtml\DOM\DOMBuilder;
23use Wikimedia\RemexHtml\HTMLData;
24use Wikimedia\RemexHtml\Serializer\HtmlFormatter;
25use Wikimedia\RemexHtml\Serializer\Serializer;
26use Wikimedia\RemexHtml\Serializer\SerializerNode;
27use Wikimedia\RemexHtml\Tokenizer\Attributes;
28use Wikimedia\RemexHtml\Tokenizer\Tokenizer;
29use Wikimedia\RemexHtml\TreeBuilder\Dispatcher;
30use Wikimedia\RemexHtml\TreeBuilder\Element;
31use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder;
32
/**
 * Parse a Vue single file component, and extract the script, template and style parts.
 *
 * The input must contain exactly one top-level <script> tag and one top-level <template>
 * tag, and may contain one top-level <style> tag. <script> and <template> may not carry
 * attributes; <style> may only carry a lang attribute.
 *
 * @param string $html HTML source of the single file component
 * @param array $options Associative array of options. Only 'minifyTemplate' is read here
 *  (defaults to false); it is passed on to getTemplateHtml().
 * @return array Associative array with the keys 'script', 'template', 'style' and
 *  'styleLang'. 'style' and 'styleLang' are null when there is no <style> tag.
 * @throws Exception If a required tag is missing or any validation step fails
 */
public function parse( string $html, array $options = [] ): array {
	// Remex wraps everything in <html><head>, so the top-level tags are children of <head>
	$head = $this->parseHTML( $html )->getElementsByTagName( 'head' )->item( 0 );
	if ( !$head ) {
		throw new Exception( 'Parsed DOM did not contain a <head> tag' );
	}

	// The three tags can appear in any order, but they must be at the top level,
	// and there can only be one of each.
	$nodes = $this->findUniqueTags( $head, [ 'script', 'template', 'style' ] );
	$hasStyle = isset( $nodes['style'] );

	// <script> and <template> are mandatory, <style> is optional.
	foreach ( [ 'script', 'template' ] as $requiredTag ) {
		if ( !isset( $nodes[ $requiredTag ] ) ) {
			throw new Exception( "No <$requiredTag> tag found" );
		}
	}

	$this->validateAttributes( $nodes['script'], [] );
	$this->validateAttributes( $nodes['template'], [] );
	if ( $hasStyle ) {
		$this->validateAttributes( $nodes['style'], [ 'lang' ] );
	}
	$this->validateTemplateTag( $nodes['template'] );

	$styleData = $hasStyle ? $this->getStyleAndLang( $nodes['style'] ) : null;
	$minifyTemplate = $options['minifyTemplate'] ?? false;
	$template = $this->getTemplateHtml( $html, $minifyTemplate );

	return [
		'script' => trim( $nodes['script']->nodeValue ),
		'template' => $template,
		'style' => $styleData['style'] ?? null,
		'styleLang' => $styleData['lang'] ?? null
	];
}
94
/**
 * Parse HTML to DOM using RemexHtml.
 *
 * Both the tree builder and the tokenizer are created with 'ignoreErrors' enabled.
 *
 * @param string $html
 * @return DOMDocument
 */
private function parseHTML( $html ): DOMDocument {
	$lenient = [ 'ignoreErrors' => true ];
	$builder = new DOMBuilder( [ 'suppressHtmlNamespace' => true ] );
	$dispatcher = new Dispatcher( new TreeBuilder( $builder, $lenient ) );
	( new Tokenizer( $dispatcher, $html, $lenient ) )->execute();
	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $builder->getFragment();
}
108
/**
 * Find occurrences of specified tags in a DOM node, expecting at most one occurrence of each.
 *
 * Only direct children of $rootNode are examined, and their node names are lowercased
 * before being compared against $tagNames.
 *
 * @param DOMNode $rootNode Node whose children to look at
 * @param string[] $tagNames Tag names to look for (lowercase)
 * @return DOMNode[] Found nodes, keyed by lowercased tag name; tags that were not found
 *  have no entry
 * @throws Exception If the same tag occurs more than once
 */
private function findUniqueTags( DOMNode $rootNode, array $tagNames ): array {
	$found = [];
	foreach ( $rootNode->childNodes as $child ) {
		$name = strtolower( $child->nodeName );
		if ( !in_array( $name, $tagNames ) ) {
			// Not one of the tags we are looking for
			continue;
		}
		if ( isset( $found[ $name ] ) ) {
			throw new Exception( "More than one <$name> tag found" );
		}
		$found[ $name ] = $child;
	}
	return $found;
}
130
/**
 * Verify that a given node only has a given set of attributes, and no others.
 *
 * @param DOMNode $node Node to check
 * @param string[] $allowedAttributes Attribute names the node may have; an empty array
 *  means the node may not have any attributes at all
 * @throws Exception If the node has an attribute it is not allowed to have
 */
private function validateAttributes( DOMNode $node, array $allowedAttributes ): void {
	if ( !$allowedAttributes ) {
		// Nothing is allowed, so the mere presence of an attribute is an error
		if ( $node->attributes->length > 0 ) {
			throw new Exception( "<{$node->nodeName}> may not have any attributes" );
		}
		return;
	}

	foreach ( $node->attributes as $attribute ) {
		if ( in_array( $attribute->name, $allowedAttributes ) ) {
			continue;
		}
		throw new Exception( "<{$node->nodeName}> may not have the " .
			"{$attribute->name} attribute" );
	}
}
149
/**
 * Check that the <template> tag has exactly one element child.
 *
 * Besides that single element, the tag may only contain comments and text nodes that
 * consist solely of whitespace.
 *
 * @param DOMNode $templateNode The <template> node
 * @throws Exception If the contents of the <template> tag are invalid
 */
private function validateTemplateTag( DOMNode $templateNode ): void {
	// Counting ->childNodes is not enough, because whitespace shows up as text nodes
	// and comments are allowed too. So walk the children and classify each one.
	$hasRootElement = false;
	foreach ( $templateNode->childNodes as $child ) {
		switch ( $child->nodeType ) {
			case XML_ELEMENT_NODE:
				if ( $hasRootElement ) {
					throw new Exception( '<template> tag may not have multiple child tags' );
				}
				$hasRootElement = true;
				break;

			case XML_TEXT_NODE:
				// Text is tolerated only when it is pure whitespace
				if ( trim( $child->nodeValue ) !== '' ) {
					throw new Exception( '<template> tag may not contain text' );
				}
				break;

			case XML_COMMENT_NODE:
				// Comments are always fine
				break;

			default:
				throw new Exception( "<template> tag may only contain element and comment nodes, " .
					" found node of type {$child->nodeType}" );
		}
	}

	if ( !$hasRootElement ) {
		throw new Exception( '<template> tag may not be empty' );
	}
}
185
/**
 * Get the contents and language of the <style> tag.
 *
 * The language is taken from the lang attribute and defaults to 'css' when that
 * attribute is absent.
 *
 * @param DOMElement $styleNode The <style> tag
 * @return array Associative array with the keys 'style' (trimmed contents of the tag)
 *  and 'lang' ('css' or 'less')
 * @throws Exception If the lang attribute is anything other than 'css' or 'less'
 */
private function getStyleAndLang( DOMElement $styleNode ): array {
	$lang = 'css';
	if ( $styleNode->hasAttribute( 'lang' ) ) {
		$lang = $styleNode->getAttribute( 'lang' );
	}

	if ( !in_array( $lang, [ 'css', 'less' ], true ) ) {
		throw new Exception( "<style lang=\"$lang\"> is invalid," .
			" lang must be \"css\" or \"less\"" );
	}

	return [
		'style' => trim( $styleNode->nodeValue ),
		'lang' => $lang,
	];
}
205
/**
 * Get the HTML contents of the <template> tag, optionally minified.
 *
 * The source is tokenized again rather than serialized from a DOM: the tokens are run
 * through the filtering dispatcher from newFilteringDispatcher() for the 'template' tag
 * and serialized with the formatter from newTemplateFormatter().
 *
 * @param string $html HTML source of the whole single file component
 * @param bool $minify Passed to newTemplateFormatter() to control minification
 * @return string Trimmed serializer output
 */
private function getTemplateHtml( $html, $minify ) {
	$lenient = [ 'ignoreErrors' => true ];

	$serializer = new Serializer( $this->newTemplateFormatter( $minify ) );
	$dispatcher = $this->newFilteringDispatcher(
		new TreeBuilder( $serializer, $lenient ),
		'template'
	);

	$tokenizer = new Tokenizer( $dispatcher, $html, $lenient );
	$tokenizer->execute( [
		'fragmentNamespace' => HTMLData::NS_HTML,
		'fragmentName' => 'template'
	] );

	return trim( $serializer->getResult() );
}
230
/**
 * Custom HtmlFormatter subclass that optionally removes comments and strips whitespace.
 *
 * When $minify is falsy, only the doctype is suppressed and everything else is delegated
 * to HtmlFormatter unchanged.
 *
 * @param bool $minify Whether to remove comments and collapse/strip whitespace
 * @return HtmlFormatter
 */
private function newTemplateFormatter( $minify ) {
	return new class( $minify ) extends HtmlFormatter {
		private $minify;

		public function __construct( $minify ) {
			$this->minify = $minify;
		}

		public function startDocument( $fragmentNamespace, $fragmentName ) {
			// Suppress the <!doctype html> that would otherwise be emitted
			return '';
		}

		public function comment( SerializerNode $parent, $text ) {
			// Minified output carries no comments at all
			return $this->minify ? '' : parent::comment( $parent, $text );
		}

		public function characters( SerializerNode $parent, $text, $start, $length ) {
			if (
				!$this->minify || (
					// Leave text inside <pre>/<listing>/<textarea> untouched
					$parent->namespace === HTMLData::NS_HTML &&
					isset( $this->prefixLfElements[ $parent->name ] )
				)
			) {
				return parent::characters( $parent, $text, $start, $length );
			}

			// Turn every run of whitespace into a single space
			$collapsed = preg_replace( '/[ \r\n\t]+/', ' ', substr( $text, $start, $length ) );
			return parent::characters( $parent, $collapsed, 0, strlen( $collapsed ) );
		}

		public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
			if (
				!$this->minify || (
					// Leave the contents of <pre>/<listing>/<textarea> untouched
					$node->namespace === HTMLData::NS_HTML &&
					isset( $this->prefixLfElements[ $node->name ] )
				)
			) {
				return parent::element( $parent, $node, $contents );
			}

			// Strip whitespace at the very start and very end of the element's contents
			$stripped = preg_replace( '/(^[ \r\n\t]+)|([\r\n\t ]+$)/', '', $contents );
			return parent::element( $parent, $node, $stripped );
		}
	};
}
292
/**
 * Custom Dispatcher subclass that only dispatches tree events inside a tag with a certain name.
 *
 * Start tags, end tags, text and comments are forwarded to the parent Dispatcher only while
 * the tokenizer is inside a tag named $nodeName. The outermost opening and closing tag of
 * that name are themselves not forwarded; nested tags with the same name are.
 *
 * @param TreeBuilder $treeBuilder Tree builder to forward the filtered events to
 * @param string $nodeName Tag name to filter for
 * @return Dispatcher
 */
private function newFilteringDispatcher( TreeBuilder $treeBuilder, $nodeName ) {
	return new class( $treeBuilder, $nodeName ) extends Dispatcher {
		private $nodeName;
		// Number of currently open tags named $nodeName; 0 means "outside, drop events"
		private $nodeDepth = 0;
		// Whether an opening tag named $nodeName has been encountered at all
		private $seenTag = false;

		public function __construct( TreeBuilder $treeBuilder, $nodeName ) {
			$this->nodeName = $nodeName;
			parent::__construct( $treeBuilder );
		}

		public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
			// Forward before touching the depth, so the outermost opening tag is dropped
			if ( $this->nodeDepth ) {
				parent::startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
			}

			if ( $name === $this->nodeName ) {
				if ( $this->nodeDepth === 0 && $this->seenTag ) {
					// This is the second opening tag, not nested in the first one
					throw new Exception( "More than one <{$this->nodeName}> tag found" );
				}
				$this->nodeDepth++;
				$this->seenTag = true;
			}
		}

		public function endTag( $name, $sourceStart, $sourceLength ) {
			// Only close a tag that is actually open. A stray closing tag outside the
			// filtered tag must not push the depth below zero: a negative depth is truthy
			// and would cause everything that follows to be forwarded.
			if ( $name === $this->nodeName && $this->nodeDepth > 0 ) {
				$this->nodeDepth--;
			}
			// Check after updating the depth, so the outermost closing tag is dropped
			if ( $this->nodeDepth ) {
				parent::endTag( $name, $sourceStart, $sourceLength );
			}
		}

		public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::characters( $text, $start, $length, $sourceStart, $sourceLength );
			}
		}

		public function comment( $text, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::comment( $text, $sourceStart, $sourceLength );
			}
		}
	};
}
349}
if(ini_get('mbstring.func_overload')) if(!defined('MW_ENTRY_POINT'))
Pre-config setup: Before loading LocalSettings.php.
Definition Setup.php:88
Parser for Vue single file components (.vue files).
findUniqueTags(DOMNode $rootNode, array $tagNames)
Find occurrences of specified tags in a DOM node, expecting at most one occurrence of each.
newFilteringDispatcher(TreeBuilder $treeBuilder, $nodeName)
Custom Dispatcher subclass that only dispatches tree events inside a tag with a certain name.
getStyleAndLang(DOMElement $styleNode)
Get the contents and language of the <style> tag.
newTemplateFormatter( $minify)
Custom HtmlFormatter subclass that optionally removes comments and strips whitespace.
parse(string $html, array $options=[])
Parse a Vue single file component, and extract the script, template and style parts.
validateAttributes(DOMNode $node, array $allowedAttributes)
Verify that a given node only has a given set of attributes, and no others.
parseHTML( $html)
Parse HTML to DOM using RemexHtml.
validateTemplateTag(DOMNode $templateNode)
Check that the <template> tag has exactly one element child.
getTemplateHtml( $html, $minify)
Get the HTML contents of the <template> tag, optionally minified.
return true
Definition router.php:92