MediaWiki fundraising/REL1_35
VueComponentParser.php
Go to the documentation of this file.
1<?php
22use RemexHtml\DOM\DOMBuilder;
23use RemexHtml\HTMLData;
24use RemexHtml\Serializer\HtmlFormatter;
25use RemexHtml\Serializer\Serializer;
26use RemexHtml\Serializer\SerializerNode;
27use RemexHtml\Tokenizer\Attributes;
28use RemexHtml\Tokenizer\Tokenizer;
29use RemexHtml\TreeBuilder\Dispatcher;
30use RemexHtml\TreeBuilder\Element;
31use RemexHtml\TreeBuilder\TreeBuilder;
32
/**
 * Parse a Vue single file component, and extract the script, template and style parts.
 *
 * The <script>, <template> and <style> tags may appear in any order, but they must be at
 * the top level and each may occur at most once. <script> and <template> are required,
 * <style> is optional.
 *
 * @param string $html Source of the single file component
 * @param array $options Associative array of options. The only key read here is:
 *  - minifyTemplate: passed to getTemplateHtml() as its minify flag (default false)
 * @return array Associative array with the keys:
 *  - script: (string) trimmed contents of the <script> tag
 *  - template: (string) serialized contents of the <template> tag
 *  - style: (string|null) trimmed contents of the <style> tag, null if there is none
 *  - styleLang: (string|null) 'css' or 'less', null if there is no <style> tag
 * @throws Exception If a required tag is missing, or a validation step rejects the input
 */
public function parse( string $html, array $options = [] ) : array {
	$dom = $this->parseHTML( $html );
	// Remex wraps everything in <html><head>, unwrap that. Look <head> up by name instead
	// of assuming <html> is the document's first child: a comment that precedes the first
	// tag is attached to the document itself, ahead of <html>, and would otherwise make
	// this lookup return null.
	$head = $dom->getElementsByTagName( 'head' )->item( 0 );

	// Find the <script>, <template> and <style> tags. They can appear in any order, but they
	// must be at the top level, and there can only be one of each.
	$nodes = $head === null ?
		[] :
		$this->findUniqueTags( $head, [ 'script', 'template', 'style' ] );

	// Throw an error if we didn't find a <script> or <template> tag. <style> is optional.
	foreach ( [ 'script', 'template' ] as $requiredTag ) {
		if ( !isset( $nodes[ $requiredTag ] ) ) {
			throw new Exception( "No <$requiredTag> tag found" );
		}
	}

	// <script> and <template> may not carry attributes; <style> may only carry lang.
	$this->validateAttributes( $nodes['script'], [] );
	$this->validateAttributes( $nodes['template'], [] );
	if ( isset( $nodes['style'] ) ) {
		$this->validateAttributes( $nodes['style'], [ 'lang' ] );
	}
	$this->validateTemplateTag( $nodes['template'] );

	$styleData = isset( $nodes['style'] ) ? $this->getStyleAndLang( $nodes['style'] ) : null;
	// The template is re-serialized from the raw source rather than from the DOM.
	$template = $this->getTemplateHtml( $html, $options['minifyTemplate'] ?? false );

	return [
		'script' => trim( $nodes['script']->nodeValue ),
		'template' => $template,
		'style' => $styleData ? $styleData['style'] : null,
		'styleLang' => $styleData ? $styleData['lang'] : null
	];
}
91
/**
 * Parse HTML to DOM using RemexHtml.
 *
 * Both the tree builder and the tokenizer are configured with ignoreErrors, and the
 * DOM builder with suppressHtmlNamespace.
 *
 * @param string $html
 * @return DOMDocument
 */
private function parseHTML( $html ) : DOMDocument {
	$lenient = [ 'ignoreErrors' => true ];
	$builder = new DOMBuilder( [ 'suppressHtmlNamespace' => true ] );
	$dispatcher = new Dispatcher( new TreeBuilder( $builder, $lenient ) );
	( new Tokenizer( $dispatcher, $html, $lenient ) )->execute();
	return $builder->getFragment();
}
104
/**
 * Find occurrences of specified tags in a DOM node, expecting at most one occurrence of each.
 *
 * Only direct children of $rootNode are examined; node names are lowercased before
 * being compared against $tagNames.
 *
 * @param DOMNode $rootNode Node whose children to look at
 * @param string[] $tagNames Tag names to look for
 * @return DOMNode[] Found nodes, keyed by lowercased tag name; absent tags have no key
 * @throws Exception If one of the tags occurs more than once
 */
private function findUniqueTags( DOMNode $rootNode, array $tagNames ) : array {
	$found = [];
	foreach ( $rootNode->childNodes as $child ) {
		$name = strtolower( $child->nodeName );
		if ( !in_array( $name, $tagNames ) ) {
			// Not one of the tags we were asked about
			continue;
		}
		if ( isset( $found[ $name ] ) ) {
			throw new Exception( "More than one <$name> tag found" );
		}
		$found[ $name ] = $child;
	}
	return $found;
}
126
/**
 * Verify that a given node only has a given set of attributes, and no others.
 *
 * @param DOMNode $node Node to check
 * @param string[] $allowedAttributes Permitted attribute names; empty means the node
 *  may not have any attributes at all
 * @throws Exception If the node has an attribute it is not allowed to have
 */
private function validateAttributes( DOMNode $node, array $allowedAttributes ) : void {
	if ( !$allowedAttributes ) {
		// Nothing is allowed, so the mere presence of an attribute is an error
		if ( $node->attributes->length > 0 ) {
			throw new Exception( "<{$node->nodeName}> may not have any attributes" );
		}
		return;
	}

	foreach ( $node->attributes as $attribute ) {
		if ( in_array( $attribute->name, $allowedAttributes ) ) {
			continue;
		}
		throw new Exception( "<{$node->nodeName}> may not have the {$attribute->name} attribute" );
	}
}
145
/**
 * Check that the <template> tag has exactly one element child.
 *
 * Whitespace-only text nodes and comment nodes are tolerated next to that element;
 * any other kind of child is rejected.
 *
 * @param DOMNode $templateNode The <template> node
 * @throws Exception If the contents of the <template> tag are invalid
 */
private function validateTemplateTag( DOMNode $templateNode ) : void {
	// Counting element children is needed because ->childNodes->length also counts
	// whitespace text nodes and comments, both of which are allowed.
	$elementCount = 0;
	foreach ( $templateNode->childNodes as $child ) {
		switch ( $child->nodeType ) {
			case XML_ELEMENT_NODE:
				$elementCount++;
				if ( $elementCount > 1 ) {
					throw new Exception( '<template> tag may not have multiple child tags' );
				}
				break;

			case XML_TEXT_NODE:
				// Text is fine only when it is pure whitespace
				if ( trim( $child->nodeValue ) !== '' ) {
					throw new Exception( '<template> tag may not contain text' );
				}
				break;

			case XML_COMMENT_NODE:
				// Comments are always fine
				break;

			default:
				throw new Exception( "<template> tag may only contain element and comment nodes, " .
					" found node of type {$child->nodeType}" );
		}
	}

	if ( $elementCount === 0 ) {
		throw new Exception( '<template> tag may not be empty' );
	}
}
181
/**
 * Get the contents and language of the <style> tag.
 *
 * The language comes from the lang attribute and defaults to 'css' when the
 * attribute is absent.
 *
 * @param DOMElement $styleNode The <style> tag
 * @return array [ 'style' => trimmed tag contents, 'lang' => 'css' or 'less' ]
 * @throws Exception If the lang attribute is neither "css" nor "less"
 */
private function getStyleAndLang( DOMElement $styleNode ) : array {
	$lang = 'css';
	if ( $styleNode->hasAttribute( 'lang' ) ) {
		$lang = $styleNode->getAttribute( 'lang' );
	}

	if ( !in_array( $lang, [ 'css', 'less' ], true ) ) {
		throw new Exception( "<style lang=\"$lang\"> is invalid, lang must be \"css\" or \"less\"" );
	}

	return [
		'style' => trim( $styleNode->nodeValue ),
		'lang' => $lang,
	];
}
201
/**
 * Get the HTML contents of the <template> tag, optionally minified.
 *
 * The raw source is tokenized again here, with a filtering dispatcher that only lets
 * through events inside <template>, and fed into a serializer rather than a DOM.
 * The tokenizer is run in fragment mode with 'template' as the fragment name.
 *
 * @param string $html HTML that contains a <template> tag somewhere
 * @param bool $minify Passed to newTemplateFormatter() to control minification
 * @return string Trimmed serializer output
 */
private function getTemplateHtml( $html, $minify ) {
	$lenient = [ 'ignoreErrors' => true ];

	$serializer = new Serializer( $this->newTemplateFormatter( $minify ) );
	$templateOnly = $this->newFilteringDispatcher(
		new TreeBuilder( $serializer, $lenient ),
		'template'
	);

	$tokenizer = new Tokenizer( $templateOnly, $html, $lenient );
	$tokenizer->execute( [
		'fragmentNamespace' => HTMLData::NS_HTML,
		'fragmentName' => 'template',
	] );

	return trim( $serializer->getResult() );
}
226
/**
 * Custom HtmlFormatter subclass that optionally removes comments and strips whitespace.
 *
 * The doctype is always dropped. When $minify is truthy, comments are removed, runs of
 * whitespace in text are collapsed into a single space, and leading/trailing whitespace
 * is stripped from the serialized contents of each element. HTML elements listed in the
 * formatter's prefixLfElements (<pre>/<listing>/<textarea>) are left untouched.
 *
 * @param bool $minify Whether to minify the output
 * @return HtmlFormatter
 */
private function newTemplateFormatter( $minify ) {
	return new class( $minify ) extends HtmlFormatter {
		/** @var bool Whether comments and excess whitespace are removed */
		private $minify;

		public function __construct( $minify ) {
			// The parent constructor is not invoked here.
			// NOTE(review): confirm HtmlFormatter's unconfigured defaults are what we want.
			$this->minify = $minify;
		}

		public function startDocument( $fragmentNamespace, $fragmentName ) {
			// Remove <!doctype html>
			return '';
		}

		public function comment( SerializerNode $parent, $text ) {
			// Remove all comments when minifying
			return $this->minify ? '' : parent::comment( $parent, $text );
		}

		public function characters( SerializerNode $parent, $text, $start, $length ) {
			// Don't touch <pre>/<listing>/<textarea> nodes
			$isPreformatted = $parent->namespace === HTMLData::NS_HTML &&
				isset( $this->prefixLfElements[ $parent->name ] );
			if ( !$this->minify || $isPreformatted ) {
				return parent::characters( $parent, $text, $start, $length );
			}

			// Collapse runs of adjacent whitespace, and convert all whitespace to spaces
			$collapsed = preg_replace( '/[ \r\n\t]+/', ' ', substr( $text, $start, $length ) );
			return parent::characters( $parent, $collapsed, 0, strlen( $collapsed ) );
		}

		public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
			// Don't touch <pre>/<listing>/<textarea> nodes
			$isPreformatted = $node->namespace === HTMLData::NS_HTML &&
				isset( $this->prefixLfElements[ $node->name ] );
			if ( $this->minify && !$isPreformatted ) {
				// Remove leading and trailing whitespace. $contents may be null (presumably
				// for void elements - verify against Remex's Formatter interface); coalesce
				// to '' so that preg_replace() is never handed null, which PHP 8.1+
				// deprecates. The result is the same empty string as before.
				$contents = preg_replace( '/(^[ \r\n\t]+)|([\r\n\t ]+$)/', '', $contents ?? '' );
			}
			return parent::element( $parent, $node, $contents );
		}
	};
}
288
/**
 * Custom Dispatcher subclass that only dispatches tree events inside a tag with a certain name.
 *
 * Start tags, end tags, text and comments are forwarded to the tree builder only while
 * the tokenizer is inside a tag named $nodeName. The outermost opening and closing tag
 * themselves are not forwarded; nested tags of the same name are.
 *
 * @param TreeBuilder $treeBuilder Tree builder that receives the filtered events
 * @param string $nodeName Name of the tag whose contents should be let through
 * @return Dispatcher Throws Exception from startTag() if a second, non-nested
 *  <$nodeName> tag is encountered
 */
private function newFilteringDispatcher( TreeBuilder $treeBuilder, $nodeName ) {
	return new class( $treeBuilder, $nodeName ) extends Dispatcher {
		/** @var string Name of the tag whose contents are forwarded */
		private $nodeName;
		/** @var int How many <$nodeName> tags are currently open; 0 means "outside" */
		private $nodeDepth = 0;
		/** @var bool Whether an opening <$nodeName> tag has been encountered yet */
		private $seenTag = false;

		public function __construct( TreeBuilder $treeBuilder, $nodeName ) {
			$this->nodeName = $nodeName;
			parent::__construct( $treeBuilder );
		}

		public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
			// Forward before updating the depth, so the outermost opening tag is skipped
			if ( $this->nodeDepth ) {
				parent::startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
			}

			if ( $name === $this->nodeName ) {
				if ( $this->nodeDepth === 0 && $this->seenTag ) {
					// This is the second opening tag, not nested in the first one
					throw new Exception( "More than one <{$this->nodeName}> tag found" );
				}
				$this->nodeDepth++;
				$this->seenTag = true;
			}
		}

		public function endTag( $name, $sourceStart, $sourceLength ) {
			if ( $name === $this->nodeName ) {
				if ( $this->nodeDepth === 0 ) {
					// Stray closing tag with no matching opening tag. Ignore it: letting
					// the depth go negative would make it truthy, which would forward
					// everything outside the tag and drop the contents of the real one.
					return;
				}
				$this->nodeDepth--;
			}
			// Forward after updating the depth, so the outermost closing tag is skipped
			if ( $this->nodeDepth ) {
				parent::endTag( $name, $sourceStart, $sourceLength );
			}
		}

		public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::characters( $text, $start, $length, $sourceStart, $sourceLength );
			}
		}

		public function comment( $text, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::comment( $text, $sourceStart, $sourceLength );
			}
		}
	};
}
345}
Parser for Vue single file components (.vue files).
findUniqueTags(DOMNode $rootNode, array $tagNames)
Find occurrences of specified tags in a DOM node, expecting at most one occurrence of each.
newFilteringDispatcher(TreeBuilder $treeBuilder, $nodeName)
Custom Dispatcher subclass that only dispatches tree events inside a tag with a certain name.
getStyleAndLang(DOMElement $styleNode)
Get the contents and language of the <style> tag.
newTemplateFormatter( $minify)
Custom HtmlFormatter subclass that optionally removes comments and strips whitespace.
parse(string $html, array $options=[])
Parse a Vue single file component, and extract the script, template and style parts.
validateAttributes(DOMNode $node, array $allowedAttributes)
Verify that a given node only has a given set of attributes, and no others.
parseHTML( $html)
Parse HTML to DOM using RemexHtml.
validateTemplateTag(DOMNode $templateNode)
Check that the <template> tag has exactly one element child.
getTemplateHtml( $html, $minify)
Get the HTML contents of the <template> tag, optionally minified.
return true
Definition router.php:92
if($IP===false)
Definition status.php:5