MediaWiki  master
VueComponentParser.php
Go to the documentation of this file.
1 <?php
22 use RemexHtml\DOM\DOMBuilder;
23 use RemexHtml\HTMLData;
24 use RemexHtml\Serializer\HtmlFormatter;
25 use RemexHtml\Serializer\Serializer;
26 use RemexHtml\Serializer\SerializerNode;
27 use RemexHtml\Tokenizer\Attributes;
28 use RemexHtml\Tokenizer\Tokenizer;
29 use RemexHtml\TreeBuilder\Dispatcher;
30 use RemexHtml\TreeBuilder\Element;
31 use RemexHtml\TreeBuilder\TreeBuilder;
32 
/**
 * Parse a Vue single file component, and extract the script, template and style parts.
 *
 * @param string $html Full source of the .vue file
 * @param array $options Associative array of options; the only key read here is
 *  'minifyTemplate', which is passed on to getTemplateHtml() and defaults to false
 * @return array Associative array with the keys 'script', 'template', 'style' and
 *  'styleLang'; the latter two are null when there is no <style> tag
 * @throws Exception If a required tag is missing or any of the validation steps fails
 */
public function parse( string $html, array $options = [] ) : array {
	// Remex wraps everything in <html><head>, so the tags we care about are the
	// children of the first child of the first child of the document.
	$document = $this->parseHTML( $html );
	$container = $document->firstChild->firstChild;

	// <script>, <template> and <style> may appear in any order, but only at the top
	// level, and at most once each.
	$found = $this->findUniqueTags( $container, [ 'script', 'template', 'style' ] );

	// <script> and <template> are mandatory; <style> is optional.
	if ( !isset( $found['script'] ) ) {
		throw new Exception( "No <script> tag found" );
	}
	if ( !isset( $found['template'] ) ) {
		throw new Exception( "No <template> tag found" );
	}
	$scriptNode = $found['script'];
	$templateNode = $found['template'];
	$styleNode = $found['style'] ?? null;

	// Only <style> may carry an attribute, and only 'lang'.
	$this->validateAttributes( $scriptNode, [] );
	$this->validateAttributes( $templateNode, [] );
	if ( $styleNode !== null ) {
		$this->validateAttributes( $styleNode, [ 'lang' ] );
	}
	$this->validateTemplateTag( $templateNode );

	$style = null;
	$styleLang = null;
	if ( $styleNode !== null ) {
		[ 'style' => $style, 'lang' => $styleLang ] = $this->getStyleAndLang( $styleNode );
	}

	// The template is re-extracted from the raw source rather than from the DOM node.
	$minify = $options['minifyTemplate'] ?? false;
	$template = $this->getTemplateHtml( $html, $minify );

	return [
		'script' => trim( $scriptNode->nodeValue ),
		'template' => $template,
		'style' => $style,
		'styleLang' => $styleLang
	];
}
91 
/**
 * Parse HTML to DOM using RemexHtml.
 *
 * @param string $html
 * @return DOMDocument Whatever the DOMBuilder reports as its fragment after tokenizing
 */
private function parseHTML( $html ) : DOMDocument {
	// Both the tree builder and the tokenizer are told to ignore errors.
	$lenient = [ 'ignoreErrors' => true ];

	$builder = new DOMBuilder( [ 'suppressHtmlNamespace' => true ] );
	$dispatcher = new Dispatcher( new TreeBuilder( $builder, $lenient ) );

	( new Tokenizer( $dispatcher, $html, $lenient ) )->execute();

	return $builder->getFragment();
}
104 
/**
 * Find occurrences of specified tags in a DOM node, expecting at most one occurrence of each.
 *
 * Only the direct children of $rootNode are inspected.
 *
 * @param DOMNode $rootNode Node whose children are searched
 * @param string[] $tagNames Lower-case tag names to look for
 * @return DOMNode[] Matching child nodes, keyed by lower-cased tag name; tags that
 *  were not found have no entry
 * @throws Exception If one of the tags occurs more than once
 */
private function findUniqueTags( DOMNode $rootNode, array $tagNames ) : array {
	$matches = [];
	foreach ( $rootNode->childNodes as $child ) {
		$name = strtolower( $child->nodeName );
		if ( !in_array( $name, $tagNames ) ) {
			// Not one of the tags we were asked about
			continue;
		}
		if ( isset( $matches[ $name ] ) ) {
			throw new Exception( "More than one <$name> tag found" );
		}
		$matches[ $name ] = $child;
	}
	return $matches;
}
126 
/**
 * Verify that a given node only has a given set of attributes, and no others.
 *
 * @param DOMNode $node Node to check
 * @param string[] $allowedAttributes Names of the permitted attributes; pass an
 *  empty array to forbid attributes altogether
 * @throws Exception If the node has an attribute that is not allowed
 */
private function validateAttributes( DOMNode $node, array $allowedAttributes ) : void {
	if ( !$allowedAttributes ) {
		// Nothing is allowed, so the mere presence of an attribute is an error
		if ( $node->attributes->length > 0 ) {
			throw new Exception( "<{$node->nodeName}> may not have any attributes" );
		}
		return;
	}

	foreach ( $node->attributes as $attribute ) {
		if ( in_array( $attribute->name, $allowedAttributes ) ) {
			continue;
		}
		throw new Exception( "<{$node->nodeName}> may not have the {$attribute->name} attribute" );
	}
}
145 
/**
 * Check that the <template> tag has exactly one element child.
 *
 * Whitespace-only text nodes and comment nodes are tolerated next to that element;
 * any other kind of child node is rejected.
 *
 * @param DOMNode $templateNode The <template> node
 * @throws Exception If the contents of the <template> tag are invalid
 */
private function validateTemplateTag( DOMNode $templateNode ) : void {
	// Counting ->childNodes is not enough, because whitespace shows up as text nodes
	// and comments are permitted too. Walk the children and classify each one.
	$sawElement = false;
	foreach ( $templateNode->childNodes as $child ) {
		switch ( $child->nodeType ) {
			case XML_ELEMENT_NODE:
				if ( $sawElement ) {
					throw new Exception( '<template> tag may not have multiple child tags' );
				}
				$sawElement = true;
				break;

			case XML_TEXT_NODE:
				// Text is fine as long as it is nothing but whitespace
				if ( trim( $child->nodeValue ) !== '' ) {
					throw new Exception( '<template> tag may not contain text' );
				}
				break;

			case XML_COMMENT_NODE:
				// Comments are always fine
				break;

			default:
				// The message (including its double space) is kept exactly as it was.
				throw new Exception( "<template> tag may only contain element and comment nodes, " .
					" found node of type {$child->nodeType}" );
		}
	}

	if ( !$sawElement ) {
		throw new Exception( '<template> tag may not be empty' );
	}
}
181 
/**
 * Get the contents and language of the <style> tag.
 *
 * The language is taken from the 'lang' attribute and falls back to 'css' when the
 * attribute is absent.
 *
 * @param DOMElement $styleNode The <style> node
 * @return array Associative array with the keys 'style' (trimmed contents of the
 *  tag) and 'lang' ('css' or 'less')
 * @throws Exception If the language is neither 'css' nor 'less'
 */
private function getStyleAndLang( DOMElement $styleNode ) : array {
	$lang = 'css';
	if ( $styleNode->hasAttribute( 'lang' ) ) {
		$lang = $styleNode->getAttribute( 'lang' );
	}

	if ( !in_array( $lang, [ 'css', 'less' ], true ) ) {
		throw new Exception( "<style lang=\"$lang\"> is invalid, lang must be \"css\" or \"less\"" );
	}

	return [
		'style' => trim( $styleNode->nodeValue ),
		'lang' => $lang,
	];
}
201 
/**
 * Get the HTML contents of the <template> tag, optionally minified.
 *
 * The source is tokenized again, the token stream is restricted to what is inside
 * the <template> tag by the filtering dispatcher, and the result is serialized
 * through the template formatter.
 *
 * @param string $html Full source of the .vue file
 * @param bool $minify Whether to minify the serialized template
 * @return string Trimmed serializer output
 */
private function getTemplateHtml( $html, $minify ) {
	$lenient = [ 'ignoreErrors' => true ];

	$serializer = new Serializer( $this->newTemplateFormatter( $minify ) );
	$treeBuilder = new TreeBuilder( $serializer, $lenient );
	// Only events that occur inside <template> reach the tree builder
	$dispatcher = $this->newFilteringDispatcher( $treeBuilder, 'template' );

	$tokenizer = new Tokenizer( $dispatcher, $html, $lenient );
	$tokenizer->execute( [
		'fragmentNamespace' => HTMLData::NS_HTML,
		'fragmentName' => 'template'
	] );

	return trim( $serializer->getResult() );
}
226 
/**
 * Custom HtmlFormatter subclass that optionally removes comments and strips whitespace.
 *
 * When $minify is falsy, the only difference from the parent formatter visible here
 * is that startDocument() outputs nothing.
 *
 * @param bool $minify Whether to drop comments and collapse/strip whitespace
 * @return HtmlFormatter
 */
private function newTemplateFormatter( $minify ) {
	return new class( $minify ) extends HtmlFormatter {
		private $minify;

		public function __construct( $minify ) {
			$this->minify = $minify;
		}

		/**
		 * Whether whitespace belonging to $node may be altered: minification must be
		 * on, and the node must not be an HTML element listed in prefixLfElements
		 * (<pre>/<listing>/<textarea>, which are left untouched).
		 *
		 * @param SerializerNode $node
		 * @return bool
		 */
		private function mayAlterWhitespace( SerializerNode $node ) {
			if ( !$this->minify ) {
				return false;
			}
			$isProtected = $node->namespace === HTMLData::NS_HTML &&
				isset( $this->prefixLfElements[ $node->name ] );
			return !$isProtected;
		}

		public function startDocument( $fragmentNamespace, $fragmentName ) {
			// Output nothing here, which removes <!doctype html>
			return '';
		}

		public function comment( SerializerNode $parent, $text ) {
			// Comments are dropped entirely when minifying
			return $this->minify ? '' : parent::comment( $parent, $text );
		}

		public function characters( SerializerNode $parent, $text, $start, $length ) {
			if ( !$this->mayAlterWhitespace( $parent ) ) {
				return parent::characters( $parent, $text, $start, $length );
			}
			// Collapse runs of adjacent whitespace, and convert all whitespace to spaces
			$collapsed = preg_replace( '/[ \r\n\t]+/', ' ', substr( $text, $start, $length ) );
			return parent::characters( $parent, $collapsed, 0, strlen( $collapsed ) );
		}

		public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
			if ( $this->mayAlterWhitespace( $node ) ) {
				// Remove leading and trailing whitespace
				$contents = preg_replace( '/(^[ \r\n\t]+)|([\r\n\t ]+$)/', '', $contents );
			}
			return parent::element( $parent, $node, $contents );
		}
	};
}
288 
/**
 * Custom Dispatcher subclass that only dispatches tree events inside a tag with a certain name.
 *
 * Start tags, end tags, character data and comments are forwarded to the parent
 * Dispatcher only while the tokenizer is inside the named tag. The outermost opening
 * and closing tags themselves are not forwarded; nested tags of the same name are.
 *
 * @param TreeBuilder $treeBuilder Tree builder to hand to the Dispatcher
 * @param string $nodeName Tag name to filter for
 * @return Dispatcher
 */
private function newFilteringDispatcher( TreeBuilder $treeBuilder, $nodeName ) {
	return new class( $treeBuilder, $nodeName ) extends Dispatcher {
		/** @var string Name of the tag whose contents are let through */
		private $nodeName;
		/** @var int How many tags named $nodeName are currently open; never negative */
		private $nodeDepth = 0;
		/** @var bool Whether an opening tag named $nodeName has been seen at all */
		private $seenTag = false;

		public function __construct( TreeBuilder $treeBuilder, $nodeName ) {
			$this->nodeName = $nodeName;
			parent::__construct( $treeBuilder );
		}

		public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
			// Forward before touching the depth, so that the outermost opening tag
			// is not forwarded but nested ones are.
			if ( $this->nodeDepth ) {
				parent::startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
			}

			if ( $name === $this->nodeName ) {
				if ( $this->nodeDepth === 0 && $this->seenTag ) {
					// This is the second opening tag, not nested in the first one
					throw new Exception( "More than one <{$this->nodeName}> tag found" );
				}
				$this->nodeDepth++;
				$this->seenTag = true;
			}
		}

		public function endTag( $name, $sourceStart, $sourceLength ) {
			// Only close a tag that is actually open. A stray closing tag outside the
			// filtered tag used to push the depth to -1, which is truthy: everything
			// outside the tag was then forwarded, and the contents of the real tag
			// (whose opening brought the depth back to 0) were dropped.
			if ( $name === $this->nodeName && $this->nodeDepth > 0 ) {
				$this->nodeDepth--;
			}
			// Checked after the decrement, so the outermost closing tag is not forwarded
			if ( $this->nodeDepth ) {
				parent::endTag( $name, $sourceStart, $sourceLength );
			}
		}

		public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::characters( $text, $start, $length, $sourceStart, $sourceLength );
			}
		}

		public function comment( $text, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::comment( $text, $sourceStart, $sourceLength );
			}
		}
	};
}
345 }
VueComponentParser\validateTemplateTag
validateTemplateTag(DOMNode $templateNode)
Check that the <template> tag has exactly one element child.
Definition: VueComponentParser.php:155
VueComponentParser\validateAttributes
validateAttributes(DOMNode $node, array $allowedAttributes)
Verify that a given node only has a given set of attributes, and no others.
Definition: VueComponentParser.php:133
VueComponentParser\getTemplateHtml
getTemplateHtml( $html, $minify)
Get the HTML contents of the <template> tag, optionally minified.
Definition: VueComponentParser.php:214
VueComponentParser\newTemplateFormatter
newTemplateFormatter( $minify)
Custom HtmlFormatter subclass that optionally removes comments and strips whitespace.
Definition: VueComponentParser.php:235
VueComponentParser\parse
parse(string $html, array $options=[])
Parse a Vue single file component, and extract the script, template and style parts.
Definition: VueComponentParser.php:58
VueComponentParser\findUniqueTags
findUniqueTags(DOMNode $rootNode, array $tagNames)
Find occurrences of specified tags in a DOM node, expecting at most one occurrence of each.
Definition: VueComponentParser.php:113
VueComponentParser\newFilteringDispatcher
newFilteringDispatcher(TreeBuilder $treeBuilder, $nodeName)
Custom Dispatcher subclass that only dispatches tree events inside a tag with a certain name.
Definition: VueComponentParser.php:297
VueComponentParser\parseHTML
parseHTML( $html)
Parse HTML to DOM using RemexHtml.
Definition: VueComponentParser.php:97
VueComponentParser
Parser for Vue single file components (.vue files).
Definition: VueComponentParser.php:39
VueComponentParser\getStyleAndLang
getStyleAndLang(DOMElement $styleNode)
Get the contents and language of the <style> tag.
Definition: VueComponentParser.php:188