MediaWiki  master
VueComponentParser.php
Go to the documentation of this file.
1 <?php
22 namespace MediaWiki\ResourceLoader;
23 
24 use DOMDocument;
25 use DOMElement;
26 use DOMNode;
27 use Exception;
28 use Wikimedia\RemexHtml\DOM\DOMBuilder;
29 use Wikimedia\RemexHtml\HTMLData;
30 use Wikimedia\RemexHtml\Serializer\HtmlFormatter;
31 use Wikimedia\RemexHtml\Serializer\Serializer;
32 use Wikimedia\RemexHtml\Serializer\SerializerNode;
33 use Wikimedia\RemexHtml\Tokenizer\Attributes;
34 use Wikimedia\RemexHtml\Tokenizer\Tokenizer;
35 use Wikimedia\RemexHtml\TreeBuilder\Dispatcher;
36 use Wikimedia\RemexHtml\TreeBuilder\Element;
37 use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder;
38 
/**
 * Parse a Vue single file component, and extract the script, template and style parts.
 *
 * @param string $html Contents of the single file component
 * @param array $options Associative array of options. The only key read here is
 *  'minifyTemplate', which is treated as false when absent.
 * @return array Associative array with the keys 'script', 'template', 'style' and
 *  'styleLang'. 'style' and 'styleLang' are null when there is no <style> tag.
 * @throws Exception If the parsed DOM has no <head>, if <script> or <template> is missing,
 *  or if one of the helper methods called here rejects the input
 */
public function parse( string $html, array $options = [] ): array {
	$document = $this->parseHTML( $html );

	// Remex wraps everything in <html><head>, so the component's tags are searched for
	// among the children of <head>
	$headNode = $document->getElementsByTagName( 'head' )->item( 0 );
	if ( !$headNode ) {
		throw new Exception( 'Parsed DOM did not contain a <head> tag' );
	}

	// <script>, <template> and <style> may appear in any order, but they must be at the
	// top level, and there can only be one of each.
	$found = $this->findUniqueTags( $headNode, [ 'script', 'template', 'style' ] );

	// <script> and <template> are mandatory, <style> is optional. array_diff() preserves
	// the order of its first argument, so a missing <script> is reported before a
	// missing <template>.
	$missing = array_diff( [ 'script', 'template' ], array_keys( $found ) );
	if ( $missing ) {
		$firstMissing = reset( $missing );
		throw new Exception( "No <$firstMissing> tag found" );
	}

	$scriptNode = $found['script'];
	$this->validateAttributes( $scriptNode, [] );
	$this->validateAttributes( $found['template'], [] );

	$styleData = null;
	if ( isset( $found['style'] ) ) {
		// 'lang' is the only attribute <style> may carry
		$this->validateAttributes( $found['style'], [ 'lang' ] );
		$styleData = $this->getStyleAndLang( $found['style'] );
	}

	// The template is extracted from the original source text, not from the DOM built above
	$templateHtml = $this->getTemplateHtml( $html, $options['minifyTemplate'] ?? false );

	return [
		'script' => trim( $scriptNode->nodeValue ),
		'template' => $templateHtml,
		'style' => $styleData['style'] ?? null,
		'styleLang' => $styleData['lang'] ?? null,
	];
}
99 
/**
 * Parse HTML to DOM using RemexHtml.
 *
 * Both the tree builder and the tokenizer are created with 'ignoreErrors' enabled, and the
 * DOM builder with 'suppressHtmlNamespace' enabled.
 *
 * @param string $html
 * @return DOMDocument
 */
private function parseHTML( $html ): DOMDocument {
	$parseOptions = [ 'ignoreErrors' => true ];

	$builder = new DOMBuilder( [ 'suppressHtmlNamespace' => true ] );
	$dispatcher = new Dispatcher( new TreeBuilder( $builder, $parseOptions ) );
	( new Tokenizer( $dispatcher, $html, $parseOptions ) )->execute();

	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $builder->getFragment();
}
113 
/**
 * Find occurrences of specified tags in a DOM node, expecting at most one occurrence of each.
 *
 * Only the direct children of $rootNode are inspected. Node names are lowercased before
 * they are compared against $tagNames, so $tagNames should be given in lowercase.
 *
 * @param DOMNode $rootNode Node whose children are searched
 * @param string[] $tagNames Lowercase tag names to look for
 * @return DOMNode[] Found nodes, keyed by lowercase tag name. Tags that were not found
 *  have no entry.
 * @throws Exception If any of the requested tags occurs more than once
 */
private function findUniqueTags( DOMNode $rootNode, array $tagNames ): array {
	$nodes = [];
	foreach ( $rootNode->childNodes as $node ) {
		$tagName = strtolower( $node->nodeName );
		// Strict comparison, so that tag names are never matched through type juggling
		if ( !in_array( $tagName, $tagNames, true ) ) {
			continue;
		}
		if ( isset( $nodes[ $tagName ] ) ) {
			throw new Exception( "More than one <$tagName> tag found" );
		}
		$nodes[ $tagName ] = $node;
	}
	return $nodes;
}
135 
/**
 * Verify that a given node only has a given set of attributes, and no others.
 *
 * @param DOMNode $node Node to check
 * @param string[] $allowedAttributes Attribute names that are allowed on the node. An empty
 *  array means that no attributes are allowed at all.
 * @throws Exception If the node has an attribute that is not allowed
 */
private function validateAttributes( DOMNode $node, array $allowedAttributes ): void {
	// DOMNode::$attributes is null for anything that is not an element (e.g. text nodes);
	// such nodes cannot carry attributes, so there is nothing to reject.
	$attributes = $node->attributes;
	if ( $attributes === null || $attributes->length === 0 ) {
		return;
	}

	if ( !$allowedAttributes ) {
		throw new Exception( "<{$node->nodeName}> may not have any attributes" );
	}

	foreach ( $attributes as $attr ) {
		// Strict comparison, so that attribute names are never matched through type juggling
		if ( !in_array( $attr->name, $allowedAttributes, true ) ) {
			throw new Exception( "<{$node->nodeName}> may not have the " .
				"{$attr->name} attribute" );
		}
	}
}
154 
/**
 * Get the contents and language of the <style> tag.
 *
 * The language is taken from the 'lang' attribute and falls back to 'css' when that
 * attribute is not present.
 *
 * @param DOMElement $styleNode The <style> tag
 * @return array Associative array with the keys 'style' (trimmed text content of the tag)
 *  and 'lang' (either 'css' or 'less')
 * @throws Exception If the language is anything other than 'css' or 'less'
 */
private function getStyleAndLang( DOMElement $styleNode ): array {
	$lang = 'css';
	if ( $styleNode->hasAttribute( 'lang' ) ) {
		$lang = $styleNode->getAttribute( 'lang' );
	}

	if ( !in_array( $lang, [ 'css', 'less' ], true ) ) {
		throw new Exception(
			"<style lang=\"$lang\"> is invalid, lang must be \"css\" or \"less\""
		);
	}

	return [
		'style' => trim( $styleNode->nodeValue ),
		'lang' => $lang,
	];
}
174 
/**
 * Get the HTML contents of the <template> tag, optionally minified.
 *
 * The source text is tokenized again, and only the events inside the <template> tag are
 * passed on to a serializer, which re-emits them as HTML.
 *
 * @param string $html Source text of the whole component
 * @param bool $minify Whether to minify the output (passed to newTemplateFormatter())
 * @return string Serialized HTML, with leading and trailing whitespace trimmed
 */
private function getTemplateHtml( $html, $minify ) {
	$parseOptions = [ 'ignoreErrors' => true ];

	$serializer = new Serializer( $this->newTemplateFormatter( $minify ) );
	$treeBuilder = new TreeBuilder( $serializer, $parseOptions );
	// Only events inside <template> reach the tree builder
	$dispatcher = $this->newFilteringDispatcher( $treeBuilder, 'template' );

	$tokenizer = new Tokenizer( $dispatcher, $html, $parseOptions );
	$tokenizer->execute( [
		'fragmentNamespace' => HTMLData::NS_HTML,
		'fragmentName' => 'template',
	] );

	return trim( $serializer->getResult() );
}
199 
/**
 * Custom HtmlFormatter subclass that optionally removes comments and strips whitespace.
 *
 * The returned formatter never outputs a doctype. When $minify is true it additionally
 * drops comments, collapses runs of whitespace in text to a single space, and strips
 * leading and trailing whitespace from element contents. Elements listed in the parent
 * class's $prefixLfElements are exempt from the whitespace changes.
 *
 * @param bool $minify If true, the formatter minifies its output as described above
 * @return HtmlFormatter
 */
private function newTemplateFormatter( $minify ) {
	return new class( $minify ) extends HtmlFormatter {
		/** Whether comments and redundant whitespace should be removed */
		private $minify;

		public function __construct( $minify ) {
			$this->minify = $minify;
		}

		public function startDocument( $fragmentNamespace, $fragmentName ) {
			// Output nothing at the start of the document; this removes <!doctype html>
			return '';
		}

		public function comment( SerializerNode $parent, $text ) {
			// Remove all comments when minifying, otherwise defer to the parent class
			return $this->minify ? '' : parent::comment( $parent, $text );
		}

		public function characters( SerializerNode $parent, $text, $start, $length ) {
			if ( !$this->minify ) {
				return parent::characters( $parent, $text, $start, $length );
			}
			if (
				$parent->namespace === HTMLData::NS_HTML &&
				isset( $this->prefixLfElements[ $parent->name ] )
			) {
				// Don't touch <pre>/<listing>/<textarea> nodes
				return parent::characters( $parent, $text, $start, $length );
			}

			// Collapse runs of adjacent whitespace, and convert all whitespace to spaces.
			// Only the slice being emitted is rewritten, so the offsets passed on are
			// relative to the rewritten string.
			$collapsed = preg_replace( '/[ \r\n\t]+/', ' ', substr( $text, $start, $length ) );
			return parent::characters( $parent, $collapsed, 0, strlen( $collapsed ) );
		}

		public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
			if ( !$this->minify ) {
				return parent::element( $parent, $node, $contents );
			}
			if (
				$node->namespace === HTMLData::NS_HTML &&
				isset( $this->prefixLfElements[ $node->name ] )
			) {
				// Don't touch <pre>/<listing>/<textarea> nodes
				return parent::element( $parent, $node, $contents );
			}

			// Remove leading and trailing whitespace
			$stripped = preg_replace( '/(^[ \r\n\t]+)|([\r\n\t ]+$)/', '', $contents );
			return parent::element( $parent, $node, $stripped );
		}
	};
}
261 
/**
 * Custom Dispatcher subclass that only dispatches tree events inside a tag with a certain name.
 *
 * Start tags, end tags, character data and comments are forwarded to the parent Dispatcher
 * only while the tokenizer is inside a tag named $nodeName. The opening and closing tags
 * of the outermost such tag are not forwarded themselves; nested tags with the same name are.
 *
 * @param TreeBuilder $treeBuilder Tree builder that receives the filtered events
 * @param string $nodeName Tag name to filter for; compared to token names with ===
 * @return Dispatcher
 */
private function newFilteringDispatcher( TreeBuilder $treeBuilder, $nodeName ) {
	return new class( $treeBuilder, $nodeName ) extends Dispatcher {
		/** Name of the tag whose contents are dispatched */
		private $nodeName;
		/** Number of currently open tags named $nodeName; 0 means "outside" */
		private $nodeDepth = 0;
		/** Whether an opening tag named $nodeName has been seen so far */
		private $seenTag = false;

		public function __construct( TreeBuilder $treeBuilder, $nodeName ) {
			$this->nodeName = $nodeName;
			parent::__construct( $treeBuilder );
		}

		public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
			// Forward before updating the depth, so that the outermost opening tag is
			// swallowed while nested ones are passed on
			if ( $this->nodeDepth ) {
				parent::startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
			}

			if ( $name === $this->nodeName ) {
				if ( $this->nodeDepth === 0 && $this->seenTag ) {
					// This is the second opening tag, not nested in the first one
					throw new Exception( "More than one <{$this->nodeName}> tag found" );
				}
				$this->nodeDepth++;
				$this->seenTag = true;
			}
		}

		public function endTag( $name, $sourceStart, $sourceLength ) {
			// Only count a closing tag if there is an open tag for it to close. Without
			// this check a stray closing tag outside the filtered tag would make the depth
			// negative, which is truthy, and unrelated content would be dispatched.
			if ( $name === $this->nodeName && $this->nodeDepth > 0 ) {
				$this->nodeDepth--;
			}
			// Forward after updating the depth, so that the outermost closing tag is
			// swallowed while nested ones are passed on
			if ( $this->nodeDepth ) {
				parent::endTag( $name, $sourceStart, $sourceLength );
			}
		}

		public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::characters( $text, $start, $length, $sourceStart, $sourceLength );
			}
		}

		public function comment( $text, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::comment( $text, $sourceStart, $sourceLength );
			}
		}
	};
}
318 }
319 
// Make the namespaced class also available under the un-namespaced name 'VueComponentParser'.
321 class_alias( VueComponentParser::class, 'VueComponentParser' );
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
Definition: WebStart.php:82
Parser for Vue single file components (.vue files).
newFilteringDispatcher(TreeBuilder $treeBuilder, $nodeName)
Custom Dispatcher subclass that only dispatches tree events inside a tag with a certain name.
parse(string $html, array $options=[])
Parse a Vue single file component, and extract the script, template and style parts.
findUniqueTags(DOMNode $rootNode, array $tagNames)
Find occurrences of specified tags in a DOM node, expecting at most one occurrence of each.
newTemplateFormatter( $minify)
Custom HtmlFormatter subclass that optionally removes comments and strips whitespace.
validateAttributes(DOMNode $node, array $allowedAttributes)
Verify that a given node only has a given set of attributes, and no others.
getStyleAndLang(DOMElement $styleNode)
Get the contents and language of the <style> tag.
getTemplateHtml( $html, $minify)
Get the HTML contents of the <template> tag, optionally minified.
parseHTML( $html)
Parse HTML to DOM using RemexHtml.
return true
Definition: router.php:90