22use RemexHtml\DOM\DOMBuilder;
23use RemexHtml\HTMLData;
24use RemexHtml\Serializer\HtmlFormatter;
25use RemexHtml\Serializer\Serializer;
26use RemexHtml\Serializer\SerializerNode;
27use RemexHtml\Tokenizer\Attributes;
28use RemexHtml\Tokenizer\Tokenizer;
29use RemexHtml\TreeBuilder\Dispatcher;
30use RemexHtml\TreeBuilder\Element;
31use RemexHtml\TreeBuilder\TreeBuilder;
/**
 * Parse a Vue single file component and extract its script, template and style parts.
 *
 * NOTE(review): several lines of this method are not visible in this view (the
 * assignment of $dom, the body of the <style> branch, and the return statement);
 * comments below describe only what the visible lines show.
 *
 * @param string $html Source of the component; also passed unchanged to getTemplateHtml()
 * @param array $options Only the 'minifyTemplate' key is read here (defaults to false)
 * @return array Entries 'script', 'template', 'style' and 'styleLang' are built below
 * @throws Exception If a required <script> or <template> tag is missing
 */
58 public function parse(
string $html, array $options = [] ) : array {
// Work on the first child of the first child of $dom.
// NOTE(review): $dom is assigned on a line not visible here - presumably from parseHTML(); confirm.
61 $head = $dom->firstChild->firstChild;
// Look up the <script>, <template> and <style> nodes under $head.
65 $nodes = $this->
findUniqueTags( $head, [
'script',
'template',
'style' ] );
// <script> and <template> are mandatory; <style> is not in this list, so it may be absent.
68 foreach ( [
'script',
'template' ] as $requiredTag ) {
69 if ( !isset( $nodes[ $requiredTag ] ) ) {
70 throw new Exception(
"No <$requiredTag> tag found" );
// Extra handling that only applies when a <style> tag exists.
// NOTE(review): the body of this branch is not visible in this view.
76 if ( isset( $nodes[
'style'] ) ) {
// Style contents and language are only extracted when a <style> tag exists; otherwise null.
81 $styleData = isset( $nodes[
'style'] ) ? $this->
getStyleAndLang( $nodes[
'style'] ) : null;
// The template HTML is derived from the full source; minification is off unless
// $options['minifyTemplate'] is set.
82 $template = $this->
getTemplateHtml( $html, $options[
'minifyTemplate'] ??
false );
// Result entries: the trimmed script text, the template HTML, and the style
// contents and language (both null when $styleData is falsy).
85 'script' => trim( $nodes[
'script']->nodeValue ),
86 'template' => $template,
87 'style' => $styleData ? $styleData[
'style'] :
null,
88 'styleLang' => $styleData ? $styleData[
'lang'] : null
// Parse HTML into a DOM using RemexHtml. The pipeline is wired back to front:
// Tokenizer -> Dispatcher -> TreeBuilder -> DOMBuilder.
// NOTE(review): the enclosing method signature is not visible in this view.
98 $domBuilder = new DOMBuilder( [
'suppressHtmlNamespace' =>
true ] );
// Both the tree builder and the tokenizer are created with 'ignoreErrors' => true.
99 $treeBuilder =
new TreeBuilder( $domBuilder, [
'ignoreErrors' =>
true ] );
100 $tokenizer =
new Tokenizer(
new Dispatcher( $treeBuilder ), $html, [
'ignoreErrors' =>
true ] );
// Run the tokenizer over $html, then hand back the fragment held by the DOM builder.
101 $tokenizer->execute();
102 return $domBuilder->getFragment();
// Scan only the direct children of $rootNode; node names are lowercased before
// being compared against $tagNames.
// NOTE(review): the method signature, the initialisation of $nodes and the return
// statement are not visible in this view.
115 foreach ( $rootNode->childNodes as $node ) {
116 $tagName = strtolower( $node->nodeName );
117 if ( in_array( $tagName, $tagNames ) ) {
// A tag that was already recorded may not appear a second time.
118 if ( isset( $nodes[ $tagName ] ) ) {
119 throw new Exception(
"More than one <$tagName> tag found" );
// Remember the node, keyed by its lowercase tag name.
121 $nodes[ $tagName ] = $node;
// With a non-empty allow-list, every attribute name on the node must appear in it.
// NOTE(review): the method signature is not visible in this view.
134 if ( $allowedAttributes ) {
135 foreach ( $node->attributes as $attr ) {
136 if ( !in_array( $attr->name, $allowedAttributes ) ) {
137 throw new Exception(
"<{$node->nodeName}> may not have the " .
138 "{$attr->name} attribute" );
// With an empty allow-list, the node may not carry any attributes at all.
141 } elseif ( $node->attributes->length > 0 ) {
142 throw new Exception(
"<{$node->nodeName}> may not have any attributes" );
// Check that the <template> node has exactly one element child. Whitespace-only
// text nodes and comment nodes are tolerated; anything else is rejected.
// NOTE(review): the method signature is not visible in this view.
159 $rootTemplateNode = null;
160 foreach ( $templateNode->childNodes as $node ) {
161 if ( $node->nodeType === XML_ELEMENT_NODE ) {
// A second element child is an error; the first one becomes the root template node.
162 if ( $rootTemplateNode !==
null ) {
163 throw new Exception(
'<template> tag may not have multiple child tags' );
165 $rootTemplateNode = $node;
166 } elseif ( $node->nodeType === XML_TEXT_NODE ) {
// Text nodes are only allowed when they trim down to the empty string.
168 if ( trim( $node->nodeValue ) !==
'' ) {
169 throw new Exception(
'<template> tag may not contain text' );
// Any node that is not an element, text or comment node is rejected.
171 } elseif ( $node->nodeType !== XML_COMMENT_NODE ) {
173 throw new Exception(
"<template> tag may only contain element and comment nodes, " .
174 " found node of type {$node->nodeType}" );
// No element child was found at all.
177 if ( $rootTemplateNode ===
null ) {
178 throw new Exception(
'<template> tag may not be empty' );
// Get the contents and language of the <style> tag: the style text is the trimmed
// node value, and the language falls back to 'css' when there is no lang attribute.
// NOTE(review): the method signature is not visible in this view.
189 $style = trim( $styleNode->nodeValue );
190 $styleLang = $styleNode->hasAttribute(
'lang' ) ?
191 $styleNode->getAttribute(
'lang' ) :
'css';
// Only the exact values 'css' and 'less' are accepted.
192 if ( $styleLang !==
'css' && $styleLang !==
'less' ) {
193 throw new Exception(
"<style lang=\"$styleLang\"> is invalid," .
194 " lang must be \"css\" or \"less\"" );
// Part of the returned data. NOTE(review): the rest of the return statement is not
// visible here; parse() reads the 'style' and 'lang' keys of the result.
198 'lang' => $styleLang,
// Get the HTML contents of the <template> tag by tokenizing the full source a second
// time and serializing it with the template formatter, which receives $minify.
// NOTE(review): the method signature is not visible in this view.
215 $serializer =
new Serializer( $this->newTemplateFormatter( $minify ) );
// Token events pass through the filtering dispatcher into a TreeBuilder that feeds
// the serializer. NOTE(review): the second argument to newFilteringDispatcher() is
// on a line not visible here - presumably the tag name to keep; confirm.
216 $tokenizer =
new Tokenizer(
217 $this->newFilteringDispatcher(
218 new TreeBuilder( $serializer, [
'ignoreErrors' =>
true ] ),
221 $html, [
'ignoreErrors' =>
true ]
// Execute with fragment options naming an HTML-namespace 'template' element.
223 $tokenizer->execute( [
'fragmentNamespace' => HTMLData::NS_HTML,
'fragmentName' =>
'template' ] );
// The serialized output is trimmed before being returned.
224 return trim( $serializer->getResult() );
// Build an anonymous HtmlFormatter subclass. $minify is stored on the instance and
// consulted by the overrides below.
// NOTE(review): the enclosing method signature, the property declaration and several
// guard lines are not visible in this view.
236 return new class( $minify ) extends HtmlFormatter {
239 public function __construct( $minify ) {
240 $this->minify = $minify;
// NOTE(review): the body of startDocument() is not visible in this view.
243 public function startDocument( $fragmentNamespace, $fragmentName ) {
// When minify is on, a branch not visible here runs (presumably the comment is
// dropped - confirm); otherwise the parent formatter handles the comment.
248 public function comment( SerializerNode $parent, $text ) {
249 if ( $this->minify ) {
253 return parent::comment( $parent, $text );
// Text handling: inside a guard that is only partly visible, the addressed slice of
// $text is taken, each run of spaces/CR/LF/tabs is collapsed to a single space, and
// $length is recomputed before delegating to the parent.
256 public function characters( SerializerNode $parent, $text, $start, $length ) {
// Visible part of the guard: parent is outside the HTML namespace, or its name is
// not a key of prefixLfElements. NOTE(review): the rest of the condition is not visible.
260 $parent->namespace !== HTMLData::NS_HTML ||
261 !isset( $this->prefixLfElements[ $parent->name ] )
264 $text = substr( $text, $start, $length );
266 $text = preg_replace(
'/[ \r\n\t]+/',
' ', $text );
268 $length = strlen( $text );
270 return parent::characters( $parent, $text, $start, $length );
// Element handling: inside a guard that is only partly visible, leading and trailing
// whitespace is stripped from the serialized contents before delegating to the parent.
273 public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
// Visible part of the guard: node is outside the HTML namespace, or its name is
// not a key of prefixLfElements. NOTE(review): the rest of the condition is not visible.
277 $node->namespace !== HTMLData::NS_HTML ||
278 !isset( $this->prefixLfElements[ $node->name ] )
282 $contents = preg_replace(
'/(^[ \r\n\t]+)|([\r\n\t ]+$)/',
'', $contents );
284 return parent::element( $parent, $node, $contents );
// Build an anonymous Dispatcher subclass that forwards token events to the parent
// Dispatcher only while $nodeDepth is non-zero.
// NOTE(review): the enclosing method signature, the $nodeName property declaration and
// the lines that change $nodeDepth are not visible in this view.
298 return new class( $treeBuilder, $nodeName ) extends Dispatcher {
// $nodeDepth starts at 0 and $seenTag starts false.
300 private $nodeDepth = 0;
301 private $seenTag =
false;
// Remember the tag name to filter on, then initialise the parent with the tree builder.
303 public function __construct( TreeBuilder $treeBuilder, $nodeName ) {
304 $this->nodeName = $nodeName;
305 parent::__construct( $treeBuilder );
308 public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
// Start tags are forwarded only while $nodeDepth is non-zero.
309 if ( $this->nodeDepth ) {
310 parent::startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
313 if ( $name === $this->nodeName ) {
// Seeing the target tag again at depth 0 after it was already seen is an error.
314 if ( $this->nodeDepth === 0 && $this->seenTag ) {
316 throw new Exception(
"More than one <{$this->nodeName}> tag found" );
// Record that the target tag has been encountered.
319 $this->seenTag =
true;
323 public function endTag( $name, $sourceStart, $sourceLength ) {
// NOTE(review): the body of this name check is not visible - presumably it adjusts
// $nodeDepth; confirm.
324 if ( $name === $this->nodeName ) {
// End tags are forwarded only while $nodeDepth is non-zero.
327 if ( $this->nodeDepth ) {
328 parent::endTag( $name, $sourceStart, $sourceLength );
// Character data is forwarded only while $nodeDepth is non-zero.
332 public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
333 if ( $this->nodeDepth ) {
334 parent::characters( $text, $start, $length, $sourceStart, $sourceLength );
// Comments are forwarded only while $nodeDepth is non-zero.
338 public function comment( $text, $sourceStart, $sourceLength ) {
339 if ( $this->nodeDepth ) {
340 parent::comment( $text, $sourceStart, $sourceLength );
Parser for Vue single file components (.vue files).
findUniqueTags(DOMNode $rootNode, array $tagNames)
Find occurrences of specified tags in a DOM node, expecting at most one occurrence of each.
newFilteringDispatcher(TreeBuilder $treeBuilder, $nodeName)
Custom Dispatcher subclass that only dispatches tree events inside a tag with a certain name.
getStyleAndLang(DOMElement $styleNode)
Get the contents and language of the <style> tag.
newTemplateFormatter( $minify)
Custom HtmlFormatter subclass that optionally removes comments and strips whitespace.
parse(string $html, array $options=[])
Parse a Vue single file component, and extract the script, template and style parts.
validateAttributes(DOMNode $node, array $allowedAttributes)
Verify that a given node only has a given set of attributes, and no others.
parseHTML( $html)
Parse HTML to DOM using RemexHtml.
validateTemplateTag(DOMNode $templateNode)
Check that the <template> tag has exactly one element child.
getTemplateHtml( $html, $minify)
Get the HTML contents of the <template> tag, optionally minified.