22use Wikimedia\RemexHtml\DOM\DOMBuilder;
23use Wikimedia\RemexHtml\HTMLData;
24use Wikimedia\RemexHtml\Serializer\HtmlFormatter;
25use Wikimedia\RemexHtml\Serializer\Serializer;
26use Wikimedia\RemexHtml\Serializer\SerializerNode;
27use Wikimedia\RemexHtml\Tokenizer\Attributes;
28use Wikimedia\RemexHtml\Tokenizer\Tokenizer;
29use Wikimedia\RemexHtml\TreeBuilder\Dispatcher;
30use Wikimedia\RemexHtml\TreeBuilder\Element;
31use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder;
// Parse a Vue single file component and extract its script, template and style parts.
// Takes the raw component source ($html) and an options array; returns an array
// with the keys 'script', 'template', 'style' and 'styleLang'.
// Throws Exception when <head>, <script> or <template> cannot be found.
// NOTE(review): this listing is a non-contiguous excerpt; statements on the
// omitted lines (e.g. the assignment of $dom) are not visible here.
58 public function parse(
string $html, array $options = [] ): array {
// Take the first <head> element of the parsed DOM.
61 $head = $dom->getElementsByTagName(
'head' )->item( 0 );
66 throw new Exception(
'Parsed DOM did not contain a <head> tag' );
// Collect the <script>, <template> and <style> nodes found under <head>.
68 $nodes = $this->
findUniqueTags( $head, [
'script',
'template',
'style' ] );
// <script> and <template> are mandatory; <style> is not in this list.
71 foreach ( [
'script',
'template' ] as $requiredTag ) {
72 if ( !isset( $nodes[ $requiredTag ] ) ) {
73 throw new Exception(
"No <$requiredTag> tag found" );
// Extra handling for an existing <style> node (body not shown in this excerpt).
79 if ( isset( $nodes[
'style'] ) ) {
// Style contents and language are only read when a <style> node exists; otherwise null.
84 $styleData = isset( $nodes[
'style'] ) ? $this->
getStyleAndLang( $nodes[
'style'] ) : null;
// The template is extracted from the raw input string, not from the DOM node.
// Minification is off unless the 'minifyTemplate' option is set.
85 $template = $this->
getTemplateHtml( $html, $options[
'minifyTemplate'] ??
false );
// Result entries: the script text is trimmed; 'style' and 'styleLang' are null without a <style> tag.
88 'script' => trim( $nodes[
'script']->nodeValue ),
89 'template' => $template,
90 'style' => $styleData ? $styleData[
'style'] :
null,
91 'styleLang' => $styleData ? $styleData[
'lang'] : null
// Parse HTML to DOM using RemexHtml.
// NOTE(review): the enclosing method header is not part of this excerpt;
// presumably this is the body of parseHTML( $html ) - confirm against the full file.
// The objects are chained Tokenizer -> Dispatcher -> TreeBuilder -> DOMBuilder.
// The DOM builder is created with 'suppressHtmlNamespace' => true.
101 $domBuilder = new DOMBuilder( [
'suppressHtmlNamespace' =>
true ] );
// Both the tree builder and the tokenizer are created with 'ignoreErrors' => true.
102 $treeBuilder =
new TreeBuilder( $domBuilder, [
'ignoreErrors' =>
true ] );
103 $tokenizer =
new Tokenizer(
new Dispatcher( $treeBuilder ), $html, [
'ignoreErrors' =>
true ] );
// Run the tokenizer, then read the result back from the DOM builder.
104 $tokenizer->execute();
106 return $domBuilder->getFragment();
// Find occurrences of the tags listed in $tagNames among the direct children
// of $rootNode, expecting at most one occurrence of each.
// NOTE(review): the method header and the initialisation/return of $nodes
// are not part of this excerpt.
119 foreach ( $rootNode->childNodes as $node ) {
// Node names are lower-cased before being compared with $tagNames.
120 $tagName = strtolower( $node->nodeName );
// NOTE(review): in_array() is called without the strict flag here.
121 if ( in_array( $tagName, $tagNames ) ) {
// A second node with the same tag name is an error.
122 if ( isset( $nodes[ $tagName ] ) ) {
123 throw new Exception(
"More than one <$tagName> tag found" );
// Remember the node, keyed by its lower-cased tag name.
125 $nodes[ $tagName ] = $node;
// Verify that a node only has attributes from a given set, and no others.
// NOTE(review): the method header is not part of this excerpt.
// Non-empty allow-list: every attribute on the node must be listed in it.
138 if ( $allowedAttributes ) {
139 foreach ( $node->attributes as $attr ) {
140 if ( !in_array( $attr->name, $allowedAttributes ) ) {
141 throw new Exception(
"<{$node->nodeName}> may not have the " .
142 "{$attr->name} attribute" );
// Empty allow-list: the node may not carry any attribute at all.
145 } elseif ( $node->attributes->length > 0 ) {
146 throw new Exception(
"<{$node->nodeName}> may not have any attributes" );
// Check that the <template> tag has exactly one element child.
// NOTE(review): the method header is not part of this excerpt.
// $rootTemplateNode holds the single element child once it has been found.
163 $rootTemplateNode = null;
164 foreach ( $templateNode->childNodes as $node ) {
165 if ( $node->nodeType === XML_ELEMENT_NODE ) {
// A second element child is an error.
166 if ( $rootTemplateNode !==
null ) {
167 throw new Exception(
'<template> tag may not have multiple child tags' );
169 $rootTemplateNode = $node;
170 } elseif ( $node->nodeType === XML_TEXT_NODE ) {
// Text nodes are accepted only when they consist of whitespace.
172 if ( trim( $node->nodeValue ) !==
'' ) {
173 throw new Exception(
'<template> tag may not contain text' );
// Comment nodes are accepted; every other node type is rejected.
175 } elseif ( $node->nodeType !== XML_COMMENT_NODE ) {
177 throw new Exception(
"<template> tag may only contain element and comment nodes, " .
178 " found node of type {$node->nodeType}" );
// No element child was found at all.
181 if ( $rootTemplateNode ===
null ) {
182 throw new Exception(
'<template> tag may not be empty' );
// Get the contents and language of the <style> tag.
// NOTE(review): the method header and most of the returned array are not
// part of this excerpt; only its 'lang' entry is visible.
// The style contents are trimmed of surrounding whitespace.
193 $style = trim( $styleNode->nodeValue );
// The language is taken from the lang attribute and defaults to 'css' when absent.
194 $styleLang = $styleNode->hasAttribute(
'lang' ) ?
195 $styleNode->getAttribute(
'lang' ) :
'css';
// Only 'css' and 'less' are accepted.
196 if ( $styleLang !==
'css' && $styleLang !==
'less' ) {
197 throw new Exception(
"<style lang=\"$styleLang\"> is invalid," .
198 " lang must be \"css\" or \"less\"" );
202 'lang' => $styleLang,
// Get the HTML contents of the <template> tag, optionally minified.
// NOTE(review): the method header and some constructor arguments are not
// part of this excerpt.
// The serializer uses the custom template formatter, which receives the $minify flag.
219 $serializer =
new Serializer( $this->newTemplateFormatter( $minify ) );
// The tokenizer feeds a filtering dispatcher that wraps a TreeBuilder writing
// into the serializer; both are created with 'ignoreErrors' => true.
220 $tokenizer =
new Tokenizer(
221 $this->newFilteringDispatcher(
222 new TreeBuilder( $serializer, [
'ignoreErrors' =>
true ] ),
225 $html, [
'ignoreErrors' =>
true ]
// Tokenize as a fragment whose context element is <template> in the HTML namespace.
227 $tokenizer->execute( [
'fragmentNamespace' => HTMLData::NS_HTML,
'fragmentName' =>
'template' ] );
// The serialized result is trimmed before being returned.
228 return trim( $serializer->getResult() );
// Custom HtmlFormatter subclass that optionally removes comments and strips whitespace.
// NOTE(review): this is an excerpt; property declarations, several conditions
// and the bodies of some branches are not visible here.
240 return new class( $minify ) extends HtmlFormatter {
// Store the minify flag for use by the overridden formatter callbacks.
243 public function __construct( $minify ) {
244 $this->minify = $minify;
// Overrides startDocument(); the body is not shown in this excerpt.
247 public function startDocument( $fragmentNamespace, $fragmentName ) {
// Comments get special handling when minifying (branch body not shown);
// otherwise they are passed to the parent formatter.
252 public function comment( SerializerNode $parent, $text ) {
253 if ( $this->minify ) {
257 return parent::comment( $parent, $text );
// Text handling. The visible condition checks whether the parent is outside the
// HTML namespace or is not listed in $this->prefixLfElements.
260 public function characters( SerializerNode $parent, $text, $start, $length ) {
264 $parent->namespace !== HTMLData::NS_HTML ||
265 !isset( $this->prefixLfElements[ $parent->name ] )
// Cut out the relevant substring, then collapse each run of spaces, CR, LF
// and tabs into a single space.
268 $text = substr( $text, $start, $length );
270 $text = preg_replace(
'/[ \r\n\t]+/',
' ', $text );
// The length is recomputed because the text may have become shorter.
// NOTE(review): $start presumably has to be reset as well once $text is the
// substring; no such line is visible in this excerpt - confirm.
272 $length = strlen( $text );
274 return parent::characters( $parent, $text, $start, $length );
// Element handling. The visible condition mirrors the one in characters(),
// applied to the node itself.
277 public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
281 $node->namespace !== HTMLData::NS_HTML ||
282 !isset( $this->prefixLfElements[ $node->name ] )
// Strip leading and trailing whitespace from the serialized contents.
286 $contents = preg_replace(
'/(^[ \r\n\t]+)|([\r\n\t ]+$)/',
'', $contents );
288 return parent::element( $parent, $node, $contents );
// Custom Dispatcher subclass that only dispatches tree events inside a tag with a certain name.
// NOTE(review): this is an excerpt; the lines that change $nodeDepth are not
// visible here. Presumably it counts how deeply the tokenizer is nested inside
// the target tag - confirm against the full file.
302 return new class( $treeBuilder, $nodeName ) extends Dispatcher {
// $nodeDepth starts at 0; the callbacks below forward events only while it is non-zero.
304 private $nodeDepth = 0;
// $seenTag is set to true in startTag() once the target tag has been encountered.
305 private $seenTag =
false;
// Store the tag name to filter on and hand the tree builder to the parent Dispatcher.
307 public function __construct( TreeBuilder $treeBuilder, $nodeName ) {
308 $this->nodeName = $nodeName;
309 parent::__construct( $treeBuilder );
// Start tags are forwarded only while $nodeDepth is non-zero.
312 public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
313 if ( $this->nodeDepth ) {
314 parent::startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
317 if ( $name === $this->nodeName ) {
// Seeing the target tag again at depth 0 after it was already seen is an error.
318 if ( $this->nodeDepth === 0 && $this->seenTag ) {
320 throw new Exception(
"More than one <{$this->nodeName}> tag found" );
323 $this->seenTag =
true;
// End tags: the target tag gets special handling (not shown here); forwarding
// happens only while $nodeDepth is non-zero.
327 public function endTag( $name, $sourceStart, $sourceLength ) {
328 if ( $name === $this->nodeName ) {
331 if ( $this->nodeDepth ) {
332 parent::endTag( $name, $sourceStart, $sourceLength );
// Character data is forwarded only while $nodeDepth is non-zero.
336 public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
337 if ( $this->nodeDepth ) {
338 parent::characters( $text, $start, $length, $sourceStart, $sourceLength );
// Comments are forwarded only while $nodeDepth is non-zero.
342 public function comment( $text, $sourceStart, $sourceLength ) {
343 if ( $this->nodeDepth ) {
344 parent::comment( $text, $sourceStart, $sourceLength );
if(ini_get('mbstring.func_overload')) if(!defined('MW_ENTRY_POINT'))
Pre-config setup: Before loading LocalSettings.php.
Parser for Vue single file components (.vue files).
findUniqueTags(DOMNode $rootNode, array $tagNames)
Find occurrences of specified tags in a DOM node, expecting at most one occurrence of each.
newFilteringDispatcher(TreeBuilder $treeBuilder, $nodeName)
Custom Dispatcher subclass that only dispatches tree events inside a tag with a certain name.
getStyleAndLang(DOMElement $styleNode)
Get the contents and language of the <style> tag.
newTemplateFormatter( $minify)
Custom HtmlFormatter subclass that optionally removes comments and strips whitespace.
parse(string $html, array $options=[])
Parse a Vue single file component, and extract the script, template and style parts.
validateAttributes(DOMNode $node, array $allowedAttributes)
Verify that a given node only has a given set of attributes, and no others.
parseHTML( $html)
Parse HTML to DOM using RemexHtml.
validateTemplateTag(DOMNode $templateNode)
Check that the <template> tag has exactly one element child.
getTemplateHtml( $html, $minify)
Get the HTML contents of the <template> tag, optionally minified.