MediaWiki master
VueComponentParser.php
Go to the documentation of this file.
1<?php
23
24use DOMDocument;
25use DOMElement;
26use DOMNode;
27use InvalidArgumentException;
28use Wikimedia\RemexHtml\DOM\DOMBuilder;
29use Wikimedia\RemexHtml\HTMLData;
30use Wikimedia\RemexHtml\Serializer\HtmlFormatter;
31use Wikimedia\RemexHtml\Serializer\Serializer;
32use Wikimedia\RemexHtml\Serializer\SerializerNode;
33use Wikimedia\RemexHtml\Tokenizer\Attributes;
34use Wikimedia\RemexHtml\Tokenizer\Tokenizer;
35use Wikimedia\RemexHtml\TreeBuilder\Dispatcher;
36use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder;
37use Wikimedia\Zest\Zest;
38
/**
 * Parse a Vue single file component, and extract the script, template and style parts.
 *
 * @param string $html HTML source of the single file component
 * @param array $options Associative array of options. Recognized key:
 *  - minifyTemplate (bool, default false): forwarded to getTemplateHtml() to request
 *    a minified template
 * @return array Associative array with the keys 'script', 'template', 'style' and
 *  'styleLang'. 'style' and 'styleLang' are null when there is no <style> tag.
 * @throws InvalidArgumentException If the parsed DOM has no <head>, if <script> or
 *  <template> is missing, or if one of the tags fails validation
 */
public function parse( string $html, array $options = [] ): array {
	$dom = $this->parseHTML( $html );
	// Remex wraps everything in <html><head>, unwrap that. Fall back to null so that a
	// missing <head> reaches the explicit check below instead of triggering an
	// "undefined array key" warning first.
	$head = Zest::getElementsByTagName( $dom, 'head' )[ 0 ] ?? null;
	if ( !$head ) {
		throw new InvalidArgumentException( 'Parsed DOM did not contain a <head> tag' );
	}

	// Find the <script>, <template> and <style> tags. They can appear in any order, but they
	// must be at the top level, and there can only be one of each.
	$nodes = $this->findUniqueTags( $head, [ 'script', 'template', 'style' ] );

	// Throw an error if we didn't find a <script> or <template> tag. <style> is optional.
	foreach ( [ 'script', 'template' ] as $requiredTag ) {
		if ( !isset( $nodes[ $requiredTag ] ) ) {
			throw new InvalidArgumentException( "No <$requiredTag> tag found" );
		}
	}

	// <script> and <template> may not carry attributes; <style> may only carry lang.
	$this->validateAttributes( $nodes['script'], [] );
	$this->validateAttributes( $nodes['template'], [] );
	if ( isset( $nodes['style'] ) ) {
		$this->validateAttributes( $nodes['style'], [ 'lang' ] );
	}

	$styleData = isset( $nodes['style'] ) ? $this->getStyleAndLang( $nodes['style'] ) : null;
	// The template is re-extracted from the raw source rather than read from the DOM node.
	$template = $this->getTemplateHtml( $html, $options['minifyTemplate'] ?? false );

	return [
		'script' => trim( $nodes['script']->nodeValue ?? '' ),
		'template' => $template,
		'style' => $styleData ? $styleData['style'] : null,
		'styleLang' => $styleData ? $styleData['lang'] : null
	];
}
99
/**
 * Parse HTML source into a DOM, using the Remex tokenizer and tree builder with
 * parse errors ignored.
 *
 * @param string $html HTML source to parse
 * @return DOMDocument The fragment produced by the DOM builder
 */
private function parseHTML( $html ): DOMDocument {
	$parseOptions = [ 'ignoreErrors' => true ];
	$builder = new DOMBuilder( [ 'suppressHtmlNamespace' => true ] );
	$dispatcher = new Dispatcher( new TreeBuilder( $builder, $parseOptions ) );
	( new Tokenizer( $dispatcher, $html, $parseOptions ) )->execute();
	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $builder->getFragment();
}
113
/**
 * Collect the direct children of a node whose (lowercased) tag name is in a given list,
 * requiring that each tag name occurs at most once.
 *
 * @param DOMNode $rootNode Node whose direct children are inspected
 * @param string[] $tagNames Lowercase tag names to look for
 * @return DOMNode[] Found nodes, keyed by lowercase tag name. Tags that were not found
 *  have no entry.
 * @throws InvalidArgumentException If the same tag name is found more than once
 */
private function findUniqueTags( DOMNode $rootNode, array $tagNames ): array {
	$found = [];
	foreach ( $rootNode->childNodes as $child ) {
		$name = strtolower( $child->nodeName );
		if ( !in_array( $name, $tagNames ) ) {
			// Not one of the tags we are looking for
			continue;
		}
		if ( isset( $found[ $name ] ) ) {
			throw new InvalidArgumentException( "More than one <$name> tag found" );
		}
		$found[ $name ] = $child;
	}
	return $found;
}
135
/**
 * Verify that a node only has attributes from an allowed list.
 *
 * @param DOMNode $node Node to check
 * @param string[] $allowedAttributes Attribute names that are permitted. An empty list
 *  means the node may not have any attributes at all.
 * @throws InvalidArgumentException If the node has an attribute that is not allowed
 */
private function validateAttributes( DOMNode $node, array $allowedAttributes ): void {
	if ( !$allowedAttributes ) {
		// Nothing is allowed: any attribute at all is an error
		if ( $node->attributes->length > 0 ) {
			throw new InvalidArgumentException( "<{$node->nodeName}> may not have any attributes" );
		}
		return;
	}

	foreach ( $node->attributes as $attribute ) {
		if ( in_array( $attribute->name, $allowedAttributes ) ) {
			continue;
		}
		throw new InvalidArgumentException(
			"<{$node->nodeName}> may not have the {$attribute->name} attribute"
		);
	}
}
154
/**
 * Extract the trimmed contents and the language of a <style> node.
 *
 * @param DOMElement $styleNode The <style> node
 * @return array Associative array with the keys 'style' (trimmed text contents) and
 *  'lang' ('css' or 'less'; 'css' when the node has no lang attribute)
 * @throws InvalidArgumentException If the lang attribute is neither "css" nor "less"
 */
private function getStyleAndLang( DOMElement $styleNode ): array {
	// Default to plain CSS when no lang attribute is present
	$lang = 'css';
	if ( $styleNode->hasAttribute( 'lang' ) ) {
		$lang = $styleNode->getAttribute( 'lang' );
	}

	if ( !in_array( $lang, [ 'css', 'less' ], true ) ) {
		throw new InvalidArgumentException(
			"<style lang=\"$lang\"> is invalid, lang must be \"css\" or \"less\""
		);
	}

	return [
		'style' => trim( $styleNode->nodeValue ?? '' ),
		'lang' => $lang,
	];
}
174
/**
 * Extract the contents of the <template> tag from the component source as serialized HTML.
 *
 * The source is tokenized again, with a filtering dispatcher that only forwards what is
 * inside <template>, and parsed as a fragment in the context of a <template> element.
 *
 * @param string $html HTML source of the whole component
 * @param bool $minify Whether the template formatter should minify its output
 * @return string Serialized template HTML, with surrounding whitespace trimmed
 */
private function getTemplateHtml( $html, $minify ) {
	$parseOptions = [ 'ignoreErrors' => true ];

	$serializer = new Serializer( $this->newTemplateFormatter( $minify ) );
	$treeBuilder = new TreeBuilder( $serializer, $parseOptions );
	$dispatcher = $this->newFilteringDispatcher( $treeBuilder, 'template' );

	$tokenizer = new Tokenizer( $dispatcher, $html, $parseOptions );
	$tokenizer->execute( [
		'fragmentNamespace' => HTMLData::NS_HTML,
		'fragmentName' => 'template',
	] );

	return trim( $serializer->getResult() );
}
199
/**
 * Build the HtmlFormatter used to serialize the template.
 *
 * The formatter never emits a doctype. When $minify is set it additionally drops
 * comments, collapses runs of whitespace in text to a single space, and strips leading
 * and trailing whitespace from element contents, leaving <pre>/<listing>/<textarea>
 * untouched.
 *
 * @param bool $minify Whether to minify the serialized output
 * @return HtmlFormatter
 */
private function newTemplateFormatter( $minify ) {
	return new class( $minify ) extends HtmlFormatter {
		private $minify;

		public function __construct( $minify ) {
			$this->minify = $minify;
		}

		/**
		 * Whether whitespace belonging to the given node may be minified.
		 *
		 * @param SerializerNode $node
		 * @return bool
		 */
		private function mayMinifyWhitespaceIn( SerializerNode $node ) {
			if ( !$this->minify ) {
				return false;
			}
			// Don't touch <pre>/<listing>/<textarea> nodes
			$isPreformatted = $node->namespace === HTMLData::NS_HTML &&
				isset( $this->prefixLfElements[ $node->name ] );
			return !$isPreformatted;
		}

		public function startDocument( $fragmentNamespace, $fragmentName ) {
			// Remove <!doctype html>
			return '';
		}

		public function comment( SerializerNode $parent, $text ) {
			// Remove all comments when minifying
			return $this->minify ? '' : parent::comment( $parent, $text );
		}

		public function characters( SerializerNode $parent, $text, $start, $length ) {
			if ( $this->mayMinifyWhitespaceIn( $parent ) ) {
				// Collapse runs of adjacent whitespace, and convert all whitespace to spaces
				$text = preg_replace( '/[ \r\n\t]+/', ' ', substr( $text, $start, $length ) );
				// The text was cut down to the relevant range, so it now starts at offset 0
				$start = 0;
				$length = strlen( $text );
			}
			return parent::characters( $parent, $text, $start, $length );
		}

		public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
			if ( $contents !== null && $this->mayMinifyWhitespaceIn( $node ) ) {
				// Remove leading and trailing whitespace
				$contents = preg_replace( '/(^[ \r\n\t]+)|([\r\n\t ]+$)/', '', $contents );
			}
			return parent::element( $parent, $node, $contents );
		}
	};
}
262
/**
 * Build a Dispatcher that only forwards tokens located inside a given tag.
 *
 * Tokens are forwarded to the parent Dispatcher only while the nesting depth of
 * $nodeName tags is non-zero. The outermost opening and closing $nodeName tags
 * themselves are not forwarded; nested ones are.
 *
 * @param TreeBuilder $treeBuilder Tree builder that receives the forwarded tokens
 * @param string $nodeName Tag name to filter on
 * @return Dispatcher
 */
private function newFilteringDispatcher( TreeBuilder $treeBuilder, $nodeName ) {
	return new class( $treeBuilder, $nodeName ) extends Dispatcher {
		/** @var string Tag name whose contents are forwarded */
		private $nodeName;
		/** @var int Number of currently open $nodeName tags */
		private $nodeDepth = 0;
		/** @var bool Whether an opening $nodeName tag has been seen at all */
		private $seenTag = false;

		public function __construct( TreeBuilder $treeBuilder, $nodeName ) {
			$this->nodeName = $nodeName;
			parent::__construct( $treeBuilder );
		}

		public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
			// Forward before updating the depth, so the outermost opening tag is not forwarded
			if ( $this->nodeDepth ) {
				parent::startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
			}

			if ( $name === $this->nodeName ) {
				if ( $this->nodeDepth === 0 && $this->seenTag ) {
					// This is the second opening tag, not nested in the first one
					throw new InvalidArgumentException( "More than one <{$this->nodeName}> tag found" );
				}
				$this->nodeDepth++;
				$this->seenTag = true;
			}
		}

		public function endTag( $name, $sourceStart, $sourceLength ) {
			// Only count end tags that close a tag we actually opened. A stray end tag at
			// depth 0 would otherwise make the depth negative, which is truthy: content
			// outside the tag would be forwarded, and the real opening tag would merely
			// bring the depth back to 0 so that its contents are dropped.
			if ( $name === $this->nodeName && $this->nodeDepth > 0 ) {
				$this->nodeDepth--;
			}
			// Forward after updating the depth, so the outermost closing tag is not forwarded
			if ( $this->nodeDepth ) {
				parent::endTag( $name, $sourceStart, $sourceLength );
			}
		}

		public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::characters( $text, $start, $length, $sourceStart, $sourceLength );
			}
		}

		public function comment( $text, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::comment( $text, $sourceStart, $sourceLength );
			}
		}
	};
}
319}
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Parser for Vue single file components (.vue files).
parse(string $html, array $options=[])
Parse a Vue single file component, and extract the script, template and style parts.
__construct( $options, callable $shouldModifyCallback, callable $modifyCallback)
element(SerializerNode $parent, SerializerNode $node, $contents)
startDocument( $fragmentNamespace, $fragmentName)