MediaWiki REL1_40
VueComponentParser.php
Go to the documentation of this file.
1<?php
23
24use DOMDocument;
25use DOMElement;
26use DOMNode;
27use Exception;
28use Wikimedia\RemexHtml\DOM\DOMBuilder;
29use Wikimedia\RemexHtml\HTMLData;
30use Wikimedia\RemexHtml\Serializer\HtmlFormatter;
31use Wikimedia\RemexHtml\Serializer\Serializer;
32use Wikimedia\RemexHtml\Serializer\SerializerNode;
33use Wikimedia\RemexHtml\Tokenizer\Attributes;
34use Wikimedia\RemexHtml\Tokenizer\Tokenizer;
35use Wikimedia\RemexHtml\TreeBuilder\Dispatcher;
36use Wikimedia\RemexHtml\TreeBuilder\Element;
37use Wikimedia\RemexHtml\TreeBuilder\TreeBuilder;
38use Wikimedia\Zest\Zest;
39
/**
 * Parse a Vue single file component, and extract the script, template and style parts.
 *
 * @param string $html HTML source of the single file component
 * @param array $options Associative array of options. The only key read here is
 *   'minifyTemplate' (defaults to false), which is passed on to getTemplateHtml().
 * @return array Associative array with the keys 'script', 'template', 'style' and
 *   'styleLang'. 'style' and 'styleLang' are null when there is no <style> tag.
 * @throws Exception If a required tag is missing, a tag appears more than once at the
 *   top level, or a tag carries an attribute that is not allowed
 */
public function parse( string $html, array $options = [] ): array {
	$dom = $this->parseHTML( $html );

	// Remex wraps everything in <html><head>, unwrap that. The ?? null makes a missing
	// <head> reach the explicit check below instead of triggering an
	// "undefined array key" warning on the [ 0 ] lookup.
	$head = Zest::getElementsByTagName( $dom, 'head' )[ 0 ] ?? null;
	if ( !$head ) {
		throw new Exception( 'Parsed DOM did not contain a <head> tag' );
	}

	// Find the <script>, <template> and <style> tags. They can appear in any order, but they
	// must be at the top level, and there can only be one of each.
	$nodes = $this->findUniqueTags( $head, [ 'script', 'template', 'style' ] );

	// Throw an error if we didn't find a <script> or <template> tag. <style> is optional.
	foreach ( [ 'script', 'template' ] as $requiredTag ) {
		if ( !isset( $nodes[ $requiredTag ] ) ) {
			throw new Exception( "No <$requiredTag> tag found" );
		}
	}

	// <script> and <template> may not have attributes; <style> may only have lang.
	$this->validateAttributes( $nodes['script'], [] );
	$this->validateAttributes( $nodes['template'], [] );
	if ( isset( $nodes['style'] ) ) {
		$this->validateAttributes( $nodes['style'], [ 'lang' ] );
	}

	$styleData = isset( $nodes['style'] ) ? $this->getStyleAndLang( $nodes['style'] ) : null;
	// The template is extracted by re-tokenizing the original source rather than by
	// reading it back out of the DOM built above.
	$template = $this->getTemplateHtml( $html, $options['minifyTemplate'] ?? false );

	return [
		'script' => trim( $nodes['script']->nodeValue ?? '' ),
		'template' => $template,
		'style' => $styleData ? $styleData['style'] : null,
		'styleLang' => $styleData ? $styleData['lang'] : null
	];
}
100
/**
 * Feed an HTML string through Remex's tokenizer and tree builder, and return the
 * document produced by the DOM builder.
 *
 * @param string $html HTML to parse
 * @return DOMDocument
 */
private function parseHTML( $html ): DOMDocument {
	// Both the tree builder and the tokenizer are told to ignore parse errors.
	$lenient = [ 'ignoreErrors' => true ];

	$builder = new DOMBuilder( [ 'suppressHtmlNamespace' => true ] );
	$dispatcher = new Dispatcher( new TreeBuilder( $builder, $lenient ) );
	( new Tokenizer( $dispatcher, $html, $lenient ) )->execute();

	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $builder->getFragment();
}
114
/**
 * Collect the direct children of a node whose tag name is in a given list, and
 * make sure each of those tag names occurs at most once.
 *
 * Only direct children are inspected; deeper descendants are ignored.
 *
 * @param DOMNode $rootNode Node whose children are searched
 * @param string[] $tagNames Lowercase tag names to look for
 * @return DOMNode[] Map of lowercase tag name => matching child node. Tag names
 *   that were not found have no entry.
 * @throws Exception If one of the tag names occurs more than once
 */
private function findUniqueTags( DOMNode $rootNode, array $tagNames ): array {
	$nodes = [];
	foreach ( $rootNode->childNodes as $node ) {
		// Node names are lowercased before matching, so matching is case-insensitive
		// with respect to the node name.
		$tagName = strtolower( $node->nodeName );
		// Strict comparison: tag names are strings and must match exactly.
		if ( !in_array( $tagName, $tagNames, true ) ) {
			continue;
		}
		if ( isset( $nodes[ $tagName ] ) ) {
			throw new Exception( "More than one <$tagName> tag found" );
		}
		$nodes[ $tagName ] = $node;
	}
	return $nodes;
}
136
/**
 * Check that a node has no attributes other than the allowed ones.
 *
 * @param DOMNode $node Node to check
 * @param string[] $allowedAttributes Attribute names the node may carry. An empty
 *   list means the node may not have any attributes at all.
 * @throws Exception If the node has an attribute that is not allowed
 */
private function validateAttributes( DOMNode $node, array $allowedAttributes ): void {
	$attributes = $node->attributes;
	// DOMNode::$attributes is null for nodes that are not elements; such a node, like
	// an element without attributes, has nothing that could be disallowed.
	if ( $attributes === null || $attributes->length === 0 ) {
		return;
	}

	if ( !$allowedAttributes ) {
		throw new Exception( "<{$node->nodeName}> may not have any attributes" );
	}

	foreach ( $attributes as $attr ) {
		// Strict comparison: attribute names are strings and must match exactly.
		if ( !in_array( $attr->name, $allowedAttributes, true ) ) {
			throw new Exception( "<{$node->nodeName}> may not have the " .
				"{$attr->name} attribute" );
		}
	}
}
155
/**
 * Read the contents and the language of a <style> node.
 *
 * The language comes from the lang attribute and falls back to 'css' when that
 * attribute is absent.
 *
 * @param DOMElement $styleNode The <style> node
 * @return array Associative array with the keys 'style' (trimmed text content of
 *   the node) and 'lang' ('css' or 'less')
 * @throws Exception If the lang attribute is anything other than 'css' or 'less'
 */
private function getStyleAndLang( DOMElement $styleNode ): array {
	$lang = 'css';
	if ( $styleNode->hasAttribute( 'lang' ) ) {
		$lang = $styleNode->getAttribute( 'lang' );
	}

	if ( !in_array( $lang, [ 'css', 'less' ], true ) ) {
		throw new Exception( "<style lang=\"$lang\"> is invalid," .
			" lang must be \"css\" or \"less\"" );
	}

	return [
		'style' => trim( $styleNode->nodeValue ?? '' ),
		'lang' => $lang,
	];
}
175
/**
 * Extract the serialized HTML of the contents of the <template> tag.
 *
 * The source is tokenized again, with a filtering dispatcher that only lets
 * through what is inside <template>, and the resulting tree is serialized with the
 * template formatter.
 *
 * @param string $html HTML source of the single file component
 * @param bool $minify Passed to newTemplateFormatter(); when true the formatter
 *   strips comments and collapses whitespace
 * @return string Serialized template HTML, trimmed
 */
private function getTemplateHtml( $html, $minify ) {
	// Both the tree builder and the tokenizer are told to ignore parse errors.
	$lenient = [ 'ignoreErrors' => true ];

	$serializer = new Serializer( $this->newTemplateFormatter( $minify ) );
	$treeBuilder = new TreeBuilder( $serializer, $lenient );
	$dispatcher = $this->newFilteringDispatcher( $treeBuilder, 'template' );

	$tokenizer = new Tokenizer( $dispatcher, $html, $lenient );
	$tokenizer->execute( [
		'fragmentNamespace' => HTMLData::NS_HTML,
		'fragmentName' => 'template',
	] );

	return trim( $serializer->getResult() );
}
200
/**
 * Build the HtmlFormatter used to serialize the template.
 *
 * The returned formatter never emits a doctype. When $minify is true it also drops
 * comments, collapses whitespace in text, and strips leading and trailing
 * whitespace from element contents, leaving <pre>/<listing>/<textarea> untouched.
 *
 * @param bool $minify Whether the formatter should minify its output
 * @return HtmlFormatter
 */
private function newTemplateFormatter( $minify ) {
	return new class( $minify ) extends HtmlFormatter {
		/** @var bool Whether output should be minified */
		private $minify;

		public function __construct( $minify ) {
			$this->minify = $minify;
		}

		public function startDocument( $fragmentNamespace, $fragmentName ) {
			// Remove <!doctype html>
			return '';
		}

		public function comment( SerializerNode $parent, $text ) {
			// Remove all comments when minifying
			return $this->minify ? '' : parent::comment( $parent, $text );
		}

		public function characters( SerializerNode $parent, $text, $start, $length ) {
			// Don't touch <pre>/<listing>/<textarea> nodes
			$keepWhitespace = $parent->namespace === HTMLData::NS_HTML &&
				isset( $this->prefixLfElements[ $parent->name ] );
			if ( !$this->minify || $keepWhitespace ) {
				return parent::characters( $parent, $text, $start, $length );
			}

			// Collapse runs of adjacent whitespace, and convert all whitespace to spaces
			$collapsed = preg_replace( '/[ \r\n\t]+/', ' ', substr( $text, $start, $length ) );
			return parent::characters( $parent, $collapsed, 0, strlen( $collapsed ) );
		}

		public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
			// Don't touch <pre>/<listing>/<textarea> nodes
			$keepWhitespace = $node->namespace === HTMLData::NS_HTML &&
				isset( $this->prefixLfElements[ $node->name ] );
			if ( $this->minify && !$keepWhitespace && $contents !== null ) {
				// Remove leading and trailing whitespace
				$contents = preg_replace( '/(^[ \r\n\t]+)|([\r\n\t ]+$)/', '', $contents );
			}
			return parent::element( $parent, $node, $contents );
		}
	};
}
263
/**
 * Build a Dispatcher that only forwards start tags, end tags, text and comments
 * that occur inside a tag with the given name.
 *
 * The outermost opening and closing tags with that name are not forwarded
 * themselves, only what is between them. Tags with the same name nested inside
 * are forwarded.
 *
 * @param TreeBuilder $treeBuilder Tree builder that receives the forwarded tokens
 * @param string $nodeName Tag name to filter for
 * @return Dispatcher
 */
private function newFilteringDispatcher( TreeBuilder $treeBuilder, $nodeName ) {
	return new class( $treeBuilder, $nodeName ) extends Dispatcher {
		/** @var string Tag name whose contents are forwarded */
		private $nodeName;
		/** @var int Number of currently open tags named $nodeName; 0 means "outside" */
		private $nodeDepth = 0;
		/** @var bool Whether an opening tag named $nodeName has been seen so far */
		private $seenTag = false;

		public function __construct( TreeBuilder $treeBuilder, $nodeName ) {
			$this->nodeName = $nodeName;
			parent::__construct( $treeBuilder );
		}

		public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
			// Forward before updating the depth, so that the outermost opening tag
			// itself is not passed on.
			if ( $this->nodeDepth ) {
				parent::startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
			}

			if ( $name === $this->nodeName ) {
				if ( $this->nodeDepth === 0 && $this->seenTag ) {
					// This is the second opening tag, not nested in the first one
					throw new Exception( "More than one <{$this->nodeName}> tag found" );
				}
				$this->nodeDepth++;
				$this->seenTag = true;
			}
		}

		public function endTag( $name, $sourceStart, $sourceLength ) {
			// Only count closing tags that match an open one. An unmatched closing tag
			// would otherwise push the depth below zero, which is truthy and would make
			// every later token look like it is inside the tag.
			if ( $name === $this->nodeName && $this->nodeDepth > 0 ) {
				$this->nodeDepth--;
			}
			// Forward after updating the depth, so that the outermost closing tag
			// itself is not passed on.
			if ( $this->nodeDepth ) {
				parent::endTag( $name, $sourceStart, $sourceLength );
			}
		}

		public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::characters( $text, $start, $length, $sourceStart, $sourceLength );
			}
		}

		public function comment( $text, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::comment( $text, $sourceStart, $sourceLength );
			}
		}
	};
}
320}
321
// Also register the class under the alias 'VueComponentParser'.
// NOTE(review): presumably this keeps the old un-namespaced class name working; the
// namespace declaration is not visible here - confirm.
323class_alias( VueComponentParser::class, 'VueComponentParser' );
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
Definition WebStart.php:88
Parser for Vue single file components (.vue files).
parse(string $html, array $options=[])
Parse a Vue single file component, and extract the script, template and style parts.
return true
Definition router.php:92