MediaWiki master
SVGReader.php
Go to the documentation of this file.
1<?php
2
18use Wikimedia\AtEase\AtEase;
19
/**
 * Extracts metadata from an SVG file using a streaming XMLReader.
 *
 * The collected metadata (see getMetadata()) holds the pixel width/height,
 * the width/height strings as written in the file, the title, description
 * and raw <metadata> XML when present, an 'animated' flag, and the language
 * codes found in systemLanguage attributes ('translations').
 */
class SVGReader {
	/** Width in pixels used when the file does not specify a usable one. */
	private const DEFAULT_WIDTH = 512;
	/** Height in pixels used when the file does not specify a usable one. */
	private const DEFAULT_HEIGHT = 512;
	/** XML namespace that SVG elements must belong to. */
	private const NS_SVG = 'http://www.w3.org/2000/svg';
	/** Marks a 'translations' entry derived from the part before the first dash. */
	public const LANG_PREFIX_MATCH = 1;
	/** Marks a 'translations' entry taken verbatim from systemLanguage. */
	public const LANG_FULL_MATCH = 2;

	/** @var XMLReader Streaming parser positioned inside the SVG document */
	private $reader;

	/** @var bool When true, debug() forwards its messages to wfDebug() */
	private $mDebug = false;

	/** @var array Metadata collected so far, keyed by field name */
	private $metadata = [];
	/** @var int[] Map of full language tag => self::LANG_FULL_MATCH */
	private $languages = [];
	/** @var int[] Map of language prefix => self::LANG_PREFIX_MATCH */
	private $languagePrefixes = [];

	/**
	 * Open the SVG at $source and parse its metadata immediately.
	 *
	 * Files larger than the SVGMetadataCutoff setting are truncated to that many
	 * bytes before parsing, so the parser may see an incomplete document.
	 *
	 * @param string $source Path handed to filesize(), file_get_contents()
	 *  and XMLReader::open()
	 * @throws InvalidSVGException If the file cannot be sized, read or opened,
	 *  or if read() rejects the document
	 */
	public function __construct( $source ) {
		$svgMetadataCutoff = MediaWikiServices::getInstance()->getMainConfig()
			->get( MainConfigNames::SVGMetadataCutoff );
		$this->reader = new XMLReader();

		// Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus.
		$size = filesize( $source );
		if ( $size === false ) {
			throw new InvalidSVGException( "Error getting filesize of SVG." );
		}

		if ( $size > $svgMetadataCutoff ) {
			$this->debug( "SVG is $size bytes, which is bigger than {$svgMetadataCutoff}. Truncating." );
			$contents = file_get_contents( $source, false, null, 0, $svgMetadataCutoff );
			if ( $contents === false ) {
				throw new InvalidSVGException( 'Error reading SVG file.' );
			}
			$status = $this->reader->XML( $contents, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		} else {
			$status = $this->reader->open( $source, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		}
		if ( !$status ) {
			throw new InvalidSVGException( "Error getting xml of SVG." );
		}

		// Expand entities, since Adobe Illustrator uses them for xmlns
		// attributes (T33719). Note that libxml2 has some protection
		// against large recursive entity expansions so this is not as
		// insecure as it might appear to be. However, it is still extremely
		// insecure. It's necessary to wrap any read() calls with
		// libxml_disable_entity_loader() to avoid arbitrary local file
		// inclusion, or even arbitrary code execution if the expect
		// extension is installed (T48859).
		// phpcs:ignore Generic.PHP.NoSilencedErrors -- suppress deprecation per T268847
		$oldDisable = @libxml_disable_entity_loader( true );
		$this->reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );

		$this->metadata['width'] = self::DEFAULT_WIDTH;
		$this->metadata['height'] = self::DEFAULT_HEIGHT;

		// The size in the units specified by the SVG file
		// (for the metadata box)
		// Per the SVG spec, if unspecified, default to '100%'
		$this->metadata['originalWidth'] = '100%';
		$this->metadata['originalHeight'] = '100%';

		// Because we cut off the end of the svg making an invalid one. Complicated
		// try catch thing to make sure warnings get restored. Seems like there should
		// be a better way.
		AtEase::suppressWarnings();
		try {
			// Note: if this throws, the width/height will be taken to be 0x0.
			// Should we consider it the default 512x512 instead?
			$this->read();
		} finally {
			// Restore the previous entity-loader state while warnings are still
			// suppressed, then re-enable warnings.
			libxml_disable_entity_loader( $oldDisable );
			AtEase::restoreWarnings();
		}
	}

	/**
	 * @return array The metadata collected while parsing
	 */
	public function getMetadata() {
		return $this->metadata;
	}

	/**
	 * Walk the document: validate the root <svg> element, record its size
	 * attributes, then visit each top-level child to collect title, description,
	 * <metadata> XML, animation and language information.
	 *
	 * @throws InvalidSVGException If the first element is not <svg> in the SVG namespace
	 * @return bool Always true when no exception is thrown
	 */
	protected function read() {
		$keepReading = $this->reader->read();

		/* Skip until first element */
		while ( $keepReading && $this->reader->nodeType !== XMLReader::ELEMENT ) {
			$keepReading = $this->reader->read();
		}

		if ( $this->reader->localName !== 'svg' || $this->reader->namespaceURI !== self::NS_SVG ) {
			throw new InvalidSVGException( "Expected <svg> tag, got " .
				$this->reader->localName . " in NS " . $this->reader->namespaceURI );
		}
		$this->debug( '<svg> tag is correct.' );
		$this->handleSVGAttribs();

		$exitDepth = $this->reader->depth;
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			$tag = $this->reader->localName;
			$type = $this->reader->nodeType;
			$isSVG = ( $this->reader->namespaceURI === self::NS_SVG );

			$this->debug( "$tag" );

			// Stop at the closing tag of the root <svg>.
			if ( $isSVG && $tag === 'svg' && $type === XMLReader::END_ELEMENT
				&& $this->reader->depth <= $exitDepth
			) {
				break;
			}

			if ( $isSVG && $tag === 'title' ) {
				$this->readField( $tag, 'title' );
			} elseif ( $isSVG && $tag === 'desc' ) {
				$this->readField( $tag, 'description' );
			} elseif ( $isSVG && $tag === 'metadata' && $type === XMLReader::ELEMENT ) {
				$this->readXml( 'metadata' );
			} elseif ( $isSVG && $tag === 'script' ) {
				// We normally do not allow scripted svgs.
				// However its possible to configure MW to let them
				// in, and such files should be considered animated.
				$this->metadata['animated'] = true;
			} elseif ( $tag !== '#text' ) {
				$this->debug( "Unhandled top-level XML tag $tag" );

				// Recurse into children of current tag, looking for animation and languages.
				$this->animateFilterAndLang( $tag );
			}

			// Goto next element, which is sibling of current (Skip children).
			$keepReading = $this->reader->next();
		}

		$this->reader->close();

		// Array union: a full match takes precedence over a prefix match
		// recorded for the same key.
		$this->metadata['translations'] = $this->languages + $this->languagePrefixes;

		return true;
	}

	/**
	 * Store the text content of the current element in a metadata field.
	 *
	 * Reads forward until the matching end tag; the last text node seen wins.
	 *
	 * @param string $name Local name of the element the reader is positioned on
	 * @param string|null $metafield Metadata key to write; nothing is read when empty
	 */
	private function readField( $name, $metafield = null ) {
		$this->debug( "Read field $metafield" );
		if ( !$metafield || $this->reader->nodeType !== XMLReader::ELEMENT ) {
			return;
		}
		// A self-closing element (<title/>) has no END_ELEMENT node. Without this
		// guard the loop below would consume the rest of the document looking
		// for one.
		if ( $this->reader->isEmptyElement ) {
			return;
		}
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			if ( $this->reader->localName === $name
				&& $this->reader->namespaceURI === self::NS_SVG
				&& $this->reader->nodeType === XMLReader::END_ELEMENT
			) {
				break;
			}

			if ( $this->reader->nodeType === XMLReader::TEXT ) {
				$this->metadata[$metafield] = trim( $this->reader->value );
			}
			$keepReading = $this->reader->read();
		}
	}

	/**
	 * Store the inner XML of the current element in a metadata field.
	 *
	 * The reader is left on the element itself; the caller's loop in read()
	 * moves on to the next sibling. Advancing here as well would make the
	 * caller skip the node that directly follows the element.
	 *
	 * @param string|null $metafield Metadata key to write; nothing is read when empty
	 */
	private function readXml( $metafield = null ) {
		$this->debug( "Read top level metadata" );
		if ( !$metafield || $this->reader->nodeType !== XMLReader::ELEMENT ) {
			return;
		}
		// @todo Find and store type of xml snippet. metadata['metadataType'] = "rdf"
		$this->metadata[$metafield] = trim( $this->reader->readInnerXml() );
	}

	/**
	 * Scan the descendants of the current element for animation markers and
	 * systemLanguage attributes.
	 *
	 * Sets metadata['animated'] and fills $languages / $languagePrefixes.
	 * Only descendants are inspected, not the element the reader starts on.
	 *
	 * @param string $name Local name of the element the reader is positioned on
	 */
	private function animateFilterAndLang( $name ) {
		$this->debug( "animate filter for tag $name" );
		if ( $this->reader->nodeType !== XMLReader::ELEMENT ) {
			return;
		}
		if ( $this->reader->isEmptyElement ) {
			return;
		}
		$exitDepth = $this->reader->depth;
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			// Stop once the closing tag of the starting element is reached.
			if ( $this->reader->localName === $name && $this->reader->depth <= $exitDepth
				&& $this->reader->nodeType === XMLReader::END_ELEMENT
			) {
				break;
			}

			if ( $this->reader->namespaceURI === self::NS_SVG
				&& $this->reader->nodeType === XMLReader::ELEMENT
			) {
				$sysLang = $this->reader->getAttribute( 'systemLanguage' );
				if ( $sysLang !== null && $sysLang !== '' ) {
					// See https://www.w3.org/TR/SVG/struct.html#SystemLanguageAttribute
					$langList = explode( ',', $sysLang );
					foreach ( $langList as $langItem ) {
						$langItem = trim( $langItem );
						if ( LanguageCode::isWellFormedLanguageTag( $langItem ) ) {
							$this->languages[$langItem] = self::LANG_FULL_MATCH;
						}
						// Note, the standard says that any prefix should work,
						// here we do only the initial prefix, since that will catch
						// 99% of cases, and we are going to compare against fallbacks.
						// This differs mildly from how the spec says languages should be
						// handled, however it matches better how the MediaWiki language
						// preference is generally handled.
						$dash = strpos( $langItem, '-' );
						// Intentionally checking both !false and > 0 at the same time.
						if ( $dash ) {
							$itemPrefix = substr( $langItem, 0, $dash );
							if ( LanguageCode::isWellFormedLanguageTag( $itemPrefix ) ) {
								$this->languagePrefixes[$itemPrefix] = self::LANG_PREFIX_MATCH;
							}
						}
					}
				}
				switch ( $this->reader->localName ) {
					case 'style':
						$styleContents = $this->reader->readString();
						if (
							str_contains( $styleContents, 'animated' ) ||
							str_contains( $styleContents, '@keyframes' )
						) {
							$this->debug( "HOUSTON WE HAVE ANIMATION" );
							$this->metadata['animated'] = true;
						}
						break;
					case 'script':
						// Normally we disallow files with
						// <script>, but its possible
						// to configure MW to disable
						// such checks.
					case 'animate':
					case 'set':
					case 'animateMotion':
					case 'animateColor':
					case 'animateTransform':
						$this->debug( "HOUSTON WE HAVE ANIMATION" );
						$this->metadata['animated'] = true;
						break;
				}
			}
			$keepReading = $this->reader->read();
		}
	}

	/**
	 * Forward a message to wfDebug() when debugging is switched on.
	 *
	 * @param string $data Message text; prefixed with "SVGReader: "
	 */
	private function debug( string $data ) {
		if ( $this->mDebug ) {
			wfDebug( "SVGReader: $data" );
		}
	}

	/**
	 * Derive the pixel width and height from the root element's viewBox,
	 * width and height attributes and store them in the metadata.
	 *
	 * A missing dimension is computed from the other one using the viewBox
	 * aspect ratio (1:1 when there is no usable viewBox). The defaults set by
	 * the constructor are kept unless both results are positive.
	 */
	private function handleSVGAttribs() {
		$defaultWidth = self::DEFAULT_WIDTH;
		$defaultHeight = self::DEFAULT_HEIGHT;
		$aspect = 1.0;
		$width = null;
		$height = null;

		if ( $this->reader->getAttribute( 'viewBox' ) ) {
			// min-x min-y width height
			$viewBox = preg_split( '/\s*[\s,]\s*/', trim( $this->reader->getAttribute( 'viewBox' ) ?? '' ) );
			if ( count( $viewBox ) === 4 ) {
				$viewWidth = self::scaleSVGUnit( $viewBox[2] );
				$viewHeight = self::scaleSVGUnit( $viewBox[3] );
				if ( $viewWidth > 0 && $viewHeight > 0 ) {
					$aspect = $viewWidth / $viewHeight;
					$defaultHeight = $defaultWidth / $aspect;
				}
			}
		}
		if ( $this->reader->getAttribute( 'width' ) ) {
			$width = self::scaleSVGUnit( $this->reader->getAttribute( 'width' ) ?? '', $defaultWidth );
			$this->metadata['originalWidth'] = $this->reader->getAttribute( 'width' );
		}
		if ( $this->reader->getAttribute( 'height' ) ) {
			$height = self::scaleSVGUnit( $this->reader->getAttribute( 'height' ) ?? '', $defaultHeight );
			$this->metadata['originalHeight'] = $this->reader->getAttribute( 'height' );
		}

		if ( $width === null && $height === null ) {
			$width = $defaultWidth;
			$height = $width / $aspect;
		} elseif ( $width !== null && $height === null ) {
			$height = $width / $aspect;
		} elseif ( $height !== null && $width === null ) {
			$width = $height * $aspect;
		}

		if ( $width > 0 && $height > 0 ) {
			$this->metadata['width'] = (int)round( $width );
			$this->metadata['height'] = (int)round( $height );
		}
	}

	/**
	 * Convert a CSS/SVG length string to pixels, assuming 96dpi.
	 *
	 * Recognised units are listed in $unitLength, plus '%', which is taken
	 * relative to $viewportSize. Input that does not match "<number><unit>"
	 * (including a bare number) is cast to float and treated as pixels.
	 *
	 * @param string $length Length such as "10px", "2.5em" or "50%"
	 * @param float|int $viewportSize Reference size for percentage lengths
	 * @return float Length in pixels (not rounded)
	 */
	public static function scaleSVGUnit( $length, $viewportSize = 512 ) {
		// Per CSS values spec, assume 96dpi.
		static $unitLength = [
			'px' => 1.0,
			'pt' => 1.333333,
			'pc' => 16.0,
			'mm' => 3.7795275,
			'q' => 0.944881,
			'cm' => 37.795275,
			'in' => 96.0,
			'em' => 16.0, // Browser default font size if unspecified
			'rem' => 16.0,
			'ch' => 8.0, // Spec says 1em if impossible to determine
			'ex' => 8.0, // Spec says 0.5em if impossible to determine
			'' => 1.0, // "User units" pixels by default
		];
		// TODO: Does not support vw, vh, vmin, vmax.
		$matches = [];
		if ( preg_match(
			'/^\s*([-+]?\d*(?:\.\d+|\d+)(?:[Ee][-+]?\d+)?)\s*' .
			'(rem|em|ex|px|pt|pc|cm|mm|in|ch|q|%)\s*$/i',
			$length,
			// The capture groups must be collected here: group 1 is the number,
			// group 2 the unit.
			$matches
		) ) {
			$length = (float)$matches[1];
			$unit = strtolower( $matches[2] );
			if ( $unit === '%' ) {
				return $length * 0.01 * $viewportSize;
			}

			return $length * $unitLength[$unit];
		}

		// Assume pixels
		return (float)$length;
	}
}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Methods for dealing with language codes.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
const LANG_PREFIX_MATCH
Definition SVGReader.php:27
static scaleSVGUnit( $length, $viewportSize=512)
Return a rounded pixel equivalent for a labeled CSS/SVG length.
read()
Read the SVG.
const LANG_FULL_MATCH
Definition SVGReader.php:28
__construct( $source)
Creates an SVGReader drawing from the source provided.
Definition SVGReader.php:48
$source