Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
89.83% covered (warning)
89.83%
159 / 177
22.22% covered (danger)
22.22%
2 / 9
CRAP
0.00% covered (danger)
0.00%
0 / 1
SVGReader
89.83% covered (warning)
89.83%
159 / 177
22.22% covered (danger)
22.22%
2 / 9
87.90
0.00% covered (danger)
0.00%
0 / 1
 __construct
72.00% covered (warning)
72.00%
18 / 25
0.00% covered (danger)
0.00%
0 / 1
5.55
 getMetadata
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 read
90.91% covered (success)
90.91%
30 / 33
0.00% covered (danger)
0.00%
0 / 1
20.30
 readField
91.67% covered (success)
91.67%
11 / 12
0.00% covered (danger)
0.00%
0 / 1
8.04
 readXml
80.00% covered (warning)
80.00%
4 / 5
0.00% covered (danger)
0.00%
0 / 1
3.07
 animateFilterAndLang
97.67% covered (success)
97.67%
42 / 43
0.00% covered (danger)
0.00%
0 / 1
24
 debug
50.00% covered (danger)
50.00%
1 / 2
0.00% covered (danger)
0.00%
0 / 1
2.50
 handleSVGAttribs
86.21% covered (warning)
86.21%
25 / 29
0.00% covered (danger)
0.00%
0 / 1
15.59
 scaleSVGUnit
100.00% covered (success)
100.00%
27 / 27
100.00% covered (success)
100.00%
1 / 1
3
1<?php
2
3/**
4 * Extraction of SVG image metadata.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup Media
23 * @author "Derk-Jan Hartman <hartman _at_ videolan d0t org>"
24 * @author Brooke Vibber
25 * @copyright Copyright © 2010-2010 Brooke Vibber, Derk-Jan Hartman
26 * @license GPL-2.0-or-later
27 */
28
29use MediaWiki\MainConfigNames;
30use MediaWiki\MediaWikiServices;
31use Wikimedia\AtEase\AtEase;
32
/**
 * Streaming extractor of SVG image metadata.
 *
 * Walks the document with XMLReader and collects: pixel width/height, the
 * width/height exactly as written in the file, title, description, the raw
 * <metadata> XML, whether the image looks animated, and the languages
 * targeted through systemLanguage attributes.
 *
 * @ingroup Media
 */
class SVGReader {
    private const DEFAULT_WIDTH = 512;
    private const DEFAULT_HEIGHT = 512;
    private const NS_SVG = 'http://www.w3.org/2000/svg';
    public const LANG_PREFIX_MATCH = 1;
    public const LANG_FULL_MATCH = 2;

    /** @var XMLReader */
    private $reader;

    /** @var bool When true, progress messages are sent to wfDebug() */
    private $mDebug = false;

    /** @var array Metadata collected so far, keyed by field name */
    private $metadata = [];

    /** @var int[] Full language tags seen in systemLanguage, mapped to LANG_FULL_MATCH */
    private $languages = [];

    /** @var int[] Primary language subtags seen in systemLanguage, mapped to LANG_PREFIX_MATCH */
    private $languagePrefixes = [];

    /**
     * Creates an SVGReader drawing from the source provided
     *
     * Files larger than the SVGMetadataCutoff setting are only read up to
     * that many bytes, so the parsed document may be truncated.
     *
     * @param string $source URI from which to read
     * @throws InvalidSVGException
     */
    public function __construct( $source ) {
        $svgMetadataCutoff = MediaWikiServices::getInstance()->getMainConfig()
            ->get( MainConfigNames::SVGMetadataCutoff );
        $this->reader = new XMLReader();

        // Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus.
        $size = filesize( $source );
        if ( $size === false ) {
            throw new InvalidSVGException( "Error getting filesize of SVG." );
        }

        if ( $size > $svgMetadataCutoff ) {
            $this->debug( "SVG is $size bytes, which is bigger than {$svgMetadataCutoff}. Truncating." );
            $contents = file_get_contents( $source, false, null, 0, $svgMetadataCutoff );
            if ( $contents === false ) {
                throw new InvalidSVGException( 'Error reading SVG file.' );
            }
            $status = $this->reader->XML( $contents, null, LIBXML_NOERROR | LIBXML_NOWARNING );
        } else {
            $status = $this->reader->open( $source, null, LIBXML_NOERROR | LIBXML_NOWARNING );
        }
        if ( !$status ) {
            throw new InvalidSVGException( "Error getting xml of SVG." );
        }

        // Expand entities, since Adobe Illustrator uses them for xmlns
        // attributes (T33719). Note that libxml2 has some protection
        // against large recursive entity expansions so this is not as
        // insecure as it might appear to be. However, it is still extremely
        // insecure. It's necessary to wrap any read() calls with
        // libxml_disable_entity_loader() to avoid arbitrary local file
        // inclusion, or even arbitrary code execution if the expect
        // extension is installed (T48859).
        // phpcs:ignore Generic.PHP.NoSilencedErrors -- suppress deprecation per T268847
        $oldDisable = @libxml_disable_entity_loader( true );
        $this->reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );

        $this->metadata['width'] = self::DEFAULT_WIDTH;
        $this->metadata['height'] = self::DEFAULT_HEIGHT;

        // The size in the units specified by the SVG file
        // (for the metadata box)
        // Per the SVG spec, if unspecified, default to '100%'
        $this->metadata['originalWidth'] = '100%';
        $this->metadata['originalHeight'] = '100%';

        // Because we cut off the end of the svg making an invalid one. Complicated
        // try catch thing to make sure warnings get restored. Seems like there should
        // be a better way.
        AtEase::suppressWarnings();
        try {
            // Note: if this throws, the width/height will be taken to be 0x0.
            // Should we consider it the default 512x512 instead?
            $this->read();
        } finally {
            // Restore the previous entity-loader state while warnings are
            // still suppressed, then restore warnings.
            libxml_disable_entity_loader( $oldDisable );
            AtEase::restoreWarnings();
        }
    }

    /**
     * @return array Array with the known metadata
     */
    public function getMetadata() {
        return $this->metadata;
    }

    /**
     * Read the SVG
     *
     * Checks that the first element is <svg> in the SVG namespace, parses its
     * size attributes, then visits each direct child of the root element.
     *
     * @throws InvalidSVGException If the first element is not an SVG <svg> element
     * @return bool Always true; failures are reported by exception
     */
    protected function read() {
        $keepReading = $this->reader->read();

        /* Skip until first element */
        while ( $keepReading && $this->reader->nodeType !== XMLReader::ELEMENT ) {
            $keepReading = $this->reader->read();
        }

        if ( $this->reader->localName !== 'svg' || $this->reader->namespaceURI !== self::NS_SVG ) {
            throw new InvalidSVGException( "Expected <svg> tag, got " .
                $this->reader->localName . " in NS " . $this->reader->namespaceURI );
        }
        $this->debug( '<svg> tag is correct.' );
        $this->handleSVGAttribs();

        $exitDepth = $this->reader->depth;
        $keepReading = $this->reader->read();
        while ( $keepReading ) {
            $tag = $this->reader->localName;
            $type = $this->reader->nodeType;
            $isSVG = ( $this->reader->namespaceURI === self::NS_SVG );

            $this->debug( $tag );

            // Closing tag of the root element: nothing more to look at.
            if ( $isSVG && $tag === 'svg' && $type === XMLReader::END_ELEMENT
                && $this->reader->depth <= $exitDepth
            ) {
                break;
            }

            if ( $isSVG && $tag === 'title' ) {
                $this->readField( $tag, 'title' );
            } elseif ( $isSVG && $tag === 'desc' ) {
                $this->readField( $tag, 'description' );
            } elseif ( $isSVG && $tag === 'metadata' && $type === XMLReader::ELEMENT ) {
                $this->readXml( 'metadata' );
            } elseif ( $isSVG && $tag === 'script' ) {
                // We normally do not allow scripted svgs.
                // However its possible to configure MW to let them
                // in, and such files should be considered animated.
                $this->metadata['animated'] = true;
            } elseif ( $tag !== '#text' ) {
                $this->debug( "Unhandled top-level XML tag $tag" );

                // Recurse into children of current tag, looking for animation and languages.
                $this->animateFilterAndLang( $tag );
            }

            // Goto next element, which is sibling of current (Skip children).
            // This is the only place that advances between top-level nodes;
            // the handlers above must not step past their own element.
            $keepReading = $this->reader->next();
        }

        $this->reader->close();

        // Array union: a full-tag entry wins over a prefix entry with the same key.
        $this->metadata['translations'] = $this->languages + $this->languagePrefixes;

        return true;
    }

    /**
     * Read a textelement from an element
     *
     * If the element contains several text nodes, the last one wins.
     * On return the reader is on the element's end tag, or still on the
     * element itself when it is self-closing or nothing was read.
     *
     * @param string $name Name of the element that we are reading from
     * @param string|null $metafield Field that we will fill with the result
     */
    private function readField( $name, $metafield = null ) {
        $this->debug( "Read field $metafield" );
        if ( !$metafield || $this->reader->nodeType !== XMLReader::ELEMENT ) {
            return;
        }
        // A self-closing element (<title/>) never produces an END_ELEMENT
        // node. Without this guard the loop below would run through the rest
        // of the document and store an unrelated text node in the field.
        if ( $this->reader->isEmptyElement ) {
            return;
        }
        $exitDepth = $this->reader->depth;
        $keepReading = $this->reader->read();
        while ( $keepReading ) {
            if ( $this->reader->nodeType === XMLReader::END_ELEMENT
                && $this->reader->depth <= $exitDepth
                && $this->reader->localName === $name
                && $this->reader->namespaceURI === self::NS_SVG
            ) {
                break;
            }

            if ( $this->reader->nodeType === XMLReader::TEXT ) {
                $this->metadata[$metafield] = trim( $this->reader->value );
            }
            $keepReading = $this->reader->read();
        }
    }

    /**
     * Read an XML snippet from an element
     *
     * Stores the trimmed inner XML of the current element. The reader is
     * left on the element; the caller's loop skips its subtree.
     *
     * @param string|null $metafield Field that we will fill with the result
     */
    private function readXml( $metafield = null ) {
        $this->debug( "Read top level metadata" );
        if ( !$metafield || $this->reader->nodeType !== XMLReader::ELEMENT ) {
            return;
        }
        // @todo Find and store type of xml snippet. metadata['metadataType'] = "rdf"
        $this->metadata[$metafield] = trim( $this->reader->readInnerXml() );

        // Do not call next() here: read() advances after every handler, so a
        // second advance would skip the node directly after </metadata>.
    }

    /**
     * Filter all children, looking for animated elements.
     * Also get a list of languages that can be targeted.
     *
     * Only descendants of the current element are inspected, not the
     * element itself.
     *
     * @param string $name Name of the element that we are reading from
     */
    private function animateFilterAndLang( $name ) {
        $this->debug( "animate filter for tag $name" );
        if ( $this->reader->nodeType !== XMLReader::ELEMENT ) {
            return;
        }
        if ( $this->reader->isEmptyElement ) {
            return;
        }
        $exitDepth = $this->reader->depth;
        $keepReading = $this->reader->read();
        while ( $keepReading ) {
            if ( $this->reader->localName === $name && $this->reader->depth <= $exitDepth
                && $this->reader->nodeType === XMLReader::END_ELEMENT
            ) {
                break;
            }

            if ( $this->reader->namespaceURI === self::NS_SVG
                && $this->reader->nodeType === XMLReader::ELEMENT
            ) {
                $sysLang = $this->reader->getAttribute( 'systemLanguage' );
                if ( $sysLang !== null && $sysLang !== '' ) {
                    $this->recordSystemLanguages( $sysLang );
                }
                switch ( $this->reader->localName ) {
                    case 'style':
                        $styleContents = $this->reader->readString();
                        // CSS animations are declared with the animation /
                        // animation-* properties and @keyframes rules.
                        if (
                            str_contains( $styleContents, 'animation' ) ||
                            str_contains( $styleContents, '@keyframes' )
                        ) {
                            $this->debug( "HOUSTON WE HAVE ANIMATION" );
                            $this->metadata['animated'] = true;
                        }
                        break;
                    case 'script':
                        // Normally we disallow files with
                        // <script>, but its possible
                        // to configure MW to disable
                        // such checks.
                    case 'animate':
                    case 'set':
                    case 'animateMotion':
                    case 'animateColor':
                    case 'animateTransform':
                        $this->debug( "HOUSTON WE HAVE ANIMATION" );
                        $this->metadata['animated'] = true;
                        break;
                }
            }
            $keepReading = $this->reader->read();
        }
    }

    /**
     * Record the language tags listed in one systemLanguage attribute value.
     *
     * @see https://www.w3.org/TR/SVG/struct.html#SystemLanguageAttribute
     * @param string $sysLang Comma-separated list of language tags
     */
    private function recordSystemLanguages( $sysLang ) {
        foreach ( explode( ',', $sysLang ) as $langItem ) {
            $langItem = trim( $langItem );
            if ( LanguageCode::isWellFormedLanguageTag( $langItem ) ) {
                $this->languages[$langItem] = self::LANG_FULL_MATCH;
            }
            // Note, the standard says that any prefix should work,
            // here we do only the initial prefix, since that will catch
            // 99% of cases, and we are going to compare against fallbacks.
            // This differs mildly from how the spec says languages should be
            // handled, however it matches better how the MediaWiki language
            // preference is generally handled.
            $dash = strpos( $langItem, '-' );
            // Intentionally checking both !false and > 0 at the same time.
            if ( $dash ) {
                $itemPrefix = substr( $langItem, 0, $dash );
                if ( LanguageCode::isWellFormedLanguageTag( $itemPrefix ) ) {
                    $this->languagePrefixes[$itemPrefix] = self::LANG_PREFIX_MATCH;
                }
            }
        }
    }

    /**
     * Send a message to the debug log when debugging is enabled.
     *
     * @param string $data Message text
     */
    private function debug( $data ) {
        if ( $this->mDebug ) {
            wfDebug( "SVGReader: $data" );
        }
    }

    /**
     * Parse the attributes of an SVG element
     *
     * The parser has to be in the start element of "<svg>"
     *
     * Sets the pixel 'width' and 'height' metadata (only when both come out
     * positive) and records the width/height attributes as written in
     * 'originalWidth' / 'originalHeight'. A missing dimension is derived
     * from the viewBox aspect ratio.
     */
    private function handleSVGAttribs() {
        $defaultWidth = self::DEFAULT_WIDTH;
        $defaultHeight = self::DEFAULT_HEIGHT;
        $aspect = 1.0;
        $width = null;
        $height = null;

        // Attributes are tested for truthiness, so an absent attribute, ''
        // and '0' are all treated as "not specified".
        $viewBoxAttr = $this->reader->getAttribute( 'viewBox' );
        if ( $viewBoxAttr ) {
            // min-x min-y width height
            $viewBox = preg_split( '/\s*[\s,]\s*/', trim( $viewBoxAttr ) );
            if ( is_array( $viewBox ) && count( $viewBox ) === 4 ) {
                $viewWidth = self::scaleSVGUnit( $viewBox[2] );
                $viewHeight = self::scaleSVGUnit( $viewBox[3] );
                if ( $viewWidth > 0 && $viewHeight > 0 ) {
                    $aspect = $viewWidth / $viewHeight;
                    $defaultHeight = $defaultWidth / $aspect;
                }
            }
        }

        $widthAttr = $this->reader->getAttribute( 'width' );
        if ( $widthAttr ) {
            $width = self::scaleSVGUnit( $widthAttr, $defaultWidth );
            $this->metadata['originalWidth'] = $widthAttr;
        }

        $heightAttr = $this->reader->getAttribute( 'height' );
        if ( $heightAttr ) {
            $height = self::scaleSVGUnit( $heightAttr, $defaultHeight );
            $this->metadata['originalHeight'] = $heightAttr;
        }

        if ( !isset( $width ) && !isset( $height ) ) {
            $width = $defaultWidth;
            $height = $width / $aspect;
        } elseif ( isset( $width ) && !isset( $height ) ) {
            $height = $width / $aspect;
        } elseif ( isset( $height ) && !isset( $width ) ) {
            $width = $height * $aspect;
        }

        if ( $width > 0 && $height > 0 ) {
            $this->metadata['width'] = (int)round( $width );
            $this->metadata['height'] = (int)round( $height );
        }
    }

    /**
     * Return a pixel equivalent for a labeled CSS/SVG length.
     * https://www.w3.org/TR/SVG11/coords.html#Units
     * https://www.w3.org/TR/css-values-3/#lengths
     *
     * The result is not rounded. Input that does not look like a number with
     * an optional known unit is cast to float as-is.
     *
     * @param string $length CSS/SVG length.
     * @param float|int $viewportSize Optional scale for percentage units...
     * @return float Length in pixels
     */
    public static function scaleSVGUnit( $length, $viewportSize = 512 ) {
        // Per CSS values spec, assume 96dpi.
        static $unitLength = [
            'px' => 1.0,
            'pt' => 1.333333,
            'pc' => 16.0,
            'mm' => 3.7795275,
            'q' => 0.944881,
            'cm' => 37.795275,
            'in' => 96.0,
            'em' => 16.0, // Browser default font size if unspecified
            'rem' => 16.0,
            'ch' => 8.0, // Spec: assume the "0" glyph is 0.5em wide if impossible to determine
            'ex' => 8.0, // Spec says 0.5em if impossible to determine
            '' => 1.0, // "User units" pixels by default
        ];
        // TODO: Does not support vw, vh, vmin, vmax.
        $matches = [];
        // The trailing empty alternative lets unit-less numbers match, so
        // they are scaled through the '' entry above.
        if ( preg_match(
            '/^\s*([-+]?\d*(?:\.\d+|\d+)(?:[Ee][-+]?\d+)?)\s*' .
            '(rem|em|ex|px|pt|pc|cm|mm|in|ch|q|%|)\s*$/i',
            $length,
            $matches
        ) ) {
            $length = (float)$matches[1];
            $unit = strtolower( $matches[2] );
            if ( $unit === '%' ) {
                return $length * 0.01 * $viewportSize;
            }

            return $length * $unitLength[$unit];
        }

        // Assume pixels
        return (float)$length;
    }
}