Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
89.83% covered (warning)
89.83%
159 / 177
22.22% covered (danger)
22.22%
2 / 9
CRAP
0.00% covered (danger)
0.00%
0 / 1
SVGReader
89.83% covered (warning)
89.83%
159 / 177
22.22% covered (danger)
22.22%
2 / 9
87.90
0.00% covered (danger)
0.00%
0 / 1
 __construct
72.00% covered (warning)
72.00%
18 / 25
0.00% covered (danger)
0.00%
0 / 1
5.55
 getMetadata
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 read
90.91% covered (success)
90.91%
30 / 33
0.00% covered (danger)
0.00%
0 / 1
20.30
 readField
91.67% covered (success)
91.67%
11 / 12
0.00% covered (danger)
0.00%
0 / 1
8.04
 readXml
80.00% covered (warning)
80.00%
4 / 5
0.00% covered (danger)
0.00%
0 / 1
3.07
 animateFilterAndLang
97.67% covered (success)
97.67%
42 / 43
0.00% covered (danger)
0.00%
0 / 1
24
 debug
50.00% covered (danger)
50.00%
1 / 2
0.00% covered (danger)
0.00%
0 / 1
2.50
 handleSVGAttribs
86.21% covered (warning)
86.21%
25 / 29
0.00% covered (danger)
0.00%
0 / 1
15.59
 scaleSVGUnit
100.00% covered (success)
100.00%
27 / 27
100.00% covered (success)
100.00%
1 / 1
3
1<?php
2
3/**
4 * Extraction of SVG image metadata.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup Media
23 * @author "Derk-Jan Hartman <hartman _at_ videolan d0t org>"
24 * @author Brooke Vibber
25 * @copyright Copyright © 2010-2010 Brooke Vibber, Derk-Jan Hartman
26 * @license GPL-2.0-or-later
27 */
28
29use MediaWiki\Language\LanguageCode;
30use MediaWiki\MainConfigNames;
31use MediaWiki\MediaWikiServices;
32use Wikimedia\AtEase\AtEase;
33
/**
 * Streaming extractor of metadata from an SVG file.
 *
 * The whole document is processed from the constructor; afterwards the result
 * is available through getMetadata(). Keys that may be present in the result:
 *  - width, height: pixel size as integers (512x512 unless the file says otherwise)
 *  - originalWidth, originalHeight: the raw attribute strings ('100%' if absent)
 *  - title, description: text of the top-level <title> / <desc> elements
 *  - metadata: inner XML of the top-level <metadata> element
 *  - animated: true when a script, SMIL animation element or suspicious
 *    <style> content was seen
 *  - translations: map of language code => LANG_FULL_MATCH | LANG_PREFIX_MATCH
 *
 * @ingroup Media
 */
class SVGReader {
    private const DEFAULT_WIDTH = 512;
    private const DEFAULT_HEIGHT = 512;
    private const NS_SVG = 'http://www.w3.org/2000/svg';
    public const LANG_PREFIX_MATCH = 1;
    public const LANG_FULL_MATCH = 2;

    /** @var XMLReader */
    private $reader;

    /** @var bool When true, debug() forwards its messages to wfDebug() */
    private $mDebug = false;

    /** @var array Metadata collected so far, see the class documentation for the keys */
    private $metadata = [];
    /** @var int[] Language tags found verbatim in systemLanguage attributes */
    private $languages = [];
    /** @var int[] Primary subtags (the part before the first '-') of those tags */
    private $languagePrefixes = [];

    /**
     * Creates an SVGReader drawing from the source provided
     *
     * Files larger than the SVGMetadataCutoff setting are only read up to
     * that many bytes, so the parser may be fed a truncated document.
     *
     * @param string $source URI from which to read
     * @throws InvalidSVGException
     */
    public function __construct( $source ) {
        $svgMetadataCutoff = MediaWikiServices::getInstance()->getMainConfig()
            ->get( MainConfigNames::SVGMetadataCutoff );
        $this->reader = new XMLReader();

        // Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus.
        $size = filesize( $source );
        if ( $size === false ) {
            throw new InvalidSVGException( "Error getting filesize of SVG." );
        }

        if ( $size > $svgMetadataCutoff ) {
            $this->debug( "SVG is $size bytes, which is bigger than {$svgMetadataCutoff}. Truncating." );
            $contents = file_get_contents( $source, false, null, 0, $svgMetadataCutoff );
            if ( $contents === false ) {
                throw new InvalidSVGException( 'Error reading SVG file.' );
            }
            $status = $this->reader->XML( $contents, null, LIBXML_NOERROR | LIBXML_NOWARNING );
        } else {
            $status = $this->reader->open( $source, null, LIBXML_NOERROR | LIBXML_NOWARNING );
        }
        if ( !$status ) {
            throw new InvalidSVGException( "Error getting xml of SVG." );
        }

        // Expand entities, since Adobe Illustrator uses them for xmlns
        // attributes (T33719). Note that libxml2 has some protection
        // against large recursive entity expansions so this is not as
        // insecure as it might appear to be. However, it is still extremely
        // insecure. It's necessary to wrap any read() calls with
        // libxml_disable_entity_loader() to avoid arbitrary local file
        // inclusion, or even arbitrary code execution if the expect
        // extension is installed (T48859).
        // phpcs:ignore Generic.PHP.NoSilencedErrors -- suppress deprecation per T268847
        $oldDisable = @libxml_disable_entity_loader( true );
        $this->reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );

        $this->metadata['width'] = self::DEFAULT_WIDTH;
        $this->metadata['height'] = self::DEFAULT_HEIGHT;

        // The size in the units specified by the SVG file
        // (for the metadata box)
        // Per the SVG spec, if unspecified, default to '100%'
        $this->metadata['originalWidth'] = '100%';
        $this->metadata['originalHeight'] = '100%';

        // Because we cut off the end of the svg making an invalid one. Complicated
        // try catch thing to make sure warnings get restored. Seems like there should
        // be a better way.
        AtEase::suppressWarnings();
        try {
            // Note: if this throws, the width/height will be taken to be 0x0.
            // Should we consider it the default 512x512 instead?
            $this->read();
        } finally {
            // Restore the entity loader state while warnings are still
            // suppressed, then re-enable warnings.
            libxml_disable_entity_loader( $oldDisable );
            AtEase::restoreWarnings();
        }
    }

    /**
     * @return array Array with the known metadata
     */
    public function getMetadata() {
        return $this->metadata;
    }

    /**
     * Read the SVG
     *
     * Validates that the first element is <svg> in the SVG namespace, reads
     * its size attributes and then walks its direct children. The reader is
     * closed when this method finishes, whether it returns or throws.
     *
     * @throws InvalidSVGException If the first element is not an SVG <svg> element
     * @return bool Always true when no exception is thrown
     */
    protected function read() {
        try {
            $keepReading = $this->reader->read();

            /* Skip until first element */
            while ( $keepReading && $this->reader->nodeType !== XMLReader::ELEMENT ) {
                $keepReading = $this->reader->read();
            }

            if ( $this->reader->localName !== 'svg' || $this->reader->namespaceURI !== self::NS_SVG ) {
                throw new InvalidSVGException( "Expected <svg> tag, got " .
                    $this->reader->localName . " in NS " . $this->reader->namespaceURI );
            }
            $this->debug( '<svg> tag is correct.' );
            $this->handleSVGAttribs();

            $exitDepth = $this->reader->depth;
            $keepReading = $this->reader->read();
            while ( $keepReading ) {
                $tag = $this->reader->localName;
                $type = $this->reader->nodeType;
                $isSVG = ( $this->reader->namespaceURI === self::NS_SVG );

                $this->debug( "$tag" );

                // Closing tag of the root element: nothing more to look at.
                if ( $isSVG && $tag === 'svg' && $type === XMLReader::END_ELEMENT
                    && $this->reader->depth <= $exitDepth
                ) {
                    break;
                }

                if ( $isSVG && $tag === 'title' ) {
                    $this->readField( $tag, 'title' );
                } elseif ( $isSVG && $tag === 'desc' ) {
                    $this->readField( $tag, 'description' );
                } elseif ( $isSVG && $tag === 'metadata' && $type === XMLReader::ELEMENT ) {
                    $this->readXml( 'metadata' );
                } elseif ( $isSVG && $tag === 'script' ) {
                    // We normally do not allow scripted svgs.
                    // However its possible to configure MW to let them
                    // in, and such files should be considered animated.
                    $this->metadata['animated'] = true;
                } elseif ( $tag !== '#text' ) {
                    $this->debug( "Unhandled top-level XML tag $tag" );

                    // Recurse into children of current tag, looking for animation and languages.
                    $this->animateFilterAndLang( $tag );
                }

                // Goto next element, which is sibling of current (Skip children).
                // Every helper above leaves the cursor on the node it handled
                // (or on its end tag), so exactly one advance is needed here.
                $keepReading = $this->reader->next();
            }
        } finally {
            // Release the underlying stream on failure as well as on success.
            $this->reader->close();
        }

        // Array union keeps the left-hand value, so a full match wins over a
        // prefix match recorded for the same code.
        $this->metadata['translations'] = $this->languages + $this->languagePrefixes;

        return true;
    }

    /**
     * Read a textelement from an element
     *
     * Consumes nodes up to the matching end tag. Each text node met on the
     * way overwrites the field with its trimmed value, so the last one wins.
     *
     * @param string $name Name of the element that we are reading from
     * @param string|null $metafield Field that we will fill with the result
     */
    private function readField( $name, $metafield = null ) {
        $this->debug( "Read field $metafield" );
        if ( !$metafield || $this->reader->nodeType !== XMLReader::ELEMENT ) {
            return;
        }
        if ( $this->reader->isEmptyElement ) {
            // A self-closing element (<title/>) has no end tag to stop at.
            // Scanning for one would run into the following siblings, storing
            // their text in this field and hiding them from the caller.
            return;
        }
        $keepReading = $this->reader->read();
        while ( $keepReading ) {
            if ( $this->reader->localName === $name
                && $this->reader->namespaceURI === self::NS_SVG
                && $this->reader->nodeType === XMLReader::END_ELEMENT
            ) {
                break;
            }

            if ( $this->reader->nodeType === XMLReader::TEXT ) {
                $this->metadata[$metafield] = trim( $this->reader->value );
            }
            $keepReading = $this->reader->read();
        }
    }

    /**
     * Read an XML snippet from an element
     *
     * Stores the trimmed inner XML of the current element. The cursor is left
     * on that element: the loop in read() advances past it (and its subtree)
     * itself, and advancing here as well would make the node that directly
     * follows the element disappear from the walk.
     *
     * @param string|null $metafield Field that we will fill with the result
     */
    private function readXml( $metafield = null ) {
        $this->debug( "Read top level metadata" );
        if ( !$metafield || $this->reader->nodeType !== XMLReader::ELEMENT ) {
            return;
        }
        // @todo Find and store type of xml snippet. metadata['metadataType'] = "rdf"
        $this->metadata[$metafield] = trim( $this->reader->readInnerXml() );
    }

    /**
     * Filter all children, looking for animated elements.
     * Also get a list of languages that can be targeted.
     *
     * Walks every descendant of the current element and stops on its end tag.
     * The element the cursor starts on is not inspected itself.
     *
     * @param string $name Name of the element that we are reading from
     */
    private function animateFilterAndLang( $name ) {
        $this->debug( "animate filter for tag $name" );
        if ( $this->reader->nodeType !== XMLReader::ELEMENT ) {
            return;
        }
        if ( $this->reader->isEmptyElement ) {
            // No children and no end tag to wait for.
            return;
        }
        $exitDepth = $this->reader->depth;
        $keepReading = $this->reader->read();
        while ( $keepReading ) {
            if ( $this->reader->localName === $name && $this->reader->depth <= $exitDepth
                && $this->reader->nodeType === XMLReader::END_ELEMENT
            ) {
                break;
            }

            if ( $this->reader->namespaceURI === self::NS_SVG
                && $this->reader->nodeType === XMLReader::ELEMENT
            ) {
                $sysLang = $this->reader->getAttribute( 'systemLanguage' );
                if ( $sysLang !== null && $sysLang !== '' ) {
                    $this->recordSystemLanguages( $sysLang );
                }
                switch ( $this->reader->localName ) {
                    case 'style':
                        $styleContents = $this->reader->readString();
                        // NOTE(review): the CSS property is spelled "animation";
                        // confirm that matching "animated" is what is intended.
                        if (
                            str_contains( $styleContents, 'animated' ) ||
                            str_contains( $styleContents, '@keyframes' )
                        ) {
                            $this->debug( "HOUSTON WE HAVE ANIMATION" );
                            $this->metadata['animated'] = true;
                        }
                        break;
                    case 'script':
                        // Normally we disallow files with
                        // <script>, but its possible
                        // to configure MW to disable
                        // such checks.
                    case 'animate':
                    case 'set':
                    case 'animateMotion':
                    case 'animateColor':
                    case 'animateTransform':
                        $this->debug( "HOUSTON WE HAVE ANIMATION" );
                        $this->metadata['animated'] = true;
                        break;
                }
            }
            $keepReading = $this->reader->read();
        }
    }

    /**
     * Record the languages named by one systemLanguage attribute value.
     *
     * Every well-formed tag in the comma-separated list is stored as a full
     * match; the part before its first dash, when well-formed, is stored as a
     * prefix match.
     *
     * @see https://www.w3.org/TR/SVG/struct.html#SystemLanguageAttribute
     * @param string $sysLang Non-empty attribute value
     */
    private function recordSystemLanguages( $sysLang ) {
        foreach ( explode( ',', $sysLang ) as $langItem ) {
            $langItem = trim( $langItem );
            if ( LanguageCode::isWellFormedLanguageTag( $langItem ) ) {
                $this->languages[$langItem] = self::LANG_FULL_MATCH;
            }
            // Note, the standard says that any prefix should work,
            // here we do only the initial prefix, since that will catch
            // 99% of cases, and we are going to compare against fallbacks.
            // This differs mildly from how the spec says languages should be
            // handled, however it matches better how the MediaWiki language
            // preference is generally handled.
            $dash = strpos( $langItem, '-' );
            // Intentionally checking both !false and > 0 at the same time.
            if ( $dash ) {
                $itemPrefix = substr( $langItem, 0, $dash );
                if ( LanguageCode::isWellFormedLanguageTag( $itemPrefix ) ) {
                    $this->languagePrefixes[$itemPrefix] = self::LANG_PREFIX_MATCH;
                }
            }
        }
    }

    /**
     * Send a message to the debug log, but only when $mDebug is enabled.
     *
     * @param string $data Message text
     */
    private function debug( $data ) {
        if ( $this->mDebug ) {
            wfDebug( "SVGReader: $data" );
        }
    }

    /**
     * Parse the attributes of an SVG element
     *
     * The parser has to be in the start element of "<svg>"
     *
     * Fills in originalWidth/originalHeight from the raw attributes and
     * width/height in pixels. A missing dimension is derived from the other
     * one using the viewBox aspect ratio (1:1 without a usable viewBox).
     */
    private function handleSVGAttribs() {
        $defaultWidth = self::DEFAULT_WIDTH;
        $defaultHeight = self::DEFAULT_HEIGHT;
        $aspect = 1.0;
        $width = null;
        $height = null;

        // Truthiness checks are deliberate: a missing, empty or "0" attribute
        // is treated as not specified.
        $viewBoxAttr = $this->reader->getAttribute( 'viewBox' );
        if ( $viewBoxAttr ) {
            // min-x min-y width height
            $viewBox = preg_split( '/\s*[\s,]\s*/', trim( $viewBoxAttr ) );
            if ( count( $viewBox ) === 4 ) {
                $viewWidth = self::scaleSVGUnit( $viewBox[2] );
                $viewHeight = self::scaleSVGUnit( $viewBox[3] );
                if ( $viewWidth > 0 && $viewHeight > 0 ) {
                    $aspect = $viewWidth / $viewHeight;
                    $defaultHeight = $defaultWidth / $aspect;
                }
            }
        }

        $widthAttr = $this->reader->getAttribute( 'width' );
        if ( $widthAttr ) {
            // Percentages are resolved against the default viewport size.
            $width = self::scaleSVGUnit( $widthAttr, $defaultWidth );
            $this->metadata['originalWidth'] = $widthAttr;
        }

        $heightAttr = $this->reader->getAttribute( 'height' );
        if ( $heightAttr ) {
            $height = self::scaleSVGUnit( $heightAttr, $defaultHeight );
            $this->metadata['originalHeight'] = $heightAttr;
        }

        if ( $width === null && $height === null ) {
            $width = $defaultWidth;
            $height = $width / $aspect;
        } elseif ( $height === null ) {
            $height = $width / $aspect;
        } elseif ( $width === null ) {
            $width = $height * $aspect;
        }

        // Keep the defaults set by the constructor when the result is unusable.
        if ( $width > 0 && $height > 0 ) {
            $this->metadata['width'] = (int)round( $width );
            $this->metadata['height'] = (int)round( $height );
        }
    }

    /**
     * Return a pixel equivalent for a labeled CSS/SVG length.
     * https://www.w3.org/TR/SVG11/coords.html#Units
     * https://www.w3.org/TR/css-values-3/#lengths
     *
     * A value without a recognised unit is cast to float as is, which gives
     * 0.0 for text that does not start with a number.
     *
     * @param string $length CSS/SVG length.
     * @param float|int $viewportSize Optional scale for percentage units...
     * @return float Length in pixels
     */
    public static function scaleSVGUnit( $length, $viewportSize = 512 ) {
        // Per CSS values spec, assume 96dpi.
        static $unitLength = [
            'px' => 1.0,
            'pt' => 1.333333,
            'pc' => 16.0,
            'mm' => 3.7795275,
            'q' => 0.944881,
            'cm' => 37.795275,
            'in' => 96.0,
            'em' => 16.0, // Browser default font size if unspecified
            'rem' => 16.0,
            'ch' => 8.0, // Spec says 1em if impossible to determine
            'ex' => 8.0, // Spec says 0.5em if impossible to determine
            '' => 1.0, // "User units" pixels by default
        ];
        // TODO: Does not support vw, vh, vmin, vmax.
        $matches = [];
        if ( preg_match(
            '/^\s*([-+]?\d*(?:\.\d+|\d+)(?:[Ee][-+]?\d+)?)\s*' .
            '(rem|em|ex|px|pt|pc|cm|mm|in|ch|q|%)\s*$/i',
            $length,
            $matches
        ) ) {
            $length = (float)$matches[1];
            $unit = strtolower( $matches[2] );
            if ( $unit === '%' ) {
                return $length * 0.01 * $viewportSize;
            }

            return $length * $unitLength[$unit];
        }

        // Assume pixels
        return (float)$length;
    }
}