Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
89.83% |
159 / 177 |
|
22.22% |
2 / 9 |
CRAP | |
0.00% |
0 / 1 |
SVGReader | |
89.83% |
159 / 177 |
|
22.22% |
2 / 9 |
87.90 | |
0.00% |
0 / 1 |
__construct | |
72.00% |
18 / 25 |
|
0.00% |
0 / 1 |
5.55 | |||
getMetadata | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
read | |
90.91% |
30 / 33 |
|
0.00% |
0 / 1 |
20.30 | |||
readField | |
91.67% |
11 / 12 |
|
0.00% |
0 / 1 |
8.04 | |||
readXml | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
animateFilterAndLang | |
97.67% |
42 / 43 |
|
0.00% |
0 / 1 |
24 | |||
debug | |
50.00% |
1 / 2 |
|
0.00% |
0 / 1 |
2.50 | |||
handleSVGAttribs | |
86.21% |
25 / 29 |
|
0.00% |
0 / 1 |
15.59 | |||
scaleSVGUnit | |
100.00% |
27 / 27 |
|
100.00% |
1 / 1 |
3 |
1 | <?php |
2 | |
3 | /** |
4 | * Extraction of SVG image metadata. |
5 | * |
6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by |
8 | * the Free Software Foundation; either version 2 of the License, or |
9 | * (at your option) any later version. |
10 | * |
11 | * This program is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | * GNU General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU General Public License along |
17 | * with this program; if not, write to the Free Software Foundation, Inc., |
18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
19 | * http://www.gnu.org/copyleft/gpl.html |
20 | * |
21 | * @file |
22 | * @ingroup Media |
23 | * @author "Derk-Jan Hartman <hartman _at_ videolan d0t org>" |
24 | * @author Brooke Vibber |
25 | * @copyright Copyright © 2010-2010 Brooke Vibber, Derk-Jan Hartman |
26 | * @license GPL-2.0-or-later |
27 | */ |
28 | |
29 | use MediaWiki\MainConfigNames; |
30 | use MediaWiki\MediaWikiServices; |
31 | use Wikimedia\AtEase\AtEase; |
32 | |
33 | /** |
34 | * @ingroup Media |
35 | */ |
36 | class SVGReader { |
/** Width, in pixels, used when the file does not provide a usable one */
private const DEFAULT_WIDTH = 512;
/** Height, in pixels, used when the file does not provide a usable one */
private const DEFAULT_HEIGHT = 512;
/** Namespace URI that an element must carry to be treated as SVG */
private const NS_SVG = 'http://www.w3.org/2000/svg';
/** Marker stored for a language that was only derived from the prefix of a systemLanguage entry */
public const LANG_PREFIX_MATCH = 1;
/** Marker stored for a language that appeared verbatim in a systemLanguage entry */
public const LANG_FULL_MATCH = 2;

/** @var XMLReader Pull parser positioned inside the SVG document */
private $reader;

/** @var bool Whether debug() forwards its messages to wfDebug() */
private $mDebug = false;

/**
 * @var array Metadata gathered so far. Keys written by this class: width, height,
 *  originalWidth, originalHeight, title, description, metadata, animated, translations.
 */
private $metadata = [];
/** @var int[] Language tag => self::LANG_FULL_MATCH, filled from systemLanguage attributes */
private $languages = [];
/** @var int[] Language tag prefix (text before the first '-') => self::LANG_PREFIX_MATCH */
private $languagePrefixes = [];
53 | |
/**
 * Open the SVG found at $source and extract its metadata right away.
 *
 * Files larger than the SVGMetadataCutoff setting are only read up to that
 * many bytes, so the parser may be handed a truncated (invalid) document.
 *
 * @param string $source URI from which to read
 * @throws InvalidSVGException
 */
public function __construct( $source ) {
	$this->reader = new XMLReader();
	$svgMetadataCutoff = MediaWikiServices::getInstance()
		->getMainConfig()
		->get( MainConfigNames::SVGMetadataCutoff );

	// The file object passed to SVGHandler::getMetadata is bogus, so
	// $file->getSize() cannot be used; ask the filesystem instead.
	$size = filesize( $source );
	if ( $size === false ) {
		throw new InvalidSVGException( "Error getting filesize of SVG." );
	}

	$libxmlFlags = LIBXML_NOERROR | LIBXML_NOWARNING;
	if ( $size > $svgMetadataCutoff ) {
		$this->debug( "SVG is $size bytes, which is bigger than {$svgMetadataCutoff}. Truncating." );
		$head = file_get_contents( $source, false, null, 0, $svgMetadataCutoff );
		if ( $head === false ) {
			throw new InvalidSVGException( 'Error reading SVG file.' );
		}
		$opened = $this->reader->XML( $head, null, $libxmlFlags );
	} else {
		$opened = $this->reader->open( $source, null, $libxmlFlags );
	}
	if ( !$opened ) {
		throw new InvalidSVGException( "Error getting xml of SVG." );
	}

	// Entities have to be expanded because Adobe Illustrator uses them
	// for xmlns attributes (T33719). libxml2 guards against large
	// recursive expansions, but expansion remains extremely insecure:
	// every read() must run with the entity loader disabled, otherwise
	// arbitrary local file inclusion is possible, or even arbitrary code
	// execution when the expect extension is installed (T48859).
	// phpcs:ignore Generic.PHP.NoSilencedErrors -- suppress deprecation per T268847
	$oldDisable = @libxml_disable_entity_loader( true );
	$this->reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );

	$this->metadata['width'] = self::DEFAULT_WIDTH;
	$this->metadata['height'] = self::DEFAULT_HEIGHT;

	// Size in the units used by the SVG file itself (for the metadata
	// box). The SVG spec says an unspecified size defaults to '100%'.
	$this->metadata['originalWidth'] = '100%';
	$this->metadata['originalHeight'] = '100%';

	// A truncated file is not well-formed, so parser warnings are
	// expected. The finally block guarantees that both the entity loader
	// state and the warning level are put back even if read() throws.
	AtEase::suppressWarnings();
	try {
		// Note: if this throws, the width/height will be taken to be 0x0.
		// Should we consider it the default 512x512 instead?
		$this->read();
	} finally {
		libxml_disable_entity_loader( $oldDisable );
		AtEase::restoreWarnings();
	}
}
118 | |
/**
 * Get everything that was collected while the file was parsed.
 *
 * @return array Array with the known metadata
 */
public function getMetadata() {
	$collected = $this->metadata;

	return $collected;
}
125 | |
/**
 * Walk the document: validate the root <svg> element, then visit each of
 * its direct children and record title, description, raw metadata,
 * animation and the languages the file can be rendered in.
 *
 * @throws InvalidSVGException If the first element is not <svg> in the SVG namespace
 * @return bool Always true when no exception was thrown
 */
protected function read() {
	$reader = $this->reader;

	// Advance until the first element node (or the end of the input).
	do {
		$more = $reader->read();
	} while ( $more && $reader->nodeType !== XMLReader::ELEMENT );

	$rootIsSvg = $reader->localName === 'svg' && $reader->namespaceURI === self::NS_SVG;
	if ( !$rootIsSvg ) {
		throw new InvalidSVGException( "Expected <svg> tag, got " .
			$reader->localName . " in NS " . $reader->namespaceURI );
	}
	$this->debug( '<svg> tag is correct.' );
	$this->handleSVGAttribs();

	$exitDepth = $reader->depth;
	// The first step descends into <svg>; every later step uses next(),
	// which moves to the following sibling and skips the children.
	for ( $more = $reader->read(); $more; $more = $reader->next() ) {
		$tag = $reader->localName;
		$type = $reader->nodeType;
		$isSVG = ( $reader->namespaceURI === self::NS_SVG );

		$this->debug( "$tag" );

		$closesRoot = $isSVG && $tag === 'svg' && $type === XMLReader::END_ELEMENT
			&& $reader->depth <= $exitDepth;
		if ( $closesRoot ) {
			break;
		}

		$handled = false;
		if ( $isSVG ) {
			switch ( $tag ) {
				case 'title':
					$this->readField( $tag, 'title' );
					$handled = true;
					break;
				case 'desc':
					$this->readField( $tag, 'description' );
					$handled = true;
					break;
				case 'metadata':
					if ( $type === XMLReader::ELEMENT ) {
						$this->readXml( 'metadata' );
						$handled = true;
					}
					break;
				case 'script':
					// We normally do not allow scripted svgs.
					// However its possible to configure MW to let them
					// in, and such files should be considered animated.
					$this->metadata['animated'] = true;
					$handled = true;
					break;
			}
		}

		if ( !$handled && $tag !== '#text' ) {
			$this->debug( "Unhandled top-level XML tag $tag" );

			// Recurse into children of current tag, looking for animation and languages.
			$this->animateFilterAndLang( $tag );
		}
	}

	$reader->close();

	// Full matches win over prefix matches for the same key.
	$this->metadata['translations'] = $this->languages + $this->languagePrefixes;

	return true;
}
189 | |
/**
 * Read the text content of the current element into a metadata field.
 *
 * The reader must be positioned on the start tag of the element. On return
 * it is positioned on the matching end tag (or at the end of the input if
 * the document is truncated). When the element holds several text nodes,
 * the last one wins.
 *
 * @param string $name Name of the element that we are reading from
 * @param string|null $metafield Field that we will fill with the result
 */
private function readField( $name, $metafield = null ) {
	$this->debug( "Read field $metafield" );
	if ( !$metafield || $this->reader->nodeType !== XMLReader::ELEMENT ) {
		return;
	}
	// A self-closing element (e.g. <title/>) produces no END_ELEMENT node.
	// Scanning for one would run through the rest of the document, store
	// unrelated text in the field and hide all following elements from
	// the caller. There is nothing to read, so leave the field untouched.
	if ( $this->reader->isEmptyElement ) {
		return;
	}
	$keepReading = $this->reader->read();
	while ( $keepReading ) {
		if ( $this->reader->localName === $name
			&& $this->reader->namespaceURI === self::NS_SVG
			&& $this->reader->nodeType === XMLReader::END_ELEMENT
		) {
			break;
		}

		if ( $this->reader->nodeType === XMLReader::TEXT ) {
			$this->metadata[$metafield] = trim( $this->reader->value );
		}
		$keepReading = $this->reader->read();
	}
}
216 | |
/**
 * Store the inner XML of the current element in a metadata field.
 *
 * The reader is left on the element's start tag. The caller's traversal
 * loop is responsible for moving past it with XMLReader::next(), which
 * skips the element's subtree.
 *
 * @param string|null $metafield Field that we will fill with the result
 */
private function readXml( $metafield = null ) {
	$this->debug( "Read top level metadata" );
	if ( !$metafield || $this->reader->nodeType !== XMLReader::ELEMENT ) {
		return;
	}
	// @todo Find and store type of xml snippet. metadata['metadataType'] = "rdf"
	$this->metadata[$metafield] = trim( $this->reader->readInnerXml() );

	// Deliberately no $this->reader->next() here: read() already calls
	// next() after every top-level node. Advancing here as well made the
	// traversal skip the node directly following </metadata>, so a
	// <title>, <desc> or animated element placed right after it without
	// intervening whitespace was never inspected.
}
232 | |
/**
 * Scan every descendant of the current element for signs of animation and
 * collect the languages named in systemLanguage attributes.
 *
 * Only elements in the SVG namespace are inspected. The scan ends at the
 * end tag of $name at (or above) the depth it started from, or when the
 * input is exhausted.
 *
 * @param string $name Name of the element that we are reading from
 */
private function animateFilterAndLang( $name ) {
	$this->debug( "animate filter for tag $name" );
	$reader = $this->reader;
	// Nothing to descend into for non-elements and self-closing elements.
	if ( $reader->nodeType !== XMLReader::ELEMENT || $reader->isEmptyElement ) {
		return;
	}

	$exitDepth = $reader->depth;
	while ( $reader->read() ) {
		$nodeType = $reader->nodeType;
		if ( $nodeType === XMLReader::END_ELEMENT
			&& $reader->localName === $name
			&& $reader->depth <= $exitDepth
		) {
			break;
		}

		if ( $nodeType !== XMLReader::ELEMENT || $reader->namespaceURI !== self::NS_SVG ) {
			continue;
		}

		$sysLang = $reader->getAttribute( 'systemLanguage' );
		if ( $sysLang !== null && $sysLang !== '' ) {
			// See https://www.w3.org/TR/SVG/struct.html#SystemLanguageAttribute
			foreach ( explode( ',', $sysLang ) as $rawItem ) {
				$langItem = trim( $rawItem );
				if ( LanguageCode::isWellFormedLanguageTag( $langItem ) ) {
					$this->languages[$langItem] = self::LANG_FULL_MATCH;
				}
				// The standard says any prefix should match. Only the
				// initial prefix is handled here, since that covers 99% of
				// cases and the result is compared against fallbacks
				// anyway. This differs mildly from the spec, but it is
				// closer to how the MediaWiki language preference is
				// generally handled.
				$dash = strpos( $langItem, '-' );
				// Falsy covers both "no dash" (false) and "leading dash" (0).
				if ( !$dash ) {
					continue;
				}
				$itemPrefix = substr( $langItem, 0, $dash );
				if ( LanguageCode::isWellFormedLanguageTag( $itemPrefix ) ) {
					$this->languagePrefixes[$itemPrefix] = self::LANG_PREFIX_MATCH;
				}
			}
		}

		$element = $reader->localName;
		if ( $element === 'style' ) {
			$styleContents = $reader->readString();
			$looksAnimated = str_contains( $styleContents, 'animated' )
				|| str_contains( $styleContents, '@keyframes' );
			if ( $looksAnimated ) {
				$this->debug( "HOUSTON WE HAVE ANIMATION" );
				$this->metadata['animated'] = true;
			}
		} elseif ( in_array(
			$element,
			// Files with <script> are normally disallowed, but MW can be
			// configured to disable such checks.
			[ 'script', 'animate', 'set', 'animateMotion', 'animateColor', 'animateTransform' ],
			true
		) ) {
			$this->debug( "HOUSTON WE HAVE ANIMATION" );
			$this->metadata['animated'] = true;
		}
	}
}
313 | |
/**
 * Forward a message to wfDebug() when debugging is switched on for this reader.
 *
 * @param string $data Message text; it is prefixed with "SVGReader: "
 */
private function debug( $data ) {
	if ( !$this->mDebug ) {
		return;
	}

	wfDebug( "SVGReader: $data" );
}
319 | |
/**
 * Derive the pixel size of the image from the attributes of <svg>.
 *
 * The parser has to be in the start element of "<svg>". The viewBox, if
 * usable, supplies the aspect ratio; width and height supply the size.
 * Whichever of the two is missing is computed from the other one and the
 * aspect ratio. The metadata width/height are only overwritten when both
 * resulting values are positive.
 */
private function handleSVGAttribs() {
	$reader = $this->reader;
	$fallbackWidth = self::DEFAULT_WIDTH;
	$fallbackHeight = self::DEFAULT_HEIGHT;
	$aspect = 1.0;

	$viewBoxAttr = $reader->getAttribute( 'viewBox' );
	if ( $viewBoxAttr ) {
		// min-x min-y width height
		$viewBox = preg_split( '/\s*[\s,]\s*/', trim( $viewBoxAttr ) );
		if ( count( $viewBox ) === 4 ) {
			$viewWidth = self::scaleSVGUnit( $viewBox[2] );
			$viewHeight = self::scaleSVGUnit( $viewBox[3] );
			if ( $viewWidth > 0 && $viewHeight > 0 ) {
				$aspect = $viewWidth / $viewHeight;
				$fallbackHeight = $fallbackWidth / $aspect;
			}
		}
	}

	$width = null;
	$widthAttr = $reader->getAttribute( 'width' );
	if ( $widthAttr ) {
		$width = self::scaleSVGUnit( $widthAttr, $fallbackWidth );
		$this->metadata['originalWidth'] = $widthAttr;
	}

	$height = null;
	$heightAttr = $reader->getAttribute( 'height' );
	if ( $heightAttr ) {
		$height = self::scaleSVGUnit( $heightAttr, $fallbackHeight );
		$this->metadata['originalHeight'] = $heightAttr;
	}

	// Fill in whichever dimension is missing from the aspect ratio.
	if ( $width === null ) {
		if ( $height === null ) {
			$width = $fallbackWidth;
			$height = $width / $aspect;
		} else {
			$width = $height * $aspect;
		}
	} elseif ( $height === null ) {
		$height = $width / $aspect;
	}

	if ( $width > 0 && $height > 0 ) {
		$this->metadata['width'] = (int)round( $width );
		$this->metadata['height'] = (int)round( $height );
	}
}
367 | |
/**
 * Convert a CSS/SVG length with a unit suffix into pixels.
 * https://www.w3.org/TR/SVG11/coords.html#Units
 * https://www.w3.org/TR/css-values-3/#lengths
 *
 * Percentages are taken relative to $viewportSize. Anything that does not
 * match "<number><unit>" is cast to float and treated as pixels.
 *
 * @param string $length CSS/SVG length.
 * @param float|int $viewportSize Optional scale for percentage units...
 * @return float Length in pixels
 */
public static function scaleSVGUnit( $length, $viewportSize = 512 ) {
	// Per CSS values spec, assume 96dpi.
	static $pixelsPerUnit = [
		'px' => 1.0,
		'pt' => 1.333333,
		'pc' => 16.0,
		'mm' => 3.7795275,
		'q' => 0.944881,
		'cm' => 37.795275,
		'in' => 96.0,
		'em' => 16.0, // Browser default font size if unspecified
		'rem' => 16.0,
		'ch' => 8.0, // Spec says 1em if impossible to determine
		'ex' => 8.0, // Spec says 0.5em if impossible to determine
		'' => 1.0, // "User units" pixels by default
	];
	// TODO: Does not support vw, vh, vmin, vmax.
	$pattern = '/^\s*([-+]?\d*(?:\.\d+|\d+)(?:[Ee][-+]?\d+)?)\s*'
		. '(rem|em|ex|px|pt|pc|cm|mm|in|ch|q|%)\s*$/i';

	if ( !preg_match( $pattern, $length, $parts ) ) {
		// Assume pixels
		return (float)$length;
	}

	$magnitude = (float)$parts[1];
	$unit = strtolower( $parts[2] );

	return $unit === '%'
		? $magnitude * 0.01 * $viewportSize
		: $magnitude * $pixelsPerUnit[$unit];
}
413 | } |