Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
89.83% |
159 / 177 |
|
22.22% |
2 / 9 |
CRAP | |
0.00% |
0 / 1 |
SVGReader | |
89.83% |
159 / 177 |
|
22.22% |
2 / 9 |
87.90 | |
0.00% |
0 / 1 |
__construct | |
72.00% |
18 / 25 |
|
0.00% |
0 / 1 |
5.55 | |||
getMetadata | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
read | |
90.91% |
30 / 33 |
|
0.00% |
0 / 1 |
20.30 | |||
readField | |
91.67% |
11 / 12 |
|
0.00% |
0 / 1 |
8.04 | |||
readXml | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
animateFilterAndLang | |
97.67% |
42 / 43 |
|
0.00% |
0 / 1 |
24 | |||
debug | |
50.00% |
1 / 2 |
|
0.00% |
0 / 1 |
2.50 | |||
handleSVGAttribs | |
86.21% |
25 / 29 |
|
0.00% |
0 / 1 |
15.59 | |||
scaleSVGUnit | |
100.00% |
27 / 27 |
|
100.00% |
1 / 1 |
3 |
1 | <?php |
2 | |
3 | /** |
4 | * Extraction of SVG image metadata. |
5 | * |
6 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License as published by |
8 | * the Free Software Foundation; either version 2 of the License, or |
9 | * (at your option) any later version. |
10 | * |
11 | * This program is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | * GNU General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU General Public License along |
17 | * with this program; if not, write to the Free Software Foundation, Inc., |
18 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
19 | * http://www.gnu.org/copyleft/gpl.html |
20 | * |
21 | * @file |
22 | * @ingroup Media |
23 | * @author "Derk-Jan Hartman <hartman _at_ videolan d0t org>" |
24 | * @author Brooke Vibber |
25 | * @copyright Copyright © 2010-2010 Brooke Vibber, Derk-Jan Hartman |
26 | * @license GPL-2.0-or-later |
27 | */ |
28 | |
29 | use MediaWiki\Language\LanguageCode; |
30 | use MediaWiki\MainConfigNames; |
31 | use MediaWiki\MediaWikiServices; |
32 | use Wikimedia\AtEase\AtEase; |
33 | |
/**
 * Streaming extractor of SVG image metadata.
 *
 * Wraps an XMLReader positioned on an SVG document and collects, into a plain
 * array, the pixel size, the original size strings, the title/description,
 * the raw <metadata> XML, whether the file looks animated, and the language
 * codes found in systemLanguage attributes.
 *
 * @ingroup Media
 */
class SVGReader {
	private const DEFAULT_WIDTH = 512;
	private const DEFAULT_HEIGHT = 512;
	private const NS_SVG = 'http://www.w3.org/2000/svg';
	public const LANG_PREFIX_MATCH = 1;
	public const LANG_FULL_MATCH = 2;

	/**
	 * Pixels per unit, assuming 96dpi per the CSS values spec.
	 * Unitless lengths and '%' are handled separately in scaleSVGUnit().
	 */
	private const UNIT_TO_PIXELS = [
		'px' => 1.0,
		'pt' => 1.333333,
		'pc' => 16.0,
		'mm' => 3.7795275,
		'q' => 0.944881,
		'cm' => 37.795275,
		'in' => 96.0,
		// Browser default font size if unspecified
		'em' => 16.0,
		'rem' => 16.0,
		// Spec says 1em if impossible to determine
		'ch' => 8.0,
		// Spec says 0.5em if impossible to determine
		'ex' => 8.0,
	];

	/** @var XMLReader */
	private $reader;

	/** @var bool */
	private $mDebug = false;

	/** @var array */
	private $metadata = [];
	/** @var int[] Full language tags seen, mapped to self::LANG_FULL_MATCH */
	private $languages = [];
	/** @var int[] Primary language subtags seen, mapped to self::LANG_PREFIX_MATCH */
	private $languagePrefixes = [];

	/**
	 * Creates an SVGReader drawing from the source provided
	 *
	 * Files larger than the SVGMetadataCutoff setting are truncated to that
	 * many bytes before parsing, so the parsed document may be incomplete.
	 *
	 * @param string $source URI from which to read
	 * @throws InvalidSVGException
	 */
	public function __construct( $source ) {
		$svgMetadataCutoff = MediaWikiServices::getInstance()->getMainConfig()
			->get( MainConfigNames::SVGMetadataCutoff );
		$this->reader = new XMLReader();

		// Don't use $file->getSize() since file object passed to SVGHandler::getMetadata is bogus.
		$size = filesize( $source );
		if ( $size === false ) {
			throw new InvalidSVGException( "Error getting filesize of SVG." );
		}

		if ( $size > $svgMetadataCutoff ) {
			$this->debug( "SVG is $size bytes, which is bigger than {$svgMetadataCutoff}. Truncating." );
			$contents = file_get_contents( $source, false, null, 0, $svgMetadataCutoff );
			if ( $contents === false ) {
				throw new InvalidSVGException( 'Error reading SVG file.' );
			}
			$status = $this->reader->XML( $contents, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		} else {
			$status = $this->reader->open( $source, null, LIBXML_NOERROR | LIBXML_NOWARNING );
		}
		if ( !$status ) {
			throw new InvalidSVGException( "Error getting xml of SVG." );
		}

		// Expand entities, since Adobe Illustrator uses them for xmlns
		// attributes (T33719). Note that libxml2 has some protection
		// against large recursive entity expansions so this is not as
		// insecure as it might appear to be. However, it is still extremely
		// insecure. It's necessary to wrap any read() calls with
		// libxml_disable_entity_loader() to avoid arbitrary local file
		// inclusion, or even arbitrary code execution if the expect
		// extension is installed (T48859).
		// phpcs:ignore Generic.PHP.NoSilencedErrors -- suppress deprecation per T268847
		$oldDisable = @libxml_disable_entity_loader( true );
		$this->reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );

		$this->metadata['width'] = self::DEFAULT_WIDTH;
		$this->metadata['height'] = self::DEFAULT_HEIGHT;

		// The size in the units specified by the SVG file
		// (for the metadata box)
		// Per the SVG spec, if unspecified, default to '100%'
		$this->metadata['originalWidth'] = '100%';
		$this->metadata['originalHeight'] = '100%';

		// Because we cut off the end of the svg making an invalid one. Complicated
		// try catch thing to make sure warnings get restored. Seems like there should
		// be a better way.
		AtEase::suppressWarnings();
		try {
			// Note: if this throws, the width/height will be taken to be 0x0.
			// Should we consider it the default 512x512 instead?
			$this->read();
		} finally {
			// Restored while warnings are still suppressed, so the deprecation
			// notice of libxml_disable_entity_loader() stays silent here too.
			libxml_disable_entity_loader( $oldDisable );
			AtEase::restoreWarnings();
		}
	}

	/**
	 * @return array Array with the known metadata
	 */
	public function getMetadata() {
		return $this->metadata;
	}

	/**
	 * Read the SVG
	 *
	 * Walks the direct children of the root <svg> element, filling
	 * $this->metadata. The reader is closed when this method returns or
	 * throws.
	 *
	 * @throws InvalidSVGException If the first element is not <svg> in the SVG namespace
	 * @return bool Always true
	 */
	protected function read() {
		try {
			$keepReading = $this->reader->read();

			/* Skip until first element */
			while ( $keepReading && $this->reader->nodeType !== XMLReader::ELEMENT ) {
				$keepReading = $this->reader->read();
			}

			if ( $this->reader->localName !== 'svg' || $this->reader->namespaceURI !== self::NS_SVG ) {
				throw new InvalidSVGException( "Expected <svg> tag, got " .
					$this->reader->localName . " in NS " . $this->reader->namespaceURI );
			}
			$this->debug( '<svg> tag is correct.' );
			$this->handleSVGAttribs();

			$exitDepth = $this->reader->depth;
			$keepReading = $this->reader->read();
			while ( $keepReading ) {
				$tag = $this->reader->localName;
				$type = $this->reader->nodeType;
				$isSVG = ( $this->reader->namespaceURI === self::NS_SVG );

				$this->debug( "$tag" );

				if ( $isSVG && $tag === 'svg' && $type === XMLReader::END_ELEMENT
					&& $this->reader->depth <= $exitDepth
				) {
					break;
				}

				if ( $isSVG && $tag === 'title' ) {
					$this->readField( $tag, 'title' );
				} elseif ( $isSVG && $tag === 'desc' ) {
					$this->readField( $tag, 'description' );
				} elseif ( $isSVG && $tag === 'metadata' && $type === XMLReader::ELEMENT ) {
					// readXml() leaves the cursor on <metadata>; the next() below
					// skips its subtree.
					$this->readXml( 'metadata' );
				} elseif ( $isSVG && $tag === 'script' ) {
					// We normally do not allow scripted svgs.
					// However its possible to configure MW to let them
					// in, and such files should be considered animated.
					$this->metadata['animated'] = true;
				} elseif ( $tag !== '#text' ) {
					$this->debug( "Unhandled top-level XML tag $tag" );

					// Recurse into children of current tag, looking for animation and languages.
					$this->animateFilterAndLang( $tag );
				}

				// Goto next element, which is sibling of current (Skip children).
				// This is the only place the top-level cursor advances, so no
				// sibling is ever stepped over unexamined.
				$keepReading = $this->reader->next();
			}
		} finally {
			// Release the underlying stream even when the document is rejected.
			$this->reader->close();
		}

		// Array union: a full-tag entry wins over a prefix entry with the same key.
		$this->metadata['translations'] = $this->languages + $this->languagePrefixes;

		return true;
	}

	/**
	 * Read a textelement from an element
	 *
	 * Consumes nodes up to the matching end tag and stores the trimmed value
	 * of the last text node seen. Does nothing unless the reader is on a
	 * start element.
	 *
	 * @param string $name Name of the element that we are reading from
	 * @param string|null $metafield Field that we will fill with the result
	 */
	private function readField( $name, $metafield = null ): void {
		$this->debug( "Read field $metafield" );
		if ( !$metafield || $this->reader->nodeType !== XMLReader::ELEMENT ) {
			return;
		}
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			if ( $this->reader->localName === $name
				&& $this->reader->namespaceURI === self::NS_SVG
				&& $this->reader->nodeType === XMLReader::END_ELEMENT
			) {
				break;
			}

			if ( $this->reader->nodeType === XMLReader::TEXT ) {
				$this->metadata[$metafield] = trim( $this->reader->value );
			}
			$keepReading = $this->reader->read();
		}
	}

	/**
	 * Read an XML snippet from an element
	 *
	 * Stores the trimmed inner XML of the current element. The cursor is
	 * left on the element itself; the caller is responsible for advancing
	 * past it.
	 *
	 * @param string|null $metafield Field that we will fill with the result
	 */
	private function readXml( $metafield = null ): void {
		$this->debug( "Read top level metadata" );
		if ( !$metafield || $this->reader->nodeType !== XMLReader::ELEMENT ) {
			return;
		}
		// @todo Find and store type of xml snippet. metadata['metadataType'] = "rdf"
		$this->metadata[$metafield] = trim( $this->reader->readInnerXml() );
	}

	/**
	 * Filter all children, looking for animated elements.
	 * Also get a list of languages that can be targeted.
	 *
	 * Reads every descendant of the current element and stops on its end
	 * tag. Does nothing for non-element nodes and for empty elements.
	 *
	 * @param string $name Name of the element that we are reading from
	 */
	private function animateFilterAndLang( $name ): void {
		$this->debug( "animate filter for tag $name" );
		if ( $this->reader->nodeType !== XMLReader::ELEMENT ) {
			return;
		}
		if ( $this->reader->isEmptyElement ) {
			// No end tag will follow, so there is nothing to descend into.
			return;
		}
		$exitDepth = $this->reader->depth;
		$keepReading = $this->reader->read();
		while ( $keepReading ) {
			if ( $this->reader->localName === $name && $this->reader->depth <= $exitDepth
				&& $this->reader->nodeType === XMLReader::END_ELEMENT
			) {
				break;
			}

			if ( $this->reader->namespaceURI === self::NS_SVG
				&& $this->reader->nodeType === XMLReader::ELEMENT
			) {
				$sysLang = $this->reader->getAttribute( 'systemLanguage' );
				if ( $sysLang !== null && $sysLang !== '' ) {
					// See https://www.w3.org/TR/SVG/struct.html#SystemLanguageAttribute
					$langList = explode( ',', $sysLang );
					foreach ( $langList as $langItem ) {
						$langItem = trim( $langItem );
						if ( LanguageCode::isWellFormedLanguageTag( $langItem ) ) {
							$this->languages[$langItem] = self::LANG_FULL_MATCH;
						}
						// Note, the standard says that any prefix should work,
						// here we do only the initial prefix, since that will catch
						// 99% of cases, and we are going to compare against fallbacks.
						// This differs mildly from how the spec says languages should be
						// handled, however it matches better how the MediaWiki language
						// preference is generally handled.
						$dash = strpos( $langItem, '-' );
						// Intentionally checking both !false and > 0 at the same time.
						if ( $dash ) {
							$itemPrefix = substr( $langItem, 0, $dash );
							if ( LanguageCode::isWellFormedLanguageTag( $itemPrefix ) ) {
								$this->languagePrefixes[$itemPrefix] = self::LANG_PREFIX_MATCH;
							}
						}
					}
				}
				switch ( $this->reader->localName ) {
					case 'style':
						// NOTE(review): this matches the substring 'animated', not the
						// CSS `animation` property name - confirm that is intended.
						$styleContents = $this->reader->readString();
						if (
							str_contains( $styleContents, 'animated' ) ||
							str_contains( $styleContents, '@keyframes' )
						) {
							$this->debug( "HOUSTON WE HAVE ANIMATION" );
							$this->metadata['animated'] = true;
						}
						break;
					case 'script':
						// Normally we disallow files with
						// <script>, but its possible
						// to configure MW to disable
						// such checks.
					case 'animate':
					case 'set':
					case 'animateMotion':
					case 'animateColor':
					case 'animateTransform':
						$this->debug( "HOUSTON WE HAVE ANIMATION" );
						$this->metadata['animated'] = true;
						break;
				}
			}
			$keepReading = $this->reader->read();
		}
	}

	/**
	 * Emit a debug log line when debugging is switched on.
	 *
	 * @param string $data Message to log
	 */
	private function debug( $data ): void {
		if ( $this->mDebug ) {
			wfDebug( "SVGReader: $data" );
		}
	}

	/**
	 * Parse the attributes of an SVG element
	 *
	 * The parser has to be in the start element of "<svg>"
	 *
	 * Derives the pixel width/height from the width, height and viewBox
	 * attributes. A missing dimension is computed from the other one and
	 * the viewBox aspect ratio (1:1 when there is no usable viewBox). The
	 * stored defaults are kept when the result is not strictly positive.
	 */
	private function handleSVGAttribs(): void {
		$defaultWidth = self::DEFAULT_WIDTH;
		$defaultHeight = self::DEFAULT_HEIGHT;
		$aspect = 1.0;
		$width = null;
		$height = null;

		// Fetch each attribute once; getAttribute() yields null when it is absent.
		$viewBoxAttr = $this->reader->getAttribute( 'viewBox' );
		$widthAttr = $this->reader->getAttribute( 'width' );
		$heightAttr = $this->reader->getAttribute( 'height' );

		if ( $viewBoxAttr ) {
			// min-x min-y width height
			$viewBox = preg_split( '/\s*[\s,]\s*/', trim( $viewBoxAttr ) );
			if ( count( $viewBox ) === 4 ) {
				$viewWidth = self::scaleSVGUnit( $viewBox[2] );
				$viewHeight = self::scaleSVGUnit( $viewBox[3] );
				if ( $viewWidth > 0 && $viewHeight > 0 ) {
					$aspect = $viewWidth / $viewHeight;
					$defaultHeight = $defaultWidth / $aspect;
				}
			}
		}
		// Truthiness checks on purpose: '' and '0' count as "not specified".
		if ( $widthAttr ) {
			$width = self::scaleSVGUnit( $widthAttr, $defaultWidth );
			$this->metadata['originalWidth'] = $widthAttr;
		}
		if ( $heightAttr ) {
			$height = self::scaleSVGUnit( $heightAttr, $defaultHeight );
			$this->metadata['originalHeight'] = $heightAttr;
		}

		if ( !isset( $width ) && !isset( $height ) ) {
			$width = $defaultWidth;
			$height = $width / $aspect;
		} elseif ( isset( $width ) && !isset( $height ) ) {
			$height = $width / $aspect;
		} elseif ( isset( $height ) && !isset( $width ) ) {
			$width = $height * $aspect;
		}

		if ( $width > 0 && $height > 0 ) {
			$this->metadata['width'] = (int)round( $width );
			$this->metadata['height'] = (int)round( $height );
		}
	}

	/**
	 * Return the pixel equivalent of a labeled CSS/SVG length.
	 * The result is not rounded; callers round when they need integers.
	 * https://www.w3.org/TR/SVG11/coords.html#Units
	 * https://www.w3.org/TR/css-values-3/#lengths
	 *
	 * @param string $length CSS/SVG length.
	 * @param float|int $viewportSize Optional scale for percentage units...
	 * @return float Length in pixels
	 */
	public static function scaleSVGUnit( $length, $viewportSize = 512 ) {
		// TODO: Does not support vw, vh, vmin, vmax.
		$matches = [];
		if ( preg_match(
			'/^\s*([-+]?\d*(?:\.\d+|\d+)(?:[Ee][-+]?\d+)?)\s*' .
			'(rem|em|ex|px|pt|pc|cm|mm|in|ch|q|%)\s*$/i',
			$length,
			$matches
		) ) {
			$length = (float)$matches[1];
			$unit = strtolower( $matches[2] );
			if ( $unit === '%' ) {
				return $length * 0.01 * $viewportSize;
			}

			return $length * self::UNIT_TO_PIXELS[$unit];
		}

		// No recognised unit: treat as user units, i.e. pixels.
		return (float)$length;
	}
}