Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
70.35% |
159 / 226 |
|
0.00% |
0 / 2 |
CRAP | |
0.00% |
0 / 1 |
PNGMetadataExtractor | |
70.35% |
159 / 226 |
|
0.00% |
0 / 2 |
170.72 | |
0.00% |
0 / 1 |
getMetadata | |
70.18% |
153 / 218 |
|
0.00% |
0 / 1 |
155.43 | |||
read | |
75.00% |
6 / 8 |
|
0.00% |
0 / 1 |
4.25 |
1 | <?php |
2 | /** |
3 | * PNG frame counter and metadata extractor. |
4 | * |
5 | * Slightly derived from GIFMetadataExtractor.php |
6 | * Deliberately not using MWExceptions to avoid external dependencies, encouraging |
7 | * redistribution. |
8 | * |
9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of the GNU General Public License as published by |
11 | * the Free Software Foundation; either version 2 of the License, or |
12 | * (at your option) any later version. |
13 | * |
14 | * This program is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | * GNU General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU General Public License along |
20 | * with this program; if not, write to the Free Software Foundation, Inc., |
21 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
22 | * http://www.gnu.org/copyleft/gpl.html |
23 | * |
24 | * @file |
25 | * @ingroup Media |
26 | */ |
27 | |
28 | use Wikimedia\AtEase\AtEase; |
29 | |
30 | /** |
31 | * PNG frame counter. |
32 | * |
33 | * @ingroup Media |
34 | */ |
class PNGMetadataExtractor {
	/** @var string The 8-byte PNG file signature; (re)initialised on every getMetadata() call. */
	private static $pngSig;

	/** @var int Number of CRC bytes trailing each chunk; (re)initialised on every getMetadata() call. */
	private static $crcSize;

	/** @var array Map of lower-cased PNG text keywords to the metadata key they are reported under. */
	private static $textChunks;

	public const VERSION = 1;
	private const MAX_CHUNK_SIZE = 3_145_728; // 3 mebibytes

	/**
	 * Walk the chunks of a PNG file and collect image and animation metadata.
	 *
	 * Chunks that are larger than MAX_CHUNK_SIZE, fail their CRC check or are
	 * malformed are logged through wfDebug() and skipped; they do not abort
	 * the parse. Parsing stops at the IEND chunk. The file handle is closed
	 * on every exit path, including when an exception is thrown.
	 *
	 * @param string $filename Path of the PNG file to inspect.
	 * @throws InvalidArgumentException If no file name is given, the file does
	 *  not exist, is a directory, cannot be opened, does not start with the
	 *  PNG signature, ends before IEND is reached, or cannot be seeked/read.
	 * @return array Array with the keys 'width', 'height', 'frameCount',
	 *  'loopCount', 'duration', 'text', 'bitDepth' and 'colorType'.
	 */
	public static function getMetadata( $filename ) {
		self::$pngSig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
		self::$crcSize = 4;
		/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
		 * and https://www.w3.org/TR/PNG/#11keywords
		 */
		self::$textChunks = [
			'xml:com.adobe.xmp' => 'xmp',
			# Artist is unofficial. Author is the recommended
			# keyword in the PNG spec. However some people output
			# Artist so support both.
			'artist' => 'Artist',
			'model' => 'Model',
			'make' => 'Make',
			'author' => 'Artist',
			'comment' => 'PNGFileComment',
			'description' => 'ImageDescription',
			'title' => 'ObjectName',
			'copyright' => 'Copyright',
			# Source as in original device used to make image
			# not as in who gave you the image
			'source' => 'Model',
			'software' => 'Software',
			'disclaimer' => 'Disclaimer',
			'warning' => 'ContentWarning',
			'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
			'label' => 'Label',
			'creation time' => 'DateTimeDigitized',
			/* Other potentially useful things - Document */
		];

		$frameCount = 0;
		$loopCount = 1;
		$text = [];
		$duration = 0.0;
		$width = 0;
		$height = 0;
		$bitDepth = 0;
		$colorType = 'unknown';

		if ( !$filename ) {
			throw new InvalidArgumentException( __METHOD__ . ": No file name specified" );
		}

		if ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new InvalidArgumentException( __METHOD__ . ": File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new InvalidArgumentException( __METHOD__ . ": Unable to open file $filename" );
		}

		// Everything that touches the handle lives inside try/finally so the
		// handle is released even when a malformed file makes us throw.
		try {
			// Check for the PNG header
			$buf = self::read( $fh, 8 );
			if ( $buf !== self::$pngSig ) {
				throw new InvalidArgumentException( __METHOD__ . ": Not a valid PNG file; header: $buf" );
			}

			// Read chunks. Each chunk is: 4-byte big-endian length, 4-byte type,
			// <length> bytes of data, then a CRC over type + data.
			while ( !feof( $fh ) ) {
				$buf = self::read( $fh, 4 );
				$chunk_size = unpack( "N", $buf )[1];

				if ( $chunk_size < 0 || $chunk_size > self::MAX_CHUNK_SIZE ) {
					wfDebug( __METHOD__ . ': Chunk size of ' . $chunk_size .
						' too big, skipping. Max size is: ' . self::MAX_CHUNK_SIZE );
					// Jump over the type field, the data and the CRC without reading them.
					if ( fseek( $fh, 4 + $chunk_size + self::$crcSize, SEEK_CUR ) !== 0 ) {
						throw new InvalidArgumentException( __METHOD__ . ': seek error' );
					}
					continue;
				}

				$chunk_type = self::read( $fh, 4 );
				$buf = self::read( $fh, $chunk_size );
				$crc = self::read( $fh, self::$crcSize );
				$computed = crc32( $chunk_type . $buf );
				if ( pack( 'N', $computed ) !== $crc ) {
					wfDebug( __METHOD__ . ': chunk has invalid CRC, skipping' );
					continue;
				}

				if ( $chunk_type === "IHDR" ) {
					// width (4) + height (4) + bit depth (1) + colour type (1) + 3 more
					// bytes; anything shorter cannot be unpacked below.
					if ( $chunk_size < 13 ) {
						wfDebug( __METHOD__ . ": IHDR chunk too small" );
						continue;
					}

					$width = unpack( 'N', substr( $buf, 0, 4 ) )[1];
					$height = unpack( 'N', substr( $buf, 4, 4 ) )[1];
					$bitDepth = ord( substr( $buf, 8, 1 ) );
					// Detect the color type in British English as per the spec
					// https://www.w3.org/TR/PNG/#11IHDR
					switch ( ord( substr( $buf, 9, 1 ) ) ) {
						case 0:
							$colorType = 'greyscale';
							break;
						case 2:
							$colorType = 'truecolour';
							break;
						case 3:
							$colorType = 'index-coloured';
							break;
						case 4:
							$colorType = 'greyscale-alpha';
							break;
						case 6:
							$colorType = 'truecolour-alpha';
							break;
						default:
							$colorType = 'unknown';
							break;
					}
				} elseif ( $chunk_type === "acTL" ) {
					// Two 32-bit fields (frames, plays) are unpacked, so 8 bytes are needed.
					if ( $chunk_size < 8 ) {
						wfDebug( __METHOD__ . ": acTL chunk too small" );
						continue;
					}

					$actl = unpack( "Nframes/Nplays", $buf );
					$frameCount = $actl['frames'];
					$loopCount = $actl['plays'];
				} elseif ( $chunk_type === "fcTL" ) {
					// The delay fraction starts at byte offset 20 of the chunk data.
					$buf = substr( $buf, 20 );
					if ( strlen( $buf ) < 4 ) {
						wfDebug( __METHOD__ . ": fcTL chunk too small" );
						continue;
					}

					$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
					if ( $fctldur['delay_den'] == 0 ) {
						// A zero denominator is treated as 100 (hundredths of a second).
						$fctldur['delay_den'] = 100;
					}
					if ( $fctldur['delay_num'] ) {
						$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
					}
				} elseif ( $chunk_type === "iTXt" ) {
					// Extracts iTXt chunks, uncompressing if necessary.
					$items = [];
					if ( preg_match(
						'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
						$buf, $items )
					) {
						/* $items[1] = text chunk name, $items[2] = compressed flag,
						 * $items[3] = lang code (or ""), $items[5] = content.
						 * NOTE(review): $items[4] is captured *after* the language
						 * tag, so on a well-formed chunk it holds the NUL separator;
						 * the compression-method byte itself is the literal \x00
						 * that follows the compressed flag in the pattern.
						 */

						// Theoretically should be case-sensitive, but in practice...
						$items[1] = strtolower( $items[1] );
						if ( !isset( self::$textChunks[$items[1]] ) ) {
							// Only extract textual chunks on our list.
							continue;
						}

						$items[3] = strtolower( $items[3] );
						if ( $items[3] == '' ) {
							// if no lang specified use x-default like in xmp.
							$items[3] = 'x-default';
						}

						// if compressed
						if ( $items[2] === "\x01" ) {
							if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
								AtEase::suppressWarnings();
								$items[5] = gzuncompress( $items[5] );
								AtEase::restoreWarnings();

								if ( $items[5] === false ) {
									// decompression failed
									wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] );
									continue;
								}
							} else {
								wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
									. " or potentially invalid compression method" );
								continue;
							}
						}
						$finalKeyword = self::$textChunks[$items[1]];
						$text[$finalKeyword][$items[3]] = $items[5];
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						// Error reading iTXt chunk
						wfDebug( __METHOD__ . ": Invalid iTXt chunk" );
					}
				} elseif ( $chunk_type === 'tEXt' ) {
					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						wfDebug( __METHOD__ . ": Invalid tEXt chunk: no null byte" );
						continue;
					}

					[ $keyword, $content ] = explode( "\x00", $buf, 2 );
					if ( $keyword === '' ) {
						wfDebug( __METHOD__ . ": Empty tEXt keyword" );
						continue;
					}

					// Theoretically should be case-sensitive, but in practice...
					$keyword = strtolower( $keyword );
					if ( !isset( self::$textChunks[$keyword] ) ) {
						// Don't recognize chunk, so skip.
						continue;
					}
					// tEXt content is Latin-1; the metadata is reported as UTF-8.
					AtEase::suppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					AtEase::restoreWarnings();

					if ( $content === false ) {
						wfDebug( __METHOD__ . ": Read error (error with iconv)" );
						continue;
					}

					$finalKeyword = self::$textChunks[$keyword];
					$text[$finalKeyword]['x-default'] = $content;
					$text[$finalKeyword]['_type'] = 'lang';
				} elseif ( $chunk_type === 'zTXt' ) {
					if ( function_exists( 'gzuncompress' ) ) {
						// In case there is no \x00 which will make explode fail.
						if ( strpos( $buf, "\x00" ) === false ) {
							wfDebug( __METHOD__ . ": No null byte in zTXt chunk" );
							continue;
						}

						[ $keyword, $postKeyword ] = explode( "\x00", $buf, 2 );
						if ( $keyword === '' || $postKeyword === '' ) {
							wfDebug( __METHOD__ . ": Empty zTXt chunk" );
							continue;
						}
						// Theoretically should be case-sensitive, but in practice...
						$keyword = strtolower( $keyword );

						if ( !isset( self::$textChunks[$keyword] ) ) {
							// Don't recognize chunk, so skip.
							continue;
						}
						// First byte after the keyword is the compression method,
						// the remainder is the compressed text.
						$compression = substr( $postKeyword, 0, 1 );
						$content = substr( $postKeyword, 1 );
						if ( $compression !== "\x00" ) {
							wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." );
							continue;
						}

						AtEase::suppressWarnings();
						$content = gzuncompress( $content );
						AtEase::restoreWarnings();

						if ( $content === false ) {
							// decompression failed
							wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword );
							continue;
						}

						AtEase::suppressWarnings();
						$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
						AtEase::restoreWarnings();

						if ( $content === false ) {
							wfDebug( __METHOD__ . ": iconv error in zTXt chunk" );
							continue;
						}

						$finalKeyword = self::$textChunks[$keyword];
						$text[$finalKeyword]['x-default'] = $content;
						$text[$finalKeyword]['_type'] = 'lang';
					} else {
						wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
					}
				} elseif ( $chunk_type === 'tIME' ) {
					// last mod timestamp.
					if ( $chunk_size !== 7 ) {
						wfDebug( __METHOD__ . ": tIME wrong size" );
						continue;
					}

					// Note: spec says this should be UTC.
					$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
					$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
						$t['y'], $t['m'], $t['d'], $t['h'],
						$t['min'], $t['s'] );

					$exifTime = wfTimestamp( TS_EXIF, $strTime );

					if ( $exifTime ) {
						$text['DateTime'] = $exifTime;
					}
				} elseif ( $chunk_type === 'pHYs' ) {
					// how big pixels are (dots per meter).
					if ( $chunk_size !== 9 ) {
						wfDebug( __METHOD__ . ": pHYs wrong size" );
						continue;
					}

					$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
					if ( $dim['unit'] === 1 ) {
						// Need to check for negative because php
						// doesn't deal with super-large unsigned 32-bit ints well
						if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
							// unit is meters
							// (as opposed to 0 = undefined )
							$text['XResolution'] = $dim['width']
								. '/100';
							$text['YResolution'] = $dim['height']
								. '/100';
							$text['ResolutionUnit'] = 3;
							// 3 = dots per cm (from Exif).
						}
					}
				} elseif ( $chunk_type === "IEND" ) {
					break;
				}
			}
		} finally {
			fclose( $fh );
		}

		if ( $loopCount > 1 ) {
			$duration *= $loopCount;
		}

		if ( isset( $text['DateTimeDigitized'] ) ) {
			// Convert date format from rfc2822 to exif.
			foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
				if ( $name === '_type' ) {
					continue;
				}

				// @todo FIXME: Currently timezones are ignored.
				// possibly should be wfTimestamp's
				// responsibility. (at least for numeric TZ)
				$formatted = wfTimestamp( TS_EXIF, $value );
				if ( $formatted ) {
					// Only change if we could convert the
					// date.
					// The png standard says it should be
					// in rfc2822 format, but not required.
					// In general for the exif stuff we
					// prettify the date if we can, but we
					// display as-is if we cannot or if
					// it is invalid.
					// So do the same here.

					$value = $formatted;
				}
			}
			// Drop the reference so later writes to $value cannot alter $text.
			unset( $value );
		}

		return [
			'width' => $width,
			'height' => $height,
			'frameCount' => $frameCount,
			'loopCount' => $loopCount,
			'duration' => $duration,
			'text' => $text,
			'bitDepth' => $bitDepth,
			'colorType' => $colorType,
		];
	}

	/**
	 * Read exactly $size bytes from the file handle.
	 *
	 * A size of zero returns an empty string without calling fread().
	 *
	 * @param resource $fh The file handle
	 * @param int $size Size in bytes.
	 * @throws InvalidArgumentException If fread() fails or returns fewer than
	 *  $size bytes (unexpected end of file).
	 * @return string The bytes read.
	 */
	private static function read( $fh, $size ) {
		if ( $size === 0 ) {
			return '';
		}

		$result = fread( $fh, $size );
		if ( $result === false ) {
			throw new InvalidArgumentException( __METHOD__ . ': read error' );
		}
		if ( strlen( $result ) < $size ) {
			throw new InvalidArgumentException( __METHOD__ . ': unexpected end of file' );
		}
		return $result;
	}
}