MediaWiki REL1_37
PNGMetadataExtractor.php
Go to the documentation of this file.
1<?php
/**
 * Signature that must open a PNG file. Assigned in getMetadata() as the
 * packed bytes 137 80 78 71 13 10 26 10 and compared against the first
 * 8 bytes read from the file.
 */
35 private static $pngSig;
36
/**
 * Number of bytes read as the CRC that follows each chunk's data.
 * Assigned the value 4 in getMetadata().
 */
38 private static $crcSize;
39
/**
 * Map from lower-cased text chunk keyword to the metadata field name the
 * chunk's content is stored under. Populated in getMetadata(); keywords
 * missing from this map are skipped.
 */
41 private static $textChunks;
42
// NOTE(review): presumably a version number for the extracted metadata
// format; nothing in the visible code reads it - confirm against callers.
43 public const VERSION = 1;
// Chunks whose declared size exceeds this many bytes are seeked past by
// getMetadata() instead of being read into memory.
44 private const MAX_CHUNK_SIZE = 3145728; // 3 mebibytes
45
/**
 * Extract dimensions, animation information and textual metadata from a PNG file.
 *
 * The file is walked chunk by chunk until IEND. Chunks larger than
 * MAX_CHUNK_SIZE are seeked past, chunks with a bad CRC are ignored, and
 * malformed chunks of a recognised type are logged through wfDebug() and
 * skipped rather than aborting the whole extraction.
 *
 * @param string $filename Path of the PNG file to inspect.
 * @return array Associative array with the keys:
 *   - 'width', 'height', 'bitDepth': values read from IHDR (0 if never seen)
 *   - 'colorType': one of 'greyscale', 'truecolour', 'index-coloured',
 *     'greyscale-alpha', 'truecolour-alpha' or 'unknown'
 *   - 'frameCount', 'loopCount': values read from acTL (defaults 0 and 1)
 *   - 'duration': sum of the fcTL frame delays (delay_num / delay_den),
 *     multiplied by loopCount when loopCount is greater than 1
 *   - 'text': metadata gathered from iTXt, tEXt, zTXt, tIME and pHYs chunks
 * @throws Exception If no file name is given, the file does not exist or
 *   cannot be opened, it does not start with the PNG signature, a chunk
 *   declares an invalid size, a seek fails, or the file ends unexpectedly.
 */
public static function getMetadata( $filename ) {
	self::$pngSig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
	self::$crcSize = 4;
	/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
	 * and https://www.w3.org/TR/PNG/#11keywords
	 */
	self::$textChunks = [
		'xml:com.adobe.xmp' => 'xmp',
		# Artist is unofficial. Author is the recommended
		# keyword in the PNG spec. However some people output
		# Artist so support both.
		'artist' => 'Artist',
		'model' => 'Model',
		'make' => 'Make',
		'author' => 'Artist',
		'comment' => 'PNGFileComment',
		'description' => 'ImageDescription',
		'title' => 'ObjectName',
		'copyright' => 'Copyright',
		# Source as in original device used to make image
		# not as in who gave you the image
		'source' => 'Model',
		'software' => 'Software',
		'disclaimer' => 'Disclaimer',
		'warning' => 'ContentWarning',
		'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
		'label' => 'Label',
		'creation time' => 'DateTimeDigitized',
		/* Other potentially useful things - Document */
	];

	$frameCount = 0;
	$loopCount = 1;
	$text = [];
	$duration = 0.0;
	$width = 0;
	$height = 0;
	$bitDepth = 0;
	$colorType = 'unknown';

	if ( !$filename ) {
		throw new Exception( __METHOD__ . ": No file name specified" );
	} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
		throw new Exception( __METHOD__ . ": File $filename does not exist" );
	}

	$fh = fopen( $filename, 'rb' );

	if ( !$fh ) {
		throw new Exception( __METHOD__ . ": Unable to open file $filename" );
	}

	// Everything that touches the handle runs inside try/finally so the
	// handle is released even when a read, seek or signature check throws.
	try {
		// Check for the PNG header
		$buf = self::read( $fh, 8 );
		if ( $buf !== self::$pngSig ) {
			throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
		}

		// Read chunks
		while ( !feof( $fh ) ) {
			$buf = self::read( $fh, 4 );
			$chunk_size = unpack( "N", $buf )[1];

			if ( $chunk_size < 0 ) {
				// unpack( 'N' ) can only go negative where PHP integers are
				// 32 bits wide. Feeding a negative length to the relative
				// seek below would move backwards in the file and could
				// loop forever, so treat it as a corrupt file instead.
				throw new Exception( __METHOD__ . ': invalid chunk size' );
			}

			if ( $chunk_size > self::MAX_CHUNK_SIZE ) {
				wfDebug( __METHOD__ . ': Chunk size of ' . $chunk_size .
					' too big, skipping. Max size is: ' . self::MAX_CHUNK_SIZE );
				// Skip the chunk type (4 bytes), the chunk data and the CRC.
				if ( fseek( $fh, 4 + $chunk_size + self::$crcSize, SEEK_CUR ) !== 0 ) {
					throw new Exception( __METHOD__ . ': seek error' );
				}
				continue;
			}

			$chunk_type = self::read( $fh, 4 );
			$buf = self::read( $fh, $chunk_size );
			$crc = self::read( $fh, self::$crcSize );
			// The CRC covers the chunk type and data, but not the length field.
			$computed = crc32( $chunk_type . $buf );
			if ( pack( 'N', $computed ) !== $crc ) {
				wfDebug( __METHOD__ . ': chunk has invalid CRC, skipping' );
				continue;
			}

			if ( $chunk_type === "IHDR" ) {
				// Width (4 bytes), height (4), bit depth (1) and colour type (1)
				// are read below; a shorter chunk would make unpack() fail.
				if ( $chunk_size < 10 ) {
					wfDebug( __METHOD__ . ": IHDR chunk too small" );
					continue;
				}

				$width = unpack( 'N', substr( $buf, 0, 4 ) )[1];
				$height = unpack( 'N', substr( $buf, 4, 4 ) )[1];
				$bitDepth = ord( substr( $buf, 8, 1 ) );
				// Detect the color type in British English as per the spec
				// https://www.w3.org/TR/PNG/#11IHDR
				switch ( ord( substr( $buf, 9, 1 ) ) ) {
					case 0:
						$colorType = 'greyscale';
						break;
					case 2:
						$colorType = 'truecolour';
						break;
					case 3:
						$colorType = 'index-coloured';
						break;
					case 4:
						$colorType = 'greyscale-alpha';
						break;
					case 6:
						$colorType = 'truecolour-alpha';
						break;
					default:
						$colorType = 'unknown';
						break;
				}
			} elseif ( $chunk_type === "acTL" ) {
				// Two 32-bit fields (frame count, play count) are unpacked
				// below, so 8 bytes of data are required.
				if ( $chunk_size < 8 ) {
					wfDebug( __METHOD__ . ": acTL chunk too small" );
					continue;
				}

				$actl = unpack( "Nframes/Nplays", $buf );
				$frameCount = $actl['frames'];
				$loopCount = $actl['plays'];
			} elseif ( $chunk_type === "fcTL" ) {
				// The delay numerator/denominator start at byte offset 20.
				$buf = substr( $buf, 20 );
				if ( strlen( $buf ) < 4 ) {
					wfDebug( __METHOD__ . ": fcTL chunk too small" );
					continue;
				}

				$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
				if ( $fctldur['delay_den'] === 0 ) {
					// A zero denominator is treated as 100.
					$fctldur['delay_den'] = 100;
				}
				if ( $fctldur['delay_num'] ) {
					$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
				}
			} elseif ( $chunk_type === "iTXt" ) {
				// Extracts iTXt chunks, uncompressing if necessary.
				$items = [];
				if ( preg_match(
					'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
					$buf, $items )
				) {
					/* $items[1] = text chunk name, $items[2] = compressed flag,
					 * $items[3] = lang code (or ""), $items[4]= compression type.
					 * $items[5] = content
					 *
					 * NOTE(review): in the iTXt layout the byte right after the
					 * compression flag is the compression method, which the
					 * pattern matches as a literal \x00; group 4 appears to
					 * capture the null that terminates the language tag
					 * instead. Confirm before relying on $items[4].
					 */

					// Theoretically should be case-sensitive, but in practise...
					$items[1] = strtolower( $items[1] );
					if ( !isset( self::$textChunks[$items[1]] ) ) {
						// Only extract textual chunks on our list.
						continue;
					}

					$items[3] = strtolower( $items[3] );
					if ( $items[3] === '' ) {
						// if no lang specified use x-default like in xmp.
						$items[3] = 'x-default';
					}

					// if compressed
					if ( $items[2] === "\x01" ) {
						if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
							Wikimedia\suppressWarnings();
							$items[5] = gzuncompress( $items[5] );
							Wikimedia\restoreWarnings();

							if ( $items[5] === false ) {
								// decompression failed
								wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] );
								continue;
							}
						} else {
							wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
								. " or potentially invalid compression method" );
							continue;
						}
					}
					$finalKeyword = self::$textChunks[$items[1]];
					$text[$finalKeyword][$items[3]] = $items[5];
					$text[$finalKeyword]['_type'] = 'lang';
				} else {
					// Error reading iTXt chunk
					wfDebug( __METHOD__ . ": Invalid iTXt chunk" );
				}
			} elseif ( $chunk_type === 'tEXt' ) {
				// In case there is no \x00 which will make explode fail.
				if ( strpos( $buf, "\x00" ) === false ) {
					wfDebug( __METHOD__ . ": Invalid tEXt chunk: no null byte" );
					continue;
				}

				[ $keyword, $content ] = explode( "\x00", $buf, 2 );
				if ( $keyword === '' ) {
					wfDebug( __METHOD__ . ": Empty tEXt keyword" );
					continue;
				}

				// Theoretically should be case-sensitive, but in practise...
				$keyword = strtolower( $keyword );
				if ( !isset( self::$textChunks[$keyword] ) ) {
					// Don't recognize chunk, so skip.
					continue;
				}
				// The chunk content is converted from ISO-8859-1 to UTF-8.
				Wikimedia\suppressWarnings();
				$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
				Wikimedia\restoreWarnings();

				if ( $content === false ) {
					wfDebug( __METHOD__ . ": Read error (error with iconv)" );
					continue;
				}

				$finalKeyword = self::$textChunks[$keyword];
				$text[$finalKeyword]['x-default'] = $content;
				$text[$finalKeyword]['_type'] = 'lang';
			} elseif ( $chunk_type === 'zTXt' ) {
				if ( function_exists( 'gzuncompress' ) ) {
					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						wfDebug( __METHOD__ . ": No null byte in zTXt chunk" );
						continue;
					}

					[ $keyword, $postKeyword ] = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $postKeyword === '' ) {
						wfDebug( __METHOD__ . ": Empty zTXt chunk" );
						continue;
					}
					// Theoretically should be case-sensitive, but in practise...
					$keyword = strtolower( $keyword );

					if ( !isset( self::$textChunks[$keyword] ) ) {
						// Don't recognize chunk, so skip.
						continue;
					}
					// First byte after the keyword is the compression method,
					// the remainder is the compressed text.
					$compression = substr( $postKeyword, 0, 1 );
					$content = substr( $postKeyword, 1 );
					if ( $compression !== "\x00" ) {
						wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." );
						continue;
					}

					Wikimedia\suppressWarnings();
					$content = gzuncompress( $content );
					Wikimedia\restoreWarnings();

					if ( $content === false ) {
						// decompression failed
						wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword );
						continue;
					}

					Wikimedia\suppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					Wikimedia\restoreWarnings();

					if ( $content === false ) {
						wfDebug( __METHOD__ . ": iconv error in zTXt chunk" );
						continue;
					}

					$finalKeyword = self::$textChunks[$keyword];
					$text[$finalKeyword]['x-default'] = $content;
					$text[$finalKeyword]['_type'] = 'lang';
				} else {
					wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
				}
			} elseif ( $chunk_type === 'tIME' ) {
				// last mod timestamp.
				if ( $chunk_size !== 7 ) {
					wfDebug( __METHOD__ . ": tIME wrong size" );
					continue;
				}

				// Note: spec says this should be UTC.
				$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
				$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
					$t['y'], $t['m'], $t['d'], $t['h'],
					$t['min'], $t['s'] );

				$exifTime = wfTimestamp( TS_EXIF, $strTime );

				if ( $exifTime ) {
					$text['DateTime'] = $exifTime;
				}
			} elseif ( $chunk_type === 'pHYs' ) {
				// how big pixels are (dots per meter).
				if ( $chunk_size !== 9 ) {
					wfDebug( __METHOD__ . ": pHYs wrong size" );
					continue;
				}

				$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
				if ( $dim['unit'] === 1 ) {
					// Need to check for negative because php
					// doesn't deal with super-large unsigned 32-bit ints well
					if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
						// unit is meters
						// (as opposed to 0 = undefined )
						$text['XResolution'] = $dim['width']
							. '/100';
						$text['YResolution'] = $dim['height']
							. '/100';
						$text['ResolutionUnit'] = 3;
						// 3 = dots per cm (from Exif).
					}
				}
			} elseif ( $chunk_type === "IEND" ) {
				break;
			}
		}
	} finally {
		fclose( $fh );
	}

	if ( $loopCount > 1 ) {
		$duration *= $loopCount;
	}

	if ( isset( $text['DateTimeDigitized'] ) ) {
		// Convert date format from rfc2822 to exif.
		foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
			if ( $name === '_type' ) {
				continue;
			}

			// @todo FIXME: Currently timezones are ignored.
			// possibly should be wfTimestamp's
			// responsibility. (at least for numeric TZ)
			$formatted = wfTimestamp( TS_EXIF, $value );
			if ( $formatted ) {
				// Only change if we could convert the
				// date.
				// The png standard says it should be
				// in rfc2822 format, but not required.
				// In general for the exif stuff we
				// prettify the date if we can, but we
				// display as-is if we cannot or if
				// it is invalid.
				// So do the same here.

				$value = $formatted;
			}
		}
		// Break the reference so later writes to $value cannot alter $text.
		unset( $value );
	}

	return [
		'width' => $width,
		'height' => $height,
		'frameCount' => $frameCount,
		'loopCount' => $loopCount,
		'duration' => $duration,
		'text' => $text,
		'bitDepth' => $bitDepth,
		'colorType' => $colorType,
	];
}
397
/**
 * Read exactly $size bytes from an open file handle.
 *
 * @param resource $fh Handle to read from.
 * @param int $size Number of bytes wanted.
 * @return string The bytes read; an empty string when $size is 0.
 * @throws Exception If the read fails or fewer than $size bytes are available.
 */
private static function read( $fh, $size ) {
	if ( $size !== 0 ) {
		$bytes = fread( $fh, $size );

		if ( $bytes === false ) {
			throw new Exception( __METHOD__ . ': read error' );
		}

		// A short read means the file stopped before the requested data did.
		if ( $size > strlen( $bytes ) ) {
			throw new Exception( __METHOD__ . ': unexpected end of file' );
		}

		return $bytes;
	}

	// A zero-length request (e.g. an empty chunk body) needs no read at all.
	return '';
}
420}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
static read( $fh, $size)
Read exactly $size bytes from the file handle, throwing if the read fails or the file ends early.
static getMetadata( $filename)
$content
Definition router.php:76