MediaWiki  master
PNGMetadataExtractor.php
Go to the documentation of this file.
1 <?php
// Eight-byte PNG file signature. Assigned at the start of getMetadata() and
// compared against the first eight bytes read from the file.
35  private static $pngSig;
36 
// Number of CRC bytes skipped after each chunk's data. getMetadata() sets it to 4.
38  private static $crcSize;
39 
// Map of lower-cased PNG text-chunk keywords to the metadata field names they
// are stored under. Populated at the start of getMetadata().
41  private static $textChunks;
42 
// NOTE(review): VERSION is not referenced anywhere in the visible code;
// presumably a version number for the extracted metadata format - confirm.
43  const VERSION = 1;
// Largest chunk data size that read() will accept before throwing.
44  const MAX_CHUNK_SIZE = 3145728; // 3 megabytes
45 
/**
 * Walk the chunks of a PNG file and collect animation, textual and
 * header metadata.
 *
 * Chunks handled: IHDR (bit depth, colour type), acTL/fcTL (APNG frame
 * count, loop count, frame delays), iTXt/tEXt/zTXt (textual keywords
 * listed in self::$textChunks), tIME (last modification time) and pHYs
 * (pixel density). All other chunks are skipped. Parsing stops at IEND.
 *
 * @param string $filename Path of the PNG file to inspect
 * @throws Exception If the file is missing, unreadable, not a PNG, or a
 *  chunk is truncated/malformed
 * @return array Keys: 'frameCount', 'loopCount', 'duration', 'text',
 *  'bitDepth', 'colorType'
 */
public static function getMetadata( $filename ) {
	self::$pngSig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
	self::$crcSize = 4;
	/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
	 * and https://www.w3.org/TR/PNG/#11keywords
	 */
	self::$textChunks = [
		'xml:com.adobe.xmp' => 'xmp',
		# Artist is unofficial. Author is the recommended
		# keyword in the PNG spec. However some people output
		# Artist so support both.
		'artist' => 'Artist',
		'model' => 'Model',
		'make' => 'Make',
		'author' => 'Artist',
		'comment' => 'PNGFileComment',
		'description' => 'ImageDescription',
		'title' => 'ObjectName',
		'copyright' => 'Copyright',
		# Source as in original device used to make image
		# not as in who gave you the image
		'source' => 'Model',
		'software' => 'Software',
		'disclaimer' => 'Disclaimer',
		'warning' => 'ContentWarning',
		'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
		'label' => 'Label',
		'creation time' => 'DateTimeDigitized',
		/* Other potentially useful things - Document */
	];

	$frameCount = 0;
	$loopCount = 1;
	$text = [];
	$duration = 0.0;
	$bitDepth = 0;
	$colorType = 'unknown';

	if ( !$filename ) {
		throw new Exception( __METHOD__ . ": No file name specified" );
	} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
		throw new Exception( __METHOD__ . ": File $filename does not exist" );
	}

	$fh = fopen( $filename, 'rb' );

	if ( !$fh ) {
		throw new Exception( __METHOD__ . ": Unable to open file $filename" );
	}

	// Everything below may throw; the finally block guarantees the handle
	// is released on every exit path instead of leaking on malformed files.
	try {
		// Check for the PNG header
		$buf = fread( $fh, 8 );
		if ( $buf !== self::$pngSig ) {
			throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
		}

		// Read chunks. Each chunk is: 4-byte length, 4-byte type, data, CRC.
		while ( !feof( $fh ) ) {
			$buf = fread( $fh, 4 );
			if ( !$buf || strlen( $buf ) < 4 ) {
				throw new Exception( __METHOD__ . ": Read error" );
			}
			$chunk_size = unpack( "N", $buf )[1];

			if ( $chunk_size < 0 ) {
				throw new Exception( __METHOD__ . ": Chunk size too big for unpack" );
			}

			$chunk_type = fread( $fh, 4 );
			if ( !$chunk_type || strlen( $chunk_type ) < 4 ) {
				throw new Exception( __METHOD__ . ": Read error" );
			}

			if ( $chunk_type == "IHDR" ) {
				$buf = self::read( $fh, $chunk_size );
				// Bytes 8 and 9 (bit depth, colour type) are read below, so
				// anything shorter than 10 bytes cannot be parsed.
				if ( !$buf || strlen( $buf ) < $chunk_size || strlen( $buf ) < 10 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				$bitDepth = ord( substr( $buf, 8, 1 ) );
				// Detect the color type in British English as per the spec
				// https://www.w3.org/TR/PNG/#11IHDR
				switch ( ord( substr( $buf, 9, 1 ) ) ) {
					case 0:
						$colorType = 'greyscale';
						break;
					case 2:
						$colorType = 'truecolour';
						break;
					case 3:
						$colorType = 'index-coloured';
						break;
					case 4:
						$colorType = 'greyscale-alpha';
						break;
					case 6:
						$colorType = 'truecolour-alpha';
						break;
					default:
						$colorType = 'unknown';
						break;
				}
			} elseif ( $chunk_type == "acTL" ) {
				// Two 32-bit integers are unpacked below, so require them
				// before reading.
				if ( $chunk_size < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				// Go through self::read() like every other chunk so the
				// MAX_CHUNK_SIZE limit also applies here.
				$buf = self::read( $fh, $chunk_size );
				if ( !$buf || strlen( $buf ) < $chunk_size ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				$actl = unpack( "Nframes/Nplays", $buf );
				$frameCount = $actl['frames'];
				$loopCount = $actl['plays'];
			} elseif ( $chunk_type == "fcTL" ) {
				$buf = self::read( $fh, $chunk_size );
				if ( !$buf || strlen( $buf ) < $chunk_size ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}
				// The delay numerator/denominator start at byte offset 20.
				$buf = substr( $buf, 20 );
				if ( strlen( $buf ) < 4 ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
				if ( $fctldur['delay_den'] == 0 ) {
					// A zero denominator is treated as 1/100ths of a second.
					$fctldur['delay_den'] = 100;
				}
				if ( $fctldur['delay_num'] ) {
					$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
				}
			} elseif ( $chunk_type == "iTXt" ) {
				// Extracts iTXt chunks, uncompressing if necessary.
				$buf = self::read( $fh, $chunk_size );
				$items = [];
				if ( preg_match(
					'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
					$buf, $items )
				) {
					/* $items[1] = text chunk name, $items[2] = compressed flag,
					 * $items[3] = lang code (or ""), $items[5] = content.
					 * NOTE(review): the pattern requires a literal \x00 right
					 * after the compressed flag, and $items[4] captures the
					 * byte following the language tag, so the
					 * $items[4] === "\x00" test below does not look like a
					 * check of the compression method - confirm intent.
					 */

					// Theoretically should be case-sensitive, but in practice...
					$items[1] = strtolower( $items[1] );
					if ( !isset( self::$textChunks[$items[1]] ) ) {
						// Only extract textual chunks on our list.
						fseek( $fh, self::$crcSize, SEEK_CUR );
						continue;
					}

					$items[3] = strtolower( $items[3] );
					if ( $items[3] == '' ) {
						// if no lang specified use x-default like in xmp.
						$items[3] = 'x-default';
					}

					// if compressed
					if ( $items[2] == "\x01" ) {
						if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
							Wikimedia\suppressWarnings();
							$items[5] = gzuncompress( $items[5] );
							Wikimedia\restoreWarnings();

							if ( $items[5] === false ) {
								// decompression failed
								wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] . "\n" );
								fseek( $fh, self::$crcSize, SEEK_CUR );
								continue;
							}
						} else {
							wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
								. " or potentially invalid compression method\n" );
							fseek( $fh, self::$crcSize, SEEK_CUR );
							continue;
						}
					}
					$finalKeyword = self::$textChunks[$items[1]];
					$text[$finalKeyword][$items[3]] = $items[5];
					$text[$finalKeyword]['_type'] = 'lang';
				} else {
					// Error reading iTXt chunk
					throw new Exception( __METHOD__ . ": Read error on iTXt chunk" );
				}
			} elseif ( $chunk_type == 'tEXt' ) {
				$buf = self::read( $fh, $chunk_size );

				// In case there is no \x00 which will make explode fail.
				if ( strpos( $buf, "\x00" ) === false ) {
					throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
				}

				list( $keyword, $content ) = explode( "\x00", $buf, 2 );
				if ( $keyword === '' || $content === '' ) {
					throw new Exception( __METHOD__ . ": Read error on tEXt chunk" );
				}

				// Theoretically should be case-sensitive, but in practice...
				$keyword = strtolower( $keyword );
				if ( !isset( self::$textChunks[$keyword] ) ) {
					// Don't recognize chunk, so skip.
					fseek( $fh, self::$crcSize, SEEK_CUR );
					continue;
				}
				Wikimedia\suppressWarnings();
				$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
				Wikimedia\restoreWarnings();

				if ( $content === false ) {
					throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
				}

				$finalKeyword = self::$textChunks[$keyword];
				$text[$finalKeyword]['x-default'] = $content;
				$text[$finalKeyword]['_type'] = 'lang';
			} elseif ( $chunk_type == 'zTXt' ) {
				if ( function_exists( 'gzuncompress' ) ) {
					$buf = self::read( $fh, $chunk_size );

					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
					}

					list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $postKeyword === '' ) {
						throw new Exception( __METHOD__ . ": Read error on zTXt chunk" );
					}
					// Theoretically should be case-sensitive, but in practice...
					$keyword = strtolower( $keyword );

					if ( !isset( self::$textChunks[$keyword] ) ) {
						// Don't recognize chunk, so skip.
						fseek( $fh, self::$crcSize, SEEK_CUR );
						continue;
					}
					// First byte after the keyword separator is the
					// compression method; the rest is the compressed text.
					$compression = substr( $postKeyword, 0, 1 );
					$content = substr( $postKeyword, 1 );
					if ( $compression !== "\x00" ) {
						wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping.\n" );
						fseek( $fh, self::$crcSize, SEEK_CUR );
						continue;
					}

					Wikimedia\suppressWarnings();
					$content = gzuncompress( $content );
					Wikimedia\restoreWarnings();

					if ( $content === false ) {
						// decompression failed
						wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword . "\n" );
						fseek( $fh, self::$crcSize, SEEK_CUR );
						continue;
					}

					Wikimedia\suppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					Wikimedia\restoreWarnings();

					if ( $content === false ) {
						throw new Exception( __METHOD__ . ": Read error (error with iconv)" );
					}

					$finalKeyword = self::$textChunks[$keyword];
					$text[$finalKeyword]['x-default'] = $content;
					$text[$finalKeyword]['_type'] = 'lang';
				} else {
					wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping.\n" );
					fseek( $fh, $chunk_size, SEEK_CUR );
				}
			} elseif ( $chunk_type == 'tIME' ) {
				// last mod timestamp.
				if ( $chunk_size !== 7 ) {
					throw new Exception( __METHOD__ . ": tIME wrong size" );
				}
				$buf = self::read( $fh, $chunk_size );
				if ( !$buf || strlen( $buf ) < $chunk_size ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				// Note: spec says this should be UTC.
				$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
				$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
					$t['y'], $t['m'], $t['d'], $t['h'],
					$t['min'], $t['s'] );

				$exifTime = wfTimestamp( TS_EXIF, $strTime );

				if ( $exifTime ) {
					$text['DateTime'] = $exifTime;
				}
			} elseif ( $chunk_type == 'pHYs' ) {
				// how big pixels are (dots per meter).
				if ( $chunk_size !== 9 ) {
					throw new Exception( __METHOD__ . ": pHYs wrong size" );
				}

				$buf = self::read( $fh, $chunk_size );
				if ( !$buf || strlen( $buf ) < $chunk_size ) {
					throw new Exception( __METHOD__ . ": Read error" );
				}

				$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
				if ( $dim['unit'] == 1 ) {
					// Need to check for negative because php
					// doesn't deal with super-large unsigned 32-bit ints well
					if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
						// unit is meters
						// (as opposed to 0 = undefined )
						$text['XResolution'] = $dim['width']
							. '/100';
						$text['YResolution'] = $dim['height']
							. '/100';
						$text['ResolutionUnit'] = 3;
						// 3 = dots per cm (from Exif).
					}
				}
			} elseif ( $chunk_type == "IEND" ) {
				break;
			} else {
				// Unhandled chunk type: skip over its data.
				fseek( $fh, $chunk_size, SEEK_CUR );
			}
			// Skip the CRC that trails every chunk.
			fseek( $fh, self::$crcSize, SEEK_CUR );
		}
	} finally {
		fclose( $fh );
	}

	if ( $loopCount > 1 ) {
		$duration *= $loopCount;
	}

	if ( isset( $text['DateTimeDigitized'] ) ) {
		// Convert date format from rfc2822 to exif.
		foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
			if ( $name === '_type' ) {
				continue;
			}

			// @todo FIXME: Currently timezones are ignored.
			// possibly should be wfTimestamp's
			// responsibility. (at least for numeric TZ)
			$formatted = wfTimestamp( TS_EXIF, $value );
			if ( $formatted ) {
				// Only change if we could convert the
				// date.
				// The png standard says it should be
				// in rfc2822 format, but not required.
				// In general for the exif stuff we
				// prettify the date if we can, but we
				// display as-is if we cannot or if
				// it is invalid.
				// So do the same here.

				$value = $formatted;
			}
		}
		// Break the reference so later writes to $value cannot alter $text.
		unset( $value );
	}

	return [
		'frameCount' => $frameCount,
		'loopCount' => $loopCount,
		'duration' => $duration,
		'text' => $text,
		'bitDepth' => $bitDepth,
		'colorType' => $colorType,
	];
}
411 
/**
 * Read a chunk's data, checking first that it is not too big.
 *
 * @param resource $fh File handle positioned at the start of the chunk data
 * @param int $size Number of bytes to read
 * @throws Exception If $size exceeds self::MAX_CHUNK_SIZE
 * @return string|bool The bytes read ('' when $size is not positive), or
 *  whatever fread() returns on failure
 */
private static function read( $fh, $size ) {
	if ( $size > self::MAX_CHUNK_SIZE ) {
		throw new Exception( __METHOD__ . ': Chunk size of ' . $size .
			' too big. Max size is: ' . self::MAX_CHUNK_SIZE );
	}

	if ( $size <= 0 ) {
		// fread() does not accept a non-positive length: it warns and
		// returns false on PHP 7 and throws ValueError on PHP 8. An empty
		// chunk simply has no data, so report that without calling fread().
		return '';
	}

	return fread( $fh, $size );
}
428 }
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
static getMetadata( $filename)
static read( $fh, $size)
Read a chunk, checking to make sure it's not too big.
$content
Definition: router.php:78