MediaWiki  master
PNGMetadataExtractor.php
Go to the documentation of this file.
1 <?php
/**
 * @var string Assigned at the start of getMetadata() to the 8-byte PNG
 *  signature (bytes 137 80 78 71 13 10 26 10) that the file must begin with.
 */
35  private static $pngSig;
36 
/**
 * @var int Assigned at the start of getMetadata() to 4; the number of bytes
 *  read as the CRC field that follows each chunk's data.
 */
38  private static $crcSize;
39 
/**
 * @var array Assigned at the start of getMetadata(); maps lower-cased PNG
 *  text-chunk keywords (e.g. 'author') to the name the value is stored under
 *  in the returned 'text' array (e.g. 'Artist').
 */
41  private static $textChunks;
42 
// NOTE(review): nothing in this file reads VERSION; presumably a version
// marker for the extracted metadata format consumed by callers - confirm.
43  public const VERSION = 1;
// Chunks declaring more data bytes than this are skipped by getMetadata()
// instead of being read into memory.
44  private const MAX_CHUNK_SIZE = 3145728; // 3 mebibytes
45 
/**
 * Read a PNG file chunk by chunk and collect basic image metadata.
 *
 * Recognised chunks: IHDR (dimensions, bit depth, colour type), acTL and
 * fcTL (APNG frame count, loop count and frame delays), iTXt/tEXt/zTXt
 * (textual keywords listed in self::$textChunks), tIME, pHYs and IEND.
 * Chunks that are oversized, fail their CRC check or are malformed are
 * logged through wfDebug() and skipped.
 *
 * @param string $filename Path of the PNG file to inspect
 * @return array Array with the keys 'width', 'height', 'frameCount',
 *  'loopCount', 'duration', 'text', 'bitDepth' and 'colorType'
 * @throws Exception If no file name is given, the file does not exist or
 *  cannot be opened, the PNG signature does not match, or a read or seek
 *  on the file fails
 */
public static function getMetadata( $filename ) {
	self::$pngSig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 );
	self::$crcSize = 4;
	/* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData
	 * and https://www.w3.org/TR/PNG/#11keywords
	 */
	self::$textChunks = [
		'xml:com.adobe.xmp' => 'xmp',
		# Artist is unofficial. Author is the recommended
		# keyword in the PNG spec. However some people output
		# Artist so support both.
		'artist' => 'Artist',
		'model' => 'Model',
		'make' => 'Make',
		'author' => 'Artist',
		'comment' => 'PNGFileComment',
		'description' => 'ImageDescription',
		'title' => 'ObjectName',
		'copyright' => 'Copyright',
		# Source as in original device used to make image
		# not as in who gave you the image
		'source' => 'Model',
		'software' => 'Software',
		'disclaimer' => 'Disclaimer',
		'warning' => 'ContentWarning',
		'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement.
		'label' => 'Label',
		'creation time' => 'DateTimeDigitized',
		/* Other potentially useful things - Document */
	];

	$frameCount = 0;
	$loopCount = 1;
	$text = [];
	$duration = 0.0;
	$width = 0;
	$height = 0;
	$bitDepth = 0;
	$colorType = 'unknown';

	if ( !$filename ) {
		throw new Exception( __METHOD__ . ": No file name specified" );
	} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
		throw new Exception( __METHOD__ . ": File $filename does not exist" );
	}

	$fh = fopen( $filename, 'rb' );

	if ( !$fh ) {
		throw new Exception( __METHOD__ . ": Unable to open file $filename" );
	}

	// Everything that can throw while the handle is open lives inside the
	// try block so that the finally clause always releases the handle.
	try {
		// Check for the PNG header
		$buf = self::read( $fh, 8 );
		if ( $buf !== self::$pngSig ) {
			throw new Exception( __METHOD__ . ": Not a valid PNG file; header: $buf" );
		}

		// Read chunks. Each chunk is: 4-byte big-endian data length,
		// 4-byte type, the data itself, then a CRC over type + data.
		while ( !feof( $fh ) ) {
			$buf = self::read( $fh, 4 );
			$chunk_size = unpack( "N", $buf )[1];

			if ( $chunk_size < 0 || $chunk_size > self::MAX_CHUNK_SIZE ) {
				wfDebug( __METHOD__ . ': Chunk size of ' . $chunk_size .
					' too big, skipping. Max size is: ' . self::MAX_CHUNK_SIZE );
				// Jump over the chunk type (4 bytes), the data and the CRC.
				if ( fseek( $fh, 4 + $chunk_size + self::$crcSize, SEEK_CUR ) !== 0 ) {
					throw new Exception( __METHOD__ . ': seek error' );
				}
				continue;
			}

			$chunk_type = self::read( $fh, 4 );
			$buf = self::read( $fh, $chunk_size );
			$crc = self::read( $fh, self::$crcSize );
			$computed = crc32( $chunk_type . $buf );
			if ( pack( 'N', $computed ) !== $crc ) {
				wfDebug( __METHOD__ . ': chunk has invalid CRC, skipping' );
				continue;
			}

			if ( $chunk_type === "IHDR" ) {
				// Width (4) + height (4) + bit depth (1) + colour type (1) are
				// read below, out of the 13 bytes a complete IHDR holds.
				if ( $chunk_size < 13 ) {
					wfDebug( __METHOD__ . ": IHDR chunk too small" );
					continue;
				}

				$width = unpack( 'N', substr( $buf, 0, 4 ) )[1];
				$height = unpack( 'N', substr( $buf, 4, 4 ) )[1];
				$bitDepth = ord( substr( $buf, 8, 1 ) );
				// Detect the color type in British English as per the spec
				// https://www.w3.org/TR/PNG/#11IHDR
				$colorTypeNames = [
					0 => 'greyscale',
					2 => 'truecolour',
					3 => 'index-coloured',
					4 => 'greyscale-alpha',
					6 => 'truecolour-alpha',
				];
				$colorType = $colorTypeNames[ord( substr( $buf, 9, 1 ) )] ?? 'unknown';
			} elseif ( $chunk_type === "acTL" ) {
				// Two 32-bit fields (frames, plays) are unpacked, so 8 bytes
				// are required; fewer would make unpack() fail.
				if ( $chunk_size < 8 ) {
					wfDebug( __METHOD__ . ": acTL chunk too small" );
					continue;
				}

				$actl = unpack( "Nframes/Nplays", $buf );
				$frameCount = $actl['frames'];
				$loopCount = $actl['plays'];
			} elseif ( $chunk_type === "fcTL" ) {
				// The delay numerator/denominator start at offset 20.
				$buf = substr( $buf, 20 );
				if ( strlen( $buf ) < 4 ) {
					wfDebug( __METHOD__ . ": fcTL chunk too small" );
					continue;
				}

				$fctldur = unpack( "ndelay_num/ndelay_den", $buf );
				if ( $fctldur['delay_den'] === 0 ) {
					// A zero denominator is treated as 1/100ths of a second.
					$fctldur['delay_den'] = 100;
				}
				if ( $fctldur['delay_num'] ) {
					$duration += $fctldur['delay_num'] / $fctldur['delay_den'];
				}
			} elseif ( $chunk_type === "iTXt" ) {
				// Extracts iTXt chunks, uncompressing if necessary.
				$items = [];
				if ( preg_match(
					'/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds',
					$buf, $items )
				) {
					/* $items[1] = text chunk name, $items[2] = compressed flag,
					 * $items[3] = lang code (or ""), $items[4] = the single byte
					 * matched right after the lang code, $items[5] = content.
					 * NOTE(review): the compression-method byte itself is the
					 * literal \x00 that follows group 2 in the pattern, so only
					 * method 0 can match at all.
					 */

					// Theoretically should be case-sensitive, but in practise...
					$items[1] = strtolower( $items[1] );
					if ( !isset( self::$textChunks[$items[1]] ) ) {
						// Only extract textual chunks on our list.
						continue;
					}

					$items[3] = strtolower( $items[3] );
					if ( $items[3] === '' ) {
						// if no lang specified use x-default like in xmp.
						$items[3] = 'x-default';
					}

					// if compressed
					if ( $items[2] === "\x01" ) {
						if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) {
							Wikimedia\suppressWarnings();
							$items[5] = gzuncompress( $items[5] );
							Wikimedia\restoreWarnings();

							if ( $items[5] === false ) {
								// decompression failed
								wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] );
								continue;
							}
						} else {
							wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,'
								. " or potentially invalid compression method" );
							continue;
						}
					}
					$finalKeyword = self::$textChunks[$items[1]];
					$text[$finalKeyword][$items[3]] = $items[5];
					$text[$finalKeyword]['_type'] = 'lang';
				} else {
					// Error reading iTXt chunk
					wfDebug( __METHOD__ . ": Invalid iTXt chunk" );
				}
			} elseif ( $chunk_type === 'tEXt' ) {
				// In case there is no \x00 which will make explode fail.
				if ( strpos( $buf, "\x00" ) === false ) {
					wfDebug( __METHOD__ . ": Invalid tEXt chunk: no null byte" );
					continue;
				}

				[ $keyword, $content ] = explode( "\x00", $buf, 2 );
				if ( $keyword === '' ) {
					wfDebug( __METHOD__ . ": Empty tEXt keyword" );
					continue;
				}

				// Theoretically should be case-sensitive, but in practise...
				$keyword = strtolower( $keyword );
				if ( !isset( self::$textChunks[$keyword] ) ) {
					// Don't recognize chunk, so skip.
					continue;
				}
				Wikimedia\suppressWarnings();
				$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
				Wikimedia\restoreWarnings();

				if ( $content === false ) {
					wfDebug( __METHOD__ . ": Read error (error with iconv)" );
					continue;
				}

				$finalKeyword = self::$textChunks[$keyword];
				$text[$finalKeyword]['x-default'] = $content;
				$text[$finalKeyword]['_type'] = 'lang';
			} elseif ( $chunk_type === 'zTXt' ) {
				if ( function_exists( 'gzuncompress' ) ) {
					// In case there is no \x00 which will make explode fail.
					if ( strpos( $buf, "\x00" ) === false ) {
						wfDebug( __METHOD__ . ": No null byte in zTXt chunk" );
						continue;
					}

					[ $keyword, $postKeyword ] = explode( "\x00", $buf, 2 );
					if ( $keyword === '' || $postKeyword === '' ) {
						wfDebug( __METHOD__ . ": Empty zTXt chunk" );
						continue;
					}
					// Theoretically should be case-sensitive, but in practise...
					$keyword = strtolower( $keyword );

					if ( !isset( self::$textChunks[$keyword] ) ) {
						// Don't recognize chunk, so skip.
						continue;
					}
					// First byte after the keyword's null terminator is the
					// compression method; the remainder is the compressed text.
					$compression = substr( $postKeyword, 0, 1 );
					$content = substr( $postKeyword, 1 );
					if ( $compression !== "\x00" ) {
						wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." );
						continue;
					}

					Wikimedia\suppressWarnings();
					$content = gzuncompress( $content );
					Wikimedia\restoreWarnings();

					if ( $content === false ) {
						// decompression failed
						wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword );
						continue;
					}

					Wikimedia\suppressWarnings();
					$content = iconv( 'ISO-8859-1', 'UTF-8', $content );
					Wikimedia\restoreWarnings();

					if ( $content === false ) {
						wfDebug( __METHOD__ . ": iconv error in zTXt chunk" );
						continue;
					}

					$finalKeyword = self::$textChunks[$keyword];
					$text[$finalKeyword]['x-default'] = $content;
					$text[$finalKeyword]['_type'] = 'lang';
				} else {
					wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." );
				}
			} elseif ( $chunk_type === 'tIME' ) {
				// last mod timestamp.
				if ( $chunk_size !== 7 ) {
					wfDebug( __METHOD__ . ": tIME wrong size" );
					continue;
				}

				// Note: spec says this should be UTC.
				$t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf );
				$strTime = sprintf( "%04d%02d%02d%02d%02d%02d",
					$t['y'], $t['m'], $t['d'], $t['h'],
					$t['min'], $t['s'] );

				$exifTime = wfTimestamp( TS_EXIF, $strTime );

				if ( $exifTime ) {
					$text['DateTime'] = $exifTime;
				}
			} elseif ( $chunk_type === 'pHYs' ) {
				// how big pixels are (dots per meter).
				if ( $chunk_size !== 9 ) {
					wfDebug( __METHOD__ . ": pHYs wrong size" );
					continue;
				}

				$dim = unpack( "Nwidth/Nheight/Cunit", $buf );
				if ( $dim['unit'] === 1 ) {
					// Need to check for negative because php
					// doesn't deal with super-large unsigned 32-bit ints well
					if ( $dim['width'] > 0 && $dim['height'] > 0 ) {
						// unit is meters
						// (as opposed to 0 = undefined )
						$text['XResolution'] = $dim['width']
							. '/100';
						$text['YResolution'] = $dim['height']
							. '/100';
						$text['ResolutionUnit'] = 3;
						// 3 = dots per cm (from Exif).
					}
				}
			} elseif ( $chunk_type === "IEND" ) {
				break;
			}
		}
	} finally {
		fclose( $fh );
	}

	if ( $loopCount > 1 ) {
		$duration *= $loopCount;
	}

	if ( isset( $text['DateTimeDigitized'] ) ) {
		// Convert date format from rfc2822 to exif.
		foreach ( $text['DateTimeDigitized'] as $name => &$value ) {
			if ( $name === '_type' ) {
				continue;
			}

			// @todo FIXME: Currently timezones are ignored.
			// possibly should be wfTimestamp's
			// responsibility. (at least for numeric TZ)
			$formatted = wfTimestamp( TS_EXIF, $value );
			if ( $formatted ) {
				// Only change if we could convert the
				// date.
				// The png standard says it should be
				// in rfc2822 format, but not required.
				// In general for the exif stuff we
				// prettify the date if we can, but we
				// display as-is if we cannot or if
				// it is invalid.
				// So do the same here.

				$value = $formatted;
			}
		}
		// Break the reference so later writes to $value cannot alter $text.
		unset( $value );
	}

	return [
		'width' => $width,
		'height' => $height,
		'frameCount' => $frameCount,
		'loopCount' => $loopCount,
		'duration' => $duration,
		'text' => $text,
		'bitDepth' => $bitDepth,
		'colorType' => $colorType,
	];
}
397 
/**
 * Read exactly $size bytes from an open file handle.
 *
 * A request for zero bytes returns an empty string without touching the
 * handle.
 *
 * @param resource $fh Handle to read from
 * @param int $size Number of bytes wanted
 * @return string The bytes read
 * @throws Exception If fread() fails or returns fewer bytes than requested
 */
private static function read( $fh, $size ) {
	// Skip fread() entirely for an empty request; '' passes both checks below.
	$bytes = ( $size === 0 ) ? '' : fread( $fh, $size );

	if ( $bytes === false ) {
		throw new Exception( __METHOD__ . ': read error' );
	}

	// A short read means the file ended before the requested data did.
	$complete = strlen( $bytes ) >= $size;
	if ( !$complete ) {
		throw new Exception( __METHOD__ . ': unexpected end of file' );
	}

	return $bytes;
}
420 }
PNGMetadataExtractor\VERSION
const VERSION
Definition: PNGMetadataExtractor.php:43
wfTimestamp
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Definition: GlobalFunctions.php:1665
PNGMetadataExtractor
PNG frame counter.
Definition: PNGMetadataExtractor.php:33
PNGMetadataExtractor\$crcSize
static int $crcSize
Definition: PNGMetadataExtractor.php:38
PNGMetadataExtractor\$pngSig
static string $pngSig
Definition: PNGMetadataExtractor.php:35
PNGMetadataExtractor\$textChunks
static array $textChunks
Definition: PNGMetadataExtractor.php:41
PNGMetadataExtractor\getMetadata
static getMetadata( $filename)
Definition: PNGMetadataExtractor.php:46
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:894
PNGMetadataExtractor\read
static read( $fh, $size)
Read a chunk, checking to make sure it's not too big.
Definition: PNGMetadataExtractor.php:406
$content
$content
Definition: router.php:76
PNGMetadataExtractor\MAX_CHUNK_SIZE
const MAX_CHUNK_SIZE
Definition: PNGMetadataExtractor.php:44
$t
$t
Definition: testCompression.php:74