MediaWiki  master
GIFMetadataExtractor.php
Go to the documentation of this file.
1 <?php
29 use Wikimedia\AtEase\AtEase;
30 
38  private static $gifFrameSep; // Single byte "," (0x2C) that introduces a frame; assigned in getMetadata()
39 
41  private static $gifExtensionSep; // Single byte "!" (0x21) that introduces an extension block; assigned in getMetadata()
42 
44  private static $gifTerm; // Single byte ";" (0x3B) that ends the parse loop; assigned in getMetadata()
45 
46  public const VERSION = 1; // NOTE(review): not read anywhere in this class; presumably a metadata format version used by callers - confirm
47 
48  // Each sub-block is less than or equal to 255 bytes.
49  // Most of the time it's 255 bytes, except in XMP
50  // blocks, where it's usually between 32-127 bytes each.
51  private const MAX_SUBBLOCKS = 262144; // 5 MiB divided by 20. Upper bound on sub-blocks readBlock() will accept.
52 
/**
 * Parse a GIF file and collect its animation and descriptive metadata.
 *
 * The file is walked block by block: every image descriptor counts as one
 * frame, graphics control extensions contribute their delay to the total
 * duration, comment extensions are collected (consecutive duplicates are
 * dropped), and application extensions are inspected for the NETSCAPE2.0
 * loop marker and for embedded XMP data.
 *
 * The file handle is always closed before this method returns or throws.
 *
 * @param string $filename Path of the GIF file to read
 * @return array Associative array with the keys 'frameCount' (int),
 *  'looped' (bool), 'duration' (float, seconds), 'xmp' (string),
 *  'comment' (list of strings), 'width' (int), 'height' (int) and 'bits' (int)
 * @throws Exception If no file name is given, the file is missing or cannot
 *  be opened, or its contents are not a well-formed GIF
 */
public static function getMetadata( $filename ) {
	self::$gifFrameSep = pack( "C", ord( "," ) ); // 2C
	self::$gifExtensionSep = pack( "C", ord( "!" ) ); // 21
	self::$gifTerm = pack( "C", ord( ";" ) ); // 3B

	$frameCount = 0;
	$duration = 0.0;
	$isLooped = false;
	$xmp = "";
	$comment = [];

	if ( !$filename ) {
		throw new Exception( 'No file name specified' );
	}
	if ( !file_exists( $filename ) || is_dir( $filename ) ) {
		throw new Exception( "File $filename does not exist" );
	}

	$fh = fopen( $filename, 'rb' );

	if ( !$fh ) {
		throw new Exception( "Unable to open file $filename" );
	}

	// Everything below can throw on malformed input, so the handle is
	// released in a finally block rather than leaked on error paths.
	try {
		// Check for the GIF header
		$buf = fread( $fh, 6 );
		if ( !( $buf === 'GIF87a' || $buf === 'GIF89a' ) ) {
			throw new Exception( "Not a valid GIF file; header: $buf" );
		}

		// Read width and height (16-bit little-endian each).
		$buf = fread( $fh, 2 );
		if ( strlen( $buf ) < 2 ) {
			throw new Exception( "Not a valid GIF file; Unable to read width." );
		}
		$width = unpack( 'v', $buf )[1];
		$buf = fread( $fh, 2 );
		if ( strlen( $buf ) < 2 ) {
			throw new Exception( "Not a valid GIF file; Unable to read height." );
		}
		$height = unpack( 'v', $buf )[1];

		// Read BPP
		$buf = fread( $fh, 1 );
		[ $bpp, $have_map ] = self::decodeBPP( $buf );

		// Skip over background and aspect ratio
		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, 2 );

		// Skip over the GCT
		if ( $have_map ) {
			self::readGCT( $fh, $bpp );
		}

		while ( !feof( $fh ) ) {
			$buf = fread( $fh, 1 );

			if ( $buf === self::$gifFrameSep ) {
				// Found a frame
				$frameCount++;

				// Skip bounding box
				// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
				fread( $fh, 8 );

				// Read the frame's own packed byte.
				// NOTE(review): this overwrites $bpp, so the returned 'bits'
				// reflects the last frame read rather than the header - confirm
				// that this is intended before changing it.
				$buf = fread( $fh, 1 );
				[ $bpp, $have_map ] = self::decodeBPP( $buf );

				// Skip the frame's colour table, if it has one
				if ( $have_map ) {
					self::readGCT( $fh, $bpp );
				}
				// One more byte precedes the image data sub-blocks
				// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
				fread( $fh, 1 );
				self::skipBlock( $fh );
			} elseif ( $buf === self::$gifExtensionSep ) {
				$buf = fread( $fh, 1 );
				if ( strlen( $buf ) < 1 ) {
					throw new Exception( "Not a valid GIF file; Unable to read graphics control extension." );
				}
				$extension_code = unpack( 'C', $buf )[1];

				if ( $extension_code === 0xF9 ) {
					// Graphics Control Extension.
					// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
					fread( $fh, 1 ); // Block size

					// @phan-suppress-next-next-line PhanPluginUseReturnValueInternalKnown
					// @phan-suppress-next-line PhanPluginDuplicateAdjacentStatement
					fread( $fh, 1 ); // Transparency, disposal method, user input

					$buf = fread( $fh, 2 ); // Delay, in hundredths of seconds.
					if ( strlen( $buf ) < 2 ) {
						throw new Exception( "Not a valid GIF file; Unable to read delay" );
					}
					$delay = unpack( 'v', $buf )[1];
					// Accumulate in seconds
					$duration += $delay * 0.01;

					// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
					fread( $fh, 1 ); // Transparent colour index

					$term = fread( $fh, 1 ); // Should be a terminator
					if ( strlen( $term ) < 1 ) {
						throw new Exception( "Not a valid GIF file; Unable to read terminator byte" );
					}
					$term = unpack( 'C', $term )[1];
					if ( $term !== 0 ) {
						throw new Exception( "Malformed Graphics Control Extension block" );
					}
				} elseif ( $extension_code === 0xFE ) {
					// Comment block(s).
					$data = self::readBlock( $fh );
					if ( $data === "" ) {
						throw new Exception( 'Read error, zero-length comment block' );
					}

					// The standard says this should be ASCII, however it's unclear
					// whether that holds in practice. Check whether it is valid
					// UTF-8; if so assume that, otherwise assume windows-1252
					// (iso-8859-1).
					$dataCopy = $data;
					// quickIsNFCVerify has the side effect of replacing any invalid characters
					UtfNormal\Validator::quickIsNFCVerify( $dataCopy );

					if ( $dataCopy !== $data ) {
						AtEase::suppressWarnings();
						$data = iconv( 'windows-1252', 'UTF-8', $data );
						AtEase::restoreWarnings();
					}

					$commentCount = count( $comment );
					if ( $commentCount === 0
						// @phan-suppress-next-line PhanTypeInvalidDimOffset
						|| $comment[$commentCount - 1] !== $data
					) {
						// Some applications repeat the same comment on each
						// frame of an animated GIF image, so if this comment
						// is identical to the last, only extract once.
						$comment[] = $data;
					}
				} elseif ( $extension_code === 0xFF ) {
					// Application extension (Netscape info about the animated gif)
					// or XMP (or theoretically any other type of extension block)
					$blockLength = fread( $fh, 1 );
					if ( strlen( $blockLength ) < 1 ) {
						throw new Exception( "Not a valid GIF file; Unable to read block length" );
					}
					$blockLength = unpack( 'C', $blockLength )[1];
					$data = fread( $fh, $blockLength );

					if ( $blockLength !== 11 ) {
						wfDebug( __METHOD__ . " GIF application block with wrong length" );
						// Rewind to the length byte so skipBlock() can walk the
						// whole extension as ordinary sub-blocks.
						fseek( $fh, -( $blockLength + 1 ), SEEK_CUR );
						self::skipBlock( $fh );
						continue;
					}

					// NETSCAPE2.0 (application name for animated gif)
					if ( $data === 'NETSCAPE2.0' ) {
						$data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

						if ( $data !== "\x03\x01" ) {
							throw new Exception( "Expected \x03\x01, got $data" );
						}

						// Unsigned little-endian integer, loop count or zero for "forever"
						$loopData = fread( $fh, 2 );
						if ( strlen( $loopData ) < 2 ) {
							throw new Exception( "Not a valid GIF file; Unable to read loop count" );
						}
						$loopCount = unpack( 'v', $loopData )[1];

						// Any loop count other than exactly 1 is treated as looping
						if ( $loopCount !== 1 ) {
							$isLooped = true;
						}

						// Read out terminator byte
						// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
						fread( $fh, 1 );
					} elseif ( $data === 'XMP DataXMP' ) {
						// application name for XMP data.
						// see pg 18 of XMP spec part 3.

						// Sub-block length bytes are kept so the trailer can be checked
						$xmp = self::readBlock( $fh, true );

						if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
							|| substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
						) {
							throw new Exception( "XMP does not have magic trailer!" );
						}

						// strip out trailer.
						$xmp = substr( $xmp, 0, -257 );
					} else {
						// unrecognized extension block
						fseek( $fh, -( $blockLength + 1 ), SEEK_CUR );
						self::skipBlock( $fh );
					}
				} else {
					// Any other extension type: ignore its payload
					self::skipBlock( $fh );
				}
			} elseif ( $buf === self::$gifTerm ) {
				break;
			} else {
				if ( strlen( $buf ) < 1 ) {
					throw new Exception( "Not a valid GIF file; Unable to read unknown byte." );
				}
				$byte = unpack( 'C', $buf )[1];
				throw new Exception( "At position: " . ftell( $fh ) . ", Unknown byte " . $byte );
			}
		}

		return [
			'frameCount' => $frameCount,
			'looped' => $isLooped,
			'duration' => $duration,
			'xmp' => $xmp,
			'comment' => $comment,
			'width' => $width,
			'height' => $height,
			'bits' => $bpp,
		];
	} finally {
		fclose( $fh );
	}
}
282 
/**
 * Consume a colour table from the stream without keeping its contents.
 *
 * Reads 2^$bpp entries of three bytes each; the bytes are discarded.
 *
 * @param resource $fh Open file handle positioned at the start of the table
 * @param int $bpp Exponent giving the number of table entries
 */
private static function readGCT( $fh, $bpp ) {
	$remaining = 2 ** $bpp;
	while ( $remaining >= 1 ) {
		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, 3 );
		$remaining--;
	}
}
295 
/**
 * Decode a packed byte into a bit depth and a colour-table flag.
 *
 * @param string $data The byte to decode; only the first byte is used
 * @return array Two-element list: the low three bits plus one, and the
 *  top bit as 0 or 1
 * @throws Exception If $data is empty
 */
private static function decodeBPP( $data ) {
	if ( strlen( $data ) === 0 ) {
		throw new Exception( "Not a valid GIF file; Unable to read bits per channel." );
	}

	$packed = ord( $data );

	return [
		( $packed & 7 ) + 1,
		( $packed >> 7 ) & 1,
	];
}
313 
/**
 * Skip a chain of length-prefixed sub-blocks.
 *
 * Each sub-block starts with a one-byte length followed by that many bytes.
 * Skipping stops after a zero length byte, or silently when the end of the
 * file has already been reached before a length byte is read.
 *
 * @param resource $fh Open file handle positioned at a sub-block length byte
 * @throws Exception If a length byte cannot be read
 */
private static function skipBlock( $fh ) {
	for ( ; ; ) {
		if ( feof( $fh ) ) {
			return;
		}

		$lengthByte = fread( $fh, 1 );
		if ( strlen( $lengthByte ) < 1 ) {
			throw new Exception( "Not a valid GIF file; Unable to read block length." );
		}

		$length = ord( $lengthByte );
		if ( $length === 0 ) {
			// Zero-length sub-block terminates the chain
			return;
		}

		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, $length );
	}
}
332 
/**
 * Read a chain of length-prefixed sub-blocks and return their contents.
 *
 * Reading stops at the first length byte equal to "\0". The payloads of all
 * sub-blocks are concatenated in order.
 *
 * @param resource $fh Open file handle positioned at a sub-block length byte
 * @param bool $includeLengths When true, each sub-block's length byte is kept
 *  in the output in front of its payload
 * @return string The concatenated data
 * @throws Exception If more than MAX_SUBBLOCKS sub-blocks are found, or the
 *  end of the file is reached before the terminating length byte
 */
private static function readBlock( $fh, $includeLengths = false ) {
	$data = '';
	$blocks = 0;

	for (
		$lengthByte = fread( $fh, 1 );
		$lengthByte !== "\0";
		$lengthByte = fread( $fh, 1 )
	) {
		$blocks++;
		if ( $blocks > self::MAX_SUBBLOCKS ) {
			throw new Exception( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" );
		}
		if ( feof( $fh ) ) {
			throw new Exception( "Read error: Unexpected EOF." );
		}

		$payload = fread( $fh, ord( $lengthByte ) );
		$data .= $includeLengths ? $lengthByte . $payload : $payload;
	}

	return $data;
}
370 }
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
static getMetadata( $filename)