MediaWiki  master
GIFMetadataExtractor.php
Go to the documentation of this file.
1 <?php
29 use Wikimedia\AtEase\AtEase;
30 
/** @var string Single byte "," (0x2C); getMetadata() treats it as the start of a frame */
38  private static $gifFrameSep;
39 
/** @var string Single byte "!" (0x21); getMetadata() treats it as the start of an extension block */
41  private static $gifExtensionSep;
42 
/** @var string Single byte ";" (0x3B); getMetadata() stops parsing when it reads this byte */
44  private static $gifTerm;
45 
46  public const VERSION = 1;
47 
48  // Each sub-block is less than or equal to 255 bytes.
49  // Most of the time it's 255 bytes, except for in XMP
50  // blocks, where it's usually between 32-127 bytes each.
// readBlock() throws once it has read more than this many sub-blocks.
51  private const MAX_SUBBLOCKS = 262144; // 5 MiB divided by 20.
52 
/**
 * Parse a GIF file and collect its basic metadata.
 *
 * The file is read sequentially: header, logical screen descriptor, optional
 * global colour table, then a stream of image descriptors and extension
 * blocks until the trailer byte is found or the file ends.
 *
 * @param string $filename Path of the GIF file to inspect
 * @return array Associative array with the keys:
 *  - 'frameCount' (int): number of image descriptors seen
 *  - 'looped' (bool): true if a NETSCAPE2.0 block declares a loop count other than 1
 *  - 'duration' (float): sum of all Graphics Control Extension delays, in seconds
 *  - 'xmp' (string): raw XMP packet with the magic trailer removed, or ""
 *  - 'comment' (string[]): comment blocks, consecutive duplicates collapsed
 *  - 'width' (int), 'height' (int): logical screen size
 *  - 'bits' (int): bits per pixel from the most recently decoded packed byte
 * @throws Exception If the file is missing, unreadable, truncated or malformed
 */
public static function getMetadata( $filename ) {
	self::$gifFrameSep = ','; // 2C
	self::$gifExtensionSep = '!'; // 21
	self::$gifTerm = ';'; // 3B

	$frameCount = 0;
	$duration = 0.0;
	$isLooped = false;
	$xmp = "";
	$comment = [];

	if ( !$filename ) {
		throw new Exception( "No file name specified" );
	} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
		throw new Exception( "File $filename does not exist" );
	}

	$fh = fopen( $filename, 'rb' );

	if ( !$fh ) {
		throw new Exception( "Unable to open file $filename" );
	}

	// Everything below may throw; the finally clause guarantees the handle is released.
	try {
		// Check for the GIF header
		$buf = fread( $fh, 6 );
		if ( $buf !== 'GIF87a' && $buf !== 'GIF89a' ) {
			throw new Exception( "Not a valid GIF file; header: $buf" );
		}

		// Read width and height (two unsigned little-endian 16-bit integers).
		$buf = fread( $fh, 4 );
		if ( strlen( $buf ) < 4 ) {
			// unpack() on a short string would only emit a warning and yield garbage
			throw new Exception( "Ran out of input" );
		}
		[ 'width' => $width, 'height' => $height ] = unpack( 'vwidth/vheight', $buf );

		// Read BPP
		$buf = fread( $fh, 1 );
		[ $bpp, $have_map ] = self::decodeBPP( $buf );

		// Skip over background and aspect ratio
		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, 2 );

		// Skip over the GCT
		if ( $have_map ) {
			self::readGCT( $fh, $bpp );
		}

		while ( !feof( $fh ) ) {
			$buf = fread( $fh, 1 );

			if ( $buf === self::$gifFrameSep ) {
				// Found a frame
				$frameCount++;

				// Skip bounding box
				// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
				fread( $fh, 8 );

				// Read this frame's packed byte. Note that this overwrites $bpp,
				// so the returned 'bits' reflects the last frame that was read.
				$buf = fread( $fh, 1 );
				[ $bpp, $have_map ] = self::decodeBPP( $buf );

				// Skip the colour table attached to this frame, if any
				if ( $have_map ) {
					self::readGCT( $fh, $bpp );
				}
				// One more byte precedes the frame's data sub-blocks
				// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
				fread( $fh, 1 );
				self::skipBlock( $fh );
			} elseif ( $buf === self::$gifExtensionSep ) {
				$buf = fread( $fh, 1 );
				if ( strlen( $buf ) < 1 ) {
					throw new Exception( "Ran out of input" );
				}
				$extension_code = unpack( 'C', $buf )[1];

				if ( $extension_code === 0xF9 ) {
					// Graphics Control Extension.
					// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
					fread( $fh, 1 ); // Block size

					// @phan-suppress-next-next-line PhanPluginUseReturnValueInternalKnown
					// @phan-suppress-next-line PhanPluginDuplicateAdjacentStatement
					fread( $fh, 1 ); // Transparency, disposal method, user input

					$buf = fread( $fh, 2 ); // Delay, in hundredths of seconds.
					if ( strlen( $buf ) < 2 ) {
						throw new Exception( "Ran out of input" );
					}
					$delay = unpack( 'v', $buf )[1];
					$duration += $delay * 0.01;

					// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
					fread( $fh, 1 ); // Transparent colour index

					$term = fread( $fh, 1 ); // Should be a terminator
					if ( strlen( $term ) < 1 ) {
						throw new Exception( "Ran out of input" );
					}
					$term = unpack( 'C', $term )[1];
					if ( $term !== 0 ) {
						throw new Exception( "Malformed Graphics Control Extension block" );
					}
				} elseif ( $extension_code === 0xFE ) {
					// Comment block(s).
					$data = self::readBlock( $fh );
					if ( $data === "" ) {
						throw new Exception( 'Read error, zero-length comment block' );
					}

					// The standard says this should be ASCII, however it's unclear if
					// that's true in practice. Check to see if it's valid UTF-8; if so
					// assume it's that, otherwise assume it's windows-1252 (iso-8859-1)
					$dataCopy = $data;
					// quickIsNFCVerify has the side effect of replacing any invalid characters
					UtfNormal\Validator::quickIsNFCVerify( $dataCopy );

					if ( $dataCopy !== $data ) {
						AtEase::suppressWarnings();
						$data = iconv( 'windows-1252', 'UTF-8', $data );
						AtEase::restoreWarnings();
					}

					$commentCount = count( $comment );
					if ( $commentCount === 0
						// @phan-suppress-next-line PhanTypeInvalidDimOffset
						|| $comment[$commentCount - 1] !== $data
					) {
						// Some applications repeat the same comment on each
						// frame of an animated GIF image, so if this comment
						// is identical to the last, only extract once.
						$comment[] = $data;
					}
				} elseif ( $extension_code === 0xFF ) {
					// Application extension (Netscape info about the animated gif)
					// or XMP (or theoretically any other type of extension block)
					$blockLength = fread( $fh, 1 );
					if ( strlen( $blockLength ) < 1 ) {
						throw new Exception( "Ran out of input" );
					}
					$blockLength = unpack( 'C', $blockLength )[1];

					if ( $blockLength !== 11 ) {
						wfDebug( __METHOD__ . " GIF application block with wrong length" );
						// Step back onto the length byte and let skipBlock() consume the
						// whole chain of sub-blocks. The payload is deliberately not read
						// first: fread() rejects a length of 0, and a short read would
						// make a relative seek of -( $blockLength + 1 ) overshoot.
						fseek( $fh, -1, SEEK_CUR );
						self::skipBlock( $fh );
						continue;
					}

					// Application identifier plus authentication code (11 bytes)
					$data = fread( $fh, 11 );
					if ( strlen( $data ) < 11 ) {
						throw new Exception( "Ran out of input" );
					}

					// NETSCAPE2.0 (application name for animated gif)
					if ( $data === 'NETSCAPE2.0' ) {
						$data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

						if ( $data !== "\x03\x01" ) {
							throw new Exception( "Expected \x03\x01, got $data" );
						}

						// Unsigned little-endian integer, loop count or zero for "forever"
						$loopData = fread( $fh, 2 );
						if ( strlen( $loopData ) < 2 ) {
							throw new Exception( "Ran out of input" );
						}
						$loopCount = unpack( 'v', $loopData )[1];

						if ( $loopCount !== 1 ) {
							$isLooped = true;
						}

						// Read out terminator byte
						// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
						fread( $fh, 1 );
					} elseif ( $data === 'XMP DataXMP' ) {
						// application name for XMP data.
						// see pg 18 of XMP spec part 3.

						$xmp = self::readBlock( $fh, true );

						if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
							|| substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
						) {
							throw new Exception( "XMP does not have magic trailer!" );
						}

						// strip out trailer.
						$xmp = substr( $xmp, 0, -257 );
					} else {
						// unrecognized extension block: rewind over the 11 payload bytes
						// and the length byte, then skip the whole chain of sub-blocks
						fseek( $fh, -12, SEEK_CUR );
						self::skipBlock( $fh );
					}
				} else {
					self::skipBlock( $fh );
				}
			} elseif ( $buf === self::$gifTerm ) {
				break;
			} else {
				if ( strlen( $buf ) < 1 ) {
					throw new Exception( "Ran out of input" );
				}
				$byte = unpack( 'C', $buf )[1];
				throw new Exception( "At position: " . ftell( $fh ) . ", Unknown byte " . $byte );
			}
		}
	} finally {
		fclose( $fh );
	}

	return [
		'frameCount' => $frameCount,
		'looped' => $isLooped,
		'duration' => $duration,
		'xmp' => $xmp,
		'comment' => $comment,
		'width' => $width,
		'height' => $height,
		'bits' => $bpp,
	];
}
275 
/**
 * Advance the stream past a colour table.
 *
 * The table is consumed as 2 ** $bpp separate reads of 3 bytes each;
 * the bytes themselves are discarded.
 *
 * @param resource $fh Open file handle positioned at the start of the table
 * @param int $bpp Bits per pixel, which determines the number of entries
 */
private static function readGCT( $fh, $bpp ) {
	$entries = 2 ** $bpp;
	$consumed = 0;
	while ( $consumed < $entries ) {
		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, 3 );
		$consumed++;
	}
}
288 
/**
 * Decode a packed byte into a colour depth and a colour-table flag.
 *
 * The low three bits plus one give the bits per pixel; the highest bit
 * says whether a colour table follows.
 *
 * @param string $data Raw input; only its first byte is examined
 * @return array Two-element list: [ int bits per pixel (1-8), int 1 if a table follows, else 0 ]
 * @throws Exception If $data is empty
 */
private static function decodeBPP( $data ) {
	if ( strlen( $data ) === 0 ) {
		throw new Exception( "Ran out of input" );
	}

	$packed = ord( $data[0] );
	$depth = ( $packed & 0x07 ) + 1;
	$tableFlag = ( $packed >> 7 ) & 0x01;

	return [ $depth, $tableFlag ];
}
306 
/**
 * Skip a chain of length-prefixed sub-blocks.
 *
 * Each sub-block starts with one length byte followed by that many bytes
 * of payload. A length byte of zero ends the chain. If the stream already
 * reports end-of-file before a length byte is requested, the method
 * returns without raising an error.
 *
 * @param resource $fh Open file handle positioned on a length byte
 * @throws Exception If a length byte cannot be read
 */
private static function skipBlock( $fh ) {
	for ( ; ; ) {
		if ( feof( $fh ) ) {
			return;
		}

		$lengthByte = fread( $fh, 1 );
		if ( strlen( $lengthByte ) === 0 ) {
			throw new Exception( "Ran out of input" );
		}

		$payloadSize = unpack( 'C', $lengthByte )[1];
		if ( $payloadSize == 0 ) {
			// Zero-length sub-block: end of the chain
			return;
		}

		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, $payloadSize );
	}
}
325 
/**
 * Read a chain of length-prefixed sub-blocks and return the concatenated data.
 *
 * Reading stops at the first length byte equal to "\0"; that terminator
 * is consumed but never included in the result.
 *
 * @param resource $fh Open file handle positioned on the first length byte
 * @param bool $includeLengths If true, each sub-block's length byte is kept
 *  in the output, immediately before that sub-block's payload
 * @return string The collected bytes
 * @throws Exception If more than MAX_SUBBLOCKS sub-blocks are encountered,
 *  or if end-of-file is reached before the terminator
 */
private static function readBlock( $fh, $includeLengths = false ) {
	$collected = '';
	$blocks = 0;

	for ( $lengthByte = fread( $fh, 1 ); $lengthByte !== "\0"; $lengthByte = fread( $fh, 1 ) ) {
		$blocks++;
		if ( $blocks > self::MAX_SUBBLOCKS ) {
			throw new Exception( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" );
		}
		if ( feof( $fh ) ) {
			throw new Exception( "Read error: Unexpected EOF." );
		}

		$payload = fread( $fh, ord( $lengthByte ) );
		$collected .= $includeLengths ? $lengthByte . $payload : $payload;
	}

	return $collected;
}
363 }
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
static getMetadata( $filename)
static readBlock( $fh, $includeLengths=false)
Read a block.
static readGCT( $fh, $bpp)