MediaWiki master
GIFMetadataExtractor.php
Go to the documentation of this file.
1<?php
29use Wikimedia\AtEase\AtEase;
30
// Single-byte marker that introduces an image frame; getMetadata()
// sets it to "," (0x2C) and counts one frame per occurrence.
38 private static $gifFrameSep;
39
// Single-byte marker that introduces an extension block; getMetadata()
// sets it to "!" (0x21).
41 private static $gifExtensionSep;
42
// Single-byte marker that ends the block stream; getMetadata() sets it
// to ";" (0x3B) and stops parsing when it is read.
44 private static $gifTerm;
45
// NOTE(review): presumably the version of the extracted metadata format,
// to be bumped when the output of getMetadata() changes - confirm with callers.
46 public const VERSION = 1;
47
48 // Each sub-block is less than or equal to 255 bytes.
49 // Most of the time it's 255 bytes, except in XMP
50 // blocks, where it's usually between 32-127 bytes each.
// readBlock() throws once it has seen more than this many sub-blocks.
51 private const MAX_SUBBLOCKS = 262144; // 5 MiB divided by 20.
52
/**
 * Parse a GIF file and collect basic metadata about it.
 *
 * The file is walked block by block: image frames are counted, Graphics
 * Control Extension delays are summed, comment blocks are collected, the
 * NETSCAPE2.0 application block is inspected for the loop count and the
 * XMP application block (if any) is extracted.
 *
 * @param string $filename Path of the GIF file to inspect
 * @return array Associative array with the keys:
 *  - 'frameCount' (int) number of image frames found
 *  - 'looped' (bool) true if a NETSCAPE2.0 block has a loop count other than 1
 *  - 'duration' (float) sum of all frame delays, in seconds
 *  - 'xmp' (string) raw XMP packet without the magic trailer, or ""
 *  - 'comment' (string[]) comment blocks, consecutive duplicates dropped
 *  - 'width' (int) and 'height' (int) from the logical screen descriptor
 *  - 'bits' (int) value decoded from the most recently read packed
 *    descriptor byte (screen descriptor, or the last frame if any)
 * @throws InvalidArgumentException If the file is missing, unreadable,
 *  truncated in a mandatory field, or structurally malformed
 */
public static function getMetadata( $filename ) {
	self::$gifFrameSep = pack( "C", ord( "," ) ); // 2C
	self::$gifExtensionSep = pack( "C", ord( "!" ) ); // 21
	self::$gifTerm = pack( "C", ord( ";" ) ); // 3B

	$frameCount = 0;
	$duration = 0.0;
	$isLooped = false;
	$xmp = "";
	$comment = [];

	if ( !$filename ) {
		throw new InvalidArgumentException( 'No file name specified' );
	}
	if ( !file_exists( $filename ) || is_dir( $filename ) ) {
		throw new InvalidArgumentException( "File $filename does not exist" );
	}

	$fh = fopen( $filename, 'rb' );

	if ( !$fh ) {
		throw new InvalidArgumentException( "Unable to open file $filename" );
	}

	// Everything below can throw; make sure the handle is released on
	// every exit path instead of relying on garbage collection.
	try {
		// Check for the GIF header
		$buf = fread( $fh, 6 );
		if ( !( $buf === 'GIF87a' || $buf === 'GIF89a' ) ) {
			throw new InvalidArgumentException( "Not a valid GIF file; header: $buf" );
		}

		// Read width and height (unsigned 16-bit little-endian each).
		$buf = fread( $fh, 2 );
		if ( strlen( $buf ) < 2 ) {
			throw new InvalidArgumentException( "Not a valid GIF file; Unable to read width." );
		}
		$width = unpack( 'v', $buf )[1];
		$buf = fread( $fh, 2 );
		if ( strlen( $buf ) < 2 ) {
			throw new InvalidArgumentException( "Not a valid GIF file; Unable to read height." );
		}
		$height = unpack( 'v', $buf )[1];

		// Read BPP
		$buf = fread( $fh, 1 );
		[ $bpp, $have_map ] = self::decodeBPP( $buf );

		// Skip over background and aspect ratio
		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, 2 );

		// Skip over the GCT
		if ( $have_map ) {
			self::readGCT( $fh, $bpp );
		}

		while ( !feof( $fh ) ) {
			$buf = fread( $fh, 1 );

			if ( $buf === self::$gifFrameSep ) {
				// Found a frame
				$frameCount++;

				// Skip bounding box
				// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
				fread( $fh, 8 );

				// Read BPP
				$buf = fread( $fh, 1 );
				[ $bpp, $have_map ] = self::decodeBPP( $buf );

				// Skip the colour table attached to this frame, if any
				if ( $have_map ) {
					self::readGCT( $fh, $bpp );
				}
				// Skip one more byte, then the image data sub-blocks
				// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
				fread( $fh, 1 );
				self::skipBlock( $fh );
			} elseif ( $buf === self::$gifExtensionSep ) {
				$buf = fread( $fh, 1 );
				if ( strlen( $buf ) < 1 ) {
					throw new InvalidArgumentException(
						"Not a valid GIF file; Unable to read graphics control extension."
					);
				}
				$extension_code = unpack( 'C', $buf )[1];

				if ( $extension_code === 0xF9 ) {
					// Graphics Control Extension.
					// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
					fread( $fh, 1 ); // Block size

					// @phan-suppress-next-next-line PhanPluginUseReturnValueInternalKnown
					// @phan-suppress-next-line PhanPluginDuplicateAdjacentStatement
					fread( $fh, 1 ); // Transparency, disposal method, user input

					$buf = fread( $fh, 2 ); // Delay, in hundredths of seconds.
					if ( strlen( $buf ) < 2 ) {
						throw new InvalidArgumentException( "Not a valid GIF file; Unable to read delay" );
					}
					$delay = unpack( 'v', $buf )[1];
					$duration += $delay * 0.01;

					// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
					fread( $fh, 1 ); // Transparent colour index

					$term = fread( $fh, 1 ); // Should be a terminator
					if ( strlen( $term ) < 1 ) {
						throw new InvalidArgumentException( "Not a valid GIF file; Unable to read terminator byte" );
					}
					$term = unpack( 'C', $term )[1];
					if ( $term !== 0 ) {
						throw new InvalidArgumentException( "Malformed Graphics Control Extension block" );
					}
				} elseif ( $extension_code === 0xFE ) {
					// Comment block(s).
					$data = self::readBlock( $fh );
					if ( $data === "" ) {
						throw new InvalidArgumentException( 'Read error, zero-length comment block' );
					}

					// The standard says this should be ASCII, however it is unclear
					// whether that holds in practice. Check whether it is valid UTF-8;
					// if so assume that, otherwise assume windows-1252 (iso-8859-1).
					$dataCopy = $data;
					// quickIsNFCVerify has the side effect of replacing any invalid characters
					UtfNormal\Validator::quickIsNFCVerify( $dataCopy );

					if ( $dataCopy !== $data ) {
						AtEase::suppressWarnings();
						$data = iconv( 'windows-1252', 'UTF-8', $data );
						AtEase::restoreWarnings();
					}

					$commentCount = count( $comment );
					if ( $commentCount === 0
						// @phan-suppress-next-line PhanTypeInvalidDimOffset
						|| $comment[$commentCount - 1] !== $data
					) {
						// Some applications repeat the same comment on each
						// frame of an animated GIF image, so if this comment
						// is identical to the last, only extract once.
						$comment[] = $data;
					}
				} elseif ( $extension_code === 0xFF ) {
					// Application extension (Netscape info about the animated gif)
					// or XMP (or theoretically any other type of extension block)
					$blockLength = fread( $fh, 1 );
					if ( strlen( $blockLength ) < 1 ) {
						throw new InvalidArgumentException( "Not a valid GIF file; Unable to read block length" );
					}
					$blockLength = unpack( 'C', $blockLength )[1];

					if ( $blockLength !== 11 ) {
						// Check the length before reading the identifier: a zero
						// length would make fread() throw a ValueError on PHP 8.
						// Step back onto the length byte and let skipBlock()
						// consume the whole extension as ordinary sub-blocks.
						wfDebug( __METHOD__ . " GIF application block with wrong length" );
						fseek( $fh, -1, SEEK_CUR );
						self::skipBlock( $fh );
						continue;
					}

					$data = fread( $fh, $blockLength );

					// NETSCAPE2.0 (application name for animated gif)
					if ( $data === 'NETSCAPE2.0' ) {
						$data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

						if ( $data !== "\x03\x01" ) {
							throw new InvalidArgumentException( "Expected \x03\x01, got $data" );
						}

						// Unsigned little-endian integer, loop count or zero for "forever"
						$loopData = fread( $fh, 2 );
						if ( strlen( $loopData ) < 2 ) {
							throw new InvalidArgumentException( "Not a valid GIF file; Unable to read loop count" );
						}
						$loopCount = unpack( 'v', $loopData )[1];

						if ( $loopCount !== 1 ) {
							$isLooped = true;
						}

						// Read out terminator byte
						// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
						fread( $fh, 1 );
					} elseif ( $data === 'XMP DataXMP' ) {
						// application name for XMP data.
						// see pg 18 of XMP spec part 3.

						// Keep the sub-block length bytes: they are part of the XMP
						// payload as stored in the file.
						$xmp = self::readBlock( $fh, true );

						if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
							|| substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
						) {
							throw new InvalidArgumentException( "XMP does not have magic trailer!" );
						}

						// strip out trailer.
						$xmp = substr( $xmp, 0, -257 );
					} else {
						// Unrecognized extension block. Rewind by what was actually
						// read (a truncated file may yield fewer than 11 bytes) plus
						// the length byte, then skip it as ordinary sub-blocks.
						fseek( $fh, -( strlen( $data ) + 1 ), SEEK_CUR );
						self::skipBlock( $fh );
					}
				} else {
					self::skipBlock( $fh );
				}
			} elseif ( $buf === self::$gifTerm ) {
				break;
			} else {
				if ( strlen( $buf ) < 1 ) {
					throw new InvalidArgumentException( "Not a valid GIF file; Unable to read unknown byte." );
				}
				$byte = unpack( 'C', $buf )[1];
				throw new InvalidArgumentException( "At position: " . ftell( $fh ) . ", Unknown byte " . $byte );
			}
		}

		return [
			'frameCount' => $frameCount,
			'looped' => $isLooped,
			'duration' => $duration,
			'xmp' => $xmp,
			'comment' => $comment,
			'width' => $width,
			'height' => $height,
			'bits' => $bpp,
		];
	} finally {
		fclose( $fh );
	}
}
284
/**
 * Advance the file pointer past a colour table.
 *
 * Reads and discards 2^$bpp entries of three bytes each.
 *
 * @param resource $fh Open file handle positioned at the start of the table
 * @param int $bpp Exponent giving the number of table entries (2 ** $bpp)
 */
private static function readGCT( $fh, $bpp ) {
	$entriesLeft = 2 ** $bpp;
	while ( $entriesLeft >= 1 ) {
		// One table entry is three bytes; the content is not needed.
		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, 3 );
		$entriesLeft--;
	}
}
297
/**
 * Decode a packed descriptor byte.
 *
 * @param string $data Binary string whose first byte is the packed field
 * @return array Two-element list: [ (low three bits) + 1, top bit as 0 or 1 ];
 *  getMetadata() uses them as the bit count and the "has colour table" flag
 * @throws InvalidArgumentException If $data is empty
 */
private static function decodeBPP( $data ) {
	if ( strlen( $data ) < 1 ) {
		throw new InvalidArgumentException( "Not a valid GIF file; Unable to read bits per channel." );
	}

	$packed = ord( $data );

	return [
		( $packed & 7 ) + 1,
		( $packed >> 7 ) & 1,
	];
}
315
/**
 * Skip a chain of length-prefixed sub-blocks.
 *
 * Each sub-block starts with a one-byte length followed by that many
 * bytes; a length of zero ends the chain. Returns quietly if end of file
 * is already flagged when a length byte is due.
 *
 * @param resource $fh Open file handle positioned at a sub-block length byte
 * @throws InvalidArgumentException If a length byte cannot be read
 */
private static function skipBlock( $fh ) {
	for ( ; ; ) {
		if ( feof( $fh ) ) {
			return;
		}

		$lengthByte = fread( $fh, 1 );
		if ( strlen( $lengthByte ) < 1 ) {
			throw new InvalidArgumentException( "Not a valid GIF file; Unable to read block length." );
		}

		$payloadSize = ord( $lengthByte );
		if ( $payloadSize === 0 ) {
			// Zero-length sub-block: end of the chain.
			return;
		}

		// Discard the payload.
		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, $payloadSize );
	}
}
334
/**
 * Read a chain of length-prefixed sub-blocks and return their contents
 * concatenated.
 *
 * Reading stops at the first sub-block whose length byte is "\0"; that
 * terminator is consumed but never included in the result.
 *
 * @param resource $fh Open file handle positioned at a sub-block length byte
 * @param bool $includeLengths If true, each sub-block's length byte is kept
 *  in the output in front of its payload
 * @return string The collected data
 * @throws InvalidArgumentException If more than MAX_SUBBLOCKS sub-blocks
 *  are seen, or end of file is hit before the terminator
 */
private static function readBlock( $fh, $includeLengths = false ) {
	$collected = '';
	$blocks = 0;

	for ( $lengthByte = fread( $fh, 1 ); $lengthByte !== "\0"; $lengthByte = fread( $fh, 1 ) ) {
		++$blocks;
		if ( $blocks > self::MAX_SUBBLOCKS ) {
			throw new InvalidArgumentException( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" );
		}
		if ( feof( $fh ) ) {
			throw new InvalidArgumentException( "Read error: Unexpected EOF." );
		}

		$payload = fread( $fh, ord( $lengthByte ) );
		$collected .= $includeLengths ? $lengthByte . $payload : $payload;
	}

	return $collected;
}
372}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
static getMetadata( $filename)