MediaWiki REL1_39
GIFMetadataExtractor.php
Go to the documentation of this file.
1<?php
29use Wikimedia\AtEase\AtEase;
30
/** @var string Single byte "," (0x2C); getMetadata() treats it as the start of a frame */
38 private static $gifFrameSep;
39
/** @var string Single byte "!" (0x21); getMetadata() treats it as the start of an extension block */
41 private static $gifExtensionSep;
42
/** @var string Single byte ";" (0x3B); getMetadata() stops parsing when it reads this byte */
44 private static $gifTerm;
45
/** Version number of this extractor. NOTE(review): presumably bumped when the output format changes; confirm against callers. */
46 public const VERSION = 1;
47
48 // Each sub-block is less than or equal to 255 bytes.
49 // Most of the time it's 255 bytes, except in XMP
50 // blocks, where it's usually between 32-127 bytes each.
// readBlock() throws once it has read more than this many sub-blocks.
51 private const MAX_SUBBLOCKS = 262144; // 5 MiB divided by 20.
52
/**
 * Parse a GIF file and collect basic metadata about it.
 *
 * The file is walked block by block: frames are counted, Graphics Control
 * Extension delays are summed, comment blocks are collected, and the
 * NETSCAPE2.0 and XMP application extensions are decoded. Any other
 * extension is skipped.
 *
 * @param string $filename Path of the GIF file to read
 * @return array With the keys:
 *  - 'frameCount' (int) number of image descriptors found
 *  - 'looped' (bool) true if a NETSCAPE2.0 block gives a loop count other than 1
 *  - 'duration' (float) sum of all frame delays, in seconds
 *  - 'xmp' (string) raw XMP packet with the magic trailer removed, or ""
 *  - 'comment' (string[]) comment blocks, consecutive duplicates dropped
 *  - 'width' (int), 'height' (int) logical screen size
 *  - 'bits' (int) bits per pixel from the most recently decoded packed field
 *    (logical screen descriptor, or the last image descriptor if any frame was read)
 * @throws Exception If the file is missing, unreadable, truncated or malformed
 */
public static function getMetadata( $filename ) {
	// pack( "C", ord( $c ) ) is just the one-byte string $c, so store the literals.
	self::$gifFrameSep = ','; // 2C
	self::$gifExtensionSep = '!'; // 21
	self::$gifTerm = ';'; // 3B

	if ( !$filename ) {
		throw new Exception( "No file name specified" );
	} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
		throw new Exception( "File $filename does not exist" );
	}

	$fh = fopen( $filename, 'rb' );

	if ( !$fh ) {
		throw new Exception( "Unable to open file $filename" );
	}

	// Everything below may throw; the finally clause guarantees the handle
	// is released on both the success and the failure path.
	try {
		$frameCount = 0;
		$duration = 0.0;
		$isLooped = false;
		$xmp = "";
		$comment = [];

		// Check for the GIF header
		$buf = fread( $fh, 6 );
		if ( $buf !== 'GIF87a' && $buf !== 'GIF89a' ) {
			throw new Exception( "Not a valid GIF file; header: $buf" );
		}

		// Read width and height (unsigned 16 bit little-endian each).
		$width = unpack( 'v', self::readOrFail( $fh, 2 ) )[1];
		$height = unpack( 'v', self::readOrFail( $fh, 2 ) )[1];

		// Read BPP
		[ $bpp, $have_map ] = self::decodeBPP( fread( $fh, 1 ) );

		// Skip over background and aspect ratio
		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, 2 );

		// Skip over the GCT
		if ( $have_map ) {
			self::readGCT( $fh, $bpp );
		}

		while ( !feof( $fh ) ) {
			$buf = fread( $fh, 1 );

			if ( $buf === self::$gifFrameSep ) {
				// Found a frame
				$frameCount++;

				// Skip bounding box
				// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
				fread( $fh, 8 );

				// Read BPP of this frame
				[ $bpp, $have_map ] = self::decodeBPP( fread( $fh, 1 ) );

				// Skip the colour table that belongs to this frame, if any
				if ( $have_map ) {
					self::readGCT( $fh, $bpp );
				}

				// One more byte precedes the frame's data sub-blocks
				// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
				fread( $fh, 1 );
				self::skipBlock( $fh );
			} elseif ( $buf === self::$gifExtensionSep ) {
				$extension_code = unpack( 'C', self::readOrFail( $fh, 1 ) )[1];

				if ( $extension_code === 0xF9 ) {
					// Graphics Control Extension.
					// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
					fread( $fh, 1 ); // Block size

					// @phan-suppress-next-next-line PhanPluginUseReturnValueInternalKnown
					// @phan-suppress-next-line PhanPluginDuplicateAdjacentStatement
					fread( $fh, 1 ); // Transparency, disposal method, user input

					// Delay, in hundredths of seconds.
					$delay = unpack( 'v', self::readOrFail( $fh, 2 ) )[1];
					$duration += $delay * 0.01;

					// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
					fread( $fh, 1 ); // Transparent colour index

					// Should be a terminator
					$term = unpack( 'C', self::readOrFail( $fh, 1 ) )[1];
					if ( $term !== 0 ) {
						throw new Exception( "Malformed Graphics Control Extension block" );
					}
				} elseif ( $extension_code === 0xFE ) {
					// Comment block(s).
					$data = self::readBlock( $fh );
					if ( $data === "" ) {
						throw new Exception( 'Read error, zero-length comment block' );
					}

					// The standard says this should be ASCII, however it's unclear if
					// that's true in practice. Check to see if it's valid UTF-8; if so
					// assume it is that, otherwise assume it's windows-1252 (iso-8859-1).
					$dataCopy = $data;
					// quickIsNFCVerify has the side effect of replacing any invalid characters
					UtfNormal\Validator::quickIsNFCVerify( $dataCopy );

					if ( $dataCopy !== $data ) {
						AtEase::suppressWarnings();
						$data = iconv( 'windows-1252', 'UTF-8', $data );
						AtEase::restoreWarnings();
					}

					$commentCount = count( $comment );
					if ( $commentCount === 0
						// @phan-suppress-next-line PhanTypeInvalidDimOffset
						|| $comment[$commentCount - 1] !== $data
					) {
						// Some applications repeat the same comment on each
						// frame of an animated GIF image, so if this comment
						// is identical to the last, only extract once.
						$comment[] = $data;
					}
				} elseif ( $extension_code === 0xFF ) {
					// Application extension (Netscape info about the animated gif)
					// or XMP (or theoretically any other type of extension block)

					// Remember where the block starts so it can be skipped as a
					// whole later. An absolute seek stays correct even when a
					// read below comes back short.
					$blockStart = ftell( $fh );
					$blockLength = unpack( 'C', self::readOrFail( $fh, 1 ) )[1];

					if ( $blockLength !== 11 ) {
						// Do not read the payload here: the length may be 0, and
						// fread() rejects a zero length (ValueError on PHP 8).
						wfDebug( __METHOD__ . " GIF application block with wrong length" );
						fseek( $fh, $blockStart );
						self::skipBlock( $fh );
						continue;
					}

					$data = fread( $fh, $blockLength );

					// NETSCAPE2.0 (application name for animated gif)
					if ( $data === 'NETSCAPE2.0' ) {
						$data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

						if ( $data !== "\x03\x01" ) {
							throw new Exception( "Expected \x03\x01, got $data" );
						}

						// Unsigned little-endian integer, loop count or zero for "forever"
						$loopCount = unpack( 'v', self::readOrFail( $fh, 2 ) )[1];

						if ( $loopCount !== 1 ) {
							$isLooped = true;
						}

						// Read out terminator byte
						// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
						fread( $fh, 1 );
					} elseif ( $data === 'XMP DataXMP' ) {
						// application name for XMP data.
						// see pg 18 of XMP spec part 3.

						$xmp = self::readBlock( $fh, true );

						if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
							|| substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
						) {
							throw new Exception( "XMP does not have magic trailer!" );
						}

						// strip out trailer.
						$xmp = substr( $xmp, 0, -257 );
					} else {
						// unrecognized extension block
						fseek( $fh, $blockStart );
						self::skipBlock( $fh );
					}
				} else {
					self::skipBlock( $fh );
				}
			} elseif ( $buf === self::$gifTerm ) {
				break;
			} else {
				if ( strlen( $buf ) < 1 ) {
					throw new Exception( "Ran out of input" );
				}
				$byte = unpack( 'C', $buf )[1];
				throw new Exception( "At position: " . ftell( $fh ) . ", Unknown byte " . $byte );
			}
		}

		return [
			'frameCount' => $frameCount,
			'looped' => $isLooped,
			'duration' => $duration,
			'xmp' => $xmp,
			'comment' => $comment,
			'width' => $width,
			'height' => $height,
			'bits' => $bpp,
		];
	} finally {
		fclose( $fh );
	}
}

/**
 * Read exactly $length bytes from the stream, or fail.
 *
 * @param resource $fh Open file handle
 * @param int $length Number of bytes required; must be at least 1
 * @return string The bytes read, always $length long
 * @throws Exception If fewer than $length bytes could be read
 */
private static function readOrFail( $fh, $length ) {
	$bytes = fread( $fh, $length );
	if ( $bytes === false || strlen( $bytes ) < $length ) {
		throw new Exception( "Ran out of input" );
	}

	return $bytes;
}
275
/**
 * Advance the stream past a colour table.
 *
 * The table is consumed as 2^$bpp reads of three bytes each; the bytes
 * themselves are discarded.
 *
 * @param resource $fh Open file handle positioned at the start of the table
 * @param int $bpp Bits per pixel, as returned by decodeBPP()
 */
private static function readGCT( $fh, $bpp ) {
	$entriesLeft = 2 ** $bpp;
	while ( $entriesLeft >= 1 ) {
		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, 3 );
		$entriesLeft--;
	}
}
288
/**
 * Decode a packed descriptor byte.
 *
 * @param string $data The packed byte, as read from the file
 * @return array Two elements: bits per pixel (low three bits plus one),
 *  and the colour-table flag (the top bit, 0 or 1)
 * @throws Exception If $data is empty
 */
private static function decodeBPP( $data ) {
	if ( strlen( $data ) < 1 ) {
		throw new Exception( "Ran out of input" );
	}

	// Only the first byte is relevant.
	$packed = ord( $data );

	return [
		( $packed & 7 ) + 1,
		( $packed >> 7 ) & 1,
	];
}
306
/**
 * Skip over a chain of length-prefixed sub-blocks.
 *
 * Each sub-block starts with a one-byte length followed by that many
 * bytes; a length of zero ends the chain. If end of file is reached
 * between sub-blocks the method simply returns.
 *
 * @param resource $fh Open file handle positioned at a sub-block length byte
 * @throws Exception If a length byte cannot be read
 */
private static function skipBlock( $fh ) {
	while ( !feof( $fh ) ) {
		$lengthByte = fread( $fh, 1 );
		if ( (string)$lengthByte === '' ) {
			throw new Exception( "Ran out of input" );
		}

		$size = ord( $lengthByte );
		if ( $size === 0 ) {
			// Terminator sub-block: done.
			return;
		}

		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, $size );
	}
}
325
/**
 * Read a chain of length-prefixed sub-blocks and return their contents.
 *
 * Sub-blocks are read until a zero length byte is found. The payloads are
 * concatenated; the terminating zero byte is never part of the result.
 *
 * @param resource $fh Open file handle positioned at a sub-block length byte
 * @param bool $includeLengths If true, each sub-block's length byte is kept
 *  in front of its payload in the returned string
 * @return string The concatenated sub-block data
 * @throws Exception If more than MAX_SUBBLOCKS sub-blocks are read, or end of
 *  file is reached before the terminating zero byte
 */
private static function readBlock( $fh, $includeLengths = false ) {
	$collected = '';
	$seen = 0;

	for ( $lengthByte = fread( $fh, 1 ); $lengthByte !== "\0"; $lengthByte = fread( $fh, 1 ) ) {
		$seen++;
		if ( $seen > self::MAX_SUBBLOCKS ) {
			throw new Exception( "MAX_SUBBLOCKS exceeded (over $seen sub-blocks)" );
		}
		if ( feof( $fh ) ) {
			throw new Exception( "Read error: Unexpected EOF." );
		}

		$payload = fread( $fh, ord( $lengthByte ) );
		$collected .= $includeLengths ? $lengthByte . $payload : $payload;
	}

	return $collected;
}
363}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
static getMetadata( $filename)