MediaWiki REL1_40
GIFMetadataExtractor.php
Go to the documentation of this file.
1<?php
29use Wikimedia\AtEase\AtEase;
30
38 private static $gifFrameSep;
39
41 private static $gifExtensionSep;
42
44 private static $gifTerm;
45
46 public const VERSION = 1;
47
48 // Each sub-block is less than or equal to 255 bytes.
49 // Most of the time it's 255 bytes, except in XMP
50 // blocks, where it's usually between 32 and 127 bytes each.
51 private const MAX_SUBBLOCKS = 262144; // 5 MiB divided by 20.
52
/**
 * Read a GIF file and extract its dimensions, animation data and embedded metadata.
 *
 * The returned 'duration' is the sum of all graphics control extension delays,
 * converted from hundredths of a second to seconds. 'looped' becomes true when a
 * NETSCAPE2.0 application block carries a loop count other than 1. Note that
 * $bpp is re-decoded for every image descriptor, so 'bits' reports the value
 * that was decoded last.
 *
 * @param string $filename Path to the GIF file
 * @return array Map with the keys frameCount, looped, duration, xmp, comment,
 *  width, height and bits
 * @throws Exception If the file is missing, unreadable or not a well-formed GIF
 */
public static function getMetadata( $filename ) {
	self::$gifFrameSep = pack( "C", ord( "," ) ); // 2C
	self::$gifExtensionSep = pack( "C", ord( "!" ) ); // 21
	self::$gifTerm = pack( "C", ord( ";" ) ); // 3B

	$frameCount = 0;
	$duration = 0.0;
	$isLooped = false;
	$xmp = "";
	$comment = [];

	if ( !$filename ) {
		throw new Exception( 'No file name specified' );
	}
	if ( !file_exists( $filename ) || is_dir( $filename ) ) {
		throw new Exception( "File $filename does not exist" );
	}

	$fh = fopen( $filename, 'rb' );

	if ( !$fh ) {
		throw new Exception( "Unable to open file $filename" );
	}

	// Everything below reads from $fh; the finally clause guarantees the handle
	// is released even when a malformed file makes the parser throw.
	try {
		// Check for the GIF header
		$buf = fread( $fh, 6 );
		if ( !( $buf === 'GIF87a' || $buf === 'GIF89a' ) ) {
			throw new Exception( "Not a valid GIF file; header: $buf" );
		}

		// Read width and height.
		$buf = fread( $fh, 2 );
		if ( strlen( $buf ) < 2 ) {
			throw new Exception( "Not a valid GIF file; Unable to read width." );
		}
		$width = unpack( 'v', $buf )[1];
		$buf = fread( $fh, 2 );
		if ( strlen( $buf ) < 2 ) {
			throw new Exception( "Not a valid GIF file; Unable to read height." );
		}
		$height = unpack( 'v', $buf )[1];

		// Read BPP
		$buf = fread( $fh, 1 );
		[ $bpp, $have_map ] = self::decodeBPP( $buf );

		// Skip over background and aspect ratio
		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, 2 );

		// Skip over the GCT
		if ( $have_map ) {
			self::readGCT( $fh, $bpp );
		}

		while ( !feof( $fh ) ) {
			$buf = fread( $fh, 1 );

			if ( $buf === self::$gifFrameSep ) {
				// Found a frame
				$frameCount++;

				// Skip bounding box
				// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
				fread( $fh, 8 );

				// Read BPP
				$buf = fread( $fh, 1 );
				[ $bpp, $have_map ] = self::decodeBPP( $buf );

				// Skip over this frame's own (local) colour table, if it has one
				if ( $have_map ) {
					self::readGCT( $fh, $bpp );
				}
				// One more byte precedes the image data sub-blocks; skip it too
				// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
				fread( $fh, 1 );
				self::skipBlock( $fh );
			} elseif ( $buf === self::$gifExtensionSep ) {
				$buf = fread( $fh, 1 );
				if ( strlen( $buf ) < 1 ) {
					throw new Exception( "Not a valid GIF file; Unable to read graphics control extension." );
				}
				$extension_code = unpack( 'C', $buf )[1];

				if ( $extension_code === 0xF9 ) {
					// Graphics Control Extension.
					// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
					fread( $fh, 1 ); // Block size

					// @phan-suppress-next-next-line PhanPluginUseReturnValueInternalKnown
					// @phan-suppress-next-line PhanPluginDuplicateAdjacentStatement
					fread( $fh, 1 ); // Transparency, disposal method, user input

					$buf = fread( $fh, 2 ); // Delay, in hundredths of seconds.
					if ( strlen( $buf ) < 2 ) {
						throw new Exception( "Not a valid GIF file; Unable to read delay" );
					}
					$delay = unpack( 'v', $buf )[1];
					$duration += $delay * 0.01;

					// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
					fread( $fh, 1 ); // Transparent colour index

					$term = fread( $fh, 1 ); // Should be a terminator
					if ( strlen( $term ) < 1 ) {
						throw new Exception( "Not a valid GIF file; Unable to read terminator byte" );
					}
					$term = unpack( 'C', $term )[1];
					if ( $term !== 0 ) {
						throw new Exception( "Malformed Graphics Control Extension block" );
					}
				} elseif ( $extension_code === 0xFE ) {
					// Comment block(s).
					$data = self::readBlock( $fh );
					if ( $data === "" ) {
						throw new Exception( 'Read error, zero-length comment block' );
					}

					// The standard says this should be ASCII, however it's unclear
					// whether that's true in practice. Check to see if it's valid
					// UTF-8; if so assume it is, otherwise assume it's
					// windows-1252 (iso-8859-1).
					$dataCopy = $data;
					// quickIsNFCVerify has the side effect of replacing any invalid characters
					UtfNormal\Validator::quickIsNFCVerify( $dataCopy );

					if ( $dataCopy !== $data ) {
						AtEase::suppressWarnings();
						$data = iconv( 'windows-1252', 'UTF-8', $data );
						AtEase::restoreWarnings();
					}

					$commentCount = count( $comment );
					if ( $commentCount === 0
						// @phan-suppress-next-line PhanTypeInvalidDimOffset
						|| $comment[$commentCount - 1] !== $data
					) {
						// Some applications repeat the same comment on each
						// frame of an animated GIF image, so if this comment
						// is identical to the last, only extract once.
						$comment[] = $data;
					}
				} elseif ( $extension_code === 0xFF ) {
					// Application extension (Netscape info about the animated gif)
					// or XMP (or theoretically any other type of extension block)
					$blockLength = fread( $fh, 1 );
					if ( strlen( $blockLength ) < 1 ) {
						throw new Exception( "Not a valid GIF file; Unable to read block length" );
					}
					$blockLength = unpack( 'C', $blockLength )[1];

					// Validate the length before reading the identifier: fread()
					// rejects a length of 0, and nothing has been consumed yet, so
					// only the length byte itself needs to be handed back.
					if ( $blockLength !== 11 ) {
						wfDebug( __METHOD__ . " GIF application block with wrong length" );
						fseek( $fh, -1, SEEK_CUR );
						self::skipBlock( $fh );
						continue;
					}

					$data = fread( $fh, $blockLength );

					// NETSCAPE2.0 (application name for animated gif)
					if ( $data === 'NETSCAPE2.0' ) {
						$data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

						if ( $data !== "\x03\x01" ) {
							throw new Exception( "Expected \x03\x01, got $data" );
						}

						// Unsigned little-endian integer, loop count or zero for "forever"
						$loopData = fread( $fh, 2 );
						if ( strlen( $loopData ) < 2 ) {
							throw new Exception( "Not a valid GIF file; Unable to read loop count" );
						}
						$loopCount = unpack( 'v', $loopData )[1];

						if ( $loopCount !== 1 ) {
							$isLooped = true;
						}

						// Read out terminator byte
						// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
						fread( $fh, 1 );
					} elseif ( $data === 'XMP DataXMP' ) {
						// application name for XMP data.
						// see pg 18 of XMP spec part 3.

						$xmp = self::readBlock( $fh, true );

						if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
							|| substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
						) {
							throw new Exception( "XMP does not have magic trailer!" );
						}

						// strip out trailer.
						$xmp = substr( $xmp, 0, -257 );
					} else {
						// unrecognized extension block: rewind to its length byte
						// and skip the whole chain of sub-blocks
						fseek( $fh, -( $blockLength + 1 ), SEEK_CUR );
						self::skipBlock( $fh );
					}
				} else {
					self::skipBlock( $fh );
				}
			} elseif ( $buf === self::$gifTerm ) {
				break;
			} else {
				if ( strlen( $buf ) < 1 ) {
					throw new Exception( "Not a valid GIF file; Unable to read unknown byte." );
				}
				$byte = unpack( 'C', $buf )[1];
				throw new Exception( "At position: " . ftell( $fh ) . ", Unknown byte " . $byte );
			}
		}
	} finally {
		fclose( $fh );
	}

	return [
		'frameCount' => $frameCount,
		'looped' => $isLooped,
		'duration' => $duration,
		'xmp' => $xmp,
		'comment' => $comment,
		'width' => $width,
		'height' => $height,
		'bits' => $bpp,
	];
}
282
/**
 * Advance the stream past a colour table of 2 ** $bpp entries, three bytes each.
 *
 * @param resource $fh Open file handle positioned at the start of the table
 * @param int $bpp Exponent giving the number of table entries
 */
private static function readGCT( $fh, $bpp ) {
	// Count down from the number of entries; each pass discards one 3-byte entry.
	for ( $entriesLeft = 2 ** $bpp; $entriesLeft >= 1; --$entriesLeft ) {
		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, 3 );
	}
}
295
/**
 * Decode a packed descriptor byte into a bit depth and a colour-table flag.
 *
 * @param string $data The single packed byte, as read from the file
 * @return array Two elements: the low three bits plus one, and the value of
 *  the top bit (0 or 1)
 * @throws Exception If $data is empty
 */
private static function decodeBPP( $data ) {
	if ( strlen( $data ) === 0 ) {
		throw new Exception( "Not a valid GIF file; Unable to read bits per channel." );
	}

	$packed = ord( $data );

	return [
		// Low three bits hold the depth minus one
		( $packed & 7 ) + 1,
		// Bit 7 says whether a colour table follows
		( $packed >> 7 ) & 1,
	];
}
313
/**
 * Skip a chain of length-prefixed sub-blocks, stopping after the zero-length
 * terminator. Returns without error if end of file is reached between sub-blocks.
 *
 * @param resource $fh Open file handle positioned at a sub-block length byte
 * @throws Exception If a length byte cannot be read
 */
private static function skipBlock( $fh ) {
	while ( !feof( $fh ) ) {
		$lengthByte = fread( $fh, 1 );
		if ( strlen( $lengthByte ) === 0 ) {
			throw new Exception( "Not a valid GIF file; Unable to read block length." );
		}

		$payloadSize = ord( $lengthByte );
		if ( $payloadSize !== 0 ) {
			// Discard this sub-block's payload and move on to the next length byte
			// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
			fread( $fh, $payloadSize );
			continue;
		}

		// A zero length marks the end of the chain
		return;
	}
}
332
/**
 * Read a chain of length-prefixed sub-blocks and return their concatenated
 * contents, stopping at the zero-length terminator (which is not included).
 *
 * @param resource $fh Open file handle positioned at a sub-block length byte
 * @param bool $includeLengths Whether to keep each sub-block's length byte in
 *  the returned data, in front of that sub-block's payload
 * @return string The collected data
 * @throws Exception On end of file, or when more than MAX_SUBBLOCKS sub-blocks
 *  are encountered
 */
private static function readBlock( $fh, $includeLengths = false ) {
	$collected = '';
	$seen = 0;

	for ( $lengthByte = fread( $fh, 1 ); $lengthByte !== "\0"; $lengthByte = fread( $fh, 1 ) ) {
		$seen++;
		if ( $seen > self::MAX_SUBBLOCKS ) {
			throw new Exception( "MAX_SUBBLOCKS exceeded (over $seen sub-blocks)" );
		}
		if ( feof( $fh ) ) {
			throw new Exception( "Read error: Unexpected EOF." );
		}

		$payload = fread( $fh, ord( $lengthByte ) );
		$collected .= $includeLengths ? $lengthByte . $payload : $payload;
	}

	return $collected;
}
370}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
static getMetadata( $filename)