MediaWiki master
GIFMetadataExtractor.php
Go to the documentation of this file.
1<?php
15namespace MediaWiki\Media;
16
17use InvalidArgumentException;
18
25 public const VERSION = 1;
26
27 // Each sub-block is less than or equal to 255 bytes.
28 // Most of the time it's 255 bytes, except for in XMP
29 // blocks, where it's usually between 32-127 bytes each.
30 private const MAX_SUBBLOCKS = 262144; // 5 MiB divided by 20.
31
/**
 * Extract basic metadata from a GIF file.
 *
 * Walks the file block by block: counts image descriptors as frames, sums the
 * delays of the Graphics Control Extensions, collects comment extensions, and
 * picks up the NETSCAPE2.0 loop marker and an embedded XMP packet if present.
 * The file handle is always closed before returning or throwing.
 *
 * @param string $filename Path of the GIF file to inspect
 * @return array Associative array with the keys:
 *  - 'frameCount' (int) number of image descriptors found
 *  - 'looped' (bool) true if a NETSCAPE2.0 block has a loop count other than 1
 *  - 'duration' (float) sum of all frame delays, in seconds
 *  - 'xmp' (string) XMP data with the magic trailer removed, "" if none
 *  - 'comment' (array) comment strings, consecutive duplicates dropped
 *  - 'width' (int), 'height' (int) from the logical screen descriptor
 *  - 'bits' (int) last value decoded by decodeBPP()
 * @throws InvalidArgumentException If the file is missing, unreadable or malformed
 */
public static function getMetadata( $filename ) {
	$frameCount = 0;
	$duration = 0.0;
	$isLooped = false;
	$xmp = "";
	$comment = [];

	if ( !$filename ) {
		throw new InvalidArgumentException( 'No file name specified' );
	}
	if ( !file_exists( $filename ) || is_dir( $filename ) ) {
		throw new InvalidArgumentException( "File $filename does not exist" );
	}

	$fh = fopen( $filename, 'rb' );

	if ( !$fh ) {
		throw new InvalidArgumentException( "Unable to open file $filename" );
	}

	// Everything below can throw on malformed input; the finally clause
	// guarantees the handle is released on every exit path.
	try {
		// Check for the GIF header
		$buf = fread( $fh, 6 );
		if ( !( $buf === 'GIF87a' || $buf === 'GIF89a' ) ) {
			throw new InvalidArgumentException( "Not a valid GIF file; header: $buf" );
		}

		// Read width and height (unsigned 16 bit, little-endian).
		$buf = fread( $fh, 2 );
		if ( strlen( $buf ) < 2 ) {
			throw new InvalidArgumentException( "Not a valid GIF file; Unable to read width." );
		}
		$width = unpack( 'v', $buf )[1];
		$buf = fread( $fh, 2 );
		if ( strlen( $buf ) < 2 ) {
			throw new InvalidArgumentException( "Not a valid GIF file; Unable to read height." );
		}
		$height = unpack( 'v', $buf )[1];

		// Read BPP
		$buf = fread( $fh, 1 );
		[ $bpp, $have_map ] = self::decodeBPP( $buf );

		// Skip over background and aspect ratio
		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, 2 );

		// Skip over the GCT
		if ( $have_map ) {
			self::readGCT( $fh, $bpp );
		}

		while ( !feof( $fh ) ) {
			$buf = fread( $fh, 1 );

			// 2C = Start of an image descriptor (character , in ascii)
			if ( $buf === "\x2C" ) {
				// Found a frame
				$frameCount++;

				// Skip bounding box
				// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
				fread( $fh, 8 );

				// Read the frame's packed byte.
				// NOTE(review): this overwrites $bpp, so the 'bits' value returned
				// below comes from the last image descriptor rather than from the
				// logical screen descriptor - confirm this is intended.
				$buf = fread( $fh, 1 );
				[ $bpp, $have_map ] = self::decodeBPP( $buf );

				// Skip the frame's own colour table, if it has one
				if ( $have_map ) {
					self::readGCT( $fh, $bpp );
				}
				// One more byte precedes the image data sub-blocks
				// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
				fread( $fh, 1 );
				self::skipBlock( $fh );
			} elseif ( $buf === "\x21" ) {
				// 21 = Start of Extension (character ! in ascii)
				$buf = fread( $fh, 1 );
				if ( strlen( $buf ) < 1 ) {
					throw new InvalidArgumentException(
						"Not a valid GIF file; Unable to read graphics control extension."
					);
				}
				$extension_code = unpack( 'C', $buf )[1];

				if ( $extension_code === 0xF9 ) {
					// Graphics Control Extension.
					// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
					fread( $fh, 1 ); // Block size

					// @phan-suppress-next-next-line PhanPluginUseReturnValueInternalKnown
					// @phan-suppress-next-line PhanPluginDuplicateAdjacentStatement
					fread( $fh, 1 ); // Transparency, disposal method, user input

					$buf = fread( $fh, 2 ); // Delay, in hundredths of seconds.
					if ( strlen( $buf ) < 2 ) {
						throw new InvalidArgumentException( "Not a valid GIF file; Unable to read delay" );
					}
					$delay = unpack( 'v', $buf )[1];
					// Convert hundredths of a second to seconds
					$duration += $delay * 0.01;

					// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
					fread( $fh, 1 ); // Transparent colour index

					$term = fread( $fh, 1 ); // Should be a terminator
					if ( strlen( $term ) < 1 ) {
						throw new InvalidArgumentException( "Not a valid GIF file; Unable to read terminator byte" );
					}
					$term = unpack( 'C', $term )[1];
					if ( $term != 0 ) {
						throw new InvalidArgumentException( "Malformed Graphics Control Extension block" );
					}
				} elseif ( $extension_code === 0xFE ) {
					// Comment block(s).
					$data = self::readBlock( $fh );
					if ( $data === "" ) {
						throw new InvalidArgumentException( 'Read error, zero-length comment block' );
					}

					// The standard says this should be ASCII, however it's unclear if
					// that's true in practice. Check to see if it's valid UTF-8, if so
					// assume it's that, otherwise assume it's windows-1252 (iso-8859-1)
					$dataCopy = $data;
					// quickIsNFCVerify has the side effect of replacing any invalid characters
					\UtfNormal\Validator::quickIsNFCVerify( $dataCopy );

					if ( $dataCopy !== $data ) {
						// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
						$data = @iconv( 'windows-1252', 'UTF-8', $data );
					}

					$commentCount = count( $comment );
					if ( $commentCount === 0
						// @phan-suppress-next-line PhanTypeInvalidDimOffset
						|| $comment[$commentCount - 1] !== $data
					) {
						// Some applications repeat the same comment on each
						// frame of an animated GIF image, so if this comment
						// is identical to the last, only extract once.
						$comment[] = $data;
					}
				} elseif ( $extension_code === 0xFF ) {
					// Application extension (Netscape info about the animated gif)
					// or XMP (or theoretically any other type of extension block)
					$blockLength = fread( $fh, 1 );
					if ( strlen( $blockLength ) < 1 ) {
						throw new InvalidArgumentException( "Not a valid GIF file; Unable to read block length" );
					}
					$blockLength = unpack( 'C', $blockLength )[1];
					$data = fread( $fh, $blockLength );

					if ( $blockLength !== 11 ) {
						wfDebug( __METHOD__ . " GIF application block with wrong length" );
						// Rewind over the length byte and the data just read, so that
						// skipBlock() starts at the first sub-block of this extension.
						fseek( $fh, -( $blockLength + 1 ), SEEK_CUR );
						self::skipBlock( $fh );
						continue;
					}

					// NETSCAPE2.0 (application name for animated gif)
					if ( $data === 'NETSCAPE2.0' ) {
						$data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

						if ( $data !== "\x03\x01" ) {
							throw new InvalidArgumentException( "Expected \x03\x01, got $data" );
						}

						// Unsigned little-endian integer, loop count or zero for "forever"
						$loopData = fread( $fh, 2 );
						if ( strlen( $loopData ) < 2 ) {
							throw new InvalidArgumentException( "Not a valid GIF file; Unable to read loop count" );
						}
						$loopCount = unpack( 'v', $loopData )[1];

						if ( $loopCount !== 1 ) {
							$isLooped = true;
						}

						// Read out terminator byte
						// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
						fread( $fh, 1 );
					} elseif ( $data === 'XMP DataXMP' ) {
						// application name for XMP data.
						// see pg 18 of XMP spec part 3.

						// Sub-block length bytes are kept in the returned data here
						$xmp = self::readBlock( $fh, true );

						if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
							|| substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
						) {
							throw new InvalidArgumentException( "XMP does not have magic trailer!" );
						}

						// strip out trailer.
						$xmp = substr( $xmp, 0, -257 );
					} else {
						// unrecognized extension block
						fseek( $fh, -( $blockLength + 1 ), SEEK_CUR );
						self::skipBlock( $fh );
					}
				} else {
					// Any other extension type: skip its sub-blocks
					self::skipBlock( $fh );
				}
			} elseif ( $buf === "\x3B" ) {
				// 3B = Trailer (character ; in ascii)
				break;
			} else {
				if ( strlen( $buf ) < 1 ) {
					throw new InvalidArgumentException( "Not a valid GIF file; Unable to read unknown byte." );
				}
				$byte = unpack( 'C', $buf )[1];
				throw new InvalidArgumentException( "At position: " . ftell( $fh ) . ", Unknown byte " . $byte );
			}
		}

		return [
			'frameCount' => $frameCount,
			'looped' => $isLooped,
			'duration' => $duration,
			'xmp' => $xmp,
			'comment' => $comment,
			'width' => $width,
			'height' => $height,
			'bits' => $bpp,
		];
	} finally {
		fclose( $fh );
	}
}
261
/**
 * Skip over a colour table.
 *
 * A table holds 2^$bpp entries of 3 bytes each. getMetadata() calls this both
 * for the table after the logical screen descriptor and for the table that
 * may follow an image descriptor. The bytes are discarded; only the stream
 * position is advanced. The whole table is consumed with a single read
 * instead of one read per entry.
 *
 * @param resource $fh Open file handle positioned at the start of the table
 * @param int $bpp Table size exponent as returned by decodeBPP() (1 to 8)
 */
private static function readGCT( $fh, $bpp ) {
	$tableBytes = 3 * ( 2 ** $bpp );
	// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
	fread( $fh, $tableBytes );
}
274
/**
 * Decode the packed byte of a screen or image descriptor.
 *
 * The three lowest bits plus one give the bit count; the highest bit tells
 * whether a colour table follows.
 *
 * @param string $data Byte string; only the first byte is looked at
 * @return array Two-element list: [ int bit count (1 to 8), int 1 if a colour table follows, else 0 ]
 * @throws InvalidArgumentException If $data is empty
 */
private static function decodeBPP( $data ) {
	if ( strlen( $data ) === 0 ) {
		throw new InvalidArgumentException( "Not a valid GIF file; Unable to read bits per channel." );
	}

	$packed = ord( $data[0] );

	return [
		( $packed & 0x07 ) + 1,
		( $packed >> 7 ) & 1,
	];
}
292
/**
 * Advance the stream past a chain of data sub-blocks.
 *
 * Each sub-block starts with a one-byte length followed by that many bytes
 * of data. Reading stops after a zero length byte, or silently once the
 * end-of-file flag is set on the handle.
 *
 * @param resource $fh Open file handle positioned at a sub-block length byte
 * @throws InvalidArgumentException If a length byte cannot be read
 */
private static function skipBlock( $fh ) {
	while ( true ) {
		if ( feof( $fh ) ) {
			return;
		}

		$lengthByte = fread( $fh, 1 );
		if ( strlen( $lengthByte ) < 1 ) {
			throw new InvalidArgumentException( "Not a valid GIF file; Unable to read block length." );
		}

		$size = ord( $lengthByte );
		if ( $size === 0 ) {
			// Zero-length sub-block: end of the chain
			return;
		}

		// @phan-suppress-next-line PhanPluginUseReturnValueInternalKnown
		fread( $fh, $size );
	}
}
311
/**
 * Read a chain of data sub-blocks and return their concatenated contents.
 *
 * Each sub-block starts with a one-byte length followed by that many bytes
 * of data; a "\0" length byte ends the chain.
 *
 * @param resource $fh Open file handle positioned at the first length byte
 * @param bool $includeLengths If true, each sub-block's length byte is kept
 *  in front of its data in the returned string (the terminating "\0" is not)
 * @return string The data read
 * @throws InvalidArgumentException On end of file before the terminator, or
 *  when more than self::MAX_SUBBLOCKS sub-blocks are encountered
 */
private static function readBlock( $fh, $includeLengths = false ) {
	$payload = '';

	for (
		$seen = 1, $lengthByte = fread( $fh, 1 );
		$lengthByte !== "\0";
		$seen++, $lengthByte = fread( $fh, 1 )
	) {
		if ( $seen > self::MAX_SUBBLOCKS ) {
			throw new InvalidArgumentException( "MAX_SUBBLOCKS exceeded (over $seen sub-blocks)" );
		}
		if ( feof( $fh ) ) {
			throw new InvalidArgumentException( "Read error: Unexpected EOF." );
		}

		$chunk = fread( $fh, ord( $lengthByte ) );
		$payload .= $includeLengths ? $lengthByte . $chunk : $chunk;
	}

	return $payload;
}
349}
350
// Register 'GIFMetadataExtractor' (no namespace) as an alias of the namespaced class.
352class_alias( GIFMetadataExtractor::class, 'GIFMetadataExtractor' );
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.