MediaWiki  master
GIFMetadataExtractor.php
Go to the documentation of this file.
1 <?php
36  private static $gifFrameSep;
37 
39  private static $gifExtensionSep;
40 
42  private static $gifTerm;
43 
44  const VERSION = 1;
45 
46  // Each sub-block is less than or equal to 255 bytes.
47  // Most of the time it's 255 bytes, except in XMP
48  // blocks, where it's usually between 32 and 127 bytes each.
49  const MAX_SUBBLOCKS = 262144; // 5mb divided by 20.
50 
/**
 * Extract animation and textual metadata from a GIF file.
 *
 * Walks the file block by block: counts image frames, sums the frame
 * delays from Graphics Control Extensions, notes the NETSCAPE2.0 loop
 * setting, and collects comment and XMP application-extension data.
 *
 * @param string $filename Path of the GIF file to inspect
 * @return array Array with the keys 'frameCount' (int), 'looped' (bool),
 *  'duration' (float, in seconds), 'xmp' (string) and 'comment' (string[])
 * @throws Exception If the file is missing, cannot be opened, is not a GIF,
 *  or is truncated/malformed
 */
static function getMetadata( $filename ) {
    self::$gifFrameSep = pack( "C", ord( "," ) ); // 2C
    self::$gifExtensionSep = pack( "C", ord( "!" ) ); // 21
    self::$gifTerm = pack( "C", ord( ";" ) ); // 3B

    $frameCount = 0;
    $duration = 0.0;
    $isLooped = false;
    $xmp = "";
    $comment = [];

    if ( !$filename ) {
        throw new Exception( "No file name specified" );
    } elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
        throw new Exception( "File $filename does not exist" );
    }

    $fh = fopen( $filename, 'rb' );

    if ( !$fh ) {
        throw new Exception( "Unable to open file $filename" );
    }

    // Everything below may throw; the finally clause guarantees the
    // handle is released on every exit path.
    try {
        // Check for the GIF header
        $buf = fread( $fh, 6 );
        if ( !( $buf == 'GIF87a' || $buf == 'GIF89a' ) ) {
            throw new Exception( "Not a valid GIF file; header: $buf" );
        }

        // Skip over width and height (2 bytes each); they are not used here.
        fread( $fh, 4 );

        // Read BPP
        $buf = fread( $fh, 1 );
        $bpp = self::decodeBPP( $buf );

        // Skip over background and aspect ratio
        fread( $fh, 2 );

        // Skip over the GCT
        self::readGCT( $fh, $bpp );

        while ( !feof( $fh ) ) {
            $buf = fread( $fh, 1 );

            if ( $buf == self::$gifFrameSep ) {
                // Found a frame
                $frameCount++;

                // Skip bounding box
                fread( $fh, 8 );

                // Read BPP
                $buf = fread( $fh, 1 );
                $bpp = self::decodeBPP( $buf );

                // Skip the colour table that follows the frame header (if any),
                // then one more byte, then the frame's data sub-blocks.
                self::readGCT( $fh, $bpp );
                fread( $fh, 1 );
                self::skipBlock( $fh );
            } elseif ( $buf == self::$gifExtensionSep ) {
                $buf = fread( $fh, 1 );
                if ( strlen( $buf ) < 1 ) {
                    throw new Exception( "Ran out of input" );
                }
                $extension_code = unpack( 'C', $buf )[1];

                if ( $extension_code == 0xF9 ) {
                    // Graphics Control Extension.
                    fread( $fh, 1 ); // Block size

                    fread( $fh, 1 ); // Transparency, disposal method, user input

                    $buf = fread( $fh, 2 ); // Delay, in hundredths of seconds.
                    if ( strlen( $buf ) < 2 ) {
                        throw new Exception( "Ran out of input" );
                    }
                    $delay = unpack( 'v', $buf )[1];
                    $duration += $delay * 0.01;

                    fread( $fh, 1 ); // Transparent colour index

                    $term = fread( $fh, 1 ); // Should be a terminator
                    if ( strlen( $term ) < 1 ) {
                        throw new Exception( "Ran out of input" );
                    }
                    $term = unpack( 'C', $term )[1];
                    if ( $term != 0 ) {
                        throw new Exception( "Malformed Graphics Control Extension block" );
                    }
                } elseif ( $extension_code == 0xFE ) {
                    // Comment block(s).
                    $data = self::readBlock( $fh );
                    if ( $data === "" ) {
                        throw new Exception( 'Read error, zero-length comment block' );
                    }

                    // The standard says this should be ASCII, however it's unclear
                    // whether that's true in practice. Check whether it is valid
                    // UTF-8; if so assume it is that, otherwise assume it is
                    // windows-1252 (iso-8859-1).
                    $dataCopy = $data;
                    // quickIsNFCVerify has the side effect of replacing any invalid characters
                    UtfNormal\Validator::quickIsNFCVerify( $dataCopy );

                    if ( $dataCopy !== $data ) {
                        Wikimedia\suppressWarnings();
                        $data = iconv( 'windows-1252', 'UTF-8', $data );
                        Wikimedia\restoreWarnings();
                    }

                    $commentCount = count( $comment );
                    if ( $commentCount === 0
                        || $comment[$commentCount - 1] !== $data
                    ) {
                        // Some applications repeat the same comment on each
                        // frame of an animated GIF image, so if this comment
                        // is identical to the last, only extract once.
                        $comment[] = $data;
                    }
                } elseif ( $extension_code == 0xFF ) {
                    // Application extension (Netscape info about the animated gif)
                    // or XMP (or theoretically any other type of extension block)
                    $blockLength = fread( $fh, 1 );
                    if ( strlen( $blockLength ) < 1 ) {
                        throw new Exception( "Ran out of input" );
                    }
                    $blockLength = unpack( 'C', $blockLength )[1];

                    // fread() rejects a length of 0 (ValueError on PHP 8), so only
                    // read when there is something to read.
                    $data = '';
                    if ( $blockLength > 0 ) {
                        $data = fread( $fh, $blockLength );
                        // A short read means the file is truncated. Seeking back by
                        // the full block length would then land before this block.
                        if ( strlen( $data ) < $blockLength ) {
                            throw new Exception( "Ran out of input" );
                        }
                    }

                    if ( $blockLength != 11 ) {
                        wfDebug( __METHOD__ . " GIF application block with wrong length\n" );
                        // Rewind to the length byte and skip the whole sub-block chain.
                        fseek( $fh, -( $blockLength + 1 ), SEEK_CUR );
                        self::skipBlock( $fh );
                        continue;
                    }

                    // NETSCAPE2.0 (application name for animated gif)
                    if ( $data == 'NETSCAPE2.0' ) {
                        $data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

                        if ( $data != "\x03\x01" ) {
                            throw new Exception( "Expected \x03\x01, got $data" );
                        }

                        // Unsigned little-endian integer, loop count or zero for "forever"
                        $loopData = fread( $fh, 2 );
                        if ( strlen( $loopData ) < 2 ) {
                            throw new Exception( "Ran out of input" );
                        }
                        $loopCount = unpack( 'v', $loopData )[1];

                        if ( $loopCount != 1 ) {
                            $isLooped = true;
                        }

                        // Read out terminator byte
                        fread( $fh, 1 );
                    } elseif ( $data == 'XMP DataXMP' ) {
                        // application name for XMP data.
                        // see pg 18 of XMP spec part 3.

                        $xmp = self::readBlock( $fh, true );

                        if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
                            || substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
                        ) {
                            // this is just a sanity check.
                            throw new Exception( "XMP does not have magic trailer!" );
                        }

                        // strip out trailer.
                        $xmp = substr( $xmp, 0, -257 );
                    } else {
                        // unrecognized extension block
                        fseek( $fh, -( $blockLength + 1 ), SEEK_CUR );
                        self::skipBlock( $fh );
                        continue;
                    }
                } else {
                    self::skipBlock( $fh );
                }
            } elseif ( $buf == self::$gifTerm ) {
                break;
            } else {
                if ( strlen( $buf ) < 1 ) {
                    throw new Exception( "Ran out of input" );
                }
                $byte = unpack( 'C', $buf )[1];
                throw new Exception( "At position: " . ftell( $fh ) . ", Unknown byte " . $byte );
            }
        }
    } finally {
        fclose( $fh );
    }

    return [
        'frameCount' => $frameCount,
        'looped' => $isLooped,
        'duration' => $duration,
        'xmp' => $xmp,
        'comment' => $comment,
    ];
}
259 
/**
 * Read past a colour table, discarding its contents.
 *
 * Consumes 2 ** $bpp entries of 3 bytes each from the stream. Nothing
 * is read when $bpp is not greater than zero.
 *
 * @param resource $fh Open file handle positioned at the start of the table
 * @param int $bpp Bits per pixel, as returned by decodeBPP()
 */
static function readGCT( $fh, $bpp ) {
    if ( !( $bpp > 0 ) ) {
        // No colour table present.
        return;
    }

    // Each table entry is three bytes wide.
    for ( $left = 2 ** $bpp; $left >= 1; $left-- ) {
        fread( $fh, 3 );
    }
}
273 
/**
 * Decode the bits-per-pixel value from a packed flags byte.
 *
 * The low three bits plus one give the bits per pixel; the top bit
 * says whether a colour table is present at all.
 *
 * @param string $data Binary string whose first byte is the packed flags byte
 * @return int Bits per pixel (1-8) when the top bit is set, otherwise 0
 * @throws Exception If $data is empty
 */
static function decodeBPP( $data ) {
    if ( strlen( $data ) === 0 ) {
        throw new Exception( "Ran out of input" );
    }

    $packed = ord( $data );
    $hasMap = ( $packed & 0x80 ) !== 0;

    if ( !$hasMap ) {
        return 0;
    }

    return ( $packed & 0x07 ) + 1;
}
291 
/**
 * Skip over a chain of length-prefixed sub-blocks.
 *
 * Each sub-block starts with a one-byte length followed by that many
 * bytes of data; a length of zero ends the chain. The method also
 * returns quietly if end of file is reported before a length byte is read.
 *
 * @param resource $fh Open file handle positioned at a sub-block length byte
 * @throws Exception If a length byte cannot be read
 */
static function skipBlock( $fh ) {
    for ( ; ; ) {
        if ( feof( $fh ) ) {
            return;
        }

        $lengthByte = fread( $fh, 1 );
        if ( strlen( $lengthByte ) === 0 ) {
            throw new Exception( "Ran out of input" );
        }

        $size = ord( $lengthByte );
        if ( $size === 0 ) {
            // Block terminator reached.
            return;
        }

        // Discard the sub-block payload.
        fread( $fh, $size );
    }
}
309 
/**
 * Read a chain of length-prefixed sub-blocks and return their contents.
 *
 * Sub-blocks are read until a zero length byte is found. The payloads
 * are concatenated; when $includeLengths is true each payload is
 * preceded by its raw length byte (the terminating zero byte is never
 * included).
 *
 * @param resource $fh Open file handle positioned at a sub-block length byte
 * @param bool $includeLengths Whether to keep the length bytes in the result
 * @return string The concatenated sub-block data
 * @throws Exception On end of file before the terminator, or when more than
 *  MAX_SUBBLOCKS sub-blocks are encountered
 */
static function readBlock( $fh, $includeLengths = false ) {
    $payload = '';
    $blocks = 0;

    for (
        $lengthByte = fread( $fh, 1 );
        $lengthByte !== "\0";
        $lengthByte = fread( $fh, 1 )
    ) {
        if ( ++$blocks > self::MAX_SUBBLOCKS ) {
            throw new Exception( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" );
        }
        if ( feof( $fh ) ) {
            throw new Exception( "Read error: Unexpected EOF." );
        }

        $chunk = fread( $fh, ord( $lengthByte ) );
        $payload .= $includeLengths ? $lengthByte . $chunk : $chunk;
    }

    return $payload;
}
347 }
whereas SearchGetNearMatch runs after $term
Definition: hooks.txt:2863
static readGCT( $fh, $bpp)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
static getMetadata( $filename)
static readBlock( $fh, $includeLengths=false)
Read a block.