MediaWiki  master
ZipDirectoryReader.php
Go to the documentation of this file.
1 <?php
/**
 * Read a ZIP file and call a function for each file discovered in it.
 *
 * @param string $fileName Path of the file to open
 * @param callable $callback Invoked once per directory entry with an array
 *   containing the keys 'name', 'mtime' and 'size'
 * @param array $options Reader options; the only key consulted is 'zip64'
 * @return Status The status produced by execute()
 */
public static function read( $fileName, $callback, $options = [] ) {
	return ( new self( $fileName, $callback, $options ) )->execute();
}
93 
/** @var string The file name. */
protected $fileName;

/** @var resource|false The opened file resource, as returned by fopen(). */
protected $file;

/** @var int|null The cached length of the file, or null if it has not been loaded yet. */
protected $fileLength;

/** @var string[] A segmented cache of the file contents, keyed by segment index. */
protected $buffer;

/** @var callable The file data callback. */
protected $callback;

/** The ZIP64 mode; overridden by the 'zip64' option when that is supplied. */
protected $zip64 = false;

/**
 * @var array Stored headers. Declared explicitly because the reader methods
 *   assign to all three of them.
 */
protected $eocdr, $eocdr64, $eocdr64Locator;

/** @var array Reset to an empty array at the start of execute(). */
protected $data;

/** The "extra field" ID for ZIP64 central directory entries. */
const ZIP64_EXTRA_HEADER = 0x0001;

/** The segment size for the file contents cache. */
const SEGSIZE = 16384;

/** The index of the "general field" bit for UTF-8 file names. */
const GENERAL_UTF8 = 11;

/** The index of the "general field" bit for central directory encryption. */
const GENERAL_CD_ENCRYPTED = 13;
125 
128 
/**
 * Store the settings that execute() will use later. Nothing is opened or
 * read here.
 *
 * @param string $fileName
 * @param callable $callback
 * @param array $options Only the 'zip64' key is consulted
 */
protected function __construct( $fileName, $callback, $options ) {
	$this->callback = $callback;
	$this->fileName = $fileName;

	if ( !isset( $options['zip64'] ) ) {
		// No override supplied: keep the declared default.
		return;
	}

	$this->zip64 = $options['zip64'];
}
142 
/**
 * Read the directory according to settings in $this.
 *
 * Format problems raised through error() are converted into a fatal Status.
 * Any other exception type propagates to the caller, but the file handle is
 * closed in either case.
 *
 * @return Status Good on success, fatal with the error code otherwise
 */
function execute() {
	$this->file = fopen( $this->fileName, 'r' );
	$this->data = [];
	if ( !$this->file ) {
		return Status::newFatal( 'zip-file-open-error' );
	}

	$status = Status::newGood();
	try {
		// Everything below reads $this->eocdr, so the end of central
		// directory record has to be loaded first.
		$this->readEndOfCentralDirectoryRecord();

		if ( $this->zip64 ) {
			list( $offset, $size ) = $this->findZip64CentralDirectory();
			$this->readCentralDirectory( $offset, $size );
		} else {
			// The all-ones sentinel values mark a ZIP64 archive, which
			// legacy mode must not silently accept.
			if ( $this->eocdr['CD size'] == 0xffffffff
				|| $this->eocdr['CD offset'] == 0xffffffff
				|| $this->eocdr['CD entries total'] == 0xffff
			) {
				$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
					'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
					'opening vulnerabilities on clients using OpenJDK 7 or later.' );
			}

			list( $offset, $size ) = $this->findOldCentralDirectory();
			$this->readCentralDirectory( $offset, $size );
		}
	} catch ( ZipDirectoryReaderError $e ) {
		$status->fatal( $e->getErrorCode() );
	} finally {
		// Release the handle even when an exception other than
		// ZipDirectoryReaderError (for example one thrown by the callback)
		// is on its way out.
		fclose( $this->file );
	}

	return $status;
}
182 
/**
 * Throw an error, and log a debug message.
 *
 * @param string $code Error code carried by the thrown exception
 * @param string $debugMessage Text sent to the debug log only
 * @throws ZipDirectoryReaderError Always
 */
function error( $code, $debugMessage ) {
	$logLine = __CLASS__ . ": Fatal error: $debugMessage\n";
	wfDebug( $logLine );

	throw new ZipDirectoryReaderError( $code );
}
193 
/**
 * Read the header which is at the end of the central directory,
 * unimaginatively called the "end of central directory record".
 *
 * The decoded fields are stored in $this->eocdr, together with the derived
 * keys 'EOCDR size', 'file comment' and 'position'.
 *
 * @throws ZipDirectoryReaderError Via error(), when the file is empty, has no
 *   EOCDR signature, has data after the record, or spans several disks
 */
function readEndOfCentralDirectoryRecord() {
	$info = [
		'signature' => 4,
		'disk' => 2,
		'CD start disk' => 2,
		'CD entries this disk' => 2,
		'CD entries total' => 2,
		'CD size' => 4,
		'CD offset' => 4,
		'file comment length' => 2,
	];
	$structSize = $this->getStructSize( $info );

	// Only the tail of the file is searched: the fixed part of the record
	// plus 65536 bytes of room for the trailing file comment.
	$startPos = $this->getFileLength() - 65536 - $structSize;
	if ( $startPos < 0 ) {
		$startPos = 0;
	}

	if ( $this->getFileLength() === 0 ) {
		$this->error( 'zip-wrong-format', "The file is empty." );
	}

	$block = $this->getBlock( $startPos );
	// Use the last occurrence of the signature in the searched tail.
	$sigPos = strrpos( $block, "PK\x05\x06" );
	if ( $sigPos === false ) {
		$this->error( 'zip-wrong-format',
			"zip file lacks EOCDR signature. It probably isn't a zip file." );
	}

	$this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
	$this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];

	// The record, including its comment, must run exactly to the end of the file.
	if ( $this->eocdr['EOCDR size'] != strlen( $block ) - $sigPos ) {
		// T40432: MS binary documents frequently embed ZIP files
		$this->error( 'zip-wrong-format', 'there is a ZIP signature but it is not at ' .
			'the end of the file. It could be an OLE file with a ZIP file embedded.' );
	}
	if ( $this->eocdr['disk'] !== 0
		|| $this->eocdr['CD start disk'] !== 0
	) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
	}
	$this->eocdr += $this->unpack(
		$block,
		[ 'file comment' => [ 'string', $this->eocdr['file comment length'] ] ],
		$sigPos + $structSize );
	// Absolute file offset of the record's signature.
	$this->eocdr['position'] = $startPos + $sigPos;
}
246 
/**
 * Read the header called the "ZIP64 end of central directory locator".
 *
 * The locator is expected directly in front of the end of central directory
 * record, so $this->eocdr must already be populated. The decoded fields are
 * stored in $this->eocdr64Locator.
 *
 * @throws ZipDirectoryReaderError Via error(), when the signature is wrong
 */
function readZip64EndOfCentralDirectoryLocator() {
	$info = [
		'signature' => [ 'string', 4 ],
		'eocdr64 start disk' => 4,
		'eocdr64 offset' => 8,
		'number of disks' => 4,
	];
	$structSize = $this->getStructSize( $info );

	$start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize;
	$block = $this->getBlock( $start, $structSize );
	$this->eocdr64Locator = $data = $this->unpack( $block, $info );

	if ( $data['signature'] !== "PK\x06\x07" ) {
		// Note: Java will allow this and continue to read the
		// EOCDR64, so we have to reject the upload, we can't
		// just use the EOCDR header instead.
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
	}
}
271 
/**
 * Read the header called the "ZIP64 end of central directory record".
 *
 * The record is read from the offset given by $this->eocdr64Locator, which
 * must already be populated. The decoded fields are stored in $this->eocdr64.
 *
 * @throws ZipDirectoryReaderError Via error(), when the archive spans several
 *   disks or the signature is wrong
 */
function readZip64EndOfCentralDirectoryRecord() {
	if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
		|| $this->eocdr64Locator['number of disks'] != 0
	) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
	}

	$info = [
		'signature' => [ 'string', 4 ],
		'EOCDR64 size' => 8,
		'version made by' => 2,
		'version needed' => 2,
		'disk' => 4,
		'CD start disk' => 4,
		'CD entries this disk' => 8,
		'CD entries total' => 8,
		'CD size' => 8,
		'CD offset' => 8
	];
	$structSize = $this->getStructSize( $info );
	$block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
	$this->eocdr64 = $data = $this->unpack( $block, $info );
	if ( $data['signature'] !== "PK\x06\x06" ) {
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
	}
	if ( $data['disk'] !== 0
		|| $data['CD start disk'] !== 0
	) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
	}
}
307 
/**
 * Find the location of the central directory, as would be seen by a
 * non-ZIP64 reader.
 *
 * Only the fields of $this->eocdr are consulted.
 *
 * @return array List containing offset, size
 * @throws ZipDirectoryReaderError Via error(), when the directory does not
 *   end exactly where the end of central directory record starts
 */
function findOldCentralDirectory() {
	$size = $this->eocdr['CD size'];
	$offset = $this->eocdr['CD offset'];
	$endPos = $this->eocdr['position'];

	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	if ( $offset + $size != $endPos ) {
		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
			'of central directory record' );
	}

	return [ $offset, $size ];
}
328 
/**
 * Find the location of the central directory, as would be seen by a
 * ZIP64-compliant reader.
 *
 * The ZIP64 headers are loaded and preferred only when one of the original
 * fields holds its all-ones sentinel value.
 *
 * @return array List containing offset, size
 * @throws ZipDirectoryReaderError Via error(), when a ZIP64 header is invalid
 *   or the directory does not end where the governing header starts
 */
function findZip64CentralDirectory() {
	// The spec is ambiguous about the exact rules of precedence between the
	// ZIP64 headers and the original headers. Here we follow zip_util.c
	// from OpenJDK 7.
	$size = $this->eocdr['CD size'];
	$offset = $this->eocdr['CD offset'];
	$numEntries = $this->eocdr['CD entries total'];
	$endPos = $this->eocdr['position'];
	if ( $size == 0xffffffff
		|| $offset == 0xffffffff
		|| $numEntries == 0xffff
	) {
		// Load the locator; without this call the isset() below could
		// never succeed.
		$this->readZip64EndOfCentralDirectoryLocator();

		if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
			// Follow the locator to the ZIP64 record itself.
			$this->readZip64EndOfCentralDirectoryRecord();
			if ( isset( $this->eocdr64['CD offset'] ) ) {
				$size = $this->eocdr64['CD size'];
				$offset = $this->eocdr64['CD offset'];
				$endPos = $this->eocdr64Locator['eocdr64 offset'];
			}
		}
	}
	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	if ( $offset + $size != $endPos ) {
		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
			'of central directory record' );
	}

	return [ $offset, $size ];
}
367 
/**
 * Read the central directory at the given location.
 *
 * Every entry is decoded and handed to the callback as an array with the
 * keys 'name' (converted to UTF-8 unless flagged as UTF-8 already), 'mtime'
 * (14 digits, YYYYMMDDHHMMSS) and 'size' (the uncompressed size).
 *
 * @param int $offset File position at which the directory starts
 * @param int $size Length of the directory in bytes
 * @throws ZipDirectoryReaderError Via error(), on a bad entry signature or an
 *   encrypted central directory
 */
function readCentralDirectory( $offset, $size ) {
	$fixedInfo = [
		'signature' => [ 'string', 4 ],
		'version made by' => 2,
		'version needed' => 2,
		'general bits' => 2,
		'compression method' => 2,
		'mod time' => 2,
		'mod date' => 2,
		'crc-32' => 4,
		'compressed size' => 4,
		'uncompressed size' => 4,
		'name length' => 2,
		'extra field length' => 2,
		'comment length' => 2,
		'disk number start' => 2,
		'internal attrs' => 2,
		'external attrs' => 4,
		'local header offset' => 4,
	];
	$fixedSize = $this->getStructSize( $fixedInfo );
	$block = $this->getBlock( $offset, $size );

	for ( $pos = 0; $pos < $size; ) {
		// Fixed-length part of the entry
		$entry = $this->unpack( $block, $fixedInfo, $pos );
		$pos += $fixedSize;

		if ( $entry['signature'] !== "PK\x01\x02" ) {
			$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
		}

		// Variable-length part, sized by the fields just read
		$entry += $this->unpack(
			$block,
			[
				'name' => [ 'string', $entry['name length'] ],
				'extra field' => [ 'string', $entry['extra field length'] ],
				'comment' => [ 'string', $entry['comment length'] ],
			],
			$pos
		);
		$pos += $entry['name length'] + $entry['extra field length'] + $entry['comment length'];

		// In ZIP64 mode an all-ones field means the real value is in the
		// ZIP64 extra field
		$hasSentinel = $entry['compressed size'] == 0xffffffff
			|| $entry['uncompressed size'] == 0xffffffff
			|| $entry['local header offset'] == 0xffffffff;
		if ( $this->zip64 && $hasSentinel ) {
			$zip64Data = $this->unpackZip64Extra( $entry['extra field'] );
			if ( $zip64Data ) {
				// Values from the extra field take precedence
				$entry = $zip64Data + $entry;
			}
		}

		if ( $this->testBit( $entry['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
			$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
		}

		// Convert the timestamp into MediaWiki format
		// For the format, please see the MS-DOS 2.0 Programmer's Reference,
		// pages 3-5 and 3-6.
		$dosDate = $entry['mod date'];
		$dosTime = $entry['mod time'];
		$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
			1980 + ( $dosDate >> 9 ),
			( $dosDate >> 5 ) & 15,
			$dosDate & 31,
			( $dosTime >> 11 ) & 31,
			( $dosTime >> 5 ) & 63,
			( $dosTime & 31 ) * 2
		);

		// Convert the character set in the file name
		$name = $this->testBit( $entry['general bits'], self::GENERAL_UTF8 )
			? $entry['name']
			: iconv( 'CP437', 'UTF-8', $entry['name'] );

		// Compile a data array for the user, with a sensible format
		call_user_func( $this->callback, [
			'name' => $name,
			'mtime' => $timestamp,
			'size' => $entry['uncompressed size'],
		] );
	}
}
461 
/**
 * Interpret ZIP64 "extra field" data and return an associative array.
 *
 * The extra field is walked as a sequence of (id, size, data) records; the
 * first record carrying the ZIP64 id is decoded and returned.
 *
 * @param string $extraField Raw extra field bytes of a directory entry
 * @return array|bool The decoded ZIP64 values, or false if no ZIP64 record
 *   is present
 */
function unpackZip64Extra( $extraField ) {
	$headerLayout = [
		'id' => 2,
		'size' => 2,
	];
	$zip64Layout = [
		'uncompressed size' => 8,
		'compressed size' => 8,
		'local header offset' => 8,
		'disk number start' => 4,
	];
	$headerSize = $this->getStructSize( $headerLayout );
	$fieldLength = strlen( $extraField );

	$cursor = 0;
	while ( $cursor < $fieldLength ) {
		$header = $this->unpack( $extraField, $headerLayout, $cursor );
		$cursor += $headerSize;

		$payload = $this->unpack(
			$extraField,
			[ 'data' => [ 'string', $header['size'] ] ],
			$cursor
		);
		$cursor += $header['size'];

		if ( $header['id'] == self::ZIP64_EXTRA_HEADER ) {
			return $this->unpack( $payload['data'], $zip64Layout );
		}
	}

	return false;
}
497 
/**
 * Get the length of the file.
 *
 * The value is taken from fstat() on first use and cached in
 * $this->fileLength afterwards.
 *
 * @return int
 * @throws ZipDirectoryReaderError Via error(), when fstat() fails
 */
function getFileLength() {
	if ( $this->fileLength === null ) {
		$stat = fstat( $this->file );
		if ( $stat === false ) {
			// Fail explicitly rather than caching null and letting later
			// arithmetic treat the length as zero.
			$this->error( 'zip-bad', 'fstat() failed, unable to determine the file length' );
		}
		$this->fileLength = $stat['size'];
	}

	return $this->fileLength;
}
510 
/**
 * Get the file contents from a given offset.
 *
 * The data is assembled from the cached segments returned by getSegment().
 *
 * @param int $start The position in the file at which to start
 * @param int|null $length The number of bytes to return, or null to read
 *   up to the end of the file
 * @return string Exactly $length bytes
 * @throws ZipDirectoryReaderError Via error(), when the requested range is
 *   outside the file or less data than requested could be obtained
 */
function getBlock( $start, $length = null ) {
	$fileLength = $this->getFileLength();
	if ( $start >= $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested position $start, " .
			"file length is $fileLength" );
	}
	if ( $length === null ) {
		$length = $fileLength - $start;
	}
	$end = $start + $length;
	if ( $end > $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested end position $end, " .
			"file length is $fileLength" );
	}
	$startSeg = floor( $start / self::SEGSIZE );
	$endSeg = ceil( $end / self::SEGSIZE );

	$block = '';
	// $endSeg is already rounded up, so it is the first segment lying wholly
	// past the requested range. The bound is exclusive to avoid fetching and
	// caching a segment whose bytes would be discarded below.
	for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
		$block .= $this->getSegment( $segIndex );
	}

	// Trim the leading bytes of the first segment and anything past $length.
	$block = substr( $block,
		$start - $startSeg * self::SEGSIZE,
		$length );

	if ( strlen( $block ) < $length ) {
		$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
	}

	return $block;
}
553 
/**
 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
 * of length self::SEGSIZE. The result is cached in $this->buffer.
 *
 * A segment that starts at or beyond the end of the file is an empty string.
 *
 * @param int $segIndex
 * @return string
 * @throws ZipDirectoryReaderError Via error(), when seeking or reading fails
 */
function getSegment( $segIndex ) {
	if ( isset( $this->buffer[$segIndex] ) ) {
		// Already loaded
		return $this->buffer[$segIndex];
	}

	$bytePos = $segIndex * self::SEGSIZE;
	if ( $bytePos >= $this->getFileLength() ) {
		$this->buffer[$segIndex] = '';

		return '';
	}

	// fseek() returns a non-zero value on failure
	if ( fseek( $this->file, $bytePos ) ) {
		$this->error( 'zip-bad', "seek to $bytePos failed" );
	}

	$seg = fread( $this->file, self::SEGSIZE );
	if ( $seg === false ) {
		$this->error( 'zip-bad', "read from $bytePos failed" );
	}
	$this->buffer[$segIndex] = $seg;

	return $seg;
}
587 
/**
 * Get the size of a structure in bytes.
 *
 * @param array $struct Field definitions in the format accepted by unpack():
 *   each value is either a byte count or a [ type name, byte count ] pair
 * @return int The sum of all byte counts
 */
function getStructSize( $struct ) {
	$total = 0;
	foreach ( $struct as $field ) {
		// For the pair form the byte count is the second element
		$total += is_array( $field ) ? $field[1] : $field;
	}

	return $total;
}
606 
/**
 * Unpack a binary structure.
 *
 * @param string $string The binary data input
 * @param array $struct Map of field name to field definition. A plain number
 *   is the byte length of an unsigned little-endian integer; the pair
 *   [ 'string', N ] takes N raw bytes as they are
 * @param int $offset Position in $string at which the structure starts
 * @return array Map of field name to decoded value
 * @throws ZipDirectoryReaderError Via error(), when the structure does not
 *   fit into $string or an integer exceeds 2 ** 52
 * @throws MWException When a pair names a type other than 'string'
 */
function unpack( $string, $struct, $offset = 0 ) {
	if ( $offset + $this->getStructSize( $struct ) > strlen( $string ) ) {
		$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
	}

	$result = [];
	$cursor = $offset;
	foreach ( $struct as $field => $spec ) {
		if ( is_array( $spec ) ) {
			list( $typeName, $fieldSize ) = $spec;
			if ( $typeName != 'string' ) {
				throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
			}
			$result[$field] = substr( $string, $cursor, $fieldSize );
			$cursor += $fieldSize;
			continue;
		}

		// Unsigned little-endian integer
		$byteCount = (int)$spec;

		// Accumulate from the most significant (last) byte down. PHP
		// upgrades the value to floating point by itself if necessary.
		$number = 0;
		for ( $i = $byteCount - 1; $i >= 0; $i-- ) {
			$number = $number * 256 + ord( $string[$cursor + $i] );
		}

		// Throw an exception if there was loss of precision
		if ( $number > 2 ** 52 ) {
			$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
				'This could happen if we tried to unpack a 64-bit structure ' .
				'at an invalid location.' );
		}
		$result[$field] = $number;
		$cursor += $byteCount;
	}

	return $result;
}
673 
/**
 * Returns a bit from a given position in an integer value, converted to
 * boolean.
 *
 * @param int $value
 * @param int $bitIndex The index of the bit, where 0 is the least
 *   significant bit
 * @return bool
 */
function testBit( $value, $bitIndex ) {
	$shifted = $value >> $bitIndex;

	return ( $shifted & 1 ) === 1;
}
685 
/**
 * Debugging helper function which dumps a string in hexdump -C format.
 *
 * Each output line covers 16 bytes: the offset, the bytes in hexadecimal
 * with an extra gap after the eighth, then the printable characters between
 * vertical bars. Nothing is printed for an empty string.
 *
 * @param string $s
 */
function hexDump( $s ) {
	$n = strlen( $s );
	for ( $i = 0; $i < $n; $i += 16 ) {
		printf( "%08X ", $i );
		for ( $j = 0; $j < 16; $j++ ) {
			print " ";
			if ( $j == 8 ) {
				print " ";
			}
			if ( $i + $j >= $n ) {
				// Past the end of the data: pad with the width of a hex
				// byte (two characters) so the text column stays aligned
				// on a short last line.
				print "  ";
			} else {
				printf( "%02X", ord( $s[$i + $j] ) );
			}
		}

		print " |";
		for ( $j = 0; $j < 16; $j++ ) {
			if ( $i + $j >= $n ) {
				print " ";
			} elseif ( ctype_print( $s[$i + $j] ) ) {
				print $s[$i + $j];
			} else {
				// Non-printable byte
				print '.';
			}
		}
		print "|\n";
	}
}
719 }
findZip64CentralDirectory()
Find the location of the central directory, as would be seen by a ZIP64-compliant reader...
static newFatal( $message,... $parameters)
Factory function for fatal errors.
Definition: StatusValue.php:69
unpack( $string, $struct, $offset=0)
Unpack a binary structure.
$callback
The file data callback.
const GENERAL_CD_ENCRYPTED
The index of the "general field" bit for central directory encryption.
getStructSize( $struct)
Get the size of a structure in bytes.
const GENERAL_UTF8
The index of the "general field" bit for UTF-8 file names.
getBlock( $start, $length=null)
Get the file contents from a given offset.
getFileLength()
Get the length of the file.
$zip64
The ZIP64 mode.
execute()
Read the directory according to settings in $this.
getSegment( $segIndex)
Get a section of the file starting at position $segIndex * self::SEGSIZE, of length self::SEGSIZE. The result is cached.
readZip64EndOfCentralDirectoryRecord()
Read the header called the "ZIP64 end of central directory record".
readZip64EndOfCentralDirectoryLocator()
Read the header called the "ZIP64 end of central directory locator".
const SEGSIZE
The segment size for the file contents cache.
$eocdr
Stored headers.
testBit( $value, $bitIndex)
Returns a bit from a given position in an integer value, converted to boolean.
static newGood( $value=null)
Factory function for good results.
Definition: StatusValue.php:81
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
readEndOfCentralDirectoryRecord()
Read the header which is at the end of the central directory, unimaginatively called the "end of central directory record".
A class for reading ZIP file directories, for the purposes of upload verification.
__construct( $fileName, $callback, $options)
findOldCentralDirectory()
Find the location of the central directory, as would be seen by a non-ZIP64 reader.
hexDump( $s)
Debugging helper function which dumps a string in hexdump -C format.
Internal exception class.
readCentralDirectory( $offset, $size)
Read the central directory at the given location.
unpackZip64Extra( $extraField)
Interpret ZIP64 "extra field" data and return an associative array.
$file
The opened file resource.
const ZIP64_EXTRA_HEADER
The "extra field" ID for ZIP64 central directory entries.
$fileName
The file name.
static read( $fileName, $callback, $options=[])
Read a ZIP file and call a function for each file discovered in it.
error( $code, $debugMessage)
Throw an error, and log a debug message.
$buffer
A segmented cache of the file contents.
$fileLength
The cached length of the file, or null if it has not been loaded yet.