MediaWiki  master
ZipDirectoryReader.php
Go to the documentation of this file.
1 <?php
/**
 * Read a ZIP file and call a function for each file discovered in it.
 *
 * The callback is invoked once per central directory entry with a single
 * associative array containing the keys 'name', 'mtime' and 'size'.
 *
 * @param string $fileName The path of the file to read
 * @param callable $callback The per-entry callback
 * @param array $options Options; the only key read by this class is 'zip64'
 * @return Status The result of the read
 */
public static function read( $fileName, $callback, $options = [] ) {
	return ( new self( $fileName, $callback, $options ) )->execute();
}
93 
/** @var string The file name */
protected $fileName;

/** @var resource|false The opened file resource */
protected $file;

/** @var int|null The cached length of the file, or null if it has not been loaded yet. */
protected $fileLength;

/** @var string[]|null A segmented cache of the file contents, keyed by segment index */
protected $buffer;

/** @var callable The file data callback */
protected $callback;

/** @var bool The ZIP64 mode */
protected $zip64 = false;

/** @var array|null Stored header: the end of central directory record */
protected $eocdr;

/** @var array|null Stored header: the ZIP64 end of central directory record */
protected $eocdr64;

/** @var array|null Stored header: the ZIP64 end of central directory locator */
protected $eocdr64Locator;

/** @var array|null Reset to an empty array at the start of execute() */
protected $data;

/** The "extra field" ID for ZIP64 central directory entries */
private const ZIP64_EXTRA_HEADER = 0x0001;

/** The segment size for the file contents cache */
private const SEGSIZE = 16384;

/** The index of the "general field" bit for UTF-8 file names */
private const GENERAL_UTF8 = 11;

/** The index of the "general field" bit for central directory encryption */
private const GENERAL_CD_ENCRYPTED = 13;
128 
/**
 * Store the construction parameters; no I/O is done here.
 *
 * @param string $fileName
 * @param callable $callback
 * @param array $options
 */
protected function __construct( $fileName, $callback, $options ) {
	$this->callback = $callback;
	$this->fileName = $fileName;

	// A missing (or null) 'zip64' option leaves the declared default in place.
	$this->zip64 = $options['zip64'] ?? $this->zip64;
}
142 
/**
 * Read the directory according to settings in $this.
 *
 * @return Status Good on success; fatal with 'zip-file-open-error' if the
 *   file cannot be opened, or with the code carried by a
 *   ZipDirectoryReaderError raised while parsing.
 */
private function execute() {
	// 'b' keeps the read binary-safe on platforms that distinguish text mode.
	$this->file = fopen( $this->fileName, 'rb' );
	$this->data = [];
	if ( !$this->file ) {
		return Status::newFatal( 'zip-file-open-error' );
	}

	$status = Status::newGood();
	try {
		// Populate $this->eocdr; everything below reads from it.
		$this->readEndOfCentralDirectoryRecord();

		if ( $this->zip64 ) {
			list( $offset, $size ) = $this->findZip64CentralDirectory();
			$this->readCentralDirectory( $offset, $size );
		} else {
			if ( $this->eocdr['CD size'] == 0xffffffff
				|| $this->eocdr['CD offset'] == 0xffffffff
				|| $this->eocdr['CD entries total'] == 0xffff
			) {
				$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
					'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
					'opening vulnerabilities on clients using OpenJDK 7 or later.' );
			}

			list( $offset, $size ) = $this->findOldCentralDirectory();
			$this->readCentralDirectory( $offset, $size );
		}
	} catch ( ZipDirectoryReaderError $e ) {
		$status->fatal( $e->getErrorCode() );
	} finally {
		// Release the handle even if some other exception (for example one
		// thrown by the callback) propagates out of this method.
		fclose( $this->file );
	}

	return $status;
}
182 
/**
 * Throw an error, and log a debug message.
 *
 * @param string $code The error code carried by the exception
 * @param string $debugMessage Text sent to the debug log only
 * @throws ZipDirectoryReaderError Always
 */
private function error( $code, $debugMessage ) {
	$logLine = __CLASS__ . ": Fatal error: $debugMessage";
	wfDebug( $logLine );

	throw new ZipDirectoryReaderError( $code );
}
193 
/**
 * Read the header which is at the end of the central directory,
 * the "end of central directory record" (EOCDR), and store it in
 * $this->eocdr together with its size ('EOCDR size'), its comment
 * ('file comment') and its absolute file position ('position').
 *
 * @throws ZipDirectoryReaderError If the file is empty, has no EOCDR
 *   signature, has trailing data after the record, or spans several disks
 */
private function readEndOfCentralDirectoryRecord() {
	$layout = [
		'signature' => 4,
		'disk' => 2,
		'CD start disk' => 2,
		'CD entries this disk' => 2,
		'CD entries total' => 2,
		'CD size' => 4,
		'CD offset' => 4,
		'file comment length' => 2,
	];
	$fixedLength = $this->getStructSize( $layout );

	if ( $this->getFileLength() === 0 ) {
		$this->error( 'zip-wrong-format', "The file is empty." );
	}

	// Only the tail of the file is searched: the fixed part of the record
	// plus 65536 bytes, clamped to the start of the file.
	$searchFrom = max( 0, $this->getFileLength() - 65536 - $fixedLength );

	$tail = $this->getBlock( $searchFrom );
	$sigOffset = strrpos( $tail, "PK\x05\x06" );
	if ( $sigOffset === false ) {
		$this->error( 'zip-wrong-format',
			"zip file lacks EOCDR signature. It probably isn't a zip file." );
	}

	$this->eocdr = $this->unpack( substr( $tail, $sigOffset ), $layout );
	$recordSize = $fixedLength + $this->eocdr['file comment length'];
	$this->eocdr['EOCDR size'] = $recordSize;

	// The record, including its comment, must run exactly to the end of the file.
	$bytesFromSignature = strlen( $tail ) - $sigOffset;
	if ( $recordSize != $bytesFromSignature ) {
		// T40432: MS binary documents frequently embed ZIP files
		$this->error( 'zip-wrong-format', 'there is a ZIP signature but it is not at ' .
			'the end of the file. It could be an OLE file with a ZIP file embedded.' );
	}

	$singleDisk = $this->eocdr['disk'] === 0 && $this->eocdr['CD start disk'] === 0;
	if ( !$singleDisk ) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
	}

	$comment = $this->unpack(
		$tail,
		[ 'file comment' => [ 'string', $this->eocdr['file comment length'] ] ],
		$sigOffset + $fixedLength
	);
	$this->eocdr += $comment;
	$this->eocdr['position'] = $searchFrom + $sigOffset;
}
246 
/**
 * Read the header called the "ZIP64 end of central directory locator" and
 * store it in $this->eocdr64Locator.
 *
 * The locator is expected immediately before the end of central directory
 * record, so $this->eocdr['EOCDR size'] must already be set.
 *
 * @throws ZipDirectoryReaderError If the locator signature is wrong or the
 *   block cannot be read
 */
private function readZip64EndOfCentralDirectoryLocator() {
	$info = [
		'signature' => [ 'string', 4 ],
		'eocdr64 start disk' => 4,
		'eocdr64 offset' => 8,
		'number of disks' => 4,
	];
	$structSize = $this->getStructSize( $info );

	$start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize;
	$block = $this->getBlock( $start, $structSize );
	$this->eocdr64Locator = $data = $this->unpack( $block, $info );

	if ( $data['signature'] !== "PK\x06\x07" ) {
		// Note: Java will allow this and continue to read the
		// EOCDR64, so we have to reject the upload, we can't
		// just use the EOCDR header instead.
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
	}
}
271 
/**
 * Read the header called the "ZIP64 end of central directory record" and
 * store it in $this->eocdr64.
 *
 * The record is located through $this->eocdr64Locator['eocdr64 offset'],
 * so the locator must have been read first.
 *
 * @throws ZipDirectoryReaderError If the record signature is wrong or the
 *   archive spans more than one disk
 */
private function readZip64EndOfCentralDirectoryRecord() {
	if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
		|| $this->eocdr64Locator['number of disks'] != 0
	) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
	}

	$info = [
		'signature' => [ 'string', 4 ],
		'EOCDR64 size' => 8,
		'version made by' => 2,
		'version needed' => 2,
		'disk' => 4,
		'CD start disk' => 4,
		'CD entries this disk' => 8,
		'CD entries total' => 8,
		'CD size' => 8,
		'CD offset' => 8
	];
	$structSize = $this->getStructSize( $info );
	$block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
	$this->eocdr64 = $data = $this->unpack( $block, $info );
	if ( $data['signature'] !== "PK\x06\x06" ) {
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
	}
	if ( $data['disk'] !== 0
		|| $data['CD start disk'] !== 0
	) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
	}
}
307 
/**
 * Find the location of the central directory, as would be seen by a
 * non-ZIP64 reader.
 *
 * @return array List containing offset and size, in that order
 * @throws ZipDirectoryReaderError If the directory does not end where the
 *   end of central directory record begins
 */
private function findOldCentralDirectory() {
	$record = $this->eocdr;
	$cdOffset = $record['CD offset'];
	$cdSize = $record['CD size'];

	// Readers disagree on whether to trust the offset field or the position
	// of the EOCDR itself, so only accept files where the two coincide.
	if ( $cdOffset + $cdSize != $record['position'] ) {
		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
			'of central directory record' );
	}

	return [ $cdOffset, $cdSize ];
}
328 
/**
 * Find the location of the central directory, as would be seen by a
 * ZIP64-compliant reader.
 *
 * @return array List containing offset and size, in that order
 * @throws ZipDirectoryReaderError If a ZIP64 header is malformed, or the
 *   directory does not end where the relevant end record begins
 */
private function findZip64CentralDirectory() {
	// The spec is ambiguous about the exact rules of precedence between the
	// ZIP64 headers and the original headers. Here we follow zip_util.c
	// from OpenJDK 7.
	$size = $this->eocdr['CD size'];
	$offset = $this->eocdr['CD offset'];
	$numEntries = $this->eocdr['CD entries total'];
	$endPos = $this->eocdr['position'];
	if ( $size == 0xffffffff
		|| $offset == 0xffffffff
		|| $numEntries == 0xffff
	) {
		// A saturated legacy field means the ZIP64 headers have to be
		// consulted, so read them before testing for their contents.
		$this->readZip64EndOfCentralDirectoryLocator();

		if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
			$this->readZip64EndOfCentralDirectoryRecord();
			if ( isset( $this->eocdr64['CD offset'] ) ) {
				$size = $this->eocdr64['CD size'];
				$offset = $this->eocdr64['CD offset'];
				$endPos = $this->eocdr64Locator['eocdr64 offset'];
			}
		}
	}
	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	if ( $offset + $size != $endPos ) {
		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
			'of central directory record' );
	}

	return [ $offset, $size ];
}
367 
/**
 * Read the central directory at the given location, invoking the file
 * data callback once for every entry found.
 *
 * @param int $offset Byte position of the directory within the file
 * @param int $size Length of the directory in bytes
 * @throws ZipDirectoryReaderError If an entry is malformed or the
 *   directory is marked as encrypted
 */
private function readCentralDirectory( $offset, $size ) {
	$directory = $this->getBlock( $offset, $size );

	// Fixed-width leading part of every directory entry.
	$headerLayout = [
		'signature' => [ 'string', 4 ],
		'version made by' => 2,
		'version needed' => 2,
		'general bits' => 2,
		'compression method' => 2,
		'mod time' => 2,
		'mod date' => 2,
		'crc-32' => 4,
		'compressed size' => 4,
		'uncompressed size' => 4,
		'name length' => 2,
		'extra field length' => 2,
		'comment length' => 2,
		'disk number start' => 2,
		'internal attrs' => 2,
		'external attrs' => 4,
		'local header offset' => 4,
	];
	$headerSize = $this->getStructSize( $headerLayout );

	for ( $cursor = 0; $cursor < $size; ) {
		$entry = $this->unpack( $directory, $headerLayout, $cursor );
		$cursor += $headerSize;

		if ( $entry['signature'] !== "PK\x01\x02" ) {
			$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
		}

		// Variable-width trailing part; its widths come from the fixed part.
		$tailLayout = [
			'name' => [ 'string', $entry['name length'] ],
			'extra field' => [ 'string', $entry['extra field length'] ],
			'comment' => [ 'string', $entry['comment length'] ],
		];
		$entry += $this->unpack( $directory, $tailLayout, $cursor );
		$cursor += $this->getStructSize( $tailLayout );

		// In ZIP64 mode a saturated 32-bit field defers to the extra field.
		$saturated = $entry['compressed size'] == 0xffffffff
			|| $entry['uncompressed size'] == 0xffffffff
			|| $entry['local header offset'] == 0xffffffff;
		if ( $this->zip64 && $saturated ) {
			$zip64Entry = $this->unpackZip64Extra( $entry['extra field'] );
			if ( $zip64Entry ) {
				$entry = $zip64Entry + $entry;
			}
		}

		if ( $this->testBit( $entry['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
			$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
		}

		// Convert the timestamp into MediaWiki format
		// For the format, please see the MS-DOS 2.0 Programmer's Reference,
		// pages 3-5 and 3-6.
		$dosTime = $entry['mod time'];
		$dosDate = $entry['mod date'];
		$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
			1980 + ( $dosDate >> 9 ),
			( $dosDate >> 5 ) & 15,
			$dosDate & 31,
			( $dosTime >> 11 ) & 31,
			( $dosTime >> 5 ) & 63,
			( $dosTime & 31 ) * 2
		);

		// Names are taken as-is when flagged UTF-8, otherwise converted from CP437.
		$isUtf8 = $this->testBit( $entry['general bits'], self::GENERAL_UTF8 );
		$name = $isUtf8 ? $entry['name'] : iconv( 'CP437', 'UTF-8', $entry['name'] );

		// Hand the user a compact summary of the entry.
		call_user_func( $this->callback, [
			'name' => $name,
			'mtime' => $timestamp,
			'size' => $entry['uncompressed size'],
		] );
	}
}
461 
/**
 * Interpret ZIP64 "extra field" data and return an associative array.
 *
 * The extra field is walked as a sequence of (id, size, data) records; the
 * first record whose id is ZIP64_EXTRA_HEADER is unpacked and returned.
 *
 * @param string $extraField The raw extra field of a directory entry
 * @return array|bool The unpacked ZIP64 values, or false if no ZIP64
 *   record is present
 */
private function unpackZip64Extra( $extraField ) {
	$recordHeader = [
		'id' => 2,
		'size' => 2,
	];
	$recordHeaderSize = $this->getStructSize( $recordHeader );

	$zip64Layout = [
		'uncompressed size' => 8,
		'compressed size' => 8,
		'local header offset' => 8,
		'disk number start' => 4,
	];

	$fieldLength = strlen( $extraField );
	$cursor = 0;
	while ( $cursor < $fieldLength ) {
		$record = $this->unpack( $extraField, $recordHeader, $cursor );
		$cursor += $recordHeaderSize;

		$payload = $this->unpack( $extraField,
			[ 'data' => [ 'string', $record['size'] ] ],
			$cursor );
		$cursor += $record['size'];

		if ( $record['id'] != self::ZIP64_EXTRA_HEADER ) {
			continue;
		}

		return $this->unpack( $payload['data'], $zip64Layout );
	}

	return false;
}
497 
/**
 * Get the length of the file, asking the file handle on first use and
 * answering from the cached value afterwards.
 *
 * @return int
 */
private function getFileLength() {
	if ( $this->fileLength !== null ) {
		return $this->fileLength;
	}

	$info = fstat( $this->file );
	$this->fileLength = $info['size'];

	return $this->fileLength;
}
510 
/**
 * Get the file contents from a given offset. If there are not enough bytes
 * in the file to satisfy the request, an exception will be thrown.
 *
 * @param int $start The byte offset of the start of the block
 * @param int|null $length The number of bytes to return. If omitted, the
 *   remainder of the file will be returned.
 * @return string
 * @throws ZipDirectoryReaderError If the requested range is not entirely
 *   inside the file
 */
private function getBlock( $start, $length = null ) {
	$fileLength = $this->getFileLength();
	// Callers compute $start by subtraction, so it can go negative for
	// truncated files; reject that here instead of relying on fseek() failing.
	if ( $start < 0 || $start >= $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested position $start, " .
			"file length is $fileLength" );
	}
	if ( $length === null ) {
		$length = $fileLength - $start;
	}
	$end = $start + $length;
	if ( $end > $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested end position $end, " .
			"file length is $fileLength" );
	}
	if ( $length == 0 ) {
		// Nothing to read; avoid touching the segment cache at all.
		return '';
	}

	// Cast to int so the segment cache is indexed by integers, not floats.
	$startSeg = (int)floor( $start / self::SEGSIZE );
	// ceil() yields the index of the first segment *after* the range, so
	// the loop bound is exclusive; segment $endSeg itself is never needed.
	$endSeg = (int)ceil( $end / self::SEGSIZE );

	$block = '';
	for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
		$block .= $this->getSegment( $segIndex );
	}

	$block = substr( $block,
		$start - $startSeg * self::SEGSIZE,
		$length );

	if ( strlen( $block ) < $length ) {
		$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
	}

	return $block;
}
553 
/**
 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
 * of length self::SEGSIZE. The result is cached in $this->buffer.
 *
 * A segment that starts at or beyond the end of the file is returned (and
 * cached) as an empty string.
 *
 * @param int $segIndex
 * @return string
 * @throws ZipDirectoryReaderError If seeking or reading fails
 */
private function getSegment( $segIndex ) {
	if ( isset( $this->buffer[$segIndex] ) ) {
		return $this->buffer[$segIndex];
	}

	$bytePos = $segIndex * self::SEGSIZE;
	if ( $bytePos >= $this->getFileLength() ) {
		$this->buffer[$segIndex] = '';

		return '';
	}

	// fseek() reports success as 0.
	if ( fseek( $this->file, $bytePos ) !== 0 ) {
		$this->error( 'zip-bad', "seek to $bytePos failed" );
	}

	$contents = fread( $this->file, self::SEGSIZE );
	if ( $contents === false ) {
		$this->error( 'zip-bad', "read from $bytePos failed" );
	}
	$this->buffer[$segIndex] = $contents;

	return $this->buffer[$segIndex];
}
587 
/**
 * Get the size of a structure in bytes.
 *
 * Each element of $struct is either a plain byte count, or an array whose
 * second element is the byte count.
 *
 * @param array $struct A structure definition as accepted by unpack()
 * @return int
 */
private function getStructSize( $struct ) {
	$total = 0;
	foreach ( $struct as $field ) {
		$total += is_array( $field ) ? $field[1] : $field;
	}

	return $total;
}
606 
/**
 * Unpack a binary structure.
 *
 * @param string $string The binary data input
 * @param array $struct Map of field name to type. A type is either an
 *   integer, read as an unsigned little-endian integer of that many bytes,
 *   or an array of [ 'string', byte count ], read as a raw substring.
 * @param int $offset The byte position in $string at which to start
 * @return array Unpacked values, keyed by the field names of $struct
 * @throws ZipDirectoryReaderError If the structure does not fit in
 *   $string, or an integer exceeds 2 ** 52
 * @throws MWException If an array type names anything other than 'string'
 */
private function unpack( $string, $struct, $offset = 0 ) {
	$needed = $this->getStructSize( $struct );
	if ( $offset + $needed > strlen( $string ) ) {
		$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
	}

	$result = [];
	$cursor = $offset;
	foreach ( $struct as $fieldName => $fieldType ) {
		if ( is_array( $fieldType ) ) {
			list( $typeName, $width ) = $fieldType;
			switch ( $typeName ) {
				case 'string':
					$result[$fieldName] = substr( $string, $cursor, $width );
					$cursor += $width;
					break;
				default:
					throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
			}
			continue;
		}

		// Unsigned little-endian integer
		$width = intval( $fieldType );

		// Accumulate from the most significant byte down. Plain arithmetic
		// is used so that PHP upgrades the value to floating point if needed.
		$number = 0;
		for ( $i = $width; $i-- > 0; ) {
			$number = $number * 256 + ord( $string[$cursor + $i] );
		}

		// Refuse values beyond the range a double represents exactly
		if ( $number > 2 ** 52 ) {
			$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
				'This could happen if we tried to unpack a 64-bit structure ' .
				'at an invalid location.' );
		}
		$result[$fieldName] = $number;
		$cursor += $width;
	}

	return $result;
}
673 
/**
 * Returns a bit from a given position in an integer value, converted to
 * boolean.
 *
 * @param int $value
 * @param int $bitIndex The index of the bit, where 0 is the LSB.
 * @return bool
 */
private function testBit( $value, $bitIndex ) {
	$bit = ( $value >> $bitIndex ) & 1;

	return $bit === 1;
}
685 }
ZipDirectoryReader\getBlock
getBlock( $start, $length=null)
Get the file contents from a given offset.
Definition: ZipDirectoryReader.php:521
ZipDirectoryReader\readZip64EndOfCentralDirectoryLocator
readZip64EndOfCentralDirectoryLocator()
Read the header called the "ZIP64 end of central directory locator".
Definition: ZipDirectoryReader.php:251
ZipDirectoryReader\getFileLength
getFileLength()
Get the length of the file.
Definition: ZipDirectoryReader.php:502
StatusValue\newFatal
static newFatal( $message,... $parameters)
Factory function for fatal errors.
Definition: StatusValue.php:69
ZipDirectoryReader\findZip64CentralDirectory
findZip64CentralDirectory()
Find the location of the central directory, as would be seen by a ZIP64-compliant reader.
Definition: ZipDirectoryReader.php:335
ZipDirectoryReader\unpack
unpack( $string, $struct, $offset=0)
Unpack a binary structure.
Definition: ZipDirectoryReader.php:629
ZipDirectoryReader\findOldCentralDirectory
findOldCentralDirectory()
Find the location of the central directory, as would be seen by a non-ZIP64 reader.
Definition: ZipDirectoryReader.php:314
ZipDirectoryReader\ZIP64_EXTRA_HEADER
const ZIP64_EXTRA_HEADER
The "extra field" ID for ZIP64 central directory entries.
Definition: ZipDirectoryReader.php:118
ZipDirectoryReader\unpackZip64Extra
unpackZip64Extra( $extraField)
Interpret ZIP64 "extra field" data and return an associative array.
Definition: ZipDirectoryReader.php:467
ZipDirectoryReader\$buffer
$buffer
A segmented cache of the file contents.
Definition: ZipDirectoryReader.php:104
ZipDirectoryReader\testBit
testBit( $value, $bitIndex)
Returns a bit from a given position in an integer value, converted to boolean.
Definition: ZipDirectoryReader.php:682
ZipDirectoryReader
A class for reading ZIP file directories, for the purposes of upload verification.
Definition: ZipDirectoryReader.php:30
ZipDirectoryReader\$file
$file
The opened file resource.
Definition: ZipDirectoryReader.php:98
MWException
MediaWiki exception.
Definition: MWException.php:26
ZipDirectoryReader\$data
$data
Definition: ZipDirectoryReader.php:115
ZipDirectoryReader\execute
execute()
Read the directory according to settings in $this.
Definition: ZipDirectoryReader.php:148
ZipDirectoryReader\__construct
__construct( $fileName, $callback, $options)
Definition: ZipDirectoryReader.php:134
ZipDirectoryReader\$fileName
$fileName
The file name.
Definition: ZipDirectoryReader.php:95
ZipDirectoryReader\getStructSize
getStructSize( $struct)
Get the size of a structure in bytes.
Definition: ZipDirectoryReader.php:593
ZipDirectoryReader\readZip64EndOfCentralDirectoryRecord
readZip64EndOfCentralDirectoryRecord()
Read the header called the "ZIP64 end of central directory record".
Definition: ZipDirectoryReader.php:276
ZipDirectoryReader\error
error( $code, $debugMessage)
Throw an error, and log a debug message.
Definition: ZipDirectoryReader.php:189
ZipDirectoryReader\$callback
$callback
The file data callback.
Definition: ZipDirectoryReader.php:107
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:913
ZipDirectoryReader\$eocdr64
$eocdr64
Definition: ZipDirectoryReader.php:113
ZipDirectoryReader\$eocdr64Locator
$eocdr64Locator
Definition: ZipDirectoryReader.php:113
ZipDirectoryReader\GENERAL_UTF8
const GENERAL_UTF8
The index of the "general field" bit for UTF-8 file names.
Definition: ZipDirectoryReader.php:124
StatusValue\newGood
static newGood( $value=null)
Factory function for good results.
Definition: StatusValue.php:81
ZipDirectoryReader\getSegment
getSegment( $segIndex)
Get a section of the file starting at position $segIndex * self::SEGSIZE, of length self::SEGSIZE.
Definition: ZipDirectoryReader.php:567
ZipDirectoryReader\readEndOfCentralDirectoryRecord
readEndOfCentralDirectoryRecord()
Read the header which is at the end of the central directory, unimaginatively called the "end of central directory record" by the ZIP spec.
Definition: ZipDirectoryReader.php:199
ZipDirectoryReader\read
static read( $fileName, $callback, $options=[])
Read a ZIP file and call a function for each file discovered in it.
Definition: ZipDirectoryReader.php:88
ZipDirectoryReader\readCentralDirectory
readCentralDirectory( $offset, $size)
Read the central directory at the given location.
Definition: ZipDirectoryReader.php:374
ZipDirectoryReader\$eocdr
$eocdr
Stored headers.
Definition: ZipDirectoryReader.php:113
ZipDirectoryReaderError\getErrorCode
getErrorCode()
Definition: ZipDirectoryReaderError.php:35
ZipDirectoryReader\$fileLength
$fileLength
The cached length of the file, or null if it has not been loaded yet.
Definition: ZipDirectoryReader.php:101
ZipDirectoryReader\GENERAL_CD_ENCRYPTED
const GENERAL_CD_ENCRYPTED
The index of the "general field" bit for central directory encryption.
Definition: ZipDirectoryReader.php:127
ZipDirectoryReader\SEGSIZE
const SEGSIZE
The segment size for the file contents cache.
Definition: ZipDirectoryReader.php:121
ZipDirectoryReaderError
Internal exception class.
Definition: ZipDirectoryReaderError.php:24
ZipDirectoryReader\$zip64
$zip64
The ZIP64 mode.
Definition: ZipDirectoryReader.php:110
$type
$type
Definition: testCompression.php:52