MediaWiki  master
ZipDirectoryReader.php
Go to the documentation of this file.
1 <?php
/**
 * Read a ZIP file and call a function for each file discovered in it.
 *
 * The file is opened for reading and passed to a reader instance. If the
 * open fails, execute() reports it as a fatal 'zip-file-open-error' status.
 *
 * @param string $fileName Path of the file to open
 * @param callable $callback Called once per directory entry with an array
 *   containing the keys 'name', 'mtime' and 'size'
 * @param array $options Recognised key: 'zip64' (stored as the ZIP64 mode flag)
 * @return Status
 */
public static function read( $fileName, $callback, $options = [] ) {
    $reader = new self( fopen( $fileName, 'r' ), $callback, $options );

    return $reader->execute();
}
93 
/**
 * Read an opened file handle presumed to be a ZIP and call a function for
 * each file discovered in it.
 *
 * Note that execute() closes the handle once the directory has been read.
 *
 * @param resource $file Opened, seekable file handle
 * @param callable $callback Called once per directory entry with an array
 *   containing the keys 'name', 'mtime' and 'size'
 * @param array $options Recognised key: 'zip64' (stored as the ZIP64 mode flag)
 * @return Status
 */
public static function readHandle( $file, $callback, $options = [] ) {
    return ( new self( $file, $callback, $options ) )->execute();
}
109 
/** @var resource|false The opened file resource */
protected $file;

/** @var int|null The cached length of the file, or null if it has not been loaded yet */
protected $fileLength;

/** @var string[]|null A segmented cache of the file contents, keyed by segment index */
protected $buffer;

/** @var callable The file data callback */
protected $callback;

/** @var bool The ZIP64 mode; overridden by the 'zip64' constructor option */
protected $zip64 = false;

/**
 * Stored headers. These were previously created as undeclared dynamic
 * properties, which is deprecated as of PHP 8.2.
 */

/** @var array|null Fields of the end of central directory record */
protected $eocdr;

/** @var array|null Fields of the ZIP64 end of central directory record */
protected $eocdr64;

/** @var array|null Fields of the ZIP64 end of central directory locator */
protected $eocdr64Locator;

/** @var array|null Reset to an empty array at the start of execute() */
protected $data;

/** The "extra field" ID for ZIP64 central directory entries */
private const ZIP64_EXTRA_HEADER = 0x0001;

/** The segment size for the file contents cache */
private const SEGSIZE = 16384;

/** The index of the "general field" bit for UTF-8 file names */
private const GENERAL_UTF8 = 11;

/** The index of the "general field" bit for central directory encryption */
private const GENERAL_CD_ENCRYPTED = 13;
141 
/**
 * Store the file handle, the callback and the optional ZIP64 mode flag.
 *
 * @param resource|false $file Opened file handle (false if the open failed)
 * @param callable $callback The file data callback
 * @param array $options Recognised key: 'zip64'
 */
protected function __construct( $file, $callback, $options ) {
    $this->file = $file;
    $this->callback = $callback;
    // Keep the declared default when the option is absent or null.
    $this->zip64 = $options['zip64'] ?? $this->zip64;
}
155 
/**
 * Read the directory according to settings in $this.
 *
 * Any ZipDirectoryReaderError raised while parsing is converted into a
 * fatal Status carrying the error code. The file handle is always closed
 * before returning, including when some other exception propagates.
 *
 * @return Status
 */
private function execute() {
    $this->data = [];
    if ( !$this->file ) {
        return Status::newFatal( 'zip-file-open-error' );
    }

    $status = Status::newGood();
    try {
        // Populate $this->eocdr; both branches below depend on it.
        $this->readEndOfCentralDirectoryRecord();

        if ( $this->zip64 ) {
            list( $offset, $size ) = $this->findZip64CentralDirectory();
            $this->readCentralDirectory( $offset, $size );
        } else {
            if ( $this->eocdr['CD size'] == 0xffffffff
                || $this->eocdr['CD offset'] == 0xffffffff
                || $this->eocdr['CD entries total'] == 0xffff
            ) {
                $this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
                    'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
                    'opening vulnerabilities on clients using OpenJDK 7 or later.' );
            }

            list( $offset, $size ) = $this->findOldCentralDirectory();
            $this->readCentralDirectory( $offset, $size );
        }
    } catch ( ZipDirectoryReaderError $e ) {
        $status->fatal( $e->getErrorCode() );
    } finally {
        // Release the handle even if an unexpected exception escapes.
        fclose( $this->file );
    }

    return $status;
}
194 
/**
 * Throw an error, and log a debug message.
 *
 * @param string $code Error code carried by the thrown exception
 * @param string $debugMessage Text written to the debug log
 * @throws ZipDirectoryReaderError Always
 */
private function error( $code, $debugMessage ) {
    $logLine = __CLASS__ . ": Fatal error: $debugMessage";
    wfDebug( $logLine );

    throw new ZipDirectoryReaderError( $code );
}
206 
/**
 * Read the header which is at the end of the central directory,
 * called the "end of central directory record".
 *
 * The result is stored in $this->eocdr, together with the derived keys
 * 'EOCDR size', 'file comment' and 'position'.
 *
 * @throws ZipDirectoryReaderError If the file is empty, has no EOCDR
 *   signature, has data after the record, or spans more than one disk
 */
private function readEndOfCentralDirectoryRecord() {
    $layout = [
        'signature' => 4,
        'disk' => 2,
        'CD start disk' => 2,
        'CD entries this disk' => 2,
        'CD entries total' => 2,
        'CD size' => 4,
        'CD offset' => 4,
        'file comment length' => 2,
    ];
    $structSize = $this->getStructSize( $layout );

    // Search at most the last 65536 bytes plus one fixed-size record.
    $startPos = max( 0, $this->getFileLength() - 65536 - $structSize );

    if ( $this->getFileLength() === 0 ) {
        $this->error( 'zip-wrong-format', "The file is empty." );
    }

    $tail = $this->getBlock( $startPos );
    $sigPos = strrpos( $tail, "PK\x05\x06" );
    if ( $sigPos === false ) {
        $this->error( 'zip-wrong-format',
            "zip file lacks EOCDR signature. It probably isn't a zip file." );
    }

    $this->eocdr = $this->unpack( substr( $tail, $sigPos ), $layout );
    $commentLength = $this->eocdr['file comment length'];
    $recordSize = $structSize + $commentLength;
    $this->eocdr['EOCDR size'] = $recordSize;

    // The record, including its comment, must run exactly to the end of the file.
    if ( $recordSize != strlen( $tail ) - $sigPos ) {
        // T40432: MS binary documents frequently embed ZIP files
        $this->error( 'zip-wrong-format', 'there is a ZIP signature but it is not at ' .
            'the end of the file. It could be an OLE file with a ZIP file embedded.' );
    }

    $singleDisk = $this->eocdr['disk'] === 0 && $this->eocdr['CD start disk'] === 0;
    if ( !$singleDisk ) {
        $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
    }

    $commentLayout = [ 'file comment' => [ 'string', $commentLength ] ];
    $this->eocdr += $this->unpack( $tail, $commentLayout, $sigPos + $structSize );
    $this->eocdr['position'] = $startPos + $sigPos;
}
259 
/**
 * Read the header called the "ZIP64 end of central directory locator".
 *
 * The locator is read from the bytes immediately preceding the end of
 * central directory record, so $this->eocdr must already be populated.
 * The result is stored in $this->eocdr64Locator.
 *
 * @throws ZipDirectoryReaderError If the locator signature is wrong
 */
private function readZip64EndOfCentralDirectoryLocator() {
    $info = [
        'signature' => [ 'string', 4 ],
        'eocdr64 start disk' => 4,
        'eocdr64 offset' => 8,
        'number of disks' => 4,
    ];
    $structSize = $this->getStructSize( $info );

    $start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize;
    $block = $this->getBlock( $start, $structSize );
    $this->eocdr64Locator = $data = $this->unpack( $block, $info );

    if ( $data['signature'] !== "PK\x06\x07" ) {
        // Note: Java will allow this and continue to read the
        // EOCDR64, so we have to reject the upload, we can't
        // just use the EOCDR header instead.
        $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
    }
}
284 
/**
 * Read the header called the "ZIP64 end of central directory record".
 *
 * The record is read from the offset given by $this->eocdr64Locator, which
 * must already be populated. The result is stored in $this->eocdr64.
 *
 * @throws ZipDirectoryReaderError If the locator or the record indicates
 *   more than one disk, or the record signature is wrong
 */
private function readZip64EndOfCentralDirectoryRecord() {
    // NOTE(review): some writers appear to store 1 in the locator's
    // 'number of disks' field for single-disk archives; confirm that
    // rejecting every non-zero value is intended.
    if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
        || $this->eocdr64Locator['number of disks'] != 0
    ) {
        $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
    }

    $info = [
        'signature' => [ 'string', 4 ],
        'EOCDR64 size' => 8,
        'version made by' => 2,
        'version needed' => 2,
        'disk' => 4,
        'CD start disk' => 4,
        'CD entries this disk' => 8,
        'CD entries total' => 8,
        'CD size' => 8,
        'CD offset' => 8
    ];
    $structSize = $this->getStructSize( $info );
    $block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
    $this->eocdr64 = $data = $this->unpack( $block, $info );
    if ( $data['signature'] !== "PK\x06\x06" ) {
        $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
    }
    if ( $data['disk'] !== 0
        || $data['CD start disk'] !== 0
    ) {
        $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
    }
}
320 
/**
 * Find the location of the central directory, as would be seen by a
 * non-ZIP64 reader.
 *
 * @return array List containing offset and size
 * @throws ZipDirectoryReaderError If the directory does not end exactly
 *   where the end of central directory record begins
 */
private function findOldCentralDirectory() {
    [
        'CD size' => $size,
        'CD offset' => $offset,
        'position' => $endPos,
    ] = $this->eocdr;

    // Some readers use the EOCDR position instead of the offset field
    // to find the directory, so to be safe, we check if they both agree.
    if ( $offset + $size == $endPos ) {
        return [ $offset, $size ];
    }

    // error() always throws, so nothing is returned on this path.
    $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
        'of central directory record' );
}
341 
/**
 * Find the location of the central directory, as would be seen by a
 * ZIP64-compliant reader.
 *
 * When any of the legacy fields holds its overflow marker, the ZIP64
 * locator and record are read and their values take precedence.
 *
 * @return array List containing offset and size
 * @throws ZipDirectoryReaderError If the directory does not end exactly
 *   where the relevant end-of-directory header begins
 */
private function findZip64CentralDirectory() {
    // The spec is ambiguous about the exact rules of precedence between the
    // ZIP64 headers and the original headers. Here we follow zip_util.c
    // from OpenJDK 7.
    $size = $this->eocdr['CD size'];
    $offset = $this->eocdr['CD offset'];
    $numEntries = $this->eocdr['CD entries total'];
    $endPos = $this->eocdr['position'];
    if ( $size == 0xffffffff
        || $offset == 0xffffffff
        || $numEntries == 0xffff
    ) {
        // Populate $this->eocdr64Locator before it is inspected below.
        $this->readZip64EndOfCentralDirectoryLocator();

        if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
            // Populate $this->eocdr64 from the offset the locator gave us.
            $this->readZip64EndOfCentralDirectoryRecord();
            if ( isset( $this->eocdr64['CD offset'] ) ) {
                $size = $this->eocdr64['CD size'];
                $offset = $this->eocdr64['CD offset'];
                $endPos = $this->eocdr64Locator['eocdr64 offset'];
            }
        }
    }
    // Some readers use the EOCDR position instead of the offset field
    // to find the directory, so to be safe, we check if they both agree.
    if ( $offset + $size != $endPos ) {
        $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
            'of central directory record' );
    }

    return [ $offset, $size ];
}
380 
/**
 * Read the central directory at the given location.
 *
 * Each entry is decoded and passed to the callback as an array with the
 * keys 'name' (UTF-8), 'mtime' (14-digit timestamp) and 'size'
 * (uncompressed size).
 *
 * @param int $offset Start of the central directory within the file
 * @param int $size Length of the central directory in bytes
 * @throws ZipDirectoryReaderError On a bad entry signature or when central
 *   directory encryption is flagged
 */
private function readCentralDirectory( $offset, $size ) {
    $block = $this->getBlock( $offset, $size );

    $headerLayout = [
        'signature' => [ 'string', 4 ],
        'version made by' => 2,
        'version needed' => 2,
        'general bits' => 2,
        'compression method' => 2,
        'mod time' => 2,
        'mod date' => 2,
        'crc-32' => 4,
        'compressed size' => 4,
        'uncompressed size' => 4,
        'name length' => 2,
        'extra field length' => 2,
        'comment length' => 2,
        'disk number start' => 2,
        'internal attrs' => 2,
        'external attrs' => 4,
        'local header offset' => 4,
    ];
    $headerSize = $this->getStructSize( $headerLayout );

    for ( $pos = 0; $pos < $size; ) {
        // Fixed-size part of the entry
        $entry = $this->unpack( $block, $headerLayout, $pos );
        $pos += $headerSize;

        if ( $entry['signature'] !== "PK\x01\x02" ) {
            $this->error( 'zip-bad', 'Invalid signature found in directory entry' );
        }

        // Variable-size part; the lengths come from the fixed header
        $trailerLayout = [
            'name' => [ 'string', $entry['name length'] ],
            'extra field' => [ 'string', $entry['extra field length'] ],
            'comment' => [ 'string', $entry['comment length'] ],
        ];
        $entry += $this->unpack( $block, $trailerLayout, $pos );
        $pos += $this->getStructSize( $trailerLayout );

        // An overflow marker means the real value lives in the ZIP64 extra field
        $hasOverflowMarker = $entry['compressed size'] == 0xffffffff
            || $entry['uncompressed size'] == 0xffffffff
            || $entry['local header offset'] == 0xffffffff;
        if ( $this->zip64 && $hasOverflowMarker ) {
            $zip64Data = $this->unpackZip64Extra( $entry['extra field'] );
            if ( $zip64Data ) {
                // ZIP64 values take precedence over the legacy fields
                $entry = $zip64Data + $entry;
            }
        }

        if ( $this->testBit( $entry['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
            $this->error( 'zip-unsupported', 'central directory encryption is not supported' );
        }

        // Convert the timestamp into MediaWiki format
        // For the format, please see the MS-DOS 2.0 Programmer's Reference,
        // pages 3-5 and 3-6.
        $dosTime = $entry['mod time'];
        $dosDate = $entry['mod date'];
        $timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
            1980 + ( $dosDate >> 9 ),
            ( $dosDate >> 5 ) & 15,
            $dosDate & 31,
            ( $dosTime >> 11 ) & 31,
            ( $dosTime >> 5 ) & 63,
            ( $dosTime & 31 ) * 2
        );

        // Convert the character set in the file name
        $name = $this->testBit( $entry['general bits'], self::GENERAL_UTF8 )
            ? $entry['name']
            : iconv( 'CP437', 'UTF-8', $entry['name'] );

        // Compile a data array for the user, with a sensible format
        call_user_func( $this->callback, [
            'name' => $name,
            'mtime' => $timestamp,
            'size' => $entry['uncompressed size'],
        ] );
    }
}
473 
/**
 * Interpret ZIP64 "extra field" data and return an associative array.
 *
 * The extra field is walked as a sequence of (id, size, data) records.
 * The first record whose id is the ZIP64 header id is decoded and returned.
 *
 * @param string $extraField Raw extra field bytes of a directory entry
 * @return array|bool The decoded ZIP64 values, or false if no ZIP64 record
 *   is present
 */
private function unpackZip64Extra( $extraField ) {
    $recordHeaderLayout = [
        'id' => 2,
        'size' => 2,
    ];
    $recordHeaderSize = $this->getStructSize( $recordHeaderLayout );

    $zip64Layout = [
        'uncompressed size' => 8,
        'compressed size' => 8,
        'local header offset' => 8,
        'disk number start' => 4,
    ];

    $fieldLength = strlen( $extraField );
    for ( $cursor = 0; $cursor < $fieldLength; ) {
        $header = $this->unpack( $extraField, $recordHeaderLayout, $cursor );
        $cursor += $recordHeaderSize;

        $payloadLayout = [ 'data' => [ 'string', $header['size'] ] ];
        $payload = $this->unpack( $extraField, $payloadLayout, $cursor )['data'];
        $cursor += $header['size'];

        if ( $header['id'] == self::ZIP64_EXTRA_HEADER ) {
            return $this->unpack( $payload, $zip64Layout );
        }
    }

    return false;
}
509 
/**
 * Get the length of the file.
 *
 * The value is obtained from fstat() on first use and cached afterwards.
 *
 * @return int
 * @throws ZipDirectoryReaderError If fstat() fails on the file handle
 */
private function getFileLength() {
    if ( $this->fileLength === null ) {
        $stat = fstat( $this->file );
        if ( $stat === false ) {
            // Previously the failure was ignored and a null length leaked out.
            $this->error( 'zip-bad', 'fstat() failed, unable to determine the file length' );
        }
        $this->fileLength = $stat['size'];
    }

    return $this->fileLength;
}
522 
/**
 * Get the file contents from a given offset.
 *
 * The data is assembled from cached segments of self::SEGSIZE bytes and
 * then trimmed to the requested range.
 *
 * @param int $start The byte offset at which to start
 * @param int|null $length The number of bytes to return, or null to return
 *   everything from $start to the end of the file
 * @return string
 * @throws ZipDirectoryReaderError If the requested range lies outside the
 *   file or less data than requested could be read
 */
private function getBlock( $start, $length = null ) {
    $fileLength = $this->getFileLength();
    // A negative start used to surface only as a failed seek in getSegment().
    if ( $start < 0 ) {
        $this->error( 'zip-bad', "getBlock() requested negative position $start" );
    }
    if ( $start >= $fileLength ) {
        $this->error( 'zip-bad', "getBlock() requested position $start, " .
            "file length is $fileLength" );
    }
    if ( $length === null ) {
        $length = $fileLength - $start;
    }
    $end = $start + $length;
    if ( $end > $fileLength ) {
        $this->error( 'zip-bad', "getBlock() requested end position $end, " .
            "file length is $fileLength" );
    }
    $startSeg = (int)floor( $start / self::SEGSIZE );
    // Exclusive bound: segments $startSeg .. $endSeg - 1 cover [$start, $end).
    $endSeg = (int)ceil( $end / self::SEGSIZE );

    $block = '';
    // Strict comparison avoids fetching one segment past the requested range.
    for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
        $block .= $this->getSegment( $segIndex );
    }

    $block = substr( $block,
        $start - $startSeg * self::SEGSIZE,
        $length );

    if ( strlen( $block ) < $length ) {
        $this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
    }

    return $block;
}
565 
/**
 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
 * of length self::SEGSIZE.
 *
 * Results are cached in $this->buffer. A segment that starts at or beyond
 * the end of the file is returned, and cached, as an empty string.
 *
 * @param int $segIndex Index of the segment to fetch
 * @return string The segment contents
 * @throws ZipDirectoryReaderError If seeking or reading fails
 */
private function getSegment( $segIndex ) {
    if ( isset( $this->buffer[$segIndex] ) ) {
        return $this->buffer[$segIndex];
    }

    $bytePos = $segIndex * self::SEGSIZE;
    if ( $bytePos >= $this->getFileLength() ) {
        $this->buffer[$segIndex] = '';

        return '';
    }

    // fseek() returns a non-zero value on failure.
    if ( fseek( $this->file, $bytePos ) ) {
        $this->error( 'zip-bad', "seek to $bytePos failed" );
    }

    $contents = fread( $this->file, self::SEGSIZE );
    if ( $contents === false ) {
        $this->error( 'zip-bad', "read from $bytePos failed" );
    }

    return $this->buffer[$segIndex] = $contents;
}
599 
/**
 * Get the size of a structure in bytes.
 *
 * Each element of the definition is either an integer byte count or an
 * array whose second element is the byte count.
 *
 * @param array $struct Structure definition, as accepted by unpack()
 * @return int
 */
private function getStructSize( $struct ) {
    $total = 0;
    foreach ( $struct as $fieldType ) {
        $total += is_array( $fieldType ) ? $fieldType[1] : $fieldType;
    }

    return $total;
}
618 
/**
 * Unpack a binary structure.
 *
 * @param string $string The binary data input
 * @param array $struct Map of field name to type. A type is either an
 *   integer N, meaning an N-byte unsigned little-endian integer, or an
 *   array [ 'string', N ], meaning a raw string of N bytes.
 * @param int $offset Byte offset within $string at which to start
 * @return array Unpacked values keyed by field name
 * @throws ZipDirectoryReaderError If the structure would run past the end
 *   of $string, or an integer exceeds 2 ** 52
 * @throws MWException If an array type names anything other than 'string'
 */
private function unpack( $string, $struct, $offset = 0 ) {
    if ( $offset + $this->getStructSize( $struct ) > strlen( $string ) ) {
        $this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
    }

    $fields = [];
    $cursor = $offset;
    foreach ( $struct as $key => $type ) {
        if ( is_array( $type ) ) {
            [ $typeName, $fieldSize ] = $type;
            // Loose comparison on purpose: it mirrors switch semantics.
            if ( $typeName != 'string' ) {
                throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
            }
            $fields[$key] = substr( $string, $cursor, $fieldSize );
            $cursor += $fieldSize;
            continue;
        }

        // Unsigned little-endian integer
        $width = intval( $type );

        // Accumulate from the most significant byte down. PHP promotes
        // the value to floating point automatically if it overflows.
        $number = 0;
        for ( $i = $width - 1; $i >= 0; $i-- ) {
            $number = $number * 256 + ord( $string[$cursor + $i] );
        }

        // Throw an exception if there was loss of precision
        if ( $number > 2 ** 52 ) {
            $this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
                'This could happen if we tried to unpack a 64-bit structure ' .
                'at an invalid location.' );
        }
        $fields[$key] = $number;
        $cursor += $width;
    }

    return $fields;
}
685 
/**
 * Returns a bit from a given position in an integer value, converted to
 * boolean.
 *
 * @param int $value The value to inspect
 * @param int $bitIndex The index of the bit, where 0 is the least
 *   significant bit
 * @return bool
 */
private function testBit( $value, $bitIndex ) {
    $bit = ( $value >> $bitIndex ) & 1;

    return $bit === 1;
}
697 }
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
MediaWiki exception.
Definition: MWException.php:29
static newFatal( $message,... $parameters)
Factory function for fatal errors.
Definition: StatusValue.php:70
static newGood( $value=null)
Factory function for good results.
Definition: StatusValue.php:82
Internal exception class.
A class for reading ZIP file directories, for the purposes of upload verification.
$callback
The file data callback.
const GENERAL_UTF8
The index of the "general field" bit for UTF-8 file names.
readEndOfCentralDirectoryRecord()
Read the header which is at the end of the central directory, unimaginatively called the "end of central directory record".
static readHandle( $file, $callback, $options=[])
Read an opened file handle presumed to be a ZIP and call a function for each file discovered in it.
error( $code, $debugMessage)
Throw an error, and log a debug message.
execute()
Read the directory according to settings in $this.
readZip64EndOfCentralDirectoryRecord()
Read the header called the "ZIP64 end of central directory record".
getSegment( $segIndex)
Get a section of the file starting at position $segIndex * self::SEGSIZE, of length self::SEGSIZE.
const ZIP64_EXTRA_HEADER
The "extra field" ID for ZIP64 central directory entries.
findZip64CentralDirectory()
Find the location of the central directory, as would be seen by a ZIP64-compliant reader.
$file
The opened file resource.
static read( $fileName, $callback, $options=[])
Read a ZIP file and call a function for each file discovered in it.
testBit( $value, $bitIndex)
Returns a bit from a given position in an integer value, converted to boolean.
getFileLength()
Get the length of the file.
const GENERAL_CD_ENCRYPTED
The index of the "general field" bit for central directory encryption.
getStructSize( $struct)
Get the size of a structure in bytes.
readZip64EndOfCentralDirectoryLocator()
Read the header called the "ZIP64 end of central directory locator".
findOldCentralDirectory()
Find the location of the central directory, as would be seen by a non-ZIP64 reader.
$buffer
A segmented cache of the file contents.
__construct( $file, $callback, $options)
getBlock( $start, $length=null)
Get the file contents from a given offset.
unpack( $string, $struct, $offset=0)
Unpack a binary structure.
const SEGSIZE
The segment size for the file contents cache.
$fileLength
The cached length of the file, or null if it has not been loaded yet.
readCentralDirectory( $offset, $size)
Read the central directory at the given location.
unpackZip64Extra( $extraField)
Interpret ZIP64 "extra field" data and return an associative array.