MediaWiki  master
ZipDirectoryReader.php
Go to the documentation of this file.
1 <?php
24 
/**
 * Read a ZIP file and call a function for each file discovered in it.
 *
 * The file is opened for reading and handed to readHandle(); when the open
 * fails, the false "handle" is reported by execute() as 'zip-file-open-error'.
 *
 * @param string $fileName Path of the file to open
 * @param callable $callback Called once per central directory entry with an
 *   array containing the keys 'name', 'mtime' and 'size'
 * @param array $options Recognised key: 'zip64' to enable ZIP64 handling
 * @return Status
 */
public static function read( $fileName, $callback, $options = [] ) {
    return self::readHandle( fopen( $fileName, 'r' ), $callback, $options );
}
95 
/**
 * Read an opened file handle presumed to be a ZIP and call a function for
 * each file discovered in it.
 *
 * Note that execute() closes the handle once the directory has been read.
 *
 * @param resource $file Opened, readable file handle
 * @param callable $callback Called once per central directory entry with an
 *   array containing the keys 'name', 'mtime' and 'size'
 * @param array $options Recognised key: 'zip64' to enable ZIP64 handling
 * @return Status
 */
public static function readHandle( $file, $callback, $options = [] ) {
    return ( new self( $file, $callback, $options ) )->execute();
}
111 
/** @var resource|false The opened file resource (false when opening failed) */
protected $file;

/** @var int|null The cached length of the file, or null if it has not been loaded yet */
protected $fileLength;

/** @var string[] A segmented cache of the file contents, keyed by segment index */
protected $buffer;

/** @var callable The file data callback */
protected $callback;

/** @var bool Whether ZIP64 structures are interpreted (set from the 'zip64' option) */
protected $zip64 = false;

/** @var array Reset to an empty array at the start of execute() */
protected $data;
131 
/** Header ID that marks the ZIP64 block inside an entry's extra field */
private const ZIP64_EXTRA_HEADER = 0x0001;

/** Size in bytes of the segments the file is read and cached in */
private const SEGSIZE = 16384;

/** Bit index in 'general bits' indicating that the entry name is already UTF-8 */
private const GENERAL_UTF8 = 11;

/** Bit index in 'general bits' indicating an encrypted central directory */
private const GENERAL_CD_ENCRYPTED = 13;
143 
/**
 * Store the handle and callback; use the static entry points to run a read.
 *
 * @param resource|false $file Opened file handle, or false if opening failed
 * @param callable $callback Called once per central directory entry
 * @param array $options Recognised key: 'zip64'; when absent (or null) the
 *   default of false is kept
 */
protected function __construct( $file, $callback, $options ) {
    $this->callback = $callback;
    $this->file = $file;
    $this->zip64 = $options['zip64'] ?? $this->zip64;
}
157 
/**
 * Read the directory according to settings in $this.
 *
 * Locates the end of central directory record, finds the central directory
 * (legacy or ZIP64 flavour depending on $this->zip64) and feeds every entry
 * to the callback. Format problems raised as ZipDirectoryReaderError are
 * converted into a fatal Status carrying the error code.
 *
 * The file handle is always closed before returning or propagating an
 * exception, including exceptions thrown by the callback.
 *
 * @return Status Good on success, fatal with a 'zip-*' message key otherwise
 */
private function execute() {
    $this->data = [];
    if ( !$this->file ) {
        // Nothing to close: fopen() failed or the caller passed no handle.
        return Status::newFatal( 'zip-file-open-error' );
    }

    $status = Status::newGood();
    try {
        $this->readEndOfCentralDirectoryRecord();
        if ( $this->zip64 ) {
            [ $offset, $size ] = $this->findZip64CentralDirectory();
        } else {
            // In legacy mode the ZIP64 marker values must not appear at all.
            if ( $this->eocdr['CD size'] == 0xffffffff
                || $this->eocdr['CD offset'] == 0xffffffff
                || $this->eocdr['CD entries total'] == 0xffff
            ) {
                $this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
                    'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
                    'opening vulnerabilities on clients using OpenJDK 7 or later.' );
            }

            [ $offset, $size ] = $this->findOldCentralDirectory();
        }
        $this->readCentralDirectory( $offset, $size );
    } catch ( ZipDirectoryReaderError $e ) {
        $status->fatal( $e->getErrorCode() );
    } finally {
        // Runs for every exit path, so the handle is not leaked when an
        // exception other than ZipDirectoryReaderError propagates.
        fclose( $this->file );
    }

    return $status;
}
196 
/**
 * Log a debug message and abort the read with the given error code.
 *
 * @param string $code Error code carried by the exception (a 'zip-*' key)
 * @param string $debugMessage Detail written to the debug log only
 * @throws ZipDirectoryReaderError Always
 */
private function error( $code, $debugMessage ) {
    $logLine = __CLASS__ . ": Fatal error: $debugMessage";
    wfDebug( $logLine );

    throw new ZipDirectoryReaderError( $code );
}
208 
/**
 * Read the header which is at the end of the central directory,
 * unimaginatively called the "end of central directory record" by the ZIP
 * spec, and store it in $this->eocdr.
 *
 * Besides the unpacked fields, the stored array receives 'EOCDR size'
 * (fixed part plus comment length), 'file comment' and 'position' (the
 * absolute offset of the signature in the file).
 *
 * @throws ZipDirectoryReaderError If the file is empty, has no signature,
 *   the record is not the last thing in the file, or multiple disks are used
 */
private function readEndOfCentralDirectoryRecord() {
    $layout = [
        'signature' => 4,
        'disk' => 2,
        'CD start disk' => 2,
        'CD entries this disk' => 2,
        'CD entries total' => 2,
        'CD size' => 4,
        'CD offset' => 4,
        'file comment length' => 2,
    ];
    $fixedSize = $this->getStructSize( $layout );
    $totalLength = $this->getFileLength();

    // Only the tail of the file is searched: 65536 bytes plus the fixed record.
    $searchStart = max( 0, $totalLength - 65536 - $fixedSize );

    if ( $totalLength === 0 ) {
        $this->error( 'zip-wrong-format', "The file is empty." );
    }

    $tail = $this->getBlock( $searchStart );
    $sigOffset = strrpos( $tail, "PK\x05\x06" );
    if ( $sigOffset === false ) {
        $this->error( 'zip-wrong-format',
            "zip file lacks EOCDR signature. It probably isn't a zip file." );
    }

    $this->eocdr = $this->unpack( substr( $tail, $sigOffset ), $layout );
    $commentLength = $this->eocdr['file comment length'];
    $recordSize = $fixedSize + $commentLength;
    $this->eocdr['EOCDR size'] = $recordSize;

    // The record (including its comment) must run exactly to the end of file.
    $bytesFromSignature = strlen( $tail ) - $sigOffset;
    if ( $recordSize != $bytesFromSignature ) {
        // T40432: MS binary documents frequently embed ZIP files
        $this->error( 'zip-wrong-format', 'there is a ZIP signature but it is not at ' .
            'the end of the file. It could be an OLE file with a ZIP file embedded.' );
    }

    $spansDisks = $this->eocdr['disk'] !== 0 || $this->eocdr['CD start disk'] !== 0;
    if ( $spansDisks ) {
        $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
    }

    $commentLayout = [ 'file comment' => [ 'string', $commentLength ] ];
    $this->eocdr += $this->unpack( $tail, $commentLayout, $sigOffset + $fixedSize );
    $this->eocdr['position'] = $searchStart + $sigOffset;
}
261 
/**
 * Read the header called the "ZIP64 end of central directory locator" and
 * store it in $this->eocdr64Locator.
 *
 * The locator is expected directly in front of the end of central
 * directory record, i.e. at file length minus 'EOCDR size' minus its own size.
 *
 * @throws ZipDirectoryReaderError If the bytes cannot be read or the
 *   signature does not match
 */
private function readZip64EndOfCentralDirectoryLocator() {
    $layout = [
        'signature' => [ 'string', 4 ],
        'eocdr64 start disk' => 4,
        'eocdr64 offset' => 8,
        'number of disks' => 4,
    ];
    $locatorSize = $this->getStructSize( $layout );
    $locatorPos = $this->getFileLength() - $this->eocdr['EOCDR size'] - $locatorSize;

    $locator = $this->unpack( $this->getBlock( $locatorPos, $locatorSize ), $layout );
    $this->eocdr64Locator = $locator;

    if ( $locator['signature'] !== "PK\x06\x07" ) {
        // Note: Java will allow this and continue to read the
        // EOCDR64, so we have to reject the upload, we can't
        // just use the EOCDR header instead.
        $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
    }
}
286 
/**
 * Read the header called the "ZIP64 end of central directory record" and
 * store it in $this->eocdr64. It may replace the regular "end of central
 * directory record" in ZIP64 files.
 *
 * Requires $this->eocdr64Locator to have been populated already.
 *
 * @throws ZipDirectoryReaderError If the archive spans several disks, the
 *   record cannot be read, or its signature does not match
 */
private function readZip64EndOfCentralDirectoryRecord() {
    // 'number of disks' is a total count, so an ordinary single-disk archive
    // stores 1 here. Only a count above 1 actually means "more than one
    // disk"; 0 stays accepted for backward compatibility.
    if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
        || $this->eocdr64Locator['number of disks'] > 1
    ) {
        $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
    }

    $info = [
        'signature' => [ 'string', 4 ],
        'EOCDR64 size' => 8,
        'version made by' => 2,
        'version needed' => 2,
        'disk' => 4,
        'CD start disk' => 4,
        'CD entries this disk' => 8,
        'CD entries total' => 8,
        'CD size' => 8,
        'CD offset' => 8
    ];
    $structSize = $this->getStructSize( $info );
    $block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
    $this->eocdr64 = $data = $this->unpack( $block, $info );
    if ( $data['signature'] !== "PK\x06\x06" ) {
        $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
    }
    if ( $data['disk'] !== 0
        || $data['CD start disk'] !== 0
    ) {
        $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
    }
}
322 
/**
 * Find the location of the central directory, as would be seen by a
 * non-ZIP64 reader.
 *
 * @return array Two-element list: [ offset, size ]
 * @throws ZipDirectoryReaderError If the directory does not end exactly
 *   where the end of central directory record begins
 */
private function findOldCentralDirectory() {
    [
        'CD size' => $size,
        'CD offset' => $offset,
        'position' => $eocdrPos,
    ] = $this->eocdr;

    // Some readers use the EOCDR position instead of the offset field
    // to find the directory, so to be safe, we check if they both agree.
    $directoryEnd = $offset + $size;
    if ( $directoryEnd != $eocdrPos ) {
        $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
            'of central directory record' );
    }

    return [ $offset, $size ];
}
343 
/**
 * Find the location of the central directory, as would be seen by a
 * ZIP64-compliant reader.
 *
 * The legacy record is used unless one of its fields holds a ZIP64 marker
 * value, in which case the ZIP64 locator and record are read and take over.
 *
 * @return array Two-element list: [ offset, size ]
 * @throws ZipDirectoryReaderError If a ZIP64 header is invalid or the
 *   directory does not end exactly where the governing record begins
 */
private function findZip64CentralDirectory() {
    // The spec is ambiguous about the exact rules of precedence between the
    // ZIP64 headers and the original headers. Here we follow zip_util.c
    // from OpenJDK 7.
    $legacy = $this->eocdr;
    $size = $legacy['CD size'];
    $offset = $legacy['CD offset'];
    $endPos = $legacy['position'];

    $hasZip64Marker = $size == 0xffffffff
        || $offset == 0xffffffff
        || $legacy['CD entries total'] == 0xffff;

    if ( $hasZip64Marker ) {
        $this->readZip64EndOfCentralDirectoryLocator();

        if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
            $this->readZip64EndOfCentralDirectoryRecord();

            if ( isset( $this->eocdr64['CD offset'] ) ) {
                // The ZIP64 record supersedes the legacy values; the
                // directory must now end where the ZIP64 record starts.
                $size = $this->eocdr64['CD size'];
                $offset = $this->eocdr64['CD offset'];
                $endPos = $this->eocdr64Locator['eocdr64 offset'];
            }
        }
    }

    // Some readers use the EOCDR position instead of the offset field
    // to find the directory, so to be safe, we check if they both agree.
    $directoryEnd = $offset + $size;
    if ( $directoryEnd != $endPos ) {
        $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
            'of central directory record' );
    }

    return [ $offset, $size ];
}
382 
/**
 * Read the central directory at the given location and invoke the callback
 * once for every entry found in it.
 *
 * Each callback invocation receives an array with:
 *   - 'name': entry name, converted from CP437 to UTF-8 unless the UTF-8
 *     flag is set in the entry's general purpose bits
 *   - 'mtime': modification time as a 14-digit YYYYMMDDHHMMSS string
 *   - 'size': uncompressed size
 *
 * @param int $offset Absolute file offset of the central directory
 * @param int $size Size of the central directory in bytes
 * @throws ZipDirectoryReaderError On a bad entry signature, truncated data
 *   or an encrypted central directory
 */
private function readCentralDirectory( $offset, $size ) {
    $directory = $this->getBlock( $offset, $size );

    $headerLayout = [
        'signature' => [ 'string', 4 ],
        'version made by' => 2,
        'version needed' => 2,
        'general bits' => 2,
        'compression method' => 2,
        'mod time' => 2,
        'mod date' => 2,
        'crc-32' => 4,
        'compressed size' => 4,
        'uncompressed size' => 4,
        'name length' => 2,
        'extra field length' => 2,
        'comment length' => 2,
        'disk number start' => 2,
        'internal attrs' => 2,
        'external attrs' => 4,
        'local header offset' => 4,
    ];
    $headerSize = $this->getStructSize( $headerLayout );

    for ( $cursor = 0; $cursor < $size; ) {
        // Fixed-size part of the entry
        $entry = $this->unpack( $directory, $headerLayout, $cursor );
        $cursor += $headerSize;

        if ( $entry['signature'] !== "PK\x01\x02" ) {
            $this->error( 'zip-bad', 'Invalid signature found in directory entry' );
        }

        // Variable-size part, whose lengths come from the fixed part
        $tailLayout = [
            'name' => [ 'string', $entry['name length'] ],
            'extra field' => [ 'string', $entry['extra field length'] ],
            'comment' => [ 'string', $entry['comment length'] ],
        ];
        $entry += $this->unpack( $directory, $tailLayout, $cursor );
        $cursor += $this->getStructSize( $tailLayout );

        if ( $this->zip64 ) {
            $needsZip64Extra = $entry['compressed size'] == 0xffffffff
                || $entry['uncompressed size'] == 0xffffffff
                || $entry['local header offset'] == 0xffffffff;
            if ( $needsZip64Extra ) {
                $zip64Fields = $this->unpackZip64Extra( $entry['extra field'] );
                if ( $zip64Fields ) {
                    // Values from the ZIP64 extra block win over the 32-bit ones.
                    $entry = $zip64Fields + $entry;
                }
            }
        }

        $flags = $entry['general bits'];
        if ( $this->testBit( $flags, self::GENERAL_CD_ENCRYPTED ) ) {
            $this->error( 'zip-unsupported', 'central directory encryption is not supported' );
        }

        // Convert the timestamp into MediaWiki format
        // For the format, please see the MS-DOS 2.0 Programmer's Reference,
        // pages 3-5 and 3-6.
        $dosDate = $entry['mod date'];
        $dosTime = $entry['mod time'];
        $timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
            1980 + ( $dosDate >> 9 ),
            ( $dosDate >> 5 ) & 15,
            $dosDate & 31,
            ( $dosTime >> 11 ) & 31,
            ( $dosTime >> 5 ) & 63,
            ( $dosTime & 31 ) * 2 );

        // Convert the character set in the file name
        $name = $this->testBit( $flags, self::GENERAL_UTF8 )
            ? $entry['name']
            : iconv( 'CP437', 'UTF-8', $entry['name'] );

        // Compile a data array for the user, with a sensible format
        call_user_func( $this->callback, [
            'name' => $name,
            'mtime' => $timestamp,
            'size' => $entry['uncompressed size'],
        ] );
    }
}
475 
/**
 * Interpret ZIP64 "extra field" data and return an associative array.
 *
 * The extra field is walked as a sequence of (id, size, data) records; the
 * first record whose id equals ZIP64_EXTRA_HEADER is unpacked and returned.
 *
 * @param string $extraField Raw extra field bytes of a directory entry
 * @return array|bool Array with 'uncompressed size', 'compressed size',
 *   'local header offset' and 'disk number start', or false if no ZIP64
 *   record is present
 * @throws ZipDirectoryReaderError If a record runs past the end of the field
 */
private function unpackZip64Extra( $extraField ) {
    $recordHeaderLayout = [
        'id' => 2,
        'size' => 2,
    ];
    $recordHeaderSize = $this->getStructSize( $recordHeaderLayout );

    $zip64Layout = [
        'uncompressed size' => 8,
        'compressed size' => 8,
        'local header offset' => 8,
        'disk number start' => 4,
    ];

    $fieldLength = strlen( $extraField );
    $cursor = 0;
    while ( $cursor < $fieldLength ) {
        $header = $this->unpack( $extraField, $recordHeaderLayout, $cursor );
        $payloadStart = $cursor + $recordHeaderSize;
        $payload = $this->unpack( $extraField,
            [ 'data' => [ 'string', $header['size'] ] ],
            $payloadStart );

        if ( $header['id'] == self::ZIP64_EXTRA_HEADER ) {
            return $this->unpack( $payload['data'], $zip64Layout );
        }

        // Not the record we want: skip over its payload.
        $cursor = $payloadStart + $header['size'];
    }

    return false;
}
511 
/**
 * Get the length of the file, as reported by fstat() on the handle.
 * The value is looked up once and cached in $this->fileLength.
 *
 * @return int
 */
private function getFileLength() {
    if ( $this->fileLength !== null ) {
        return $this->fileLength;
    }

    $stat = fstat( $this->file );
    $this->fileLength = $stat['size'];

    return $this->fileLength;
}
524 
/**
 * Get the file contents from a given offset. If there are not enough bytes
 * in the file to satisfy the request, an exception is thrown.
 *
 * The data is assembled from cached fixed-size segments (see getSegment()).
 *
 * @param int $start The byte offset of the start of the block
 * @param int|null $length The number of bytes to return. If omitted, the
 *   remainder of the file is returned.
 * @return string
 * @throws ZipDirectoryReaderError If the requested range lies outside the file
 */
private function getBlock( $start, $length = null ) {
    $fileLength = $this->getFileLength();
    if ( $start < 0 ) {
        // Reject this explicitly rather than relying on a later fseek()
        // failure for a negative segment position.
        $this->error( 'zip-bad', "getBlock() requested negative position $start" );
    }
    if ( $start >= $fileLength ) {
        $this->error( 'zip-bad', "getBlock() requested position $start, " .
            "file length is $fileLength" );
    }
    if ( $length === null ) {
        $length = $fileLength - $start;
    }
    $end = $start + $length;
    if ( $end > $fileLength ) {
        $this->error( 'zip-bad', "getBlock() requested end position $end, " .
            "file length is $fileLength" );
    }
    $startSeg = (int)floor( $start / self::SEGSIZE );
    // Exclusive upper bound: the last byte needed ($end - 1) lives in
    // segment ceil( $end / SEGSIZE ) - 1, so no further segment is loaded.
    $endSeg = (int)ceil( $end / self::SEGSIZE );

    $block = '';
    for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
        $block .= $this->getSegment( $segIndex );
    }

    // Trim the concatenated segments down to the requested byte range.
    $block = substr( $block,
        $start - $startSeg * self::SEGSIZE,
        $length );

    if ( strlen( $block ) < $length ) {
        $this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
    }

    return $block;
}
567 
/**
 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
 * of length self::SEGSIZE. The result is cached in $this->buffer, so
 * repeated requests for the same segment do not touch the file again.
 *
 * A segment that starts at or beyond the end of the file is returned (and
 * cached) as an empty string.
 *
 * @param int $segIndex
 * @return string
 * @throws ZipDirectoryReaderError If seeking or reading fails
 */
private function getSegment( $segIndex ) {
    if ( isset( $this->buffer[$segIndex] ) ) {
        return $this->buffer[$segIndex];
    }

    $bytePos = $segIndex * self::SEGSIZE;
    if ( $bytePos >= $this->getFileLength() ) {
        $contents = '';
    } else {
        $seekFailed = (bool)fseek( $this->file, $bytePos );
        if ( $seekFailed ) {
            $this->error( 'zip-bad', "seek to $bytePos failed" );
        }
        $contents = fread( $this->file, self::SEGSIZE );
        if ( $contents === false ) {
            $this->error( 'zip-bad', "read from $bytePos failed" );
        }
    }

    $this->buffer[$segIndex] = $contents;

    return $contents;
}
601 
/**
 * Get the size of a structure in bytes. See unpack() for the format of
 * $struct.
 *
 * @param array $struct Map of field name to either a byte count (integer
 *   field) or a [ type, byte count ] pair
 * @return int Sum of all field sizes
 */
private function getStructSize( $struct ) {
    $total = 0;
    foreach ( $struct as $field ) {
        // Array specs carry their size in the second element.
        $total += is_array( $field ) ? $field[1] : $field;
    }

    return $total;
}
620 
/**
 * Unpack a binary structure. This is like the built-in unpack() function
 * except nicer.
 *
 * @param string $string The binary data input
 * @param array $struct Map of field name to field spec. A spec is either an
 *   integer N, meaning an unsigned little-endian integer of N bytes, or an
 *   array [ 'string', N ], meaning a raw byte string of N bytes.
 * @param int $offset The offset into the string at which to start unpacking
 * @return array Unpacked values keyed by field name
 * @throws ZipDirectoryReaderError If the structure does not fit in the
 *   string, or an integer exceeds 2 ** 52
 * @throws UnexpectedValueException If an array spec names an unknown type
 */
private function unpack( $string, $struct, $offset = 0 ) {
    $required = $this->getStructSize( $struct );
    if ( strlen( $string ) < $offset + $required ) {
        $this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
    }

    $fields = [];
    $cursor = $offset;
    foreach ( $struct as $fieldName => $spec ) {
        if ( !is_array( $spec ) ) {
            // Unsigned little-endian integer
            $width = intval( $spec );

            // Accumulate from the most significant byte down. The arithmetic
            // automatically upgrades to floating point if necessary.
            $number = 0;
            for ( $remaining = $width; $remaining > 0; $remaining-- ) {
                $number = $number * 256 + ord( $string[$cursor + $remaining - 1] );
            }

            // Throw an exception if there was loss of precision
            if ( $number > 2 ** 52 ) {
                $this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
                    'This could happen if we tried to unpack a 64-bit structure ' .
                    'at an invalid location.' );
            }

            $fields[$fieldName] = $number;
            $cursor += $width;
            continue;
        }

        [ $typeName, $fieldSize ] = $spec;
        // Loose comparison on purpose: mirrors switch/case matching.
        if ( $typeName == 'string' ) {
            $fields[$fieldName] = substr( $string, $cursor, $fieldSize );
            $cursor += $fieldSize;
        } else {
            throw new UnexpectedValueException( __METHOD__ . ": invalid type \"$typeName\"" );
        }
    }

    return $fields;
}
685 
/**
 * Returns a bit from a given position in an integer value, converted to
 * boolean.
 *
 * @param int $value
 * @param int $bitIndex The index of the bit, where 0 is the LSB.
 * @return bool
 */
private function testBit( $value, $bitIndex ) {
    $shifted = $value >> $bitIndex;

    return ( $shifted & 1 ) === 1;
}
697 }
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Generic operation result class. Has a warning/error list, a boolean status and an arbitrary value.
Definition: Status.php:58
Internal exception class.
A class for reading ZIP file directories, for the purposes of upload verification.
$callback
The file data callback.
static readHandle( $file, $callback, $options=[])
Read an opened file handle presumed to be a ZIP and call a function for each file discovered in it.
$file
The opened file resource.
static read( $fileName, $callback, $options=[])
Read a ZIP file and call a function for each file discovered in it.
$buffer
A segmented cache of the file contents.
__construct( $file, $callback, $options)
$fileLength
The cached length of the file, or null if it has not been loaded yet.