MediaWiki master
ZipDirectoryReader.php
Go to the documentation of this file.
1<?php
9namespace Wikimedia\Mime;
10
11use StatusValue;
12use UnexpectedValueException;
13
/**
 * Read a ZIP file and call a function for each file discovered in it.
 *
 * @param string $fileName Path passed to fopen().
 * @param callable $callback Invoked once per central directory entry with
 *   an array holding the keys 'name', 'mtime' and 'size'.
 * @param array $options Recognised key: 'zip64', which switches the reader
 *   from legacy mode to ZIP64 mode.
 * @return StatusValue Good on success, otherwise fatal with an error code.
 *   A file that cannot be opened yields 'zip-file-open-error' via execute().
 */
public static function read( $fileName, $callback, $options = [] ) {
	// ZIP archives are binary data: request binary mode explicitly instead
	// of relying on the platform's default translation mode.
	$file = fopen( $fileName, 'rb' );
	$zdr = new self( $file, $callback, $options );
	return $zdr->execute();
}
85
/**
 * Read an opened file handle presumed to be a ZIP and call a function for
 * each file discovered in it.
 *
 * A usable handle is closed by execute() once the directory has been
 * processed.
 *
 * @param resource $file Handle to read the archive from.
 * @param callable $callback Invoked once per central directory entry.
 * @param array $options Recognised key: 'zip64'.
 * @return StatusValue
 */
public static function readHandle( $file, $callback, $options = [] ) {
	return ( new self( $file, $callback, $options ) )->execute();
}
101
/**
 * @var resource|false The opened file resource. read() stores the raw
 *   fopen() result here, and execute() reports a falsy value as
 *   'zip-file-open-error'.
 */
103 protected $file;
104
/** @var int|null The cached length of the file, or null if it has not been loaded yet. */
106 protected $fileLength;
107
/** @var string[] A segmented cache of the file contents, keyed by segment index. */
109 protected $buffer;
110
/** @var callable The file data callback, invoked once per directory entry. */
112 protected $callback;
113
/** @var bool Whether ZIP64 mode is enabled; overridden by the 'zip64' option. */
115 protected $zip64 = false;
116
/** @var array The unpacked end of central directory record (EOCDR). */
118 protected $eocdr;
/** @var array The unpacked ZIP64 end of central directory record. */
// NOTE(review): $eocdr64Locator is assigned in
// readZip64EndOfCentralDirectoryLocator() but no declaration is visible
// in this listing - confirm it is declared next to these properties.
120 protected $eocdr64;
123
/** Extra field ID that unpackZip64Extra() searches for. */
125 private const ZIP64_EXTRA_HEADER = 0x0001;
126
/** Size in bytes of one cached file segment (see getSegment()). */
128 private const SEGSIZE = 16384;
129
/**
 * Bit index within 'general bits': when set the entry name is used as-is,
 * otherwise it is converted from CP437 to UTF-8.
 */
131 private const GENERAL_UTF8 = 11;
132
/**
 * Bit index within 'general bits': when set the entry is rejected because
 * central directory encryption is not supported.
 */
134 private const GENERAL_CD_ENCRYPTED = 13;
135
/**
 * Store the handle, the callback and the reader options.
 *
 * Instances are created through read() or readHandle().
 *
 * @param resource|false $file Handle to read from (false if opening failed).
 * @param callable $callback The file data callback.
 * @param array $options Recognised key: 'zip64'.
 */
protected function __construct( $file, $callback, $options ) {
	$this->callback = $callback;
	$this->file = $file;

	// A missing (or null) 'zip64' option leaves the declared default alone.
	$this->zip64 = $options['zip64'] ?? $this->zip64;
}
149
/**
 * Read the file according to the settings given to the constructor.
 *
 * Locates the central directory (legacy or ZIP64 rules depending on
 * $this->zip64), walks it, and converts any ZipDirectoryReaderError into a
 * fatal status. The handle is always closed before returning or
 * propagating an exception, including one thrown by the user callback.
 *
 * @return StatusValue Good on success, fatal with the error code otherwise.
 */
private function execute() {
	if ( !$this->file ) {
		return StatusValue::newFatal( 'zip-file-open-error' );
	}

	$status = StatusValue::newGood();
	try {
		$this->readEndOfCentralDirectoryRecord();
		if ( $this->zip64 ) {
			[ $offset, $size ] = $this->findZip64CentralDirectory();
		} else {
			if ( $this->eocdr['CD size'] == 0xffffffff
				|| $this->eocdr['CD offset'] == 0xffffffff
				|| $this->eocdr['CD entries total'] == 0xffff
			) {
				$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
					'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
					'opening vulnerabilities on clients using OpenJDK 7 or later.' );
			}

			[ $offset, $size ] = $this->findOldCentralDirectory();
		}
		$this->readCentralDirectory( $offset, $size );
	} catch ( ZipDirectoryReaderError $e ) {
		$status->fatal( $e->getErrorCode() );
	} finally {
		// Release the handle even when an unrelated exception (for example
		// one thrown by the callback) is passing through.
		fclose( $this->file );
	}

	return $status;
}
187
/**
 * Log a debug message and abort processing with the given error code.
 *
 * @param string $code Error code carried by the exception.
 * @param string $debugMessage Text sent to the debug log only.
 * @throws ZipDirectoryReaderError Always.
 */
private function error( $code, $debugMessage ): never {
	$prefix = __CLASS__ . ': Fatal error: ';
	wfDebug( $prefix . $debugMessage );

	throw new ZipDirectoryReaderError( $code );
}
199
/**
 * Read the end of central directory record (EOCDR) into $this->eocdr.
 *
 * The tail of the file (at most 65536 bytes plus the fixed record size) is
 * searched backwards for the EOCDR signature. Besides the unpacked fields,
 * the keys 'EOCDR size', 'file comment' and 'position' are stored.
 *
 * @throws ZipDirectoryReaderError If the file is empty, has no signature,
 *   has data after the record, or spans more than one disk.
 */
private function readEndOfCentralDirectoryRecord() {
	$layout = [
		'signature' => 4,
		'disk' => 2,
		'CD start disk' => 2,
		'CD entries this disk' => 2,
		'CD entries total' => 2,
		'CD size' => 4,
		'CD offset' => 4,
		'file comment length' => 2,
	];
	$fixedSize = $this->getStructSize( $layout );
	$fileLength = $this->getFileLength();

	// The comment can be at most 65536 bytes, so the signature has to be
	// within that distance (plus the fixed part) of the end of the file.
	$startPos = max( 0, $fileLength - 65536 - $fixedSize );

	if ( $fileLength === 0 ) {
		$this->error( 'zip-wrong-format', "The file is empty." );
	}

	$tail = $this->getBlock( $startPos );
	$sigPos = strrpos( $tail, "PK\x05\x06" );
	if ( $sigPos === false ) {
		$this->error( 'zip-wrong-format',
			"zip file lacks EOCDR signature. It probably isn't a zip file." );
	}

	$record = $this->unpack( substr( $tail, $sigPos ), $layout );
	$recordSize = $fixedSize + $record['file comment length'];
	$record['EOCDR size'] = $recordSize;
	$this->eocdr = $record;

	$bytesAfterSignature = strlen( $tail ) - $sigPos;
	if ( $recordSize != $bytesAfterSignature ) {
		// T40432: MS binary documents frequently embed ZIP files
		$this->error( 'zip-wrong-format', 'there is a ZIP signature but it is not at ' .
			'the end of the file. It could be an OLE file with a ZIP file embedded.' );
	}

	$singleDisk = $this->eocdr['disk'] === 0 && $this->eocdr['CD start disk'] === 0;
	if ( !$singleDisk ) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
	}

	$commentLayout = [ 'file comment' => [ 'string', $this->eocdr['file comment length'] ] ];
	$this->eocdr += $this->unpack( $tail, $commentLayout, $sigPos + $fixedSize );
	$this->eocdr['position'] = $startPos + $sigPos;
}
252
/**
 * Read the ZIP64 "end of central directory locator" into
 * $this->eocdr64Locator.
 *
 * The locator is read from the bytes directly preceding the EOCDR, so
 * readEndOfCentralDirectoryRecord() must have run first.
 *
 * @throws ZipDirectoryReaderError If the locator signature is wrong.
 */
private function readZip64EndOfCentralDirectoryLocator() {
	$layout = [
		'signature' => [ 'string', 4 ],
		'eocdr64 start disk' => 4,
		'eocdr64 offset' => 8,
		'number of disks' => 4,
	];
	$locatorSize = $this->getStructSize( $layout );
	$locatorPos = $this->getFileLength() - $this->eocdr['EOCDR size'] - $locatorSize;

	$locator = $this->unpack( $this->getBlock( $locatorPos, $locatorSize ), $layout );
	$this->eocdr64Locator = $locator;

	if ( $locator['signature'] === "PK\x06\x07" ) {
		return;
	}

	// Note: Java will allow this and continue to read the
	// EOCDR64, so we have to reject the upload, we can't
	// just use the EOCDR header instead.
	$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
}
277
/**
 * Read the ZIP64 "end of central directory record" into $this->eocdr64.
 *
 * The record is read from the offset given by the previously loaded
 * locator ($this->eocdr64Locator).
 *
 * @throws ZipDirectoryReaderError If the archive spans several disks or
 *   the record signature is wrong.
 */
private function readZip64EndOfCentralDirectoryRecord() {
	// The locator's last field is the *total number of disks*, which is 1
	// for an ordinary single-disk archive. Only a count above 1 indicates
	// a multi-disk archive; 0 is tolerated for writers that leave it unset.
	// NOTE(review): this accepts files the previous "!= 0" check rejected;
	// confirm against the PKWARE APPNOTE before merging.
	if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
		|| $this->eocdr64Locator['number of disks'] > 1
	) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
	}

	$info = [
		'signature' => [ 'string', 4 ],
		'EOCDR64 size' => 8,
		'version made by' => 2,
		'version needed' => 2,
		'disk' => 4,
		'CD start disk' => 4,
		'CD entries this disk' => 8,
		'CD entries total' => 8,
		'CD size' => 8,
		'CD offset' => 8
	];
	$structSize = $this->getStructSize( $info );
	$block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
	$this->eocdr64 = $data = $this->unpack( $block, $info );
	if ( $data['signature'] !== "PK\x06\x06" ) {
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
	}
	// Unlike the locator, these two fields are disk *numbers* (zero-based),
	// so anything other than 0 means the archive is split.
	if ( $data['disk'] !== 0
		|| $data['CD start disk'] !== 0
	) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
	}
}
313
/**
 * Find the location of the central directory, as would be seen by a
 * non-ZIP64 reader.
 *
 * @return array Two-element list: [ offset, size ].
 * @throws ZipDirectoryReaderError If the directory does not end exactly
 *   where the EOCDR begins.
 */
private function findOldCentralDirectory() {
	[
		'CD size' => $cdSize,
		'CD offset' => $cdOffset,
		'position' => $eocdrPos,
	] = $this->eocdr;

	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	if ( $cdOffset + $cdSize == $eocdrPos ) {
		return [ $cdOffset, $cdSize ];
	}

	$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
		'of central directory record' );
}
334
/**
 * Find the location of the central directory, as would be seen by a
 * ZIP64-compliant reader.
 *
 * @return array Two-element list: [ offset, size ].
 * @throws ZipDirectoryReaderError If the directory does not end exactly
 *   where the (ZIP64) end of central directory record begins.
 */
private function findZip64CentralDirectory() {
	// The spec is ambiguous about the exact rules of precedence between the
	// ZIP64 headers and the original headers. Here we follow zip_util.c
	// from OpenJDK 7.
	$legacy = $this->eocdr;
	$cdSize = $legacy['CD size'];
	$cdOffset = $legacy['CD offset'];
	$directoryEnd = $legacy['position'];

	// Any saturated legacy field signals that the real values live in the
	// ZIP64 structures.
	$saturated = $cdSize == 0xffffffff
		|| $cdOffset == 0xffffffff
		|| $legacy['CD entries total'] == 0xffff;

	if ( $saturated ) {
		$this->readZip64EndOfCentralDirectoryLocator();

		if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
			$this->readZip64EndOfCentralDirectoryRecord();

			if ( isset( $this->eocdr64['CD offset'] ) ) {
				$cdSize = $this->eocdr64['CD size'];
				$cdOffset = $this->eocdr64['CD offset'];
				$directoryEnd = $this->eocdr64Locator['eocdr64 offset'];
			}
		}
	}

	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	if ( $cdOffset + $cdSize == $directoryEnd ) {
		return [ $cdOffset, $cdSize ];
	}

	$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
		'of central directory record' );
}
373
/**
 * Read the central directory at the given location and invoke the
 * callback once for every entry found.
 *
 * @param int|float $offset Byte offset of the directory within the file.
 * @param int|float $size Length of the directory in bytes.
 * @throws ZipDirectoryReaderError On a bad entry signature, an encrypted
 *   central directory, or truncated data.
 */
private function readCentralDirectory( $offset, $size ) {
	$directory = $this->getBlock( $offset, $size );

	$headerLayout = [
		'signature' => [ 'string', 4 ],
		'version made by' => 2,
		'version needed' => 2,
		'general bits' => 2,
		'compression method' => 2,
		'mod time' => 2,
		'mod date' => 2,
		'crc-32' => 4,
		'compressed size' => 4,
		'uncompressed size' => 4,
		'name length' => 2,
		'extra field length' => 2,
		'comment length' => 2,
		'disk number start' => 2,
		'internal attrs' => 2,
		'external attrs' => 4,
		'local header offset' => 4,
	];
	$headerSize = $this->getStructSize( $headerLayout );

	for ( $cursor = 0; $cursor < $size; ) {
		// Fixed-size part of the entry
		$entry = $this->unpack( $directory, $headerLayout, $cursor );
		$cursor += $headerSize;

		if ( $entry['signature'] !== "PK\x01\x02" ) {
			$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
		}

		// Variable-size part, whose lengths come from the fixed part
		$tailLayout = [
			'name' => [ 'string', $entry['name length'] ],
			'extra field' => [ 'string', $entry['extra field length'] ],
			'comment' => [ 'string', $entry['comment length'] ],
		];
		$entry += $this->unpack( $directory, $tailLayout, $cursor );
		$cursor += $this->getStructSize( $tailLayout );

		// In ZIP64 mode a saturated 32-bit field means the real value is
		// in the ZIP64 extra field, which then takes precedence.
		$saturated = $entry['compressed size'] == 0xffffffff
			|| $entry['uncompressed size'] == 0xffffffff
			|| $entry['local header offset'] == 0xffffffff;
		if ( $this->zip64 && $saturated ) {
			$zip64Fields = $this->unpackZip64Extra( $entry['extra field'] );
			if ( $zip64Fields ) {
				$entry = $zip64Fields + $entry;
			}
		}

		$flags = $entry['general bits'];
		if ( $this->testBit( $flags, self::GENERAL_CD_ENCRYPTED ) ) {
			$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
		}

		// Convert the timestamp into MediaWiki format
		// For the format, please see the MS-DOS 2.0 Programmer's Reference,
		// pages 3-5 and 3-6.
		$dosDate = $entry['mod date'];
		$dosTime = $entry['mod time'];
		$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
			1980 + ( $dosDate >> 9 ),
			( $dosDate >> 5 ) & 15,
			$dosDate & 31,
			( $dosTime >> 11 ) & 31,
			( $dosTime >> 5 ) & 63,
			( $dosTime & 31 ) * 2
		);

		// Names are UTF-8 when the flag says so, CP437 otherwise
		$name = $this->testBit( $flags, self::GENERAL_UTF8 )
			? $entry['name']
			: iconv( 'CP437', 'UTF-8', $entry['name'] );

		// Compile a data array for the user, with a sensible format
		$userData = [
			'name' => $name,
			'mtime' => $timestamp,
			'size' => $entry['uncompressed size'],
		];
		( $this->callback )( $userData );
	}
}
466
/**
 * Scan a raw "extra field" string for the ZIP64 record and unpack it.
 *
 * @param string $extraField Raw extra field bytes of a directory entry.
 * @return array|false The unpacked ZIP64 fields ('uncompressed size',
 *   'compressed size', 'local header offset', 'disk number start'), or
 *   false when no ZIP64 record is present.
 */
private function unpackZip64Extra( $extraField ) {
	$headerLayout = [
		'id' => 2,
		'size' => 2,
	];
	$headerSize = $this->getStructSize( $headerLayout );

	$zip64Layout = [
		'uncompressed size' => 8,
		'compressed size' => 8,
		'local header offset' => 8,
		'disk number start' => 4,
	];

	$total = strlen( $extraField );
	$cursor = 0;
	while ( $cursor < $total ) {
		// Each record is a 4-byte header followed by 'size' payload bytes
		$header = $this->unpack( $extraField, $headerLayout, $cursor );
		$payloadPos = $cursor + $headerSize;
		$payloadLayout = [ 'data' => [ 'string', $header['size'] ] ];
		$payload = $this->unpack( $extraField, $payloadLayout, $payloadPos )['data'];

		if ( $header['id'] == self::ZIP64_EXTRA_HEADER ) {
			return $this->unpack( $payload, $zip64Layout );
		}

		$cursor = $payloadPos + $header['size'];
	}

	return false;
}
502
/**
 * Get the length of the file, using fstat() on first use and the cached
 * value afterwards.
 *
 * @return int The file length in bytes as reported by fstat().
 * @throws ZipDirectoryReaderError With code 'zip-bad' if fstat() fails.
 */
private function getFileLength() {
	if ( $this->fileLength === null ) {
		$stat = fstat( $this->file );
		if ( $stat === false ) {
			// Without a length nothing can be located; fail explicitly
			// instead of indexing into false and caching null.
			$this->error( 'zip-bad', 'fstat() failed, unable to determine the file length' );
		}
		$this->fileLength = $stat['size'];
	}

	return $this->fileLength;
}
515
/**
 * Get the file contents from a given offset, assembling them from the
 * segment cache.
 *
 * @param int|float $start The byte offset at which to start reading.
 * @param int|float|null $length The number of bytes to read; null reads
 *   everything from $start to the end of the file.
 * @return string Exactly $length bytes.
 * @throws ZipDirectoryReaderError With code 'zip-bad' if the requested
 *   range lies outside the file or the data comes back short.
 */
private function getBlock( $start, $length = null ) {
	$fileLength = $this->getFileLength();
	// Offsets come from untrusted header arithmetic and may be negative;
	// reject them here rather than relying on fseek() to fail later.
	if ( $start < 0 || $start >= $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested position $start, " .
			"file length is $fileLength" );
	}
	$length ??= $fileLength - $start;
	$end = $start + $length;
	if ( $end > $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested end position $end, " .
			"file length is $fileLength" );
	}
	$startSeg = (int)floor( $start / self::SEGSIZE );
	// Exclusive upper bound: the segments [ $startSeg, $endSeg ) cover the
	// byte range [ $start, $end ), so no extra trailing segment is read.
	$endSeg = (int)ceil( $end / self::SEGSIZE );

	$block = '';
	for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
		$block .= $this->getSegment( $segIndex );
	}

	// Trim the leading bytes of the first segment and anything past $end
	$block = substr( $block,
		$start - $startSeg * self::SEGSIZE,
		$length );

	if ( strlen( $block ) < $length ) {
		$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
	}

	return $block;
}
556
/**
 * Get a section of the file starting at position $segIndex * SEGSIZE, of
 * length SEGSIZE (shorter only at the end of the file). The result is
 * cached in $this->buffer.
 *
 * @param int $segIndex Zero-based segment number.
 * @return string The segment contents; empty when the segment starts at
 *   or beyond the end of the file.
 * @throws ZipDirectoryReaderError With code 'zip-bad' if seeking or
 *   reading fails.
 */
private function getSegment( $segIndex ) {
	if ( !isset( $this->buffer[$segIndex] ) ) {
		$bytePos = $segIndex * self::SEGSIZE;
		if ( $bytePos >= $this->getFileLength() ) {
			$this->buffer[$segIndex] = '';

			return '';
		}
		if ( fseek( $this->file, $bytePos ) !== 0 ) {
			$this->error( 'zip-bad', "seek to $bytePos failed" );
		}

		// fread() may return fewer bytes than requested on streams that
		// are not plain files, so keep reading until the segment is full
		// or the stream ends. A short segment in the middle of the file
		// would otherwise misalign every following segment in getBlock().
		$seg = '';
		while ( strlen( $seg ) < self::SEGSIZE && !feof( $this->file ) ) {
			$chunk = fread( $this->file, self::SEGSIZE - strlen( $seg ) );
			if ( $chunk === false ) {
				$this->error( 'zip-bad', "read from $bytePos failed" );
			}
			if ( $chunk === '' ) {
				break;
			}
			$seg .= $chunk;
		}
		$this->buffer[$segIndex] = $seg;
	}

	return $this->buffer[$segIndex];
}
590
/**
 * Get the size of a structure in bytes. See unpack() for the format of
 * $struct.
 *
 * @param array $struct Map of field name to either an integer byte width
 *   or a [ type, width ] pair.
 * @return int Sum of all field widths.
 */
private function getStructSize( $struct ) {
	$total = 0;
	foreach ( $struct as $spec ) {
		// Array specs carry their width in the second slot
		$total += is_array( $spec ) ? $spec[1] : $spec;
	}

	return $total;
}
609
/**
 * Unpack a binary structure.
 *
 * @param string $string The binary data input.
 * @param array $struct Map of field name to type. An integer type is the
 *   byte width of an unsigned little-endian integer; an array type is
 *   [ 'string', width ] and yields the raw bytes.
 * @param int $offset The offset into $string at which to start unpacking.
 * @return array Map of field name to unpacked value.
 * @throws ZipDirectoryReaderError If the structure does not fit into the
 *   string ('zip-bad') or an integer exceeds 2 ** 52 ('zip-unsupported').
 * @throws UnexpectedValueException For an array type other than 'string'.
 */
private function unpack( $string, $struct, $offset = 0 ) {
	$needed = $this->getStructSize( $struct );
	if ( $offset + $needed > strlen( $string ) ) {
		$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
	}

	$fields = [];
	$cursor = $offset;
	foreach ( $struct as $fieldName => $spec ) {
		if ( is_array( $spec ) ) {
			[ $typeName, $width ] = $spec;
			if ( $typeName != 'string' ) {
				throw new UnexpectedValueException( __METHOD__ . ": invalid type \"$typeName\"" );
			}
			$fields[$fieldName] = substr( $string, $cursor, $width );
			$cursor += $width;
			continue;
		}

		// Unsigned little-endian integer
		$width = intval( $spec );

		// Accumulate from the most significant (last) byte downwards. The
		// arithmetic upgrades to floating point automatically if needed.
		$number = 0;
		for ( $byteIndex = $width; $byteIndex-- > 0; ) {
			$number = $number * 256 + ord( $string[$cursor + $byteIndex] );
		}

		// Throw an exception if there was loss of precision
		if ( $number > 2 ** 52 ) {
			$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
				'This could happen if we tried to unpack a 64-bit structure ' .
				'at an invalid location.' );
		}
		$fields[$fieldName] = $number;
		$cursor += $width;
	}

	return $fields;
}
674
/**
 * Returns a bit from a given position in an integer value, converted to
 * boolean.
 *
 * @param int $value The integer to inspect.
 * @param int $bitIndex The index of the bit, where 0 is the LSB.
 * @return bool True when the bit is set.
 */
private function testBit( $value, $bitIndex ) {
	$bit = ( $value >> $bitIndex ) & 1;

	return $bit === 1;
}
686}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Generic operation result class. Has a warning/error list, a boolean status and an arbitrary value.
A class for reading ZIP file directories, for the purposes of upload verification.
static readHandle( $file, $callback, $options=[])
Read an opened file handle presumed to be a ZIP and call a function for each file discovered in it.
static read( $fileName, $callback, $options=[])
Read a ZIP file and call a function for each file discovered in it.
int|null $fileLength
The cached length of the file, or null if it has not been loaded yet.
string[] $buffer
A segmented cache of the file contents.
resource $file
The opened file resource.
__construct( $file, $callback, $options)
callable $callback
The file data callback.