MediaWiki master
ZipDirectoryReader.php
Go to the documentation of this file.
1<?php
23namespace Wikimedia\Mime;
24
25use StatusValue;
26use UnexpectedValueException;
27
/**
 * Read a ZIP file and call a function for each file discovered in it.
 *
 * @param string $fileName Path of the file to open
 * @param callable $callback Invoked once per central directory entry with an
 *   array containing the keys 'name', 'mtime' and 'size'
 * @param array $options Supported key: 'zip64' (enables ZIP64 parsing)
 * @return StatusValue Good on success; fatal with an error code otherwise.
 *   A file that cannot be opened yields 'zip-file-open-error'.
 */
public static function read( $fileName, $callback, $options = [] ) {
	// ZIP archives are binary data, so request binary mode explicitly instead
	// of relying on the platform/SAPI default translation mode.
	$file = fopen( $fileName, 'rb' );
	$zdr = new self( $file, $callback, $options );
	return $zdr->execute();
}
99
/**
 * Read an opened file handle presumed to be a ZIP and call a function for
 * each file discovered in it.
 *
 * Note that the handle is closed by execute() once reading has finished.
 *
 * @param resource $file An opened, seekable file handle
 * @param callable $callback Invoked once per central directory entry with an
 *   array containing the keys 'name', 'mtime' and 'size'
 * @param array $options Supported key: 'zip64' (enables ZIP64 parsing)
 * @return StatusValue
 */
public static function readHandle( $file, $callback, $options = [] ) {
	return ( new self( $file, $callback, $options ) )->execute();
}
115
/** @var resource|false The opened file resource, or false if opening failed */
protected $file;

/** @var int|null The cached length of the file, or null if it has not been loaded yet */
protected $fileLength;

/** @var string[] A segmented cache of the file contents, keyed by segment index */
protected $buffer;

/** @var callable The file data callback */
protected $callback;

/** @var bool Whether ZIP64 structures are parsed (the 'zip64' option) */
protected $zip64 = false;

/** @var array Fields unpacked from the end of central directory record */
protected $eocdr;

/** @var array Fields unpacked from the ZIP64 end of central directory record */
protected $eocdr64;

/**
 * Declared explicitly because it is assigned while reading the locator;
 * leaving it undeclared would create a dynamic property.
 *
 * @var array Fields unpacked from the ZIP64 end of central directory locator
 */
protected $eocdr64Locator;

/** The "extra field" header ID that marks a ZIP64 extended information block */
private const ZIP64_EXTRA_HEADER = 0x0001;

/** The size in bytes of one segment of the file contents cache */
private const SEGSIZE = 16384;

/** The index of the "general bits" flag tested to detect UTF-8 encoded names */
private const GENERAL_UTF8 = 11;

/** The index of the "general bits" flag tested to detect central directory encryption */
private const GENERAL_CD_ENCRYPTED = 13;
149
/**
 * Store the handle, the callback and the recognised options.
 *
 * @param resource|false $file An opened file handle, or false if opening failed
 * @param callable $callback The file data callback
 * @param array $options Supported key: 'zip64'
 */
protected function __construct( $file, $callback, $options ) {
	$this->file = $file;
	$this->callback = $callback;
	// A missing (or null) 'zip64' option leaves the declared default in place.
	$this->zip64 = $options['zip64'] ?? $this->zip64;
}
163
/**
 * Read the file according to the parameters given in the constructor.
 *
 * Locates the central directory (through the ZIP64 structures when the
 * 'zip64' option is on, otherwise through the legacy EOCDR only) and reads
 * it, invoking the callback for each entry. The file handle is always
 * closed before returning, even if an unexpected exception propagates.
 *
 * @return StatusValue Good on success, or fatal with the error code of the
 *   ZipDirectoryReaderError that aborted reading
 */
private function execute() {
	if ( !$this->file ) {
		return StatusValue::newFatal( 'zip-file-open-error' );
	}

	$status = StatusValue::newGood();
	try {
		$this->readEndOfCentralDirectoryRecord();
		if ( $this->zip64 ) {
			[ $offset, $size ] = $this->findZip64CentralDirectory();
		} else {
			if ( $this->eocdr['CD size'] == 0xffffffff
				|| $this->eocdr['CD offset'] == 0xffffffff
				|| $this->eocdr['CD entries total'] == 0xffff
			) {
				$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
					'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
					'opening vulnerabilities on clients using OpenJDK 7 or later.' );
			}

			[ $offset, $size ] = $this->findOldCentralDirectory();
		}
		$this->readCentralDirectory( $offset, $size );
	} catch ( ZipDirectoryReaderError $e ) {
		$status->fatal( $e->getErrorCode() );
	} finally {
		// Release the handle on every exit path. Exceptions other than
		// ZipDirectoryReaderError (e.g. thrown by the callback) are not
		// caught above and would otherwise leak the open file.
		fclose( $this->file );
	}

	return $status;
}
201
/**
 * Log a debug message and abort reading by throwing an error exception.
 *
 * @param string $code The error code carried by the exception
 * @param string $debugMessage Human-readable detail, written to the debug log only
 * @throws ZipDirectoryReaderError Always
 */
private function error( $code, $debugMessage ) {
	$logLine = __CLASS__ . ": Fatal error: $debugMessage";
	wfDebug( $logLine );

	throw new ZipDirectoryReaderError( $code );
}
213
/**
 * Read the header which is at the end of the central directory,
 * unimaginatively called the "end of central directory record" (EOCDR).
 *
 * The last occurrence of the EOCDR signature is searched for in the tail of
 * the file (at most 65536 bytes plus the fixed record size). The result is
 * stored in $this->eocdr, extended with the synthetic keys 'EOCDR size',
 * 'file comment' and 'position'.
 *
 * @throws ZipDirectoryReaderError If the file is empty, has no EOCDR
 *   signature, has data following the EOCDR, or spans multiple disks
 */
private function readEndOfCentralDirectoryRecord() {
	$layout = [
		'signature' => 4,
		'disk' => 2,
		'CD start disk' => 2,
		'CD entries this disk' => 2,
		'CD entries total' => 2,
		'CD size' => 4,
		'CD offset' => 4,
		'file comment length' => 2,
	];
	$fixedSize = $this->getStructSize( $layout );
	$fileLength = $this->getFileLength();

	// Only the tail of the file can hold the record: a maximum-length
	// comment plus the fixed part of the structure.
	$searchStart = max( 0, $fileLength - 65536 - $fixedSize );

	if ( $fileLength === 0 ) {
		$this->error( 'zip-wrong-format', "The file is empty." );
	}

	$tail = $this->getBlock( $searchStart );
	$sigPos = strrpos( $tail, "PK\x05\x06" );
	if ( $sigPos === false ) {
		$this->error( 'zip-wrong-format',
			"zip file lacks EOCDR signature. It probably isn't a zip file." );
	}

	$this->eocdr = $this->unpack( substr( $tail, $sigPos ), $layout );
	$commentLength = $this->eocdr['file comment length'];
	$this->eocdr['EOCDR size'] = $fixedSize + $commentLength;

	$bytesAfterSignature = strlen( $tail ) - $sigPos;
	if ( $fixedSize + $commentLength != $bytesAfterSignature ) {
		// T40432: MS binary documents frequently embed ZIP files
		$this->error( 'zip-wrong-format', 'there is a ZIP signature but it is not at ' .
			'the end of the file. It could be an OLE file with a ZIP file embedded.' );
	}

	$spansDisks = $this->eocdr['disk'] !== 0 || $this->eocdr['CD start disk'] !== 0;
	if ( $spansDisks ) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
	}

	// The comment follows directly after the fixed part of the record.
	$commentInfo = [ 'file comment' => [ 'string', $commentLength ] ];
	$this->eocdr += $this->unpack( $tail, $commentInfo, $sigPos + $fixedSize );
	$this->eocdr['position'] = $searchStart + $sigPos;
}
266
/**
 * Read the header called the "ZIP64 end of central directory locator".
 * An error will be raised if it does not exist.
 *
 * The locator is read from the position immediately preceding the EOCDR
 * and stored in $this->eocdr64Locator.
 *
 * @throws ZipDirectoryReaderError If the locator signature is wrong
 */
private function readZip64EndOfCentralDirectoryLocator() {
	$layout = [
		'signature' => [ 'string', 4 ],
		'eocdr64 start disk' => 4,
		'eocdr64 offset' => 8,
		'number of disks' => 4,
	];
	$locatorSize = $this->getStructSize( $layout );
	$locatorPos = $this->getFileLength() - $this->eocdr['EOCDR size'] - $locatorSize;

	$locator = $this->unpack( $this->getBlock( $locatorPos, $locatorSize ), $layout );
	$this->eocdr64Locator = $locator;

	if ( $locator['signature'] === "PK\x06\x07" ) {
		return;
	}

	// Note: Java will allow this and continue to read the
	// EOCDR64, so we have to reject the upload, we can't
	// just use the EOCDR header instead.
	$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
}
291
/**
 * Read the header called the "ZIP64 end of central directory record". It
 * may replace the regular "end of central directory record" in ZIP64 files.
 *
 * The record is read from the offset given by the previously read locator
 * and stored in $this->eocdr64.
 *
 * @throws ZipDirectoryReaderError If the signature is wrong or more than
 *   one disk is indicated
 */
private function readZip64EndOfCentralDirectoryRecord() {
	$locator = $this->eocdr64Locator;
	// NOTE(review): the ZIP specification describes 'number of disks' as a
	// total count, so archives from some writers may carry 1 here - confirm
	// that rejecting every non-zero value is intended.
	if ( $locator['eocdr64 start disk'] != 0 || $locator['number of disks'] != 0 ) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
	}

	$layout = [
		'signature' => [ 'string', 4 ],
		'EOCDR64 size' => 8,
		'version made by' => 2,
		'version needed' => 2,
		'disk' => 4,
		'CD start disk' => 4,
		'CD entries this disk' => 8,
		'CD entries total' => 8,
		'CD size' => 8,
		'CD offset' => 8
	];
	$recordSize = $this->getStructSize( $layout );
	$rawRecord = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $recordSize );
	$record = $this->unpack( $rawRecord, $layout );
	$this->eocdr64 = $record;

	if ( $record['signature'] !== "PK\x06\x06" ) {
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
	}

	$spansDisks = $record['disk'] !== 0 || $record['CD start disk'] !== 0;
	if ( $spansDisks ) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
	}
}
327
/**
 * Find the location of the central directory, as would be seen by a
 * non-ZIP64 reader.
 *
 * @return array List containing offset and size
 * @throws ZipDirectoryReaderError If the directory does not end exactly
 *   where the EOCDR begins
 */
private function findOldCentralDirectory() {
	[
		'CD size' => $size,
		'CD offset' => $offset,
		'position' => $eocdrPos,
	] = $this->eocdr;

	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	if ( $offset + $size == $eocdrPos ) {
		return [ $offset, $size ];
	}

	$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
		'of central directory record' );
}
348
/**
 * Find the location of the central directory, as would be seen by a
 * ZIP64-compliant reader.
 *
 * @return array List containing offset and size
 * @throws ZipDirectoryReaderError If the directory does not end exactly
 *   where the applicable end-of-directory structure begins
 */
private function findZip64CentralDirectory() {
	// The spec is ambiguous about the exact rules of precedence between the
	// ZIP64 headers and the original headers. Here we follow zip_util.c
	// from OpenJDK 7.
	$legacy = $this->eocdr;
	$size = $legacy['CD size'];
	$offset = $legacy['CD offset'];
	$endPos = $legacy['position'];

	// A saturated legacy field means the real value lives in the ZIP64 headers.
	$legacyFieldsSaturated = $size == 0xffffffff
		|| $offset == 0xffffffff
		|| $legacy['CD entries total'] == 0xffff;

	if ( $legacyFieldsSaturated ) {
		$this->readZip64EndOfCentralDirectoryLocator();

		if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
			$this->readZip64EndOfCentralDirectoryRecord();
			if ( isset( $this->eocdr64['CD offset'] ) ) {
				$size = $this->eocdr64['CD size'];
				$offset = $this->eocdr64['CD offset'];
				$endPos = $this->eocdr64Locator['eocdr64 offset'];
			}
		}
	}

	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	if ( $offset + $size == $endPos ) {
		return [ $offset, $size ];
	}

	$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
		'of central directory record' );
}
387
/**
 * Read the central directory at the given location and invoke the callback
 * once for each entry, passing an array with the keys 'name' (converted to
 * UTF-8 unless already flagged as such), 'mtime' (14-digit timestamp) and
 * 'size' (the uncompressed size).
 *
 * @param int|float $offset Byte offset of the central directory in the file
 * @param int|float $size Size of the central directory in bytes
 * @throws ZipDirectoryReaderError If an entry has a bad signature, runs past
 *   the end of the directory, or the directory is flagged as encrypted
 */
private function readCentralDirectory( $offset, $size ) {
	$block = $this->getBlock( $offset, $size );

	$fixedInfo = [
		'signature' => [ 'string', 4 ],
		'version made by' => 2,
		'version needed' => 2,
		'general bits' => 2,
		'compression method' => 2,
		'mod time' => 2,
		'mod date' => 2,
		'crc-32' => 4,
		'compressed size' => 4,
		'uncompressed size' => 4,
		'name length' => 2,
		'extra field length' => 2,
		'comment length' => 2,
		'disk number start' => 2,
		'internal attrs' => 2,
		'external attrs' => 4,
		'local header offset' => 4,
	];
	$fixedSize = $this->getStructSize( $fixedInfo );

	$pos = 0;
	while ( $pos < $size ) {
		$data = $this->unpack( $block, $fixedInfo, $pos );
		$pos += $fixedSize;

		if ( $data['signature'] !== "PK\x01\x02" ) {
			$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
		}

		// The variable-length fields follow the fixed part, with lengths
		// taken from the fixed part.
		$variableInfo = [
			'name' => [ 'string', $data['name length'] ],
			'extra field' => [ 'string', $data['extra field length'] ],
			'comment' => [ 'string', $data['comment length'] ],
		];
		$data += $this->unpack( $block, $variableInfo, $pos );
		$pos += $this->getStructSize( $variableInfo );

		if ( $this->zip64 && (
			$data['compressed size'] == 0xffffffff
			|| $data['uncompressed size'] == 0xffffffff
			|| $data['local header offset'] == 0xffffffff )
		) {
			$zip64Data = $this->unpackZip64Extra( $data['extra field'] );
			if ( $zip64Data ) {
				// ZIP64 values take precedence over the saturated legacy ones
				$data = $zip64Data + $data;
			}
		}

		if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
			$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
		}

		$timestamp = $this->dosDateTimeToTimestamp( $data['mod date'], $data['mod time'] );

		// Convert the character set in the file name
		if ( $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) ) {
			$name = $data['name'];
		} else {
			$name = iconv( 'CP437', 'UTF-8', $data['name'] );
		}

		// Compile a data array for the user, with a sensible format
		$userData = [
			'name' => $name,
			'mtime' => $timestamp,
			'size' => $data['uncompressed size'],
		];
		( $this->callback )( $userData );
	}
}

/**
 * Convert a packed MS-DOS date/time pair into a 14-digit YYYYMMDDHHMMSS
 * timestamp string (MediaWiki format).
 *
 * For the format, please see the MS-DOS 2.0 Programmer's Reference,
 * pages 3-5 and 3-6.
 *
 * @param int $date Packed date: year since 1980 (bits 9 and up), month
 *   (bits 5-8), day (bits 0-4)
 * @param int $time Packed time: hour (bits 11-15), minute (bits 5-10),
 *   seconds divided by two (bits 0-4)
 * @return string
 */
private function dosDateTimeToTimestamp( $date, $time ) {
	$year = 1980 + ( $date >> 9 );
	$month = ( $date >> 5 ) & 15;
	$day = $date & 31;
	$hour = ( $time >> 11 ) & 31;
	$minute = ( $time >> 5 ) & 63;
	$second = ( $time & 31 ) * 2;

	return sprintf( "%04d%02d%02d%02d%02d%02d",
		$year, $month, $day, $hour, $minute, $second );
}
480
/**
 * Interpret ZIP64 "extra field" data and return an associative array.
 *
 * The extra field is walked as a sequence of (id, size, data) records; the
 * first record whose id is ZIP64_EXTRA_HEADER is unpacked and returned.
 *
 * @param string $extraField The raw extra field of a directory entry
 * @return array|bool The unpacked ZIP64 values, or false if no ZIP64
 *   record is present
 */
private function unpackZip64Extra( $extraField ) {
	$headerLayout = [
		'id' => 2,
		'size' => 2,
	];
	$headerSize = $this->getStructSize( $headerLayout );

	$zip64Layout = [
		'uncompressed size' => 8,
		'compressed size' => 8,
		'local header offset' => 8,
		'disk number start' => 4,
	];

	$totalLength = strlen( $extraField );
	$cursor = 0;
	while ( $cursor < $totalLength ) {
		$header = $this->unpack( $extraField, $headerLayout, $cursor );
		$cursor += $headerSize;

		// The payload length comes from the record's own header.
		$payloadLayout = [ 'data' => [ 'string', $header['size'] ] ];
		$payload = $this->unpack( $extraField, $payloadLayout, $cursor );
		$cursor += $header['size'];

		if ( $header['id'] == self::ZIP64_EXTRA_HEADER ) {
			return $this->unpack( $payload['data'], $zip64Layout );
		}
	}

	return false;
}
516
/**
 * Get the length of the file, caching it after the first lookup.
 *
 * @return int
 * @throws ZipDirectoryReaderError If the length cannot be determined
 */
private function getFileLength() {
	if ( $this->fileLength === null ) {
		$stat = fstat( $this->file );
		if ( $stat === false ) {
			// Without this check the length would silently become null,
			// would never be cached, and would only fail later with a
			// misleading message.
			$this->error( 'zip-bad', 'fstat() failed, unable to determine the file length' );
		}
		$this->fileLength = $stat['size'];
	}

	return $this->fileLength;
}
529
/**
 * Get the file contents from a given offset. If there are not enough bytes
 * in the file to satisfy the request, an error is raised.
 *
 * @param int|float $start The byte offset of the start of the block
 * @param int|float|null $length The number of bytes to return. If omitted,
 *   the remainder of the file will be returned.
 * @return string
 * @throws ZipDirectoryReaderError If the requested range is not entirely
 *   inside the file or the data could not be read
 */
private function getBlock( $start, $length = null ) {
	$fileLength = $this->getFileLength();
	// Reject negative positions explicitly (they can arise from subtracting
	// structure sizes on tiny files) instead of relying on a failed seek.
	if ( $start < 0 || $start >= $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested position $start, " .
			"file length is $fileLength" );
	}
	$length ??= $fileLength - $start;
	$end = $start + $length;
	if ( $end > $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested end position $end, " .
			"file length is $fileLength" );
	}
	if ( $length == 0 ) {
		// Nothing to read; avoids substr() on an empty buffer
		return '';
	}

	$startSeg = (int)floor( $start / self::SEGSIZE );
	// $end is exclusive, so this is one past the last segment that is needed
	$endSeg = (int)ceil( $end / self::SEGSIZE );

	$block = '';
	for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
		$block .= $this->getSegment( $segIndex );
	}

	// Trim the concatenated segments down to the requested range
	$block = substr( $block,
		$start - $startSeg * self::SEGSIZE,
		$length );

	if ( strlen( $block ) < $length ) {
		$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
	}

	return $block;
}
570
/**
 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
 * of length self::SEGSIZE. The result is cached, so repeated requests for
 * the same segment do not touch the file again.
 *
 * The returned string may be shorter than self::SEGSIZE for the last
 * segment of the file, and is empty for segments starting at or beyond the
 * end of the file.
 *
 * @param int $segIndex
 * @return string
 * @throws ZipDirectoryReaderError If seeking or reading fails
 */
private function getSegment( $segIndex ) {
	if ( !isset( $this->buffer[$segIndex] ) ) {
		$bytePos = $segIndex * self::SEGSIZE;
		if ( $bytePos >= $this->getFileLength() ) {
			$this->buffer[$segIndex] = '';

			return '';
		}
		if ( fseek( $this->file, $bytePos ) ) {
			$this->error( 'zip-bad', "seek to $bytePos failed" );
		}

		// A single fread() may return fewer bytes than requested on streams
		// that are not plain files, so keep reading until the segment is
		// complete or the stream has nothing more to give. A short segment
		// in the cache would misalign the segments joined by getBlock().
		$seg = '';
		while ( strlen( $seg ) < self::SEGSIZE && !feof( $this->file ) ) {
			$chunk = fread( $this->file, self::SEGSIZE - strlen( $seg ) );
			if ( $chunk === false ) {
				$this->error( 'zip-bad', "read from $bytePos failed" );
			}
			if ( $chunk === '' ) {
				break;
			}
			$seg .= $chunk;
		}
		$this->buffer[$segIndex] = $seg;
	}

	return $this->buffer[$segIndex];
}
604
/**
 * Get the size of a structure in bytes. See unpack() for the format of $struct.
 *
 * @param array $struct Map of field name to either a byte count (integer
 *   field) or a [ type, byte count ] pair
 * @return int
 */
private function getStructSize( $struct ) {
	$total = 0;
	foreach ( $struct as $fieldType ) {
		// Typed fields keep their byte count in the second element
		$total += is_array( $fieldType ) ? $fieldType[1] : $fieldType;
	}

	return $total;
}
623
/**
 * Unpack a binary structure. This is like the built-in unpack() function
 * except nicer.
 *
 * @param string $string The binary data input
 * @param array $struct Map of field name to field type. An integer type is
 *   the byte length of an unsigned little-endian integer; an array type is
 *   [ 'string', byte length ] and yields the raw bytes.
 * @param int $offset The offset into the string at which to start unpacking
 * @return array Unpacked associative array, keyed like $struct. Integer
 *   values may be floats when they do not fit in a native integer.
 * @throws ZipDirectoryReaderError If the structure runs past the end of
 *   $string, or an integer is too large to be held without loss of precision
 * @throws UnexpectedValueException If an array type other than 'string' is given
 */
private function unpack( $string, $struct, $offset = 0 ) {
	if ( $offset + $this->getStructSize( $struct ) > strlen( $string ) ) {
		$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
	}

	$fields = [];
	$cursor = $offset;
	foreach ( $struct as $fieldName => $fieldType ) {
		if ( is_array( $fieldType ) ) {
			[ $typeName, $byteCount ] = $fieldType;
			if ( $typeName != 'string' ) {
				throw new UnexpectedValueException( __METHOD__ . ": invalid type \"$typeName\"" );
			}
			$fields[$fieldName] = substr( $string, $cursor, $byteCount );
			$cursor += $byteCount;
			continue;
		}

		// Unsigned little-endian integer
		$byteCount = intval( $fieldType );

		// Accumulate from the most significant byte downwards. The
		// arithmetic automatically upgrades the value to floating point
		// if necessary.
		$number = 0;
		$index = $byteCount;
		while ( --$index >= 0 ) {
			$number = $number * 256 + ord( $string[$cursor + $index] );
		}

		// Throw an exception if there was loss of precision
		if ( $number > 2 ** 52 ) {
			$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
				'This could happen if we tried to unpack a 64-bit structure ' .
				'at an invalid location.' );
		}
		$fields[$fieldName] = $number;
		$cursor += $byteCount;
	}

	return $fields;
}
688
/**
 * Returns a bit from a given position in an integer value, converted to
 * boolean.
 *
 * @param int $value
 * @param int $bitIndex The index of the bit, where 0 is the LSB
 * @return bool
 */
private function testBit( $value, $bitIndex ) {
	$shifted = $value >> $bitIndex;

	return ( $shifted & 1 ) === 1;
}
700}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
A class for reading ZIP file directories, for the purposes of upload verification.
static readHandle( $file, $callback, $options=[])
Read an opened file handle presumed to be a ZIP and call a function for each file discovered in it.
static read( $fileName, $callback, $options=[])
Read a ZIP file and call a function for each file discovered in it.
int|null $fileLength
The cached length of the file, or null if it has not been loaded yet.
string[] $buffer
A segmented cache of the file contents.
resource $file
The opened file resource.
__construct( $file, $callback, $options)
callable $callback
The file data callback.