MediaWiki 1.42.1
ZipDirectoryReader.php
Go to the documentation of this file.
1<?php
24
/**
 * Read a ZIP file and call a function for each file discovered in it.
 *
 * The file is opened read-only. If it cannot be opened, the returned
 * status is fatal with the message key 'zip-file-open-error'.
 *
 * @param string $fileName The path of the file to read
 * @param callable $callback Called once per central directory entry with
 *   a single array argument holding the keys 'name', 'mtime' and 'size'
 * @param array $options Recognised key: 'zip64', which switches the reader
 *   into ZIP64 mode when true
 * @return Status
 */
public static function read( $fileName, $callback, $options = [] ) {
	$handle = fopen( $fileName, 'r' );

	return ( new self( $handle, $callback, $options ) )->execute();
}
95
/**
 * Read an opened file handle presumed to be a ZIP and call a function for
 * each file discovered in it.
 *
 * Note that the handle is closed once reading has finished.
 *
 * @param resource $file The opened file resource
 * @param callable $callback Called once per central directory entry with
 *   a single array argument holding the keys 'name', 'mtime' and 'size'
 * @param array $options Recognised key: 'zip64', which switches the reader
 *   into ZIP64 mode when true
 * @return Status
 */
public static function readHandle( $file, $callback, $options = [] ) {
	$reader = new self( $file, $callback, $options );
	$result = $reader->execute();

	return $result;
}
111
/** Header ID that identifies the ZIP64 record inside an extra field */
private const ZIP64_EXTRA_HEADER = 0x0001;

/** Number of bytes in one segment of the file contents cache */
private const SEGSIZE = 16384;

/** Index of the "general bits" flag that marks file names as UTF-8 */
private const GENERAL_UTF8 = 11;

/** Index of the "general bits" flag that marks an encrypted central directory */
private const GENERAL_CD_ENCRYPTED = 13;

/** @var resource|false The opened file resource */
protected $file;

/** @var int|null The cached length of the file, or null if it has not been loaded yet */
protected $fileLength;

/** @var string[] A segmented cache of the file contents, keyed by segment index */
protected $buffer;

/** @var callable The file data callback */
protected $callback;

/** @var bool Whether ZIP64 structures are parsed; set from the 'zip64' option */
protected $zip64 = false;

// NOTE(review): $eocdr, $eocdr64 and $eocdr64Locator are assigned by the
// reader methods, but no declaration for them is visible in this listing.
// Confirm they are declared elsewhere in the class.

/** @var array Reset to an empty array at the start of execute() */
protected $data;
143
/**
 * Store the file handle, the callback and the reader options.
 *
 * @param resource|false $file The opened file resource
 * @param callable $callback The file data callback
 * @param array $options Recognised key: 'zip64'. When it is absent or
 *   null the default (legacy, non-ZIP64 mode) is kept.
 */
protected function __construct( $file, $callback, $options ) {
	$this->callback = $callback;
	$this->file = $file;
	$this->zip64 = $options['zip64'] ?? $this->zip64;
}
157
/**
 * Read the directory according to settings in $this.
 *
 * Parse errors raised through error() are converted into a fatal Status
 * carrying the error's message key. The file handle is always closed
 * before this method returns or lets an exception propagate, including
 * exceptions thrown by the user callback.
 *
 * @return Status
 */
private function execute() {
	$this->data = [];
	if ( !$this->file ) {
		return Status::newFatal( 'zip-file-open-error' );
	}

	$status = Status::newGood();
	try {
		$this->readEndOfCentralDirectoryRecord();
		if ( $this->zip64 ) {
			[ $offset, $size ] = $this->findZip64CentralDirectory();
		} else {
			// Sentinel values in the EOCDR mean the real values live in
			// the ZIP64 structures, which legacy mode does not read.
			if ( $this->eocdr['CD size'] == 0xffffffff
				|| $this->eocdr['CD offset'] == 0xffffffff
				|| $this->eocdr['CD entries total'] == 0xffff
			) {
				$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
					'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
					'opening vulnerabilities on clients using OpenJDK 7 or later.' );
			}

			[ $offset, $size ] = $this->findOldCentralDirectory();
		}
		$this->readCentralDirectory( $offset, $size );
	} catch ( ZipDirectoryReaderError $e ) {
		$status->fatal( $e->getErrorCode() );
	} finally {
		// Release the handle even if an unrelated exception (for example
		// one thrown by the callback) is passing through.
		fclose( $this->file );
	}

	return $status;
}
196
/**
 * Log a debug message and abort parsing by throwing an error exception.
 *
 * @param string $code The code handed to the thrown exception; execute()
 *   turns it into a fatal Status message
 * @param string $debugMessage Text written to the debug log only
 * @throws ZipDirectoryReaderError Always
 */
private function error( $code, $debugMessage ) {
	$logLine = __CLASS__ . ": Fatal error: $debugMessage";
	wfDebug( $logLine );

	throw new ZipDirectoryReaderError( $code );
}
208
/**
 * Locate and parse the end of central directory record (EOCDR), storing
 * the result in $this->eocdr.
 *
 * Only the tail of the file is searched: the fixed record size plus
 * 65536 bytes. The last occurrence of the signature in that tail is used,
 * and the record must extend exactly to the end of the file.
 *
 * @throws ZipDirectoryReaderError
 */
private function readEndOfCentralDirectoryRecord() {
	$layout = [
		'signature' => 4,
		'disk' => 2,
		'CD start disk' => 2,
		'CD entries this disk' => 2,
		'CD entries total' => 2,
		'CD size' => 4,
		'CD offset' => 4,
		'file comment length' => 2,
	];
	$fixedLength = $this->getStructSize( $layout );
	$fileLength = $this->getFileLength();

	if ( $fileLength === 0 ) {
		$this->error( 'zip-wrong-format', "The file is empty." );
	}

	// Clamp the search window to the start of the file for short files
	$searchStart = max( 0, $fileLength - 65536 - $fixedLength );
	$tail = $this->getBlock( $searchStart );

	$sigOffset = strrpos( $tail, "PK\x05\x06" );
	if ( $sigOffset === false ) {
		$this->error( 'zip-wrong-format',
			"zip file lacks EOCDR signature. It probably isn't a zip file." );
	}

	$this->eocdr = $this->unpack( substr( $tail, $sigOffset ), $layout );
	$recordLength = $fixedLength + $this->eocdr['file comment length'];
	$this->eocdr['EOCDR size'] = $recordLength;

	$bytesAfterSignature = strlen( $tail ) - $sigOffset;
	if ( $recordLength != $bytesAfterSignature ) {
		// T40432: MS binary documents frequently embed ZIP files
		$this->error( 'zip-wrong-format', 'there is a ZIP signature but it is not at ' .
			'the end of the file. It could be an OLE file with a ZIP file embedded.' );
	}

	$singleDisk = $this->eocdr['disk'] === 0 && $this->eocdr['CD start disk'] === 0;
	if ( !$singleDisk ) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
	}

	// The variable-length comment follows the fixed part of the record
	$commentLayout = [ 'file comment' => [ 'string', $this->eocdr['file comment length'] ] ];
	$this->eocdr += $this->unpack( $tail, $commentLayout, $sigOffset + $fixedLength );
	$this->eocdr['position'] = $searchStart + $sigOffset;
}
261
/**
 * Parse the ZIP64 end of central directory locator and store it in
 * $this->eocdr64Locator.
 *
 * The locator is read from the bytes directly in front of the EOCDR,
 * which must already have been read into $this->eocdr.
 *
 * @throws ZipDirectoryReaderError
 */
private function readZip64EndOfCentralDirectoryLocator() {
	$layout = [
		'signature' => [ 'string', 4 ],
		'eocdr64 start disk' => 4,
		'eocdr64 offset' => 8,
		'number of disks' => 4,
	];
	$length = $this->getStructSize( $layout );
	$position = $this->getFileLength() - $this->eocdr['EOCDR size'] - $length;

	$locator = $this->unpack( $this->getBlock( $position, $length ), $layout );
	$this->eocdr64Locator = $locator;

	if ( $locator['signature'] !== "PK\x06\x07" ) {
		// Note: Java will allow this and continue to read the
		// EOCDR64, so we have to reject the upload, we can't
		// just use the EOCDR header instead.
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
	}
}
286
/**
 * Parse the ZIP64 end of central directory record at the offset named by
 * the locator, and store it in $this->eocdr64.
 *
 * @throws ZipDirectoryReaderError
 */
private function readZip64EndOfCentralDirectoryRecord() {
	$locator = $this->eocdr64Locator;
	// NOTE(review): the ZIP specification describes 'number of disks' as
	// the total number of disks; confirm that requiring 0 here (rather
	// than also accepting 1) is the intended policy.
	$locatorIsSingleDisk = $locator['eocdr64 start disk'] == 0
		&& $locator['number of disks'] == 0;
	if ( !$locatorIsSingleDisk ) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
	}

	$layout = [
		'signature' => [ 'string', 4 ],
		'EOCDR64 size' => 8,
		'version made by' => 2,
		'version needed' => 2,
		'disk' => 4,
		'CD start disk' => 4,
		'CD entries this disk' => 8,
		'CD entries total' => 8,
		'CD size' => 8,
		'CD offset' => 8
	];
	$rawRecord = $this->getBlock( $locator['eocdr64 offset'], $this->getStructSize( $layout ) );
	$record = $this->unpack( $rawRecord, $layout );
	$this->eocdr64 = $record;

	if ( $record['signature'] !== "PK\x06\x06" ) {
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
	}

	$recordIsSingleDisk = $record['disk'] === 0 && $record['CD start disk'] === 0;
	if ( !$recordIsSingleDisk ) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
	}
}
322
/**
 * Find the location of the central directory, as would be seen by a
 * non-ZIP64 reader.
 *
 * @return array Two-element list: [ offset, size ]
 * @throws ZipDirectoryReaderError If the directory does not end exactly
 *   where the EOCDR begins
 */
private function findOldCentralDirectory() {
	[
		'CD size' => $size,
		'CD offset' => $offset,
		'position' => $eocdrPosition,
	] = $this->eocdr;

	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	$directoryEnd = $offset + $size;
	if ( $directoryEnd != $eocdrPosition ) {
		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
			'of central directory record' );
	}

	return [ $offset, $size ];
}
343
/**
 * Find the location of the central directory, as would be seen by a
 * ZIP64-compliant reader.
 *
 * The ZIP64 structures are only consulted when the plain EOCDR holds a
 * sentinel value in its size, offset or entry count field.
 *
 * @return array Two-element list: [ offset, size ]
 * @throws ZipDirectoryReaderError
 */
private function findZip64CentralDirectory() {
	// The spec is ambiguous about the exact rules of precedence between the
	// ZIP64 headers and the original headers. Here we follow zip_util.c
	// from OpenJDK 7.
	$size = $this->eocdr['CD size'];
	$offset = $this->eocdr['CD offset'];
	$endPos = $this->eocdr['position'];

	$hasSentinel = $size == 0xffffffff
		|| $offset == 0xffffffff
		|| $this->eocdr['CD entries total'] == 0xffff;

	if ( $hasSentinel ) {
		$this->readZip64EndOfCentralDirectoryLocator();

		if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
			$this->readZip64EndOfCentralDirectoryRecord();

			if ( isset( $this->eocdr64['CD offset'] ) ) {
				// The directory must now end where the EOCDR64 begins
				$size = $this->eocdr64['CD size'];
				$offset = $this->eocdr64['CD offset'];
				$endPos = $this->eocdr64Locator['eocdr64 offset'];
			}
		}
	}

	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	$directoryEnd = $offset + $size;
	if ( $directoryEnd != $endPos ) {
		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
			'of central directory record' );
	}

	return [ $offset, $size ];
}
382
/**
 * Read the central directory at the given location and invoke the
 * callback once for every entry found in it.
 *
 * @param int|float $offset Position of the directory within the file
 * @param int|float $size Length of the directory in bytes
 * @throws ZipDirectoryReaderError
 */
private function readCentralDirectory( $offset, $size ) {
	$directory = $this->getBlock( $offset, $size );

	$headerLayout = [
		'signature' => [ 'string', 4 ],
		'version made by' => 2,
		'version needed' => 2,
		'general bits' => 2,
		'compression method' => 2,
		'mod time' => 2,
		'mod date' => 2,
		'crc-32' => 4,
		'compressed size' => 4,
		'uncompressed size' => 4,
		'name length' => 2,
		'extra field length' => 2,
		'comment length' => 2,
		'disk number start' => 2,
		'internal attrs' => 2,
		'external attrs' => 4,
		'local header offset' => 4,
	];
	$headerSize = $this->getStructSize( $headerLayout );

	for ( $cursor = 0; $cursor < $size; ) {
		// Fixed-size part of the entry
		$entry = $this->unpack( $directory, $headerLayout, $cursor );
		$cursor += $headerSize;

		if ( $entry['signature'] !== "PK\x01\x02" ) {
			$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
		}

		// Variable-size part, whose lengths come from the fixed part
		$tailLayout = [
			'name' => [ 'string', $entry['name length'] ],
			'extra field' => [ 'string', $entry['extra field length'] ],
			'comment' => [ 'string', $entry['comment length'] ],
		];
		$entry += $this->unpack( $directory, $tailLayout, $cursor );
		$cursor += $this->getStructSize( $tailLayout );

		if ( $this->zip64 ) {
			$hasSentinel = $entry['compressed size'] == 0xffffffff
				|| $entry['uncompressed size'] == 0xffffffff
				|| $entry['local header offset'] == 0xffffffff;
			if ( $hasSentinel ) {
				$zip64Fields = $this->unpackZip64Extra( $entry['extra field'] );
				if ( $zip64Fields ) {
					// ZIP64 values take precedence over the 32-bit fields
					$entry = $zip64Fields + $entry;
				}
			}
		}

		if ( $this->testBit( $entry['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
			$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
		}

		// Convert the timestamp into MediaWiki format
		// For the format, please see the MS-DOS 2.0 Programmer's Reference,
		// pages 3-5 and 3-6.
		$dosDate = $entry['mod date'];
		$dosTime = $entry['mod time'];
		$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
			1980 + ( $dosDate >> 9 ),
			( $dosDate >> 5 ) & 15,
			$dosDate & 31,
			( $dosTime >> 11 ) & 31,
			( $dosTime >> 5 ) & 63,
			( $dosTime & 31 ) * 2 );

		// Convert the character set in the file name
		$name = $this->testBit( $entry['general bits'], self::GENERAL_UTF8 )
			? $entry['name']
			: iconv( 'CP437', 'UTF-8', $entry['name'] );

		// Compile a data array for the user, with a sensible format
		$userData = [
			'name' => $name,
			'mtime' => $timestamp,
			'size' => $entry['uncompressed size'],
		];
		call_user_func( $this->callback, $userData );
	}
}
475
/**
 * Walk the records of an extra field and decode the first ZIP64 record.
 *
 * @param string $extraField The raw extra field of a directory entry
 * @return array|bool The decoded ZIP64 values ('uncompressed size',
 *   'compressed size', 'local header offset', 'disk number start'), or
 *   false if the extra field holds no ZIP64 record
 * @throws ZipDirectoryReaderError If a record runs past the end of the
 *   field or the ZIP64 record is shorter than the full structure
 */
private function unpackZip64Extra( $extraField ) {
	$headerLayout = [
		'id' => 2,
		'size' => 2,
	];
	$headerSize = $this->getStructSize( $headerLayout );

	$zip64Layout = [
		'uncompressed size' => 8,
		'compressed size' => 8,
		'local header offset' => 8,
		'disk number start' => 4,
	];

	$fieldLength = strlen( $extraField );
	for ( $cursor = 0; $cursor < $fieldLength; ) {
		$header = $this->unpack( $extraField, $headerLayout, $cursor );
		$cursor += $headerSize;

		$payloadLayout = [ 'data' => [ 'string', $header['size'] ] ];
		$payload = $this->unpack( $extraField, $payloadLayout, $cursor );
		$cursor += $header['size'];

		if ( $header['id'] == self::ZIP64_EXTRA_HEADER ) {
			return $this->unpack( $payload['data'], $zip64Layout );
		}
	}

	return false;
}
511
/**
 * Get the length of the file, loading it with fstat() on first use and
 * caching it afterwards.
 *
 * @return int
 * @throws ZipDirectoryReaderError If the length cannot be determined
 */
private function getFileLength() {
	if ( $this->fileLength === null ) {
		$stat = fstat( $this->file );
		if ( $stat === false ) {
			// Without this check the length would stay null and every
			// later bounds comparison would silently run against null.
			$this->error( 'zip-bad', 'fstat() failed, unable to determine the file length' );
		}
		$this->fileLength = $stat['size'];
	}

	return $this->fileLength;
}
524
/**
 * Get the file contents from a given offset.
 *
 * The data is assembled from cached segments of self::SEGSIZE bytes, so
 * repeated reads of nearby ranges do not touch the file again.
 *
 * @param int|float $start The position in the file to start from
 * @param int|float|null $length The number of bytes to return. If omitted,
 *   everything from $start to the end of the file is returned.
 * @return string Exactly $length bytes
 * @throws ZipDirectoryReaderError If the requested range lies outside the
 *   file or less data than requested could be read
 */
private function getBlock( $start, $length = null ) {
	$fileLength = $this->getFileLength();
	// A negative start can be produced by offset arithmetic on very small
	// files; reject it here instead of failing later on a bogus seek.
	if ( $start < 0 || $start >= $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested position $start, " .
			"file length is $fileLength" );
	}
	if ( $length === null ) {
		$length = $fileLength - $start;
	}
	$end = $start + $length;
	if ( $end > $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested end position $end, " .
			"file length is $fileLength" );
	}
	$startSeg = (int)floor( $start / self::SEGSIZE );
	// Exclusive upper bound: the first segment that holds no requested byte
	$endSeg = (int)ceil( $end / self::SEGSIZE );

	$block = '';
	for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
		$block .= $this->getSegment( $segIndex );
	}

	// Trim the leading bytes of the first segment and anything past $end
	$block = substr( $block,
		$start - $startSeg * self::SEGSIZE,
		$length );

	if ( strlen( $block ) < $length ) {
		$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
	}

	return $block;
}
567
/**
 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
 * of length self::SEGSIZE (possibly shorter for the last segment).
 *
 * Results are kept in $this->buffer, so each segment is read from the
 * file at most once. A segment that starts at or beyond the end of the
 * file is returned (and cached) as an empty string.
 *
 * @param int $segIndex
 * @return string
 * @throws ZipDirectoryReaderError If seeking or reading fails
 */
private function getSegment( $segIndex ) {
	if ( isset( $this->buffer[$segIndex] ) ) {
		return $this->buffer[$segIndex];
	}

	$bytePos = $segIndex * self::SEGSIZE;
	if ( $bytePos >= $this->getFileLength() ) {
		$this->buffer[$segIndex] = '';

		return '';
	}

	// fseek() returns 0 on success
	if ( fseek( $this->file, $bytePos ) !== 0 ) {
		$this->error( 'zip-bad', "seek to $bytePos failed" );
	}

	$contents = fread( $this->file, self::SEGSIZE );
	if ( $contents === false ) {
		$this->error( 'zip-bad', "read from $bytePos failed" );
	}
	$this->buffer[$segIndex] = $contents;

	return $contents;
}
601
/**
 * Get the size of a structure in bytes. See unpack() for the format of
 * $struct.
 *
 * @param array $struct Map of field name to either a byte count or a
 *   [ type, byte count ] pair
 * @return int The sum of all field sizes
 */
private function getStructSize( $struct ) {
	$total = 0;
	foreach ( $struct as $field ) {
		// Array fields carry their size in the second element
		$total += is_array( $field ) ? $field[1] : $field;
	}

	return $total;
}
620
/**
 * Unpack a binary structure from a string.
 *
 * Each element of $struct maps a field name to its type:
 *   - An integer N: an unsigned little-endian integer of N bytes.
 *   - [ 'string', N ]: a raw byte string of N bytes.
 *
 * @param string $string The binary data
 * @param array $struct The structure layout, as described above
 * @param int $offset The position in $string at which the structure starts
 * @return array Map of field name to decoded value
 * @throws ZipDirectoryReaderError If the structure does not fit into
 *   $string, or an integer is larger than 2 ** 52
 * @throws UnexpectedValueException If $struct names an unknown type
 */
private function unpack( $string, $struct, $offset = 0 ) {
	$needed = $this->getStructSize( $struct );
	if ( $offset + $needed > strlen( $string ) ) {
		$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
	}

	$fields = [];
	$cursor = $offset;
	foreach ( $struct as $key => $type ) {
		if ( is_array( $type ) ) {
			[ $typeName, $fieldSize ] = $type;
			if ( $typeName !== 'string' ) {
				throw new UnexpectedValueException( __METHOD__ . ": invalid type \"$typeName\"" );
			}
			$fields[$key] = substr( $string, $cursor, $fieldSize );
			$cursor += $fieldSize;
			continue;
		}

		// Unsigned little-endian integer
		$byteCount = intval( $type );

		// Accumulate from the most significant byte downwards. PHP
		// upgrades the value to floating point by itself if the integer
		// range is exceeded.
		$number = 0;
		$index = $byteCount;
		while ( $index-- > 0 ) {
			$number = $number * 256 + ord( $string[$cursor + $index] );
		}

		// Throw an exception if there was loss of precision
		if ( $number > 2 ** 52 ) {
			$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
				'This could happen if we tried to unpack a 64-bit structure ' .
				'at an invalid location.' );
		}
		$fields[$key] = $number;
		$cursor += $byteCount;
	}

	return $fields;
}
685
/**
 * Report whether a single bit of an integer is set.
 *
 * @param int $value The integer to inspect
 * @param int $bitIndex The index of the bit, where 0 is the least
 *   significant bit
 * @return bool True if the bit is set
 */
private function testBit( $value, $bitIndex ) {
	$bit = ( $value >> $bitIndex ) & 1;

	return $bit === 1;
}
697}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Generic operation result class. Has a warning/error list, a boolean status and an arbitrary value.
Definition Status.php:54
Internal exception class.
A class for reading ZIP file directories, for the purposes of upload verification.
$callback
The file data callback.
static readHandle( $file, $callback, $options=[])
Read an opened file handle presumed to be a ZIP and call a function for each file discovered in it.
$file
The opened file resource.
static read( $fileName, $callback, $options=[])
Read a ZIP file and call a function for each file discovered in it.
$buffer
A segmented cache of the file contents.
__construct( $file, $callback, $options)
$fileLength
The cached length of the file, or null if it has not been loaded yet.