MediaWiki master
ZipDirectoryReader.php
Go to the documentation of this file.
1<?php
24
90 public static function read( $fileName, $callback, $options = [] ) {
91 $file = fopen( $fileName, 'r' );
92 $zdr = new self( $file, $callback, $options );
93 return $zdr->execute();
94 }
95
107 public static function readHandle( $file, $callback, $options = [] ) {
108 $zdr = new self( $file, $callback, $options );
109 return $zdr->execute();
110 }
111
113 protected $file;
114
116 protected $fileLength;
117
119 protected $buffer;
120
122 protected $callback;
123
125 protected $zip64 = false;
126
128 protected $eocdr;
129 protected $eocdr64;
131
132 protected $data;
133
135 private const ZIP64_EXTRA_HEADER = 0x0001;
136
138 private const SEGSIZE = 16384;
139
141 private const GENERAL_UTF8 = 11;
142
144 private const GENERAL_CD_ENCRYPTED = 13;
145
151 protected function __construct( $file, $callback, $options ) {
152 $this->file = $file;
153 $this->callback = $callback;
154
155 if ( isset( $options['zip64'] ) ) {
156 $this->zip64 = $options['zip64'];
157 }
158 }
159
165 private function execute() {
166 $this->data = [];
167 if ( !$this->file ) {
168 return Status::newFatal( 'zip-file-open-error' );
169 }
170
171 $status = Status::newGood();
172 try {
173 $this->readEndOfCentralDirectoryRecord();
174 if ( $this->zip64 ) {
175 [ $offset, $size ] = $this->findZip64CentralDirectory();
176 $this->readCentralDirectory( $offset, $size );
177 } else {
178 if ( $this->eocdr['CD size'] == 0xffffffff
179 || $this->eocdr['CD offset'] == 0xffffffff
180 || $this->eocdr['CD entries total'] == 0xffff
181 ) {
182 $this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
183 'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
184 'opening vulnerabilities on clients using OpenJDK 7 or later.' );
185 }
186
187 [ $offset, $size ] = $this->findOldCentralDirectory();
188 $this->readCentralDirectory( $offset, $size );
189 }
190 } catch ( ZipDirectoryReaderError $e ) {
191 $status->fatal( $e->getErrorCode() );
192 }
193
194 fclose( $this->file );
195
196 return $status;
197 }
198
206 private function error( $code, $debugMessage ) {
207 wfDebug( __CLASS__ . ": Fatal error: $debugMessage" );
208 throw new ZipDirectoryReaderError( $code );
209 }
210
216 private function readEndOfCentralDirectoryRecord() {
217 $info = [
218 'signature' => 4,
219 'disk' => 2,
220 'CD start disk' => 2,
221 'CD entries this disk' => 2,
222 'CD entries total' => 2,
223 'CD size' => 4,
224 'CD offset' => 4,
225 'file comment length' => 2,
226 ];
227 $structSize = $this->getStructSize( $info );
228 $startPos = $this->getFileLength() - 65536 - $structSize;
229 if ( $startPos < 0 ) {
230 $startPos = 0;
231 }
232
233 if ( $this->getFileLength() === 0 ) {
234 $this->error( 'zip-wrong-format', "The file is empty." );
235 }
236
237 $block = $this->getBlock( $startPos );
238 $sigPos = strrpos( $block, "PK\x05\x06" );
239 if ( $sigPos === false ) {
240 $this->error( 'zip-wrong-format',
241 "zip file lacks EOCDR signature. It probably isn't a zip file." );
242 }
243
244 $this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
245 $this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];
246
247 if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) {
248 // T40432: MS binary documents frequently embed ZIP files
249 $this->error( 'zip-wrong-format', 'there is a ZIP signature but it is not at ' .
250 'the end of the file. It could be an OLE file with a ZIP file embedded.' );
251 }
252 if ( $this->eocdr['disk'] !== 0
253 || $this->eocdr['CD start disk'] !== 0
254 ) {
255 $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
256 }
257 $this->eocdr += $this->unpack(
258 $block,
259 [ 'file comment' => [ 'string', $this->eocdr['file comment length'] ] ],
260 $sigPos + $structSize );
261 $this->eocdr['position'] = $startPos + $sigPos;
262 }
263
268 private function readZip64EndOfCentralDirectoryLocator() {
269 $info = [
270 'signature' => [ 'string', 4 ],
271 'eocdr64 start disk' => 4,
272 'eocdr64 offset' => 8,
273 'number of disks' => 4,
274 ];
275 $structSize = $this->getStructSize( $info );
276
277 $start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize;
278 $block = $this->getBlock( $start, $structSize );
279 $this->eocdr64Locator = $data = $this->unpack( $block, $info );
280
281 if ( $data['signature'] !== "PK\x06\x07" ) {
282 // Note: Java will allow this and continue to read the
283 // EOCDR64, so we have to reject the upload, we can't
284 // just use the EOCDR header instead.
285 $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
286 }
287 }
288
293 private function readZip64EndOfCentralDirectoryRecord() {
294 if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
295 || $this->eocdr64Locator['number of disks'] != 0
296 ) {
297 $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
298 }
299
300 $info = [
301 'signature' => [ 'string', 4 ],
302 'EOCDR64 size' => 8,
303 'version made by' => 2,
304 'version needed' => 2,
305 'disk' => 4,
306 'CD start disk' => 4,
307 'CD entries this disk' => 8,
308 'CD entries total' => 8,
309 'CD size' => 8,
310 'CD offset' => 8
311 ];
312 $structSize = $this->getStructSize( $info );
313 $block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
314 $this->eocdr64 = $data = $this->unpack( $block, $info );
315 if ( $data['signature'] !== "PK\x06\x06" ) {
316 $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
317 }
318 if ( $data['disk'] !== 0
319 || $data['CD start disk'] !== 0
320 ) {
321 $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
322 }
323 }
324
331 private function findOldCentralDirectory() {
332 $size = $this->eocdr['CD size'];
333 $offset = $this->eocdr['CD offset'];
334 $endPos = $this->eocdr['position'];
335
336 // Some readers use the EOCDR position instead of the offset field
337 // to find the directory, so to be safe, we check if they both agree.
338 if ( $offset + $size != $endPos ) {
339 $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
340 'of central directory record' );
341 }
342
343 return [ $offset, $size ];
344 }
345
352 private function findZip64CentralDirectory() {
353 // The spec is ambiguous about the exact rules of precedence between the
354 // ZIP64 headers and the original headers. Here we follow zip_util.c
355 // from OpenJDK 7.
356 $size = $this->eocdr['CD size'];
357 $offset = $this->eocdr['CD offset'];
358 $numEntries = $this->eocdr['CD entries total'];
359 $endPos = $this->eocdr['position'];
360 if ( $size == 0xffffffff
361 || $offset == 0xffffffff
362 || $numEntries == 0xffff
363 ) {
364 $this->readZip64EndOfCentralDirectoryLocator();
365
366 if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
367 $this->readZip64EndOfCentralDirectoryRecord();
368 if ( isset( $this->eocdr64['CD offset'] ) ) {
369 $size = $this->eocdr64['CD size'];
370 $offset = $this->eocdr64['CD offset'];
371 $endPos = $this->eocdr64Locator['eocdr64 offset'];
372 }
373 }
374 }
375 // Some readers use the EOCDR position instead of the offset field
376 // to find the directory, so to be safe, we check if they both agree.
377 if ( $offset + $size != $endPos ) {
378 $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
379 'of central directory record' );
380 }
381
382 return [ $offset, $size ];
383 }
384
390 private function readCentralDirectory( $offset, $size ) {
391 $block = $this->getBlock( $offset, $size );
392
393 $fixedInfo = [
394 'signature' => [ 'string', 4 ],
395 'version made by' => 2,
396 'version needed' => 2,
397 'general bits' => 2,
398 'compression method' => 2,
399 'mod time' => 2,
400 'mod date' => 2,
401 'crc-32' => 4,
402 'compressed size' => 4,
403 'uncompressed size' => 4,
404 'name length' => 2,
405 'extra field length' => 2,
406 'comment length' => 2,
407 'disk number start' => 2,
408 'internal attrs' => 2,
409 'external attrs' => 4,
410 'local header offset' => 4,
411 ];
412 $fixedSize = $this->getStructSize( $fixedInfo );
413
414 $pos = 0;
415 while ( $pos < $size ) {
416 $data = $this->unpack( $block, $fixedInfo, $pos );
417 $pos += $fixedSize;
418
419 if ( $data['signature'] !== "PK\x01\x02" ) {
420 $this->error( 'zip-bad', 'Invalid signature found in directory entry' );
421 }
422
423 $variableInfo = [
424 'name' => [ 'string', $data['name length'] ],
425 'extra field' => [ 'string', $data['extra field length'] ],
426 'comment' => [ 'string', $data['comment length'] ],
427 ];
428 $data += $this->unpack( $block, $variableInfo, $pos );
429 $pos += $this->getStructSize( $variableInfo );
430
431 if ( $this->zip64 && (
432 $data['compressed size'] == 0xffffffff
433 || $data['uncompressed size'] == 0xffffffff
434 || $data['local header offset'] == 0xffffffff )
435 ) {
436 $zip64Data = $this->unpackZip64Extra( $data['extra field'] );
437 if ( $zip64Data ) {
438 $data = $zip64Data + $data;
439 }
440 }
441
442 if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
443 $this->error( 'zip-unsupported', 'central directory encryption is not supported' );
444 }
445
446 // Convert the timestamp into MediaWiki format
447 // For the format, please see the MS-DOS 2.0 Programmer's Reference,
448 // pages 3-5 and 3-6.
449 $time = $data['mod time'];
450 $date = $data['mod date'];
451
452 $year = 1980 + ( $date >> 9 );
453 $month = ( $date >> 5 ) & 15;
454 $day = $date & 31;
455 $hour = ( $time >> 11 ) & 31;
456 $minute = ( $time >> 5 ) & 63;
457 $second = ( $time & 31 ) * 2;
458 $timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
459 $year, $month, $day, $hour, $minute, $second );
460
461 // Convert the character set in the file name
462 if ( $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) ) {
463 $name = $data['name'];
464 } else {
465 $name = iconv( 'CP437', 'UTF-8', $data['name'] );
466 }
467
468 // Compile a data array for the user, with a sensible format
469 $userData = [
470 'name' => $name,
471 'mtime' => $timestamp,
472 'size' => $data['uncompressed size'],
473 ];
474 call_user_func( $this->callback, $userData );
475 }
476 }
477
483 private function unpackZip64Extra( $extraField ) {
484 $extraHeaderInfo = [
485 'id' => 2,
486 'size' => 2,
487 ];
488 $extraHeaderSize = $this->getStructSize( $extraHeaderInfo );
489
490 $zip64ExtraInfo = [
491 'uncompressed size' => 8,
492 'compressed size' => 8,
493 'local header offset' => 8,
494 'disk number start' => 4,
495 ];
496
497 $extraPos = 0;
498 while ( $extraPos < strlen( $extraField ) ) {
499 $extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos );
500 $extraPos += $extraHeaderSize;
501 $extra += $this->unpack( $extraField,
502 [ 'data' => [ 'string', $extra['size'] ] ],
503 $extraPos );
504 $extraPos += $extra['size'];
505
506 if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) {
507 return $this->unpack( $extra['data'], $zip64ExtraInfo );
508 }
509 }
510
511 return false;
512 }
513
518 private function getFileLength() {
519 if ( $this->fileLength === null ) {
520 $stat = fstat( $this->file );
521 $this->fileLength = $stat['size'];
522 }
523
524 return $this->fileLength;
525 }
526
537 private function getBlock( $start, $length = null ) {
538 $fileLength = $this->getFileLength();
539 if ( $start >= $fileLength ) {
540 $this->error( 'zip-bad', "getBlock() requested position $start, " .
541 "file length is $fileLength" );
542 }
543 if ( $length === null ) {
544 $length = $fileLength - $start;
545 }
546 $end = $start + $length;
547 if ( $end > $fileLength ) {
548 $this->error( 'zip-bad', "getBlock() requested end position $end, " .
549 "file length is $fileLength" );
550 }
551 $startSeg = (int)floor( $start / self::SEGSIZE );
552 $endSeg = (int)ceil( $end / self::SEGSIZE );
553
554 $block = '';
555 for ( $segIndex = $startSeg; $segIndex <= $endSeg; $segIndex++ ) {
556 $block .= $this->getSegment( $segIndex );
557 }
558
559 $block = substr( $block,
560 $start - $startSeg * self::SEGSIZE,
561 $length );
562
563 if ( strlen( $block ) < $length ) {
564 $this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
565 }
566
567 return $block;
568 }
569
583 private function getSegment( $segIndex ) {
584 if ( !isset( $this->buffer[$segIndex] ) ) {
585 $bytePos = $segIndex * self::SEGSIZE;
586 if ( $bytePos >= $this->getFileLength() ) {
587 $this->buffer[$segIndex] = '';
588
589 return '';
590 }
591 if ( fseek( $this->file, $bytePos ) ) {
592 $this->error( 'zip-bad', "seek to $bytePos failed" );
593 }
594 $seg = fread( $this->file, self::SEGSIZE );
595 if ( $seg === false ) {
596 $this->error( 'zip-bad', "read from $bytePos failed" );
597 }
598 $this->buffer[$segIndex] = $seg;
599 }
600
601 return $this->buffer[$segIndex];
602 }
603
609 private function getStructSize( $struct ) {
610 $size = 0;
611 foreach ( $struct as $type ) {
612 if ( is_array( $type ) ) {
613 [ , $fieldSize ] = $type;
614 $size += $fieldSize;
615 } else {
616 $size += $type;
617 }
618 }
619
620 return $size;
621 }
622
643 private function unpack( $string, $struct, $offset = 0 ) {
644 $size = $this->getStructSize( $struct );
645 if ( $offset + $size > strlen( $string ) ) {
646 $this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
647 }
648
649 $data = [];
650 $pos = $offset;
651 foreach ( $struct as $key => $type ) {
652 if ( is_array( $type ) ) {
653 [ $typeName, $fieldSize ] = $type;
654 switch ( $typeName ) {
655 case 'string':
656 $data[$key] = substr( $string, $pos, $fieldSize );
657 $pos += $fieldSize;
658 break;
659 default:
660 throw new UnexpectedValueException( __METHOD__ . ": invalid type \"$typeName\"" );
661 }
662 } else {
663 // Unsigned little-endian integer
664 $length = intval( $type );
665
666 // Calculate the value. Use an algorithm which automatically
667 // upgrades the value to floating point if necessary.
668 $value = 0;
669 for ( $i = $length - 1; $i >= 0; $i-- ) {
670 $value *= 256;
671 $value += ord( $string[$pos + $i] );
672 }
673
674 // Throw an exception if there was loss of precision
675 if ( $value > 2 ** 52 ) {
676 $this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
677 'This could happen if we tried to unpack a 64-bit structure ' .
678 'at an invalid location.' );
679 }
680 $data[$key] = $value;
681 $pos += $length;
682 }
683 }
684
685 return $data;
686 }
687
696 private function testBit( $value, $bitIndex ) {
697 return (bool)( ( $value >> $bitIndex ) & 1 );
698 }
699}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:54
Internal exception class.
A class for reading ZIP file directories, for the purposes of upload verification.
$callback
The file data callback.
static readHandle( $file, $callback, $options=[])
Read an opened file handle presumed to be a ZIP and call a function for each file discovered in it.
$file
The opened file resource.
static read( $fileName, $callback, $options=[])
Read a ZIP file and call a function for each file discovered in it.
$buffer
A segmented cache of the file contents.
__construct( $file, $callback, $options)
$fileLength
The cached length of the file, or null if it has not been loaded yet.