MediaWiki REL1_28
ZipDirectoryReader.php
Go to the documentation of this file.
1<?php
/**
 * Read a ZIP file and call a function for each file discovered in it.
 *
 * The callback is invoked once per central directory entry with a single
 * associative array argument holding the keys 'name', 'mtime' and 'size'.
 *
 * @param string $fileName Path of the file to read
 * @param callable $callback The file data callback
 * @param array $options Recognised key: 'zip64', which selects ZIP64 mode
 *   when set to a true value
 * @return Status The result of execute()
 */
public static function read( $fileName, $callback, $options = [] ) {
	return ( new self( $fileName, $callback, $options ) )->execute();
}
94
/** The name of the file passed to fopen() by execute() */
96 protected $fileName;
97
/** The opened file resource */
99 protected $file;
100
/** The cached length of the file, or null if it has not been loaded yet */
102 protected $fileLength;
103
/** A segmented cache of the file contents, indexed by segment number */
105 protected $buffer;
106
/** The file data callback, called once per central directory entry */
108 protected $callback;
109
/** The ZIP64 mode flag, overridden by the 'zip64' constructor option */
111 protected $zip64 = false;
112
115
/** Reset to an empty array at the start of execute() */
116 protected $data;
117
/** The "extra field" ID for ZIP64 central directory entries */
119 const ZIP64_EXTRA_HEADER = 0x0001;
120
/** The segment size for the file contents cache, in bytes */
122 const SEGSIZE = 16384;
123
/** The index of the "general field" bit for UTF-8 file names */
125 const GENERAL_UTF8 = 11;
126
129
/**
 * Protected constructor; instances are created by the static read() method.
 *
 * @param string $fileName Path of the file to read
 * @param callable $callback The file data callback
 * @param array $options If the 'zip64' key is set, its value replaces the
 *   default ZIP64 mode flag
 */
protected function __construct( $fileName, $callback, $options ) {
	$this->callback = $callback;
	$this->fileName = $fileName;

	// Keep the declared default unless the caller supplied a non-null value
	$this->zip64 = isset( $options['zip64'] ) ? $options['zip64'] : $this->zip64;
}
144
/**
 * Read the directory according to settings in $this.
 *
 * Opens the file, reads the end of central directory record, locates the
 * central directory (in ZIP64 or legacy mode depending on $this->zip64) and
 * reads every entry in it, invoking the callback for each one.
 *
 * @return Status Good on success; fatal with 'zip-file-open-error' if the
 *   file cannot be opened, or with the error code carried by a
 *   ZipDirectoryReaderError raised while parsing
 */
function execute() {
	$this->data = [];
	// ZIP archives are binary data, so request binary mode explicitly
	$this->file = fopen( $this->fileName, 'rb' );
	if ( !$this->file ) {
		return Status::newFatal( 'zip-file-open-error' );
	}

	$status = Status::newGood();
	try {
		// Populate $this->eocdr; every branch below reads from it
		$this->readEndOfCentralDirectoryRecord();

		if ( $this->zip64 ) {
			list( $offset, $size ) = $this->findZip64CentralDirectory();
			$this->readCentralDirectory( $offset, $size );
		} else {
			if ( $this->eocdr['CD size'] == 0xffffffff
				|| $this->eocdr['CD offset'] == 0xffffffff
				|| $this->eocdr['CD entries total'] == 0xffff
			) {
				$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
					'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
					'opening vulnerabilities on clients using OpenJDK 7 or later.' );
			}

			list( $offset, $size ) = $this->findOldCentralDirectory();
			$this->readCentralDirectory( $offset, $size );
		}
	} catch ( ZipDirectoryReaderError $e ) {
		$status->fatal( $e->getErrorCode() );
	} finally {
		// Release the handle even when an exception other than
		// ZipDirectoryReaderError (e.g. one thrown by the callback) escapes
		fclose( $this->file );
	}

	return $status;
}
184
/**
 * Throw an error, and log a debug message.
 *
 * @param string $code Error code carried by the thrown exception
 * @param string $debugMessage Text sent to the debug log only
 * @throws ZipDirectoryReaderError Always
 */
function error( $code, $debugMessage ) {
	$logLine = __CLASS__ . ": Fatal error: $debugMessage\n";
	wfDebug( $logLine );

	throw new ZipDirectoryReaderError( $code );
}
195
202 $info = [
203 'signature' => 4,
204 'disk' => 2,
205 'CD start disk' => 2,
206 'CD entries this disk' => 2,
207 'CD entries total' => 2,
208 'CD size' => 4,
209 'CD offset' => 4,
210 'file comment length' => 2,
211 ];
212 $structSize = $this->getStructSize( $info );
213 $startPos = $this->getFileLength() - 65536 - $structSize;
214 if ( $startPos < 0 ) {
215 $startPos = 0;
216 }
217
218 if ( $this->getFileLength() === 0 ) {
219 $this->error( 'zip-wrong-format', "The file is empty." );
220 }
221
222 $block = $this->getBlock( $startPos );
223 $sigPos = strrpos( $block, "PK\x05\x06" );
224 if ( $sigPos === false ) {
225 $this->error( 'zip-wrong-format',
226 "zip file lacks EOCDR signature. It probably isn't a zip file." );
227 }
228
229 $this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
230 $this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length'];
231
232 if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) {
233 $this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
234 }
235 if ( $this->eocdr['disk'] !== 0
236 || $this->eocdr['CD start disk'] !== 0
237 ) {
238 $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
239 }
240 $this->eocdr += $this->unpack(
241 $block,
242 [ 'file comment' => [ 'string', $this->eocdr['file comment length'] ] ],
243 $sigPos + $structSize );
244 $this->eocdr['position'] = $startPos + $sigPos;
245 }
246
252 $info = [
253 'signature' => [ 'string', 4 ],
254 'eocdr64 start disk' => 4,
255 'eocdr64 offset' => 8,
256 'number of disks' => 4,
257 ];
258 $structSize = $this->getStructSize( $info );
259
260 $start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize;
261 $block = $this->getBlock( $start, $structSize );
262 $this->eocdr64Locator = $data = $this->unpack( $block, $info );
263
264 if ( $data['signature'] !== "PK\x06\x07" ) {
265 // Note: Java will allow this and continue to read the
266 // EOCDR64, so we have to reject the upload, we can't
267 // just use the EOCDR header instead.
268 $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
269 }
270 }
271
277 if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
278 || $this->eocdr64Locator['number of disks'] != 0
279 ) {
280 $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
281 }
282
283 $info = [
284 'signature' => [ 'string', 4 ],
285 'EOCDR64 size' => 8,
286 'version made by' => 2,
287 'version needed' => 2,
288 'disk' => 4,
289 'CD start disk' => 4,
290 'CD entries this disk' => 8,
291 'CD entries total' => 8,
292 'CD size' => 8,
293 'CD offset' => 8
294 ];
295 $structSize = $this->getStructSize( $info );
296 $block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
297 $this->eocdr64 = $data = $this->unpack( $block, $info );
298 if ( $data['signature'] !== "PK\x06\x06" ) {
299 $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
300 }
301 if ( $data['disk'] !== 0
302 || $data['CD start disk'] !== 0
303 ) {
304 $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
305 }
306 }
307
315 $size = $this->eocdr['CD size'];
316 $offset = $this->eocdr['CD offset'];
317 $endPos = $this->eocdr['position'];
318
319 // Some readers use the EOCDR position instead of the offset field
320 // to find the directory, so to be safe, we check if they both agree.
321 if ( $offset + $size != $endPos ) {
322 $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
323 'of central directory record' );
324 }
325
326 return [ $offset, $size ];
327 }
328
336 // The spec is ambiguous about the exact rules of precedence between the
337 // ZIP64 headers and the original headers. Here we follow zip_util.c
338 // from OpenJDK 7.
339 $size = $this->eocdr['CD size'];
340 $offset = $this->eocdr['CD offset'];
341 $numEntries = $this->eocdr['CD entries total'];
342 $endPos = $this->eocdr['position'];
343 if ( $size == 0xffffffff
344 || $offset == 0xffffffff
345 || $numEntries == 0xffff
346 ) {
348
349 if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
351 if ( isset( $this->eocdr64['CD offset'] ) ) {
352 $size = $this->eocdr64['CD size'];
353 $offset = $this->eocdr64['CD offset'];
354 $endPos = $this->eocdr64Locator['eocdr64 offset'];
355 }
356 }
357 }
358 // Some readers use the EOCDR position instead of the offset field
359 // to find the directory, so to be safe, we check if they both agree.
360 if ( $offset + $size != $endPos ) {
361 $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
362 'of central directory record' );
363 }
364
365 return [ $offset, $size ];
366 }
367
/**
 * Read the central directory at the given location.
 *
 * Each entry is unpacked, validated, and reported to the callback as an
 * array with the keys 'name', 'mtime' and 'size'.
 *
 * @param int|float $offset Byte offset of the central directory in the file
 * @param int|float $size Size of the central directory in bytes
 * @throws ZipDirectoryReaderError On a bad entry signature, on central
 *   directory encryption, or when an entry runs past the end of the block
 */
function readCentralDirectory( $offset, $size ) {
	$block = $this->getBlock( $offset, $size );

	// Fixed-size leading part of every central directory entry
	$fixedInfo = [
		'signature' => [ 'string', 4 ],
		'version made by' => 2,
		'version needed' => 2,
		'general bits' => 2,
		'compression method' => 2,
		'mod time' => 2,
		'mod date' => 2,
		'crc-32' => 4,
		'compressed size' => 4,
		'uncompressed size' => 4,
		'name length' => 2,
		'extra field length' => 2,
		'comment length' => 2,
		'disk number start' => 2,
		'internal attrs' => 2,
		'external attrs' => 4,
		'local header offset' => 4,
	];
	$fixedSize = $this->getStructSize( $fixedInfo );

	for ( $cursor = 0; $cursor < $size; ) {
		$entry = $this->unpack( $block, $fixedInfo, $cursor );
		$cursor += $fixedSize;

		if ( $entry['signature'] !== "PK\x01\x02" ) {
			$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
		}

		// The variable-length fields follow, sized by the fixed part
		$variableInfo = [
			'name' => [ 'string', $entry['name length'] ],
			'extra field' => [ 'string', $entry['extra field length'] ],
			'comment' => [ 'string', $entry['comment length'] ],
		];
		$entry += $this->unpack( $block, $variableInfo, $cursor );
		$cursor += $this->getStructSize( $variableInfo );

		// An all-ones 32-bit field means the real value is in the ZIP64 extra field
		$hasZip64Marker = $entry['compressed size'] == 0xffffffff
			|| $entry['uncompressed size'] == 0xffffffff
			|| $entry['local header offset'] == 0xffffffff;
		if ( $this->zip64 && $hasZip64Marker ) {
			$zip64Data = $this->unpackZip64Extra( $entry['extra field'] );
			if ( $zip64Data ) {
				// ZIP64 values take precedence over the 32-bit ones
				$entry = $zip64Data + $entry;
			}
		}

		$generalBits = $entry['general bits'];
		if ( $this->testBit( $generalBits, self::GENERAL_CD_ENCRYPTED ) ) {
			$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
		}

		// Convert the timestamp into MediaWiki format
		// For the format, please see the MS-DOS 2.0 Programmer's Reference,
		// pages 3-5 and 3-6.
		$dosDate = $entry['mod date'];
		$dosTime = $entry['mod time'];
		$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
			1980 + ( $dosDate >> 9 ),
			( $dosDate >> 5 ) & 15,
			$dosDate & 31,
			( $dosTime >> 11 ) & 31,
			( $dosTime >> 5 ) & 63,
			( $dosTime & 31 ) * 2 );

		// Convert the character set in the file name
		$name = $this->testBit( $generalBits, self::GENERAL_UTF8 )
			? $entry['name']
			: iconv( 'CP437', 'UTF-8', $entry['name'] );

		// Compile a data array for the user, with a sensible format
		call_user_func( $this->callback, [
			'name' => $name,
			'mtime' => $timestamp,
			'size' => $entry['uncompressed size'],
		] );
	}
}
460
/**
 * Interpret ZIP64 "extra field" data and return an associative array.
 *
 * Walks the (id, size, data) records in the extra field and unpacks the
 * first one whose id is self::ZIP64_EXTRA_HEADER.
 *
 * @param string $extraField Raw extra field of a central directory entry
 * @return array|bool The unpacked ZIP64 values, or false if the extra field
 *   holds no ZIP64 record
 */
function unpackZip64Extra( $extraField ) {
	$headerLayout = [
		'id' => 2,
		'size' => 2,
	];
	$headerSize = $this->getStructSize( $headerLayout );

	$zip64Layout = [
		'uncompressed size' => 8,
		'compressed size' => 8,
		'local header offset' => 8,
		'disk number start' => 4,
	];

	$total = strlen( $extraField );
	$cursor = 0;
	while ( $cursor < $total ) {
		$record = $this->unpack( $extraField, $headerLayout, $cursor );
		$cursor += $headerSize;

		$payload = $this->unpack( $extraField,
			[ 'data' => [ 'string', $record['size'] ] ],
			$cursor );
		$cursor += $record['size'];

		if ( $record['id'] == self::ZIP64_EXTRA_HEADER ) {
			return $this->unpack( $payload['data'], $zip64Layout );
		}
	}

	return false;
}
496
/**
 * Get the length of the file.
 *
 * The value is taken from fstat() on first use and cached in
 * $this->fileLength afterwards.
 *
 * @return int
 */
function getFileLength() {
	if ( $this->fileLength !== null ) {
		return $this->fileLength;
	}

	$stat = fstat( $this->file );
	$this->fileLength = $stat['size'];

	return $this->fileLength;
}
509
/**
 * Get the file contents from a given offset.
 *
 * The data is assembled from the segment cache, so repeated reads of the
 * same region do not touch the file again.
 *
 * @param int|float $start Byte offset of the first byte wanted
 * @param int|float|null $length Number of bytes wanted, or null for
 *   everything from $start to the end of the file
 * @return string
 * @throws ZipDirectoryReaderError With code 'zip-bad' if the requested
 *   range lies outside the file or less data than requested is available
 */
function getBlock( $start, $length = null ) {
	$fileLength = $this->getFileLength();
	// A negative start can be computed by callers working backwards from
	// the end of a short file; reject it here with a meaningful message
	// instead of letting it surface later as a failed seek.
	if ( $start < 0 || $start >= $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested position $start, " .
			"file length is $fileLength" );
	}
	if ( $length === null ) {
		$length = $fileLength - $start;
	}
	$end = $start + $length;
	if ( $end > $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested end position $end, " .
			"file length is $fileLength" );
	}

	// floor()/ceil() return floats; cast so that the cache is indexed by
	// integers. ceil( $end / SEGSIZE ) is an exclusive bound, so the last
	// segment actually needed is one before it. max() keeps at least one
	// segment in the range when $length is zero.
	$startSeg = (int)floor( $start / self::SEGSIZE );
	$lastSeg = max( $startSeg, (int)ceil( $end / self::SEGSIZE ) - 1 );

	$block = '';
	for ( $segIndex = $startSeg; $segIndex <= $lastSeg; $segIndex++ ) {
		$block .= $this->getSegment( $segIndex );
	}

	// Trim the concatenated segments down to exactly the requested range
	$block = substr( $block,
		$start - $startSeg * self::SEGSIZE,
		$length );

	if ( strlen( $block ) < $length ) {
		$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
	}

	return $block;
}
552
/**
 * Get a section of the file starting at position $segIndex * self::SEGSIZE,
 * of length self::SEGSIZE.
 *
 * Results are cached in $this->buffer. A segment that starts at or beyond
 * the end of the file is an empty string.
 *
 * @param int $segIndex
 * @return string
 * @throws ZipDirectoryReaderError With code 'zip-bad' if seeking or reading
 *   fails
 */
function getSegment( $segIndex ) {
	if ( isset( $this->buffer[$segIndex] ) ) {
		return $this->buffer[$segIndex];
	}

	$bytePos = $segIndex * self::SEGSIZE;
	if ( $bytePos >= $this->getFileLength() ) {
		$this->buffer[$segIndex] = '';

		return '';
	}

	// fseek() returns 0 on success, so any non-zero result is a failure
	if ( fseek( $this->file, $bytePos ) ) {
		$this->error( 'zip-bad', "seek to $bytePos failed" );
	}

	$seg = fread( $this->file, self::SEGSIZE );
	if ( $seg === false ) {
		$this->error( 'zip-bad', "read from $bytePos failed" );
	}
	$this->buffer[$segIndex] = $seg;

	return $seg;
}
586
/**
 * Get the size of a structure in bytes.
 *
 * @param array $struct Field definitions: each value is either an integer
 *   byte count, or an array whose second element is the byte count
 * @return int
 */
function getStructSize( $struct ) {
	$total = 0;
	foreach ( $struct as $fieldType ) {
		$total += is_array( $fieldType ) ? $fieldType[1] : $fieldType;
	}

	return $total;
}
605
/**
 * Unpack a binary structure.
 *
 * @param string $string The binary data
 * @param array $struct Map of field name to type. A type is either an
 *   integer N, giving an unsigned little-endian integer of N bytes, or
 *   [ 'string', N ], giving a raw string of N bytes
 * @param int $offset Offset into $string at which the structure starts
 * @return array Map of field name to unpacked value
 * @throws ZipDirectoryReaderError If the structure would extend past the
 *   end of $string, or an integer field is too large to be held exactly
 * @throws MWException If an array type names anything other than 'string'
 */
function unpack( $string, $struct, $offset = 0 ) {
	$needed = $this->getStructSize( $struct );
	if ( strlen( $string ) < $offset + $needed ) {
		$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
	}

	$result = [];
	$cursor = $offset;
	foreach ( $struct as $key => $type ) {
		if ( is_array( $type ) ) {
			$typeName = $type[0];
			$fieldSize = $type[1];
			// Loose comparison on purpose: the same matching rule a
			// switch statement applies to its case labels
			if ( $typeName != 'string' ) {
				throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
			}
			$result[$key] = substr( $string, $cursor, $fieldSize );
			$cursor += $fieldSize;
			continue;
		}

		// Unsigned little-endian integer
		$length = intval( $type );

		// Accumulate from the most significant byte down. PHP upgrades
		// the value to floating point automatically if it overflows.
		$value = 0;
		$i = $length;
		while ( $i-- > 0 ) {
			$value = $value * 256 + ord( $string[$cursor + $i] );
		}

		// Throw an exception if there was loss of precision
		if ( $value > pow( 2, 52 ) ) {
			$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
				'This could happen if we tried to unpack a 64-bit structure ' .
				'at an invalid location.' );
		}
		$result[$key] = $value;
		$cursor += $length;
	}

	return $result;
}
672
/**
 * Returns a bit from a given position in an integer value, converted to
 * boolean.
 *
 * @param int $value
 * @param int $bitIndex The index of the bit, where 0 is the LSB
 * @return bool
 */
function testBit( $value, $bitIndex ) {
	$shifted = $value >> $bitIndex;

	return ( $shifted & 1 ) === 1;
}
684
/**
 * Debugging helper function which dumps a string in hexdump -C format.
 *
 * Each row shows an 8-digit hexadecimal offset, up to 16 bytes in hex with
 * an extra gap before the ninth, and the same bytes as printable characters
 * between vertical bars. The output is printed, not returned.
 *
 * @param string $s The data to dump
 */
function hexDump( $s ) {
	$n = strlen( $s );
	for ( $i = 0; $i < $n; $i += 16 ) {
		$row = sprintf( "%08X ", $i );

		// Hex column
		for ( $j = 0; $j < 16; $j++ ) {
			$row .= " ";
			if ( $j == 8 ) {
				$row .= " ";
			}
			if ( $i + $j >= $n ) {
				// A byte takes two hex digits, so a missing byte must be
				// padded with two spaces to keep the text column aligned
				// with the rows above
				$row .= "  ";
			} else {
				$row .= sprintf( "%02X", ord( $s[$i + $j] ) );
			}
		}

		// Text column: printable characters as-is, everything else as a dot
		$row .= " |";
		for ( $j = 0; $j < 16; $j++ ) {
			if ( $i + $j >= $n ) {
				$row .= " ";
			} elseif ( ctype_print( $s[$i + $j] ) ) {
				$row .= $s[$i + $j];
			} else {
				$row .= '.';
			}
		}
		$row .= "|\n";

		print $row;
	}
}
718}
719
/**
 * Internal exception class. Carries the error code passed to
 * ZipDirectoryReader::error() so that it can be retrieved by the catcher.
 */
class ZipDirectoryReaderError extends Exception {
	/** The error code given to the constructor */
	protected $errorCode;

	/**
	 * @param string $code Error code, also embedded in the exception message
	 */
	function __construct( $code ) {
		parent::__construct( "ZipDirectoryReader error: $code" );
		$this->errorCode = $code;
	}

	/**
	 * @return string The error code given to the constructor
	 */
	function getErrorCode() {
		return $this->errorCode;
	}
}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
MediaWiki exception.
Internal exception class.
A class for reading ZIP file directories, for the purposes of upload verification.
$callback
The file data callback.
const GENERAL_UTF8
The index of the "general field" bit for UTF-8 file names.
readEndOfCentralDirectoryRecord()
Read the header which is at the end of the central directory, unimaginatively called the "end of central directory record".
error( $code, $debugMessage)
Throw an error, and log a debug message.
execute()
Read the directory according to settings in $this.
__construct( $fileName, $callback, $options)
Protected constructor; instances are created through the static read() method.
readZip64EndOfCentralDirectoryRecord()
Read the header called the "ZIP64 end of central directory record".
getSegment( $segIndex)
Get a section of the file starting at position $segIndex * self::SEGSIZE, of length self::SEGSIZE.
const ZIP64_EXTRA_HEADER
The "extra field" ID for ZIP64 central directory entries.
findZip64CentralDirectory()
Find the location of the central directory, as would be seen by a ZIP64-compliant reader.
$file
The opened file resource.
static read( $fileName, $callback, $options=[])
Read a ZIP file and call a function for each file discovered in it.
testBit( $value, $bitIndex)
Returns a bit from a given position in an integer value, converted to boolean.
getFileLength()
Get the length of the file.
const GENERAL_CD_ENCRYPTED
The index of the "general field" bit for central directory encryption.
getStructSize( $struct)
Get the size of a structure in bytes.
readZip64EndOfCentralDirectoryLocator()
Read the header called the "ZIP64 end of central directory locator".
findOldCentralDirectory()
Find the location of the central directory, as would be seen by a non-ZIP64 reader.
$buffer
A segmented cache of the file contents.
hexDump( $s)
Debugging helper function which dumps a string in hexdump -C format.
getBlock( $start, $length=null)
Get the file contents from a given offset.
unpack( $string, $struct, $offset=0)
Unpack a binary structure.
const SEGSIZE
The segment size for the file contents cache.
$fileLength
The cached length of the file, or null if it has not been loaded yet.
readCentralDirectory( $offset, $size)
Read the central directory at the given location.
unpackZip64Extra( $extraField)
Interpret ZIP64 "extra field" data and return an associative array.
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
while(( $__line=Maintenance::readconsole()) !==false) print
Definition eval.php:64
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set $status
Definition hooks.txt:1049
namespace are movable Hooks may change this value to override the return value of MWNamespace::isMovable(). 'NewDifferenceEngine' do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition hooks.txt:2568
see documentation in includes Linker php for Linker::makeImageLink & $time
Definition hooks.txt:1752
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context $options
Definition hooks.txt:1096
We ve cleaned up the code here by removing clumps of infrequently used code and moving them off somewhere else It s much easier for someone working with this code to see what s _really_ going and make changes or fix bugs In we can take all the code that deals with the little used title reversing we can concentrate it all in an extension file
Definition hooks.txt:108
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:304
returning false will NOT prevent logging $e
Definition hooks.txt:2110
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition hooks.txt:887
if( $limit) $timestamp
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
This document describes the state of Postgres support in and is fairly well maintained The main code is very well while extensions are very hit and miss it is probably the most supported database after MySQL Much of the work in making MediaWiki database agnostic came about through the work of creating Postgres as and are nearing end of but without copying over all the usage comments General notes on the but these can almost always be programmed around *Although Postgres has a true BOOLEAN boolean columns are always mapped to as the code does not always treat the column as a and VARBINARY columns should simply be TEXT The only exception is when VARBINARY is used to store true binary data
Definition postgres.txt:43