Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
64.62% |
179 / 277 |
|
33.33% |
6 / 18 |
CRAP | |
0.00% |
0 / 1 |
| ZipDirectoryReader | |
64.62% |
179 / 277 |
|
33.33% |
6 / 18 |
308.99 | |
0.00% |
0 / 1 |
| read | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| readHandle | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
| __construct | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
| execute | |
84.21% |
16 / 19 |
|
0.00% |
0 / 1 |
7.19 | |||
| error | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| readEndOfCentralDirectoryRecord | |
97.06% |
33 / 34 |
|
0.00% |
0 / 1 |
7 | |||
| readZip64EndOfCentralDirectoryLocator | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
| readZip64EndOfCentralDirectoryRecord | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
42 | |||
| findOldCentralDirectory | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
| findZip64CentralDirectory | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
56 | |||
| readCentralDirectory | |
91.94% |
57 / 62 |
|
0.00% |
0 / 1 |
10.05 | |||
| unpackZip64Extra | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
12 | |||
| getFileLength | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| getBlock | |
75.00% |
15 / 20 |
|
0.00% |
0 / 1 |
5.39 | |||
| getSegment | |
83.33% |
10 / 12 |
|
0.00% |
0 / 1 |
5.12 | |||
| getStructSize | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
| unpack | |
84.00% |
21 / 25 |
|
0.00% |
0 / 1 |
8.26 | |||
| testBit | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * ZIP file directories reader, for the purposes of upload verification. |
| 4 | * |
| 5 | * @license GPL-2.0-or-later |
| 6 | * @file |
| 7 | */ |
| 8 | |
| 9 | namespace Wikimedia\Mime; |
| 10 | |
| 11 | use StatusValue; |
| 12 | use UnexpectedValueException; |
| 13 | |
| 14 | /** |
| 15 | * A class for reading ZIP file directories, for the purposes of upload |
| 16 | * verification. |
| 17 | * |
| 18 | * Only a functional interface is provided: ZipDirectoryReader::read(). No access is
| 19 | * given to object instances. |
| 20 | */ |
| 21 | class ZipDirectoryReader { |
/**
 * Open a ZIP archive by path and report each file listed in it to a callback.
 *
 * This class exists for verification, so suspicious or ambiguous input is
 * rejected with an error rather than being interpreted the way some
 * particular unzip implementation would interpret it.
 *
 * @param string $fileName Path of the archive to open
 * @param callable $callback Called once per discovered file with a single
 *   associative array argument, which has the members:
 *
 *    - name: The file name. Directories conventionally have a trailing
 *      slash.
 *
 *    - mtime: The file modification time, in MediaWiki 14-char format
 *
 *    - size: The uncompressed file size
 *
 * @param array $options Read options, keyed by option name. Currently
 *   recognised:
 *
 *    - zip64: When true, behave like a library with ZIP64 support, such as
 *      OpenJDK 7. When false, behave like a library that knows nothing
 *      about ZIP64.
 *
 *      NOTE: The ZIP64 code is untested and probably doesn't work. Rejecting
 *      ZIP64 uploads turned out to be easier, since they are likely to be
 *      very rare, and confirming that a ZIP64 file is safe is fairly
 *      complex: a valid ZIP64 file normally looks ambiguous and broken to a
 *      non-ZIP64 reader, and predicting what such a reader will make of it
 *      is not trivial.
 *
 * @return StatusValue A StatusValue object. The fatal errors it may carry are:
 *
 *    - zip-file-open-error: The file could not be opened.
 *
 *    - zip-wrong-format: The file does not appear to be a ZIP file.
 *
 *    - zip-bad: There was something wrong or ambiguous about the file
 *      data.
 *
 *    - zip-unsupported: The ZIP file uses features which
 *      ZipDirectoryReader does not support.
 *
 * The default messages for those errors are worded for upload verification.
 * When a fatal error is returned, further detail is written to the debug log.
 *
 * The callback may already have been called any number of times before a
 * fatal error is returned. In that case, whatever it received should be
 * discarded.
 */
public static function read( $fileName, $callback, $options = [] ) {
	// A failed fopen() gives false, which execute() reports as zip-file-open-error
	$handle = fopen( $fileName, 'r' );

	return ( new self( $handle, $callback, $options ) )->execute();
}
| 85 | |
/**
 * Same as read(), but for a stream that the caller has already opened.
 *
 * The stream is presumed to contain a ZIP archive. Note that execute()
 * closes the handle once the directory has been read.
 *
 * @see ZipDirectoryReader::read
 *
 * @param resource $file A seekable stream containing the archive
 * @param callable $callback Per-file callback, as described for read()
 * @param array $options Read options, as described for read()
 * @return StatusValue
 */
public static function readHandle( $file, $callback, $options = [] ) {
	$reader = new self( $file, $callback, $options );

	return $reader->execute();
}
| 101 | |
| 102 | /** @var resource The opened file resource */ |
| 103 | protected $file; |
| 104 | |
| 105 | /** @var int|null The cached length of the file, or null if it has not been loaded yet. */ |
| 106 | protected $fileLength; |
| 107 | |
| 108 | /** @var string[] A segmented cache of the file contents */ |
| 109 | protected $buffer; |
| 110 | |
| 111 | /** @var callable The file data callback */ |
| 112 | protected $callback; |
| 113 | |
| 114 | /** @var bool The ZIP64 mode */ |
| 115 | protected $zip64 = false; |
| 116 | |
| 117 | /** @var array Unpacked fields of the end of central directory record (EOCDR) */
| 118 | protected $eocdr; |
| 119 | /** @var array Unpacked fields of the ZIP64 end of central directory record */
| 120 | protected $eocdr64; |
| 121 | /** @var array Unpacked fields of the ZIP64 end of central directory locator */
| 122 | protected $eocdr64Locator; |
| 123 | |
| 124 | /** The "extra field" ID for ZIP64 central directory entries */ |
| 125 | private const ZIP64_EXTRA_HEADER = 0x0001; |
| 126 | |
| 127 | /** The segment size for the file contents cache */ |
| 128 | private const SEGSIZE = 16384; |
| 129 | |
| 130 | /** The index of the "general field" bit for UTF-8 file names */ |
| 131 | private const GENERAL_UTF8 = 11; |
| 132 | |
| 133 | /** The index of the "general field" bit for central directory encryption */ |
| 134 | private const GENERAL_CD_ENCRYPTED = 13; |
| 135 | |
/**
 * Store the stream, the callback and the ZIP64 mode. Instances are only
 * created through the static read() and readHandle() entry points.
 *
 * @param resource $file The opened archive stream (false if opening failed)
 * @param callable $callback The per-file callback
 * @param array $options Read options; only 'zip64' is consulted
 */
protected function __construct( $file, $callback, $options ) {
	$this->callback = $callback;
	$this->file = $file;

	// Keep the declared default when the option is absent or null
	$this->zip64 = $options['zip64'] ?? $this->zip64;
}
| 149 | |
/**
 * Read the directory according to settings in $this.
 *
 * A ZipDirectoryReaderError raised while parsing is converted into a fatal
 * StatusValue carrying its error code. The stream is closed before this
 * method returns, including when some other exception (for example one
 * thrown by the callback) propagates to the caller.
 *
 * @return StatusValue Good on success, otherwise fatal with one of the
 *   zip-* error codes
 */
private function execute() {
	if ( !$this->file ) {
		return StatusValue::newFatal( 'zip-file-open-error' );
	}

	$status = StatusValue::newGood();
	try {
		$this->readEndOfCentralDirectoryRecord();
		if ( $this->zip64 ) {
			[ $offset, $size ] = $this->findZip64CentralDirectory();
		} else {
			// These sentinel values tell a ZIP64-aware reader to look elsewhere
			// for the real numbers, so a legacy reader would see a different archive.
			if ( $this->eocdr['CD size'] == 0xffffffff
				|| $this->eocdr['CD offset'] == 0xffffffff
				|| $this->eocdr['CD entries total'] == 0xffff
			) {
				$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
					'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
					'opening vulnerabilities on clients using OpenJDK 7 or later.' );
			}

			[ $offset, $size ] = $this->findOldCentralDirectory();
		}
		$this->readCentralDirectory( $offset, $size );
	} catch ( ZipDirectoryReaderError $e ) {
		$status->fatal( $e->getErrorCode() );
	} finally {
		// Release the handle on every exit path, not only after handled errors
		fclose( $this->file );
	}

	return $status;
}
| 187 | |
/**
 * Write a message to the debug log, then abort by throwing.
 *
 * @param string $code Error code carried by the exception
 * @param string $debugMessage Detail that goes to the debug log only
 * @throws ZipDirectoryReaderError Always
 * @return never
 */
private function error( $code, $debugMessage ): never {
	$logLine = __CLASS__ . ": Fatal error: $debugMessage";
	wfDebug( $logLine );

	throw new ZipDirectoryReaderError( $code );
}
| 199 | |
/**
 * Locate and parse the "end of central directory record" (EOCDR), the
 * header which the ZIP spec places after the central directory.
 *
 * The record is searched for in the tail of the file, and the result is
 * stored in $this->eocdr. Besides the unpacked fields, that array gets
 * 'EOCDR size' (fixed part plus comment), 'file comment' and 'position'
 * (the byte offset of the signature in the file).
 *
 * @throws ZipDirectoryReaderError If the file is empty, has no EOCDR
 *   signature, has data after the record, or spans more than one disk
 */
private function readEndOfCentralDirectoryRecord() {
	$layout = [
		'signature' => 4,
		'disk' => 2,
		'CD start disk' => 2,
		'CD entries this disk' => 2,
		'CD entries total' => 2,
		'CD size' => 4,
		'CD offset' => 4,
		'file comment length' => 2,
	];
	$fixedSize = $this->getStructSize( $layout );

	if ( $this->getFileLength() === 0 ) {
		$this->error( 'zip-wrong-format', "The file is empty." );
	}

	// The comment length is a 2-byte field, so scanning the final
	// 65536 bytes plus the fixed record is enough to find the signature.
	$startPos = max( 0, $this->getFileLength() - 65536 - $fixedSize );

	$tail = $this->getBlock( $startPos );
	$sigOffset = strrpos( $tail, "PK\x05\x06" );
	if ( $sigOffset === false ) {
		$this->error( 'zip-wrong-format',
			"zip file lacks EOCDR signature. It probably isn't a zip file." );
	}

	$this->eocdr = $this->unpack( substr( $tail, $sigOffset ), $layout );
	$commentLength = $this->eocdr['file comment length'];
	$this->eocdr['EOCDR size'] = $fixedSize + $commentLength;

	// The record, comment included, must run exactly to the end of the file
	$bytesAfterSignature = strlen( $tail ) - $sigOffset;
	if ( $this->eocdr['EOCDR size'] != $bytesAfterSignature ) {
		// T40432: MS binary documents frequently embed ZIP files
		$this->error( 'zip-wrong-format', 'there is a ZIP signature but it is not at ' .
			'the end of the file. It could be an OLE file with a ZIP file embedded.' );
	}

	if ( $this->eocdr['disk'] !== 0 || $this->eocdr['CD start disk'] !== 0 ) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
	}

	$commentLayout = [ 'file comment' => [ 'string', $commentLength ] ];
	$this->eocdr += $this->unpack( $tail, $commentLayout, $sigOffset + $fixedSize );
	$this->eocdr['position'] = $startPos + $sigOffset;
}
| 252 | |
/**
 * Parse the "ZIP64 end of central directory locator" and store it in
 * $this->eocdr64Locator.
 *
 * The header is read from the bytes directly in front of the EOCDR, so
 * $this->eocdr must already be populated.
 *
 * @throws ZipDirectoryReaderError If the locator signature is not found there
 */
private function readZip64EndOfCentralDirectoryLocator() {
	$layout = [
		'signature' => [ 'string', 4 ],
		'eocdr64 start disk' => 4,
		'eocdr64 offset' => 8,
		'number of disks' => 4,
	];
	$locatorSize = $this->getStructSize( $layout );

	// The EOCDR ends the file, so step back over it and over the locator itself
	$locatorPos = $this->getFileLength() - $this->eocdr['EOCDR size'] - $locatorSize;
	$locator = $this->unpack( $this->getBlock( $locatorPos, $locatorSize ), $layout );
	$this->eocdr64Locator = $locator;

	if ( $locator['signature'] !== "PK\x06\x07" ) {
		// Note: Java will allow this and continue to read the
		// EOCDR64, so we have to reject the upload, we can't
		// just use the EOCDR header instead.
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
	}
}
| 277 | |
/**
 * Read the header called the "ZIP64 end of central directory record". It
 * may replace the regular "end of central directory record" in ZIP64 files.
 *
 * The record is read from the offset given by $this->eocdr64Locator, which
 * must already be populated, and is stored in $this->eocdr64.
 *
 * @throws ZipDirectoryReaderError If the archive spans several disks or the
 *   record signature is wrong
 */
private function readZip64EndOfCentralDirectoryRecord() {
	// "number of disks" is a total count, so a single-disk archive has 1 here.
	// 0 is still tolerated, as it was accepted before.
	if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
		|| $this->eocdr64Locator['number of disks'] > 1
	) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
	}

	$info = [
		'signature' => [ 'string', 4 ],
		'EOCDR64 size' => 8,
		'version made by' => 2,
		'version needed' => 2,
		'disk' => 4,
		'CD start disk' => 4,
		'CD entries this disk' => 8,
		'CD entries total' => 8,
		'CD size' => 8,
		'CD offset' => 8
	];
	$structSize = $this->getStructSize( $info );
	$block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
	$this->eocdr64 = $data = $this->unpack( $block, $info );
	if ( $data['signature'] !== "PK\x06\x06" ) {
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
	}
	if ( $data['disk'] !== 0
		|| $data['CD start disk'] !== 0
	) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
	}
}
| 313 | |
/**
 * Work out where the central directory is from the point of view of a
 * reader without ZIP64 support, using only the fields in $this->eocdr.
 *
 * @return array Two-element list: [ offset, size ]
 * @throws ZipDirectoryReaderError If the directory does not end exactly
 *   where the EOCDR begins
 */
private function findOldCentralDirectory() {
	[
		'CD size' => $size,
		'CD offset' => $offset,
		'position' => $endPos,
	] = $this->eocdr;

	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	$directoryEnd = $offset + $size;
	if ( $directoryEnd != $endPos ) {
		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
			'of central directory record' );
	}

	return [ $offset, $size ];
}
| 334 | |
/**
 * Work out where the central directory is from the point of view of a
 * ZIP64-compliant reader.
 *
 * The EOCDR values are used unless one of them holds a ZIP64 sentinel, in
 * which case the ZIP64 locator and record are read and take over.
 *
 * @return array Two-element list: [ offset, size ]
 * @throws ZipDirectoryReaderError If the directory does not end exactly
 *   where the relevant end-of-directory header begins
 */
private function findZip64CentralDirectory() {
	// The spec is ambiguous about the exact rules of precedence between the
	// ZIP64 headers and the original headers. Here we follow zip_util.c
	// from OpenJDK 7.
	$legacy = $this->eocdr;
	$size = $legacy['CD size'];
	$offset = $legacy['CD offset'];
	$endPos = $legacy['position'];

	$hasSentinel = $size == 0xffffffff
		|| $offset == 0xffffffff
		|| $legacy['CD entries total'] == 0xffff;

	if ( $hasSentinel ) {
		$this->readZip64EndOfCentralDirectoryLocator();

		if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
			$this->readZip64EndOfCentralDirectoryRecord();
			if ( isset( $this->eocdr64['CD offset'] ) ) {
				$size = $this->eocdr64['CD size'];
				$offset = $this->eocdr64['CD offset'];
				// The directory must now end where the ZIP64 record starts
				$endPos = $this->eocdr64Locator['eocdr64 offset'];
			}
		}
	}

	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	$directoryEnd = $offset + $size;
	if ( $directoryEnd != $endPos ) {
		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
			'of central directory record' );
	}

	return [ $offset, $size ];
}
| 373 | |
/**
 * Read the central directory at the given location and pass every entry in
 * it to the callback.
 *
 * For each entry the callback receives an array with 'name' (UTF-8),
 * 'mtime' (14 digits, built from the MS-DOS date and time fields) and
 * 'size' (the uncompressed size).
 *
 * @param int $offset Byte offset of the directory in the file
 * @param int $size Length of the directory in bytes
 * @throws ZipDirectoryReaderError If an entry has a bad signature, is
 *   truncated, uses central directory encryption, or has a name that
 *   cannot be converted to UTF-8
 */
private function readCentralDirectory( $offset, $size ) {
	$block = $this->getBlock( $offset, $size );

	$fixedInfo = [
		'signature' => [ 'string', 4 ],
		'version made by' => 2,
		'version needed' => 2,
		'general bits' => 2,
		'compression method' => 2,
		'mod time' => 2,
		'mod date' => 2,
		'crc-32' => 4,
		'compressed size' => 4,
		'uncompressed size' => 4,
		'name length' => 2,
		'extra field length' => 2,
		'comment length' => 2,
		'disk number start' => 2,
		'internal attrs' => 2,
		'external attrs' => 4,
		'local header offset' => 4,
	];
	$fixedSize = $this->getStructSize( $fixedInfo );

	$pos = 0;
	while ( $pos < $size ) {
		$data = $this->unpack( $block, $fixedInfo, $pos );
		$pos += $fixedSize;

		if ( $data['signature'] !== "PK\x01\x02" ) {
			$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
		}

		// The fixed header is followed by three variable-length fields
		$variableInfo = [
			'name' => [ 'string', $data['name length'] ],
			'extra field' => [ 'string', $data['extra field length'] ],
			'comment' => [ 'string', $data['comment length'] ],
		];
		$data += $this->unpack( $block, $variableInfo, $pos );
		$pos += $this->getStructSize( $variableInfo );

		if ( $this->zip64 && (
			$data['compressed size'] == 0xffffffff
			|| $data['uncompressed size'] == 0xffffffff
			|| $data['local header offset'] == 0xffffffff )
		) {
			$zip64Data = $this->unpackZip64Extra( $data['extra field'] );
			if ( $zip64Data ) {
				// Left operand wins in array union, so ZIP64 values override
				$data = $zip64Data + $data;
			}
		}

		if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
			$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
		}

		// Convert the timestamp into MediaWiki format
		// For the format, please see the MS-DOS 2.0 Programmer's Reference,
		// pages 3-5 and 3-6.
		$time = $data['mod time'];
		$date = $data['mod date'];

		$year = 1980 + ( $date >> 9 );
		$month = ( $date >> 5 ) & 15;
		$day = $date & 31;
		$hour = ( $time >> 11 ) & 31;
		$minute = ( $time >> 5 ) & 63;
		// Seconds are stored with a resolution of two seconds
		$second = ( $time & 31 ) * 2;
		$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
			$year, $month, $day, $hour, $minute, $second );

		// Convert the character set in the file name
		if ( $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) ) {
			$name = $data['name'];
		} else {
			$name = iconv( 'CP437', 'UTF-8', $data['name'] );
			if ( $name === false ) {
				// Never hand a non-string name to the callback: a check
				// based on the file name would silently see nothing.
				$this->error( 'zip-bad', 'unable to convert a file name from CP437 to UTF-8' );
			}
		}

		// Compile a data array for the user, with a sensible format
		$userData = [
			'name' => $name,
			'mtime' => $timestamp,
			'size' => $data['uncompressed size'],
		];
		( $this->callback )( $userData );
	}
}
| 466 | |
/**
 * Search an entry's "extra field" for the ZIP64 record and decode it.
 *
 * The extra field is walked as a sequence of (id, size, data) records. The
 * first record with the ZIP64 id is unpacked and returned.
 *
 * NOTE(review): all four members of the ZIP64 layout are unpacked
 * unconditionally. The ZIP specification appears to allow a record that
 * holds only some of them; confirm against APPNOTE section 4.5.3.
 *
 * @param string $extraField Raw extra field bytes of a directory entry
 * @return array|bool The unpacked ZIP64 values, or false if there is no
 *   ZIP64 record
 */
private function unpackZip64Extra( $extraField ) {
	$headerLayout = [
		'id' => 2,
		'size' => 2,
	];
	$headerSize = $this->getStructSize( $headerLayout );

	$zip64Layout = [
		'uncompressed size' => 8,
		'compressed size' => 8,
		'local header offset' => 8,
		'disk number start' => 4,
	];

	$cursor = 0;
	while ( $cursor < strlen( $extraField ) ) {
		$record = $this->unpack( $extraField, $headerLayout, $cursor );
		$cursor += $headerSize;

		$payloadLayout = [ 'data' => [ 'string', $record['size'] ] ];
		$record += $this->unpack( $extraField, $payloadLayout, $cursor );
		$cursor += $record['size'];

		if ( $record['id'] == self::ZIP64_EXTRA_HEADER ) {
			return $this->unpack( $record['data'], $zip64Layout );
		}
	}

	return false;
}
| 502 | |
/**
 * Get the length of the file. The value is taken from fstat() on first use
 * and cached in $this->fileLength afterwards.
 *
 * @return int
 * @throws ZipDirectoryReaderError If the stream cannot be stat'ed
 */
private function getFileLength() {
	if ( $this->fileLength === null ) {
		$stat = fstat( $this->file );
		if ( $stat === false ) {
			// Without a length there is no way to locate the end-of-directory headers
			$this->error( 'zip-bad', 'fstat() failed, unable to determine the file length' );
		}
		$this->fileLength = $stat['size'];
	}

	return $this->fileLength;
}
| 515 | |
/**
 * Get the file contents from a given offset. If there are not enough bytes
 * in the file to satisfy the request, an exception will be thrown.
 *
 * The data is assembled from the segment cache, loading only the segments
 * that overlap the requested range.
 *
 * @param int $start The byte offset of the start of the block.
 * @param int|null $length The number of bytes to return. If omitted, the remainder
 *   of the file will be returned.
 *
 * @return string
 * @throws ZipDirectoryReaderError If the requested range is not fully
 *   inside the file
 */
private function getBlock( $start, $length = null ) {
	$fileLength = $this->getFileLength();
	// A negative offset can result from subtracting header sizes from a short file
	if ( $start < 0 || $start >= $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested position $start, " .
			"file length is $fileLength" );
	}
	$length ??= $fileLength - $start;
	$end = $start + $length;
	if ( $end > $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested end position $end, " .
			"file length is $fileLength" );
	}
	$startSeg = (int)floor( $start / self::SEGSIZE );
	// Exclusive upper bound: index of the first segment past the range
	$endSeg = (int)ceil( $end / self::SEGSIZE );

	$block = '';
	for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
		$block .= $this->getSegment( $segIndex );
	}

	// Trim the part of the first segment that lies before $start
	$block = substr( $block,
		$start - $startSeg * self::SEGSIZE,
		$length );

	if ( strlen( $block ) < $length ) {
		$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
	}

	return $block;
}
| 556 | |
/**
 * Fetch one cache segment: the self::SEGSIZE bytes of the file that begin
 * at $segIndex * self::SEGSIZE. Helper for getBlock(); results are kept in
 * $this->buffer.
 *
 * A segment that reaches past the end of the file comes back truncated,
 * and a segment that starts at or past the end of the file is an empty
 * string.
 *
 * @param int $segIndex
 *
 * @return string
 * @throws ZipDirectoryReaderError If seeking or reading fails
 */
private function getSegment( $segIndex ) {
	if ( isset( $this->buffer[$segIndex] ) ) {
		return $this->buffer[$segIndex];
	}

	$bytePos = $segIndex * self::SEGSIZE;
	if ( $bytePos >= $this->getFileLength() ) {
		$this->buffer[$segIndex] = '';

		return '';
	}

	// fseek() returns 0 on success
	if ( fseek( $this->file, $bytePos ) ) {
		$this->error( 'zip-bad', "seek to $bytePos failed" );
	}

	$seg = fread( $this->file, self::SEGSIZE );
	if ( $seg === false ) {
		$this->error( 'zip-bad', "read from $bytePos failed" );
	}

	$this->buffer[$segIndex] = $seg;

	return $seg;
}
| 590 | |
/**
 * Add up the byte sizes of all members of a structure description. The
 * format of $struct is described at unpack().
 *
 * @param array $struct
 * @return int
 */
private function getStructSize( $struct ) {
	$total = 0;
	foreach ( $struct as $spec ) {
		// Array specs are [ type name, size ]; plain specs are the size itself
		$total += is_array( $spec ) ? $spec[1] : $spec;
	}

	return $total;
}
| 609 | |
/**
 * Decode a binary structure into an associative array. A friendlier
 * take on the built-in unpack() function.
 *
 * @param string $string The binary data input
 *
 * @param array $struct Structure description, keyed by field name. A value
 *   that is an integer declares a little-endian unsigned integer of that
 *   many bytes. A value that is an array declares a typed field: its first
 *   element is the type name and the rest are parameters of that type.
 *   The only type defined is:
 *    - "string": The second array element gives the length of string.
 *      Not null terminated.
 *
 * @param int $offset The offset into the string at which to start unpacking.
 * @return array Unpacked associative array. Note that large integers in the input
 *   may be represented as floating point numbers in the return value, so
 *   the use of weak comparison is advised.
 * @throws ZipDirectoryReaderError If the structure does not fit into the
 *   string, or an integer is too large
 * @throws UnexpectedValueException If $struct names an unknown type
 */
private function unpack( $string, $struct, $offset = 0 ) {
	if ( $offset + $this->getStructSize( $struct ) > strlen( $string ) ) {
		$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
	}

	$result = [];
	$cursor = $offset;
	foreach ( $struct as $field => $spec ) {
		if ( !is_array( $spec ) ) {
			// Unsigned little-endian integer
			$width = intval( $spec );

			// Accumulate from the most significant byte down. PHP promotes
			// the value to floating point by itself when it gets too big.
			$number = 0;
			for ( $i = $width - 1; $i >= 0; --$i ) {
				$number = $number * 256 + ord( $string[$cursor + $i] );
			}

			// Throw an exception if there was loss of precision
			if ( $number > 2 ** 52 ) {
				$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
					'This could happen if we tried to unpack a 64-bit structure ' .
					'at an invalid location.' );
			}

			$result[$field] = $number;
			$cursor += $width;
			continue;
		}

		[ $typeName, $fieldSize ] = $spec;
		if ( $typeName != 'string' ) {
			throw new UnexpectedValueException( __METHOD__ . ": invalid type \"$typeName\"" );
		}
		$result[$field] = substr( $string, $cursor, $fieldSize );
		$cursor += $fieldSize;
	}

	return $result;
}
| 674 | |
/**
 * Tell whether a single bit of an integer is set.
 *
 * @param int $value
 * @param int $bitIndex The index of the bit, where 0 is the LSB.
 * @return bool True if the bit is 1
 */
private function testBit( $value, $bitIndex ) {
	// Move the wanted bit down to position 0, then isolate it
	$shifted = $value >> $bitIndex;

	return ( $shifted & 1 ) === 1;
}
| 686 | } |