Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
64.98% |
180 / 277 |
|
33.33% |
6 / 18 |
CRAP | |
0.00% |
0 / 1 |
ZipDirectoryReader | |
64.98% |
180 / 277 |
|
33.33% |
6 / 18 |
301.83 | |
0.00% |
0 / 1 |
read | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
readHandle | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
__construct | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
execute | |
84.21% |
16 / 19 |
|
0.00% |
0 / 1 |
7.19 | |||
error | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
readEndOfCentralDirectoryRecord | |
97.06% |
33 / 34 |
|
0.00% |
0 / 1 |
7 | |||
readZip64EndOfCentralDirectoryLocator | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
readZip64EndOfCentralDirectoryRecord | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
42 | |||
findOldCentralDirectory | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
findZip64CentralDirectory | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
56 | |||
readCentralDirectory | |
91.94% |
57 / 62 |
|
0.00% |
0 / 1 |
10.05 | |||
unpackZip64Extra | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
12 | |||
getFileLength | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getBlock | |
75.00% |
15 / 20 |
|
0.00% |
0 / 1 |
5.39 | |||
getSegment | |
83.33% |
10 / 12 |
|
0.00% |
0 / 1 |
5.12 | |||
getStructSize | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
unpack | |
88.00% |
22 / 25 |
|
0.00% |
0 / 1 |
8.11 | |||
testBit | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | /** |
3 | * ZIP file directories reader, for the purposes of upload verification. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | */ |
22 | |
23 | use MediaWiki\Status\Status; |
24 | |
25 | /** |
26 | * A class for reading ZIP file directories, for the purposes of upload |
27 | * verification. |
28 | * |
29 | * Only a functional interface is provided: ZipDirectoryReader::read(). No access is |
30 | * given to object instances. |
31 | */ |
32 | class ZipDirectoryReader { |
/**
 * Open a ZIP archive by name and report every entry of its central
 * directory to a callback.
 *
 * This class is meant for verification, so suspicious or ambiguous input
 * produces an error instead of being interpreted the way some standard
 * reader would interpret it.
 *
 * @param string $fileName Name of the archive file to open
 * @param callable $callback Function invoked once per directory entry. It
 *   receives a single associative array with these members:
 *
 *    - name: The file name. Directories conventionally have a trailing
 *      slash.
 *
 *    - mtime: The file modification time, in MediaWiki 14-char format
 *
 *    - size: The uncompressed file size
 *
 * @param array $options Associative array of read options keyed by option
 *   name. Currently recognised:
 *
 *    - zip64: When true, emulate a library with ZIP64 support, like
 *      OpenJDK 7. When false, emulate a library with no knowledge of
 *      ZIP64.
 *
 *      NOTE: The ZIP64 code is untested and probably doesn't work. It
 *      turned out to be easier to just reject ZIP64 archive uploads,
 *      since they are likely to be very rare. Confirming safety of a
 *      ZIP64 file is fairly complex. What do you do with a file that is
 *      ambiguous and broken when read with a non-ZIP64 reader, but valid
 *      when read with a ZIP64 reader? This situation is normal for a
 *      valid ZIP64 file, and working out what non-ZIP64 readers will make
 *      of such a file is not trivial.
 *
 * @return Status A Status object. The following fatal errors are defined:
 *
 *      - zip-file-open-error: The file could not be opened.
 *
 *      - zip-wrong-format: The file does not appear to be a ZIP file.
 *
 *      - zip-bad: There was something wrong or ambiguous about the file
 *        data.
 *
 *      - zip-unsupported: The ZIP file uses features which
 *        ZipDirectoryReader does not support.
 *
 * The default messages for those fatal errors are written in a way that
 * makes sense for upload verification.
 *
 * Whenever a fatal error is returned, further detail about it is written
 * to the debug log.
 *
 * The callback may already have been invoked any number of times by the
 * time a fatal error is returned; in that case whatever it was given
 * should be thrown away.
 */
public static function read( $fileName, $callback, $options = [] ) {
	// A failed fopen() yields false; execute() turns that into
	// the zip-file-open-error status.
	$reader = new self( fopen( $fileName, 'r' ), $callback, $options );

	return $reader->execute();
}
95 | |
/**
 * Same as read(), but operating on a stream the caller has already opened
 * and which is presumed to contain a ZIP archive.
 *
 * The stream is handed to execute(), which closes it once the directory
 * has been processed.
 *
 * @see ZipDirectoryReader::read
 *
 * @param resource $file A seekable stream containing the archive
 * @param callable $callback Per-entry callback, as described for read()
 * @param array $options Read options, as described for read()
 * @return Status
 */
public static function readHandle( $file, $callback, $options = [] ) {
	return ( new self( $file, $callback, $options ) )->execute();
}
111 | |
112 | /** @var resource The opened file resource */ |
113 | protected $file; |
114 | |
115 | /** @var int|null The cached length of the file, or null if it has not been loaded yet. */ |
116 | protected $fileLength; |
117 | |
118 | /** @var string[] A segmented cache of the file contents */ |
119 | protected $buffer; |
120 | |
121 | /** @var callable The file data callback */ |
122 | protected $callback; |
123 | |
124 | /** @var bool The ZIP64 mode */ |
125 | protected $zip64 = false; |
126 | |
127 | /** @var array Stored headers */ |
128 | protected $eocdr; |
129 | /** @var array Stored headers */ |
130 | protected $eocdr64; |
131 | /** @var array Stored headers */ |
132 | protected $eocdr64Locator; |
133 | |
134 | /** The "extra field" ID for ZIP64 central directory entries */ |
135 | private const ZIP64_EXTRA_HEADER = 0x0001; |
136 | |
137 | /** The segment size for the file contents cache */ |
138 | private const SEGSIZE = 16384; |
139 | |
140 | /** The index of the "general field" bit for UTF-8 file names */ |
141 | private const GENERAL_UTF8 = 11; |
142 | |
143 | /** The index of the "general field" bit for central directory encryption */ |
144 | private const GENERAL_CD_ENCRYPTED = 13; |
145 | |
/**
 * Store the stream and callback, and pick up the ZIP64 mode from the
 * options if one was supplied.
 *
 * @param resource $file The opened archive stream (false if opening failed)
 * @param callable $callback The per-entry callback
 * @param array $options Read options; only the 'zip64' key is consulted
 */
protected function __construct( $file, $callback, $options ) {
	$this->callback = $callback;
	$this->file = $file;

	// An absent (or null) option leaves the current mode untouched.
	$this->zip64 = $options['zip64'] ?? $this->zip64;
}
159 | |
/**
 * Read the directory according to settings in $this.
 *
 * Returns a fatal 'zip-file-open-error' status straight away when no
 * stream is available. Otherwise the end of central directory record is
 * read, the central directory is located (ZIP64 or legacy rules depending
 * on the zip64 option) and each entry is passed to the callback. Any
 * ZipDirectoryReaderError raised on the way is converted into a fatal
 * status carrying its error code.
 *
 * The stream is closed in every case, including when an exception other
 * than ZipDirectoryReaderError (for instance one thrown by the callback)
 * propagates out of this method.
 *
 * @return Status
 */
private function execute() {
	if ( !$this->file ) {
		return Status::newFatal( 'zip-file-open-error' );
	}

	$status = Status::newGood();
	try {
		$this->readEndOfCentralDirectoryRecord();
		if ( $this->zip64 ) {
			[ $offset, $size ] = $this->findZip64CentralDirectory();
			$this->readCentralDirectory( $offset, $size );
		} else {
			// 0xffffffff / 0xffff are the placeholder values checked here as
			// the marker for ZIP64 data, which legacy mode cannot honour.
			if ( $this->eocdr['CD size'] == 0xffffffff
				|| $this->eocdr['CD offset'] == 0xffffffff
				|| $this->eocdr['CD entries total'] == 0xffff
			) {
				$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
					'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
					'opening vulnerabilities on clients using OpenJDK 7 or later.' );
			}

			[ $offset, $size ] = $this->findOldCentralDirectory();
			$this->readCentralDirectory( $offset, $size );
		}
	} catch ( ZipDirectoryReaderError $e ) {
		$status->fatal( $e->getErrorCode() );
	} finally {
		// Release the handle on every exit path so that an unexpected
		// exception does not leak the open stream.
		fclose( $this->file );
	}

	return $status;
}
197 | |
/**
 * Write a message to the debug log, then abort processing by throwing.
 *
 * @param string $code Error code carried by the exception
 * @param string $debugMessage Detail written to the debug log only
 * @throws ZipDirectoryReaderError Always
 * @return never
 */
private function error( $code, $debugMessage ) {
	$logLine = __CLASS__ . ": Fatal error: $debugMessage";
	wfDebug( $logLine );

	throw new ZipDirectoryReaderError( $code );
}
209 | |
/**
 * Locate and parse the "end of central directory record" (EOCDR), the
 * header that the ZIP spec places after the central directory.
 *
 * The search covers the last 65536 bytes of the file plus the size of the
 * fixed part of the record, and takes the last occurrence of the
 * signature in that window. The parsed fields are stored in $this->eocdr
 * together with three derived members: 'EOCDR size' (fixed part plus
 * comment), 'file comment' and 'position' (offset of the signature in
 * the file).
 *
 * Raises zip-wrong-format for an empty file, a missing signature, or a
 * record that does not end exactly at the end of the file, and
 * zip-unsupported when either disk number field is non-zero.
 */
private function readEndOfCentralDirectoryRecord() {
	$info = [
		'signature' => 4,
		'disk' => 2,
		'CD start disk' => 2,
		'CD entries this disk' => 2,
		'CD entries total' => 2,
		'CD size' => 4,
		'CD offset' => 4,
		'file comment length' => 2,
	];
	$structSize = $this->getStructSize( $info );

	if ( $this->getFileLength() === 0 ) {
		$this->error( 'zip-wrong-format', "The file is empty." );
	}

	// Clamp the start of the search window to the start of the file
	$startPos = max( 0, $this->getFileLength() - 65536 - $structSize );

	$block = $this->getBlock( $startPos );
	$sigPos = strrpos( $block, "PK\x05\x06" );
	if ( $sigPos === false ) {
		$this->error( 'zip-wrong-format',
			"zip file lacks EOCDR signature. It probably isn't a zip file." );
	}

	$this->eocdr = $this->unpack( substr( $block, $sigPos ), $info );
	$recordSize = $structSize + $this->eocdr['file comment length'];
	$this->eocdr['EOCDR size'] = $recordSize;

	// The record, comment included, has to run exactly to the end of the file
	$bytesAfterSignature = strlen( $block ) - $sigPos;
	if ( $recordSize != $bytesAfterSignature ) {
		// T40432: MS binary documents frequently embed ZIP files
		$this->error( 'zip-wrong-format', 'there is a ZIP signature but it is not at ' .
			'the end of the file. It could be an OLE file with a ZIP file embedded.' );
	}

	$singleDisk = $this->eocdr['disk'] === 0 && $this->eocdr['CD start disk'] === 0;
	if ( !$singleDisk ) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
	}

	$commentInfo = [ 'file comment' => [ 'string', $this->eocdr['file comment length'] ] ];
	$this->eocdr += $this->unpack( $block, $commentInfo, $sigPos + $structSize );
	$this->eocdr['position'] = $startPos + $sigPos;
}
262 | |
/**
 * Parse the "ZIP64 end of central directory locator" and store it in
 * $this->eocdr64Locator.
 *
 * The locator is read from the bytes directly in front of the EOCDR, so
 * readEndOfCentralDirectoryRecord() must have run first. A zip-bad error
 * is raised if the expected signature is not found there.
 */
private function readZip64EndOfCentralDirectoryLocator() {
	$info = [
		'signature' => [ 'string', 4 ],
		'eocdr64 start disk' => 4,
		'eocdr64 offset' => 8,
		'number of disks' => 4,
	];
	$structSize = $this->getStructSize( $info );

	$start = $this->getFileLength() - $this->eocdr['EOCDR size'] - $structSize;
	$locator = $this->unpack( $this->getBlock( $start, $structSize ), $info );
	$this->eocdr64Locator = $locator;

	if ( $locator['signature'] !== "PK\x06\x07" ) {
		// Note: Java will allow this and continue to read the
		// EOCDR64, so we have to reject the upload, we can't
		// just use the EOCDR header instead.
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
	}
}
287 | |
/**
 * Parse the "ZIP64 end of central directory record", which may replace
 * the regular end of central directory record in ZIP64 files, and store
 * it in $this->eocdr64.
 *
 * The record is read from the offset given by the previously parsed
 * locator. Raises zip-unsupported when the locator or the record carries
 * non-zero disk fields, and zip-bad when the record signature is wrong.
 */
private function readZip64EndOfCentralDirectoryRecord() {
	$locator = $this->eocdr64Locator;
	if ( $locator['eocdr64 start disk'] != 0 || $locator['number of disks'] != 0 ) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
	}

	$info = [
		'signature' => [ 'string', 4 ],
		'EOCDR64 size' => 8,
		'version made by' => 2,
		'version needed' => 2,
		'disk' => 4,
		'CD start disk' => 4,
		'CD entries this disk' => 8,
		'CD entries total' => 8,
		'CD size' => 8,
		'CD offset' => 8
	];
	$structSize = $this->getStructSize( $info );
	$block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );

	$record = $this->unpack( $block, $info );
	$this->eocdr64 = $record;

	if ( $record['signature'] !== "PK\x06\x06" ) {
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
	}

	$singleDisk = $record['disk'] === 0 && $record['CD start disk'] === 0;
	if ( !$singleDisk ) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
	}
}
323 | |
/**
 * Work out where the central directory is, the way a reader without
 * ZIP64 support would see it.
 *
 * Raises zip-bad unless the directory ends exactly where the end of
 * central directory record begins.
 *
 * @return array Two-element list: the directory offset, then its size.
 */
private function findOldCentralDirectory() {
	[
		'CD size' => $size,
		'CD offset' => $offset,
		'position' => $endPos,
	] = $this->eocdr;

	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	$directoryEnd = $offset + $size;
	if ( $directoryEnd != $endPos ) {
		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
			'of central directory record' );
	}

	return [ $offset, $size ];
}
344 | |
/**
 * Work out where the central directory is, the way a ZIP64-compliant
 * reader would see it.
 *
 * The regular EOCDR values are used unless one of them holds the
 * all-ones placeholder, in which case the ZIP64 locator and record are
 * read and their values take over. Raises zip-bad unless the directory
 * ends exactly at the position of the record that described it.
 *
 * @return array Two-element list: the directory offset, then its size.
 */
private function findZip64CentralDirectory() {
	// The spec is ambiguous about the exact rules of precedence between the
	// ZIP64 headers and the original headers. Here we follow zip_util.c
	// from OpenJDK 7.
	$eocdr = $this->eocdr;
	$size = $eocdr['CD size'];
	$offset = $eocdr['CD offset'];
	$endPos = $eocdr['position'];

	$hasPlaceholder = $size == 0xffffffff
		|| $offset == 0xffffffff
		|| $eocdr['CD entries total'] == 0xffff;

	if ( $hasPlaceholder ) {
		$this->readZip64EndOfCentralDirectoryLocator();

		if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
			$this->readZip64EndOfCentralDirectoryRecord();
			if ( isset( $this->eocdr64['CD offset'] ) ) {
				$offset = $this->eocdr64['CD offset'];
				$size = $this->eocdr64['CD size'];
				$endPos = $this->eocdr64Locator['eocdr64 offset'];
			}
		}
	}

	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	$directoryEnd = $offset + $size;
	if ( $directoryEnd != $endPos ) {
		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
			'of central directory record' );
	}

	return [ $offset, $size ];
}
383 | |
/**
 * Walk the central directory stored at the given location and pass a
 * summary of each entry to the callback.
 *
 * Every entry consists of a fixed-size header followed by the name, the
 * extra field and the comment, whose lengths are taken from the header.
 * Raises zip-bad on an entry with the wrong signature and zip-unsupported
 * when an entry has the central directory encryption bit set.
 *
 * @param int $offset Byte offset of the directory within the file
 * @param int $size Length of the directory in bytes
 */
private function readCentralDirectory( $offset, $size ) {
	$block = $this->getBlock( $offset, $size );

	$fixedInfo = [
		'signature' => [ 'string', 4 ],
		'version made by' => 2,
		'version needed' => 2,
		'general bits' => 2,
		'compression method' => 2,
		'mod time' => 2,
		'mod date' => 2,
		'crc-32' => 4,
		'compressed size' => 4,
		'uncompressed size' => 4,
		'name length' => 2,
		'extra field length' => 2,
		'comment length' => 2,
		'disk number start' => 2,
		'internal attrs' => 2,
		'external attrs' => 4,
		'local header offset' => 4,
	];
	$fixedSize = $this->getStructSize( $fixedInfo );

	for ( $pos = 0; $pos < $size; ) {
		$data = $this->unpack( $block, $fixedInfo, $pos );
		$pos += $fixedSize;

		if ( $data['signature'] !== "PK\x01\x02" ) {
			$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
		}

		// Variable-length tail of the entry; lengths come from the header
		$variableInfo = [
			'name' => [ 'string', $data['name length'] ],
			'extra field' => [ 'string', $data['extra field length'] ],
			'comment' => [ 'string', $data['comment length'] ],
		];
		$data += $this->unpack( $block, $variableInfo, $pos );
		$pos += $this->getStructSize( $variableInfo );

		// In ZIP64 mode a placeholder in any of these fields means the
		// value is looked up in the ZIP64 extra field instead.
		$wantsZip64Extra = $this->zip64 && (
			$data['compressed size'] == 0xffffffff
			|| $data['uncompressed size'] == 0xffffffff
			|| $data['local header offset'] == 0xffffffff
		);
		if ( $wantsZip64Extra ) {
			$zip64Data = $this->unpackZip64Extra( $data['extra field'] );
			if ( $zip64Data ) {
				// Left operand wins on duplicate keys, so ZIP64 values override
				$data = $zip64Data + $data;
			}
		}

		if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
			$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
		}

		// Convert the timestamp into MediaWiki format
		// For the format, please see the MS-DOS 2.0 Programmer's Reference,
		// pages 3-5 and 3-6.
		$dosTime = $data['mod time'];
		$dosDate = $data['mod date'];
		$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
			1980 + ( $dosDate >> 9 ),
			( $dosDate >> 5 ) & 15,
			$dosDate & 31,
			( $dosTime >> 11 ) & 31,
			( $dosTime >> 5 ) & 63,
			( $dosTime & 31 ) * 2
		);

		// Names are passed through as-is when flagged UTF-8, otherwise
		// they are converted from CP437.
		$name = $this->testBit( $data['general bits'], self::GENERAL_UTF8 )
			? $data['name']
			: iconv( 'CP437', 'UTF-8', $data['name'] );

		// Compile a data array for the user, with a sensible format
		call_user_func( $this->callback, [
			'name' => $name,
			'mtime' => $timestamp,
			'size' => $data['uncompressed size'],
		] );
	}
}
476 | |
/**
 * Scan an entry's "extra field" for the ZIP64 record and decode it.
 *
 * The extra field is a sequence of records, each made of a 2-byte id, a
 * 2-byte size and that many bytes of data. The first record whose id is
 * ZIP64_EXTRA_HEADER is unpacked and returned.
 *
 * @param string $extraField Raw extra field bytes of a directory entry
 * @return array|bool The decoded ZIP64 values, or false if the extra
 *   field holds no ZIP64 record.
 */
private function unpackZip64Extra( $extraField ) {
	$headerInfo = [
		'id' => 2,
		'size' => 2,
	];
	$headerSize = $this->getStructSize( $headerInfo );

	$zip64Info = [
		'uncompressed size' => 8,
		'compressed size' => 8,
		'local header offset' => 8,
		'disk number start' => 4,
	];

	$fieldLength = strlen( $extraField );
	for ( $cursor = 0; $cursor < $fieldLength; ) {
		$header = $this->unpack( $extraField, $headerInfo, $cursor );
		$cursor += $headerSize;

		$payload = $this->unpack( $extraField,
			[ 'data' => [ 'string', $header['size'] ] ],
			$cursor );
		$cursor += $header['size'];

		if ( $header['id'] == self::ZIP64_EXTRA_HEADER ) {
			return $this->unpack( $payload['data'], $zip64Info );
		}
	}

	return false;
}
512 | |
/**
 * Get the length of the file, as reported by fstat(). The value is
 * determined on first use and cached in $this->fileLength afterwards.
 *
 * A zip-bad error is raised if the stream cannot be stat'ed, instead of
 * carrying a bogus length into the later offset arithmetic.
 *
 * @return int
 */
private function getFileLength() {
	if ( $this->fileLength === null ) {
		$stat = fstat( $this->file );
		if ( $stat === false ) {
			$this->error( 'zip-bad', 'fstat() failed, unable to determine the file length' );
		}
		$this->fileLength = $stat['size'];
	}

	return $this->fileLength;
}
525 | |
/**
 * Get the file contents from a given offset. If there are not enough bytes
 * in the file to satisfy the request, a zip-bad error is raised.
 *
 * The data is assembled from the cached segments returned by getSegment();
 * only the segments overlapping the requested range are fetched.
 *
 * @param int $start The byte offset of the start of the block. Must lie
 *   inside the file; negative offsets are rejected.
 * @param int|null $length The number of bytes to return. If omitted, the remainder
 *   of the file will be returned.
 *
 * @return string
 */
private function getBlock( $start, $length = null ) {
	$fileLength = $this->getFileLength();
	// Reject offsets before the start of the file explicitly, rather than
	// relying on a later seek failure to catch them.
	if ( $start < 0 || $start >= $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested position $start, " .
			"file length is $fileLength" );
	}
	$length ??= $fileLength - $start;
	$end = $start + $length;
	if ( $end > $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested end position $end, " .
			"file length is $fileLength" );
	}

	// Segments [$startSeg, $endSeg) cover bytes [$start, $end); $endSeg is
	// exclusive, so the loop must stop before it.
	$startSeg = (int)floor( $start / self::SEGSIZE );
	$endSeg = (int)ceil( $end / self::SEGSIZE );

	$block = '';
	for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
		$block .= $this->getSegment( $segIndex );
	}

	// Trim the concatenated segments down to exactly the requested range
	$block = substr( $block,
		$start - $startSeg * self::SEGSIZE,
		$length );

	if ( strlen( $block ) < $length ) {
		$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
	}

	return $block;
}
566 | |
/**
 * Fetch the slice of the file that begins at $segIndex * self::SEGSIZE and
 * is at most self::SEGSIZE bytes long, caching it in $this->buffer. This
 * is a helper for getBlock().
 *
 * A segment that reaches past the end of the file comes back truncated,
 * and a segment that starts at or beyond the end of the file is an empty
 * string. Seek and read failures raise a zip-bad error.
 *
 * @param int $segIndex Zero-based segment number
 *
 * @return string
 */
private function getSegment( $segIndex ) {
	if ( isset( $this->buffer[$segIndex] ) ) {
		return $this->buffer[$segIndex];
	}

	$bytePos = $segIndex * self::SEGSIZE;
	if ( $bytePos >= $this->getFileLength() ) {
		$this->buffer[$segIndex] = '';

		return '';
	}

	// fseek() returns 0 on success and -1 on failure
	if ( fseek( $this->file, $bytePos ) !== 0 ) {
		$this->error( 'zip-bad', "seek to $bytePos failed" );
	}

	$seg = fread( $this->file, self::SEGSIZE );
	if ( $seg === false ) {
		$this->error( 'zip-bad', "read from $bytePos failed" );
	}

	$this->buffer[$segIndex] = $seg;

	return $seg;
}
600 | |
/**
 * Add up the byte lengths of all members of a structure description.
 * See unpack() for the format of $struct.
 *
 * @param array $struct Structure description: each value is either an
 *   integer byte count or an array whose second element is the byte count.
 * @return int Total size in bytes
 */
private function getStructSize( $struct ) {
	$total = 0;
	foreach ( $struct as $member ) {
		$total += is_array( $member ) ? $member[1] : $member;
	}

	return $total;
}
619 | |
/**
 * Decode a binary structure into an associative array. This plays the
 * role of the built-in unpack() function with a friendlier description
 * format.
 *
 * @param string $string The binary data input
 *
 * @param array $struct Associative array describing the structure. Keys
 *   are field names. A value is either an integer, meaning a
 *   little-endian unsigned integer of that many bytes, or an array whose
 *   first element is a type name followed by type-dependent parameters.
 *   The only type defined is:
 *    - "string": The second array element gives the length of string.
 *      Not null terminated.
 *
 * @param int $offset The offset into the string at which to start unpacking.
 * @return array Unpacked associative array. Note that large integers in the input
 *   may be represented as floating point numbers in the return value, so
 *   the use of weak comparison is advised.
 * @throws UnexpectedValueException If $struct names an unknown type
 */
private function unpack( $string, $struct, $offset = 0 ) {
	$needed = $this->getStructSize( $struct );
	if ( $offset + $needed > strlen( $string ) ) {
		$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
	}

	$result = [];
	$cursor = $offset;
	foreach ( $struct as $field => $type ) {
		if ( is_array( $type ) ) {
			[ $typeName, $fieldSize ] = $type;
			if ( $typeName !== 'string' ) {
				throw new UnexpectedValueException( __METHOD__ . ": invalid type \"$typeName\"" );
			}
			$result[$field] = substr( $string, $cursor, $fieldSize );
			$cursor += $fieldSize;
			continue;
		}

		// Unsigned little-endian integer
		$byteCount = (int)$type;

		// Fold the bytes in from most to least significant. The arithmetic
		// promotes the value to floating point by itself if it outgrows
		// an integer.
		$number = 0;
		for ( $i = $byteCount; $i-- > 0; ) {
			$number *= 256;
			$number += ord( $string[$cursor + $i] );
		}

		// Throw an exception if there was loss of precision
		if ( $number > 2 ** 52 ) {
			$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
				'This could happen if we tried to unpack a 64-bit structure ' .
				'at an invalid location.' );
		}
		$result[$field] = $number;
		$cursor += $byteCount;
	}

	return $result;
}
684 | |
/**
 * Tell whether a single bit of an integer value is set.
 *
 * @param int $value The integer to inspect
 * @param int $bitIndex The index of the bit, where 0 is the LSB.
 * @return bool True if the bit is 1, false if it is 0
 */
private function testBit( $value, $bitIndex ) {
	// Move the wanted bit into the lowest position, then isolate it
	$shifted = $value >> $bitIndex;

	return ( $shifted & 1 ) === 1;
}
696 | } |