Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
65.23% |
182 / 279 |
|
33.33% |
6 / 18 |
CRAP | |
0.00% |
0 / 1 |
ZipDirectoryReader | |
65.23% |
182 / 279 |
|
33.33% |
6 / 18 |
304.13 | |
0.00% |
0 / 1 |
read | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
readHandle | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
__construct | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
execute | |
85.00% |
17 / 20 |
|
0.00% |
0 / 1 |
7.17 | |||
error | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
readEndOfCentralDirectoryRecord | |
97.06% |
33 / 34 |
|
0.00% |
0 / 1 |
7 | |||
readZip64EndOfCentralDirectoryLocator | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
readZip64EndOfCentralDirectoryRecord | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
42 | |||
findOldCentralDirectory | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
findZip64CentralDirectory | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
56 | |||
readCentralDirectory | |
91.94% |
57 / 62 |
|
0.00% |
0 / 1 |
10.05 | |||
unpackZip64Extra | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
12 | |||
getFileLength | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getBlock | |
76.19% |
16 / 21 |
|
0.00% |
0 / 1 |
6.49 | |||
getSegment | |
83.33% |
10 / 12 |
|
0.00% |
0 / 1 |
5.12 | |||
getStructSize | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
unpack | |
88.00% |
22 / 25 |
|
0.00% |
0 / 1 |
8.11 | |||
testBit | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | /** |
3 | * ZIP file directories reader, for the purposes of upload verification. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | */ |
22 | |
23 | use MediaWiki\Status\Status; |
24 | |
25 | /** |
26 | * A class for reading ZIP file directories, for the purposes of upload |
27 | * verification. |
28 | * |
29 | * Only a functional interface is provided: ZipDirectoryReader::read(). No access is |
30 | * given to object instances. |
31 | */ |
32 | class ZipDirectoryReader { |
33 | /** |
34 | * Read a ZIP file and call a function for each file discovered in it. |
35 | * |
36 | * Because this class is aimed at verification, an error is raised on |
37 | * suspicious or ambiguous input, instead of emulating some standard |
38 | * behavior. |
39 | * |
40 | * @param string $fileName The archive file name |
41 | * @param callable $callback The callback function. It will be called for each file |
42 | * with a single associative array each time, with members: |
43 | * |
44 | * - name: The file name. Directories conventionally have a trailing |
45 | * slash. |
46 | * |
47 | * - mtime: The file modification time, in MediaWiki 14-char format |
48 | * |
49 | * - size: The uncompressed file size |
50 | * |
51 | * @param array $options An associative array of read options, with the option |
52 | * name in the key. This may currently contain: |
53 | * |
54 | * - zip64: If this is set to true, then we will emulate a |
55 | * library with ZIP64 support, like OpenJDK 7. If it is set to |
56 | * false, then we will emulate a library with no knowledge of |
57 | * ZIP64. |
58 | * |
59 | * NOTE: The ZIP64 code is untested and probably doesn't work. It |
60 | * turned out to be easier to just reject ZIP64 archive uploads, |
61 | * since they are likely to be very rare. Confirming safety of a |
62 | * ZIP64 file is fairly complex. What do you do with a file that is |
63 | * ambiguous and broken when read with a non-ZIP64 reader, but valid |
64 | * when read with a ZIP64 reader? This situation is normal for a |
65 | * valid ZIP64 file, and working out what non-ZIP64 readers will make |
66 | * of such a file is not trivial. |
67 | * |
68 | * @return Status A Status object. The following fatal errors are defined: |
69 | * |
70 | * - zip-file-open-error: The file could not be opened. |
71 | * |
72 | * - zip-wrong-format: The file does not appear to be a ZIP file. |
73 | * |
74 | * - zip-bad: There was something wrong or ambiguous about the file |
75 | * data. |
76 | * |
77 | * - zip-unsupported: The ZIP file uses features which |
78 | * ZipDirectoryReader does not support. |
79 | * |
80 | * The default messages for those fatal errors are written in a way that |
81 | * makes sense for upload verification. |
82 | * |
83 | * If a fatal error is returned, more information about the error will be |
84 | * available in the debug log. |
85 | * |
86 | * Note that the callback function may be called any number of times before |
87 | * a fatal error is returned. If this occurs, the data sent to the callback |
88 | * function should be discarded. |
89 | */ |
public static function read( $fileName, $callback, $options = [] ) {
	// fopen() yields false when the file cannot be opened; execute() checks
	// for a falsy handle and reports it as a 'zip-file-open-error' Status.
	$handle = fopen( $fileName, 'r' );

	return ( new self( $handle, $callback, $options ) )->execute();
}
95 | |
/**
 * Scan an already opened stream, presumed to hold a ZIP archive, and invoke
 * a callback for every file listed in its central directory.
 *
 * This is the same operation as read(), except that the caller supplies
 * the stream instead of a file name.
 *
 * @see ZipDirectoryReader::read
 *
 * @param resource $file A seekable stream containing the archive
 * @param callable $callback Invoked once per directory entry
 * @param array $options Read options, as accepted by read()
 * @return Status
 */
public static function readHandle( $file, $callback, $options = [] ) {
	return ( new self( $file, $callback, $options ) )->execute();
}
111 | |
/** The "extra field" header ID that marks ZIP64 data in a central directory entry */
private const ZIP64_EXTRA_HEADER = 0x0001;

/** Size, in bytes, of one segment of the file contents cache */
private const SEGSIZE = 16384;

/** Bit index within the "general field" that flags UTF-8 file names */
private const GENERAL_UTF8 = 11;

/** Bit index within the "general field" that flags central directory encryption */
private const GENERAL_CD_ENCRYPTED = 13;

/** The opened file resource */
protected $file;

/** Cached file length; stays null until getFileLength() has loaded it */
protected $fileLength;

/** Segmented cache of the file contents, keyed by segment index */
protected $buffer;

/** Callback that receives the data of each directory entry */
protected $callback;

/** Whether to behave like a ZIP64-aware reader */
protected $zip64 = false;

/** The unpacked "end of central directory record" */
protected $eocdr;

/** The unpacked "ZIP64 end of central directory record" */
protected $eocdr64;

/** The unpacked "ZIP64 end of central directory locator" */
protected $eocdr64Locator;

/** Reset to an empty array at the start of execute() */
protected $data;
145 | |
/**
 * Store the stream, the per-entry callback and the requested ZIP64 mode.
 *
 * @param resource|false $file The stream to read; execute() treats a falsy
 *   value as a failure to open the file
 * @param callable $callback Invoked once per directory entry
 * @param array $options Read options; only 'zip64' is consulted
 */
protected function __construct( $file, $callback, $options ) {
	$this->callback = $callback;
	$this->file = $file;

	// A missing (or null) 'zip64' option leaves the declared default untouched.
	$this->zip64 = $options['zip64'] ?? $this->zip64;
}
159 | |
/**
 * Read the directory according to settings in $this.
 *
 * Locates the central directory (using either the legacy or the ZIP64
 * headers, depending on the zip64 option) and walks it, invoking the
 * callback for every entry. Any ZipDirectoryReaderError raised on the way
 * is converted into a fatal Status carrying the error code.
 *
 * The stream is always closed before this method returns or throws, so
 * that an exception other than ZipDirectoryReaderError (for instance one
 * thrown by the callback) cannot leak the file handle.
 *
 * @return Status Good on success; fatal with 'zip-file-open-error' if no
 *   stream is available, or with the code of the raised reader error.
 */
private function execute() {
	$this->data = [];
	if ( !$this->file ) {
		return Status::newFatal( 'zip-file-open-error' );
	}

	$status = Status::newGood();
	try {
		$this->readEndOfCentralDirectoryRecord();
		if ( $this->zip64 ) {
			[ $offset, $size ] = $this->findZip64CentralDirectory();
		} else {
			// These sentinel values tell a ZIP64-aware reader to look at the
			// ZIP64 headers instead, so a legacy-only reading is not trustworthy.
			if ( $this->eocdr['CD size'] == 0xffffffff
				|| $this->eocdr['CD offset'] == 0xffffffff
				|| $this->eocdr['CD entries total'] == 0xffff
			) {
				$this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
					'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
					'opening vulnerabilities on clients using OpenJDK 7 or later.' );
			}

			[ $offset, $size ] = $this->findOldCentralDirectory();
		}
		$this->readCentralDirectory( $offset, $size );
	} catch ( ZipDirectoryReaderError $e ) {
		$status->fatal( $e->getErrorCode() );
	} finally {
		// Runs for normal completion, handled reader errors and foreign
		// exceptions alike.
		fclose( $this->file );
	}

	return $status;
}
198 | |
/**
 * Write a message to the debug log, then abort by throwing.
 *
 * @param mixed $code Error code handed to the thrown exception
 * @param string $debugMessage Detail that is only written to the debug log
 * @throws ZipDirectoryReaderError Always
 * @return never
 */
private function error( $code, $debugMessage ) {
	$prefix = __CLASS__;
	wfDebug( $prefix . ": Fatal error: $debugMessage" );

	throw new ZipDirectoryReaderError( $code );
}
210 | |
/**
 * Locate and unpack the "end of central directory record" (EOCDR), the
 * header that follows the central directory, and store it in $this->eocdr.
 *
 * The last occurrence of the EOCDR signature is searched for in the final
 * 65536 bytes plus one fixed-size record of the file. The record must end
 * exactly at the end of the file, and must not refer to other disks.
 *
 * Besides the unpacked fields, 'EOCDR size' (fixed part plus comment),
 * 'file comment' and 'position' (absolute file offset of the signature)
 * are added to $this->eocdr.
 */
private function readEndOfCentralDirectoryRecord() {
	$layout = [
		'signature' => 4,
		'disk' => 2,
		'CD start disk' => 2,
		'CD entries this disk' => 2,
		'CD entries total' => 2,
		'CD size' => 4,
		'CD offset' => 4,
		'file comment length' => 2,
	];
	$fixedSize = $this->getStructSize( $layout );

	$fileSize = $this->getFileLength();
	if ( $fileSize === 0 ) {
		$this->error( 'zip-wrong-format', "The file is empty." );
	}

	// Start of the search window, clamped to the beginning of the file
	$windowStart = max( 0, $fileSize - 65536 - $fixedSize );
	$tail = $this->getBlock( $windowStart );

	$sigOffset = strrpos( $tail, "PK\x05\x06" );
	if ( $sigOffset === false ) {
		$this->error( 'zip-wrong-format',
			"zip file lacks EOCDR signature. It probably isn't a zip file." );
	}

	$this->eocdr = $this->unpack( substr( $tail, $sigOffset ), $layout );
	$recordSize = $fixedSize + $this->eocdr['file comment length'];
	$this->eocdr['EOCDR size'] = $recordSize;

	// The record, including its comment, has to run exactly to the end of the file
	$bytesAfterSignature = strlen( $tail ) - $sigOffset;
	if ( $recordSize != $bytesAfterSignature ) {
		// T40432: MS binary documents frequently embed ZIP files
		$this->error( 'zip-wrong-format', 'there is a ZIP signature but it is not at ' .
			'the end of the file. It could be an OLE file with a ZIP file embedded.' );
	}

	$singleDisk = $this->eocdr['disk'] === 0 && $this->eocdr['CD start disk'] === 0;
	if ( !$singleDisk ) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
	}

	$commentLayout = [
		'file comment' => [ 'string', $this->eocdr['file comment length'] ],
	];
	$this->eocdr += $this->unpack( $tail, $commentLayout, $sigOffset + $fixedSize );
	$this->eocdr['position'] = $windowStart + $sigOffset;
}
263 | |
/**
 * Unpack the "ZIP64 end of central directory locator" into
 * $this->eocdr64Locator. An error is raised if the bytes found at the
 * expected place do not carry the locator signature.
 */
private function readZip64EndOfCentralDirectoryLocator() {
	$layout = [
		'signature' => [ 'string', 4 ],
		'eocdr64 start disk' => 4,
		'eocdr64 offset' => 8,
		'number of disks' => 4,
	];
	$locatorSize = $this->getStructSize( $layout );

	// The locator is expected directly in front of the EOCDR, which itself
	// ends at the end of the file.
	$locatorPos = $this->getFileLength() - $this->eocdr['EOCDR size'] - $locatorSize;
	$locator = $this->unpack( $this->getBlock( $locatorPos, $locatorSize ), $layout );
	$this->eocdr64Locator = $locator;

	if ( $locator['signature'] === "PK\x06\x07" ) {
		return;
	}

	// Note: Java will allow this and continue to read the
	// EOCDR64, so we have to reject the upload, we can't
	// just use the EOCDR header instead.
	$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
}
288 | |
/**
 * Read the header called the "ZIP64 end of central directory record" into
 * $this->eocdr64. It may replace the regular "end of central directory
 * record" in ZIP64 files.
 *
 * The record is read from the offset given by the previously loaded ZIP64
 * locator. Multi-disk archives are rejected with 'zip-unsupported', a
 * wrong signature with 'zip-bad'.
 */
private function readZip64EndOfCentralDirectoryRecord() {
	// The locator's last field is the *total number of disks*, which is 1 for
	// an ordinary single-disk archive (common writers emit 1 here). Requiring
	// 0 would reject every such file, so only a count above 1 is treated as
	// multi-disk; 0 continues to be accepted as before.
	if ( $this->eocdr64Locator['eocdr64 start disk'] != 0
		|| $this->eocdr64Locator['number of disks'] > 1
	) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
	}

	$info = [
		'signature' => [ 'string', 4 ],
		'EOCDR64 size' => 8,
		'version made by' => 2,
		'version needed' => 2,
		'disk' => 4,
		'CD start disk' => 4,
		'CD entries this disk' => 8,
		'CD entries total' => 8,
		'CD size' => 8,
		'CD offset' => 8
	];
	$structSize = $this->getStructSize( $info );
	$block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize );
	$this->eocdr64 = $data = $this->unpack( $block, $info );
	if ( $data['signature'] !== "PK\x06\x06" ) {
		$this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
	}
	// Here both fields are disk *indexes*, so zero is the only single-disk value.
	if ( $data['disk'] !== 0
		|| $data['CD start disk'] !== 0
	) {
		$this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
	}
}
324 | |
/**
 * Work out where the central directory lies from the point of view of a
 * reader without ZIP64 support, using only the EOCDR fields.
 *
 * @return array Two-element list: the directory offset and its size.
 */
private function findOldCentralDirectory() {
	[
		'CD size' => $size,
		'CD offset' => $offset,
		'position' => $eocdrPos,
	] = $this->eocdr;

	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	$directoryEnd = $offset + $size;
	if ( $directoryEnd != $eocdrPos ) {
		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
			'of central directory record' );
	}

	return [ $offset, $size ];
}
345 | |
/**
 * Work out where the central directory lies from the point of view of a
 * ZIP64-compliant reader.
 *
 * The EOCDR values are used unless one of them holds its "see ZIP64"
 * sentinel, in which case the ZIP64 locator and record are loaded and
 * their values take over.
 *
 * @return array Two-element list: the directory offset and its size.
 */
private function findZip64CentralDirectory() {
	// The spec is ambiguous about the exact rules of precedence between the
	// ZIP64 headers and the original headers. Here we follow zip_util.c
	// from OpenJDK 7.
	$offset = $this->eocdr['CD offset'];
	$size = $this->eocdr['CD size'];
	$endPos = $this->eocdr['position'];
	$numEntries = $this->eocdr['CD entries total'];

	$wantsZip64 = $size == 0xffffffff
		|| $offset == 0xffffffff
		|| $numEntries == 0xffff;

	if ( $wantsZip64 ) {
		$this->readZip64EndOfCentralDirectoryLocator();

		if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
			$this->readZip64EndOfCentralDirectoryRecord();
			if ( isset( $this->eocdr64['CD offset'] ) ) {
				// In this case the directory has to end where the EOCDR64 begins
				$offset = $this->eocdr64['CD offset'];
				$size = $this->eocdr64['CD size'];
				$endPos = $this->eocdr64Locator['eocdr64 offset'];
			}
		}
	}

	// Some readers use the EOCDR position instead of the offset field
	// to find the directory, so to be safe, we check if they both agree.
	$directoryEnd = $offset + $size;
	if ( $directoryEnd != $endPos ) {
		$this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
			'of central directory record' );
	}

	return [ $offset, $size ];
}
384 | |
/**
 * Walk the central directory stored at the given location and hand every
 * entry to the callback.
 *
 * For each entry the callback receives an array with 'name' (converted
 * from CP437 to UTF-8 unless the entry is flagged as UTF-8), 'mtime' (a
 * 14-digit timestamp built from the MS-DOS date and time fields) and
 * 'size' (the uncompressed size).
 *
 * @param int $offset Absolute file offset of the central directory
 * @param int $size Length of the central directory in bytes
 */
private function readCentralDirectory( $offset, $size ) {
	$directory = $this->getBlock( $offset, $size );

	$headerLayout = [
		'signature' => [ 'string', 4 ],
		'version made by' => 2,
		'version needed' => 2,
		'general bits' => 2,
		'compression method' => 2,
		'mod time' => 2,
		'mod date' => 2,
		'crc-32' => 4,
		'compressed size' => 4,
		'uncompressed size' => 4,
		'name length' => 2,
		'extra field length' => 2,
		'comment length' => 2,
		'disk number start' => 2,
		'internal attrs' => 2,
		'external attrs' => 4,
		'local header offset' => 4,
	];
	$headerSize = $this->getStructSize( $headerLayout );

	for ( $cursor = 0; $cursor < $size; ) {
		// Fixed-size part of the entry
		$entry = $this->unpack( $directory, $headerLayout, $cursor );
		$cursor += $headerSize;

		if ( $entry['signature'] !== "PK\x01\x02" ) {
			$this->error( 'zip-bad', 'Invalid signature found in directory entry' );
		}

		// Variable-size part, whose lengths come from the fixed part
		$trailerLayout = [
			'name' => [ 'string', $entry['name length'] ],
			'extra field' => [ 'string', $entry['extra field length'] ],
			'comment' => [ 'string', $entry['comment length'] ],
		];
		$entry += $this->unpack( $directory, $trailerLayout, $cursor );
		$cursor += $this->getStructSize( $trailerLayout );

		// In ZIP64 mode a saturated 32-bit field defers to the ZIP64 extra data
		$saturated = $entry['compressed size'] == 0xffffffff
			|| $entry['uncompressed size'] == 0xffffffff
			|| $entry['local header offset'] == 0xffffffff;
		if ( $this->zip64 && $saturated ) {
			$zip64Data = $this->unpackZip64Extra( $entry['extra field'] );
			if ( $zip64Data ) {
				// Values from the extra field win over the 32-bit ones
				$entry = $zip64Data + $entry;
			}
		}

		if ( $this->testBit( $entry['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
			$this->error( 'zip-unsupported', 'central directory encryption is not supported' );
		}

		// Convert the timestamp into MediaWiki format
		// For the format, please see the MS-DOS 2.0 Programmer's Reference,
		// pages 3-5 and 3-6.
		$dosTime = $entry['mod time'];
		$dosDate = $entry['mod date'];
		$timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
			1980 + ( $dosDate >> 9 ),
			( $dosDate >> 5 ) & 15,
			$dosDate & 31,
			( $dosTime >> 11 ) & 31,
			( $dosTime >> 5 ) & 63,
			( $dosTime & 31 ) * 2
		);

		// Convert the character set in the file name
		$name = $this->testBit( $entry['general bits'], self::GENERAL_UTF8 )
			? $entry['name']
			: iconv( 'CP437', 'UTF-8', $entry['name'] );

		// Compile a data array for the user, with a sensible format
		call_user_func( $this->callback, [
			'name' => $name,
			'mtime' => $timestamp,
			'size' => $entry['uncompressed size'],
		] );
	}
}
477 | |
/**
 * Search an "extra field" blob for the ZIP64 record and unpack it.
 *
 * The blob is read as a sequence of records, each made of a 2-byte ID, a
 * 2-byte size and that many bytes of data. The first record whose ID is
 * ZIP64_EXTRA_HEADER is unpacked and returned.
 *
 * NOTE(review): the record is always unpacked with the full 28-byte
 * layout, so a shorter ZIP64 record makes unpack() raise 'zip-bad'. The
 * ZIP specification appears to allow records carrying only a subset of
 * these fields - confirm whether such records need to be supported.
 *
 * @param string $extraField Raw extra field of a central directory entry
 * @return array|bool The unpacked ZIP64 values, or false if the extra field
 *   holds no ZIP64 record
 */
private function unpackZip64Extra( $extraField ) {
	$recordHeaderLayout = [
		'id' => 2,
		'size' => 2,
	];
	$recordHeaderSize = $this->getStructSize( $recordHeaderLayout );

	$zip64Layout = [
		'uncompressed size' => 8,
		'compressed size' => 8,
		'local header offset' => 8,
		'disk number start' => 4,
	];

	$cursor = 0;
	while ( $cursor < strlen( $extraField ) ) {
		$record = $this->unpack( $extraField, $recordHeaderLayout, $cursor );
		$payloadPos = $cursor + $recordHeaderSize;
		$record += $this->unpack( $extraField,
			[ 'data' => [ 'string', $record['size'] ] ],
			$payloadPos );

		if ( $record['id'] == self::ZIP64_EXTRA_HEADER ) {
			return $this->unpack( $record['data'], $zip64Layout );
		}

		// Not the record we are after; skip over its payload
		$cursor = $payloadPos + $record['size'];
	}

	return false;
}
513 | |
/**
 * Return the size of the underlying file in bytes, as reported by fstat().
 * The value is looked up on first use and cached in $this->fileLength.
 *
 * @return int
 */
private function getFileLength() {
	if ( $this->fileLength !== null ) {
		return $this->fileLength;
	}

	$stat = fstat( $this->file );
	$this->fileLength = $stat['size'];

	return $this->fileLength;
}
526 | |
/**
 * Get the file contents from a given offset. If there are not enough bytes
 * in the file to satisfy the request, an exception will be thrown.
 *
 * The data is assembled from the cached segments returned by getSegment().
 * Only the segments that overlap the requested range are loaded.
 *
 * @param int $start The byte offset of the start of the block. Must lie
 *   inside the file; a negative or out-of-range value raises 'zip-bad'.
 * @param int|null $length The number of bytes to return. If omitted, the remainder
 *   of the file will be returned.
 *
 * @return string
 */
private function getBlock( $start, $length = null ) {
	$fileLength = $this->getFileLength();
	// A negative start can result from subtracting header sizes from a short
	// file; reject it here instead of relying on a later seek failure.
	if ( $start < 0 || $start >= $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested position $start, " .
			"file length is $fileLength" );
	}
	if ( $length === null ) {
		$length = $fileLength - $start;
	}
	$end = $start + $length;
	if ( $end > $fileLength ) {
		$this->error( 'zip-bad', "getBlock() requested end position $end, " .
			"file length is $fileLength" );
	}
	if ( $length == 0 ) {
		// Nothing to read, and no reason to touch the file
		return '';
	}

	// Segments [$startSeg, $endSeg) cover the byte range [$start, $end):
	// $endSeg is rounded up and is therefore an exclusive bound.
	$startSeg = (int)floor( $start / self::SEGSIZE );
	$endSeg = (int)ceil( $end / self::SEGSIZE );

	$block = '';
	for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
		$block .= $this->getSegment( $segIndex );
	}

	// Trim the leading part of the first segment and anything past $end
	$block = substr( $block,
		$start - $startSeg * self::SEGSIZE,
		$length );

	if ( strlen( $block ) < $length ) {
		$this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
	}

	return $block;
}
569 | |
/**
 * Fetch the cache segment with the given index, i.e. up to self::SEGSIZE
 * bytes of the file starting at $segIndex * self::SEGSIZE. Segments are
 * read from the file once and then kept in $this->buffer. This is a helper
 * for getBlock().
 *
 * A segment that extends past the end of the file comes back truncated,
 * and a segment starting at or beyond the end of the file is an empty
 * string.
 *
 * @param int $segIndex
 *
 * @return string
 */
private function getSegment( $segIndex ) {
	if ( isset( $this->buffer[$segIndex] ) ) {
		return $this->buffer[$segIndex];
	}

	$bytePos = $segIndex * self::SEGSIZE;
	if ( $bytePos >= $this->getFileLength() ) {
		// Remember the miss as well, so that it is not recomputed
		$this->buffer[$segIndex] = '';

		return '';
	}

	// fseek() reports success as 0
	if ( fseek( $this->file, $bytePos ) !== 0 ) {
		$this->error( 'zip-bad', "seek to $bytePos failed" );
	}

	$contents = fread( $this->file, self::SEGSIZE );
	if ( $contents === false ) {
		$this->error( 'zip-bad', "read from $bytePos failed" );
	}
	$this->buffer[$segIndex] = $contents;

	return $this->buffer[$segIndex];
}
603 | |
/**
 * Add up the byte sizes of all members of a structure description. See
 * unpack() for the format of $struct.
 *
 * @param array $struct
 * @return int
 */
private function getStructSize( $struct ) {
	$total = 0;
	foreach ( $struct as $member ) {
		// Array members are [ type name, size ]; anything else is the size itself
		$total += is_array( $member ) ? $member[1] : $member;
	}

	return $total;
}
622 | |
/**
 * Decode a binary structure into an associative array. A friendlier
 * take on the built-in unpack() function.
 *
 * @param string $string The binary data input
 *
 * @param array $struct Structure description, keyed by field name. A value
 *   that is an integer declares a little-endian unsigned integer of that
 *   many bytes. A value that is an array declares a typed field: its first
 *   element is the type name and the rest are type-dependent parameters.
 *   The only type defined is:
 *    - "string": The second array element gives the length of string.
 *      Not null terminated.
 *
 * @param int $offset The offset into the string at which to start unpacking.
 * @return array Unpacked associative array. Note that large integers in the input
 *   may be represented as floating point numbers in the return value, so
 *   the use of weak comparison is advised.
 */
private function unpack( $string, $struct, $offset = 0 ) {
	$needed = $this->getStructSize( $struct );
	if ( $offset + $needed > strlen( $string ) ) {
		$this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
	}

	$result = [];
	$pos = $offset;
	foreach ( $struct as $key => $type ) {
		if ( !is_array( $type ) ) {
			// Unsigned little-endian integer
			$length = intval( $type );

			// Accumulate from the most significant (last) byte downwards.
			// The arithmetic upgrades to floating point by itself if the
			// value outgrows an integer.
			$value = 0;
			$i = $length;
			while ( $i-- > 0 ) {
				$value = $value * 256 + ord( $string[$pos + $i] );
			}

			// Throw an exception if there was loss of precision
			if ( $value > 2 ** 52 ) {
				$this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
					'This could happen if we tried to unpack a 64-bit structure ' .
					'at an invalid location.' );
			}
			$result[$key] = $value;
			$pos += $length;
			continue;
		}

		[ $typeName, $fieldSize ] = $type;
		// Loose comparison on purpose: it matches how a switch would compare
		if ( $typeName != 'string' ) {
			throw new UnexpectedValueException( __METHOD__ . ": invalid type \"$typeName\"" );
		}
		$result[$key] = substr( $string, $pos, $fieldSize );
		$pos += $fieldSize;
	}

	return $result;
}
687 | |
/**
 * Tell whether a single bit of an integer value is set.
 *
 * @param int $value
 * @param int $bitIndex The index of the bit, where 0 is the LSB.
 * @return bool
 */
private function testBit( $value, $bitIndex ) {
	// Move the wanted bit down to the lowest position, then isolate it
	$shifted = $value >> $bitIndex;

	return ( $shifted & 1 ) === 1;
}
699 | } |