MediaWiki REL1_33
MSCompoundFileReader.php
Go to the documentation of this file.
1<?php
2/*
3 * Copyright 2019 Wikimedia Foundation
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License"); you may
6 * not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software distributed
12 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
13 * OF ANY KIND, either express or implied. See the License for the
14 * specific language governing permissions and limitations under the License.
15 */
16
// Stream handle the reader works on; assigned from the constructor argument.
33 private $file;
// Header fields decoded by init() from the start of the file.
34 private $header;
// MIME type derived from well-known directory entry names in readDirectory(), if any matched.
35 private $mime;
// Message of the RuntimeException caught by the constructor, if parsing failed.
37 private $error;
// Code of that exception; the codes raised in the visible code are ERROR_* constants.
38 private $errorCode;
// Becomes true only at the very end of init(), after the DIFAT and directory were read.
39 private $valid = false;
40
// List of FAT sector numbers built by readDifat(); getFatSector() indexes it by FAT sector index.
42 private $difat;
// Cache of decoded FAT sectors, filled on demand by getFatSector().
43 private $fat = [];
// NOTE(review): not read or written anywhere in the visible code - confirm whether it is still needed.
44 private $fileLength;
45
// Values for a directory entry's object_type field; of these, only TYPE_ROOT
// is compared against in the visible code.
47 const TYPE_STORAGE = 1;
48 const TYPE_STREAM = 2;
49 const TYPE_ROOT = 5;
50
// Error codes reported to callers through the 'errorCode' key of the result array.
51 const ERROR_FILE_OPEN = 1;
52 const ERROR_SEEK = 2;
53 const ERROR_READ = 3;
57
// Maps the CLSID of the root directory entry, in the textual form produced by
// decodeClsid(), to the MIME type reported as 'mimeFromClsid'.
58 private static $mimesByClsid = [
59 // From http://justsolve.archiveteam.org/wiki/Microsoft_Compound_File
60 '00020810-0000-0000-C000-000000000046' => 'application/vnd.ms-excel',
61 '00020820-0000-0000-C000-000000000046' => 'application/vnd.ms-excel',
62 '00020906-0000-0000-C000-000000000046' => 'application/msword',
63 '64818D10-4F9B-11CF-86EA-00AA00B929E8' => 'application/vnd.ms-powerpoint',
64 ];
65
/**
 * Read a file by name.
 *
 * The file is opened here, parsed via readHandle() and always closed again
 * before returning.
 *
 * @param string $fileName Path handed to fopen()
 * @return array The array produced by readHandle(); if the file cannot be
 *   opened, an array with 'valid' => false, 'error' and
 *   'errorCode' => self::ERROR_FILE_OPEN
 */
public static function readFile( $fileName ) {
	// 'b' keeps the stream binary-safe on platforms that distinguish text mode
	$handle = fopen( $fileName, 'rb' );
	if ( $handle === false ) {
		return [
			'valid' => false,
			'error' => 'file does not exist',
			'errorCode' => self::ERROR_FILE_OPEN
		];
	}
	try {
		return self::readHandle( $handle );
	} finally {
		// This method opened the handle, so it is responsible for releasing it,
		// even when readHandle() throws something unexpected
		fclose( $handle );
	}
}
89
/**
 * Read from an open seekable handle.
 *
 * @param resource $fileHandle Handle passed on to the constructor
 * @return array Always contains 'valid', 'mime' and 'mimeFromClsid'; when the
 *   reader recorded an error, 'error' and 'errorCode' are added as well
 */
public static function readHandle( $fileHandle ) {
	$parsed = new self( $fileHandle );

	$result = [];
	$result['valid'] = $parsed->valid;
	$result['mime'] = $parsed->mime;
	$result['mimeFromClsid'] = $parsed->mimeFromClsid;

	if ( !$parsed->error ) {
		return $result;
	}

	$result['error'] = $parsed->error;
	$result['errorCode'] = $parsed->errorCode;
	return $result;
}
115
/**
 * Store the handle and parse it immediately.
 *
 * A RuntimeException raised while parsing is not propagated; its message and
 * code are kept in $error / $errorCode and the reader is marked invalid.
 *
 * @param resource $fileHandle
 */
private function __construct( $fileHandle ) {
	$this->file = $fileHandle;

	try {
		$this->init();
	} catch ( RuntimeException $failure ) {
		$this->error = $failure->getMessage();
		$this->errorCode = $failure->getCode();
		$this->valid = false;
	}
}
126
/**
 * Parse the file: decode the header, verify it, then read the DIFAT and
 * the directory. $valid is set only when all of that succeeded.
 *
 * @throws RuntimeException via error() when the signature or the sector
 *   shift is invalid, or when any of the reads fails
 */
private function init() {
	$this->header = $this->unpackOffset( 0, [
		'header_signature' => 8,
		'header_clsid' => 16,
		'minor_version' => 2,
		'major_version' => 2,
		'byte_order' => 2,
		'sector_shift' => 2,
		'mini_sector_shift' => 2,
		'reserved' => 6,
		'num_dir_sectors' => 4,
		'num_fat_sectors' => 4,
		'first_dir_sector' => 4,
		'transaction_signature_number' => 4,
		'mini_stream_cutoff_size' => 4,
		'first_mini_fat_sector' => 4,
		'num_mini_fat_sectors' => 4,
		'first_difat_sector' => 4,
		'num_difat_sectors' => 4,
		'difat' => 436,
	] );
	if ( $this->header['header_signature'] !== "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" ) {
		$this->error( 'invalid signature: ' . bin2hex( $this->header['header_signature'] ),
			self::ERROR_INVALID_SIGNATURE );
	}

	// The sector shift comes straight from the file. [MS-CFB] only allows
	// 9 (512-byte sectors) and 12 (4096-byte sectors). Anything else is
	// rejected here: a tiny value would lead to a division by zero in
	// getNextSectorIdFromFat(), which the RuntimeException handler in the
	// constructor does not catch, and a huge value would request an
	// absurdly large read.
	$sectorShift = $this->header['sector_shift'];
	if ( !in_array( $sectorShift, [ 9, 12 ], true ) ) {
		$this->error( "invalid sector shift: $sectorShift", self::ERROR_INVALID_FORMAT );
	}
	$this->sectorLength = 1 << $sectorShift;

	$this->readDifat();
	$this->readDirectory();

	$this->valid = true;
}
158
/**
 * Convert a sector number into a byte offset within the file.
 *
 * The result is sector length times (sector number + 1), i.e. sector 0
 * starts one full sector length into the file.
 *
 * @param int $sectorId
 * @return int
 */
private function sectorOffset( $sectorId ) {
	$sectorsToSkip = $sectorId + 1;
	return $this->sectorLength * $sectorsToSkip;
}
162
/**
 * Format a 16-byte binary CLSID as upper-case hexadecimal text of the form
 * XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.
 *
 * The first three groups are decoded as little-endian integers (32, 16 and
 * 16 bits); the remaining eight bytes are printed in file order.
 *
 * @param string $binaryClsid
 * @return string
 */
private function decodeClsid( $binaryClsid ) {
	$fields = unpack( 'Va/vb/vc/C8d', $binaryClsid );

	$args = [ $fields['a'], $fields['b'], $fields['c'] ];
	// unpack() names the eight trailing bytes d1 .. d8
	for ( $n = 1; $n <= 8; $n++ ) {
		$args[] = $fields['d' . $n];
	}

	return vsprintf( "%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X", $args );
}
179
/**
 * Read a structure located at a byte offset in the file and decode it.
 *
 * @param int $offset Byte offset of the structure
 * @param int[] $struct Map of field name to field length in bytes
 * @return array Decoded fields, as returned by unpack()
 */
private function unpackOffset( $offset, $struct ) {
	$totalLength = array_sum( $struct );
	return $this->unpack( $this->readOffset( $offset, $totalLength ), 0, $struct );
}
184
/**
 * Read a structure located at the start of a sector and decode it.
 *
 * @param int $sectorNumber
 * @param int[] $struct Map of field name to field length in bytes
 * @return array Decoded fields, as returned by unpackOffset()
 */
private function unpackSector( $sectorNumber, $struct ) {
	$offset = $this->sectorOffset( $sectorNumber );
	// unpackOffset() needs the structure definition itself; it computes the
	// total length on its own. Passing array_sum( $struct ) here handed it an
	// integer where an array is required.
	return $this->unpackOffset( $offset, $struct );
}
189
/**
 * Decode consecutive fields from a binary string.
 *
 * Fields longer than four bytes are returned as raw substrings; shorter
 * ones are converted to numbers with bin2dec().
 *
 * @param string $block Binary data
 * @param int $offset Position of the first field within $block
 * @param int[] $struct Map of field name to field length in bytes
 * @return array Map of field name to decoded value
 */
private function unpack( $block, $offset, $struct ) {
	$fields = [];
	$cursor = $offset;
	foreach ( $struct as $fieldName => $width ) {
		$fields[$fieldName] = $width > 4
			? substr( $block, $cursor, $width )
			: $this->bin2dec( $block, $cursor, $width );
		$cursor += $width;
	}
	return $fields;
}
202
/**
 * Interpret part of a binary string as a little-endian unsigned number.
 *
 * @param string $str Binary data
 * @param int $offset Position of the least significant byte
 * @param int $length Number of bytes to decode
 * @return int|float The decoded number
 */
private function bin2dec( $str, $offset, $length ) {
	$result = 0;
	// Walk from the most significant (last) byte down to the first one
	$index = $offset + $length;
	while ( $index > $offset ) {
		$index--;
		$result = $result * 256 + ord( $str[$index] );
	}
	return $result;
}
211
/**
 * Read an exact number of bytes starting at a byte offset of the file.
 *
 * @param int $offset Position to seek to
 * @param int $length Number of bytes required
 * @return string Exactly $length bytes
 * @throws RuntimeException via error(): ERROR_READ when fread() fails,
 *   ERROR_READ_PAST_END when fewer bytes than requested were returned
 */
private function readOffset( $offset, $length ) {
	$this->fseek( $offset );
	$bytes = fread( $this->file, $length );

	if ( $bytes !== false && strlen( $bytes ) === $length ) {
		return $bytes;
	}

	if ( $bytes === false ) {
		$this->error( 'error reading from file', self::ERROR_READ );
	}
	$this->error( 'unable to read the required number of bytes from the file',
		self::ERROR_READ_PAST_END );
	return $bytes;
}
226
/**
 * Read the complete contents of one sector.
 *
 * @param int $sectorId
 * @return string Binary sector data, 1 << sector_shift bytes long
 */
private function readSector( $sectorId ) {
	$start = $this->sectorOffset( $sectorId );
	$byteCount = 1 << $this->header['sector_shift'];
	return $this->readOffset( $start, $byteCount );
}
230
/**
 * Abort parsing by throwing a RuntimeException.
 *
 * @param string $message Exception message
 * @param int $code Exception code
 * @throws RuntimeException always
 */
private function error( $message, $code ) {
	$exception = new RuntimeException( $message, $code );
	throw $exception;
}
234
/**
 * Move the file position to an absolute byte offset.
 *
 * @param int $offset
 * @throws RuntimeException via error() with ERROR_SEEK when the native
 *   fseek() does not report success
 */
private function fseek( $offset ) {
	if ( fseek( $this->file, $offset ) === 0 ) {
		return;
	}
	$this->error( "unable to seek to offset $offset", self::ERROR_SEEK );
}
243
/**
 * Build $this->difat, the list of FAT sector numbers.
 *
 * The list starts with the entries embedded in the header and continues
 * through the chain of DIFAT sectors. In each DIFAT sector the last four
 * bytes hold the number of the next DIFAT sector; everything before that
 * is more list entries.
 *
 * @throws RuntimeException via error() when a DIFAT sector is visited
 *   twice, or when a sector cannot be read
 */
private function readDifat() {
	$binaryDifat = $this->header['difat'];
	$nextDifatSector = $this->header['first_difat_sector'];
	$payloadLength = $this->sectorLength - 4;
	$seenSectorIds = [];
	for ( $i = 0; $i < $this->header['num_difat_sectors']; $i++ ) {
		// Both the sector count and the chain come from the file. Without
		// this check a sector that points back into the chain would be
		// re-read and appended over and over, up to num_difat_sectors times.
		if ( isset( $seenSectorIds[$nextDifatSector] ) ) {
			$this->error( 'DIFAT loop detected', self::ERROR_INVALID_FORMAT );
		}
		$seenSectorIds[$nextDifatSector] = true;

		$block = $this->readSector( $nextDifatSector );
		$binaryDifat .= substr( $block, 0, $payloadLength );
		$nextDifatSector = $this->bin2dec( $block, $payloadLength, 4 );
		// 0xFFFFFFFE marks the end of the chain
		if ( $nextDifatSector == 0xFFFFFFFE ) {
			break;
		}
	}

	$this->difat = [];
	$difatLength = strlen( $binaryDifat );
	for ( $pos = 0; $pos < $difatLength; $pos += 4 ) {
		$fatSector = $this->bin2dec( $binaryDifat, $pos, 4 );
		// Values from 0xFFFFFFFC upwards are not sector numbers; the first
		// one ends the list
		if ( $fatSector < 0xFFFFFFFC ) {
			$this->difat[] = $fatSector;
		} else {
			break;
		}
	}
}
266
/**
 * Look up the FAT entry for a sector, which is the number of the sector
 * that follows it in its chain.
 *
 * @param int $sectorId
 * @return int|float The FAT entry stored for $sectorId
 */
private function getNextSectorIdFromFat( $sectorId ) {
	// Every FAT entry is four bytes wide
	$perSector = intdiv( $this->sectorLength, 4 );
	$fatIndex = intdiv( $sectorId, $perSector );
	$slot = $sectorId % $perSector;

	$entries = $this->getFatSector( $fatIndex );
	return $entries[$slot];
}
273
/**
 * Get the decoded entries of one FAT sector, reading and caching it on
 * first use.
 *
 * @param int $fatSectorId Index into the DIFAT list, not a sector number
 * @return array List of the four-byte entries stored in that FAT sector
 * @throws RuntimeException via error() with ERROR_INVALID_FORMAT when the
 *   DIFAT has no entry for $fatSectorId
 */
private function getFatSector( $fatSectorId ) {
	if ( isset( $this->fat[$fatSectorId] ) ) {
		return $this->fat[$fatSectorId];
	}

	if ( !isset( $this->difat[$fatSectorId] ) ) {
		$this->error( 'FAT sector requested beyond the end of the DIFAT', self::ERROR_INVALID_FORMAT );
	}

	// The DIFAT translates the FAT sector index into a real sector number
	$raw = $this->readSector( $this->difat[$fatSectorId] );
	$rawLength = strlen( $raw );

	$entries = [];
	for ( $pos = 0; $pos < $rawLength; $pos += 4 ) {
		$entries[] = $this->bin2dec( $raw, $pos, 4 );
	}

	$this->fat[$fatSectorId] = $entries;
	return $entries;
}
289
/**
 * Read the directory stream and derive MIME information from it.
 *
 * The directory sectors are collected by following the FAT chain that
 * starts at first_dir_sector. Each 128-byte entry is then decoded:
 *  - the CLSID of the root entry sets $this->mimeFromClsid when it is
 *    listed in self::$mimesByClsid
 *  - an entry named Workbook, WordDocument or "PowerPoint Document" sets
 *    $this->mime
 *
 * @throws RuntimeException via error() when the FAT chain loops or a
 *   sector cannot be read
 */
private function readDirectory() {
	$dirSectorId = $this->header['first_dir_sector'];
	$binaryDir = '';
	$seenSectorIds = [];
	// 0xFFFFFFFE marks the end of a sector chain
	while ( $dirSectorId !== 0xFFFFFFFE ) {
		if ( isset( $seenSectorIds[$dirSectorId] ) ) {
			$this->error( 'FAT loop detected', self::ERROR_INVALID_FORMAT );
		}
		$seenSectorIds[$dirSectorId] = true;

		$binaryDir .= $this->readSector( $dirSectorId );
		$dirSectorId = $this->getNextSectorIdFromFat( $dirSectorId );
	}

	$struct = [
		'name_raw' => 64,
		'name_length' => 2,
		'object_type' => 1,
		'color' => 1,
		'sid_left' => 4,
		'sid_right' => 4,
		'sid_child' => 4,
		'clsid' => 16,
		'state_bits' => 4,
		'create_time_low' => 4,
		'create_time_high' => 4,
		'modify_time_low' => 4,
		'modify_time_high' => 4,
		'first_sector' => 4,
		'size_low' => 4,
		'size_high' => 4,
	];
	$entryLength = array_sum( $struct );
	$dirLength = strlen( $binaryDir );

	for ( $pos = 0; $pos < $dirLength; $pos += $entryLength ) {
		$entry = $this->unpack( $binaryDir, $pos, $struct );

		// According to [MS-CFB] size_high may contain garbage due to a
		// bug in a writer, it's best to pretend it is zero
		$entry['size_high'] = 0;

		$type = $entry['object_type'];
		if ( $type == self::TYPE_UNALLOCATED ) {
			continue;
		}

		// name_length counts bytes including the two-byte terminator. Clamp
		// it so that a bogus value in the file cannot turn into a negative
		// substr() length.
		$nameBytes = max( 0, min( strlen( $entry['name_raw'] ), $entry['name_length'] - 2 ) );
		// Names are stored as UTF-16LE without a byte order mark. The byte
		// order must be spelled out, because plain "UTF-16" makes iconv pick
		// one itself and the choice differs between platforms.
		$name = iconv( 'UTF-16LE', 'UTF-8', substr( $entry['name_raw'], 0, $nameBytes ) );

		$clsid = $this->decodeClsid( $entry['clsid'] );
		if ( $type == self::TYPE_ROOT && isset( self::$mimesByClsid[$clsid] ) ) {
			$this->mimeFromClsid = self::$mimesByClsid[$clsid];
		}

		if ( $name === 'Workbook' ) {
			$this->mime = 'application/vnd.ms-excel';
		} elseif ( $name === 'WordDocument' ) {
			$this->mime = 'application/msword';
		} elseif ( $name === 'PowerPoint Document' ) {
			$this->mime = 'application/vnd.ms-powerpoint';
		}
	}
}
352}
and that you know you can do these things To protect your we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights These restrictions translate to certain responsibilities for you if you distribute copies of the or if you modify it For if you distribute copies of such a whether gratis or for a you must give the recipients all the rights that you have You must make sure that receive or can get the source code And you must show them these terms so they know their rights We protect your rights with two and(2) offer you this license which gives you legal permission to copy
static $fileHandle
Definition cdb.php:57
Read the directory of a Microsoft Compound File Binary file, a.k.a.
unpackSector( $sectorNumber, $struct)
static readFile( $fileName)
Read a file by name.
unpack( $block, $offset, $struct)
readOffset( $offset, $length)
unpackOffset( $offset, $struct)
bin2dec( $str, $offset, $length)
static readHandle( $fileHandle)
Read from an open seekable handle.
do that in ParserLimitReportFormat instead use this to modify the parameters of the image all existing parser cache entries will be invalid To avoid you ll need to handle that somehow(e.g. with the RejectParserCacheValue hook) because MediaWiki won 't do it for you. & $defaults error
Definition hooks.txt:2644
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not it can be in the form of< username >< more info > e g for bot passwords intended to be added to log contexts Fields it might only if the login was with a bot password it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable & $code
Definition hooks.txt:856
returning false will NOT prevent logging $e
Definition hooks.txt:2175
$data
Utility to generate mapping file used in mw.Title (phpCharToUpper.json)