// MIME type looked up from the root directory entry's CLSID via
// self::$mimesByClsid; assigned in readDirectory().
38 private $mimeFromClsid;
// Defaults to false; the constructor's catch block also sets it to false
// when a RuntimeException is caught.
41 private $valid =
false;
// Sector size in bytes, computed in init() as 1 << header['sector_shift'].
43 private $sectorLength;
// Values compared against a directory entry's 'object_type' field in
// readDirectory().
47 private const TYPE_UNALLOCATED = 0;
48 private const TYPE_STORAGE = 1;
49 private const TYPE_STREAM = 2;
50 private const TYPE_ROOT = 5;
// Map from a CLSID string (in the format produced by decodeClsid()) to a
// MIME type. readDirectory() consults it for the entry whose object type is
// TYPE_ROOT.
// NOTE(review): only part of this table is visible here; the closing
// bracket and any further entries are outside this excerpt.
59 private static $mimesByClsid = [
61 '00020810-0000-0000-C000-000000000046' =>
'application/vnd.ms-excel',
62 '00020820-0000-0000-C000-000000000046' =>
'application/vnd.ms-excel',
63 '00020906-0000-0000-C000-000000000046' =>
'application/msword',
64 '64818D10-4F9B-11CF-86EA-00AA00B929E8' =>
'application/vnd.ms-powerpoint',
80 $handle = fopen( $fileName,
'r' );
81 if ( $handle ===
false ) {
84 'error' =>
'file does not exist',
104 $reader =
new self( $fileHandle );
106 'valid' => $reader->valid,
107 'mime' => $reader->mime,
108 'mimeFromClsid' => $reader->mimeFromClsid
110 if ( $reader->error ) {
111 $info[
'error'] = $reader->error;
112 $info[
'errorCode'] = $reader->errorCode;
/**
 * Store the file handle and record any parse failure on the instance.
 *
 * A RuntimeException (the type thrown by error()) is caught here rather
 * than propagated: the object is marked invalid and the exception's
 * message and code are kept in $this->error / $this->errorCode.
 *
 * NOTE(review): the body of the try block is not visible in this excerpt;
 * presumably it calls init() — confirm.
 *
 * @param resource $fileHandle Stream the other methods fseek()/fread() on
 */
117 private function __construct( $fileHandle ) {
118 $this->file = $fileHandle;
121 }
catch ( RuntimeException $e ) {
122 $this->valid =
false;
123 $this->error = $e->getMessage();
124 $this->errorCode = $e->getCode();
/**
 * Read the file header from offset 0, validate its signature, derive the
 * sector size and then read the directory.
 *
 * Raises (via error()) with ERROR_INVALID_SIGNATURE when the first eight
 * bytes are not the expected signature.
 *
 * NOTE(review): some header fields (including 'sector_shift' and 'difat',
 * which are read elsewhere) and the end of the struct array are not
 * visible in this excerpt.
 */
128 private function init() {
129 $this->header = $this->unpackOffset( 0, [
// Each value is the field's width in bytes; unpackOffset() reads
// array_sum() of these widths starting at the given offset.
130 'header_signature' => 8,
131 'header_clsid' => 16,
132 'minor_version' => 2,
133 'major_version' => 2,
136 'mini_sector_shift' => 2,
138 'num_dir_sectors' => 4,
139 'num_fat_sectors' => 4,
140 'first_dir_sector' => 4,
141 'transaction_signature_number' => 4,
142 'mini_stream_cutoff_size' => 4,
143 'first_mini_fat_sector' => 4,
144 'num_mini_fat_sectors' => 4,
145 'first_difat_sector' => 4,
146 'num_difat_sectors' => 4,
// The signature is compared as a raw 8-byte string; on mismatch the
// offending bytes are reported as hex in the error message.
149 if ( $this->header[
'header_signature'] !==
"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" ) {
150 $this->error(
'invalid signature: ' . bin2hex( $this->header[
'header_signature'] ),
151 self::ERROR_INVALID_SIGNATURE );
// sector_shift is used as a power-of-two exponent: sector size = 2^shift bytes.
153 $this->sectorLength = 1 << $this->header[
'sector_shift'];
155 $this->readDirectory();
/**
 * Convert a sector ID into a byte offset within the file.
 *
 * Sector 0 starts one sector length into the file (hence the "+ 1"); the
 * region before it is where init() reads the header from offset 0.
 *
 * @param int $sectorId
 * @return int Byte offset of the start of the sector
 */
160 private function sectorOffset( $sectorId ) {
161 return $this->sectorLength * ( $sectorId + 1 );
/**
 * Format a binary CLSID as an upper-case hexadecimal string of the form
 * XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX, the key format used by
 * self::$mimesByClsid.
 *
 * @param string $binaryClsid Raw CLSID bytes (callers pass a 16-byte field)
 * @return string
 */
164 private function decodeClsid( $binaryClsid ) {
// unpack() codes: V = 32-bit little-endian unsigned, v = 16-bit
// little-endian unsigned, C8 = eight single unsigned bytes (keys d1..d8).
165 $parts = unpack(
'Va/vb/vc/C8d', $binaryClsid );
// NOTE(review): the sprintf() arguments are outside this excerpt;
// presumably the eleven unpacked parts in order — confirm.
166 return sprintf(
"%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X",
/**
 * Read a fixed-layout structure from the file and unpack it.
 *
 * Reads array_sum( $struct ) bytes starting at $offset, then hands the
 * block to unpack() starting at position 0 within that block.
 *
 * @param int $offset Byte offset in the file
 * @param array $struct Map of field name => field length in bytes
 * @return array Whatever unpack() returns for the block
 */
186 private function unpackOffset( $offset, $struct ) {
187 $block = $this->readOffset( $offset, array_sum( $struct ) );
188 return $this->unpack( $block, 0, $struct );
/**
 * Unpack consecutive fields of a binary block according to $struct.
 *
 * Each field is stored under its key either as the raw byte substring or
 * as an integer decoded by bin2dec().
 *
 * NOTE(review): the condition that chooses between the two forms, the
 * advancing of $offset and the return statement are not visible in this
 * excerpt.
 *
 * @param string $block Binary data
 * @param int $offset Position in $block of the first field
 * @param array $struct Map of field name => field length in bytes
 */
197 private function unpack( $block, $offset, $struct ) {
199 foreach ( $struct as $key => $length ) {
201 $data[$key] = substr( $block, $offset, $length );
203 $data[$key] = $this->bin2dec( $block, $offset, $length );
/**
 * Decode $length bytes of $str starting at $offset into a number.
 *
 * The loop walks from the last byte of the field back to the first,
 * adding each byte's ordinal to the accumulator.
 *
 * NOTE(review): the accumulator's initialisation and any scaling step
 * between iterations are not visible here; presumably the value is
 * multiplied by 256 before each add, which would make this a
 * little-endian decode — confirm.
 *
 * @param string $str Binary data
 * @param int $offset Position of the first byte of the field
 * @param int $length Number of bytes in the field
 */
210 private function bin2dec( $str, $offset, $length ) {
212 for ( $i = $length - 1; $i >= 0; $i-- ) {
214 $value += ord( $str[$offset + $i] );
/**
 * Read exactly $length bytes from the file starting at byte $offset.
 *
 * Raises (via error()) with ERROR_READ when fread() fails outright, and
 * with ERROR_READ_PAST_END when fewer bytes than requested come back.
 * Seek failures are raised by $this->fseek().
 *
 * NOTE(review): the return statement is outside this excerpt; callers use
 * the result as the bytes read.
 *
 * @param int $offset Byte offset in the file
 * @param int $length Number of bytes required
 */
219 private function readOffset( $offset, $length ) {
220 $this->fseek( $offset );
// PHP warnings from fread() are suppressed; failure is detected from the
// return value below instead.
221 AtEase::suppressWarnings();
222 $block = fread( $this->file, $length );
223 AtEase::restoreWarnings();
224 if ( $block ===
false ) {
225 $this->error(
'error reading from file', self::ERROR_READ );
// A short read is treated as an error rather than returned to the caller.
227 if ( strlen( $block ) !== $length ) {
228 $this->error(
'unable to read the required number of bytes from the file',
229 self::ERROR_READ_PAST_END );
/**
 * Read one whole sector from the file.
 *
 * The length is recomputed as 1 << header['sector_shift'], which is the
 * same expression init() stores in $this->sectorLength.
 *
 * @param int $sectorId
 * @return string Whatever readOffset() returns for that range
 */
234 private function readSector( $sectorId ) {
235 return $this->readOffset( $this->sectorOffset( $sectorId ), 1 << $this->header[
'sector_shift'] );
/**
 * Abort parsing by throwing a RuntimeException, the type caught in the
 * constructor.
 *
 * @param string $message Used as the exception message
 * @param int $code Used as the exception code (callers pass self::ERROR_* constants)
 * @throws RuntimeException Always
 */
243 private function error( $message, $code ) {
244 throw new RuntimeException( $message, $code );
/**
 * Seek the underlying file handle to an absolute byte offset.
 *
 * Raises (via error()) with ERROR_SEEK when the native fseek() does not
 * report success.
 *
 * @param int $offset Byte offset from the start of the file
 */
247 private function fseek( $offset ) {
// Warnings are suppressed; the return value is checked instead.
248 AtEase::suppressWarnings();
249 $result = fseek( $this->file, $offset );
250 AtEase::restoreWarnings();
// Native fseek() returns 0 on success and -1 on failure.
251 if ( $result !== 0 ) {
252 $this->error(
"unable to seek to offset $offset", self::ERROR_SEEK );
/**
 * Build $this->difat, the list of sector IDs that hold the FAT.
 *
 * Starts from the binary DIFAT embedded in the header, appends the
 * contents of each chained DIFAT sector, then decodes the result as a
 * sequence of 4-byte entries.
 *
 * NOTE(review): the body of the 0xFFFFFFFE check and the closing braces
 * are outside this excerpt.
 */
256 private function readDifat() {
257 $binaryDifat = $this->header[
'difat'];
258 $nextDifatSector = $this->header[
'first_difat_sector'];
// Follow at most num_difat_sectors links of the DIFAT sector chain.
259 for ( $i = 0; $i < $this->header[
'num_difat_sectors']; $i++ ) {
260 $block = $this->readSector( $nextDifatSector );
// The last four bytes of each DIFAT sector hold the ID of the next DIFAT
// sector; everything before them is DIFAT entries.
261 $binaryDifat .= substr( $block, 0, $this->sectorLength - 4 );
262 $nextDifatSector = $this->bin2dec( $block, $this->sectorLength - 4, 4 );
// NOTE(review): loose comparison here, whereas readDirectory() compares
// against the same constant with !== — confirm this is intentional.
263 if ( $nextDifatSector == 0xFFFFFFFE ) {
// Decode the accumulated bytes four at a time; values of 0xFFFFFFFC and
// above are not stored as FAT sector IDs.
269 for ( $pos = 0; $pos < strlen( $binaryDifat ); $pos += 4 ) {
270 $fatSector = $this->bin2dec( $binaryDifat, $pos, 4 );
271 if ( $fatSector < 0xFFFFFFFC ) {
272 $this->difat[] = $fatSector;
/**
 * Look up the FAT entry for a sector, i.e. the ID of the sector that
 * follows it in its chain.
 *
 * Each FAT entry is four bytes, so one FAT sector holds sectorLength / 4
 * entries. The quotient selects the FAT sector and the remainder selects
 * the entry within it.
 *
 * @param int $sectorId
 * @return int The FAT entry value for $sectorId
 */
279 private function getNextSectorIdFromFat( $sectorId ) {
280 $entriesPerSector = intdiv( $this->sectorLength, 4 );
281 $fatSectorId = intdiv( $sectorId, $entriesPerSector );
282 $fatSectorArray = $this->getFatSector( $fatSectorId );
283 return $fatSectorArray[$sectorId % $entriesPerSector];
/**
 * Return the decoded entries of one FAT sector, loading it on first use
 * and caching it in $this->fat.
 *
 * Raises (via error()) with ERROR_INVALID_FORMAT when $fatSectorId has no
 * entry in $this->difat.
 *
 * NOTE(review): the initialisation of $fat and the closing braces are
 * outside this excerpt.
 *
 * @param int $fatSectorId Index of the FAT sector (a key into $this->difat)
 * @return array List of integers decoded from the sector
 */
286 private function getFatSector( $fatSectorId ) {
287 if ( !isset( $this->fat[$fatSectorId] ) ) {
289 if ( !isset( $this->difat[$fatSectorId] ) ) {
290 $this->error(
'FAT sector requested beyond the end of the DIFAT', self::ERROR_INVALID_FORMAT );
// The DIFAT maps the FAT sector index to the sector ID used for reading.
292 $absoluteSectorId = $this->difat[$fatSectorId];
293 $block = $this->readSector( $absoluteSectorId );
// Decode the sector as consecutive 4-byte entries.
294 for ( $pos = 0; $pos < strlen( $block ); $pos += 4 ) {
295 $fat[] = $this->bin2dec( $block, $pos, 4 );
297 $this->fat[$fatSectorId] = $fat;
299 return $this->fat[$fatSectorId];
/**
 * Read the directory sector chain and scan its entries to detect the
 * document type.
 *
 * Sets $this->mimeFromClsid from the CLSID of the TYPE_ROOT entry (when
 * that CLSID is listed in self::$mimesByClsid) and $this->mime from
 * well-known entry names.
 *
 * Raises (via error()) with ERROR_INVALID_FORMAT if the sector chain
 * revisits a sector.
 *
 * NOTE(review): many lines of this method are outside this excerpt,
 * including most of the entry struct, the initialisation of $binaryDir and
 * $seenSectorIds, and the bodies/conditions around 'size_high' and the
 * TYPE_UNALLOCATED check.
 */
302 private function readDirectory() {
303 $dirSectorId = $this->header[
'first_dir_sector'];
// Walk the chain via the FAT until the 0xFFFFFFFE marker is reached.
306 while ( $dirSectorId !== 0xFFFFFFFE ) {
// Seeing a sector ID twice means the chain loops; stop with an error
// rather than iterating forever.
307 if ( isset( $seenSectorIds[$dirSectorId] ) ) {
308 $this->error(
'FAT loop detected', self::ERROR_INVALID_FORMAT );
310 $seenSectorIds[$dirSectorId] =
true;
312 $binaryDir .= $this->readSector( $dirSectorId );
313 $dirSectorId = $this->getNextSectorIdFromFat( $dirSectorId );
326 'create_time_low' => 4,
327 'create_time_high' => 4,
328 'modify_time_low' => 4,
329 'modify_time_high' => 4,
// One directory entry occupies the sum of the struct's field widths.
334 $entryLength = array_sum( $struct );
336 for ( $pos = 0; $pos < strlen( $binaryDir ); $pos += $entryLength ) {
337 $entry = $this->unpack( $binaryDir, $pos, $struct );
341 $entry[
'size_high'] = 0;
343 $type = $entry[
'object_type'];
344 if ( $type == self::TYPE_UNALLOCATED ) {
// name_length is a byte count; the final two bytes are dropped before
// converting (presumably the UTF-16 null terminator — confirm).
348 $name = iconv(
'UTF-16LE',
'UTF-8', substr( $entry[
'name_raw'], 0, $entry[
'name_length'] - 2 ) );
350 $clsid = $this->decodeClsid( $entry[
'clsid'] );
// Only the root entry's CLSID is used for MIME detection.
351 if ( $type == self::TYPE_ROOT && isset( self::$mimesByClsid[$clsid] ) ) {
352 $this->mimeFromClsid = self::$mimesByClsid[$clsid];
// Entry names that identify the document type.
355 if ( $name ===
'Workbook' ) {
356 $this->mime =
'application/vnd.ms-excel';
357 } elseif ( $name ===
'WordDocument' ) {
358 $this->mime =
'application/msword';
359 } elseif ( $name ===
'PowerPoint Document' ) {
360 $this->mime =
'application/vnd.ms-powerpoint';