60 '00020810-0000-0000-C000-000000000046' =>
'application/vnd.ms-excel',
61 '00020820-0000-0000-C000-000000000046' =>
'application/vnd.ms-excel',
62 '00020906-0000-0000-C000-000000000046' =>
'application/msword',
63 '64818D10-4F9B-11CF-86EA-00AA00B929E8' =>
'application/vnd.ms-powerpoint',
79 $handle = fopen( $fileName,
'r' );
80 if ( $handle ===
false ) {
83 'error' =>
'file does not exist',
105 'valid' => $reader->valid,
106 'mime' => $reader->mime,
107 'mimeFromClsid' => $reader->mimeFromClsid
109 if ( $reader->error ) {
110 $info[
'error'] = $reader->error;
111 $info[
'errorCode'] = $reader->errorCode;
120 }
catch ( RuntimeException
$e ) {
121 $this->valid =
false;
122 $this->
error = $e->getMessage();
123 $this->errorCode =
$e->getCode();
129 'header_signature' => 8,
130 'header_clsid' => 16,
131 'minor_version' => 2,
132 'major_version' => 2,
135 'mini_sector_shift' => 2,
137 'num_dir_sectors' => 4,
138 'num_fat_sectors' => 4,
139 'first_dir_sector' => 4,
140 'transaction_signature_number' => 4,
141 'mini_stream_cutoff_size' => 4,
142 'first_mini_fat_sector' => 4,
143 'num_mini_fat_sectors' => 4,
144 'first_difat_sector' => 4,
145 'num_difat_sectors' => 4,
148 if ( $this->header[
'header_signature'] !==
"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" ) {
149 $this->
error(
'invalid signature: ' . bin2hex( $this->header[
'header_signature'] ),
150 self::ERROR_INVALID_SIGNATURE );
152 $this->sectorLength = 1 << $this->header[
'sector_shift'];
160 return $this->sectorLength * ( $sectorId + 1 );
164 $parts =
unpack(
'Va/vb/vc/C8d', $binaryClsid );
165 return sprintf(
"%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X",
181 $block = $this->
readOffset( $offset, array_sum( $struct ) );
182 return $this->
unpack( $block, 0, $struct );
187 return $this->
unpackOffset( $offset, array_sum( $struct ) );
190 private function unpack( $block, $offset, $struct ) {
192 foreach ( $struct
as $key => $length ) {
194 $data[$key] = substr( $block, $offset, $length );
196 $data[$key] = $this->
bin2dec( $block, $offset, $length );
203 private function bin2dec( $str, $offset, $length ) {
205 for ( $i = $length - 1; $i >= 0; $i-- ) {
207 $value += ord( $str[$offset + $i] );
213 $this->
fseek( $offset );
214 Wikimedia\suppressWarnings();
215 $block = fread( $this->
file, $length );
216 Wikimedia\restoreWarnings();
217 if ( $block ===
false ) {
218 $this->
error(
'error reading from file', self::ERROR_READ );
220 if ( strlen( $block ) !== $length ) {
221 $this->
error(
'unable to read the required number of bytes from the file',
222 self::ERROR_READ_PAST_END );
232 throw new RuntimeException( $message,
$code );
236 Wikimedia\suppressWarnings();
238 Wikimedia\restoreWarnings();
240 $this->
error(
"unable to seek to offset $offset", self::ERROR_SEEK );
245 $binaryDifat = $this->header[
'difat'];
246 $nextDifatSector = $this->header[
'first_difat_sector'];
247 for ( $i = 0; $i < $this->header[
'num_difat_sectors']; $i++ ) {
248 $block = $this->
readSector( $nextDifatSector );
249 $binaryDifat .= substr( $block, 0, $this->sectorLength - 4 );
250 $nextDifatSector = $this->
bin2dec( $block, $this->sectorLength - 4, 4 );
251 if ( $nextDifatSector == 0xFFFFFFFE ) {
257 for ( $pos = 0; $pos < strlen( $binaryDifat ); $pos += 4 ) {
258 $fatSector = $this->
bin2dec( $binaryDifat, $pos, 4 );
259 if ( $fatSector < 0xFFFFFFFC ) {
260 $this->difat[] = $fatSector;
268 $entriesPerSector = intdiv( $this->sectorLength, 4 );
269 $fatSectorId = intdiv( $sectorId, $entriesPerSector );
271 return $fatSectorArray[$sectorId % $entriesPerSector];
275 if ( !isset( $this->fat[$fatSectorId] ) ) {
277 if ( !isset( $this->difat[$fatSectorId] ) ) {
278 $this->
error(
'FAT sector requested beyond the end of the DIFAT', self::ERROR_INVALID_FORMAT );
280 $absoluteSectorId = $this->difat[$fatSectorId];
281 $block = $this->
readSector( $absoluteSectorId );
282 for ( $pos = 0; $pos < strlen( $block ); $pos += 4 ) {
285 $this->fat[$fatSectorId] =
$fat;
287 return $this->fat[$fatSectorId];
291 $dirSectorId = $this->header[
'first_dir_sector'];
294 while ( $dirSectorId !== 0xFFFFFFFE ) {
295 if ( isset( $seenSectorIds[$dirSectorId] ) ) {
296 $this->
error(
'FAT loop detected', self::ERROR_INVALID_FORMAT );
298 $seenSectorIds[$dirSectorId] =
true;
300 $binaryDir .= $this->
readSector( $dirSectorId );
314 'create_time_low' => 4,
315 'create_time_high' => 4,
316 'modify_time_low' => 4,
317 'modify_time_high' => 4,
322 $entryLength = array_sum( $struct );
324 for ( $pos = 0; $pos < strlen( $binaryDir ); $pos += $entryLength ) {
325 $entry = $this->
unpack( $binaryDir, $pos, $struct );
329 $entry[
'size_high'] = 0;
331 $type = $entry[
'object_type'];
332 if (
$type == self::TYPE_UNALLOCATED ) {
336 $name = iconv(
'UTF-16LE', /* directory entry names are little-endian UTF-16 without a BOM; plain 'UTF-16' lets iconv pick the byte order */
'UTF-8', substr( $entry[
'name_raw'], 0, $entry[
'name_length'] - 2 ) );
339 if (
$type == self::TYPE_ROOT && isset( self::$mimesByClsid[$clsid] ) ) {
340 $this->mimeFromClsid = self::$mimesByClsid[$clsid];
343 if (
$name ===
'Workbook' ) {
344 $this->mime =
'application/vnd.ms-excel';
345 } elseif (
$name ===
'WordDocument' ) {
346 $this->mime =
'application/msword';
347 } elseif (
$name ===
'PowerPoint Document' ) {
348 $this->mime =
'application/vnd.ms-powerpoint';