MediaWiki REL1_30
MimeAnalyzer.php
Go to the documentation of this file.
1<?php
22use Psr\Log\LoggerAwareInterface;
23use Psr\Log\LoggerInterface;
24
30class MimeAnalyzer implements LoggerAwareInterface {
32 protected $typeFile;
34 protected $infoFile;
36 protected $xmlTypes;
38 protected $initCallback;
40 protected $detectCallback;
42 protected $guessCallback;
44 protected $extCallback;
46 protected $mediaTypes = null;
48 protected $mimeTypeAliases = null;
50 protected $mimetoExt = null;
51
53 public $mExtToMime = null; // legacy name; field accessed by hooks
54
56 protected $IEAnalyzer;
57
59 private $extraTypes = '';
61 private $extraInfo = '';
62
64 private $logger;
65
85 protected static $wellKnownTypes = <<<EOT
86application/ogg ogx ogg ogm ogv oga spx opus
87application/pdf pdf
88application/vnd.oasis.opendocument.chart odc
89application/vnd.oasis.opendocument.chart-template otc
90application/vnd.oasis.opendocument.database odb
91application/vnd.oasis.opendocument.formula odf
92application/vnd.oasis.opendocument.formula-template otf
93application/vnd.oasis.opendocument.graphics odg
94application/vnd.oasis.opendocument.graphics-template otg
95application/vnd.oasis.opendocument.image odi
96application/vnd.oasis.opendocument.image-template oti
97application/vnd.oasis.opendocument.presentation odp
98application/vnd.oasis.opendocument.presentation-template otp
99application/vnd.oasis.opendocument.spreadsheet ods
100application/vnd.oasis.opendocument.spreadsheet-template ots
101application/vnd.oasis.opendocument.text odt
102application/vnd.oasis.opendocument.text-master otm
103application/vnd.oasis.opendocument.text-template ott
104application/vnd.oasis.opendocument.text-web oth
105application/javascript js
106application/x-shockwave-flash swf
107audio/midi mid midi kar
108audio/mpeg mpga mpa mp2 mp3
109audio/x-aiff aif aiff aifc
110audio/x-wav wav
111audio/ogg oga spx ogg opus
112audio/opus opus ogg oga ogg spx
113image/x-bmp bmp
114image/gif gif
115image/jpeg jpeg jpg jpe
116image/png png
117image/svg+xml svg
118image/svg svg
119image/tiff tiff tif
120image/vnd.djvu djvu
121image/x.djvu djvu
122image/x-djvu djvu
123image/x-portable-pixmap ppm
124image/x-xcf xcf
125text/plain txt
126text/html html htm
127video/ogg ogv ogm ogg
128video/mpeg mpg mpeg
129EOT;
130
137 protected static $wellKnownInfo = <<<EOT
138application/pdf [OFFICE]
139application/vnd.oasis.opendocument.chart [OFFICE]
140application/vnd.oasis.opendocument.chart-template [OFFICE]
141application/vnd.oasis.opendocument.database [OFFICE]
142application/vnd.oasis.opendocument.formula [OFFICE]
143application/vnd.oasis.opendocument.formula-template [OFFICE]
144application/vnd.oasis.opendocument.graphics [OFFICE]
145application/vnd.oasis.opendocument.graphics-template [OFFICE]
146application/vnd.oasis.opendocument.image [OFFICE]
147application/vnd.oasis.opendocument.image-template [OFFICE]
148application/vnd.oasis.opendocument.presentation [OFFICE]
149application/vnd.oasis.opendocument.presentation-template [OFFICE]
150application/vnd.oasis.opendocument.spreadsheet [OFFICE]
151application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
152application/vnd.oasis.opendocument.text [OFFICE]
153application/vnd.oasis.opendocument.text-template [OFFICE]
154application/vnd.oasis.opendocument.text-master [OFFICE]
155application/vnd.oasis.opendocument.text-web [OFFICE]
156application/javascript text/javascript application/x-javascript [EXECUTABLE]
157application/x-shockwave-flash [MULTIMEDIA]
158audio/midi [AUDIO]
159audio/x-aiff [AUDIO]
160audio/x-wav [AUDIO]
161audio/mp3 audio/mpeg [AUDIO]
162application/ogg audio/ogg video/ogg [MULTIMEDIA]
163image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
164image/gif [BITMAP]
165image/jpeg [BITMAP]
166image/png [BITMAP]
167image/svg+xml [DRAWING]
168image/tiff [BITMAP]
169image/vnd.djvu [BITMAP]
170image/x-xcf [BITMAP]
171image/x-portable-pixmap [BITMAP]
172text/plain [TEXT]
173text/html [TEXT]
174video/ogg [VIDEO]
175video/mpeg [VIDEO]
176unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
177EOT;
178
/**
 * Set up the analyzer from a parameter map and load the MIME tables.
 *
 * @param array $params Configuration. Keys read here:
 *   - typeFile: value stored as the MIME types file consulted by loadFiles()
 *   - infoFile: value stored as the MIME info file consulted by loadFiles()
 *   - xmlTypes: map consulted with the XML root element in doGuessMimeType()
 *   - initCallback, detectCallback, guessCallback, extCallback:
 *     optional callables; each defaults to null when not set
 *   - logger: optional logger; a \Psr\Log\NullLogger is used when not set
 */
public function __construct( array $params ) {
	$this->typeFile = $params['typeFile'];
	$this->infoFile = $params['infoFile'];
	$this->xmlTypes = $params['xmlTypes'];

	// All four hooks are optional and share the same "null when absent" rule.
	foreach ( [ 'initCallback', 'detectCallback', 'guessCallback', 'extCallback' ] as $hook ) {
		$this->$hook = isset( $params[$hook] ) ? $params[$hook] : null;
	}

	if ( isset( $params['logger'] ) ) {
		$this->logger = $params['logger'];
	} else {
		$this->logger = new \Psr\Log\NullLogger();
	}

	$this->loadFiles();
}
216
/**
 * Build the lookup tables used by the rest of the class.
 *
 * Two line-oriented texts are assembled and parsed. Each is the built-in
 * table, followed by the configured file (if readable), followed by
 * whatever was registered through addExtraTypes() / addExtraInfo():
 *
 *  - MIME types: "mime/type ext1 ext2 ..." fills $this->mimetoExt
 *    (MIME type => space-separated extensions) and $this->mExtToMime
 *    (extension => space-separated MIME types).
 *  - MIME info: "main/type alias/type ... [MEDIATYPE]" fills
 *    $this->mediaTypes (media type => list of MIME types) and
 *    $this->mimeTypeAliases (alias => first MIME type on the line).
 *
 * In both texts blank lines, lines starting with '#' and lines without a
 * space are skipped, and everything is lower-cased before parsing.
 *
 * Empty tokens on a MIME-info line (produced by repeated separators or by
 * the space left behind once the "[MEDIATYPE]" marker is removed) are
 * discarded before aliases are registered, so no alias keyed by the empty
 * string is ever created.
 */
protected function loadFiles() {
	# Allow media handling extensions adding MIME-types and MIME-info
	if ( $this->initCallback ) {
		call_user_func( $this->initCallback, $this );
	}

	/* --- MIME types: MIME type <-> file extensions --- */

	$types = self::$wellKnownTypes;

	$mimeTypeFile = $this->typeFile;
	if ( $mimeTypeFile ) {
		if ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
			$this->logger->info( __METHOD__ . ": loading mime types from $mimeTypeFile\n" );
			$types .= "\n";
			$types .= file_get_contents( $mimeTypeFile );
		} else {
			$this->logger->info( __METHOD__ . ": can't load mime types from $mimeTypeFile\n" );
		}
	} else {
		$this->logger->info( __METHOD__ .
			": no mime types file defined, using built-ins only.\n" );
	}

	$types .= "\n" . $this->extraTypes;

	// Normalize line endings and use plain spaces as the only separator
	$types = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $types );
	$types = str_replace( "\t", " ", $types );

	$this->mimetoExt = [];
	$this->mExtToMime = [];

	foreach ( explode( "\n", $types ) as $s ) {
		$s = trim( $s );
		if ( empty( $s ) || strpos( $s, '#' ) === 0 ) {
			continue;
		}

		$s = strtolower( $s );
		$sep = strpos( $s, ' ' );
		if ( $sep === false ) {
			// A MIME type without any extension carries no information
			continue;
		}

		$mime = substr( $s, 0, $sep );
		$ext = trim( substr( $s, $sep + 1 ) );
		if ( empty( $ext ) ) {
			continue;
		}

		// The same MIME type may appear on several lines; accumulate
		if ( !empty( $this->mimetoExt[$mime] ) ) {
			$this->mimetoExt[$mime] .= ' ' . $ext;
		} else {
			$this->mimetoExt[$mime] = $ext;
		}

		foreach ( explode( ' ', $ext ) as $e ) {
			$e = trim( $e );
			if ( empty( $e ) ) {
				continue;
			}

			if ( !empty( $this->mExtToMime[$e] ) ) {
				$this->mExtToMime[$e] .= ' ' . $mime;
			} else {
				$this->mExtToMime[$e] = $mime;
			}
		}
	}

	/* --- MIME info: media types and MIME type aliases --- */

	$mimeInfoFile = $this->infoFile;

	$info = self::$wellKnownInfo;

	if ( $mimeInfoFile ) {
		if ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
			$this->logger->info( __METHOD__ . ": loading mime info from $mimeInfoFile\n" );
			$info .= "\n";
			$info .= file_get_contents( $mimeInfoFile );
		} else {
			$this->logger->info( __METHOD__ . ": can't load mime info from $mimeInfoFile\n" );
		}
	} else {
		$this->logger->info( __METHOD__ .
			": no mime info file defined, using built-ins only.\n" );
	}

	$info .= "\n" . $this->extraInfo;

	$info = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $info );
	$info = str_replace( "\t", " ", $info );

	$this->mimeTypeAliases = [];
	$this->mediaTypes = [];

	foreach ( explode( "\n", $info ) as $s ) {
		$s = trim( $s );
		if ( empty( $s ) || strpos( $s, '#' ) === 0 ) {
			continue;
		}

		$s = strtolower( $s );
		if ( strpos( $s, ' ' ) === false ) {
			continue;
		}

		// Pull out the "[MEDIATYPE]" marker; what remains is the MIME type list
		$match = [];
		if ( preg_match( '!\[\s*(\w+)\s*\]!', $s, $match ) ) {
			$s = preg_replace( '!\[\s*(\w+)\s*\]!', '', $s );
			$mtype = trim( strtoupper( $match[1] ) );
		} else {
			$mtype = MEDIATYPE_UNKNOWN;
		}

		// Keep only real tokens. Removing the marker typically leaves a
		// trailing space, which would otherwise yield an empty "alias".
		$mimes = [];
		foreach ( explode( ' ', $s ) as $token ) {
			$token = trim( $token );
			if ( !empty( $token ) ) {
				$mimes[] = $token;
			}
		}

		if ( !isset( $this->mediaTypes[$mtype] ) ) {
			$this->mediaTypes[$mtype] = [];
		}

		foreach ( $mimes as $mime ) {
			$this->mediaTypes[$mtype][] = $mime;
		}

		// The first MIME type is the main one; the rest are aliases of it
		$main = array_shift( $mimes );
		foreach ( $mimes as $alias ) {
			$this->mimeTypeAliases[$alias] = $main;
		}
	}
}
378
/**
 * Replace the logger used for this object's diagnostic messages.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
382
/**
 * Queue additional MIME type definitions.
 *
 * The text is appended, on a new line, to a buffer that loadFiles() adds
 * to the MIME types text it parses.
 *
 * @param string $types One or more "mime/type ext1 ext2" lines
 */
public function addExtraTypes( $types ) {
	$this->extraTypes = $this->extraTypes . "\n" . $types;
}
392
/**
 * Queue additional MIME info definitions.
 *
 * The text is appended, on a new line, to a buffer that loadFiles() adds
 * to the MIME info text it parses.
 *
 * @param string $info One or more "main/type alias/type [MEDIATYPE]" lines
 */
public function addExtraInfo( $info ) {
	$this->extraInfo = $this->extraInfo . "\n" . $info;
}
402
/**
 * Look up the file extensions registered for a MIME type.
 *
 * The type is lower-cased first. If it has no entry of its own but is a
 * known alias, the extensions of the type it aliases are used instead.
 *
 * @param string $mime
 * @return string|null Space-separated extensions, or null if none are known
 */
public function getExtensionsForType( $mime ) {
	$mime = strtolower( $mime );

	// Only fall back to the alias target when the type itself is unmapped
	if ( !isset( $this->mimetoExt[$mime] ) && isset( $this->mimeTypeAliases[$mime] ) ) {
		$mime = $this->mimeTypeAliases[$mime];
	}

	return isset( $this->mimetoExt[$mime] ) ? $this->mimetoExt[$mime] : null;
}
429
/**
 * Look up the MIME types registered for a file extension.
 *
 * @param string $ext Extension without leading dot; matched case-insensitively
 * @return string|null Space-separated MIME types, or null if the extension is unknown
 */
public function getTypesForExtension( $ext ) {
	$key = strtolower( $ext );

	if ( isset( $this->mExtToMime[$key] ) ) {
		return $this->mExtToMime[$key];
	}

	return null;
}
443
/**
 * Return a single MIME type for a file extension.
 *
 * Takes the list from getTypesForExtension() and keeps only the first
 * entry, i.e. everything before the first whitespace character.
 *
 * @param string $ext Extension without leading dot
 * @return string|null First registered MIME type, or null if the extension is unknown
 */
public function guessTypesForExtension( $ext ) {
	$types = $this->getTypesForExtension( $ext );

	if ( $types === null ) {
		return null;
	}

	// TODO: Check if this is needed; strtok( $types, ' ' ) should be sufficient
	return preg_replace( '/\s.*$/', '', trim( $types ) );
}
463
/**
 * Check whether a file extension is one of those registered for a MIME type.
 *
 * The comparison is an exact string match on the lower-cased extension,
 * so numeric-looking extensions (for example "1" and "01") are not
 * treated as equal to each other.
 *
 * @param string $extension Extension without leading dot
 * @param string $mime MIME type, resolved through getExtensionsForType()
 * @return bool|null True or false for a known MIME type, null when no
 *   extensions are registered for it
 */
public function isMatchingExtension( $extension, $mime ) {
	$ext = $this->getExtensionsForType( $mime );

	if ( !$ext ) {
		return null; // Unknown MIME type
	}

	$ext = explode( ' ', $ext );

	$extension = strtolower( $extension );
	// Strict: both sides are strings, avoid PHP's numeric-string juggling
	return in_array( $extension, $ext, true );
}
485
/**
 * Tell whether a MIME type is in this class's fixed list of image types
 * handled by PHP's built-in image functions.
 *
 * The match is an exact string comparison; non-string input (for example
 * true or 0) never matches.
 *
 * @param string $mime
 * @return bool
 */
public function isPHPImageType( $mime ) {
	// As defined by imagegetsize and image_type_to_mime
	static $types = [
		'image/gif', 'image/jpeg', 'image/png',
		'image/x-bmp', 'image/xbm', 'image/tiff',
		'image/jp2', 'image/jpeg2000', 'image/iff',
		'image/xbm', 'image/x-xbitmap',
		'image/vnd.wap.wbmp', 'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	];

	return in_array( $mime, $types, true );
}
508
/**
 * Tell whether an extension belongs to a format whose content this class
 * expects to identify on its own.
 *
 * Callers in this class use it to refuse extension-based guessing: if the
 * content of such a file was not recognized, the extension is not trusted.
 *
 * @param string $extension Extension without leading dot; case-insensitive
 * @return bool
 */
public function isRecognizableExtension( $extension ) {
	static $types = [
		// Types recognized by getimagesize()
		'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
		'bmp', 'tiff', 'tif', 'jpc', 'jp2',
		'jpx', 'jb2', 'swc', 'iff', 'wbmp',
		'xbm',

		// Formats we recognize magic numbers for
		'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx', 'opus',
		'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
		'webp', 'mp3',

		// XML formats we sure hope we recognize reliably
		'svg',

		// 3D formats
		'stl',
	];

	$needle = strtolower( $extension );

	return in_array( $needle, $types, true );
}
542
/**
 * Refine a content-based MIME type using the file extension.
 *
 * Three cases are refined:
 *  - 'unknown/unknown': the extension's type is used, unless the extension
 *    is one isRecognizableExtension() says should have been detected.
 *  - 'application/x-opc+zip': the extension's type is used when the
 *    extension is registered for that type, otherwise 'application/zip'.
 *  - 'text/plain': the extension's type is used when the extension maps
 *    to the TEXT media type.
 *
 * Afterwards extCallback (if set) is invoked and the result is resolved
 * through the alias table.
 *
 * @param string $mime MIME type detected from the file content
 * @param string $ext File extension without leading dot
 * @return string|null Improved MIME type; null when the extension lookup
 *   found nothing and no callback supplied a type
 */
public function improveTypeFromExtension( $mime, $ext ) {
	if ( $mime === 'unknown/unknown' ) {
		if ( $this->isRecognizableExtension( $ext ) ) {
			$this->logger->info( __METHOD__ . ': refusing to guess mime type for .' .
				"$ext file, we should have recognized it\n" );
		} else {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->guessTypesForExtension( $ext );
		}
	} elseif ( $mime === 'application/x-opc+zip' ) {
		if ( $this->isMatchingExtension( $ext, $mime ) ) {
			// A known file extension for an OPC file,
			// find the proper MIME type for that file extension
			$mime = $this->guessTypesForExtension( $ext );
		} else {
			$this->logger->info( __METHOD__ .
				": refusing to guess better type for $mime file, " .
				".$ext is not a known OPC extension.\n" );
			$mime = 'application/zip';
		}
	} elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
		// Textual types are sometimes not recognized properly.
		// If detected as text/plain, and has an extension which is textual
		// improve to the extension's type. For example, csv and json are often
		// misdetected as text/plain.
		$mime = $this->guessTypesForExtension( $ext );
	}

	# Media handling extensions can improve the MIME detected
	$callback = $this->extCallback;
	if ( $callback ) {
		$callback( $this, $ext, $mime /* by reference */ );
	}

	// $mime may be null here (unknown extension). Using null as an array
	// key would silently look up the '' key, so only resolve real strings.
	if ( is_string( $mime ) && isset( $this->mimeTypeAliases[$mime] ) ) {
		$mime = $this->mimeTypeAliases[$mime];
	}

	$this->logger->info( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
	return $mime;
}
596
/**
 * Determine the MIME type of a file.
 *
 * Content inspection (doGuessMimeType) is tried first; if it yields
 * nothing, detectMimeType() is used. The result is then resolved through
 * the alias table.
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Deprecated; forwarded to doGuessMimeType() and
 *   detectMimeType(). Any truthy value only triggers a deprecation log entry here.
 * @return string MIME type
 */
public function guessMimeType( $file, $ext = true ) {
	if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$guessed = $this->doGuessMimeType( $file, $ext );

	if ( !$guessed ) {
		$this->logger->info( __METHOD__ .
			": internal type detection failed for $file (.$ext)...\n" );
		$guessed = $this->detectMimeType( $file, $ext );
	}

	// Map aliases onto their main type
	$mime = isset( $this->mimeTypeAliases[$guessed] )
		? $this->mimeTypeAliases[$guessed]
		: $guessed;

	$this->logger->info( __METHOD__ . ": guessed mime type of $file: $mime\n" );
	return $mime;
}
633
/**
 * Guess the MIME type of a file by inspecting its content.
 *
 * The first 1024 bytes ("head") and up to the last 65558 bytes ("tail")
 * are read, then the following checks run in order, returning on the
 * first hit: hard-coded magic numbers, EBML (Matroska/WebM), WebP, PHP
 * open tags, well-formed XML, shebang scripts, ZIP variants, STL,
 * getimagesize(), and finally guessCallback.
 *
 * The file handle is closed on every exit path, including the early
 * return when filesize() fails and the exception thrown when seeking
 * fails. UTF-16 shebangs are matched against the full 6-byte
 * "BOM # !" sequence.
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Forwarded unchanged to detectZipType() for ZIP content
 * @return string|bool MIME type, 'unknown/unknown' if the file cannot be
 *   read or is unidentifiable EBML, or false if no check matched
 * @throws UnexpectedValueException If seeking to the tail of the file fails
 */
private function doGuessMimeType( $file, $ext ) {
	// Read a chunk of the file
	MediaWiki\suppressWarnings();
	$f = fopen( $file, 'rb' );
	MediaWiki\restoreWarnings();

	if ( !$f ) {
		return 'unknown/unknown';
	}

	$fsize = filesize( $file );
	if ( $fsize === false ) {
		fclose( $f );
		return 'unknown/unknown';
	}

	$head = fread( $f, 1024 );
	$tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
	if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
		fclose( $f );
		throw new UnexpectedValueException(
			"Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
	}
	$tail = $tailLength ? fread( $f, $tailLength ) : '';
	fclose( $f );

	$this->logger->info( __METHOD__ .
		": analyzing head and tail of $file for magic numbers.\n" );

	// Hardcode a few magic number checks...
	$headers = [
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',
		'ID3' => 'audio/mpeg',
		"\xff\xfb" => 'audio/mpeg', // MPEG-1 layer 3
		"\xff\xf3" => 'audio/mpeg', // MPEG-2 layer 3 (lower sample rates)
		"\xff\xe3" => 'audio/mpeg', // MPEG-2.5 layer 3 (very low sample rates)

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	];

	foreach ( $headers as $magic => $candidate ) {
		if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
			$this->logger->info( __METHOD__ .
				": magic header in $file recognized as $candidate\n" );
			return $candidate;
		}
	}

	/* Look for WebM and Matroska files */
	if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
		$doctype = strpos( $head, "\x42\x82" );
		if ( $doctype ) {
			// Next byte is datasize, then data (sizes larger than 1 byte are stupid muxers)
			$data = substr( $head, $doctype + 3, 8 );
			if ( strncmp( $data, "matroska", 8 ) == 0 ) {
				$this->logger->info( __METHOD__ . ": recognized file as video/x-matroska\n" );
				return "video/x-matroska";
			} elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
				// XXX HACK look for a video track, if we don't find it, this is an audio file
				$videotrack = strpos( $head, "\x86\x85V_VP" );

				if ( $videotrack ) {
					// There is a video track, so this is a video file.
					$this->logger->info( __METHOD__ . ": recognized file as video/webm\n" );
					return "video/webm";
				}

				$this->logger->info( __METHOD__ . ": recognized file as audio/webm\n" );
				return "audio/webm";
			}
		}
		$this->logger->info( __METHOD__ . ": unknown EBML file\n" );
		return "unknown/unknown";
	}

	/* Look for WebP */
	if ( strncmp( $head, "RIFF", 4 ) == 0 &&
		strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0
	) {
		$this->logger->info( __METHOD__ . ": recognized file as image/webp\n" );
		return "image/webp";
	}

	// Look for PHP open tags anywhere in the head: the plain "<?php", and
	// "<?php" / "<?" followed by space, newline, tab or "=" with a NUL byte
	// after each character (as such text would appear in UTF-16LE).
	if ( ( strpos( $head, '<?php' ) !== false ) ||
		( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
		( strpos( $head, "<\x00?\x00 " ) !== false ) ||
		( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
		( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
		( strpos( $head, "<\x00?\x00=" ) !== false )
	) {
		$this->logger->info( __METHOD__ . ": recognized $file as application/x-php\n" );
		return 'application/x-php';
	}

	// Well-formed XML: map the root element through the configured
	// xmlTypes table, falling back to generic XML.
	$xml = new XmlTypeCheck( $file );
	if ( $xml->wellFormed ) {
		$xmlTypes = $this->xmlTypes;
		if ( isset( $xmlTypes[$xml->getRootElement()] ) ) {
			return $xmlTypes[$xml->getRootElement()];
		} else {
			return 'application/xml';
		}
	}

	// Script detection
	$script_type = null;

	# detect by shebang
	if ( substr( $head, 0, 2 ) === "#!" ) {
		$script_type = "ASCII";
	} elseif ( substr( $head, 0, 5 ) === "\xef\xbb\xbf#!" ) {
		$script_type = "UTF-8";
	} elseif ( substr( $head, 0, 6 ) === "\xfe\xff\x00#\x00!" ) {
		// BOM FE FF, then "#!" as two big-endian 16-bit units
		$script_type = "UTF-16BE";
	} elseif ( substr( $head, 0, 6 ) === "\xff\xfe#\x00!\x00" ) {
		// BOM FF FE, then "#!" as two little-endian 16-bit units
		$script_type = "UTF-16LE";
	}

	if ( $script_type ) {
		if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
			// Quick and dirty fold down to ASCII!
			$pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
			// Skip the 2-byte BOM, decode the rest as 16-bit units
			$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
			$head = '';
			foreach ( $chars as $codepoint ) {
				if ( $codepoint < 128 ) {
					$head .= chr( $codepoint );
				} else {
					$head .= '?';
				}
			}
		}

		$match = [];

		// Take the last path component of the interpreter as the subtype
		if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
			$mime = "application/x-{$match[2]}";
			$this->logger->info( __METHOD__ . ": shell script recognized as $mime\n" );
			return $mime;
		}
	}

	// Check for ZIP variants (before getimagesize)
	if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
		$this->logger->info( __METHOD__ . ": ZIP header present in $file\n" );
		return $this->detectZipType( $head, $tail, $ext );
	}

	// Check for STL (3D) files
	// @see https://en.wikipedia.org/wiki/STL_(file_format)
	if ( $fsize >= 15 &&
		stripos( $head, 'SOLID ' ) === 0 &&
		preg_match( '/\RENDSOLID .*$/i', $tail ) ) {
		// ASCII STL file
		return 'application/sla';
	} elseif ( $fsize > 84 ) {
		// binary STL file: 32-bit little-endian triangle count at offset 80;
		// the file size must equal 84 + 50 bytes per triangle
		$triangles = substr( $head, 80, 4 );
		$triangles = unpack( 'V', $triangles );
		$triangles = reset( $triangles );
		if ( $triangles !== false && $fsize === 84 + ( $triangles * 50 ) ) {
			return 'application/sla';
		}
	}

	MediaWiki\suppressWarnings();
	$gis = getimagesize( $file );
	MediaWiki\restoreWarnings();

	if ( $gis && isset( $gis['mime'] ) ) {
		$mime = $gis['mime'];
		$this->logger->info( __METHOD__ . ": getimagesize detected $file as $mime\n" );
		return $mime;
	}

	# Media handling extensions can guess the MIME by content
	# It's intentionally here so that if core is wrong about a type (false positive),
	# people will hopefully nag and submit patches :)
	$mime = false;
	# Some strings by reference for performance - assuming well-behaved hooks
	$callback = $this->guessCallback;
	if ( $callback ) {
		$callback( $this, $head, $tail, $file, $mime /* by reference */ );
	}

	return $mime;
}
858
/**
 * Work out which kind of ZIP-based document a file is.
 *
 * Recognizes OpenDocument files (via the "mimetype" first entry), Open
 * Packaging Conventions archives (first entry "[Content_Types].xml"), and
 * files that start with the 8-byte signature checked below and carry an
 * OPC archive in their tail. Anything else is reported as plain
 * 'application/zip'.
 *
 * @param string $header First bytes of the file
 * @param string|null $tail Last bytes of the file
 * @param string|bool $ext Deprecated. When it is an actual extension
 *   (neither true nor false) and the file is an OPC archive, the result
 *   is refined from the extension.
 * @return string MIME type
 */
public function detectZipType( $header, $tail = null, $ext = false ) {
	if ( $ext ) { # TODO: remove $ext param
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$opendocTypes = [
		'chart-template',
		'chart',
		'formula-template',
		'formula',
		'graphics-template',
		'graphics',
		'image-template',
		'image',
		'presentation-template',
		'presentation',
		'spreadsheet-template',
		'spreadsheet',
		'text-template',
		'text-master',
		'text-web',
		'text' ];

	// https://lists.oasis-open.org/archives/office/200505/msg00006.html
	$types = '(?:' . implode( '|', $opendocTypes ) . ')';
	$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

	$openxmlRegex = "/^\[Content_Types\].xml/";

	// Both regexes are matched against what follows the first 30 bytes
	// (presumably the fixed-size part of the first ZIP local file header).
	$firstEntry = substr( $header, 30 );

	if ( preg_match( $opendocRegex, $firstEntry, $matches ) ) {
		$mime = $matches[1];
		$this->logger->info( __METHOD__ . ": detected $mime from ZIP archive\n" );
		return $mime;
	}

	if ( preg_match( $openxmlRegex, $firstEntry ) ) {
		$mime = "application/x-opc+zip";
		# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
		if ( $ext !== true && $ext !== false ) {
			if ( $this->isMatchingExtension( $ext, $mime ) ) {
				/* A known file extension for an OPC file,
				 * find the proper mime type for that file extension
				 */
				$mime = $this->guessTypesForExtension( $ext );
			} else {
				$mime = "application/zip";
			}
		}
		$this->logger->info( __METHOD__ .
			": detected an Open Packaging Conventions archive: $mime\n" );
		return $mime;
	}

	$mime = 'application/zip';

	$hasOpcTrailer = substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
		( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
		preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) );

	if ( !$hasOpcTrailer ) {
		$this->logger->info( __METHOD__ . ": unable to identify type of ZIP archive\n" );
		return $mime;
	}

	// Signatures at offset 512 select the concrete Office type
	$powerpointSignatures = [
		"\xEC\xA5\xC1\x00\x0E\x00",
		"\xEC\xA5\xC1\x00\x1C\x00",
		"\xEC\xA5\xC1\x00\x43\x00",
	];
	$excelSignatures = [
		"\xFD\xFF\xFF\xFF\x10\x00",
		"\xFD\xFF\xFF\xFF\x1F\x00",
		"\xFD\xFF\xFF\xFF\x22\x00",
		"\xFD\xFF\xFF\xFF\x23\x00",
		"\xFD\xFF\xFF\xFF\x28\x00",
		"\xFD\xFF\xFF\xFF\x29\x00",
		"\xFD\xFF\xFF\xFF\x10\x02",
		"\xFD\xFF\xFF\xFF\x1F\x02",
		"\xFD\xFF\xFF\xFF\x22\x02",
		"\xFD\xFF\xFF\xFF\x23\x02",
		"\xFD\xFF\xFF\xFF\x28\x02",
		"\xFD\xFF\xFF\xFF\x29\x02",
	];

	if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
		$mime = "application/msword";
	}

	// The 6-byte signature, when listed, overrides the 4-byte match above
	$signature = substr( $header, 512, 6 );
	if ( in_array( $signature, $powerpointSignatures, true ) ) {
		$mime = "application/vnd.ms-powerpoint";
	} elseif ( in_array( $signature, $excelSignatures, true ) ) {
		$mime = "application/vnd.msexcel";
	}

	$this->logger->info( __METHOD__ .
		": detected a MS Office document with OPC trailer\n" );

	return $mime;
}
961
/**
 * Fallback MIME detection, used when content inspection found nothing.
 *
 * Asks detectCallback (or mime_content_type() when no callback is set),
 * normalizes the answer, and discards it if it contains "unknown". As a
 * last resort the file extension is consulted, except for extensions
 * that isRecognizableExtension() says should have been detected by content.
 *
 * @param string $file Path of the file
 * @param string|bool $ext Deprecated. true: derive the extension from
 *   $file; a string: use it as the extension; false: do not use extensions.
 * @return string MIME type, 'unknown/unknown' when nothing could be determined
 */
private function detectMimeType( $file, $ext = true ) {
	if ( $ext ) {
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. "
			. "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$callback = $this->detectCallback;
	$m = $callback ? $callback( $file ) : mime_content_type( $file );

	if ( $m ) {
		# normalize: strip charset etc., trim, lower-case
		$m = strtolower( trim( preg_replace( '![;, ].*$!', '', $m ) ) );

		if ( strpos( $m, 'unknown' ) === false ) {
			$this->logger->info( __METHOD__ . ": magic mime type of $file: $m\n" );
			return $m;
		}
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$dot = strrpos( $file, '.' );
		$ext = strtolower( $dot ? substr( $file, $dot + 1 ) : '' );
	}

	if ( $ext ) {
		if ( $this->isRecognizableExtension( $ext ) ) {
			$this->logger->info( __METHOD__ . ": refusing to guess mime type for .$ext file, "
				. "we should have recognized it\n" );
		} else {
			$byExtension = $this->guessTypesForExtension( $ext );
			if ( $byExtension ) {
				$this->logger->info( __METHOD__ . ": extension mime type of $file: $byExtension\n" );
				return $byExtension;
			}
		}
	}

	// Unknown type
	$this->logger->info( __METHOD__ . ": failed to guess mime type for $file!\n" );
	return 'unknown/unknown';
}
1031
/**
 * Determine the media type (one of the MEDIATYPE_* constants) of a file.
 *
 * Lookup order: special handling for Ogg containers, then the full MIME
 * type, then the file extension taken from $path, then the major part of
 * the MIME type. MEDIATYPE_UNKNOWN is returned when nothing matches.
 *
 * @param string|null $path Path of the file; used to guess the MIME type
 *   when $mime is not given, to sniff Ogg files, and for its extension
 * @param string|null $mime MIME type, if already known
 * @return string A MEDIATYPE_* constant
 */
public function getMediaType( $path = null, $mime = null ) {
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If MIME type is unknown, guess it
	if ( !$mime ) {
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	if ( $mime == 'application/ogg' && file_exists( $path ) ) {
		// Read a chunk of the file. Binary mode: this is not text, so no
		// line-ending translation must be applied to it.
		$f = fopen( $path, 'rb' );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		// Drop the encoder name so it is not mistaken for the theora codec
		$head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

		// This is an UGLY HACK, file should be parsed correctly
		if ( strpos( $head, 'theora' ) !== false ) {
			return MEDIATYPE_VIDEO;
		}
		foreach ( [ 'vorbis', 'flac', 'speex', 'opus' ] as $audioCodec ) {
			if ( strpos( $head, $audioCodec ) !== false ) {
				return MEDIATYPE_AUDIO;
			}
		}
		return MEDIATYPE_MULTIMEDIA;
	}

	$type = null;
	// Check for entry for full MIME type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major MIME type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$major = substr( $mime, 0, $i );
			$type = $this->findMediaType( $major );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Never hand back null/empty: the contract is a MEDIATYPE_* constant
	if ( !$type ) {
		$type = MEDIATYPE_UNKNOWN;
	}

	return $type;
}
1126
/**
 * Find the media type registered for a MIME type or a file extension.
 *
 * @param string $extMime A MIME type, or a file extension with a leading
 *   dot (for example ".png"). Extensions are expanded to all their MIME
 *   types; a MIME type is first resolved through the alias table.
 * @return string Media type of the first candidate found in the media
 *   type table, or MEDIATYPE_UNKNOWN
 */
public function findMediaType( $extMime ) {
	if ( strpos( $extMime, '.' ) === 0 ) {
		// If it's an extension, look up the MIME types
		$registered = $this->getTypesForExtension( substr( $extMime, 1 ) );
		if ( !$registered ) {
			return MEDIATYPE_UNKNOWN;
		}

		$candidates = explode( ' ', $registered );
	} else {
		// Normalize MIME type
		$candidates = [
			isset( $this->mimeTypeAliases[$extMime] )
				? $this->mimeTypeAliases[$extMime]
				: $extMime
		];
	}

	foreach ( $candidates as $candidate ) {
		foreach ( $this->mediaTypes as $mediaType => $mimes ) {
			if ( in_array( $candidate, $mimes, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
1165
/**
 * List the media types present in the media type table.
 *
 * @return array Keys of the media type table built by loadFiles()
 */
public function getMediaTypes() {
	$names = array_keys( $this->mediaTypes );

	return $names;
}
1174
/**
 * Ask the IE content analyzer which MIME types it derives for the data.
 *
 * All three arguments are handed unchanged to
 * IEContentAnalyzer::getRealMimesFromData().
 *
 * @param string $fileName
 * @param string $chunk
 * @param string $proposed
 * @return mixed Whatever getRealMimesFromData() returns
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
	return $this->getIEContentAnalyzer()
		->getRealMimesFromData( $fileName, $chunk, $proposed );
}
1188
/**
 * Return the IE content analyzer, creating it on first use.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
	if ( $this->IEAnalyzer === null ) {
		$this->IEAnalyzer = new IEContentAnalyzer;
	}

	return $this->IEAnalyzer;
}
1200}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection a...
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add etc
Definition design.txt:19
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition design.txt:18
the array() calling protocol came about after MediaWiki 1.4rc1.
either a plain
Definition hooks.txt:2026
for adding new MIME info to the list Use $mimeMagic addExtraTypes( $stringOfTypes)
returning false will NOT prevent logging $e
Definition hooks.txt:2146
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
const MEDIATYPE_VIDEO
Definition defines.php:35
const MEDIATYPE_UNKNOWN
Definition defines.php:26
const MEDIATYPE_AUDIO
Definition defines.php:32
const MEDIATYPE_TEXT
Definition defines.php:41
const MEDIATYPE_MULTIMEDIA
Definition defines.php:37
if( $ext=='php'|| $ext=='php5') $mime
Definition router.php:59
$lines
Definition router.php:61
$params
$header