22 use Psr\Log\LoggerAwareInterface;
23 use Psr\Log\LoggerInterface;
86 application/ogg ogx ogg ogm ogv oga spx opus
88 application/vnd.oasis.opendocument.chart odc
89 application/vnd.oasis.opendocument.chart-
template otc
90 application/vnd.oasis.opendocument.database odb
91 application/vnd.oasis.opendocument.formula odf
92 application/vnd.oasis.opendocument.formula-
template otf
93 application/vnd.oasis.opendocument.graphics odg
94 application/vnd.oasis.opendocument.graphics-
template otg
95 application/vnd.oasis.opendocument.image odi
96 application/vnd.oasis.opendocument.image-
template oti
97 application/vnd.oasis.opendocument.presentation odp
98 application/vnd.oasis.opendocument.presentation-
template otp
99 application/vnd.oasis.opendocument.spreadsheet ods
100 application/vnd.oasis.opendocument.spreadsheet-
template ots
101 application/vnd.oasis.opendocument.text odt
102 application/vnd.oasis.opendocument.text-master otm
103 application/vnd.oasis.opendocument.text-
template ott
104 application/vnd.oasis.opendocument.text-web oth
105 application/javascript js
106 application/x-shockwave-flash swf
107 audio/midi mid midi kar
108 audio/mpeg mpga mpa mp2 mp3
109 audio/x-aiff aif aiff aifc
111 audio/ogg oga spx ogg opus
112 audio/opus opus ogg oga ogg spx
115 image/jpeg jpeg jpg jpe
123 image/x-portable-pixmap ppm
127 video/ogg ogv ogm ogg
138 application/pdf [OFFICE]
139 application/vnd.oasis.opendocument.chart [OFFICE]
140 application/vnd.oasis.opendocument.chart-
template [OFFICE]
141 application/vnd.oasis.opendocument.database [OFFICE]
142 application/vnd.oasis.opendocument.formula [OFFICE]
143 application/vnd.oasis.opendocument.formula-
template [OFFICE]
144 application/vnd.oasis.opendocument.graphics [OFFICE]
145 application/vnd.oasis.opendocument.graphics-
template [OFFICE]
146 application/vnd.oasis.opendocument.image [OFFICE]
147 application/vnd.oasis.opendocument.image-
template [OFFICE]
148 application/vnd.oasis.opendocument.presentation [OFFICE]
149 application/vnd.oasis.opendocument.presentation-
template [OFFICE]
150 application/vnd.oasis.opendocument.spreadsheet [OFFICE]
151 application/vnd.oasis.opendocument.spreadsheet-
template [OFFICE]
152 application/vnd.oasis.opendocument.text [OFFICE]
153 application/vnd.oasis.opendocument.text-
template [OFFICE]
154 application/vnd.oasis.opendocument.text-master [OFFICE]
155 application/vnd.oasis.opendocument.text-web [OFFICE]
156 application/javascript
text/javascript application/x-javascript [EXECUTABLE]
157 application/x-shockwave-flash [MULTIMEDIA]
161 audio/mp3 audio/mpeg [AUDIO]
162 application/ogg audio/ogg video/ogg [MULTIMEDIA]
163 image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
167 image/svg+xml [DRAWING]
169 image/vnd.djvu [BITMAP]
171 image/x-portable-pixmap [BITMAP]
176 unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
195 $this->typeFile =
$params[
'typeFile'];
196 $this->infoFile =
$params[
'infoFile'];
197 $this->xmlTypes =
$params[
'xmlTypes'];
198 $this->initCallback = isset(
$params[
'initCallback'] )
201 $this->detectCallback = isset(
$params[
'detectCallback'] )
204 $this->guessCallback = isset(
$params[
'guessCallback'] )
207 $this->extCallback = isset(
$params[
'extCallback'] )
210 $this->logger = isset(
$params[
'logger'] )
212 : new \Psr\Log\NullLogger();
222 # Allow media handling extensions to add MIME types and MIME info
223 if ( $this->initCallback ) {
224 call_user_func( $this->initCallback, $this );
230 if ( $mimeTypeFile ) {
231 if ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
232 $this->logger->info( __METHOD__ .
": loading mime types from $mimeTypeFile\n" );
234 $types .= file_get_contents( $mimeTypeFile );
236 $this->logger->info( __METHOD__ .
": can't load mime types from $mimeTypeFile\n" );
239 $this->logger->info( __METHOD__ .
240 ": no mime types file defined, using built-ins only.\n" );
245 $types = str_replace( [
"\r\n",
"\n\r",
"\n\n",
"\r\r",
"\r" ],
"\n", $types );
246 $types = str_replace(
"\t",
" ", $types );
248 $this->mimetoExt = [];
249 $this->mExtToMime = [];
251 $lines = explode(
"\n", $types );
257 if ( strpos(
$s,
'#' ) === 0 ) {
261 $s = strtolower(
$s );
262 $i = strpos(
$s,
' ' );
264 if ( $i ===
false ) {
269 $ext = trim( substr(
$s, $i + 1 ) );
271 if ( empty(
$ext ) ) {
275 if ( !empty( $this->mimetoExt[
$mime] ) ) {
281 $extensions = explode(
' ',
$ext );
283 foreach ( $extensions
as $e ) {
289 if ( !empty( $this->mExtToMime[
$e] ) ) {
290 $this->mExtToMime[
$e] .=
' ' .
$mime;
305 if ( $mimeInfoFile ) {
306 if ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
307 $this->logger->info( __METHOD__ .
": loading mime info from $mimeInfoFile\n" );
309 $info .= file_get_contents( $mimeInfoFile );
311 $this->logger->info( __METHOD__ .
": can't load mime info from $mimeInfoFile\n" );
314 $this->logger->info( __METHOD__ .
315 ": no mime info file defined, using built-ins only.\n" );
320 $info = str_replace( [
"\r\n",
"\n\r",
"\n\n",
"\r\r",
"\r" ],
"\n", $info );
321 $info = str_replace(
"\t",
" ", $info );
323 $this->mimeTypeAliases = [];
324 $this->mediaTypes = [];
326 $lines = explode(
"\n", $info );
332 if ( strpos(
$s,
'#' ) === 0 ) {
336 $s = strtolower(
$s );
337 $i = strpos(
$s,
' ' );
339 if ( $i ===
false ) {
343 # print "processing MIME INFO line $s<br>";
346 if ( preg_match(
'!\[\s*(\w+)\s*\]!',
$s, $match ) ) {
347 $s = preg_replace(
'!\[\s*(\w+)\s*\]!',
'',
$s );
348 $mtype = trim( strtoupper( $match[1] ) );
353 $m = explode(
' ',
$s );
355 if ( !isset( $this->mediaTypes[$mtype] ) ) {
356 $this->mediaTypes[$mtype] = [];
361 if ( empty(
$mime ) ) {
365 $this->mediaTypes[$mtype][] =
$mime;
368 if ( count( $m ) > 1 ) {
370 $mCount = count( $m );
371 for ( $i = 1; $i < $mCount; $i += 1 ) {
373 $this->mimeTypeAliases[
$mime] = $main;
390 $this->extraTypes .=
"\n" . $types;
400 $this->extraInfo .=
"\n" . $info;
415 if ( isset( $this->mimetoExt[
$mime] ) ) {
416 return $this->mimetoExt[
$mime];
420 if ( isset( $this->mimeTypeAliases[
$mime] ) ) {
422 if ( isset( $this->mimetoExt[
$mime] ) ) {
423 return $this->mimetoExt[
$mime];
440 $r = isset( $this->mExtToMime[
$ext] ) ? $this->mExtToMime[
$ext] :
null;
453 if ( is_null( $m ) ) {
459 $m = preg_replace(
'/\s.*$/',
'', $m );
482 $extension = strtolower( $extension );
483 return in_array( $extension,
$ext );
497 'image/gif',
'image/jpeg',
'image/png',
498 'image/x-bmp',
'image/xbm',
'image/tiff',
499 'image/jp2',
'image/jpeg2000',
'image/iff',
500 'image/xbm',
'image/x-xbitmap',
501 'image/vnd.wap.wbmp',
'image/vnd.xiff',
503 'application/x-shockwave-flash',
506 return in_array(
$mime, $types );
524 'gif',
'jpeg',
'jpg',
'png',
'swf',
'psd',
525 'bmp',
'tiff',
'tif',
'jpc',
'jp2',
526 'jpx',
'jb2',
'swc',
'iff',
'wbmp',
530 'djvu',
'ogx',
'ogg',
'ogv',
'oga',
'spx',
'opus',
531 'mid',
'pdf',
'wmf',
'xcf',
'webm',
'mkv',
'mka',
540 return in_array( strtolower( $extension ), $types );
555 if (
$mime ===
'unknown/unknown' ) {
557 $this->logger->info( __METHOD__ .
': refusing to guess mime type for .' .
558 "$ext file, we should have recognized it\n" );
564 } elseif (
$mime ===
'application/x-opc+zip' ) {
570 $this->logger->info( __METHOD__ .
571 ": refusing to guess better type for $mime file, " .
572 ".$ext is not a known OPC extension.\n" );
573 $mime =
'application/zip';
583 # Media handling extensions can improve on the detected MIME type
589 if ( isset( $this->mimeTypeAliases[
$mime] ) ) {
593 $this->logger->info( __METHOD__ .
": improved mime type for .$ext: $mime\n" );
613 $this->logger->info( __METHOD__ .
614 ": WARNING: use of the \$ext parameter is deprecated. " .
615 "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
621 $this->logger->info( __METHOD__ .
622 ": internal type detection failed for $file (.$ext)...\n" );
626 if ( isset( $this->mimeTypeAliases[
$mime] ) ) {
630 $this->logger->info( __METHOD__ .
": guessed mime type of $file: $mime\n" );
646 Wikimedia\suppressWarnings();
647 $f = fopen( $file,
'rb' );
648 Wikimedia\restoreWarnings();
651 return 'unknown/unknown';
654 $fsize = filesize( $file );
655 if ( $fsize ===
false ) {
656 return 'unknown/unknown';
659 $head = fread( $f, 1024 );
660 $tailLength = min( 65558, $fsize );
661 if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
662 throw new UnexpectedValueException(
663 "Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
665 $tail = $tailLength ? fread( $f, $tailLength ) :
'';
668 $this->logger->info( __METHOD__ .
669 ": analyzing head and tail of $file for magic numbers.\n" );
674 'MThd' =>
'audio/midi',
675 'OggS' =>
'application/ogg',
676 'ID3' =>
'audio/mpeg',
677 "\xff\xfb" =>
'audio/mpeg',
678 "\xff\xf3" =>
'audio/mpeg',
679 "\xff\xe3" =>
'audio/mpeg',
683 "\x01\x00\x09\x00" =>
'application/x-msmetafile',
684 "\xd7\xcd\xc6\x9a" =>
'application/x-msmetafile',
685 '%PDF' =>
'application/pdf',
686 'gimp xcf' =>
'image/x-xcf',
689 'MZ' =>
'application/octet-stream',
690 "\xca\xfe\xba\xbe" =>
'application/octet-stream',
691 "\x7fELF" =>
'application/octet-stream',
694 foreach ( $headers
as $magic => $candidate ) {
695 if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
696 $this->logger->info( __METHOD__ .
697 ": magic header in $file recognized as $candidate\n" );
703 if ( strncmp( $head, pack(
"C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
704 $doctype = strpos( $head,
"\x42\x82" );
707 $data = substr( $head, $doctype + 3, 8 );
708 if ( strncmp( $data,
"matroska", 8 ) == 0 ) {
709 $this->logger->info( __METHOD__ .
": recognized file as video/x-matroska\n" );
710 return "video/x-matroska";
711 } elseif ( strncmp( $data,
"webm", 4 ) == 0 ) {
713 $videotrack = strpos( $head,
"\x86\x85V_VP" );
717 $this->logger->info( __METHOD__ .
": recognized file as video/webm\n" );
721 $this->logger->info( __METHOD__ .
": recognized file as audio/webm\n" );
725 $this->logger->info( __METHOD__ .
": unknown EBML file\n" );
726 return "unknown/unknown";
730 if ( strncmp( $head,
"RIFF", 4 ) == 0 &&
731 strncmp( substr( $head, 8, 7 ),
"WEBPVP8", 7 ) == 0
733 $this->logger->info( __METHOD__ .
": recognized file as image/webp\n" );
749 if ( ( strpos( $head,
'<?php' ) !==
false ) ||
750 ( strpos( $head,
"<\x00?\x00p\x00h\x00p" ) !==
false ) ||
751 ( strpos( $head,
"<\x00?\x00 " ) !==
false ) ||
752 ( strpos( $head,
"<\x00?\x00\n" ) !==
false ) ||
753 ( strpos( $head,
"<\x00?\x00\t" ) !==
false ) ||
754 ( strpos( $head,
"<\x00?\x00=" ) !==
false )
756 $this->logger->info( __METHOD__ .
": recognized $file as application/x-php\n" );
757 return 'application/x-php';
763 Wikimedia\suppressWarnings();
765 Wikimedia\restoreWarnings();
766 if ( $xml->wellFormed ) {
768 if ( isset(
$xmlTypes[$xml->getRootElement()] ) ) {
769 return $xmlTypes[$xml->getRootElement()];
771 return 'application/xml';
781 if ( substr( $head, 0, 2 ) ==
"#!" ) {
782 $script_type =
"ASCII";
783 } elseif ( substr( $head, 0, 5 ) ==
"\xef\xbb\xbf#!" ) {
784 $script_type =
"UTF-8";
785 } elseif ( substr( $head, 0, 7 ) ==
"\xfe\xff\x00#\x00!" ) {
786 $script_type =
"UTF-16BE";
787 } elseif ( substr( $head, 0, 7 ) ==
"\xff\xfe#\x00!" ) {
788 $script_type =
"UTF-16LE";
791 if ( $script_type ) {
792 if ( $script_type !==
"UTF-8" && $script_type !==
"ASCII" ) {
794 $pack = [
'UTF-16BE' =>
'n*',
'UTF-16LE' =>
'v*' ];
795 $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
797 foreach ( $chars
as $codepoint ) {
798 if ( $codepoint < 128 ) {
799 $head .= chr( $codepoint );
808 if ( preg_match(
'%/?([^\s]+/)(\w+)%', $head, $match ) ) {
809 $mime =
"application/x-{$match[2]}";
810 $this->logger->info( __METHOD__ .
": shell script recognized as $mime\n" );
816 if ( strpos( $tail,
"PK\x05\x06" ) !==
false ) {
817 $this->logger->info( __METHOD__ .
": ZIP header present in $file\n" );
824 stripos( $head,
'SOLID ' ) === 0 &&
825 preg_match(
'/\RENDSOLID .*$/i', $tail ) ) {
827 return 'application/sla';
828 } elseif ( $fsize > 84 ) {
830 $triangles = substr( $head, 80, 4 );
831 $triangles = unpack(
'V', $triangles );
832 $triangles = reset( $triangles );
833 if ( $triangles !==
false && $fsize === 84 + ( $triangles * 50 ) ) {
834 return 'application/sla';
838 Wikimedia\suppressWarnings();
839 $gis = getimagesize( $file );
840 Wikimedia\restoreWarnings();
842 if ( $gis && isset( $gis[
'mime'] ) ) {
843 $mime = $gis[
'mime'];
844 $this->logger->info( __METHOD__ .
": getimagesize detected $file as $mime\n" );
848 # Media handling extensions can guess the MIME type from the file content
849 # It's intentionally here so that if core is wrong about a type (false positive),
850 # people will hopefully nag and submit patches :)
852 # Some strings are passed by reference for performance, assuming well-behaved hooks
855 $callback( $this, $head, $tail, $file,
$mime );
875 if (
$ext ) { # TODO:
remove $ext param
876 $this->logger->info( __METHOD__ .
877 ": WARNING: use of the \$ext parameter is deprecated. " .
878 "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
881 $mime =
'application/zip';
891 'presentation-template',
893 'spreadsheet-template',
901 $types =
'(?:' . implode(
'|', $opendocTypes ) .
')';
902 $opendocRegex =
"/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
904 $openxmlRegex =
"/^\[Content_Types\].xml/";
908 $this->logger->info( __METHOD__ .
": detected $mime from ZIP archive\n" );
909 } elseif ( preg_match( $openxmlRegex, substr(
$header, 30 ) ) ) {
910 $mime =
"application/x-opc+zip";
911 # TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
912 if (
$ext !==
true &&
$ext !==
false ) {
923 $mime =
"application/zip";
926 $this->logger->info( __METHOD__ .
927 ": detected an Open Packaging Conventions archive: $mime\n" );
928 } elseif ( substr(
$header, 0, 8 ) ==
"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
929 ( $headerpos = strpos( $tail,
"PK\x03\x04" ) ) !==
false &&
930 preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
931 if ( substr(
$header, 512, 4 ) ==
"\xEC\xA5\xC1\x00" ) {
932 $mime =
"application/msword";
934 switch ( substr(
$header, 512, 6 ) ) {
935 case "\xEC\xA5\xC1\x00\x0E\x00":
936 case "\xEC\xA5\xC1\x00\x1C\x00":
937 case "\xEC\xA5\xC1\x00\x43\x00":
938 $mime =
"application/vnd.ms-powerpoint";
940 case "\xFD\xFF\xFF\xFF\x10\x00":
941 case "\xFD\xFF\xFF\xFF\x1F\x00":
942 case "\xFD\xFF\xFF\xFF\x22\x00":
943 case "\xFD\xFF\xFF\xFF\x23\x00":
944 case "\xFD\xFF\xFF\xFF\x28\x00":
945 case "\xFD\xFF\xFF\xFF\x29\x00":
946 case "\xFD\xFF\xFF\xFF\x10\x02":
947 case "\xFD\xFF\xFF\xFF\x1F\x02":
948 case "\xFD\xFF\xFF\xFF\x22\x02":
949 case "\xFD\xFF\xFF\xFF\x23\x02":
950 case "\xFD\xFF\xFF\xFF\x28\x02":
951 case "\xFD\xFF\xFF\xFF\x29\x02":
952 $mime =
"application/vnd.msexcel";
956 $this->logger->info( __METHOD__ .
957 ": detected a MS Office document with OPC trailer\n" );
959 $this->logger->info( __METHOD__ .
": unable to identify type of ZIP archive\n" );
984 $this->logger->info( __METHOD__ .
985 ": WARNING: use of the \$ext parameter is deprecated. "
986 .
"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
992 $m = $callback( $file );
994 $m = mime_content_type( $file );
999 $m = preg_replace(
'![;, ].*$!',
'', $m ); # strip charset,
etc
1001 $m = strtolower( $m );
1003 if ( strpos( $m,
'unknown' ) !==
false ) {
1006 $this->logger->info( __METHOD__ .
": magic mime type of $file: $m\n" );
1012 if (
$ext ===
true ) {
1013 $i = strrpos( $file,
'.' );
1014 $ext = strtolower( $i ? substr( $file, $i + 1 ) :
'' );
1018 $this->logger->info( __METHOD__ .
": refusing to guess mime type for .$ext file, "
1019 .
"we should have recognized it\n" );
1023 $this->logger->info( __METHOD__ .
": extension mime type of $file: $m\n" );
1030 $this->logger->info( __METHOD__ .
": failed to guess mime type for $file!\n" );
1031 return 'unknown/unknown';
1062 if (
$mime ==
'application/ogg' && file_exists(
$path ) ) {
1064 $f = fopen(
$path,
"rt" );
1068 $head = fread( $f, 256 );
1071 $head = str_replace(
'ffmpeg2theora',
'', strtolower( $head ) );
1074 if ( strpos( $head,
'theora' ) !==
false ) {
1076 } elseif ( strpos( $head,
'vorbis' ) !==
false ) {
1078 } elseif ( strpos( $head,
'flac' ) !==
false ) {
1080 } elseif ( strpos( $head,
'speex' ) !==
false ) {
1082 } elseif ( strpos( $head,
'opus' ) !==
false ) {
1100 $i = strrpos(
$path,
'.' );
1101 $e = strtolower( $i ? substr(
$path, $i + 1 ) :
'' );
1112 $i = strpos(
$mime,
'/' );
1113 if ( $i !==
false ) {
1114 $major = substr(
$mime, 0, $i );
1140 if ( strpos( $extMime,
'.' ) === 0 ) {
1147 $m = explode(
' ', $m );
1150 if ( isset( $this->mimeTypeAliases[$extMime] ) ) {
1151 $extMime = $this->mimeTypeAliases[$extMime];
1158 foreach ( $this->mediaTypes
as $type => $codes ) {
1159 if ( in_array(
$mime, $codes,
true ) ) {
1174 return array_keys( $this->mediaTypes );
1188 return $ca->getRealMimesFromData( $fileName, $chunk, $proposed );
1197 if ( is_null( $this->IEAnalyzer ) ) {