22 use Psr\Log\LoggerAwareInterface;
23 use Psr\Log\LoggerInterface;
86 application/ogg ogx ogg ogm ogv oga spx opus
88 application/vnd.oasis.opendocument.chart odc
89 application/vnd.oasis.opendocument.chart-
template otc
90 application/vnd.oasis.opendocument.database odb
91 application/vnd.oasis.opendocument.formula odf
92 application/vnd.oasis.opendocument.formula-
template otf
93 application/vnd.oasis.opendocument.graphics odg
94 application/vnd.oasis.opendocument.graphics-
template otg
95 application/vnd.oasis.opendocument.image odi
96 application/vnd.oasis.opendocument.image-
template oti
97 application/vnd.oasis.opendocument.presentation odp
98 application/vnd.oasis.opendocument.presentation-
template otp
99 application/vnd.oasis.opendocument.spreadsheet ods
100 application/vnd.oasis.opendocument.spreadsheet-
template ots
101 application/vnd.oasis.opendocument.text odt
102 application/vnd.oasis.opendocument.text-master otm
103 application/vnd.oasis.opendocument.text-
template ott
104 application/vnd.oasis.opendocument.text-web oth
105 application/javascript js
106 application/x-shockwave-flash swf
107 audio/midi mid midi kar
108 audio/mpeg mpga mpa mp2 mp3
109 audio/x-aiff aif aiff aifc
111 audio/ogg oga spx ogg opus
112 audio/opus opus ogg oga ogg spx
115 image/jpeg jpeg jpg jpe
123 image/x-portable-pixmap ppm
127 video/ogg ogv ogm ogg
138 application/pdf [OFFICE]
139 application/vnd.oasis.opendocument.chart [OFFICE]
140 application/vnd.oasis.opendocument.chart-
template [OFFICE]
141 application/vnd.oasis.opendocument.database [OFFICE]
142 application/vnd.oasis.opendocument.formula [OFFICE]
143 application/vnd.oasis.opendocument.formula-
template [OFFICE]
144 application/vnd.oasis.opendocument.graphics [OFFICE]
145 application/vnd.oasis.opendocument.graphics-
template [OFFICE]
146 application/vnd.oasis.opendocument.image [OFFICE]
147 application/vnd.oasis.opendocument.image-
template [OFFICE]
148 application/vnd.oasis.opendocument.presentation [OFFICE]
149 application/vnd.oasis.opendocument.presentation-
template [OFFICE]
150 application/vnd.oasis.opendocument.spreadsheet [OFFICE]
151 application/vnd.oasis.opendocument.spreadsheet-
template [OFFICE]
152 application/vnd.oasis.opendocument.text [OFFICE]
153 application/vnd.oasis.opendocument.text-
template [OFFICE]
154 application/vnd.oasis.opendocument.text-master [OFFICE]
155 application/vnd.oasis.opendocument.text-web [OFFICE]
156 application/javascript
text/javascript application/x-javascript [EXECUTABLE]
157 application/x-shockwave-flash [MULTIMEDIA]
161 audio/mp3 audio/mpeg [AUDIO]
162 application/ogg audio/ogg video/ogg [MULTIMEDIA]
163 image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
167 image/svg+xml [DRAWING]
169 image/vnd.djvu [BITMAP]
171 image/x-portable-pixmap [BITMAP]
176 unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
195 $this->typeFile =
$params[
'typeFile'];
196 $this->infoFile =
$params[
'infoFile'];
197 $this->xmlTypes =
$params[
'xmlTypes'];
198 $this->initCallback =
$params[
'initCallback'] ??
null;
199 $this->detectCallback =
$params[
'detectCallback'] ??
null;
200 $this->guessCallback =
$params[
'guessCallback'] ??
null;
201 $this->extCallback =
$params[
'extCallback'] ??
null;
202 $this->logger =
$params[
'logger'] ?? new \Psr\Log\NullLogger();
212 # Allow media handling extensions adding MIME-types and MIME-info
213 if ( $this->initCallback ) {
214 call_user_func( $this->initCallback, $this );
220 if ( $mimeTypeFile ) {
221 if ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
222 $this->logger->info( __METHOD__ .
": loading mime types from $mimeTypeFile\n" );
224 $types .= file_get_contents( $mimeTypeFile );
226 $this->logger->info( __METHOD__ .
": can't load mime types from $mimeTypeFile\n" );
229 $this->logger->info( __METHOD__ .
230 ": no mime types file defined, using built-ins only.\n" );
235 $types = str_replace( [
"\r\n",
"\n\r",
"\n\n",
"\r\r",
"\r" ],
"\n", $types );
236 $types = str_replace(
"\t",
" ", $types );
238 $this->mimetoExt = [];
239 $this->mExtToMime = [];
241 $lines = explode(
"\n", $types );
247 if ( strpos(
$s,
'#' ) === 0 ) {
251 $s = strtolower(
$s );
252 $i = strpos(
$s,
' ' );
254 if ( $i ===
false ) {
259 $ext = trim( substr(
$s, $i + 1 ) );
261 if ( empty(
$ext ) ) {
265 if ( !empty( $this->mimetoExt[
$mime] ) ) {
271 $extensions = explode(
' ',
$ext );
273 foreach ( $extensions
as $e ) {
279 if ( !empty( $this->mExtToMime[
$e] ) ) {
280 $this->mExtToMime[
$e] .=
' ' .
$mime;
295 if ( $mimeInfoFile ) {
296 if ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
297 $this->logger->info( __METHOD__ .
": loading mime info from $mimeInfoFile\n" );
299 $info .= file_get_contents( $mimeInfoFile );
301 $this->logger->info( __METHOD__ .
": can't load mime info from $mimeInfoFile\n" );
304 $this->logger->info( __METHOD__ .
305 ": no mime info file defined, using built-ins only.\n" );
310 $info = str_replace( [
"\r\n",
"\n\r",
"\n\n",
"\r\r",
"\r" ],
"\n", $info );
311 $info = str_replace(
"\t",
" ", $info );
313 $this->mimeTypeAliases = [];
314 $this->mediaTypes = [];
316 $lines = explode(
"\n", $info );
322 if ( strpos(
$s,
'#' ) === 0 ) {
326 $s = strtolower(
$s );
327 $i = strpos(
$s,
' ' );
329 if ( $i ===
false ) {
333 # print "processing MIME INFO line $s<br>";
336 if ( preg_match(
'!\[\s*(\w+)\s*\]!',
$s, $match ) ) {
337 $s = preg_replace(
'!\[\s*(\w+)\s*\]!',
'',
$s );
338 $mtype = trim( strtoupper( $match[1] ) );
343 $m = explode(
' ',
$s );
345 if ( !isset( $this->mediaTypes[$mtype] ) ) {
346 $this->mediaTypes[$mtype] = [];
351 if ( empty(
$mime ) ) {
355 $this->mediaTypes[$mtype][] =
$mime;
358 if (
count( $m ) > 1 ) {
360 $mCount =
count( $m );
361 for ( $i = 1; $i < $mCount; $i += 1 ) {
363 $this->mimeTypeAliases[
$mime] = $main;
380 $this->extraTypes .=
"\n" . $types;
390 $this->extraInfo .=
"\n" . $info;
405 if ( isset( $this->mimetoExt[
$mime] ) ) {
406 return $this->mimetoExt[
$mime];
410 if ( isset( $this->mimeTypeAliases[
$mime] ) ) {
412 if ( isset( $this->mimetoExt[
$mime] ) ) {
413 return $this->mimetoExt[
$mime];
430 $r = $this->mExtToMime[
$ext] ??
null;
443 if ( is_null( $m ) ) {
449 $m = preg_replace(
'/\s.*$/',
'', $m );
472 $extension = strtolower( $extension );
473 return in_array( $extension,
$ext );
487 'image/gif',
'image/jpeg',
'image/png',
488 'image/x-bmp',
'image/xbm',
'image/tiff',
489 'image/jp2',
'image/jpeg2000',
'image/iff',
490 'image/xbm',
'image/x-xbitmap',
491 'image/vnd.wap.wbmp',
'image/vnd.xiff',
493 'application/x-shockwave-flash',
496 return in_array(
$mime, $types );
514 'gif',
'jpeg',
'jpg',
'png',
'swf',
'psd',
515 'bmp',
'tiff',
'tif',
'jpc',
'jp2',
516 'jpx',
'jb2',
'swc',
'iff',
'wbmp',
520 'djvu',
'ogx',
'ogg',
'ogv',
'oga',
'spx',
'opus',
521 'mid',
'pdf',
'wmf',
'xcf',
'webm',
'mkv',
'mka',
530 return in_array( strtolower( $extension ), $types );
545 if (
$mime ===
'unknown/unknown' ) {
547 $this->logger->info( __METHOD__ .
': refusing to guess mime type for .' .
548 "$ext file, we should have recognized it\n" );
554 } elseif (
$mime ===
'application/x-opc+zip' ) {
560 $this->logger->info( __METHOD__ .
561 ": refusing to guess better type for $mime file, " .
562 ".$ext is not a known OPC extension.\n" );
563 $mime =
'application/zip';
573 # Media handling extensions can improve the MIME detected
579 if ( isset( $this->mimeTypeAliases[
$mime] ) ) {
583 $this->logger->info( __METHOD__ .
": improved mime type for .$ext: $mime\n" );
603 $this->logger->info( __METHOD__ .
604 ": WARNING: use of the \$ext parameter is deprecated. " .
605 "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
611 $this->logger->info( __METHOD__ .
612 ": internal type detection failed for $file (.$ext)...\n" );
616 if ( isset( $this->mimeTypeAliases[
$mime] ) ) {
620 $this->logger->info( __METHOD__ .
": guessed mime type of $file: $mime\n" );
636 Wikimedia\suppressWarnings();
637 $f = fopen( $file,
'rb' );
638 Wikimedia\restoreWarnings();
641 return 'unknown/unknown';
644 $fsize = filesize( $file );
645 if ( $fsize ===
false ) {
646 return 'unknown/unknown';
649 $head = fread( $f, 1024 );
650 $tailLength = min( 65558, $fsize );
651 if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
652 throw new UnexpectedValueException(
653 "Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
655 $tail = $tailLength ? fread( $f, $tailLength ) :
'';
658 $this->logger->info( __METHOD__ .
659 ": analyzing head and tail of $file for magic numbers.\n" );
664 'MThd' =>
'audio/midi',
665 'OggS' =>
'application/ogg',
666 'ID3' =>
'audio/mpeg',
667 "\xff\xfb" =>
'audio/mpeg',
668 "\xff\xf3" =>
'audio/mpeg',
669 "\xff\xe3" =>
'audio/mpeg',
673 "\x01\x00\x09\x00" =>
'application/x-msmetafile',
674 "\xd7\xcd\xc6\x9a" =>
'application/x-msmetafile',
675 '%PDF' =>
'application/pdf',
676 'gimp xcf' =>
'image/x-xcf',
679 'MZ' =>
'application/octet-stream',
680 "\xca\xfe\xba\xbe" =>
'application/octet-stream',
681 "\x7fELF" =>
'application/octet-stream',
684 foreach ( $headers
as $magic => $candidate ) {
685 if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
686 $this->logger->info( __METHOD__ .
687 ": magic header in $file recognized as $candidate\n" );
693 if ( strncmp( $head, pack(
"C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
694 $doctype = strpos( $head,
"\x42\x82" );
697 $data = substr( $head, $doctype + 3, 8 );
698 if ( strncmp( $data,
"matroska", 8 ) == 0 ) {
699 $this->logger->info( __METHOD__ .
": recognized file as video/x-matroska\n" );
700 return "video/x-matroska";
701 } elseif ( strncmp( $data,
"webm", 4 ) == 0 ) {
703 $videotrack = strpos( $head,
"\x86\x85V_VP" );
707 $this->logger->info( __METHOD__ .
": recognized file as video/webm\n" );
711 $this->logger->info( __METHOD__ .
": recognized file as audio/webm\n" );
715 $this->logger->info( __METHOD__ .
": unknown EBML file\n" );
716 return "unknown/unknown";
720 if ( strncmp( $head,
"RIFF", 4 ) == 0 &&
721 strncmp( substr( $head, 8, 7 ),
"WEBPVP8", 7 ) == 0
723 $this->logger->info( __METHOD__ .
": recognized file as image/webp\n" );
739 if ( ( strpos( $head,
'<?php' ) !==
false ) ||
740 ( strpos( $head,
"<\x00?\x00p\x00h\x00p" ) !==
false ) ||
741 ( strpos( $head,
"<\x00?\x00 " ) !==
false ) ||
742 ( strpos( $head,
"<\x00?\x00\n" ) !==
false ) ||
743 ( strpos( $head,
"<\x00?\x00\t" ) !==
false ) ||
744 ( strpos( $head,
"<\x00?\x00=" ) !==
false )
746 $this->logger->info( __METHOD__ .
": recognized $file as application/x-php\n" );
747 return 'application/x-php';
754 if ( $xml->wellFormed ) {
756 if ( isset(
$xmlTypes[$xml->getRootElement()] ) ) {
757 return $xmlTypes[$xml->getRootElement()];
759 return 'application/xml';
769 if ( substr( $head, 0, 2 ) ==
"#!" ) {
770 $script_type =
"ASCII";
771 } elseif ( substr( $head, 0, 5 ) ==
"\xef\xbb\xbf#!" ) {
772 $script_type =
"UTF-8";
773 } elseif ( substr( $head, 0, 7 ) ==
"\xfe\xff\x00#\x00!" ) {
774 $script_type =
"UTF-16BE";
775 } elseif ( substr( $head, 0, 7 ) ==
"\xff\xfe#\x00!" ) {
776 $script_type =
"UTF-16LE";
779 if ( $script_type ) {
780 if ( $script_type !==
"UTF-8" && $script_type !==
"ASCII" ) {
782 $pack = [
'UTF-16BE' =>
'n*',
'UTF-16LE' =>
'v*' ];
783 $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
785 foreach ( $chars
as $codepoint ) {
786 if ( $codepoint < 128 ) {
787 $head .= chr( $codepoint );
796 if ( preg_match(
'%/?([^\s]+/)(\w+)%', $head, $match ) ) {
797 $mime =
"application/x-{$match[2]}";
798 $this->logger->info( __METHOD__ .
": shell script recognized as $mime\n" );
804 if ( strpos( $tail,
"PK\x05\x06" ) !==
false ) {
805 $this->logger->info( __METHOD__ .
": ZIP header present in $file\n" );
812 stripos( $head,
'SOLID ' ) === 0 &&
813 preg_match(
'/\RENDSOLID .*$/i', $tail ) ) {
815 return 'application/sla';
816 } elseif ( $fsize > 84 ) {
818 $triangles = substr( $head, 80, 4 );
819 $triangles = unpack(
'V', $triangles );
820 $triangles = reset( $triangles );
821 if ( $triangles !==
false && $fsize === 84 + ( $triangles * 50 ) ) {
822 return 'application/sla';
826 Wikimedia\suppressWarnings();
827 $gis = getimagesize( $file );
828 Wikimedia\restoreWarnings();
830 if ( $gis && isset( $gis[
'mime'] ) ) {
831 $mime = $gis[
'mime'];
832 $this->logger->info( __METHOD__ .
": getimagesize detected $file as $mime\n" );
836 # Media handling extensions can guess the MIME by content
837 # It's intentionally here so that if core is wrong about a type (false positive),
838 # people will hopefully nag and submit patches :)
840 # Some strings by reference for performance - assuming well-behaved hooks
843 $callback( $this, $head, $tail, $file,
$mime );
863 if (
$ext ) { # TODO:
remove $ext param
864 $this->logger->info( __METHOD__ .
865 ": WARNING: use of the \$ext parameter is deprecated. " .
866 "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
869 $mime =
'application/zip';
879 'presentation-template',
881 'spreadsheet-template',
889 $types =
'(?:' . implode(
'|', $opendocTypes ) .
')';
890 $opendocRegex =
"/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
892 $openxmlRegex =
"/^\[Content_Types\].xml/";
896 $this->logger->info( __METHOD__ .
": detected $mime from ZIP archive\n" );
897 } elseif ( preg_match( $openxmlRegex, substr(
$header, 30 ) ) ) {
898 $mime =
"application/x-opc+zip";
899 # TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
900 if (
$ext !==
true &&
$ext !==
false ) {
911 $mime =
"application/zip";
914 $this->logger->info( __METHOD__ .
915 ": detected an Open Packaging Conventions archive: $mime\n" );
916 } elseif ( substr(
$header, 0, 8 ) ==
"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
917 ( $headerpos = strpos( $tail,
"PK\x03\x04" ) ) !==
false &&
918 preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
919 if ( substr(
$header, 512, 4 ) ==
"\xEC\xA5\xC1\x00" ) {
920 $mime =
"application/msword";
922 switch ( substr(
$header, 512, 6 ) ) {
923 case "\xEC\xA5\xC1\x00\x0E\x00":
924 case "\xEC\xA5\xC1\x00\x1C\x00":
925 case "\xEC\xA5\xC1\x00\x43\x00":
926 $mime =
"application/vnd.ms-powerpoint";
928 case "\xFD\xFF\xFF\xFF\x10\x00":
929 case "\xFD\xFF\xFF\xFF\x1F\x00":
930 case "\xFD\xFF\xFF\xFF\x22\x00":
931 case "\xFD\xFF\xFF\xFF\x23\x00":
932 case "\xFD\xFF\xFF\xFF\x28\x00":
933 case "\xFD\xFF\xFF\xFF\x29\x00":
934 case "\xFD\xFF\xFF\xFF\x10\x02":
935 case "\xFD\xFF\xFF\xFF\x1F\x02":
936 case "\xFD\xFF\xFF\xFF\x22\x02":
937 case "\xFD\xFF\xFF\xFF\x23\x02":
938 case "\xFD\xFF\xFF\xFF\x28\x02":
939 case "\xFD\xFF\xFF\xFF\x29\x02":
940 $mime =
"application/vnd.msexcel";
944 $this->logger->info( __METHOD__ .
945 ": detected a MS Office document with OPC trailer\n" );
947 $this->logger->info( __METHOD__ .
": unable to identify type of ZIP archive\n" );
972 $this->logger->info( __METHOD__ .
973 ": WARNING: use of the \$ext parameter is deprecated. "
974 .
"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
980 $m = $callback( $file );
982 $m = mime_content_type( $file );
987 $m = preg_replace(
'![;, ].*$!',
'', $m ); # strip charset,
etc
989 $m = strtolower( $m );
991 if ( strpos( $m,
'unknown' ) !==
false ) {
994 $this->logger->info( __METHOD__ .
": magic mime type of $file: $m\n" );
1000 if (
$ext ===
true ) {
1001 $i = strrpos( $file,
'.' );
1002 $ext = strtolower( $i ? substr( $file, $i + 1 ) :
'' );
1006 $this->logger->info( __METHOD__ .
": refusing to guess mime type for .$ext file, "
1007 .
"we should have recognized it\n" );
1011 $this->logger->info( __METHOD__ .
": extension mime type of $file: $m\n" );
1018 $this->logger->info( __METHOD__ .
": failed to guess mime type for $file!\n" );
1019 return 'unknown/unknown';
1050 if (
$mime ==
'application/ogg' && file_exists(
$path ) ) {
1052 $f = fopen(
$path,
"rt" );
1056 $head = fread( $f, 256 );
1059 $head = str_replace(
'ffmpeg2theora',
'', strtolower( $head ) );
1062 if ( strpos( $head,
'theora' ) !==
false ) {
1064 } elseif ( strpos( $head,
'vorbis' ) !==
false ) {
1066 } elseif ( strpos( $head,
'flac' ) !==
false ) {
1068 } elseif ( strpos( $head,
'speex' ) !==
false ) {
1070 } elseif ( strpos( $head,
'opus' ) !==
false ) {
1088 $i = strrpos(
$path,
'.' );
1089 $e = strtolower( $i ? substr(
$path, $i + 1 ) :
'' );
1100 $i = strpos(
$mime,
'/' );
1101 if ( $i !==
false ) {
1102 $major = substr(
$mime, 0, $i );
1128 if ( strpos( $extMime,
'.' ) === 0 ) {
1135 $m = explode(
' ', $m );
1138 if ( isset( $this->mimeTypeAliases[$extMime] ) ) {
1139 $extMime = $this->mimeTypeAliases[$extMime];
1146 foreach ( $this->mediaTypes
as $type => $codes ) {
1147 if ( in_array(
$mime, $codes,
true ) ) {
1162 return array_keys( $this->mediaTypes );
1176 return $ca->getRealMimesFromData( $fileName, $chunk, $proposed );
1185 if ( is_null( $this->IEAnalyzer ) ) {