22 use Psr\Log\LoggerAwareInterface;
23 use Psr\Log\LoggerInterface;
24 use Psr\Log\NullLogger;
87 application/ogg ogx ogg ogm ogv oga spx opus
89 application/vnd.oasis.opendocument.chart odc
90 application/vnd.oasis.opendocument.chart-
template otc
91 application/vnd.oasis.opendocument.database odb
92 application/vnd.oasis.opendocument.formula odf
93 application/vnd.oasis.opendocument.formula-
template otf
94 application/vnd.oasis.opendocument.graphics odg
95 application/vnd.oasis.opendocument.graphics-
template otg
96 application/vnd.oasis.opendocument.image odi
97 application/vnd.oasis.opendocument.image-
template oti
98 application/vnd.oasis.opendocument.presentation odp
99 application/vnd.oasis.opendocument.presentation-
template otp
100 application/vnd.oasis.opendocument.spreadsheet ods
101 application/vnd.oasis.opendocument.spreadsheet-
template ots
102 application/vnd.oasis.opendocument.text odt
103 application/vnd.oasis.opendocument.text-master otm
104 application/vnd.oasis.opendocument.text-
template ott
105 application/vnd.oasis.opendocument.text-web oth
106 application/javascript js
107 application/x-shockwave-flash swf
108 audio/midi mid midi kar
109 audio/mpeg mpga mpa mp2 mp3
110 audio/x-aiff aif aiff aifc
112 audio/ogg oga spx ogg opus
113 audio/opus opus ogg oga ogg spx
116 image/jpeg jpeg jpg jpe
124 image/x-portable-pixmap ppm
128 video/ogg ogv ogm ogg
139 application/pdf [OFFICE]
140 application/vnd.oasis.opendocument.chart [OFFICE]
141 application/vnd.oasis.opendocument.chart-
template [OFFICE]
142 application/vnd.oasis.opendocument.database [OFFICE]
143 application/vnd.oasis.opendocument.formula [OFFICE]
144 application/vnd.oasis.opendocument.formula-
template [OFFICE]
145 application/vnd.oasis.opendocument.graphics [OFFICE]
146 application/vnd.oasis.opendocument.graphics-
template [OFFICE]
147 application/vnd.oasis.opendocument.image [OFFICE]
148 application/vnd.oasis.opendocument.image-
template [OFFICE]
149 application/vnd.oasis.opendocument.presentation [OFFICE]
150 application/vnd.oasis.opendocument.presentation-
template [OFFICE]
151 application/vnd.oasis.opendocument.spreadsheet [OFFICE]
152 application/vnd.oasis.opendocument.spreadsheet-
template [OFFICE]
153 application/vnd.oasis.opendocument.text [OFFICE]
154 application/vnd.oasis.opendocument.text-
template [OFFICE]
155 application/vnd.oasis.opendocument.text-master [OFFICE]
156 application/vnd.oasis.opendocument.text-web [OFFICE]
157 application/javascript text/javascript application/x-javascript [EXECUTABLE]
158 application/x-shockwave-flash [MULTIMEDIA]
162 audio/mp3 audio/mpeg [AUDIO]
163 application/ogg audio/ogg video/ogg [MULTIMEDIA]
164 image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
168 image/svg+xml [DRAWING]
170 image/vnd.djvu [BITMAP]
172 image/x-portable-pixmap [BITMAP]
177 unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
196 $this->typeFile = $params[
'typeFile'];
197 $this->infoFile = $params[
'infoFile'];
198 $this->xmlTypes = $params[
'xmlTypes'];
199 $this->initCallback = $params[
'initCallback'] ??
null;
200 $this->detectCallback = $params[
'detectCallback'] ??
null;
201 $this->guessCallback = $params[
'guessCallback'] ??
null;
202 $this->extCallback = $params[
'extCallback'] ??
null;
203 $this->logger = $params[
'logger'] ??
new NullLogger();
213 # Allow media handling extensions to add MIME types and MIME info
214 if ( $this->initCallback ) {
215 call_user_func( $this->initCallback, $this );
221 if ( $mimeTypeFile ) {
222 if ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
223 $this->logger->info( __METHOD__ .
": loading mime types from $mimeTypeFile\n" );
225 $types .= file_get_contents( $mimeTypeFile );
227 $this->logger->info( __METHOD__ .
": can't load mime types from $mimeTypeFile\n" );
230 $this->logger->info( __METHOD__ .
231 ": no mime types file defined, using built-ins only.\n" );
236 $types = str_replace( [
"\r\n",
"\n\r",
"\n\n",
"\r\r",
"\r" ],
"\n", $types );
237 $types = str_replace(
"\t",
" ", $types );
239 $this->mimetoExt = [];
240 $this->mExtToMime = [];
242 $lines = explode(
"\n", $types );
248 if ( strpos(
$s,
'#' ) === 0 ) {
252 $s = strtolower(
$s );
253 $i = strpos(
$s,
' ' );
255 if ( $i ===
false ) {
259 $mime = substr(
$s, 0, $i );
260 $ext = trim( substr(
$s, $i + 1 ) );
262 if ( empty(
$ext ) ) {
266 if ( !empty( $this->mimetoExt[$mime] ) ) {
267 $this->mimetoExt[$mime] .=
' ' .
$ext;
269 $this->mimetoExt[$mime] =
$ext;
272 $extensions = explode(
' ',
$ext );
274 foreach ( $extensions as $e ) {
280 if ( !empty( $this->mExtToMime[$e] ) ) {
281 $this->mExtToMime[$e] .=
' ' . $mime;
283 $this->mExtToMime[$e] = $mime;
296 if ( $mimeInfoFile ) {
297 if ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
298 $this->logger->info( __METHOD__ .
": loading mime info from $mimeInfoFile\n" );
300 $info .= file_get_contents( $mimeInfoFile );
302 $this->logger->info( __METHOD__ .
": can't load mime info from $mimeInfoFile\n" );
305 $this->logger->info( __METHOD__ .
306 ": no mime info file defined, using built-ins only.\n" );
311 $info = str_replace( [
"\r\n",
"\n\r",
"\n\n",
"\r\r",
"\r" ],
"\n", $info );
312 $info = str_replace(
"\t",
" ", $info );
314 $this->mimeTypeAliases = [];
315 $this->mediaTypes = [];
317 $lines = explode(
"\n", $info );
323 if ( strpos(
$s,
'#' ) === 0 ) {
327 $s = strtolower(
$s );
328 $i = strpos(
$s,
' ' );
330 if ( $i ===
false ) {
334 # print "processing MIME INFO line $s<br>";
337 if ( preg_match(
'!\[\s*(\w+)\s*\]!',
$s, $match ) ) {
338 $s = preg_replace(
'!\[\s*(\w+)\s*\]!',
'',
$s );
339 $mtype = trim( strtoupper( $match[1] ) );
344 $m = explode(
' ',
$s );
346 if ( !isset( $this->mediaTypes[$mtype] ) ) {
347 $this->mediaTypes[$mtype] = [];
350 foreach ( $m as $mime ) {
351 $mime = trim( $mime );
352 if ( empty( $mime ) ) {
356 $this->mediaTypes[$mtype][] = $mime;
359 if ( count( $m ) > 1 ) {
361 $mCount = count( $m );
362 for ( $i = 1; $i < $mCount; $i += 1 ) {
364 $this->mimeTypeAliases[$mime] = $main;
381 $this->extraTypes .=
"\n" . $types;
391 $this->extraInfo .=
"\n" . $info;
403 $mime = strtolower( $mime );
406 if ( isset( $this->mimetoExt[$mime] ) ) {
407 return $this->mimetoExt[$mime];
411 if ( isset( $this->mimeTypeAliases[$mime] ) ) {
412 $mime = $this->mimeTypeAliases[$mime];
413 if ( isset( $this->mimetoExt[$mime] ) ) {
414 return $this->mimetoExt[$mime];
431 $r = $this->mExtToMime[
$ext] ??
null;
444 if ( is_null( $m ) ) {
450 $m = preg_replace(
'/\s.*$/',
'', $m );
473 $extension = strtolower( $extension );
474 return in_array( $extension,
$ext );
488 'image/gif',
'image/jpeg',
'image/png',
489 'image/x-bmp',
'image/xbm',
'image/tiff',
490 'image/jp2',
'image/jpeg2000',
'image/iff',
491 'image/xbm',
'image/x-xbitmap',
492 'image/vnd.wap.wbmp',
'image/vnd.xiff',
494 'application/x-shockwave-flash',
497 return in_array( $mime, $types );
515 'gif',
'jpeg',
'jpg',
'png',
'swf',
'psd',
516 'bmp',
'tiff',
'tif',
'jpc',
'jp2',
517 'jpx',
'jb2',
'swc',
'iff',
'wbmp',
521 'djvu',
'ogx',
'ogg',
'ogv',
'oga',
'spx',
'opus',
522 'mid',
'pdf',
'wmf',
'xcf',
'webm',
'mkv',
'mka',
531 return in_array( strtolower( $extension ), $types );
546 if ( $mime ===
'unknown/unknown' ) {
548 $this->logger->info( __METHOD__ .
': refusing to guess mime type for .' .
549 "$ext file, we should have recognized it\n" );
555 } elseif ( $mime ===
'application/x-opc+zip' ) {
561 $this->logger->info( __METHOD__ .
562 ": refusing to guess better type for $mime file, " .
563 ".$ext is not a known OPC extension.\n" );
564 $mime =
'application/zip';
574 # Media handling extensions can improve the MIME detected
577 $callback( $this,
$ext, $mime );
580 if ( isset( $this->mimeTypeAliases[$mime] ) ) {
581 $mime = $this->mimeTypeAliases[$mime];
584 $this->logger->info( __METHOD__ .
": improved mime type for .$ext: $mime\n" );
604 $this->logger->info( __METHOD__ .
605 ": WARNING: use of the \$ext parameter is deprecated. " .
606 "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
612 $this->logger->info( __METHOD__ .
613 ": internal type detection failed for $file (.$ext)...\n" );
617 if ( isset( $this->mimeTypeAliases[$mime] ) ) {
618 $mime = $this->mimeTypeAliases[$mime];
621 $this->logger->info( __METHOD__ .
": guessed mime type of $file: $mime\n" );
637 Wikimedia\suppressWarnings();
638 $f = fopen(
$file,
'rb' );
639 Wikimedia\restoreWarnings();
642 return 'unknown/unknown';
645 $fsize = filesize(
$file );
646 if ( $fsize ===
false ) {
647 return 'unknown/unknown';
650 $head = fread( $f, 1024 );
651 $tailLength = min( 65558, $fsize );
652 if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
653 throw new UnexpectedValueException(
654 "Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
656 $tail = $tailLength ? fread( $f, $tailLength ) :
'';
658 $this->logger->info( __METHOD__ .
659 ": analyzing head and tail of $file for magic numbers.\n" );
664 'MThd' =>
'audio/midi',
665 'OggS' =>
'application/ogg',
666 'ID3' =>
'audio/mpeg',
667 "\xff\xfb" =>
'audio/mpeg',
668 "\xff\xf3" =>
'audio/mpeg',
669 "\xff\xe3" =>
'audio/mpeg',
673 "\x01\x00\x09\x00" =>
'application/x-msmetafile',
674 "\xd7\xcd\xc6\x9a" =>
'application/x-msmetafile',
675 '%PDF' =>
'application/pdf',
676 'gimp xcf' =>
'image/x-xcf',
679 'MZ' =>
'application/octet-stream',
680 "\xca\xfe\xba\xbe" =>
'application/octet-stream',
681 "\x7fELF" =>
'application/octet-stream',
684 foreach ( $headers as $magic => $candidate ) {
685 if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
686 $this->logger->info( __METHOD__ .
687 ": magic header in $file recognized as $candidate\n" );
693 if ( strncmp( $head, pack(
"C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
694 $doctype = strpos( $head,
"\x42\x82" );
697 $data = substr( $head, $doctype + 3, 8 );
698 if ( strncmp( $data,
"matroska", 8 ) == 0 ) {
699 $this->logger->info( __METHOD__ .
": recognized file as video/x-matroska\n" );
700 return "video/x-matroska";
701 } elseif ( strncmp( $data,
"webm", 4 ) == 0 ) {
703 $videotrack = strpos( $head,
"\x86\x85V_VP" );
707 $this->logger->info( __METHOD__ .
": recognized file as video/webm\n" );
711 $this->logger->info( __METHOD__ .
": recognized file as audio/webm\n" );
715 $this->logger->info( __METHOD__ .
": unknown EBML file\n" );
716 return "unknown/unknown";
720 if ( strncmp( $head,
"RIFF", 4 ) == 0 &&
721 strncmp( substr( $head, 8, 7 ),
"WEBPVP8", 7 ) == 0
723 $this->logger->info( __METHOD__ .
": recognized file as image/webp\n" );
728 if ( strncmp( $head,
"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", 8 ) == 0 ) {
729 $this->logger->info( __METHOD__ .
': recognized MS CFB (OLE) file' );
745 if ( ( strpos( $head,
'<?php' ) !==
false ) ||
746 ( strpos( $head,
"<\x00?\x00p\x00h\x00p" ) !==
false ) ||
747 ( strpos( $head,
"<\x00?\x00 " ) !==
false ) ||
748 ( strpos( $head,
"<\x00?\x00\n" ) !==
false ) ||
749 ( strpos( $head,
"<\x00?\x00\t" ) !==
false ) ||
750 ( strpos( $head,
"<\x00?\x00=" ) !==
false )
752 $this->logger->info( __METHOD__ .
": recognized $file as application/x-php\n" );
753 return 'application/x-php';
759 Wikimedia\suppressWarnings();
761 Wikimedia\restoreWarnings();
762 if ( $xml->wellFormed ) {
764 return $xmlTypes[$xml->getRootElement()] ??
'application/xml';
773 if ( substr( $head, 0, 2 ) ==
"#!" ) {
774 $script_type =
"ASCII";
775 } elseif ( substr( $head, 0, 5 ) ==
"\xef\xbb\xbf#!" ) {
776 $script_type =
"UTF-8";
777 } elseif ( substr( $head, 0, 7 ) ==
"\xfe\xff\x00#\x00!" ) {
778 $script_type =
"UTF-16BE";
779 } elseif ( substr( $head, 0, 7 ) ==
"\xff\xfe#\x00!" ) {
780 $script_type =
"UTF-16LE";
783 if ( $script_type ) {
784 if ( $script_type !==
"UTF-8" && $script_type !==
"ASCII" ) {
786 $pack = [
'UTF-16BE' =>
'n*',
'UTF-16LE' =>
'v*' ];
787 $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
789 foreach (
$chars as $codepoint ) {
790 if ( $codepoint < 128 ) {
791 $head .= chr( $codepoint );
800 if ( preg_match(
'%/?([^\s]+/)(\w+)%', $head, $match ) ) {
801 $mime =
"application/x-{$match[2]}";
802 $this->logger->info( __METHOD__ .
": shell script recognized as $mime\n" );
808 $eocdrPos = strpos( $tail,
"PK\x05\x06" );
809 if ( $eocdrPos !==
false && $eocdrPos <= strlen( $tail ) - 22 ) {
810 $this->logger->info( __METHOD__ .
": ZIP signature present in $file\n" );
812 $commentLength = unpack(
"n", substr( $tail, $eocdrPos + 20 ) )[1];
813 if ( $eocdrPos + 22 + $commentLength !== strlen( $tail ) ) {
814 $this->logger->info( __METHOD__ .
": ZIP EOCDR not at end. Not a ZIP file." );
823 stripos( $head,
'SOLID ' ) === 0 &&
824 preg_match(
'/\RENDSOLID .*$/i', $tail ) ) {
826 return 'application/sla';
827 } elseif ( $fsize > 84 ) {
829 $triangles = substr( $head, 80, 4 );
830 $triangles = unpack(
'V', $triangles );
831 $triangles = reset( $triangles );
832 if ( $triangles !==
false && $fsize === 84 + ( $triangles * 50 ) ) {
833 return 'application/sla';
837 Wikimedia\suppressWarnings();
838 $gis = getimagesize(
$file );
839 Wikimedia\restoreWarnings();
841 if ( $gis && isset( $gis[
'mime'] ) ) {
842 $mime = $gis[
'mime'];
843 $this->logger->info( __METHOD__ .
": getimagesize detected $file as $mime\n" );
847 # Media handling extensions can guess the MIME by content
848 # It's intentionally here so that if core is wrong about a type (false positive),
849 # people will hopefully nag and submit patches :)
851 # Some strings are passed by reference for performance, assuming well-behaved hooks
854 $callback( $this, $head, $tail,
$file, $mime );
874 if (
$ext ) { # TODO:
remove $ext param
875 $this->logger->info( __METHOD__ .
876 ": WARNING: use of the \$ext parameter is deprecated. " .
877 "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
880 $mime =
'application/zip';
882 # In OASIS Open Document Format v1.2, Database front end document
883 # has a recommended MIME type of:
884 # application/vnd.oasis.opendocument.base
885 # Despite the type registered at the IANA being 'database' which is
886 # supposed to be normative.
898 'presentation-template',
900 'spreadsheet-template',
911 $types =
'(?:' . implode(
'|', $opendocTypes ) .
')';
912 $opendocRegex =
"/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
914 $openxmlRegex =
"/^\[Content_Types\].xml/";
918 $this->logger->info( __METHOD__ .
": detected $mime from ZIP archive\n" );
919 } elseif ( preg_match( $openxmlRegex, substr(
$header, 30 ) ) ) {
920 $mime =
"application/x-opc+zip";
921 # TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
922 if (
$ext !==
true &&
$ext !==
false ) {
933 $mime =
"application/zip";
936 $this->logger->info( __METHOD__ .
937 ": detected an Open Packaging Conventions archive: $mime\n" );
938 } elseif ( substr(
$header, 0, 8 ) ==
"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
939 ( $headerpos = strpos( $tail,
"PK\x03\x04" ) ) !==
false &&
940 preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
941 if ( substr(
$header, 512, 4 ) ==
"\xEC\xA5\xC1\x00" ) {
942 $mime =
"application/msword";
944 switch ( substr(
$header, 512, 6 ) ) {
945 case "\xEC\xA5\xC1\x00\x0E\x00":
946 case "\xEC\xA5\xC1\x00\x1C\x00":
947 case "\xEC\xA5\xC1\x00\x43\x00":
948 $mime =
"application/vnd.ms-powerpoint";
950 case "\xFD\xFF\xFF\xFF\x10\x00":
951 case "\xFD\xFF\xFF\xFF\x1F\x00":
952 case "\xFD\xFF\xFF\xFF\x22\x00":
953 case "\xFD\xFF\xFF\xFF\x23\x00":
954 case "\xFD\xFF\xFF\xFF\x28\x00":
955 case "\xFD\xFF\xFF\xFF\x29\x00":
956 case "\xFD\xFF\xFF\xFF\x10\x02":
957 case "\xFD\xFF\xFF\xFF\x1F\x02":
958 case "\xFD\xFF\xFF\xFF\x22\x02":
959 case "\xFD\xFF\xFF\xFF\x23\x02":
960 case "\xFD\xFF\xFF\xFF\x28\x02":
961 case "\xFD\xFF\xFF\xFF\x29\x02":
962 $mime =
"application/vnd.msexcel";
966 $this->logger->info( __METHOD__ .
967 ": detected a MS Office document with OPC trailer\n" );
969 $this->logger->info( __METHOD__ .
": unable to identify type of ZIP archive\n" );
983 if ( !$info[
'valid'] ) {
984 $this->logger->info( __METHOD__ .
': invalid file format' );
985 return 'unknown/unknown';
987 if ( !$info[
'mime'] ) {
988 $this->logger->info( __METHOD__ .
": unrecognised document subtype" );
989 return 'unknown/unknown';
991 return $info[
'mime'];
1014 $this->logger->info( __METHOD__ .
1015 ": WARNING: use of the \$ext parameter is deprecated. "
1016 .
"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
1022 $m = $callback(
$file );
1024 $m = mime_content_type(
$file );
1029 $m = preg_replace(
'![;, ].*$!',
'', $m ); # strip charset, etc
1031 $m = strtolower( $m );
1033 if ( strpos( $m,
'unknown' ) !==
false ) {
1036 $this->logger->info( __METHOD__ .
": magic mime type of $file: $m\n" );
1042 if (
$ext ===
true ) {
1043 $i = strrpos(
$file,
'.' );
1044 $ext = strtolower( $i ? substr(
$file, $i + 1 ) :
'' );
1048 $this->logger->info( __METHOD__ .
": refusing to guess mime type for .$ext file, "
1049 .
"we should have recognized it\n" );
1053 $this->logger->info( __METHOD__ .
": extension mime type of $file: $m\n" );
1060 $this->logger->info( __METHOD__ .
": failed to guess mime type for $file!\n" );
1061 return 'unknown/unknown';
1081 if ( !$mime && !
$path ) {
1092 if ( $mime ==
'application/ogg' && is_string(
$path ) && file_exists(
$path ) ) {
1094 $f = fopen(
$path,
"rt" );
1098 $head = fread( $f, 256 );
1101 $head = str_replace(
'ffmpeg2theora',
'', strtolower( $head ) );
1104 if ( strpos( $head,
'theora' ) !==
false ) {
1106 } elseif ( strpos( $head,
'vorbis' ) !==
false ) {
1108 } elseif ( strpos( $head,
'flac' ) !==
false ) {
1110 } elseif ( strpos( $head,
'speex' ) !==
false ) {
1112 } elseif ( strpos( $head,
'opus' ) !==
false ) {
1130 $i = strrpos(
$path,
'.' );
1131 $e = strtolower( $i ? substr(
$path, $i + 1 ) :
'' );
1142 $i = strpos( $mime,
'/' );
1143 if ( $i !==
false ) {
1144 $major = substr( $mime, 0, $i );
1170 if ( strpos( $extMime,
'.' ) === 0 ) {
1177 $m = explode(
' ', $m );
1180 if ( isset( $this->mimeTypeAliases[$extMime] ) ) {
1181 $extMime = $this->mimeTypeAliases[$extMime];
1187 foreach ( $m as $mime ) {
1188 foreach ( $this->mediaTypes as
$type => $codes ) {
1189 if ( in_array( $mime, $codes,
true ) ) {
1204 return array_keys( $this->mediaTypes );
1218 return $ca->getRealMimesFromData( $fileName, $chunk, $proposed );
1227 if ( is_null( $this->IEAnalyzer ) ) {