22 use Psr\Log\LoggerAwareInterface;
23 use Psr\Log\LoggerInterface;
86 application/ogg ogx ogg ogm ogv oga spx opus
88 application/vnd.oasis.opendocument.chart odc
89 application/vnd.oasis.opendocument.chart-
template otc
90 application/vnd.oasis.opendocument.database odb
91 application/vnd.oasis.opendocument.formula odf
92 application/vnd.oasis.opendocument.formula-
template otf
93 application/vnd.oasis.opendocument.graphics odg
94 application/vnd.oasis.opendocument.graphics-
template otg
95 application/vnd.oasis.opendocument.image odi
96 application/vnd.oasis.opendocument.image-
template oti
97 application/vnd.oasis.opendocument.presentation odp
98 application/vnd.oasis.opendocument.presentation-
template otp
99 application/vnd.oasis.opendocument.spreadsheet ods
100 application/vnd.oasis.opendocument.spreadsheet-
template ots
101 application/vnd.oasis.opendocument.text odt
102 application/vnd.oasis.opendocument.text-master otm
103 application/vnd.oasis.opendocument.text-
template ott
104 application/vnd.oasis.opendocument.text-web oth
105 application/javascript js
106 application/x-shockwave-flash swf
107 audio/midi mid midi kar
108 audio/mpeg mpga mpa mp2 mp3
109 audio/x-aiff aif aiff aifc
111 audio/ogg oga spx ogg opus
112 audio/opus opus ogg oga ogg spx
115 image/jpeg jpeg jpg jpe
123 image/x-portable-pixmap ppm
127 video/ogg ogv ogm ogg
138 application/pdf [OFFICE]
139 application/vnd.oasis.opendocument.chart [OFFICE]
140 application/vnd.oasis.opendocument.chart-
template [OFFICE]
141 application/vnd.oasis.opendocument.database [OFFICE]
142 application/vnd.oasis.opendocument.formula [OFFICE]
143 application/vnd.oasis.opendocument.formula-
template [OFFICE]
144 application/vnd.oasis.opendocument.graphics [OFFICE]
145 application/vnd.oasis.opendocument.graphics-
template [OFFICE]
146 application/vnd.oasis.opendocument.image [OFFICE]
147 application/vnd.oasis.opendocument.image-
template [OFFICE]
148 application/vnd.oasis.opendocument.presentation [OFFICE]
149 application/vnd.oasis.opendocument.presentation-
template [OFFICE]
150 application/vnd.oasis.opendocument.spreadsheet [OFFICE]
151 application/vnd.oasis.opendocument.spreadsheet-
template [OFFICE]
152 application/vnd.oasis.opendocument.text [OFFICE]
153 application/vnd.oasis.opendocument.text-
template [OFFICE]
154 application/vnd.oasis.opendocument.text-master [OFFICE]
155 application/vnd.oasis.opendocument.text-web [OFFICE]
156 application/javascript
text/javascript application/x-javascript [EXECUTABLE]
157 application/x-shockwave-flash [MULTIMEDIA]
161 audio/mp3 audio/mpeg [AUDIO]
162 application/ogg audio/ogg video/ogg [MULTIMEDIA]
163 image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
167 image/svg+xml [DRAWING]
169 image/vnd.djvu [BITMAP]
171 image/x-portable-pixmap [BITMAP]
176 unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
195 $this->typeFile =
$params[
'typeFile'];
196 $this->infoFile =
$params[
'infoFile'];
197 $this->xmlTypes =
$params[
'xmlTypes'];
198 $this->initCallback =
$params[
'initCallback'] ??
null;
199 $this->detectCallback =
$params[
'detectCallback'] ??
null;
200 $this->guessCallback =
$params[
'guessCallback'] ??
null;
201 $this->extCallback =
$params[
'extCallback'] ??
null;
202 $this->logger =
$params[
'logger'] ?? new \Psr\Log\NullLogger();
212 # Allow media handling extensions to add MIME types and MIME info
213 if ( $this->initCallback ) {
214 call_user_func( $this->initCallback, $this );
220 if ( $mimeTypeFile ) {
221 if ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
222 $this->logger->info( __METHOD__ .
": loading mime types from $mimeTypeFile\n" );
224 $types .= file_get_contents( $mimeTypeFile );
226 $this->logger->info( __METHOD__ .
": can't load mime types from $mimeTypeFile\n" );
229 $this->logger->info( __METHOD__ .
230 ": no mime types file defined, using built-ins only.\n" );
235 $types = str_replace( [
"\r\n",
"\n\r",
"\n\n",
"\r\r",
"\r" ],
"\n", $types );
236 $types = str_replace(
"\t",
" ", $types );
238 $this->mimetoExt = [];
239 $this->mExtToMime = [];
241 $lines = explode(
"\n", $types );
247 if ( strpos(
$s,
'#' ) === 0 ) {
251 $s = strtolower(
$s );
252 $i = strpos(
$s,
' ' );
254 if ( $i ===
false ) {
258 $mime = substr(
$s, 0, $i );
259 $ext = trim( substr(
$s, $i + 1 ) );
261 if ( empty(
$ext ) ) {
265 if ( !empty( $this->mimetoExt[$mime] ) ) {
266 $this->mimetoExt[$mime] .=
' ' .
$ext;
268 $this->mimetoExt[$mime] =
$ext;
271 $extensions = explode(
' ',
$ext );
273 foreach ( $extensions
as $e ) {
279 if ( !empty( $this->mExtToMime[
$e] ) ) {
280 $this->mExtToMime[
$e] .=
' ' . $mime;
282 $this->mExtToMime[
$e] = $mime;
295 if ( $mimeInfoFile ) {
296 if ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
297 $this->logger->info( __METHOD__ .
": loading mime info from $mimeInfoFile\n" );
299 $info .= file_get_contents( $mimeInfoFile );
301 $this->logger->info( __METHOD__ .
": can't load mime info from $mimeInfoFile\n" );
304 $this->logger->info( __METHOD__ .
305 ": no mime info file defined, using built-ins only.\n" );
310 $info = str_replace( [
"\r\n",
"\n\r",
"\n\n",
"\r\r",
"\r" ],
"\n", $info );
311 $info = str_replace(
"\t",
" ", $info );
313 $this->mimeTypeAliases = [];
314 $this->mediaTypes = [];
316 $lines = explode(
"\n", $info );
322 if ( strpos(
$s,
'#' ) === 0 ) {
326 $s = strtolower(
$s );
327 $i = strpos(
$s,
' ' );
329 if ( $i ===
false ) {
333 # print "processing MIME INFO line $s<br>";
336 if ( preg_match(
'!\[\s*(\w+)\s*\]!',
$s, $match ) ) {
337 $s = preg_replace(
'!\[\s*(\w+)\s*\]!',
'',
$s );
338 $mtype = trim( strtoupper( $match[1] ) );
343 $m = explode(
' ',
$s );
345 if ( !isset( $this->mediaTypes[$mtype] ) ) {
346 $this->mediaTypes[$mtype] = [];
349 foreach ( $m
as $mime ) {
350 $mime = trim( $mime );
351 if ( empty( $mime ) ) {
355 $this->mediaTypes[$mtype][] = $mime;
358 if (
count( $m ) > 1 ) {
360 $mCount =
count( $m );
361 for ( $i = 1; $i < $mCount; $i += 1 ) {
363 $this->mimeTypeAliases[$mime] = $main;
380 $this->extraTypes .=
"\n" . $types;
390 $this->extraInfo .=
"\n" . $info;
402 $mime = strtolower( $mime );
405 if ( isset( $this->mimetoExt[$mime] ) ) {
406 return $this->mimetoExt[$mime];
410 if ( isset( $this->mimeTypeAliases[$mime] ) ) {
411 $mime = $this->mimeTypeAliases[$mime];
412 if ( isset( $this->mimetoExt[$mime] ) ) {
413 return $this->mimetoExt[$mime];
430 $r = $this->mExtToMime[
$ext] ??
null;
443 if ( is_null( $m ) ) {
449 $m = preg_replace(
'/\s.*$/',
'', $m );
472 $extension = strtolower( $extension );
473 return in_array( $extension,
$ext );
487 'image/gif',
'image/jpeg',
'image/png',
488 'image/x-bmp',
'image/xbm',
'image/tiff',
489 'image/jp2',
'image/jpeg2000',
'image/iff',
490 'image/xbm',
'image/x-xbitmap',
491 'image/vnd.wap.wbmp',
'image/vnd.xiff',
493 'application/x-shockwave-flash',
496 return in_array( $mime, $types );
514 'gif',
'jpeg',
'jpg',
'png',
'swf',
'psd',
515 'bmp',
'tiff',
'tif',
'jpc',
'jp2',
516 'jpx',
'jb2',
'swc',
'iff',
'wbmp',
520 'djvu',
'ogx',
'ogg',
'ogv',
'oga',
'spx',
'opus',
521 'mid',
'pdf',
'wmf',
'xcf',
'webm',
'mkv',
'mka',
530 return in_array( strtolower( $extension ), $types );
545 if ( $mime ===
'unknown/unknown' ) {
547 $this->logger->info( __METHOD__ .
': refusing to guess mime type for .' .
548 "$ext file, we should have recognized it\n" );
554 } elseif ( $mime ===
'application/x-opc+zip' ) {
560 $this->logger->info( __METHOD__ .
561 ": refusing to guess better type for $mime file, " .
562 ".$ext is not a known OPC extension.\n" );
563 $mime =
'application/zip';
573 # Media handling extensions can improve the detected MIME type
576 $callback( $this,
$ext, $mime );
579 if ( isset( $this->mimeTypeAliases[$mime] ) ) {
580 $mime = $this->mimeTypeAliases[$mime];
583 $this->logger->info( __METHOD__ .
": improved mime type for .$ext: $mime\n" );
603 $this->logger->info( __METHOD__ .
604 ": WARNING: use of the \$ext parameter is deprecated. " .
605 "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
611 $this->logger->info( __METHOD__ .
612 ": internal type detection failed for $file (.$ext)...\n" );
616 if ( isset( $this->mimeTypeAliases[$mime] ) ) {
617 $mime = $this->mimeTypeAliases[$mime];
620 $this->logger->info( __METHOD__ .
": guessed mime type of $file: $mime\n" );
636 Wikimedia\suppressWarnings();
638 Wikimedia\restoreWarnings();
641 return 'unknown/unknown';
644 $fsize = filesize(
$file );
645 if ( $fsize ===
false ) {
646 return 'unknown/unknown';
649 $head = fread(
$f, 1024 );
650 $tailLength = min( 65558, $fsize );
651 if ( fseek(
$f, -1 * $tailLength, SEEK_END ) === -1 ) {
652 throw new UnexpectedValueException(
653 "Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
655 $tail = $tailLength ? fread(
$f, $tailLength ) :
'';
657 $this->logger->info( __METHOD__ .
658 ": analyzing head and tail of $file for magic numbers.\n" );
663 'MThd' =>
'audio/midi',
664 'OggS' =>
'application/ogg',
665 'ID3' =>
'audio/mpeg',
666 "\xff\xfb" =>
'audio/mpeg',
667 "\xff\xf3" =>
'audio/mpeg',
668 "\xff\xe3" =>
'audio/mpeg',
672 "\x01\x00\x09\x00" =>
'application/x-msmetafile',
673 "\xd7\xcd\xc6\x9a" =>
'application/x-msmetafile',
674 '%PDF' =>
'application/pdf',
675 'gimp xcf' =>
'image/x-xcf',
678 'MZ' =>
'application/octet-stream',
679 "\xca\xfe\xba\xbe" =>
'application/octet-stream',
680 "\x7fELF" =>
'application/octet-stream',
683 foreach ( $headers
as $magic => $candidate ) {
684 if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
685 $this->logger->info( __METHOD__ .
686 ": magic header in $file recognized as $candidate\n" );
692 if ( strncmp( $head, pack(
"C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
693 $doctype = strpos( $head,
"\x42\x82" );
696 $data = substr( $head, $doctype + 3, 8 );
697 if ( strncmp(
$data,
"matroska", 8 ) == 0 ) {
698 $this->logger->info( __METHOD__ .
": recognized file as video/x-matroska\n" );
699 return "video/x-matroska";
700 } elseif ( strncmp(
$data,
"webm", 4 ) == 0 ) {
702 $videotrack = strpos( $head,
"\x86\x85V_VP" );
706 $this->logger->info( __METHOD__ .
": recognized file as video/webm\n" );
710 $this->logger->info( __METHOD__ .
": recognized file as audio/webm\n" );
714 $this->logger->info( __METHOD__ .
": unknown EBML file\n" );
715 return "unknown/unknown";
719 if ( strncmp( $head,
"RIFF", 4 ) == 0 &&
720 strncmp( substr( $head, 8, 7 ),
"WEBPVP8", 7 ) == 0
722 $this->logger->info( __METHOD__ .
": recognized file as image/webp\n" );
727 if ( strncmp( $head,
"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", 8 ) == 0 ) {
728 $this->logger->info( __METHOD__ .
': recognized MS CFB (OLE) file' );
744 if ( ( strpos( $head,
'<?php' ) !==
false ) ||
745 ( strpos( $head,
"<\x00?\x00p\x00h\x00p" ) !==
false ) ||
746 ( strpos( $head,
"<\x00?\x00 " ) !==
false ) ||
747 ( strpos( $head,
"<\x00?\x00\n" ) !==
false ) ||
748 ( strpos( $head,
"<\x00?\x00\t" ) !==
false ) ||
749 ( strpos( $head,
"<\x00?\x00=" ) !==
false )
751 $this->logger->info( __METHOD__ .
": recognized $file as application/x-php\n" );
752 return 'application/x-php';
759 if ( $xml->wellFormed ) {
761 return $xmlTypes[$xml->getRootElement()] ??
'application/xml';
770 if ( substr( $head, 0, 2 ) ==
"#!" ) {
771 $script_type =
"ASCII";
772 } elseif ( substr( $head, 0, 5 ) ==
"\xef\xbb\xbf#!" ) {
773 $script_type =
"UTF-8";
774 } elseif ( substr( $head, 0, 7 ) ==
"\xfe\xff\x00#\x00!" ) {
775 $script_type =
"UTF-16BE";
776 } elseif ( substr( $head, 0, 7 ) ==
"\xff\xfe#\x00!" ) {
777 $script_type =
"UTF-16LE";
780 if ( $script_type ) {
781 if ( $script_type !==
"UTF-8" && $script_type !==
"ASCII" ) {
783 $pack = [
'UTF-16BE' =>
'n*',
'UTF-16LE' =>
'v*' ];
784 $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
786 foreach ( $chars
as $codepoint ) {
787 if ( $codepoint < 128 ) {
788 $head .= chr( $codepoint );
797 if ( preg_match(
'%/?([^\s]+/)(\w+)%', $head, $match ) ) {
798 $mime =
"application/x-{$match[2]}";
799 $this->logger->info( __METHOD__ .
": shell script recognized as $mime\n" );
805 $eocdrPos = strpos( $tail,
"PK\x05\x06" );
806 if ( $eocdrPos !==
false ) {
807 $this->logger->info( __METHOD__ .
": ZIP signature present in $file\n" );
809 $commentLength = unpack(
"n", substr( $tail, $eocdrPos + 20 ) )[0];
810 if ( $eocdrPos + 22 + $commentLength !== strlen( $tail ) ) {
811 $this->logger->info( __METHOD__ .
": ZIP EOCDR not at end. Not a ZIP file." );
820 stripos( $head,
'SOLID ' ) === 0 &&
821 preg_match(
'/\RENDSOLID .*$/i', $tail ) ) {
823 return 'application/sla';
824 } elseif ( $fsize > 84 ) {
826 $triangles = substr( $head, 80, 4 );
827 $triangles = unpack(
'V', $triangles );
828 $triangles = reset( $triangles );
829 if ( $triangles !==
false && $fsize === 84 + ( $triangles * 50 ) ) {
830 return 'application/sla';
834 Wikimedia\suppressWarnings();
835 $gis = getimagesize(
$file );
836 Wikimedia\restoreWarnings();
838 if ( $gis && isset( $gis[
'mime'] ) ) {
839 $mime = $gis[
'mime'];
840 $this->logger->info( __METHOD__ .
": getimagesize detected $file as $mime\n" );
844 # Media handling extensions can guess the MIME by content
845 # It's intentionally here so that if core is wrong about a type (false positive),
846 # people will hopefully nag and submit patches :)
848 # Some strings are passed by reference for performance, assuming well-behaved hooks
851 $callback( $this, $head, $tail,
$file, $mime );
871 if (
$ext ) { # TODO:
remove $ext param
872 $this->logger->info( __METHOD__ .
873 ": WARNING: use of the \$ext parameter is deprecated. " .
874 "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
877 $mime =
'application/zip';
887 'presentation-template',
889 'spreadsheet-template',
897 $types =
'(?:' . implode(
'|', $opendocTypes ) .
')';
898 $opendocRegex =
"/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
900 $openxmlRegex =
"/^\[Content_Types\].xml/";
904 $this->logger->info( __METHOD__ .
": detected $mime from ZIP archive\n" );
905 } elseif ( preg_match( $openxmlRegex, substr(
$header, 30 ) ) ) {
906 $mime =
"application/x-opc+zip";
907 # TODO: remove the block below as soon as improveTypeFromExtension is used everywhere
908 if (
$ext !==
true &&
$ext !==
false ) {
919 $mime =
"application/zip";
922 $this->logger->info( __METHOD__ .
923 ": detected an Open Packaging Conventions archive: $mime\n" );
924 } elseif ( substr(
$header, 0, 8 ) ==
"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
925 ( $headerpos = strpos( $tail,
"PK\x03\x04" ) ) !==
false &&
926 preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
927 if ( substr(
$header, 512, 4 ) ==
"\xEC\xA5\xC1\x00" ) {
928 $mime =
"application/msword";
930 switch ( substr(
$header, 512, 6 ) ) {
931 case "\xEC\xA5\xC1\x00\x0E\x00":
932 case "\xEC\xA5\xC1\x00\x1C\x00":
933 case "\xEC\xA5\xC1\x00\x43\x00":
934 $mime =
"application/vnd.ms-powerpoint";
936 case "\xFD\xFF\xFF\xFF\x10\x00":
937 case "\xFD\xFF\xFF\xFF\x1F\x00":
938 case "\xFD\xFF\xFF\xFF\x22\x00":
939 case "\xFD\xFF\xFF\xFF\x23\x00":
940 case "\xFD\xFF\xFF\xFF\x28\x00":
941 case "\xFD\xFF\xFF\xFF\x29\x00":
942 case "\xFD\xFF\xFF\xFF\x10\x02":
943 case "\xFD\xFF\xFF\xFF\x1F\x02":
944 case "\xFD\xFF\xFF\xFF\x22\x02":
945 case "\xFD\xFF\xFF\xFF\x23\x02":
946 case "\xFD\xFF\xFF\xFF\x28\x02":
947 case "\xFD\xFF\xFF\xFF\x29\x02":
948 $mime =
"application/vnd.msexcel";
952 $this->logger->info( __METHOD__ .
953 ": detected a MS Office document with OPC trailer\n" );
955 $this->logger->info( __METHOD__ .
": unable to identify type of ZIP archive\n" );
969 if ( !$info[
'valid'] ) {
970 $this->logger->info( __METHOD__ .
': invalid file format' );
971 return 'unknown/unknown';
973 if ( !$info[
'mime'] ) {
974 $this->logger->info( __METHOD__ .
": unrecognised document subtype" );
975 return 'unknown/unknown';
977 return $info[
'mime'];
1000 $this->logger->info( __METHOD__ .
1001 ": WARNING: use of the \$ext parameter is deprecated. "
1002 .
"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
1008 $m = $callback(
$file );
1010 $m = mime_content_type(
$file );
1015 $m = preg_replace(
'![;, ].*$!',
'', $m ); # strip charset,
etc
1017 $m = strtolower( $m );
1019 if ( strpos( $m,
'unknown' ) !==
false ) {
1022 $this->logger->info( __METHOD__ .
": magic mime type of $file: $m\n" );
1028 if (
$ext ===
true ) {
1029 $i = strrpos(
$file,
'.' );
1030 $ext = strtolower( $i ? substr(
$file, $i + 1 ) :
'' );
1034 $this->logger->info( __METHOD__ .
": refusing to guess mime type for .$ext file, "
1035 .
"we should have recognized it\n" );
1039 $this->logger->info( __METHOD__ .
": extension mime type of $file: $m\n" );
1046 $this->logger->info( __METHOD__ .
": failed to guess mime type for $file!\n" );
1047 return 'unknown/unknown';
1067 if ( !$mime && !
$path ) {
1078 if ( $mime ==
'application/ogg' && file_exists(
$path ) ) {
1084 $head = fread(
$f, 256 );
1087 $head = str_replace(
'ffmpeg2theora',
'', strtolower( $head ) );
1090 if ( strpos( $head,
'theora' ) !==
false ) {
1092 } elseif ( strpos( $head,
'vorbis' ) !==
false ) {
1094 } elseif ( strpos( $head,
'flac' ) !==
false ) {
1096 } elseif ( strpos( $head,
'speex' ) !==
false ) {
1098 } elseif ( strpos( $head,
'opus' ) !==
false ) {
1116 $i = strrpos(
$path,
'.' );
1117 $e = strtolower( $i ? substr(
$path, $i + 1 ) :
'' );
1128 $i = strpos( $mime,
'/' );
1129 if ( $i !==
false ) {
1130 $major = substr( $mime, 0, $i );
1156 if ( strpos( $extMime,
'.' ) === 0 ) {
1163 $m = explode(
' ', $m );
1166 if ( isset( $this->mimeTypeAliases[$extMime] ) ) {
1167 $extMime = $this->mimeTypeAliases[$extMime];
1173 foreach ( $m
as $mime ) {
1174 foreach ( $this->mediaTypes
as $type => $codes ) {
1175 if ( in_array( $mime, $codes,
true ) ) {
1190 return array_keys( $this->mediaTypes );
1204 return $ca->getRealMimesFromData( $fileName, $chunk, $proposed );
1213 if ( is_null( $this->IEAnalyzer ) ) {