30class MimeAnalyzer
implements LoggerAwareInterface {
/**
 * @var array|null Media type => list of MIME type strings belonging to it.
 * Null until the MIME info text is parsed, at which point it is reset to an
 * empty array and filled one info line at a time.
 */
46 protected $mediaTypes =
null;
/**
 * @var array|null Alias MIME type => main MIME type. Null until the MIME
 * info text is parsed. Lookups replace an aliased type with its main type.
 */
48 protected $mimeTypeAliases =
null;
/**
 * @var array|null MIME type => space-separated string of file extensions.
 * Null until the MIME types text is parsed; extensions from repeated lines
 * for the same type are appended with a single space.
 */
50 protected $mimetoExt =
null;
/**
 * @var array|null File extension => space-separated string of MIME types.
 * Built alongside $mimetoExt from the same lines. Note that this one is public.
 */
53 public $mExtToMime =
null;
/**
 * @var string Additional MIME type definition lines; new text is appended
 * after a "\n" separator.
 */
59 private $extraTypes =
'';
/**
 * @var string Additional MIME info definition lines; new text is appended
 * after a "\n" separator.
 */
61 private $extraInfo =
'';
85 protected static $wellKnownTypes = <<<EOT
86application/ogg ogx ogg ogm ogv oga spx opus
88application/vnd.oasis.opendocument.chart odc
89application/vnd.oasis.opendocument.chart-
template otc
90application/vnd.oasis.opendocument.database odb
91application/vnd.oasis.opendocument.formula odf
92application/vnd.oasis.opendocument.formula-
template otf
93application/vnd.oasis.opendocument.graphics odg
94application/vnd.oasis.opendocument.graphics-
template otg
95application/vnd.oasis.opendocument.image odi
96application/vnd.oasis.opendocument.image-
template oti
97application/vnd.oasis.opendocument.presentation odp
98application/vnd.oasis.opendocument.presentation-
template otp
99application/vnd.oasis.opendocument.spreadsheet ods
100application/vnd.oasis.opendocument.spreadsheet-
template ots
101application/vnd.oasis.opendocument.text odt
102application/vnd.oasis.opendocument.text-master otm
103application/vnd.oasis.opendocument.text-
template ott
104application/vnd.oasis.opendocument.text-web oth
105application/javascript js
106application/x-shockwave-flash swf
107audio/midi mid midi kar
108audio/mpeg mpga mpa mp2 mp3
109audio/x-aiff aif aiff aifc
111audio/ogg oga spx ogg opus
112audio/opus opus ogg oga ogg spx
115image/jpeg jpeg jpg jpe
123image/x-portable-pixmap ppm
137 protected static $wellKnownInfo = <<<EOT
139application/vnd.oasis.opendocument.chart [
OFFICE]
140application/vnd.oasis.opendocument.chart-
template [
OFFICE]
141application/vnd.oasis.opendocument.database [
OFFICE]
142application/vnd.oasis.opendocument.formula [
OFFICE]
143application/vnd.oasis.opendocument.formula-
template [
OFFICE]
144application/vnd.oasis.opendocument.graphics [
OFFICE]
145application/vnd.oasis.opendocument.graphics-
template [
OFFICE]
146application/vnd.oasis.opendocument.image [
OFFICE]
147application/vnd.oasis.opendocument.image-
template [
OFFICE]
148application/vnd.oasis.opendocument.presentation [
OFFICE]
149application/vnd.oasis.opendocument.presentation-
template [
OFFICE]
150application/vnd.oasis.opendocument.spreadsheet [
OFFICE]
151application/vnd.oasis.opendocument.spreadsheet-
template [
OFFICE]
152application/vnd.oasis.opendocument.text [
OFFICE]
153application/vnd.oasis.opendocument.text-
template [
OFFICE]
154application/vnd.oasis.opendocument.text-master [
OFFICE]
155application/vnd.oasis.opendocument.text-web [
OFFICE]
156application/javascript text/javascript application/x-javascript [
EXECUTABLE]
161audio/mp3 audio/mpeg [
AUDIO]
162application/ogg audio/ogg video/ogg [
MULTIMEDIA]
163image/x-bmp image/x-ms-bmp image/bmp [
BITMAP]
171image/x-portable-pixmap [
BITMAP]
176unknown/unknown application/octet-stream application/x-empty [
UNKNOWN]
195 $this->typeFile =
$params[
'typeFile'];
196 $this->infoFile =
$params[
'infoFile'];
197 $this->xmlTypes =
$params[
'xmlTypes'];
198 $this->initCallback =
$params[
'initCallback'] ??
null;
199 $this->detectCallback =
$params[
'detectCallback'] ??
null;
200 $this->guessCallback =
$params[
'guessCallback'] ??
null;
201 $this->extCallback =
$params[
'extCallback'] ??
null;
212 # Allow media handling extensions adding MIME-types and MIME-info
213 if ( $this->initCallback ) {
220 if ( $mimeTypeFile ) {
222 $this->logger->info( __METHOD__ .
": loading mime types from $mimeTypeFile\n" );
226 $this->logger->info( __METHOD__ .
": can't load mime types from $mimeTypeFile\n" );
229 $this->logger->info( __METHOD__ .
230 ": no mime types file defined, using built-ins only.\n" );
235 $types =
str_replace( [
"\r\n",
"\n\r",
"\n\n",
"\r\r",
"\r" ],
"\n", $types );
238 $this->mimetoExt = [];
239 $this->mExtToMime = [];
241 $lines = explode(
"\n", $types );
254 if ( $i ===
false ) {
261 if ( empty(
$ext ) ) {
265 if ( !empty( $this->mimetoExt[$mime] ) ) {
266 $this->mimetoExt[$mime] .=
' ' .
$ext;
268 $this->mimetoExt[$mime] =
$ext;
271 $extensions = explode(
' ',
$ext );
273 foreach ( $extensions as
$e ) {
279 if ( !empty( $this->mExtToMime[
$e] ) ) {
280 $this->mExtToMime[
$e] .=
' ' . $mime;
282 $this->mExtToMime[
$e] = $mime;
295 if ( $mimeInfoFile ) {
297 $this->logger->info( __METHOD__ .
": loading mime info from $mimeInfoFile\n" );
301 $this->logger->info( __METHOD__ .
": can't load mime info from $mimeInfoFile\n" );
304 $this->logger->info( __METHOD__ .
305 ": no mime info file defined, using built-ins only.\n" );
310 $info =
str_replace( [
"\r\n",
"\n\r",
"\n\n",
"\r\r",
"\r" ],
"\n", $info );
313 $this->mimeTypeAliases = [];
314 $this->mediaTypes = [];
316 $lines = explode(
"\n", $info );
329 if ( $i ===
false ) {
333 # print "processing MIME INFO line $s<br>";
343 $m = explode(
' ',
$s );
345 if ( !
isset( $this->mediaTypes[$mtype] ) ) {
346 $this->mediaTypes[
$mtype] = [];
349 foreach ( $m as $mime ) {
350 $mime =
trim( $mime );
351 if ( empty( $mime ) ) {
355 $this->mediaTypes[
$mtype][] = $mime;
358 if ( count( $m ) > 1 ) {
360 $mCount = count( $m );
361 for ( $i = 1; $i <
$mCount; $i += 1 ) {
363 $this->mimeTypeAliases[$mime] = $main;
369 public function setLogger( LoggerInterface $logger ) {
370 $this->logger = $logger;
380 $this->extraTypes .=
"\n" . $types;
390 $this->extraInfo .=
"\n" . $info;
405 if (
isset( $this->mimetoExt[$mime] ) ) {
406 return $this->mimetoExt[$mime];
410 if (
isset( $this->mimeTypeAliases[$mime] ) ) {
411 $mime = $this->mimeTypeAliases[$mime];
412 if (
isset( $this->mimetoExt[$mime] ) ) {
413 return $this->mimetoExt[$mime];
430 $r = $this->mExtToMime[
$ext] ??
null;
487 'image/gif',
'image/jpeg',
'image/png',
488 'image/x-bmp',
'image/xbm',
'image/tiff',
489 'image/jp2',
'image/jpeg2000',
'image/iff',
490 'image/xbm',
'image/x-xbitmap',
491 'image/vnd.wap.wbmp',
'image/vnd.xiff',
493 'application/x-shockwave-flash',
514 'gif',
'jpeg',
'jpg',
'png',
'swf',
'psd',
515 'bmp',
'tiff',
'tif',
'jpc',
'jp2',
516 'jpx',
'jb2',
'swc',
'iff',
'wbmp',
520 'djvu',
'ogx',
'ogg',
'ogv',
'oga',
'spx',
'opus',
521 'mid',
'pdf',
'wmf',
'xcf',
'webm',
'mkv',
'mka',
545 if ( $mime ===
'unknown/unknown' ) {
547 $this->logger->info( __METHOD__ .
': refusing to guess mime type for .' .
548 "$ext file, we should have recognized it\n" );
554 }
elseif ( $mime ===
'application/x-opc+zip' ) {
560 $this->logger->info( __METHOD__ .
561 ": refusing to guess better type for $mime file, " .
562 ".$ext is not a known OPC extension.\n" );
563 $mime =
'application/zip';
573 # Media handling extensions can improve the MIME detected
576 $callback( $this,
$ext, $mime );
579 if (
isset( $this->mimeTypeAliases[$mime] ) ) {
580 $mime = $this->mimeTypeAliases[$mime];
583 $this->logger->info( __METHOD__ .
": improved mime type for .$ext: $mime\n" );
603 $this->logger->info( __METHOD__ .
604 ": WARNING: use of the \$ext parameter is deprecated. " .
605 "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
608 $mime = $this->doGuessMimeType( $file,
$ext );
611 $this->logger->info( __METHOD__ .
612 ": internal type detection failed for $file (.$ext)...\n" );
616 if (
isset( $this->mimeTypeAliases[$mime] ) ) {
617 $mime = $this->mimeTypeAliases[$mime];
620 $this->logger->info( __METHOD__ .
": guessed mime type of $file: $mime\n" );
634 private function doGuessMimeType( $file,
$ext ) {
641 return 'unknown/unknown';
645 if ( $fsize ===
false ) {
646 return 'unknown/unknown';
650 $tailLength = min( 65558, $fsize );
651 if ( fseek(
$f, -1 * $tailLength, SEEK_END ) === -1 ) {
652 throw new UnexpectedValueException(
653 "Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
655 $tail = $tailLength ?
fread(
$f, $tailLength ) :
'';
657 $this->logger->info( __METHOD__ .
658 ": analyzing head and tail of $file for magic numbers.\n" );
663 'MThd' =>
'audio/midi',
664 'OggS' =>
'application/ogg',
665 'ID3' =>
'audio/mpeg',
666 "\xff\xfb" =>
'audio/mpeg',
667 "\xff\xf3" =>
'audio/mpeg',
668 "\xff\xe3" =>
'audio/mpeg',
672 "\x01\x00\x09\x00" =>
'application/x-msmetafile',
673 "\xd7\xcd\xc6\x9a" =>
'application/x-msmetafile',
674 '%PDF' =>
'application/pdf',
675 'gimp xcf' =>
'image/x-xcf',
678 'MZ' =>
'application/octet-stream',
679 "\xca\xfe\xba\xbe" =>
'application/octet-stream',
680 "\x7fELF" =>
'application/octet-stream',
683 foreach ( $headers as $magic => $candidate ) {
685 $this->logger->info( __METHOD__ .
686 ": magic header in $file recognized as $candidate\n" );
692 if (
strncmp( $head, pack(
"C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
693 $doctype =
strpos( $head,
"\x42\x82" );
698 $this->logger->info( __METHOD__ .
": recognized file as video/x-matroska\n" );
699 return "video/x-matroska";
702 $videotrack =
strpos( $head,
"\x86\x85V_VP" );
706 $this->logger->info( __METHOD__ .
": recognized file as video/webm\n" );
710 $this->logger->info( __METHOD__ .
": recognized file as audio/webm\n" );
714 $this->logger->info( __METHOD__ .
": unknown EBML file\n" );
715 return "unknown/unknown";
719 if (
strncmp( $head,
"RIFF", 4 ) == 0 &&
722 $this->logger->info( __METHOD__ .
": recognized file as image/webp\n" );
727 if (
strncmp( $head,
"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", 8 ) == 0 ) {
728 $this->logger->info( __METHOD__ .
': recognized MS CFB (OLE) file' );
744 if ( (
strpos( $head,
'<?php' ) !==
false ) ||
745 (
strpos( $head,
"<\x00?\x00p\x00h\x00p" ) !==
false ) ||
746 (
strpos( $head,
"<\x00?\x00 " ) !==
false ) ||
747 (
strpos( $head,
"<\x00?\x00\n" ) !==
false ) ||
748 (
strpos( $head,
"<\x00?\x00\t" ) !==
false ) ||
749 (
strpos( $head,
"<\x00?\x00=" ) !==
false )
751 $this->logger->info( __METHOD__ .
": recognized $file as application/x-php\n" );
752 return 'application/x-php';
761 if ( $xml->wellFormed ) {
763 return $xmlTypes[$xml->getRootElement()] ??
'application/xml';
772 if (
substr( $head, 0, 2 ) ==
"#!" ) {
773 $script_type =
"ASCII";
774 }
elseif (
substr( $head, 0, 5 ) ==
"\xef\xbb\xbf#!" ) {
775 $script_type =
"UTF-8";
776 }
elseif (
substr( $head, 0, 7 ) ==
"\xfe\xff\x00#\x00!" ) {
777 $script_type =
"UTF-16BE";
778 }
elseif (
substr( $head, 0, 7 ) ==
"\xff\xfe#\x00!" ) {
779 $script_type =
"UTF-16LE";
782 if ( $script_type ) {
783 if ( $script_type !==
"UTF-8" && $script_type !==
"ASCII" ) {
785 $pack = [
'UTF-16BE' =>
'n*',
'UTF-16LE' =>
'v*' ];
786 $chars = unpack( $pack[$script_type],
substr( $head, 2 ) );
788 foreach ( $chars as $codepoint ) {
789 if ( $codepoint < 128 ) {
790 $head .=
chr( $codepoint );
799 if (
preg_match(
'%/?([^\s]+/)(\w+)%', $head, $match ) ) {
800 $mime =
"application/x-{$match[2]}";
801 $this->logger->info( __METHOD__ .
": shell script recognized as $mime\n" );
807 $eocdrPos =
strpos( $tail,
"PK\x05\x06" );
808 if ( $eocdrPos !==
false ) {
809 $this->logger->info( __METHOD__ .
": ZIP signature present in $file\n" );
// The end-of-central-directory record keeps the archive comment length as an
// unsigned little-endian 16-bit integer at offset 20, hence the "v" format.
// unpack() numbers unnamed elements starting at 1, so the value is at [1].
$lengthField = substr( $tail, $eocdrPos + 20, 2 );
$commentLength = strlen( $lengthField ) === 2
	? unpack( "v", $lengthField )[1]
	// Record is cut off before the length field: use a value that can never
	// satisfy the "EOCDR ends exactly at end of file" check that follows.
	: strlen( $tail );
812 if ( $eocdrPos + 22 + $commentLength !==
strlen( $tail ) ) {
813 $this->logger->info( __METHOD__ .
": ZIP EOCDR not at end. Not a ZIP file." );
822 stripos( $head,
'SOLID ' ) === 0 &&
825 return 'application/sla';
826 }
elseif ( $fsize > 84 ) {
828 $triangles =
substr( $head, 80, 4 );
829 $triangles = unpack(
'V', $triangles );
830 $triangles = reset( $triangles );
831 if ( $triangles !==
false && $fsize === 84 + ( $triangles * 50 ) ) {
832 return 'application/sla';
840 if ( $gis &&
isset( $gis[
'mime'] ) ) {
841 $mime = $gis[
'mime'];
842 $this->logger->info( __METHOD__ .
": getimagesize detected $file as $mime\n" );
846 # Media handling extensions can guess the MIME by content
847 # It's intentionally here so that if core is wrong about a type (false positive),
848 # people will hopefully nag and submit patches :)
850 # Some strings by reference for performance - assuming well-behaved hooks
853 $callback( $this, $head, $tail, $file, $mime );
874 $this->logger->info( __METHOD__ .
875 ": WARNING: use of the \$ext parameter is deprecated. " .
876 "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
879 $mime =
'application/zip';
889 'presentation-template',
891 'spreadsheet-template',
899 $types =
'(?:' . implode(
'|', $opendocTypes ) .
')';
900 $opendocRegex =
"/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
902 $openxmlRegex =
"/^\[Content_Types\].xml/";
906 $this->logger->info( __METHOD__ .
": detected $mime from ZIP archive\n" );
908 $mime =
"application/x-opc+zip";
909 # TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
910 if (
$ext !==
true &&
$ext !==
false ) {
921 $mime =
"application/zip";
924 $this->logger->info( __METHOD__ .
925 ": detected an Open Packaging Conventions archive: $mime\n" );
927 ( $headerpos =
strpos( $tail,
"PK\x03\x04" ) ) !==
false &&
930 $mime =
"application/msword";
933 case "\xEC\xA5\xC1\x00\x0E\x00":
934 case "\xEC\xA5\xC1\x00\x1C\x00":
935 case "\xEC\xA5\xC1\x00\x43\x00":
936 $mime =
"application/vnd.ms-powerpoint";
938 case "\xFD\xFF\xFF\xFF\x10\x00":
939 case "\xFD\xFF\xFF\xFF\x1F\x00":
940 case "\xFD\xFF\xFF\xFF\x22\x00":
941 case "\xFD\xFF\xFF\xFF\x23\x00":
942 case "\xFD\xFF\xFF\xFF\x28\x00":
943 case "\xFD\xFF\xFF\xFF\x29\x00":
944 case "\xFD\xFF\xFF\xFF\x10\x02":
945 case "\xFD\xFF\xFF\xFF\x1F\x02":
946 case "\xFD\xFF\xFF\xFF\x22\x02":
947 case "\xFD\xFF\xFF\xFF\x23\x02":
948 case "\xFD\xFF\xFF\xFF\x28\x02":
949 case "\xFD\xFF\xFF\xFF\x29\x02":
950 $mime =
"application/vnd.msexcel";
954 $this->logger->info( __METHOD__ .
955 ": detected a MS Office document with OPC trailer\n" );
957 $this->logger->info( __METHOD__ .
": unable to identify type of ZIP archive\n" );
971 if ( !$info[
'valid'] ) {
972 $this->logger->info( __METHOD__ .
': invalid file format' );
973 return 'unknown/unknown';
975 if ( !$info[
'mime'] ) {
976 $this->logger->info( __METHOD__ .
": unrecognised document subtype" );
977 return 'unknown/unknown';
979 return $info[
'mime'];
1002 $this->logger->info( __METHOD__ .
1003 ": WARNING: use of the \$ext parameter is deprecated. "
1004 .
"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
1010 $m = $callback( $file );
1021 if (
strpos( $m,
'unknown' ) !==
false ) {
1024 $this->logger->info( __METHOD__ .
": magic mime type of $file: $m\n" );
1030 if (
$ext ===
true ) {
1036 $this->logger->info( __METHOD__ .
": refusing to guess mime type for .$ext file, "
1037 .
"we should have recognized it\n" );
1041 $this->logger->info( __METHOD__ .
": extension mime type of $file: $m\n" );
1048 $this->logger->info( __METHOD__ .
": failed to guess mime type for $file!\n" );
1049 return 'unknown/unknown';
1068 function getMediaType( $path =
null, $mime =
null ) {
1069 if ( !$mime && !$path ) {
1080 if ( $mime ==
'application/ogg' &&
file_exists( $path ) ) {
1092 if (
strpos( $head,
'theora' ) !==
false ) {
1130 $i =
strpos( $mime,
'/' );
1131 if ( $i !==
false ) {
1132 $major =
substr( $mime, 0, $i );
1158 if (
strpos( $extMime,
'.' ) === 0 ) {
1165 $m = explode(
' ', $m );
1168 if (
isset( $this->mimeTypeAliases[$extMime] ) ) {
1169 $extMime = $this->mimeTypeAliases[
$extMime];
1175 foreach ( $m as $mime ) {
1176 foreach ( $this->mediaTypes as
$type => $codes ) {
1177 if (
in_array( $mime, $codes,
true ) ) {
1206 return $ca->getRealMimesFromData( $fileName, $chunk, $proposed );
1215 if (
is_null( $this->IEAnalyzer ) ) {
and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection a...
static readHandle( $fileHandle)
Read from an open seekable handle.
for adding new MIME info to the list Use $mimeMagic addExtraTypes( $stringOfTypes)
returning false will NOT prevent logging $e
This document describes how event hooks work in the Renameuser extension. For a more comprehensive guide to hooks, navigate to your root MediaWiki directory and read docs/hooks.txt.
$data
Utility to generate mapping file used in mw.Title (phpCharToUpper.json)
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
const MEDIATYPE_MULTIMEDIA
if(!is_readable( $file)) $ext