MediaWiki REL1_31
MimeAnalyzer.php
Go to the documentation of this file.
1<?php
22use Psr\Log\LoggerAwareInterface;
23use Psr\Log\LoggerInterface;
24
30class MimeAnalyzer implements LoggerAwareInterface {
32 protected $typeFile;
34 protected $infoFile;
36 protected $xmlTypes;
38 protected $initCallback;
40 protected $detectCallback;
42 protected $guessCallback;
44 protected $extCallback;
46 protected $mediaTypes = null;
48 protected $mimeTypeAliases = null;
50 protected $mimetoExt = null;
51
53 public $mExtToMime = null; // legacy name; field accessed by hooks
54
56 protected $IEAnalyzer;
57
59 private $extraTypes = '';
61 private $extraInfo = '';
62
64 private $logger;
65
85 protected static $wellKnownTypes = <<<EOT
86application/ogg ogx ogg ogm ogv oga spx opus
87application/pdf pdf
88application/vnd.oasis.opendocument.chart odc
89application/vnd.oasis.opendocument.chart-template otc
90application/vnd.oasis.opendocument.database odb
91application/vnd.oasis.opendocument.formula odf
92application/vnd.oasis.opendocument.formula-template otf
93application/vnd.oasis.opendocument.graphics odg
94application/vnd.oasis.opendocument.graphics-template otg
95application/vnd.oasis.opendocument.image odi
96application/vnd.oasis.opendocument.image-template oti
97application/vnd.oasis.opendocument.presentation odp
98application/vnd.oasis.opendocument.presentation-template otp
99application/vnd.oasis.opendocument.spreadsheet ods
100application/vnd.oasis.opendocument.spreadsheet-template ots
101application/vnd.oasis.opendocument.text odt
102application/vnd.oasis.opendocument.text-master otm
103application/vnd.oasis.opendocument.text-template ott
104application/vnd.oasis.opendocument.text-web oth
105application/javascript js
106application/x-shockwave-flash swf
107audio/midi mid midi kar
108audio/mpeg mpga mpa mp2 mp3
109audio/x-aiff aif aiff aifc
110audio/x-wav wav
111audio/ogg oga spx ogg opus
112audio/opus opus ogg oga ogg spx
113image/x-bmp bmp
114image/gif gif
115image/jpeg jpeg jpg jpe
116image/png png
117image/svg+xml svg
118image/svg svg
119image/tiff tiff tif
120image/vnd.djvu djvu
121image/x.djvu djvu
122image/x-djvu djvu
123image/x-portable-pixmap ppm
124image/x-xcf xcf
125text/plain txt
126text/html html htm
127video/ogg ogv ogm ogg
128video/mpeg mpg mpeg
129EOT;
130
137 protected static $wellKnownInfo = <<<EOT
138application/pdf [OFFICE]
139application/vnd.oasis.opendocument.chart [OFFICE]
140application/vnd.oasis.opendocument.chart-template [OFFICE]
141application/vnd.oasis.opendocument.database [OFFICE]
142application/vnd.oasis.opendocument.formula [OFFICE]
143application/vnd.oasis.opendocument.formula-template [OFFICE]
144application/vnd.oasis.opendocument.graphics [OFFICE]
145application/vnd.oasis.opendocument.graphics-template [OFFICE]
146application/vnd.oasis.opendocument.image [OFFICE]
147application/vnd.oasis.opendocument.image-template [OFFICE]
148application/vnd.oasis.opendocument.presentation [OFFICE]
149application/vnd.oasis.opendocument.presentation-template [OFFICE]
150application/vnd.oasis.opendocument.spreadsheet [OFFICE]
151application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
152application/vnd.oasis.opendocument.text [OFFICE]
153application/vnd.oasis.opendocument.text-template [OFFICE]
154application/vnd.oasis.opendocument.text-master [OFFICE]
155application/vnd.oasis.opendocument.text-web [OFFICE]
156application/javascript text/javascript application/x-javascript [EXECUTABLE]
157application/x-shockwave-flash [MULTIMEDIA]
158audio/midi [AUDIO]
159audio/x-aiff [AUDIO]
160audio/x-wav [AUDIO]
161audio/mp3 audio/mpeg [AUDIO]
162application/ogg audio/ogg video/ogg [MULTIMEDIA]
163image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
164image/gif [BITMAP]
165image/jpeg [BITMAP]
166image/png [BITMAP]
167image/svg+xml [DRAWING]
168image/tiff [BITMAP]
169image/vnd.djvu [BITMAP]
170image/x-xcf [BITMAP]
171image/x-portable-pixmap [BITMAP]
172text/plain [TEXT]
173text/html [TEXT]
174video/ogg [VIDEO]
175video/mpeg [VIDEO]
176unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
177EOT;
178
/**
 * Store the configuration and immediately build the MIME lookup tables.
 *
 * @param array $params Configuration map. Keys read by this constructor:
 *   - typeFile, infoFile, xmlTypes: read unconditionally and stored as-is.
 *   - initCallback, detectCallback, guessCallback, extCallback: optional;
 *     each is stored as null when the key is not set.
 *   - logger: optional; a \Psr\Log\NullLogger is created when not set.
 */
public function __construct( array $params ) {
	$this->typeFile = $params['typeFile'];
	$this->infoFile = $params['infoFile'];
	$this->xmlTypes = $params['xmlTypes'];

	// The four optional callbacks share the same "use it or null" rule,
	// and the property name matches the parameter key for each of them.
	$callbackKeys = [ 'initCallback', 'detectCallback', 'guessCallback', 'extCallback' ];
	foreach ( $callbackKeys as $key ) {
		$this->$key = isset( $params[$key] ) ? $params[$key] : null;
	}

	if ( isset( $params['logger'] ) ) {
		$this->logger = $params['logger'];
	} else {
		$this->logger = new \Psr\Log\NullLogger();
	}

	// Populates mimetoExt, mExtToMime, mimeTypeAliases and mediaTypes.
	$this->loadFiles();
}
216
/**
 * Build the four lookup tables from the built-in lists, the optional
 * type/info files, and any text added via addExtraTypes()/addExtraInfo().
 *
 * Fills:
 *  - $this->mimetoExt: MIME type => space-separated extension list
 *  - $this->mExtToMime: extension => space-separated MIME type list
 *  - $this->mediaTypes: media type name => list of MIME types
 *  - $this->mimeTypeAliases: alias MIME type => first MIME type on its line
 *
 * The init callback (if any) is invoked first with this object, so it can
 * register extra types/info before the tables are built.
 */
protected function loadFiles() {
	# Allow media handling extensions adding MIME-types and MIME-info
	if ( $this->initCallback ) {
		call_user_func( $this->initCallback, $this );
	}

	// Line-break variants collapsed to "\n" before splitting into lines.
	$lineBreaks = [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ];

	// ---- Part 1: "mime ext ext ..." lines ----
	$typeText = self::$wellKnownTypes;
	$typePath = $this->typeFile;

	if ( !$typePath ) {
		$this->logger->info( __METHOD__ .
			": no mime types file defined, using built-ins only.\n" );
	} elseif ( is_file( $typePath ) && is_readable( $typePath ) ) {
		$this->logger->info( __METHOD__ . ": loading mime types from $typePath\n" );
		$typeText .= "\n";
		$typeText .= file_get_contents( $typePath );
	} else {
		$this->logger->info( __METHOD__ . ": can't load mime types from $typePath\n" );
	}

	$typeText .= "\n" . $this->extraTypes;
	$typeText = str_replace( $lineBreaks, "\n", $typeText );
	$typeText = str_replace( "\t", " ", $typeText );

	$this->mimetoExt = [];
	$this->mExtToMime = [];

	foreach ( explode( "\n", $typeText ) as $row ) {
		$row = trim( $row );
		// Skip blank rows and rows starting with a '#' comment marker.
		if ( empty( $row ) || strpos( $row, '#' ) === 0 ) {
			continue;
		}

		$row = strtolower( $row );
		$sep = strpos( $row, ' ' );
		if ( $sep === false ) {
			// A MIME type without any extension carries no mapping.
			continue;
		}

		$mime = substr( $row, 0, $sep );
		$extList = trim( substr( $row, $sep + 1 ) );
		if ( empty( $extList ) ) {
			continue;
		}

		// Repeated MIME types accumulate their extension lists.
		$this->mimetoExt[$mime] = empty( $this->mimetoExt[$mime] )
			? $extList
			: $this->mimetoExt[$mime] . ' ' . $extList;

		foreach ( explode( ' ', $extList ) as $oneExt ) {
			$oneExt = trim( $oneExt );
			if ( empty( $oneExt ) ) {
				continue;
			}

			// Reverse map: an extension may belong to several MIME types.
			$this->mExtToMime[$oneExt] = empty( $this->mExtToMime[$oneExt] )
				? $mime
				: $this->mExtToMime[$oneExt] . ' ' . $mime;
		}
	}

	// ---- Part 2: "mime alias alias ... [MEDIATYPE]" lines ----
	$infoText = self::$wellKnownInfo;
	$infoPath = $this->infoFile;

	if ( !$infoPath ) {
		$this->logger->info( __METHOD__ .
			": no mime info file defined, using built-ins only.\n" );
	} elseif ( is_file( $infoPath ) && is_readable( $infoPath ) ) {
		$this->logger->info( __METHOD__ . ": loading mime info from $infoPath\n" );
		$infoText .= "\n";
		$infoText .= file_get_contents( $infoPath );
	} else {
		$this->logger->info( __METHOD__ . ": can't load mime info from $infoPath\n" );
	}

	$infoText .= "\n" . $this->extraInfo;
	$infoText = str_replace( $lineBreaks, "\n", $infoText );
	$infoText = str_replace( "\t", " ", $infoText );

	$this->mimeTypeAliases = [];
	$this->mediaTypes = [];

	$bracketRegex = '!\[\s*(\w+)\s*\]!';

	foreach ( explode( "\n", $infoText ) as $row ) {
		$row = trim( $row );
		if ( empty( $row ) || strpos( $row, '#' ) === 0 ) {
			continue;
		}

		$row = strtolower( $row );
		if ( strpos( $row, ' ' ) === false ) {
			continue;
		}

		// The bracketed token names the media type; it is stripped from the
		// row so only MIME type names remain. Rows without one are UNKNOWN.
		$found = [];
		if ( preg_match( $bracketRegex, $row, $found ) ) {
			$row = preg_replace( $bracketRegex, '', $row );
			$mediaType = trim( strtoupper( $found[1] ) );
		} else {
			$mediaType = MEDIATYPE_UNKNOWN;
		}

		$names = explode( ' ', $row );

		if ( !isset( $this->mediaTypes[$mediaType] ) ) {
			$this->mediaTypes[$mediaType] = [];
		}

		foreach ( $names as $name ) {
			$name = trim( $name );
			if ( empty( $name ) ) {
				continue;
			}

			$this->mediaTypes[$mediaType][] = $name;
		}

		// Every piece after the first is an alias of the first one.
		// NOTE(review): pieces are used unfiltered here, so the empty piece
		// left behind by the stripped "[TYPE]" token registers '' as an alias
		// key. Kept as-is because lookups with a null/empty MIME may rely on it.
		$canonical = $names[0];
		foreach ( array_slice( $names, 1 ) as $alias ) {
			$this->mimeTypeAliases[$alias] = $canonical;
		}
	}
}
378
/**
 * Replace the logger used for all diagnostic messages of this object.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
382
/**
 * Queue additional "mime ext ext ..." lines for loadFiles().
 *
 * The text is only appended to an internal buffer here; it takes effect
 * when loadFiles() next builds the tables.
 *
 * @param string $types Lines in the same format as the built-in type list
 */
public function addExtraTypes( $types ) {
	$this->extraTypes = $this->extraTypes . "\n" . $types;
}
392
/**
 * Queue additional "mime alias ... [MEDIATYPE]" lines for loadFiles().
 *
 * The text is only appended to an internal buffer here; it takes effect
 * when loadFiles() next builds the tables.
 *
 * @param string $info Lines in the same format as the built-in info list
 */
public function addExtraInfo( $info ) {
	$this->extraInfo = $this->extraInfo . "\n" . $info;
}
402
/**
 * Look up the extensions registered for a MIME type.
 *
 * The type is lower-cased and tried directly first; if it has no entry,
 * its alias target (if any) is tried instead.
 *
 * @param string $mime
 * @return string|null Space-separated extension list, or null if neither
 *   the type nor its alias target has an entry
 */
public function getExtensionsForType( $mime ) {
	$key = strtolower( $mime );

	if ( !isset( $this->mimetoExt[$key] ) ) {
		// No direct entry: fall back to the canonical type, if one is known.
		if ( !isset( $this->mimeTypeAliases[$key] ) ) {
			return null;
		}

		$key = $this->mimeTypeAliases[$key];
		if ( !isset( $this->mimetoExt[$key] ) ) {
			return null;
		}
	}

	return $this->mimetoExt[$key];
}
429
/**
 * Look up all MIME types registered for a file extension.
 *
 * @param string $ext Extension without a leading dot (case-insensitive)
 * @return string|null Space-separated MIME type list, or null if unknown
 */
public function getTypesForExtension( $ext ) {
	$key = strtolower( $ext );

	if ( isset( $this->mExtToMime[$key] ) ) {
		return $this->mExtToMime[$key];
	}

	return null;
}
443
/**
 * Return a single MIME type for a file extension: the first entry of the
 * list returned by getTypesForExtension().
 *
 * @param string $ext
 * @return string|null The first MIME type, or null if the extension is unknown
 */
public function guessTypesForExtension( $ext ) {
	$allTypes = $this->getTypesForExtension( $ext );
	if ( $allTypes === null ) {
		return null;
	}

	// Keep only the text before the first whitespace character.
	// TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
	return preg_replace( '/\s.*$/', '', trim( $allTypes ) );
}
463
/**
 * Test whether a file extension is registered for the given MIME type.
 *
 * @param string $extension Extension without a leading dot (case-insensitive)
 * @param string $mime
 * @return bool|null True/false for a known MIME type; null when
 *   getExtensionsForType() has nothing for the MIME type
 */
public function isMatchingExtension( $extension, $mime ) {
	$known = $this->getExtensionsForType( $mime );

	if ( !$known ) {
		return null; // Unknown MIME type
	}

	// Strict comparison: with loose comparison PHP treats numeric-looking
	// strings as numbers, so e.g. "1e1" would match a registered "10".
	return in_array( strtolower( $extension ), explode( ' ', $known ), true );
}
485
/**
 * Test whether a MIME type is one of the image types listed here as
 * handled by PHP's image functions.
 *
 * @param string $mime Exact MIME type string (no case folding is applied)
 * @return bool
 */
public function isPHPImageType( $mime ) {
	// As defined by imagegetsize and image_type_to_mime
	static $types = [
		'image/gif', 'image/jpeg', 'image/png',
		'image/x-bmp', 'image/xbm', 'image/tiff',
		'image/jp2', 'image/jpeg2000', 'image/iff',
		'image/x-xbitmap',
		'image/vnd.wap.wbmp', 'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	];

	// Strict comparison so that non-string input (e.g. 0 or true) can never
	// be loosely equal to one of the type names.
	return in_array( $mime, $types, true );
}
508
/**
 * Test whether an extension belongs to a format this class expects to
 * identify from file content. Callers in this class use it to refuse
 * extension-based guessing for such files.
 *
 * @param string $extension Extension without a leading dot (case-insensitive)
 * @return bool
 */
function isRecognizableExtension( $extension ) {
	static $recognizable = [
		// Types recognized by getimagesize()
		'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd', 'bmp', 'tiff', 'tif',
		'jpc', 'jp2', 'jpx', 'jb2', 'swc', 'iff', 'wbmp', 'xbm',

		// Formats we recognize magic numbers for
		'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx', 'opus', 'mid', 'pdf',
		'wmf', 'xcf', 'webm', 'mkv', 'mka', 'webp', 'mp3',

		// XML formats we sure hope we recognize reliably
		'svg',

		// 3D formats
		'stl',
	];

	$needle = strtolower( $extension );

	return in_array( $needle, $recognizable );
}
542
/**
 * Refine a content-detected MIME type using the file extension.
 *
 * Only three detected types are refined here: 'unknown/unknown',
 * 'application/x-opc+zip', and 'text/plain' when the extension maps to a
 * textual media type. Afterwards the extension callback (if any) runs and
 * the result is resolved through the alias table.
 *
 * @param string $mime MIME type detected from content
 * @param string $ext File extension without a leading dot
 * @return string|null Improved MIME type. guessTypesForExtension() can
 *   yield null for an unknown extension; what is returned then depends on
 *   the alias table and the callback.
 */
public function improveTypeFromExtension( $mime, $ext ) {
	if ( $mime === 'unknown/unknown' ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->guessTypesForExtension( $ext );
		} else {
			$this->logger->info( __METHOD__ .
				": refusing to guess mime type for .$ext file, we should have recognized it\n" );
		}
	} elseif ( $mime === 'application/x-opc+zip' ) {
		if ( !$this->isMatchingExtension( $ext, $mime ) ) {
			$this->logger->info( __METHOD__ .
				": refusing to guess better type for $mime file, " .
				".$ext is not a known OPC extension.\n" );
			$mime = 'application/zip';
		} else {
			// A known file extension for an OPC file,
			// find the proper MIME type for that file extension
			$mime = $this->guessTypesForExtension( $ext );
		}
	} elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
		// Textual types are sometimes not recognized properly.
		// If detected as text/plain, and has an extension which is textual
		// improve to the extension's type. For example, csv and json are often
		// misdetected as text/plain.
		$mime = $this->guessTypesForExtension( $ext );
	}

	# Media handling extensions can improve the MIME detected
	$improver = $this->extCallback;
	if ( $improver ) {
		// The callback is expected to take $mime by reference -- that depends
		// on the callback's own signature, which is not visible here.
		$improver( $this, $ext, $mime /* by reference */ );
	}

	// Always report the canonical name when the result is a known alias.
	if ( isset( $this->mimeTypeAliases[$mime] ) ) {
		$mime = $this->mimeTypeAliases[$mime];
	}

	$this->logger->info( __METHOD__ . ": improved mime type for .$ext: $mime\n" );

	return $mime;
}
596
/**
 * Determine the MIME type of a file from its content.
 *
 * Tries the built-in magic-number checks first (doGuessMimeType()), falls
 * back to detectMimeType() when those yield nothing, and finally resolves
 * the result through the alias table.
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Deprecated: passed through to the helpers; a truthy
 *   value only triggers a deprecation log message here
 * @return string MIME type
 */
public function guessMimeType( $file, $ext = true ) {
	if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
		$this->logger->info( __METHOD__ .
			': WARNING: use of the $ext parameter is deprecated. ' .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$guess = $this->doGuessMimeType( $file, $ext );

	if ( !$guess ) {
		$this->logger->info( __METHOD__ .
			": internal type detection failed for $file (.$ext)...\n" );
		$guess = $this->detectMimeType( $file, $ext );
	}

	// Report the canonical name when the guess is a known alias.
	if ( isset( $this->mimeTypeAliases[$guess] ) ) {
		$guess = $this->mimeTypeAliases[$guess];
	}

	$this->logger->info( __METHOD__ . ": guessed mime type of $file: $guess\n" );

	return $guess;
}
633
/**
 * Guess the MIME type of a file by inspecting its first 1024 bytes and up
 * to 65558 bytes from its end.
 *
 * Checks run in this order: fixed magic numbers, EBML (Matroska/WebM),
 * WebP, PHP open tags, well-formed XML, shebang scripts, ZIP variants,
 * STL, getimagesize(), and finally the guess callback.
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Passed through to detectZipType() only
 * @return string|bool A MIME type; 'unknown/unknown' if the file cannot be
 *   opened or sized; false if nothing matched and the guess callback (if
 *   any) did not supply a type
 * @throws UnexpectedValueException If seeking to the tail of the file fails
 */
private function doGuessMimeType( $file, $ext ) {
	// Read a chunk of the file
	Wikimedia\suppressWarnings();
	$f = fopen( $file, 'rb' );
	Wikimedia\restoreWarnings();

	if ( !$f ) {
		return 'unknown/unknown';
	}

	$fsize = filesize( $file );
	if ( $fsize === false ) {
		// Do not leak the handle on the early exit.
		fclose( $f );
		return 'unknown/unknown';
	}

	$head = fread( $f, 1024 );
	$tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
	if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
		// Close before throwing so the handle is not left open.
		fclose( $f );
		throw new UnexpectedValueException(
			"Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
	}
	$tail = $tailLength ? fread( $f, $tailLength ) : '';
	fclose( $f );

	$this->logger->info( __METHOD__ .
		": analyzing head and tail of $file for magic numbers.\n" );

	// Hardcode a few magic number checks...
	$headers = [
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',
		'ID3' => 'audio/mpeg',
		"\xff\xfb" => 'audio/mpeg', // MPEG-1 layer 3
		"\xff\xf3" => 'audio/mpeg', // MPEG-2 layer 3 (lower sample rates)
		"\xff\xe3" => 'audio/mpeg', // MPEG-2.5 layer 3 (very low sample rates)

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	];

	foreach ( $headers as $magic => $candidate ) {
		if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
			$this->logger->info( __METHOD__ .
				": magic header in $file recognized as $candidate\n" );
			return $candidate;
		}
	}

	/* Look for WebM and Matroska files */
	if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
		$doctype = strpos( $head, "\x42\x82" );
		if ( $doctype ) {
			// Next byte is datasize, then data (sizes larger than 1 byte are stupid muxers)
			$data = substr( $head, $doctype + 3, 8 );
			if ( strncmp( $data, "matroska", 8 ) == 0 ) {
				$this->logger->info( __METHOD__ . ": recognized file as video/x-matroska\n" );
				return "video/x-matroska";
			} elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
				// XXX HACK look for a video track, if we don't find it, this is an audio file
				$videotrack = strpos( $head, "\x86\x85V_VP" );

				if ( $videotrack ) {
					// There is a video track, so this is a video file.
					$this->logger->info( __METHOD__ . ": recognized file as video/webm\n" );
					return "video/webm";
				}

				$this->logger->info( __METHOD__ . ": recognized file as audio/webm\n" );
				return "audio/webm";
			}
		}
		$this->logger->info( __METHOD__ . ": unknown EBML file\n" );
		return "unknown/unknown";
	}

	/* Look for WebP */
	if ( strncmp( $head, "RIFF", 4 ) == 0 &&
		strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0
	) {
		$this->logger->info( __METHOD__ . ": recognized file as image/webp\n" );
		return "image/webp";
	}

	// Look for a PHP open tag anywhere in the head: the plain '<?php' form,
	// plus byte patterns where each character is followed by a NUL byte.
	if ( ( strpos( $head, '<?php' ) !== false ) ||
		( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
		( strpos( $head, "<\x00?\x00 " ) !== false ) ||
		( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
		( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
		( strpos( $head, "<\x00?\x00=" ) !== false )
	) {
		$this->logger->info( __METHOD__ . ": recognized $file as application/x-php\n" );
		return 'application/x-php';
	}

	// Well-formed XML: map the root element through the configured table.
	Wikimedia\suppressWarnings();
	$xml = new XmlTypeCheck( $file );
	Wikimedia\restoreWarnings();
	if ( $xml->wellFormed ) {
		$xmlTypes = $this->xmlTypes;
		if ( isset( $xmlTypes[$xml->getRootElement()] ) ) {
			return $xmlTypes[$xml->getRootElement()];
		} else {
			return 'application/xml';
		}
	}

	# detect by shebang
	// Each prefix is compared over its own length. The previous code compared
	// a fixed 7-byte slice against shorter literals for the UTF-16 cases, so
	// those two could never match once the head held 7 or more bytes.
	$shebangPrefixes = [
		'ASCII' => "#!",
		'UTF-8' => "\xef\xbb\xbf#!",
		'UTF-16BE' => "\xfe\xff\x00#\x00!",
		'UTF-16LE' => "\xff\xfe#\x00!\x00",
	];

	$script_type = null;
	foreach ( $shebangPrefixes as $encoding => $prefix ) {
		if ( strncmp( $head, $prefix, strlen( $prefix ) ) === 0 ) {
			$script_type = $encoding;
			break;
		}
	}

	if ( $script_type ) {
		if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
			// Quick and dirty fold down to ASCII!
			$pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
			// Skip the 2-byte byte-order mark, then decode 16-bit units.
			$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
			$head = '';
			foreach ( $chars as $codepoint ) {
				if ( $codepoint < 128 ) {
					$head .= chr( $codepoint );
				} else {
					$head .= '?';
				}
			}
		}

		$match = [];

		// The interpreter's base name becomes the MIME subtype.
		if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
			$mime = "application/x-{$match[2]}";
			$this->logger->info( __METHOD__ . ": shell script recognized as $mime\n" );
			return $mime;
		}
	}

	// Check for ZIP variants (before getimagesize)
	if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
		$this->logger->info( __METHOD__ . ": ZIP header present in $file\n" );
		return $this->detectZipType( $head, $tail, $ext );
	}

	// Check for STL (3D) files
	// @see https://en.wikipedia.org/wiki/STL_(file_format)
	if ( $fsize >= 15 &&
		stripos( $head, 'SOLID ' ) === 0 &&
		preg_match( '/\RENDSOLID .*$/i', $tail ) ) {
		// ASCII STL file
		return 'application/sla';
	} elseif ( $fsize > 84 ) {
		// binary STL file
		// The 32-bit little-endian value at offset 80 is read as the triangle
		// count; the file must be exactly 84 + 50 bytes per triangle long.
		$triangles = substr( $head, 80, 4 );
		$triangles = unpack( 'V', $triangles );
		$triangles = reset( $triangles );
		if ( $triangles !== false && $fsize === 84 + ( $triangles * 50 ) ) {
			return 'application/sla';
		}
	}

	Wikimedia\suppressWarnings();
	$gis = getimagesize( $file );
	Wikimedia\restoreWarnings();

	if ( $gis && isset( $gis['mime'] ) ) {
		$mime = $gis['mime'];
		$this->logger->info( __METHOD__ . ": getimagesize detected $file as $mime\n" );
		return $mime;
	}

	# Media handling extensions can guess the MIME by content
	# It's intentionally here so that if core is wrong about a type (false positive),
	# people will hopefully nag and submit patches :)
	$mime = false;
	# Some strings by reference for performance - assuming well-behaved hooks
	$callback = $this->guessCallback;
	if ( $callback ) {
		$callback( $this, $head, $tail, $file, $mime /* by reference */ );
	}

	return $mime;
}
860
/**
 * Refine the MIME type of a file already known to contain a ZIP structure.
 *
 * Recognizes OpenDocument files (from the stored "mimetype" entry), Open
 * Packaging Conventions archives (first entry "[Content_Types].xml"), and
 * legacy MS Office documents that carry an OPC trailer.
 *
 * @param string $header Leading bytes of the file
 * @param string|null $tail Trailing bytes of the file; null is treated as ''
 * @param string|bool $ext Deprecated. When it is a string (neither true nor
 *   false) and an OPC archive is detected, the type is refined from it.
 * @return string MIME type; 'application/zip' when nothing more specific fits
 */
public function detectZipType( $header, $tail = null, $ext = false ) {
	if ( $ext ) { # TODO: remove $ext param
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	// The default $tail is null; normalize it so strpos() gets a string.
	$tail = (string)$tail;

	$mime = 'application/zip';
	// Longer names come before their prefixes ('chart-template' before
	// 'chart') so the alternation below picks the most specific one.
	$opendocTypes = [
		'chart-template',
		'chart',
		'formula-template',
		'formula',
		'graphics-template',
		'graphics',
		'image-template',
		'image',
		'presentation-template',
		'presentation',
		'spreadsheet-template',
		'spreadsheet',
		'text-template',
		'text-master',
		'text-web',
		'text' ];

	// https://lists.oasis-open.org/archives/office/200505/msg00006.html
	$types = '(?:' . implode( '|', $opendocTypes ) . ')';
	$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

	// The dot is escaped so that only the literal name "[Content_Types].xml"
	// matches, not any character in that position.
	$openxmlRegex = "/^\[Content_Types\]\.xml/";

	// Offset 30 is where the first entry's file name begins, directly after
	// the fixed-size part of the ZIP local file header.
	$firstEntry = substr( $header, 30 );

	if ( preg_match( $opendocRegex, $firstEntry, $matches ) ) {
		$mime = $matches[1];
		$this->logger->info( __METHOD__ . ": detected $mime from ZIP archive\n" );
	} elseif ( preg_match( $openxmlRegex, $firstEntry ) ) {
		$mime = "application/x-opc+zip";
		# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
		if ( $ext !== true && $ext !== false ) {
			if ( $this->isMatchingExtension( $ext, $mime ) ) {
				/* A known file extension for an OPC file,
				 * find the proper mime type for that file extension
				 */
				$mime = $this->guessTypesForExtension( $ext );
			} else {
				$mime = "application/zip";
			}
		}
		$this->logger->info( __METHOD__ .
			": detected an Open Packaging Conventions archive: $mime\n" );
	} elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
		( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
		preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
		// Legacy Office container with an OPC archive embedded in the tail;
		// the bytes at offset 512 select the document type.
		if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
			$mime = "application/msword";
		}
		switch ( substr( $header, 512, 6 ) ) {
			case "\xEC\xA5\xC1\x00\x0E\x00":
			case "\xEC\xA5\xC1\x00\x1C\x00":
			case "\xEC\xA5\xC1\x00\x43\x00":
				$mime = "application/vnd.ms-powerpoint";
				break;
			case "\xFD\xFF\xFF\xFF\x10\x00":
			case "\xFD\xFF\xFF\xFF\x1F\x00":
			case "\xFD\xFF\xFF\xFF\x22\x00":
			case "\xFD\xFF\xFF\xFF\x23\x00":
			case "\xFD\xFF\xFF\xFF\x28\x00":
			case "\xFD\xFF\xFF\xFF\x29\x00":
			case "\xFD\xFF\xFF\xFF\x10\x02":
			case "\xFD\xFF\xFF\xFF\x1F\x02":
			case "\xFD\xFF\xFF\xFF\x22\x02":
			case "\xFD\xFF\xFF\xFF\x23\x02":
			case "\xFD\xFF\xFF\xFF\x28\x02":
			case "\xFD\xFF\xFF\xFF\x29\x02":
				$mime = "application/vnd.msexcel";
				break;
		}

		$this->logger->info( __METHOD__ .
			": detected a MS Office document with OPC trailer\n" );
	} else {
		$this->logger->info( __METHOD__ . ": unable to identify type of ZIP archive\n" );
	}
	return $mime;
}
963
/**
 * Fallback MIME detection: ask the detect callback (or, without one,
 * mime_content_type()), and if that gives nothing usable, optionally guess
 * from the file extension.
 *
 * @param string $file Path of the file
 * @param string|bool $ext Deprecated. True means "derive the extension from
 *   $file"; a string is used as the extension; a falsy value disables the
 *   extension fallback.
 * @return string MIME type, 'unknown/unknown' if nothing could be determined
 */
private function detectMimeType( $file, $ext = true ) {
	if ( $ext ) {
		$this->logger->info( __METHOD__ .
			': WARNING: use of the $ext parameter is deprecated. ' .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$detector = $this->detectCallback;
	$raw = $detector ? $detector( $file ) : mime_content_type( $file );

	if ( $raw ) {
		# normalize: strip charset etc., surrounding space, and case
		$clean = strtolower( trim( preg_replace( '![;, ].*$!', '', $raw ) ) );

		// Any answer mentioning "unknown" is treated as no answer at all.
		if ( strpos( $clean, 'unknown' ) === false ) {
			$this->logger->info( __METHOD__ . ": magic mime type of $file: $clean\n" );
			return $clean;
		}
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$dot = strrpos( $file, '.' );
		$ext = strtolower( $dot ? substr( $file, $dot + 1 ) : '' );
	}

	if ( $ext ) {
		if ( $this->isRecognizableExtension( $ext ) ) {
			$this->logger->info( __METHOD__ .
				": refusing to guess mime type for .$ext file, we should have recognized it\n" );
		} else {
			$byExt = $this->guessTypesForExtension( $ext );
			if ( $byExt ) {
				$this->logger->info( __METHOD__ . ": extension mime type of $file: $byExt\n" );
				return $byExt;
			}
		}
	}

	// Unknown type
	$this->logger->info( __METHOD__ . ": failed to guess mime type for $file!\n" );

	return 'unknown/unknown';
}
1033
/**
 * Determine the media type (one of the MEDIATYPE_* constants or a name from
 * the info table) for a file and/or MIME type.
 *
 * Lookup order: the full MIME type, then the file extension of $path, then
 * the major part of the MIME type (the text before '/').
 *
 * @param string|null $path Path of the file; used for content-based MIME
 *   guessing when $mime is empty, for the Ogg special case, and for the
 *   extension lookup
 * @param string|null $mime MIME type, if already known
 * @return string Media type; MEDIATYPE_UNKNOWN when nothing matches
 */
public function getMediaType( $path = null, $mime = null ) {
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If MIME type is unknown, guess it
	if ( !$mime ) {
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	// $path may be null when only $mime was supplied; skip the file probe then.
	if ( $mime == 'application/ogg' && $path !== null && file_exists( $path ) ) {
		// Read a chunk of the file. Binary mode: the content is not text, and
		// text-translation mode can alter line-ending bytes on some platforms.
		$f = fopen( $path, 'rb' );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		// Drop the encoder name first so it is not mistaken for the codec.
		$head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

		// This is an UGLY HACK, file should be parsed correctly
		if ( strpos( $head, 'theora' ) !== false ) {
			return MEDIATYPE_VIDEO;
		}
		foreach ( [ 'vorbis', 'flac', 'speex', 'opus' ] as $audioCodec ) {
			if ( strpos( $head, $audioCodec ) !== false ) {
				return MEDIATYPE_AUDIO;
			}
		}
		return MEDIATYPE_MULTIMEDIA;
	}

	$type = null;
	// Check for entry for full MIME type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major MIME type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$major = substr( $mime, 0, $i );
			$type = $this->findMediaType( $major );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Never hand back an empty value: fall back to the UNKNOWN constant.
	if ( !$type ) {
		$type = MEDIATYPE_UNKNOWN;
	}

	return $type;
}
1128
/**
 * Look up the media type for a MIME type or a file extension.
 *
 * An argument beginning with '.' is treated as a file extension and is
 * expanded to all MIME types registered for it; anything else is treated
 * as a MIME type and resolved through the alias table first.
 *
 * @param string $extMime MIME type, or an extension with a leading dot
 * @return int|string Key of the first media type whose list contains one of
 *   the candidates, or MEDIATYPE_UNKNOWN
 */
function findMediaType( $extMime ) {
	if ( strpos( $extMime, '.' ) === 0 ) {
		// If it's an extension, look up the MIME types
		$registered = $this->getTypesForExtension( substr( $extMime, 1 ) );
		if ( !$registered ) {
			return MEDIATYPE_UNKNOWN;
		}

		$candidates = explode( ' ', $registered );
	} else {
		// Normalize MIME type
		$candidates = [
			isset( $this->mimeTypeAliases[$extMime] )
				? $this->mimeTypeAliases[$extMime]
				: $extMime
		];
	}

	// Candidates are tried in order; the first media type list containing
	// the candidate wins.
	foreach ( $candidates as $candidate ) {
		foreach ( $this->mediaTypes as $mediaType => $members ) {
			if ( in_array( $candidate, $members, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
1167
/**
 * List the media type names present in the loaded info table.
 *
 * @return array Keys of the media type table built by loadFiles()
 */
public function getMediaTypes() {
	$names = array_keys( $this->mediaTypes );

	return $names;
}
1176
/**
 * Delegate to the IE content analyzer's getRealMimesFromData().
 *
 * @param string $fileName Passed through unchanged
 * @param string $chunk Passed through unchanged
 * @param string $proposed Passed through unchanged
 * @return mixed Whatever IEContentAnalyzer::getRealMimesFromData() returns
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
	return $this->getIEContentAnalyzer()
		->getRealMimesFromData( $fileName, $chunk, $proposed );
}
1190
/**
 * Return the IEContentAnalyzer instance, creating it on first use and
 * reusing it afterwards.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
	if ( $this->IEAnalyzer === null ) {
		$this->IEAnalyzer = new IEContentAnalyzer;
	}

	return $this->IEAnalyzer;
}
1202}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection a...
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add etc
Definition design.txt:19
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition design.txt:18
the array() calling protocol came about after MediaWiki 1.4rc1.
either a plain
Definition hooks.txt:2056
for adding new MIME info to the list Use $mimeMagic addExtraTypes( $stringOfTypes)
returning false will NOT prevent logging $e
Definition hooks.txt:2176
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
const MEDIATYPE_VIDEO
Definition defines.php:35
const MEDIATYPE_UNKNOWN
Definition defines.php:26
const MEDIATYPE_AUDIO
Definition defines.php:32
const MEDIATYPE_TEXT
Definition defines.php:41
const MEDIATYPE_MULTIMEDIA
Definition defines.php:37
if( $ext=='php'|| $ext=='php5') $mime
Definition router.php:59
$lines
Definition router.php:61
if(!is_readable( $file)) $ext
Definition router.php:55
$params
$header