MediaWiki REL1_28
MimeAnalyzer.php
Go to the documentation of this file.
1<?php
22use Psr\Log\LoggerAwareInterface;
23use Psr\Log\LoggerInterface;
24
30class MimeAnalyzer implements LoggerAwareInterface {
/** @var string Path of the MIME types file that loadFiles() reads, if set */
protected $typeFile;

/** @var string Path of the MIME info file that loadFiles() reads, if set */
protected $infoFile;

/** @var array Map of XML root element => MIME type, consulted for well-formed XML */
protected $xmlTypes;

/** @var callable|null Invoked with this object at the start of loadFiles() */
protected $initCallback;

/** @var callable|null Invoked with the file path by detectMimeType() */
protected $detectCallback;

/** @var callable|null Invoked with ( $this, $head, $tail, $file, $mime ) as a last-resort content guess */
protected $guessCallback;

/** @var callable|null Invoked with ( $this, $ext, $mime ) by improveTypeFromExtension() */
protected $extCallback;

/** @var array|null Map of media type => list of MIME types; filled by loadFiles() */
protected $mediaTypes;

/** @var array|null Map of alias MIME type => canonical MIME type; filled by loadFiles() */
protected $mimeTypeAliases;

/** @var array|null Map of MIME type => space-separated extensions; filled by loadFiles() */
protected $mimetoExt;

/** @var array|null Map of extension => space-separated MIME types; filled by loadFiles() */
public $mExtToMime; // legacy name; field accessed by hooks

/** @var IEContentAnalyzer|null Created on first use by getIEContentAnalyzer() */
protected $IEAnalyzer;

/** @var string Additional MIME type lines collected through addExtraTypes() */
private $extraTypes = '';

/** @var string Additional MIME info lines collected through addExtraInfo() */
private $extraInfo = '';

/** @var LoggerInterface */
private $logger;
65
85 protected static $wellKnownTypes = <<<EOT
86application/ogg ogx ogg ogm ogv oga spx
87application/pdf pdf
88application/vnd.oasis.opendocument.chart odc
89application/vnd.oasis.opendocument.chart-template otc
90application/vnd.oasis.opendocument.database odb
91application/vnd.oasis.opendocument.formula odf
92application/vnd.oasis.opendocument.formula-template otf
93application/vnd.oasis.opendocument.graphics odg
94application/vnd.oasis.opendocument.graphics-template otg
95application/vnd.oasis.opendocument.image odi
96application/vnd.oasis.opendocument.image-template oti
97application/vnd.oasis.opendocument.presentation odp
98application/vnd.oasis.opendocument.presentation-template otp
99application/vnd.oasis.opendocument.spreadsheet ods
100application/vnd.oasis.opendocument.spreadsheet-template ots
101application/vnd.oasis.opendocument.text odt
102application/vnd.oasis.opendocument.text-master otm
103application/vnd.oasis.opendocument.text-template ott
104application/vnd.oasis.opendocument.text-web oth
105application/javascript js
106application/x-shockwave-flash swf
107audio/midi mid midi kar
108audio/mpeg mpga mpa mp2 mp3
109audio/x-aiff aif aiff aifc
110audio/x-wav wav
111audio/ogg oga spx ogg
112image/x-bmp bmp
113image/gif gif
114image/jpeg jpeg jpg jpe
115image/png png
116image/svg+xml svg
117image/svg svg
118image/tiff tiff tif
119image/vnd.djvu djvu
120image/x.djvu djvu
121image/x-djvu djvu
122image/x-portable-pixmap ppm
123image/x-xcf xcf
124text/plain txt
125text/html html htm
126video/ogg ogv ogm ogg
127video/mpeg mpg mpeg
128EOT;
129
136 protected static $wellKnownInfo = <<<EOT
137application/pdf [OFFICE]
138application/vnd.oasis.opendocument.chart [OFFICE]
139application/vnd.oasis.opendocument.chart-template [OFFICE]
140application/vnd.oasis.opendocument.database [OFFICE]
141application/vnd.oasis.opendocument.formula [OFFICE]
142application/vnd.oasis.opendocument.formula-template [OFFICE]
143application/vnd.oasis.opendocument.graphics [OFFICE]
144application/vnd.oasis.opendocument.graphics-template [OFFICE]
145application/vnd.oasis.opendocument.image [OFFICE]
146application/vnd.oasis.opendocument.image-template [OFFICE]
147application/vnd.oasis.opendocument.presentation [OFFICE]
148application/vnd.oasis.opendocument.presentation-template [OFFICE]
149application/vnd.oasis.opendocument.spreadsheet [OFFICE]
150application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
151application/vnd.oasis.opendocument.text [OFFICE]
152application/vnd.oasis.opendocument.text-template [OFFICE]
153application/vnd.oasis.opendocument.text-master [OFFICE]
154application/vnd.oasis.opendocument.text-web [OFFICE]
155application/javascript text/javascript application/x-javascript [EXECUTABLE]
156application/x-shockwave-flash [MULTIMEDIA]
157audio/midi [AUDIO]
158audio/x-aiff [AUDIO]
159audio/x-wav [AUDIO]
160audio/mp3 audio/mpeg [AUDIO]
161application/ogg audio/ogg video/ogg [MULTIMEDIA]
162image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
163image/gif [BITMAP]
164image/jpeg [BITMAP]
165image/png [BITMAP]
166image/svg+xml [DRAWING]
167image/tiff [BITMAP]
168image/vnd.djvu [BITMAP]
169image/x-xcf [BITMAP]
170image/x-portable-pixmap [BITMAP]
171text/plain [TEXT]
172text/html [TEXT]
173video/ogg [VIDEO]
174video/mpeg [VIDEO]
175unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
176EOT;
177
/**
 * Store the configuration and immediately build the lookup tables.
 *
 * @param array $params Configuration map:
 *  - typeFile, infoFile, xmlTypes: read unconditionally
 *  - initCallback, detectCallback, guessCallback, extCallback: optional, default null
 *  - logger: optional, defaults to a \Psr\Log\NullLogger
 */
public function __construct( array $params ) {
	$this->typeFile = $params['typeFile'];
	$this->infoFile = $params['infoFile'];
	$this->xmlTypes = $params['xmlTypes'];

	// Optional hooks fall back to null when the caller did not provide them
	$hookNames = [ 'initCallback', 'detectCallback', 'guessCallback', 'extCallback' ];
	foreach ( $hookNames as $hookName ) {
		$this->$hookName = isset( $params[$hookName] ) ? $params[$hookName] : null;
	}

	if ( isset( $params['logger'] ) ) {
		$this->logger = $params['logger'];
	} else {
		$this->logger = new \Psr\Log\NullLogger();
	}

	$this->loadFiles();
}
215
/**
 * Build the four lookup tables (mimetoExt, mExtToMime, mimeTypeAliases,
 * mediaTypes) from the built-in tables, the optional type/info files and
 * whatever was registered through addExtraTypes() / addExtraInfo().
 *
 * Type lines have the form "mime ext [ext ...]"; info lines have the form
 * "mime [alias ...] [MEDIATYPE]". Blank lines and lines starting with '#'
 * are skipped, and everything is lower-cased before being stored.
 */
protected function loadFiles() {
	# Allow media handling extensions adding MIME-types and MIME-info
	if ( $this->initCallback ) {
		call_user_func( $this->initCallback, $this );
	}

	$types = self::$wellKnownTypes;

	$mimeTypeFile = $this->typeFile;
	if ( $mimeTypeFile ) {
		if ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
			$this->logger->info( __METHOD__ . ": loading mime types from $mimeTypeFile\n" );
			$types .= "\n";
			$types .= file_get_contents( $mimeTypeFile );
		} else {
			$this->logger->info( __METHOD__ . ": can't load mime types from $mimeTypeFile\n" );
		}
	} else {
		$this->logger->info( __METHOD__ .
			": no mime types file defined, using built-ins only.\n" );
	}

	$types .= "\n" . $this->extraTypes;

	// Normalise line endings and treat tabs as plain separators
	$types = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $types );
	$types = str_replace( "\t", " ", $types );

	$this->mimetoExt = [];
	$this->mExtToMime = [];

	foreach ( explode( "\n", $types ) as $s ) {
		$s = trim( $s );
		if ( empty( $s ) || strpos( $s, '#' ) === 0 ) {
			continue;
		}

		$s = strtolower( $s );
		$i = strpos( $s, ' ' );
		if ( $i === false ) {
			// A MIME type without any extension carries no mapping
			continue;
		}

		$mime = substr( $s, 0, $i );
		$ext = trim( substr( $s, $i + 1 ) );
		if ( empty( $ext ) ) {
			continue;
		}

		// Repeated declarations of a MIME type accumulate their extensions
		if ( !empty( $this->mimetoExt[$mime] ) ) {
			$this->mimetoExt[$mime] .= ' ' . $ext;
		} else {
			$this->mimetoExt[$mime] = $ext;
		}

		foreach ( explode( ' ', $ext ) as $e ) {
			$e = trim( $e );
			if ( empty( $e ) ) {
				continue;
			}

			if ( !empty( $this->mExtToMime[$e] ) ) {
				$this->mExtToMime[$e] .= ' ' . $mime;
			} else {
				$this->mExtToMime[$e] = $mime;
			}
		}
	}

	$mimeInfoFile = $this->infoFile;

	$info = self::$wellKnownInfo;

	if ( $mimeInfoFile ) {
		if ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
			$this->logger->info( __METHOD__ . ": loading mime info from $mimeInfoFile\n" );
			$info .= "\n";
			$info .= file_get_contents( $mimeInfoFile );
		} else {
			$this->logger->info( __METHOD__ . ": can't load mime info from $mimeInfoFile\n" );
		}
	} else {
		$this->logger->info( __METHOD__ .
			": no mime info file defined, using built-ins only.\n" );
	}

	$info .= "\n" . $this->extraInfo;

	$info = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $info );
	$info = str_replace( "\t", " ", $info );

	$this->mimeTypeAliases = [];
	$this->mediaTypes = [];

	foreach ( explode( "\n", $info ) as $s ) {
		$s = trim( $s );
		if ( empty( $s ) || strpos( $s, '#' ) === 0 ) {
			continue;
		}

		$s = strtolower( $s );
		if ( strpos( $s, ' ' ) === false ) {
			continue;
		}

		// Pull the "[MEDIATYPE]" marker out of the line, if there is one
		$match = [];
		if ( preg_match( '!\[\s*(\w+)\s*\]!', $s, $match ) ) {
			$s = preg_replace( '!\[\s*(\w+)\s*\]!', '', $s );
			$mtype = trim( strtoupper( $match[1] ) );
		} else {
			$mtype = MEDIATYPE_UNKNOWN;
		}

		// Removing the marker leaves stray blanks behind, so drop empty
		// tokens here. Otherwise '' would be registered as an alias (or as
		// the canonical type) and an empty MIME would resolve to whichever
		// line happened to be parsed last.
		$mimes = [];
		foreach ( explode( ' ', $s ) as $mime ) {
			$mime = trim( $mime );
			if ( !empty( $mime ) ) {
				$mimes[] = $mime;
			}
		}

		if ( !isset( $this->mediaTypes[$mtype] ) ) {
			$this->mediaTypes[$mtype] = [];
		}

		foreach ( $mimes as $mime ) {
			$this->mediaTypes[$mtype][] = $mime;
		}

		// The first MIME type on the line is canonical; the rest alias it
		$mCount = count( $mimes );
		for ( $i = 1; $i < $mCount; $i += 1 ) {
			$this->mimeTypeAliases[$mimes[$i]] = $mimes[0];
		}
	}
}
377
/**
 * Replace the logger used for diagnostic messages.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
381
/**
 * Queue additional MIME type lines; loadFiles() appends them after the
 * built-in table and the type file.
 *
 * @param string $types One or more "mime ext [ext ...]" lines
 */
public function addExtraTypes( $types ) {
	$this->extraTypes = $this->extraTypes . "\n" . $types;
}
391
/**
 * Queue additional MIME info lines; loadFiles() appends them after the
 * built-in table and the info file.
 *
 * @param string $info One or more "mime [alias ...] [MEDIATYPE]" lines
 */
public function addExtraInfo( $info ) {
	$this->extraInfo = $this->extraInfo . "\n" . $info;
}
401
/**
 * Look up the extensions registered for a MIME type, trying the type as
 * given first and its canonical alias second.
 *
 * @param string $mime
 * @return string|null Space-separated extensions, or null when unknown
 */
public function getExtensionsForType( $mime ) {
	$key = strtolower( $mime );

	// Only fall back to the canonical type when there is no direct entry
	if ( !isset( $this->mimetoExt[$key] ) && isset( $this->mimeTypeAliases[$key] ) ) {
		$key = $this->mimeTypeAliases[$key];
	}

	return isset( $this->mimetoExt[$key] ) ? $this->mimetoExt[$key] : null;
}
428
/**
 * Look up every MIME type registered for a file extension.
 *
 * @param string $ext Extension without the leading dot; case-insensitive
 * @return string|null Space-separated MIME types, or null when unknown
 */
public function getTypesForExtension( $ext ) {
	$key = strtolower( $ext );

	if ( isset( $this->mExtToMime[$key] ) ) {
		return $this->mExtToMime[$key];
	}

	return null;
}
442
/**
 * Return the first MIME type registered for a file extension.
 *
 * @param string $ext Extension without the leading dot; case-insensitive
 * @return string|null The first registered MIME type, or null when unknown
 */
public function guessTypesForExtension( $ext ) {
	$candidates = $this->getTypesForExtension( $ext );
	if ( $candidates === null ) {
		return null;
	}

	// TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
	// Keep only what precedes the first whitespace character
	return preg_replace( '/\s.*$/', '', trim( $candidates ) );
}
462
/**
 * Tell whether a file extension is one of those registered for a MIME type.
 *
 * @param string $extension Extension without the leading dot; case-insensitive
 * @param string $mime
 * @return bool|null True or false for a known MIME type, null when no
 *  extensions are registered for it
 */
public function isMatchingExtension( $extension, $mime ) {
	$ext = $this->getExtensionsForType( $mime );

	if ( !$ext ) {
		return null; // Unknown MIME type
	}

	// Compare strictly: a loose comparison treats numeric-looking strings
	// such as '1e3' and '1000' as the same extension.
	return in_array( strtolower( $extension ), explode( ' ', $ext ), true );
}
484
/**
 * Tell whether a MIME type is in the fixed list of image types below.
 *
 * @param string $mime
 * @return bool
 */
public function isPHPImageType( $mime ) {
	// As defined by imagegetsize and image_type_to_mime
	static $types = [
		'image/gif', 'image/jpeg', 'image/png',
		'image/x-bmp', 'image/xbm', 'image/tiff',
		'image/jp2', 'image/jpeg2000', 'image/iff',
		'image/x-xbitmap',
		'image/vnd.wap.wbmp', 'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	];

	// Strict comparison, so that non-string input such as true (or 0 on
	// older PHP versions) is not reported as an image type.
	return in_array( $mime, $types, true );
}
507
/**
 * Tell whether an extension belongs to a format that content sniffing is
 * expected to identify, so that the extension alone should not be trusted.
 *
 * @param string $extension Extension without the leading dot; case-insensitive
 * @return bool
 */
public function isRecognizableExtension( $extension ) {
	static $types = [
		// Types recognized by getimagesize()
		'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
		'bmp', 'tiff', 'tif', 'jpc', 'jp2',
		'jpx', 'jb2', 'swc', 'iff', 'wbmp',
		'xbm',

		// Formats we recognize magic numbers for
		'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx',
		'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
		'webp',

		// XML formats we sure hope we recognize reliably
		'svg',
	];

	$needle = strtolower( $extension );

	return in_array( $needle, $types, true );
}
538
/**
 * Refine a content-detected MIME type using the file extension.
 *
 * Three cases are refined: an undetected type, an OPC ZIP container, and
 * text/plain with an extension whose media type is textual. Afterwards the
 * extension callback may adjust the result and aliases are resolved.
 *
 * @param string $mime The MIME type detected from the file content
 * @param string $ext File extension without the leading dot
 * @return string|null The improved type; whatever guessTypesForExtension()
 *  returned when that lookup was used
 */
public function improveTypeFromExtension( $mime, $ext ) {
	if ( $mime === 'unknown/unknown' ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->guessTypesForExtension( $ext );
		} else {
			$this->logger->info( __METHOD__ .
				": refusing to guess mime type for .$ext file, we should have recognized it\n" );
		}
	} elseif ( $mime === 'application/x-opc+zip' ) {
		if ( !$this->isMatchingExtension( $ext, $mime ) ) {
			$this->logger->info( __METHOD__ .
				": refusing to guess better type for $mime file, .$ext is not a known OPC extension.\n" );
			$mime = 'application/zip';
		} else {
			// A known file extension for an OPC file,
			// find the proper MIME type for that file extension
			$mime = $this->guessTypesForExtension( $ext );
		}
	} elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
		// Textual types are sometimes not recognized properly.
		// If detected as text/plain, and has an extension which is textual
		// improve to the extension's type. For example, csv and json are often
		// misdetected as text/plain.
		$mime = $this->guessTypesForExtension( $ext );
	}

	# Media handling extensions can improve the MIME detected
	$improver = $this->extCallback;
	if ( $improver ) {
		$improver( $this, $ext, $mime /* by reference */ );
	}

	$mime = isset( $this->mimeTypeAliases[$mime] ) ? $this->mimeTypeAliases[$mime] : $mime;

	$this->logger->info( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
	return $mime;
}
592
/**
 * Determine the MIME type of a file: built-in content checks first, then
 * detectMimeType() when those yield nothing, then alias resolution.
 *
 * @param string $file Path of the file to inspect
 * @param string|bool $ext Deprecated; passed through to the detection helpers
 * @return string The canonical MIME type
 */
public function guessMimeType( $file, $ext = true ) {
	if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$detected = $this->doGuessMimeType( $file, $ext );

	if ( !$detected ) {
		$this->logger->info( __METHOD__ .
			": internal type detection failed for $file (.$ext)...\n" );
		$detected = $this->detectMimeType( $file, $ext );
	}

	// Map an alias onto its canonical type
	$mime = isset( $this->mimeTypeAliases[$detected] )
		? $this->mimeTypeAliases[$detected]
		: $detected;

	$this->logger->info( __METHOD__ . ": guessed mime type of $file: $mime\n" );
	return $mime;
}
629
/**
 * Guess the MIME type from the file content, using hard-coded magic
 * numbers, XML and script sniffing, ZIP detection, getimagesize() and
 * finally the guess callback.
 *
 * @param string $file Path of the file to inspect
 * @param string|bool $ext Passed through to detectZipType()
 * @return string|bool The detected MIME type, 'unknown/unknown' when the
 *  file cannot be read or is an unidentified EBML file, or false when no
 *  check matched and the guess callback did not set a type
 * @throws UnexpectedValueException When seeking to the tail of the file fails
 */
private function doGuessMimeType( $file, $ext ) {
	// Read a chunk of the file
	MediaWiki\suppressWarnings();
	$f = fopen( $file, 'rb' );
	MediaWiki\restoreWarnings();

	if ( !$f ) {
		return 'unknown/unknown';
	}

	$fsize = filesize( $file );
	if ( $fsize === false ) {
		// Release the handle opened above before bailing out
		fclose( $f );
		return 'unknown/unknown';
	}

	$head = fread( $f, 1024 );
	$tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
	if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
		// Release the handle before propagating the failure
		fclose( $f );
		throw new UnexpectedValueException(
			"Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
	}
	$tail = $tailLength ? fread( $f, $tailLength ) : '';
	fclose( $f );

	$this->logger->info( __METHOD__ .
		": analyzing head and tail of $file for magic numbers.\n" );

	// Hardcode a few magic number checks...
	$headers = [
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	];

	foreach ( $headers as $magic => $candidate ) {
		if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
			$this->logger->info( __METHOD__ .
				": magic header in $file recognized as $candidate\n" );
			return $candidate;
		}
	}

	/* Look for WebM and Matroska files */
	if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
		$doctype = strpos( $head, "\x42\x82" );
		if ( $doctype ) {
			// Next byte is datasize, then data (sizes larger than 1 byte are stupid muxers)
			$data = substr( $head, $doctype + 3, 8 );
			if ( strncmp( $data, "matroska", 8 ) == 0 ) {
				$this->logger->info( __METHOD__ . ": recognized file as video/x-matroska\n" );
				return "video/x-matroska";
			} elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
				$this->logger->info( __METHOD__ . ": recognized file as video/webm\n" );
				return "video/webm";
			}
		}
		$this->logger->info( __METHOD__ . ": unknown EBML file\n" );
		return "unknown/unknown";
	}

	/* Look for WebP */
	if ( strncmp( $head, "RIFF", 4 ) == 0 &&
		strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0
	) {
		$this->logger->info( __METHOD__ . ": recognized file as image/webp\n" );
		return "image/webp";
	}

	// Look for a PHP open tag anywhere in the head, either as plain bytes
	// or with a NUL byte after each character.
	if ( ( strpos( $head, '<?php' ) !== false ) ||
		( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
		( strpos( $head, "<\x00?\x00 " ) !== false ) ||
		( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
		( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
		( strpos( $head, "<\x00?\x00=" ) !== false ) ) {

		$this->logger->info( __METHOD__ . ": recognized $file as application/x-php\n" );
		return 'application/x-php';
	}

	// Well-formed XML is typed by its root element when that is configured
	$xml = new XmlTypeCheck( $file );
	if ( $xml->wellFormed ) {
		$xmlTypes = $this->xmlTypes;
		if ( isset( $xmlTypes[$xml->getRootElement()] ) ) {
			return $xmlTypes[$xml->getRootElement()];
		} else {
			return 'application/xml';
		}
	}

	# detect by shebang
	// Each prefix is compared over its own length. The previous fixed
	// 7-byte comparison could never equal the shorter UTF-16 literals, so
	// UTF-16 encoded scripts were not detected.
	$shebangs = [
		"#!" => 'ASCII',
		"\xef\xbb\xbf#!" => 'UTF-8',
		"\xfe\xff\x00#\x00!" => 'UTF-16BE',
		"\xff\xfe#\x00!\x00" => 'UTF-16LE',
	];

	$script_type = null;
	foreach ( $shebangs as $prefix => $encoding ) {
		if ( strncmp( $head, $prefix, strlen( $prefix ) ) === 0 ) {
			$script_type = $encoding;
			break;
		}
	}

	if ( $script_type ) {
		if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
			// Quick and dirty fold down to ASCII!
			$pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
			// Skip the two-byte byte order mark before decoding
			$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
			$head = '';
			foreach ( $chars as $codepoint ) {
				if ( $codepoint < 128 ) {
					$head .= chr( $codepoint );
				} else {
					$head .= '?';
				}
			}
		}

		$match = [];

		// The interpreter name is the word following the last directory part
		if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
			$mime = "application/x-{$match[2]}";
			$this->logger->info( __METHOD__ . ": shell script recognized as $mime\n" );
			return $mime;
		}
	}

	// Check for ZIP variants (before getimagesize)
	if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
		$this->logger->info( __METHOD__ . ": ZIP header present in $file\n" );
		return $this->detectZipType( $head, $tail, $ext );
	}

	MediaWiki\suppressWarnings();
	$gis = getimagesize( $file );
	MediaWiki\restoreWarnings();

	if ( $gis && isset( $gis['mime'] ) ) {
		$mime = $gis['mime'];
		$this->logger->info( __METHOD__ . ": getimagesize detected $file as $mime\n" );
		return $mime;
	}

	# Media handling extensions can guess the MIME by content
	# It's intentionally here so that if core is wrong about a type (false positive),
	# people will hopefully nag and submit patches :)
	$mime = false;
	# Some strings by reference for performance - assuming well-behaved hooks
	$callback = $this->guessCallback;
	if ( $callback ) {
		$callback( $this, $head, $tail, $file, $mime /* by reference */ );
	}

	return $mime;
}
824
/**
 * Work out which kind of ZIP-based document a file is from its first and
 * last bytes: OpenDocument, Open Packaging Conventions, a legacy MS Office
 * file with an OPC trailer, or a plain ZIP archive.
 *
 * @param string $header Leading bytes of the file
 * @param string|null $tail Trailing bytes of the file
 * @param string|bool $ext Deprecated; when it is an actual extension it is
 *  used to refine an OPC archive
 * @return string The detected MIME type, 'application/zip' by default
 */
public function detectZipType( $header, $tail = null, $ext = false ) {
	if ( $ext ) { # TODO: remove $ext param
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$opendocTypes = [
		'chart-template',
		'chart',
		'formula-template',
		'formula',
		'graphics-template',
		'graphics',
		'image-template',
		'image',
		'presentation-template',
		'presentation',
		'spreadsheet-template',
		'spreadsheet',
		'text-template',
		'text-master',
		'text-web',
		'text' ];

	// http://lists.oasis-open.org/archives/office/200505/msg00006.html
	$types = '(?:' . implode( '|', $opendocTypes ) . ')';
	$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

	$openxmlRegex = "/^\[Content_Types\].xml/";

	// NOTE(review): offset 30 presumably skips the fixed part of the first
	// ZIP local file header so that the entry name comes first - confirm.
	$firstEntry = substr( $header, 30 );

	if ( preg_match( $opendocRegex, $firstEntry, $matches ) ) {
		$mime = $matches[1];
		$this->logger->info( __METHOD__ . ": detected $mime from ZIP archive\n" );
		return $mime;
	}

	if ( preg_match( $openxmlRegex, $firstEntry ) ) {
		$mime = "application/x-opc+zip";
		# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
		if ( $ext !== true && $ext !== false ) {
			/* A known file extension for an OPC file,
			 * find the proper mime type for that file extension
			 */
			$mime = $this->isMatchingExtension( $ext, $mime )
				? $this->guessTypesForExtension( $ext )
				: "application/zip";
		}
		$this->logger->info( __METHOD__ .
			": detected an Open Packaging Conventions archive: $mime\n" );
		return $mime;
	}

	// Legacy MS Office signature up front with an OPC package in the tail
	$hasOpcTrailer = false;
	if ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" ) {
		$headerpos = strpos( $tail, "PK\x03\x04" );
		$hasOpcTrailer = $headerpos !== false &&
			preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) );
	}

	if ( !$hasOpcTrailer ) {
		$this->logger->info( __METHOD__ . ": unable to identify type of ZIP archive\n" );
		return 'application/zip';
	}

	$mime = 'application/zip';
	if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
		$mime = "application/msword";
	}
	// The six-byte signatures below take precedence over the Word match
	switch ( substr( $header, 512, 6 ) ) {
		case "\xEC\xA5\xC1\x00\x0E\x00":
		case "\xEC\xA5\xC1\x00\x1C\x00":
		case "\xEC\xA5\xC1\x00\x43\x00":
			$mime = "application/vnd.ms-powerpoint";
			break;
		case "\xFD\xFF\xFF\xFF\x10\x00":
		case "\xFD\xFF\xFF\xFF\x1F\x00":
		case "\xFD\xFF\xFF\xFF\x22\x00":
		case "\xFD\xFF\xFF\xFF\x23\x00":
		case "\xFD\xFF\xFF\xFF\x28\x00":
		case "\xFD\xFF\xFF\xFF\x29\x00":
		case "\xFD\xFF\xFF\xFF\x10\x02":
		case "\xFD\xFF\xFF\xFF\x1F\x02":
		case "\xFD\xFF\xFF\xFF\x22\x02":
		case "\xFD\xFF\xFF\xFF\x23\x02":
		case "\xFD\xFF\xFF\xFF\x28\x02":
		case "\xFD\xFF\xFF\xFF\x29\x02":
			$mime = "application/vnd.msexcel";
			break;
	}

	$this->logger->info( __METHOD__ .
		": detected a MS Office document with OPC trailer\n" );
	return $mime;
}
927
/**
 * Detect the MIME type with the detect callback or, without one, the
 * fileinfo functions; fall back to the file extension when that fails.
 *
 * @param string $file Path of the file to inspect
 * @param string|bool $ext Deprecated; true derives the extension from
 *  $file, a string is used as the extension, false disables the fallback
 * @return string The detected MIME type, or 'unknown/unknown'
 */
private function detectMimeType( $file, $ext = true ) {
	if ( $ext ) {
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. "
			. "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$m = null;
	$detector = $this->detectCallback;
	if ( $detector ) {
		$m = $detector( $file );
	} elseif ( !function_exists( "finfo_open" ) || !function_exists( "finfo_file" ) ) {
		$this->logger->info( __METHOD__ . ": no magic mime detector found!\n" );
	} else {
		$finfo = finfo_open( FILEINFO_MIME );

		if ( !$finfo ) {
			$this->logger->info( __METHOD__ .
				": finfo_open failed on " . FILEINFO_MIME . "!\n" );
		} else {
			$m = finfo_file( $finfo, $file );
			finfo_close( $finfo );
		}
	}

	if ( $m ) {
		# normalize: strip charset, etc, then trim and lower-case
		$m = strtolower( trim( preg_replace( '![;, ].*$!', '', $m ) ) );

		if ( strpos( $m, 'unknown' ) === false ) {
			$this->logger->info( __METHOD__ . ": magic mime type of $file: $m\n" );
			return $m;
		}

		// The detector could not tell either; continue with the extension
		$m = null;
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$dot = strrpos( $file, '.' );
		$ext = strtolower( $dot ? substr( $file, $dot + 1 ) : '' );
	}

	if ( $ext ) {
		if ( $this->isRecognizableExtension( $ext ) ) {
			$this->logger->info( __METHOD__ . ": refusing to guess mime type for .$ext file, "
				. "we should have recognized it\n" );
		} else {
			$m = $this->guessTypesForExtension( $ext );
			if ( $m ) {
				$this->logger->info( __METHOD__ . ": extension mime type of $file: $m\n" );
				return $m;
			}
		}
	}

	// Unknown type
	$this->logger->info( __METHOD__ . ": failed to guess mime type for $file!\n" );
	return 'unknown/unknown';
}
1007
/**
 * Determine the media type of a file from its MIME type and/or its path.
 *
 * The lookup order is: Ogg content sniffing, the full MIME type, the file
 * extension, and finally the major part of the MIME type.
 *
 * @param string|null $path Path of the file; used to guess the MIME type
 *  when none is given, to sniff Ogg files and for the extension lookup
 * @param string|null $mime MIME type, when already known
 * @return string A media type as returned by findMediaType(), or one of
 *  the MEDIATYPE_* constants; MEDIATYPE_UNKNOWN when nothing matched
 */
public function getMediaType( $path = null, $mime = null ) {
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If MIME type is unknown, guess it
	if ( !$mime ) {
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	if ( $mime == 'application/ogg' && file_exists( $path ) ) {

		// Read a chunk of the file. The content is binary, so open it
		// without text-mode translation.
		$f = fopen( $path, "rb" );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		// Drop the encoder name so that it does not count as a 'theora' hit
		$head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

		// This is an UGLY HACK, file should be parsed correctly
		if ( strpos( $head, 'theora' ) !== false ) {
			return MEDIATYPE_VIDEO;
		} elseif ( strpos( $head, 'vorbis' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} elseif ( strpos( $head, 'flac' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} elseif ( strpos( $head, 'speex' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} else {
			return MEDIATYPE_MULTIMEDIA;
		}
	}

	// Check for entry for full MIME type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major MIME type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$major = substr( $mime, 0, $i );
			$type = $this->findMediaType( $major );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Every successful lookup returned above, so nothing matched. Always
	// hand back the explicit constant rather than a possibly unset value.
	return MEDIATYPE_UNKNOWN;
}
1101
/**
 * Find the media type that a MIME type or a file extension belongs to.
 *
 * @param string $extMime A MIME type, or a file extension with a leading dot
 * @return string The matching key of the media type table, or
 *  MEDIATYPE_UNKNOWN when nothing matches
 */
public function findMediaType( $extMime ) {
	if ( strpos( $extMime, '.' ) !== 0 ) {
		// Normalize MIME type
		if ( isset( $this->mimeTypeAliases[$extMime] ) ) {
			$extMime = $this->mimeTypeAliases[$extMime];
		}

		$candidates = [ $extMime ];
	} else {
		// If it's an extension, look up the MIME types
		$registered = $this->getTypesForExtension( substr( $extMime, 1 ) );
		if ( !$registered ) {
			return MEDIATYPE_UNKNOWN;
		}

		$candidates = explode( ' ', $registered );
	}

	// The first candidate listed under any media type decides the result
	foreach ( $candidates as $candidate ) {
		foreach ( $this->mediaTypes as $mediaType => $members ) {
			if ( in_array( $candidate, $members, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
1140
/**
 * Ask the IE content analyzer for the MIME types of a file; the arguments
 * are forwarded unchanged to its getRealMimesFromData().
 *
 * @param string $fileName
 * @param string $chunk
 * @param string $proposed
 * @return mixed Whatever IEContentAnalyzer::getRealMimesFromData() returns
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
	return $this->getIEContentAnalyzer()->getRealMimesFromData( $fileName, $chunk, $proposed );
}
1154
/**
 * Return the IE content analyzer, creating it on first use.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
	if ( $this->IEAnalyzer === null ) {
		$this->IEAnalyzer = new IEContentAnalyzer();
	}

	return $this->IEAnalyzer;
}
1166}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection a...
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add etc
Definition design.txt:19
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition design.txt:18
the array() calling protocol came about after MediaWiki 1.4rc1.
namespace are movable Hooks may change this value to override the return value of MWNamespace::isMovable(). 'NewDifferenceEngine' do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition hooks.txt:2568
either a plain
Definition hooks.txt:1990
for adding new MIME info to the list Use $mimeMagic addExtraTypes( $stringOfTypes)
returning false will NOT prevent logging $e
Definition hooks.txt:2110
$extensions
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
const MEDIATYPE_VIDEO
Definition defines.php:35
const MEDIATYPE_UNKNOWN
Definition defines.php:26
const MEDIATYPE_AUDIO
Definition defines.php:32
const MEDIATYPE_TEXT
Definition defines.php:41
const MEDIATYPE_MULTIMEDIA
Definition defines.php:37
if( $ext=='php'|| $ext=='php5') $mime
Definition router.php:65
$lines
Definition router.php:67
$params
$header