MediaWiki REL1_33
MimeAnalyzer.php
Go to the documentation of this file.
1<?php
22use Psr\Log\LoggerAwareInterface;
23use Psr\Log\LoggerInterface;
24
30class MimeAnalyzer implements LoggerAwareInterface {
/** Path of the extra MIME types file; loadFiles() appends it to the built-in table when readable */
32 protected $typeFile;
/** Path of the extra MIME info file; loadFiles() appends it to the built-in table when readable */
34 protected $infoFile;
/** Map consulted by doGuessMimeType(): XML root element => MIME type */
36 protected $xmlTypes;
/** Optional callable invoked as ( $this ) at the start of loadFiles() */
38 protected $initCallback;
/** Optional callable invoked as ( $file ) by detectMimeType(); replaces mime_content_type() */
40 protected $detectCallback;
/** Optional callable invoked as ( $this, $head, $tail, $file, $mime ) at the end of doGuessMimeType() */
42 protected $guessCallback;
/** Optional callable invoked as ( $this, $ext, $mime ) by improveTypeFromExtension() */
44 protected $extCallback;
/** Map filled by loadFiles(): media type name => list of MIME types */
46 protected $mediaTypes = null;
/** Map filled by loadFiles(): alias MIME type => main MIME type */
48 protected $mimeTypeAliases = null;
/** Map filled by loadFiles(): MIME type => space-separated file extensions */
50 protected $mimetoExt = null;
51
/** Map filled by loadFiles(): file extension => space-separated MIME types */
53 public $mExtToMime = null; // legacy name; field accessed by hooks
54
/** IEContentAnalyzer instance, created on first use by getIEContentAnalyzer() */
56 protected $IEAnalyzer;
57
/** Extra MIME type lines collected through addExtraTypes() */
59 private $extraTypes = '';
/** Extra MIME info lines collected through addExtraInfo() */
61 private $extraInfo = '';
62
/** Logger used for all diagnostic messages of this class */
64 private $logger;
65
/**
 * Built-in MIME type table that loadFiles() always parses first.
 * Each line is a MIME type followed by the space-separated file
 * extensions registered for it.
 */
85 protected static $wellKnownTypes = <<<EOT
86application/ogg ogx ogg ogm ogv oga spx opus
87application/pdf pdf
88application/vnd.oasis.opendocument.chart odc
89application/vnd.oasis.opendocument.chart-template otc
90application/vnd.oasis.opendocument.database odb
91application/vnd.oasis.opendocument.formula odf
92application/vnd.oasis.opendocument.formula-template otf
93application/vnd.oasis.opendocument.graphics odg
94application/vnd.oasis.opendocument.graphics-template otg
95application/vnd.oasis.opendocument.image odi
96application/vnd.oasis.opendocument.image-template oti
97application/vnd.oasis.opendocument.presentation odp
98application/vnd.oasis.opendocument.presentation-template otp
99application/vnd.oasis.opendocument.spreadsheet ods
100application/vnd.oasis.opendocument.spreadsheet-template ots
101application/vnd.oasis.opendocument.text odt
102application/vnd.oasis.opendocument.text-master otm
103application/vnd.oasis.opendocument.text-template ott
104application/vnd.oasis.opendocument.text-web oth
105application/javascript js
106application/x-shockwave-flash swf
107audio/midi mid midi kar
108audio/mpeg mpga mpa mp2 mp3
109audio/x-aiff aif aiff aifc
110audio/x-wav wav
111audio/ogg oga spx ogg opus
112audio/opus opus ogg oga ogg spx
113image/x-bmp bmp
114image/gif gif
115image/jpeg jpeg jpg jpe
116image/png png
117image/svg+xml svg
118image/svg svg
119image/tiff tiff tif
120image/vnd.djvu djvu
121image/x.djvu djvu
122image/x-djvu djvu
123image/x-portable-pixmap ppm
124image/x-xcf xcf
125text/plain txt
126text/html html htm
127video/ogg ogv ogm ogg
128video/mpeg mpg mpeg
129EOT;
130
/**
 * Built-in MIME info table that loadFiles() always parses first.
 * Each line lists a main MIME type, optional aliases for it, and a
 * bracketed media type name that applies to every type on the line.
 */
137 protected static $wellKnownInfo = <<<EOT
138application/pdf [OFFICE]
139application/vnd.oasis.opendocument.chart [OFFICE]
140application/vnd.oasis.opendocument.chart-template [OFFICE]
141application/vnd.oasis.opendocument.database [OFFICE]
142application/vnd.oasis.opendocument.formula [OFFICE]
143application/vnd.oasis.opendocument.formula-template [OFFICE]
144application/vnd.oasis.opendocument.graphics [OFFICE]
145application/vnd.oasis.opendocument.graphics-template [OFFICE]
146application/vnd.oasis.opendocument.image [OFFICE]
147application/vnd.oasis.opendocument.image-template [OFFICE]
148application/vnd.oasis.opendocument.presentation [OFFICE]
149application/vnd.oasis.opendocument.presentation-template [OFFICE]
150application/vnd.oasis.opendocument.spreadsheet [OFFICE]
151application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
152application/vnd.oasis.opendocument.text [OFFICE]
153application/vnd.oasis.opendocument.text-template [OFFICE]
154application/vnd.oasis.opendocument.text-master [OFFICE]
155application/vnd.oasis.opendocument.text-web [OFFICE]
156application/javascript text/javascript application/x-javascript [EXECUTABLE]
157application/x-shockwave-flash [MULTIMEDIA]
158audio/midi [AUDIO]
159audio/x-aiff [AUDIO]
160audio/x-wav [AUDIO]
161audio/mp3 audio/mpeg [AUDIO]
162application/ogg audio/ogg video/ogg [MULTIMEDIA]
163image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
164image/gif [BITMAP]
165image/jpeg [BITMAP]
166image/png [BITMAP]
167image/svg+xml [DRAWING]
168image/tiff [BITMAP]
169image/vnd.djvu [BITMAP]
170image/x-xcf [BITMAP]
171image/x-portable-pixmap [BITMAP]
172text/plain [TEXT]
173text/html [TEXT]
174video/ogg [VIDEO]
175video/mpeg [VIDEO]
176unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
177EOT;
178
/**
 * Store the configuration and load the MIME tables right away.
 *
 * @param array $params Configuration map:
 *  - typeFile, infoFile, xmlTypes: read without a fallback, so they must be present
 *  - initCallback, detectCallback, guessCallback, extCallback: optional, default null
 *  - logger: optional; a NullLogger is used when absent
 */
public function __construct( array $params ) {
	// Mandatory settings are taken exactly as supplied.
	$this->typeFile = $params['typeFile'];
	$this->infoFile = $params['infoFile'];
	$this->xmlTypes = $params['xmlTypes'];

	// Optional hooks: a missing key simply leaves the hook disabled.
	foreach ( [ 'initCallback', 'detectCallback', 'guessCallback', 'extCallback' ] as $hook ) {
		$this->$hook = $params[$hook] ?? null;
	}

	$this->logger = $params['logger'] ?? new \Psr\Log\NullLogger();

	$this->loadFiles();
}
206
/**
 * Build the lookup tables from the built-in definitions, the optional
 * type/info files and any text registered through addExtraTypes() /
 * addExtraInfo().
 *
 * Fills $mimetoExt, $mExtToMime, $mediaTypes and $mimeTypeAliases.
 * The init callback runs first so it can still register extra lines.
 */
protected function loadFiles() {
	# Allow media handling extensions adding MIME-types and MIME-info
	if ( $this->initCallback ) {
		call_user_func( $this->initCallback, $this );
	}

	/*
	 * --- load mime.types ---
	 */
	$types = self::$wellKnownTypes;

	$mimeTypeFile = $this->typeFile;
	if ( $mimeTypeFile ) {
		if ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
			$this->logger->info( __METHOD__ . ": loading mime types from $mimeTypeFile\n" );
			$types .= "\n";
			$types .= file_get_contents( $mimeTypeFile );
		} else {
			$this->logger->info( __METHOD__ . ": can't load mime types from $mimeTypeFile\n" );
		}
	} else {
		$this->logger->info( __METHOD__ .
			": no mime types file defined, using built-ins only.\n" );
	}

	$types .= "\n" . $this->extraTypes;

	// Normalise line endings and tabs so the line parser only sees "\n" and " "
	$types = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $types );
	$types = str_replace( "\t", " ", $types );

	$this->mimetoExt = [];
	$this->mExtToMime = [];

	foreach ( explode( "\n", $types ) as $s ) {
		$s = trim( $s );
		if ( empty( $s ) ) {
			continue;
		}
		if ( strpos( $s, '#' ) === 0 ) {
			// Comment line
			continue;
		}

		$s = strtolower( $s );
		$i = strpos( $s, ' ' );

		if ( $i === false ) {
			// A MIME type without any extension carries no mapping
			continue;
		}

		$mime = substr( $s, 0, $i );
		$ext = trim( substr( $s, $i + 1 ) );

		if ( empty( $ext ) ) {
			continue;
		}

		// The same MIME type may appear on several lines; accumulate its extensions
		if ( !empty( $this->mimetoExt[$mime] ) ) {
			$this->mimetoExt[$mime] .= ' ' . $ext;
		} else {
			$this->mimetoExt[$mime] = $ext;
		}

		foreach ( explode( ' ', $ext ) as $e ) {
			$e = trim( $e );
			if ( empty( $e ) ) {
				continue;
			}

			if ( !empty( $this->mExtToMime[$e] ) ) {
				$this->mExtToMime[$e] .= ' ' . $mime;
			} else {
				$this->mExtToMime[$e] = $mime;
			}
		}
	}

	/*
	 * --- load mime.info ---
	 */
	$mimeInfoFile = $this->infoFile;

	$info = self::$wellKnownInfo;

	if ( $mimeInfoFile ) {
		if ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
			$this->logger->info( __METHOD__ . ": loading mime info from $mimeInfoFile\n" );
			$info .= "\n";
			$info .= file_get_contents( $mimeInfoFile );
		} else {
			$this->logger->info( __METHOD__ . ": can't load mime info from $mimeInfoFile\n" );
		}
	} else {
		$this->logger->info( __METHOD__ .
			": no mime info file defined, using built-ins only.\n" );
	}

	$info .= "\n" . $this->extraInfo;

	$info = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $info );
	$info = str_replace( "\t", " ", $info );

	$this->mimeTypeAliases = [];
	$this->mediaTypes = [];

	foreach ( explode( "\n", $info ) as $s ) {
		$s = trim( $s );
		if ( empty( $s ) ) {
			continue;
		}
		if ( strpos( $s, '#' ) === 0 ) {
			continue;
		}

		$s = strtolower( $s );

		if ( strpos( $s, ' ' ) === false ) {
			continue;
		}

		// Pull the bracketed media type out of the line; the rest is MIME types
		$match = [];
		if ( preg_match( '!\[\s*(\w+)\s*\]!', $s, $match ) ) {
			$s = preg_replace( '!\[\s*(\w+)\s*\]!', '', $s );
			$mtype = trim( strtoupper( $match[1] ) );
		} else {
			$mtype = MEDIATYPE_UNKNOWN;
		}

		if ( !isset( $this->mediaTypes[$mtype] ) ) {
			$this->mediaTypes[$mtype] = [];
		}

		// Removing the tag leaves stray spaces behind, so drop blank tokens
		// before they can be registered as a main type or as an alias.
		$mimes = [];
		foreach ( explode( ' ', $s ) as $token ) {
			$token = trim( $token );
			if ( $token !== '' ) {
				$mimes[] = $token;
			}
		}

		foreach ( $mimes as $mime ) {
			$this->mediaTypes[$mtype][] = $mime;
		}

		// The first type on the line is the main one; all others alias it
		$main = array_shift( $mimes );
		foreach ( $mimes as $alias ) {
			$this->mimeTypeAliases[$alias] = $main;
		}
	}
}
368
/**
 * Replace the logger used for diagnostic messages.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	// Plain assignment: later log calls go to the new logger.
	$this->logger = $logger;
}
372
/**
 * Queue additional MIME type lines for loadFiles().
 *
 * loadFiles() runs from the constructor, so in this class the text is
 * only picked up when added from the init callback.
 *
 * @param string $types Lines in the same format as the built-in type table
 */
public function addExtraTypes( $types ) {
	$this->extraTypes = $this->extraTypes . "\n" . $types;
}
382
/**
 * Queue additional MIME info lines for loadFiles().
 *
 * loadFiles() runs from the constructor, so in this class the text is
 * only picked up when added from the init callback.
 *
 * @param string $info Lines in the same format as the built-in info table
 */
public function addExtraInfo( $info ) {
	$this->extraInfo = $this->extraInfo . "\n" . $info;
}
392
/**
 * Look up the file extensions registered for a MIME type.
 *
 * The type itself is tried first; if it has no entry but is a known
 * alias, the main type it points to is tried instead.
 *
 * @param string $mime MIME type, matched case-insensitively
 * @return string|null Space-separated extensions, or null when nothing is registered
 */
public function getExtensionsForType( $mime ) {
	$key = strtolower( $mime );

	// Direct hit in the mime-to-ext map
	if ( isset( $this->mimetoExt[$key] ) ) {
		return $this->mimetoExt[$key];
	}

	// Otherwise resolve an alias to its main type and retry once
	$canonical = $this->mimeTypeAliases[$key] ?? null;
	if ( $canonical !== null && isset( $this->mimetoExt[$canonical] ) ) {
		return $this->mimetoExt[$canonical];
	}

	return null;
}
419
/**
 * Look up the MIME types registered for a file extension.
 *
 * @param string $ext File extension without the dot, matched case-insensitively
 * @return string|null Space-separated MIME types, or null when the extension is unknown
 */
public function getTypesForExtension( $ext ) {
	return $this->mExtToMime[strtolower( $ext )] ?? null;
}
433
/**
 * Return the first MIME type registered for a file extension.
 *
 * @param string $ext File extension without the dot
 * @return string|null First registered MIME type, or null when the extension is unknown
 */
public function guessTypesForExtension( $ext ) {
	$registered = $this->getTypesForExtension( $ext );
	if ( $registered === null ) {
		return null;
	}

	// TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
	// Keep only what precedes the first whitespace character.
	return preg_replace( '/\s.*$/', '', trim( $registered ) );
}
453
/**
 * Tell whether a file extension is registered for a MIME type.
 *
 * @param string $extension File extension without the dot, matched case-insensitively
 * @param string $mime MIME type to check against
 * @return bool|null True or false for a known MIME type, null when no
 *   extensions are registered for it
 */
public function isMatchingExtension( $extension, $mime ) {
	$registered = $this->getExtensionsForType( $mime );

	if ( !$registered ) {
		return null; // Unknown MIME type
	}

	// Extension lists may contain doubled spaces; drop the blank tokens so
	// an empty extension can never count as a match.
	$known = array_filter( explode( ' ', $registered ), 'strlen' );

	// Strict comparison: loose string comparison treats numeric-looking
	// extensions such as '1e3' and '1000' as equal.
	return in_array( strtolower( $extension ), $known, true );
}
475
/**
 * Tell whether a MIME type is in the hard-coded list of PHP image types.
 *
 * @param string $mime MIME type; compared exactly as given
 * @return bool
 */
public function isPHPImageType( $mime ) {
	// As defined by imagegetsize and image_type_to_mime
	static $types = [
		'image/gif', 'image/jpeg', 'image/png',
		'image/x-bmp', 'image/xbm', 'image/tiff',
		'image/jp2', 'image/jpeg2000', 'image/iff',
		'image/x-xbitmap',
		'image/vnd.wap.wbmp', 'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	];

	// Strict comparison: a loose in_array() reports non-string input such
	// as true (or 0 on PHP 7) as a match.
	return in_array( $mime, $types, true );
}
498
/**
 * Tell whether an extension is in the hard-coded list of formats this
 * class expects to identify from file content.
 *
 * @param string $extension File extension without the dot, matched case-insensitively
 * @return bool
 */
function isRecognizableExtension( $extension ) {
	static $recognizable = [
		// Types recognized by getimagesize()
		'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
		'bmp', 'tiff', 'tif', 'jpc', 'jp2',
		'jpx', 'jb2', 'swc', 'iff', 'wbmp',
		'xbm',

		// Formats we recognize magic numbers for
		'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx', 'opus',
		'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
		'webp', 'mp3',

		// XML formats we sure hope we recognize reliably
		'svg',

		// 3D formats
		'stl',
	];

	$normalized = strtolower( $extension );

	return in_array( $normalized, $recognizable );
}
532
/**
 * Refine a content-detected MIME type using the file extension.
 *
 * Only three detected types are refined: 'unknown/unknown',
 * 'application/x-opc+zip' and 'text/plain'. Afterwards the extension
 * callback may adjust the result, and a known alias is replaced by its
 * main type.
 *
 * @param string $mime MIME type detected from the file content
 * @param string $ext File extension without the dot
 * @return string|null The refined type; whatever guessTypesForExtension()
 *   returned when the extension had to be trusted
 */
public function improveTypeFromExtension( $mime, $ext ) {
	if ( $mime === 'unknown/unknown' ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->guessTypesForExtension( $ext );
		} else {
			$this->logger->info( __METHOD__ . ': refusing to guess mime type for .' .
				"$ext file, we should have recognized it\n" );
		}
	} elseif ( $mime === 'application/x-opc+zip' ) {
		if ( !$this->isMatchingExtension( $ext, $mime ) ) {
			$this->logger->info( __METHOD__ .
				": refusing to guess better type for $mime file, " .
				".$ext is not a known OPC extension.\n" );
			$mime = 'application/zip';
		} else {
			// A known file extension for an OPC file,
			// find the proper MIME type for that file extension
			$mime = $this->guessTypesForExtension( $ext );
		}
	} elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
		// Textual types are sometimes not recognized properly.
		// If detected as text/plain, and has an extension which is textual
		// improve to the extension's type. For example, csv and json are often
		// misdetected as text/plain.
		$mime = $this->guessTypesForExtension( $ext );
	}

	# Media handling extensions can improve the MIME detected
	$improve = $this->extCallback;
	if ( $improve ) {
		$improve( $this, $ext, $mime /* by reference */ );
	}

	// Replace an alias by its main type; anything else passes through
	$mime = $this->mimeTypeAliases[$mime] ?? $mime;

	$this->logger->info( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
	return $mime;
}
586
/**
 * Guess the MIME type of a file from its content.
 *
 * The built-in content checks run first; when they yield nothing,
 * detectMimeType() is used. A known alias is replaced by its main type.
 *
 * @param string $file Path of the file to inspect
 * @param string|bool $ext Deprecated; any truthy value logs a warning.
 *   It is passed on to both detection steps.
 * @return string The guessed MIME type
 */
public function guessMimeType( $file, $ext = true ) {
	if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$guessed = $this->doGuessMimeType( $file, $ext );

	if ( !$guessed ) {
		$this->logger->info( __METHOD__ .
			": internal type detection failed for $file (.$ext)...\n" );
		$guessed = $this->detectMimeType( $file, $ext );
	}

	// Replace an alias by its main type; anything else passes through
	$guessed = $this->mimeTypeAliases[$guessed] ?? $guessed;

	$this->logger->info( __METHOD__ . ": guessed mime type of $file: $guessed\n" );
	return $guessed;
}
623
/**
 * Guess the MIME type from the file content.
 *
 * Reads up to 1024 bytes from the start and up to 65558 bytes from the
 * end of the file, then runs a fixed sequence of checks: magic numbers,
 * EBML (WebM/Matroska), WebP, MS compound files, PHP code, well-formed
 * XML, shebang scripts, ZIP variants, STL, getimagesize() and finally
 * the guess callback. The first check that matches wins.
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Deprecated extension hint; only forwarded to detectZipType()
 * @return bool|string The detected MIME type, 'unknown/unknown' when the
 *   file cannot be read or is an unrecognised EBML file, or false (unless
 *   the guess callback sets a value) when no check matched
 * @throws UnexpectedValueException When seeking to the tail of the file fails
 */
private function doGuessMimeType( $file, $ext ) {
	// Read a chunk of the file
	Wikimedia\suppressWarnings();
	$f = fopen( $file, 'rb' );
	Wikimedia\restoreWarnings();

	if ( !$f ) {
		return 'unknown/unknown';
	}

	// Everything below runs inside try/finally so the handle is released
	// on every return path and when the seek failure is thrown.
	try {
		$fsize = filesize( $file );
		if ( $fsize === false ) {
			return 'unknown/unknown';
		}

		$head = fread( $f, 1024 );
		$tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
		if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
			throw new UnexpectedValueException(
				"Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
		}
		$tail = $tailLength ? fread( $f, $tailLength ) : '';

		$this->logger->info( __METHOD__ .
			": analyzing head and tail of $file for magic numbers.\n" );

		// Hardcode a few magic number checks...
		$headers = [
			// Multimedia...
			'MThd' => 'audio/midi',
			'OggS' => 'application/ogg',
			'ID3' => 'audio/mpeg',
			"\xff\xfb" => 'audio/mpeg', // MPEG-1 layer 3
			"\xff\xf3" => 'audio/mpeg', // MPEG-2 layer 3 (lower sample rates)
			"\xff\xe3" => 'audio/mpeg', // MPEG-2.5 layer 3 (very low sample rates)

			// Image formats...
			// Note that WMF may have a bare header, no magic number.
			"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
			"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
			'%PDF' => 'application/pdf',
			'gimp xcf' => 'image/x-xcf',

			// Some forbidden fruit...
			'MZ' => 'application/octet-stream', // DOS/Windows executable
			"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
			"\x7fELF" => 'application/octet-stream', // ELF binary
		];

		foreach ( $headers as $magic => $candidate ) {
			if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
				$this->logger->info( __METHOD__ .
					": magic header in $file recognized as $candidate\n" );
				return $candidate;
			}
		}

		/* Look for WebM and Matroska files */
		if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
			$doctype = strpos( $head, "\x42\x82" );
			if ( $doctype ) {
				// Next byte is datasize, then data (sizes larger than 1 byte are stupid muxers)
				$data = substr( $head, $doctype + 3, 8 );
				if ( strncmp( $data, "matroska", 8 ) == 0 ) {
					$this->logger->info( __METHOD__ . ": recognized file as video/x-matroska\n" );
					return "video/x-matroska";
				} elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
					// XXX HACK look for a video track, if we don't find it, this is an audio file
					$videotrack = strpos( $head, "\x86\x85V_VP" );

					if ( $videotrack ) {
						// There is a video track, so this is a video file.
						$this->logger->info( __METHOD__ . ": recognized file as video/webm\n" );
						return "video/webm";
					}

					$this->logger->info( __METHOD__ . ": recognized file as audio/webm\n" );
					return "audio/webm";
				}
			}
			$this->logger->info( __METHOD__ . ": unknown EBML file\n" );
			return "unknown/unknown";
		}

		/* Look for WebP */
		if ( strncmp( $head, "RIFF", 4 ) == 0 &&
			strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0
		) {
			$this->logger->info( __METHOD__ . ": recognized file as image/webp\n" );
			return "image/webp";
		}

		/* Look for MS Compound Binary (OLE) files */
		if ( strncmp( $head, "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1", 8 ) == 0 ) {
			$this->logger->info( __METHOD__ . ': recognized MS CFB (OLE) file' );
			return $this->detectMicrosoftBinaryType( $f );
		}

		// Look for PHP code: the plain '<?php' opener, plus UTF-16LE
		// encoded '<?php' and '<?' followed by space, newline, tab or '='.
		if ( ( strpos( $head, '<?php' ) !== false ) ||
			( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
			( strpos( $head, "<\x00?\x00 " ) !== false ) ||
			( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
			( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
			( strpos( $head, "<\x00?\x00=" ) !== false )
		) {
			$this->logger->info( __METHOD__ . ": recognized $file as application/x-php\n" );
			return 'application/x-php';
		}

		// Look for XML formats: a well-formed document is mapped through
		// its root element, falling back to application/xml.
		Wikimedia\suppressWarnings();
		$xml = new XmlTypeCheck( $file );
		Wikimedia\restoreWarnings();
		if ( $xml->wellFormed ) {
			$xmlTypes = $this->xmlTypes;
			return $xmlTypes[$xml->getRootElement()] ?? 'application/xml';
		}

		// Look for shell scripts
		$script_type = null;

		# detect by shebang
		if ( substr( $head, 0, 2 ) == "#!" ) {
			$script_type = "ASCII";
		} elseif ( substr( $head, 0, 5 ) == "\xef\xbb\xbf#!" ) {
			$script_type = "UTF-8";
		} elseif ( substr( $head, 0, 6 ) === "\xfe\xff\x00#\x00!" ) {
			// BOM (2 bytes) + "#!" as two big-endian 16-bit units = 6 bytes
			$script_type = "UTF-16BE";
		} elseif ( substr( $head, 0, 6 ) === "\xff\xfe#\x00!\x00" ) {
			// BOM (2 bytes) + "#!" as two little-endian 16-bit units = 6 bytes
			$script_type = "UTF-16LE";
		}

		if ( $script_type ) {
			if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
				// Quick and dirty fold down to ASCII!
				$pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
				// Skip the 2-byte BOM before decoding the 16-bit units
				$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
				$head = '';
				foreach ( $chars as $codepoint ) {
					if ( $codepoint < 128 ) {
						$head .= chr( $codepoint );
					} else {
						$head .= '?';
					}
				}
			}

			$match = [];

			if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
				$mime = "application/x-{$match[2]}";
				$this->logger->info( __METHOD__ . ": shell script recognized as $mime\n" );
				return $mime;
			}
		}

		// Check for ZIP variants (before getimagesize)
		$eocdrPos = strpos( $tail, "PK\x05\x06" );
		if ( $eocdrPos !== false ) {
			$this->logger->info( __METHOD__ . ": ZIP signature present in $file\n" );
			// Check if it really is a ZIP file, make sure the EOCDR is at the end (T40432)
			$tailSize = strlen( $tail );
			$commentLength = null;
			if ( $eocdrPos + 22 <= $tailSize ) {
				// The comment length is a little-endian 16-bit value at
				// offset 20 of the EOCDR; unpack() results are 1-indexed.
				$commentLength = unpack( 'v', substr( $tail, $eocdrPos + 20, 2 ) )[1];
			}
			if ( $commentLength === null || $eocdrPos + 22 + $commentLength !== $tailSize ) {
				$this->logger->info( __METHOD__ . ": ZIP EOCDR not at end. Not a ZIP file." );
			} else {
				return $this->detectZipType( $head, $tail, $ext );
			}
		}

		// Check for STL (3D) files
		// @see https://en.wikipedia.org/wiki/STL_(file_format)
		if ( $fsize >= 15 &&
			stripos( $head, 'SOLID ' ) === 0 &&
			preg_match( '/\RENDSOLID .*$/i', $tail ) ) {
			// ASCII STL file
			return 'application/sla';
		} elseif ( $fsize > 84 ) {
			// binary STL file
			$triangles = substr( $head, 80, 4 );
			$triangles = unpack( 'V', $triangles );
			$triangles = reset( $triangles );
			if ( $triangles !== false && $fsize === 84 + ( $triangles * 50 ) ) {
				return 'application/sla';
			}
		}

		Wikimedia\suppressWarnings();
		$gis = getimagesize( $file );
		Wikimedia\restoreWarnings();

		if ( $gis && isset( $gis['mime'] ) ) {
			$mime = $gis['mime'];
			$this->logger->info( __METHOD__ . ": getimagesize detected $file as $mime\n" );
			return $mime;
		}

		# Media handling extensions can guess the MIME by content
		# It's intentionally here so that if core is wrong about a type (false positive),
		# people will hopefully nag and submit patches :)
		$mime = false;
		# Some strings by reference for performance - assuming well-behaved hooks
		$callback = $this->guessCallback;
		if ( $callback ) {
			$callback( $this, $head, $tail, $file, $mime /* by reference */ );
		}

		return $mime;
	} finally {
		fclose( $f );
	}
}
858
/**
 * Classify a ZIP-based file from the bytes at its start and end.
 *
 * Recognises OpenDocument files, Open Packaging Conventions archives
 * and MS Office compound files that carry an OPC trailer; anything else
 * stays 'application/zip'.
 *
 * @param string $header Bytes from the start of the file; offsets 30 and 512 are inspected
 * @param string|null $tail Bytes from the end of the file; only used for the OPC trailer check
 * @param string|bool $ext Deprecated file extension; a non-boolean value
 *   is still used to refine OPC archives
 * @return string The detected MIME type
 */
public function detectZipType( $header, $tail = null, $ext = false ) {
	if ( $ext ) { # TODO: remove $ext param
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$mime = 'application/zip';
	// Longer names precede their prefixes so the alternation prefers them
	$opendocTypes = [
		'chart-template',
		'chart',
		'formula-template',
		'formula',
		'graphics-template',
		'graphics',
		'image-template',
		'image',
		'presentation-template',
		'presentation',
		'spreadsheet-template',
		'spreadsheet',
		'text-template',
		'text-master',
		'text-web',
		'text' ];

	// https://lists.oasis-open.org/archives/office/200505/msg00006.html
	$types = '(?:' . implode( '|', $opendocTypes ) . ')';
	$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

	// The dot is escaped so only the literal name "[Content_Types].xml" matches
	$openxmlRegex = "/^\[Content_Types\]\.xml/";

	// The regexes are applied from byte 30 of the header onwards
	$firstEntryName = substr( $header, 30 );

	if ( preg_match( $opendocRegex, $firstEntryName, $matches ) ) {
		$mime = $matches[1];
		$this->logger->info( __METHOD__ . ": detected $mime from ZIP archive\n" );
	} elseif ( preg_match( $openxmlRegex, $firstEntryName ) ) {
		$mime = "application/x-opc+zip";
		# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
		if ( $ext !== true && $ext !== false ) {
			/** This is the mode used by getPropsFromPath
			 * These MIME's are stored in the database, where we don't really want
			 * x-opc+zip, because we use it only for internal purposes
			 */
			if ( $this->isMatchingExtension( $ext, $mime ) ) {
				/* A known file extension for an OPC file,
				 * find the proper mime type for that file extension
				 */
				$mime = $this->guessTypesForExtension( $ext );
			} else {
				$mime = "application/zip";
			}
		}
		$this->logger->info( __METHOD__ .
			": detected an Open Packaging Conventions archive: $mime\n" );
	} elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
		// $tail defaults to null; without a tail there is no trailer to find
		is_string( $tail ) &&
		( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
		preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
		if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
			$mime = "application/msword";
		}
		switch ( substr( $header, 512, 6 ) ) {
			case "\xEC\xA5\xC1\x00\x0E\x00":
			case "\xEC\xA5\xC1\x00\x1C\x00":
			case "\xEC\xA5\xC1\x00\x43\x00":
				$mime = "application/vnd.ms-powerpoint";
				break;
			case "\xFD\xFF\xFF\xFF\x10\x00":
			case "\xFD\xFF\xFF\xFF\x1F\x00":
			case "\xFD\xFF\xFF\xFF\x22\x00":
			case "\xFD\xFF\xFF\xFF\x23\x00":
			case "\xFD\xFF\xFF\xFF\x28\x00":
			case "\xFD\xFF\xFF\xFF\x29\x00":
			case "\xFD\xFF\xFF\xFF\x10\x02":
			case "\xFD\xFF\xFF\xFF\x1F\x02":
			case "\xFD\xFF\xFF\xFF\x22\x02":
			case "\xFD\xFF\xFF\xFF\x23\x02":
			case "\xFD\xFF\xFF\xFF\x28\x02":
			case "\xFD\xFF\xFF\xFF\x29\x02":
				$mime = "application/vnd.msexcel";
				break;
		}

		$this->logger->info( __METHOD__ .
			": detected a MS Office document with OPC trailer\n" );
	} else {
		$this->logger->info( __METHOD__ . ": unable to identify type of ZIP archive\n" );
	}
	return $mime;
}
961
/**
 * Determine the MIME type of an MS compound file through MSCompoundFileReader.
 *
 * @param resource $handle Open handle, passed to MSCompoundFileReader::readHandle()
 * @return string The MIME type reported by the reader, or 'unknown/unknown'
 *   when the file is invalid or the reader reports no type
 */
function detectMicrosoftBinaryType( $handle ) {
	$info = MSCompoundFileReader::readHandle( $handle );

	// Work out why detection failed, if it did; 'mime' is only looked at
	// for a valid file.
	$failure = null;
	if ( !$info['valid'] ) {
		$failure = ': invalid file format';
	} elseif ( !$info['mime'] ) {
		$failure = ": unrecognised document subtype";
	}

	if ( $failure !== null ) {
		$this->logger->info( __METHOD__ . $failure );
		return 'unknown/unknown';
	}

	return $info['mime'];
}
981
/**
 * Detect the MIME type with the detect callback or mime_content_type(),
 * falling back to the file extension.
 *
 * @param string $file Path of the file to inspect
 * @param string|bool $ext Deprecated. true derives the extension from
 *   $file, a string is used as the extension, false disables the fallback
 * @return string The detected MIME type, or 'unknown/unknown'
 */
private function detectMimeType( $file, $ext = true ) {
	if ( $ext ) {
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. "
			. "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$detector = $this->detectCallback;
	$raw = $detector ? $detector( $file ) : mime_content_type( $file );

	if ( $raw ) {
		# normalize: strip charset etc., surrounding whitespace and case
		$detected = strtolower( trim( preg_replace( '![;, ].*$!', '', $raw ) ) );

		// A result mentioning "unknown" counts as no result at all
		if ( strpos( $detected, 'unknown' ) === false ) {
			$this->logger->info( __METHOD__ . ": magic mime type of $file: $detected\n" );
			return $detected;
		}
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$dot = strrpos( $file, '.' );
		$ext = strtolower( $dot ? substr( $file, $dot + 1 ) : '' );
	}
	if ( $ext ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			$byExtension = $this->guessTypesForExtension( $ext );
			if ( $byExtension ) {
				$this->logger->info( __METHOD__ . ": extension mime type of $file: $byExtension\n" );
				return $byExtension;
			}
		} else {
			$this->logger->info( __METHOD__ . ": refusing to guess mime type for .$ext file, "
				. "we should have recognized it\n" );
		}
	}

	// Unknown type
	$this->logger->info( __METHOD__ . ": failed to guess mime type for $file!\n" );
	return 'unknown/unknown';
}
1051
/**
 * Determine the media type (one of the MEDIATYPE_xxx constants) of a file.
 *
 * Lookup order: special handling of Ogg containers, the full MIME type,
 * the file extension, and finally the major part of the MIME type.
 *
 * @param string|null $path Path of the file; used to guess the MIME type
 *   when none is given, to inspect Ogg files and for the extension lookup
 * @param string|null $mime MIME type, if already known
 * @return string A MEDIATYPE_xxx constant; MEDIATYPE_UNKNOWN when nothing matches
 */
public function getMediaType( $path = null, $mime = null ) {
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If MIME type is unknown, guess it
	if ( !$mime ) {
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	if ( $mime == 'application/ogg' && $path && file_exists( $path ) ) {
		// Read a chunk of the file. Binary mode: Ogg is not text, and the
		// 't' flag would enable line-ending translation on Windows.
		$f = fopen( $path, 'rb' );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		// Drop the encoder name so it is not mistaken for the theora codec
		$head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

		// This is an UGLY HACK, file should be parsed correctly
		if ( strpos( $head, 'theora' ) !== false ) {
			return MEDIATYPE_VIDEO;
		}
		foreach ( [ 'vorbis', 'flac', 'speex', 'opus' ] as $audioCodec ) {
			if ( strpos( $head, $audioCodec ) !== false ) {
				return MEDIATYPE_AUDIO;
			}
		}
		return MEDIATYPE_MULTIMEDIA;
	}

	$type = null;
	// Check for entry for full MIME type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major MIME type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$major = substr( $mime, 0, $i );
			$type = $this->findMediaType( $major );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Never hand back an empty value: fall back to the unknown media type
	if ( !$type ) {
		$type = MEDIATYPE_UNKNOWN;
	}

	return $type;
}
1146
/**
 * Look up the media type for a MIME type or a file extension.
 *
 * @param string $extMime A MIME type, or a file extension prefixed with a dot
 * @return string The name of the first media type listing one of the
 *   candidate MIME types, or MEDIATYPE_UNKNOWN
 */
function findMediaType( $extMime ) {
	if ( strpos( $extMime, '.' ) === 0 ) {
		// If it's an extension, look up the MIME types
		$registered = $this->getTypesForExtension( substr( $extMime, 1 ) );
		if ( !$registered ) {
			return MEDIATYPE_UNKNOWN;
		}

		$candidates = explode( ' ', $registered );
	} else {
		// Normalize MIME type: an alias is replaced by its main type
		$candidates = [ $this->mimeTypeAliases[$extMime] ?? $extMime ];
	}

	foreach ( $candidates as $candidate ) {
		foreach ( $this->mediaTypes as $mediaType => $members ) {
			if ( in_array( $candidate, $members, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
1185
/**
 * List the names of all media types in the loaded MIME info table.
 *
 * @return array Keys of the media type map
 */
public function getMediaTypes() {
	$names = array_keys( $this->mediaTypes );

	return $names;
}
1194
/**
 * Delegate to IEContentAnalyzer::getRealMimesFromData().
 *
 * All three arguments are forwarded unchanged.
 *
 * @param string $fileName
 * @param string $chunk
 * @param string $proposed
 * @return mixed Whatever the IE content analyzer returns
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
	return $this->getIEContentAnalyzer()->getRealMimesFromData( $fileName, $chunk, $proposed );
}
1208
/**
 * Return the IEContentAnalyzer, creating it on first use.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
	// Created lazily and kept for all later calls
	if ( $this->IEAnalyzer === null ) {
		$this->IEAnalyzer = new IEContentAnalyzer;
	}

	return $this->IEAnalyzer;
}
1220}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
This list may contain false positives That usually means there is additional text with links below the first Each row contains links to the first and second as well as the first line of the second redirect text
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection a...
static readHandle( $fileHandle)
Read from an open seekable handle.
$data
Utility to generate mapping file used in mw.Title (phpCharToUpper.json)
either a plain
Definition hooks.txt:2054
for adding new MIME info to the list Use $mimeMagic addExtraTypes( $stringOfTypes)
Using a hook running we can avoid having all this option specific stuff in our mainline code Using the function We ve cleaned up the code here by removing clumps of infrequently used code and moving them off somewhere else It s much easier for someone working with this code to see what s _really_ going and make changes or fix bugs In we can take all the code that deals with the little used title reversing etc
Definition hooks.txt:91
returning false will NOT prevent logging $e
Definition hooks.txt:2175
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
const MEDIATYPE_VIDEO
Definition defines.php:35
const MEDIATYPE_UNKNOWN
Definition defines.php:26
const MEDIATYPE_AUDIO
Definition defines.php:32
const MEDIATYPE_TEXT
Definition defines.php:41
const MEDIATYPE_MULTIMEDIA
Definition defines.php:37
$f
Definition router.php:79
$lines
Definition router.php:61
if(!is_readable( $file)) $ext
Definition router.php:48
$params
$header