MediaWiki  1.27.2
MimeMagic.php
Go to the documentation of this file.
1 <?php
/**
 * Built-in fallback table of well-known MIME types.
 *
 * One entry per line: a MIME type followed by the space-separated file
 * extensions associated with it. MimeMagic::__construct() always starts from
 * this text and appends the configured mime.types file (if readable) and any
 * extra types registered on the object before parsing.
 */
define( 'MM_WELL_KNOWN_MIME_TYPES', <<<'END_STRING'
application/ogg ogx ogg ogm ogv oga spx
application/pdf pdf
application/vnd.oasis.opendocument.chart odc
application/vnd.oasis.opendocument.chart-template otc
application/vnd.oasis.opendocument.database odb
application/vnd.oasis.opendocument.formula odf
application/vnd.oasis.opendocument.formula-template otf
application/vnd.oasis.opendocument.graphics odg
application/vnd.oasis.opendocument.graphics-template otg
application/vnd.oasis.opendocument.image odi
application/vnd.oasis.opendocument.image-template oti
application/vnd.oasis.opendocument.presentation odp
application/vnd.oasis.opendocument.presentation-template otp
application/vnd.oasis.opendocument.spreadsheet ods
application/vnd.oasis.opendocument.spreadsheet-template ots
application/vnd.oasis.opendocument.text odt
application/vnd.oasis.opendocument.text-master otm
application/vnd.oasis.opendocument.text-template ott
application/vnd.oasis.opendocument.text-web oth
application/javascript js
application/x-shockwave-flash swf
audio/midi mid midi kar
audio/mpeg mpga mpa mp2 mp3
audio/x-aiff aif aiff aifc
audio/x-wav wav
audio/ogg oga spx ogg
image/x-bmp bmp
image/gif gif
image/jpeg jpeg jpg jpe
image/png png
image/svg+xml svg
image/svg svg
image/tiff tiff tif
image/vnd.djvu djvu
image/x.djvu djvu
image/x-djvu djvu
image/x-portable-pixmap ppm
image/x-xcf xcf
text/plain txt
text/html html htm
video/ogg ogv ogm ogg
video/mpeg mpg mpeg
END_STRING
);
87 
/**
 * Built-in fallback table of well-known MIME info entries.
 *
 * One entry per line: a canonical MIME type, optionally followed by aliases
 * for it, and a media type tag in square brackets. MimeMagic::__construct()
 * always starts from this text and appends the configured mime.info file
 * (if readable) and any extra info registered on the object before parsing.
 */
define( 'MM_WELL_KNOWN_MIME_INFO', <<<'END_STRING'
application/pdf [OFFICE]
application/vnd.oasis.opendocument.chart [OFFICE]
application/vnd.oasis.opendocument.chart-template [OFFICE]
application/vnd.oasis.opendocument.database [OFFICE]
application/vnd.oasis.opendocument.formula [OFFICE]
application/vnd.oasis.opendocument.formula-template [OFFICE]
application/vnd.oasis.opendocument.graphics [OFFICE]
application/vnd.oasis.opendocument.graphics-template [OFFICE]
application/vnd.oasis.opendocument.image [OFFICE]
application/vnd.oasis.opendocument.image-template [OFFICE]
application/vnd.oasis.opendocument.presentation [OFFICE]
application/vnd.oasis.opendocument.presentation-template [OFFICE]
application/vnd.oasis.opendocument.spreadsheet [OFFICE]
application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
application/vnd.oasis.opendocument.text [OFFICE]
application/vnd.oasis.opendocument.text-template [OFFICE]
application/vnd.oasis.opendocument.text-master [OFFICE]
application/vnd.oasis.opendocument.text-web [OFFICE]
application/javascript text/javascript application/x-javascript [EXECUTABLE]
application/x-shockwave-flash [MULTIMEDIA]
audio/midi [AUDIO]
audio/x-aiff [AUDIO]
audio/x-wav [AUDIO]
audio/mp3 audio/mpeg [AUDIO]
application/ogg audio/ogg video/ogg [MULTIMEDIA]
image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
image/gif [BITMAP]
image/jpeg [BITMAP]
image/png [BITMAP]
image/svg+xml [DRAWING]
image/tiff [BITMAP]
image/vnd.djvu [BITMAP]
image/x-xcf [BITMAP]
image/x-portable-pixmap [BITMAP]
text/plain [TEXT]
text/html [TEXT]
video/ogg [VIDEO]
video/mpeg [VIDEO]
unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
END_STRING
);
136 
144 class MimeMagic {
/** @var array Media type code => list of MIME types; built by the constructor from the MIME info text. */
149  protected $mMediaTypes = null;
150 
/** @var array Alias MIME type => canonical (first-listed) MIME type; built by the constructor. */
153  protected $mMimeTypeAliases = null;
154 
/** @var array MIME type => file extensions, as a space separated list; built by the constructor. */
157  protected $mMimeToExt = null;
158 
/** @var array File extension => MIME types, as a space separated list; built by the constructor. */
161  public $mExtToMime = null;
162 
/** @var IEContentAnalyzer Created on first use by getIEContentAnalyzer(). */
165  protected $mIEAnalyzer;
166 
/** @var string Extra MIME type lines collected by addExtraTypes(); appended to the type text in the constructor. */
169  private $mExtraTypes = '';
170 
/** @var string Extra MIME info lines collected by addExtraInfo(); appended to the info text in the constructor. */
173  private $mExtraInfo = '';
174 
/** @var Config Configuration object the settings are read from. */
176  private $mConfig;
177 
/** @var MimeMagic The shared instance handed out by singleton(). */
180  private static $instance = null;
181 
/**
 * Initializes the MimeMagic object by building its four lookup tables:
 * MIME type => extensions, extension => MIME types, media type => MIME types
 * and MIME alias => canonical MIME type.
 *
 * For each pair of tables the input text is assembled from the built-in
 * constant (MM_WELL_KNOWN_MIME_TYPES / MM_WELL_KNOWN_MIME_INFO), the file
 * named by the 'MimeTypeFile' / 'MimeInfoFile' setting when it is readable,
 * and the text accumulated via addExtraTypes() / addExtraInfo(). The
 * 'MimeMagicInit' hook runs before any of that text is read, so handlers can
 * still register extra entries.
 *
 * @param Config $config Configuration to read settings from. When omitted,
 *   the 'main' config of the default ConfigFactory instance is used.
 */
function __construct( Config $config = null ) {
	global $IP;

	if ( !$config ) {
		wfDebug( __METHOD__ . ' called with no Config instance passed to it' );
		$config = ConfigFactory::getDefaultInstance()->makeConfig( 'main' );
	}
	$this->mConfig = $config;

	# Allow media handling extensions adding MIME-types and MIME-info
	Hooks::run( 'MimeMagicInit', [ $this ] );

	// Line-break sequences that are collapsed to a single "\n" before parsing
	$lineBreaks = [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ];

	/*
	 * Part 1: MIME type <-> file extension tables
	 */
	$typeText = MM_WELL_KNOWN_MIME_TYPES;

	$mimeTypeFile = $this->mConfig->get( 'MimeTypeFile' );
	if ( $mimeTypeFile == 'includes/mime.types' ) {
		// The stock relative path is resolved against the installation directory
		$mimeTypeFile = "$IP/$mimeTypeFile";
	}

	if ( !$mimeTypeFile ) {
		wfDebug( __METHOD__ . ": no mime types file defined, using built-ins only.\n" );
	} elseif ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
		wfDebug( __METHOD__ . ": loading mime types from $mimeTypeFile\n" );
		$typeText .= "\n" . file_get_contents( $mimeTypeFile );
	} else {
		wfDebug( __METHOD__ . ": can't load mime types from $mimeTypeFile\n" );
	}

	$typeText .= "\n" . $this->mExtraTypes;
	$typeText = str_replace( "\t", " ", str_replace( $lineBreaks, "\n", $typeText ) );

	$this->mMimeToExt = [];
	$this->mExtToMime = [];

	foreach ( explode( "\n", $typeText ) as $line ) {
		$line = trim( $line );
		// Skip blank lines and '#' comment lines
		if ( empty( $line ) || strpos( $line, '#' ) === 0 ) {
			continue;
		}

		// "mime ext ext ..." - split at the first space; lines without one are ignored
		$fields = explode( ' ', strtolower( $line ), 2 );
		if ( count( $fields ) < 2 ) {
			continue;
		}

		$mime = $fields[0];
		$ext = trim( $fields[1] );
		if ( empty( $ext ) ) {
			continue;
		}

		// Repeated MIME types accumulate their extensions
		$this->mMimeToExt[$mime] = empty( $this->mMimeToExt[$mime] )
			? $ext
			: $this->mMimeToExt[$mime] . ' ' . $ext;

		foreach ( explode( ' ', $ext ) as $e ) {
			$e = trim( $e );
			if ( empty( $e ) ) {
				continue;
			}

			// Repeated extensions accumulate their MIME types, first seen first
			$this->mExtToMime[$e] = empty( $this->mExtToMime[$e] )
				? $mime
				: $this->mExtToMime[$e] . ' ' . $mime;
		}
	}

	/*
	 * Part 2: media type and MIME alias tables
	 */
	$mimeInfoFile = $this->mConfig->get( 'MimeInfoFile' );
	if ( $mimeInfoFile == 'includes/mime.info' ) {
		$mimeInfoFile = "$IP/$mimeInfoFile";
	}

	$infoText = MM_WELL_KNOWN_MIME_INFO;

	if ( !$mimeInfoFile ) {
		wfDebug( __METHOD__ . ": no mime info file defined, using built-ins only.\n" );
	} elseif ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
		wfDebug( __METHOD__ . ": loading mime info from $mimeInfoFile\n" );
		$infoText .= "\n" . file_get_contents( $mimeInfoFile );
	} else {
		wfDebug( __METHOD__ . ": can't load mime info from $mimeInfoFile\n" );
	}

	$infoText .= "\n" . $this->mExtraInfo;
	$infoText = str_replace( "\t", " ", str_replace( $lineBreaks, "\n", $infoText ) );

	$this->mMimeTypeAliases = [];
	$this->mMediaTypes = [];

	// Matches the "[MEDIATYPE]" tag of an info line
	$mediaTypeTag = '!\[\s*(\w+)\s*\]!';

	foreach ( explode( "\n", $infoText ) as $line ) {
		$line = trim( $line );
		if ( empty( $line ) || strpos( $line, '#' ) === 0 ) {
			continue;
		}

		$line = strtolower( $line );
		if ( strpos( $line, ' ' ) === false ) {
			continue;
		}

		// Pull the media type tag out of the line; untagged lines count as unknown
		$tag = [];
		if ( preg_match( $mediaTypeTag, $line, $tag ) ) {
			$line = preg_replace( $mediaTypeTag, '', $line );
			$mtype = trim( strtoupper( $tag[1] ) );
		} else {
			$mtype = MEDIATYPE_UNKNOWN;
		}

		$names = explode( ' ', $line );

		if ( !isset( $this->mMediaTypes[$mtype] ) ) {
			$this->mMediaTypes[$mtype] = [];
		}

		foreach ( $names as $mime ) {
			$mime = trim( $mime );
			if ( empty( $mime ) ) {
				continue;
			}

			$this->mMediaTypes[$mtype][] = $mime;
		}

		// Every further field on the line (taken as-is, untrimmed) becomes an
		// alias of the first one
		foreach ( array_slice( $names, 1 ) as $alias ) {
			$this->mMimeTypeAliases[$alias] = $names[0];
		}
	}
}
361 
/**
 * Get an instance of this class.
 *
 * The instance is created on the first call, using the 'main' config of the
 * default ConfigFactory instance, and the same object is returned afterwards.
 *
 * @return MimeMagic
 */
public static function singleton() {
	if ( self::$instance !== null ) {
		return self::$instance;
	}

	$mainConfig = ConfigFactory::getDefaultInstance()->makeConfig( 'main' );
	self::$instance = new MimeMagic( $mainConfig );

	return self::$instance;
}
374 
/**
 * Adds to the list mapping MIME to file extensions.
 *
 * The text is only stored here, on a new line of its own; it is the
 * constructor that appends the stored text to the MIME type table and
 * parses it.
 *
 * @param string $types One or more "mime ext ext ..." lines
 */
public function addExtraTypes( $types ) {
	$this->mExtraTypes = $this->mExtraTypes . "\n" . $types;
}
384 
/**
 * Adds to the list mapping MIME to media type.
 *
 * The text is only stored here, on a new line of its own; it is the
 * constructor that appends the stored text to the MIME info table and
 * parses it.
 *
 * @param string $info One or more MIME info lines
 */
public function addExtraInfo( $info ) {
	$this->mExtraInfo = $this->mExtraInfo . "\n" . $info;
}
394 
/**
 * Returns a list of file extensions for a given MIME type as a space
 * separated string, or null if the MIME type was unrecognized.
 *
 * The lower-cased type is looked up directly first; only when that fails is
 * it resolved through the alias table and looked up again.
 *
 * @param string $mime
 * @return string|null
 */
public function getExtensionsForType( $mime ) {
	$key = strtolower( $mime );

	// No direct entry: fall back to the canonical type this one is an alias of
	if ( !isset( $this->mMimeToExt[$key] ) && isset( $this->mMimeTypeAliases[$key] ) ) {
		$key = $this->mMimeTypeAliases[$key];
	}

	return isset( $this->mMimeToExt[$key] ) ? $this->mMimeToExt[$key] : null;
}
421 
/**
 * Returns a list of MIME types for a given file extension as a space
 * separated string, or null if the extension was unrecognized.
 *
 * @param string $ext File extension without the leading dot; matched case-insensitively
 * @return string|null
 */
public function getTypesForExtension( $ext ) {
	$key = strtolower( $ext );

	if ( isset( $this->mExtToMime[$key] ) ) {
		return $this->mExtToMime[$key];
	}

	return null;
}
435 
/**
 * Returns a single MIME type for a given file extension, or null if unknown.
 * This is always the first type from the list returned by getTypesForExtension().
 *
 * @param string $ext
 * @return string|null
 */
public function guessTypesForExtension( $ext ) {
	$types = $this->getTypesForExtension( $ext );
	if ( $types === null ) {
		return null;
	}

	// TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
	// Cut the list off at its first whitespace character
	return preg_replace( '/\s.*$/', '', trim( $types ) );
}
455 
/**
 * Tests if the extension matches the given MIME type.
 *
 * @param string $extension File extension, compared case-insensitively
 * @param string $mime
 * @return bool|null True if the extension is listed for the MIME type, false
 *   if it is not, null if no extensions are known for the MIME type
 */
public function isMatchingExtension( $extension, $mime ) {
	$ext = $this->getExtensionsForType( $mime );

	if ( !$ext ) {
		return null; // Unknown MIME type
	}

	// Strict comparison: a loose in_array() compares numeric-looking strings
	// by value, so e.g. "1e1" would match a listed "10".
	return in_array( strtolower( $extension ), explode( ' ', $ext ), true );
}
477 
/**
 * Returns true if the MIME type is one of the image types listed below.
 *
 * @param string $mime MIME type, compared exactly (case-sensitive, no alias lookup)
 * @return bool
 */
public function isPHPImageType( $mime ) {
	// As defined by getimagesize and image_type_to_mime
	static $types = [
		'image/gif', 'image/jpeg', 'image/png',
		'image/x-bmp', 'image/xbm', 'image/tiff',
		'image/jp2', 'image/jpeg2000', 'image/iff',
		'image/x-xbitmap',
		'image/vnd.wap.wbmp', 'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	];

	// Strict comparison so that non-string input (0, true, ...) can never be
	// reported as an image type through PHP's loose comparison rules.
	return in_array( $mime, $types, true );
}
500 
/**
 * Returns true if the extension represents a type which can be reliably
 * detected from its content. Callers in this class use it to refuse guessing
 * a MIME type from the extension when content detection should have
 * succeeded.
 *
 * @param string $extension File extension, compared case-insensitively
 * @return bool
 */
function isRecognizableExtension( $extension ) {
	static $recognizable = null;

	if ( $recognizable === null ) {
		// Built once, keyed by extension for direct lookup
		$recognizable = array_flip( [
			// Types recognized by getimagesize()
			'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
			'bmp', 'tiff', 'tif', 'jpc', 'jp2',
			'jpx', 'jb2', 'swc', 'iff', 'wbmp',
			'xbm',

			// Formats we recognize magic numbers for
			'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx',
			'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
			'webp',

			// XML formats we sure hope we recognize reliably
			'svg',
		] );
	}

	return isset( $recognizable[strtolower( $extension )] );
}
531 
/**
 * Improves a MIME type using the file extension.
 *
 * Only three detected types are refined here: 'unknown/unknown',
 * 'application/x-opc+zip' and 'text/plain'. Whatever the outcome, the
 * 'MimeMagicImproveFromExtension' hook then gets a chance to change the
 * type, and the result is resolved through the alias table.
 *
 * @param string $mime The MIME type, typically guessed from a file's content
 * @param string $ext The file extension, as taken from the file name
 * @return string|null The improved MIME type; may be null when the type had
 *   to be taken from the extension and the extension is not known
 */
public function improveTypeFromExtension( $mime, $ext ) {
	if ( $mime === 'unknown/unknown' ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->guessTypesForExtension( $ext );
		} else {
			wfDebug( __METHOD__ . ': refusing to guess mime type for .' .
				"$ext file, we should have recognized it\n" );
		}
	} elseif ( $mime === 'application/x-opc+zip' ) {
		if ( !$this->isMatchingExtension( $ext, $mime ) ) {
			wfDebug( __METHOD__ . ": refusing to guess better type for $mime file, " .
				".$ext is not a known OPC extension.\n" );
			$mime = 'application/zip';
		} else {
			// A known file extension for an OPC file,
			// find the proper MIME type for that file extension
			$mime = $this->guessTypesForExtension( $ext );
		}
	} elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
		// Textual types are sometimes not recognized properly.
		// If detected as text/plain, and has an extension which is textual
		// improve to the extension's type. For example, csv and json are often
		// misdetected as text/plain.
		$mime = $this->guessTypesForExtension( $ext );
	}

	# Media handling extensions can improve the MIME detected
	Hooks::run( 'MimeMagicImproveFromExtension', [ $this, $ext, &$mime ] );

	$canonical = isset( $this->mMimeTypeAliases[$mime] )
		? $this->mMimeTypeAliases[$mime]
		: $mime;

	wfDebug( __METHOD__ . ": improved mime type for .$ext: $canonical\n" );
	return $canonical;
}
581 
/**
 * MIME type detection.
 *
 * Content-based detection (doGuessMimeType()) is tried first; when it yields
 * nothing, detectMimeType() is used instead. The result is resolved through
 * the alias table before it is returned.
 *
 * @param string $file The file to check
 * @param string|bool $ext Deprecated; passed through to both detection
 *   methods. Any truthy value only triggers a deprecation notice in the debug log.
 * @return string The MIME type of $file
 */
public function guessMimeType( $file, $ext = true ) {
	if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
		wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$guess = $this->doGuessMimeType( $file, $ext );

	if ( !$guess ) {
		wfDebug( __METHOD__ . ": internal type detection failed for $file (.$ext)...\n" );
		$guess = $this->detectMimeType( $file, $ext );
	}

	$canonical = isset( $this->mMimeTypeAliases[$guess] )
		? $this->mMimeTypeAliases[$guess]
		: $guess;

	wfDebug( __METHOD__ . ": guessed mime type of $file: $canonical\n" );
	return $canonical;
}
616 
/**
 * Guess the MIME type from the file contents.
 *
 * The first 1024 bytes ("head") and up to the last 65558 bytes ("tail") of
 * the file are read, then a fixed sequence of checks is applied and the
 * first one that matches decides the result: hard-coded magic numbers,
 * EBML (WebM/Matroska), WebP, PHP open tags, well-formed XML, shebang
 * scripts, ZIP variants, getimagesize(), DjVu and finally the
 * 'MimeMagicGuessFromContent' hook.
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Only forwarded to detectZipType()
 * @return bool|string The MIME type, 'unknown/unknown' when the file cannot
 *   be opened or is an unrecognized EBML file, or false when no check
 *   (including the hook) produced a type
 * @throws MWException When seeking to the tail of the file fails
 */
private function doGuessMimeType( $file, $ext ) {
	// Read a chunk of the file
	MediaWiki\suppressWarnings();
	$f = fopen( $file, 'rb' );
	MediaWiki\restoreWarnings();

	if ( !$f ) {
		return 'unknown/unknown';
	}

	$fsize = filesize( $file );
	if ( $fsize === false ) {
		// Do not leak the handle on the early exit
		fclose( $f );
		return 'unknown/unknown';
	}

	$head = fread( $f, 1024 );
	$tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
	if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
		// Release the handle before reporting the failure
		fclose( $f );
		throw new MWException(
			"Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
	}
	$tail = $tailLength ? fread( $f, $tailLength ) : '';
	fclose( $f );

	wfDebug( __METHOD__ . ": analyzing head and tail of $file for magic numbers.\n" );

	// Hardcode a few magic number checks...
	$headers = [
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	];

	foreach ( $headers as $magic => $candidate ) {
		if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
			wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" );
			return $candidate;
		}
	}

	/* Look for WebM and Matroska files */
	if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
		$doctype = strpos( $head, "\x42\x82" );
		if ( $doctype ) {
			// Next byte is datasize, then data (sizes larger than 1 byte are very stupid muxers)
			$data = substr( $head, $doctype + 3, 8 );
			if ( strncmp( $data, "matroska", 8 ) == 0 ) {
				wfDebug( __METHOD__ . ": recognized file as video/x-matroska\n" );
				return "video/x-matroska";
			} elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
				wfDebug( __METHOD__ . ": recognized file as video/webm\n" );
				return "video/webm";
			}
		}
		wfDebug( __METHOD__ . ": unknown EBML file\n" );
		return "unknown/unknown";
	}

	/* Look for WebP */
	if ( strncmp( $head, "RIFF", 4 ) == 0 && strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0 ) {
		wfDebug( __METHOD__ . ": recognized file as image/webp\n" );
		return "image/webp";
	}

	// Look for a PHP open tag anywhere in the head: either the plain "<?php",
	// or "<?" followed by "php", space, newline, tab or "=" with a NUL byte
	// after each character (the byte layout of UTF-16LE text).
	if ( ( strpos( $head, '<?php' ) !== false ) ||
		( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
		( strpos( $head, "<\x00?\x00 " ) !== false ) ||
		( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
		( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
		( strpos( $head, "<\x00?\x00=" ) !== false ) ) {

		wfDebug( __METHOD__ . ": recognized $file as application/x-php\n" );
		return 'application/x-php';
	}

	// Well-formed XML: map the root element to a MIME type when it is listed
	// in the 'XMLMimeTypes' setting, otherwise report generic XML.
	$xml = new XmlTypeCheck( $file );
	if ( $xml->wellFormed ) {
		$xmlMimeTypes = $this->mConfig->get( 'XMLMimeTypes' );
		if ( isset( $xmlMimeTypes[$xml->getRootElement()] ) ) {
			return $xmlMimeTypes[$xml->getRootElement()];
		} else {
			return 'application/xml';
		}
	}

	// Look for a shell-script style interpreter line
	$script_type = null;

	# detect by shebang
	# The UTF-16 variants are a 2-byte BOM followed by "#!" at two bytes per
	# character, i.e. 6 bytes in total; the compared prefix length has to
	# equal the literal's length or the test can never succeed.
	if ( substr( $head, 0, 2 ) == "#!" ) {
		$script_type = "ASCII";
	} elseif ( substr( $head, 0, 5 ) == "\xef\xbb\xbf#!" ) {
		$script_type = "UTF-8";
	} elseif ( substr( $head, 0, 6 ) === "\xfe\xff\x00#\x00!" ) {
		$script_type = "UTF-16BE";
	} elseif ( substr( $head, 0, 6 ) === "\xff\xfe#\x00!\x00" ) {
		$script_type = "UTF-16LE";
	}

	if ( $script_type ) {
		if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
			// Quick and dirty fold down to ASCII!
			$pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
			// Skip the 2-byte BOM, then read 16-bit units in the matching byte order
			$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
			$head = '';
			foreach ( $chars as $codepoint ) {
				if ( $codepoint < 128 ) {
					$head .= chr( $codepoint );
				} else {
					$head .= '?';
				}
			}
		}

		$match = [];

		// The word after the last "/" of the first path-like token names the interpreter
		if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
			$mime = "application/x-{$match[2]}";
			wfDebug( __METHOD__ . ": shell script recognized as $mime\n" );
			return $mime;
		}
	}

	// Check for ZIP variants (before getimagesize)
	if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
		wfDebug( __METHOD__ . ": ZIP header present in $file\n" );
		return $this->detectZipType( $head, $tail, $ext );
	}

	MediaWiki\suppressWarnings();
	$gis = getimagesize( $file );
	MediaWiki\restoreWarnings();

	if ( $gis && isset( $gis['mime'] ) ) {
		$mime = $gis['mime'];
		wfDebug( __METHOD__ . ": getimagesize detected $file as $mime\n" );
		return $mime;
	}

	// Also test DjVu
	$deja = new DjVuImage( $file );
	if ( $deja->isValid() ) {
		wfDebug( __METHOD__ . ": detected $file as image/vnd.djvu\n" );
		return 'image/vnd.djvu';
	}

	# Media handling extensions can guess the MIME by content
	# It's intentionally here so that if core is wrong about a type (false positive),
	# people will hopefully nag and submit patches :)
	$mime = false;
	# Some strings by reference for performance - assuming well-behaved hooks
	Hooks::run(
		'MimeMagicGuessFromContent',
		[ $this, &$head, &$tail, $file, &$mime ]
	);

	return $mime;
}
814 
/**
 * Detect application-specific file type of a given ZIP file from its
 * header data.
 *
 * Recognized are OpenDocument files (by their "mimetype" entry), Open
 * Packaging Conventions archives (by their "[Content_Types].xml" entry) and
 * MS Office documents that carry an OPC archive in their tail. Anything
 * else is reported as a plain ZIP archive.
 *
 * @param string $header Some reasonably-sized chunk of file header
 * @param string|null $tail The tail of the file; only inspected for the
 *   MS Office case
 * @param string|bool $ext The file extension, or true/false. Deprecated: when
 *   an actual extension is given, an OPC result is resolved to the type for
 *   that extension (or to application/zip) right here.
 * @return string The detected MIME type, 'application/zip' by default
 */
function detectZipType( $header, $tail = null, $ext = false ) {
	if ( $ext ) { # TODO: remove $ext param
		wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$mime = 'application/zip';
	$opendocTypes = [
		'chart-template',
		'chart',
		'formula-template',
		'formula',
		'graphics-template',
		'graphics',
		'image-template',
		'image',
		'presentation-template',
		'presentation',
		'spreadsheet-template',
		'spreadsheet',
		'text-template',
		'text-master',
		'text-web',
		'text' ];

	// http://lists.oasis-open.org/archives/office/200505/msg00006.html
	$types = '(?:' . implode( '|', $opendocTypes ) . ')';
	$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

	// The dot is escaped so that only the literal name "[Content_Types].xml" matches
	$openxmlRegex = "/^\[Content_Types\]\.xml/";

	// Both patterns are applied 30 bytes into the data - presumably past the
	// fixed-size part of the first ZIP local file header, where the entry
	// name starts (TODO confirm against the ZIP specification).
	if ( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
		$mime = $matches[1];
		wfDebug( __METHOD__ . ": detected $mime from ZIP archive\n" );
	} elseif ( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) {
		$mime = "application/x-opc+zip";
		# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
		if ( $ext !== true && $ext !== false ) {
			// An actual extension was passed: do not hand out the internal
			// x-opc+zip type, resolve it here instead.
			if ( $this->isMatchingExtension( $ext, $mime ) ) {
				/* A known file extension for an OPC file,
				 * find the proper mime type for that file extension
				 */
				$mime = $this->guessTypesForExtension( $ext );
			} else {
				$mime = "application/zip";
			}
		}
		wfDebug( __METHOD__ . ": detected an Open Packaging Conventions archive: $mime\n" );
	} elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
		( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
		preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
		// The header starts with the 8-byte signature above and the tail
		// contains a ZIP entry named like an OPC content types part; the bytes
		// at offset 512 then decide which Office type is reported.
		if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
			$mime = "application/msword";
		}
		// NOTE(review): the first three cases share the 4-byte prefix tested
		// just above, so such headers end up reported as PowerPoint rather
		// than Word - confirm this is intended.
		switch ( substr( $header, 512, 6 ) ) {
			case "\xEC\xA5\xC1\x00\x0E\x00":
			case "\xEC\xA5\xC1\x00\x1C\x00":
			case "\xEC\xA5\xC1\x00\x43\x00":
				$mime = "application/vnd.ms-powerpoint";
				break;
			case "\xFD\xFF\xFF\xFF\x10\x00":
			case "\xFD\xFF\xFF\xFF\x1F\x00":
			case "\xFD\xFF\xFF\xFF\x22\x00":
			case "\xFD\xFF\xFF\xFF\x23\x00":
			case "\xFD\xFF\xFF\xFF\x28\x00":
			case "\xFD\xFF\xFF\xFF\x29\x00":
			case "\xFD\xFF\xFF\xFF\x10\x02":
			case "\xFD\xFF\xFF\xFF\x1F\x02":
			case "\xFD\xFF\xFF\xFF\x22\x02":
			case "\xFD\xFF\xFF\xFF\x23\x02":
			case "\xFD\xFF\xFF\xFF\x28\x02":
			case "\xFD\xFF\xFF\xFF\x29\x02":
				$mime = "application/vnd.msexcel";
				break;
		}

		wfDebug( __METHOD__ . ": detected a MS Office document with OPC trailer\n" );
	} else {
		wfDebug( __METHOD__ . ": unable to identify type of ZIP archive\n" );
	}
	return $mime;
}
914 
/**
 * Internal MIME type detection.
 *
 * Uses the external command named by the 'MimeDetectorCommand' setting when
 * one is configured, otherwise the fileinfo functions when they exist. If
 * neither yields a usable type, the file extension is consulted as a last
 * resort - unless it is an extension whose content should have been
 * recognized.
 *
 * @param string $file The file to check
 * @param string|bool $ext The file extension, true to extract it from the
 *   file name, or false to skip the extension fallback. Deprecated.
 * @return string The MIME type of $file, or 'unknown/unknown'
 */
private function detectMimeType( $file, $ext = true ) {
	if ( $ext ) {
		wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. "
			. "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$detected = null;
	$mimeDetectorCommand = $this->mConfig->get( 'MimeDetectorCommand' );

	if ( $mimeDetectorCommand ) {
		$detected = wfShellExec( "$mimeDetectorCommand " . wfEscapeShellArg( $file ) );
	} elseif ( !function_exists( "finfo_open" ) || !function_exists( "finfo_file" ) ) {
		wfDebug( __METHOD__ . ": no magic mime detector found!\n" );
	} else {
		$finfo = finfo_open( FILEINFO_MIME );

		if ( !$finfo ) {
			wfDebug( __METHOD__ . ": finfo_open failed on " . FILEINFO_MIME . "!\n" );
		} else {
			$detected = finfo_file( $finfo, $file );
			finfo_close( $finfo );
		}
	}

	if ( $detected ) {
		# normalize: strip charset, etc, then trim and lower-case
		$detected = strtolower( trim( preg_replace( '![;, ].*$!', '', $detected ) ) );

		// Anything mentioning "unknown" is treated as no result at all
		if ( strpos( $detected, 'unknown' ) === false ) {
			wfDebug( __METHOD__ . ": magic mime type of $file: $detected\n" );
			return $detected;
		}
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$dot = strrpos( $file, '.' );
		$ext = strtolower( $dot ? substr( $file, $dot + 1 ) : '' );
	}

	if ( $ext ) {
		if ( $this->isRecognizableExtension( $ext ) ) {
			wfDebug( __METHOD__ . ": refusing to guess mime type for .$ext file, "
				. "we should have recognized it\n" );
		} else {
			$byExtension = $this->guessTypesForExtension( $ext );
			if ( $byExtension ) {
				wfDebug( __METHOD__ . ": extension mime type of $file: $byExtension\n" );
				return $byExtension;
			}
		}
	}

	// Unknown type
	wfDebug( __METHOD__ . ": failed to guess mime type for $file!\n" );
	return 'unknown/unknown';
}
994 
/**
 * Determine the media type code for a file, using its MIME type, name and
 * possibly its contents.
 *
 * Lookups are tried in this order, the first known media type wins: the
 * full MIME type, the file extension of $path, the major part of the MIME
 * type. application/ogg is special-cased: when the file exists, the media
 * type is decided by codec names found in its first 256 bytes.
 *
 * @param string|null $path Full path to the file; used to guess the MIME
 *   type when $mime is not given, and for the extension lookup
 * @param string|null $mime MIME type; guessed from $path when empty
 * @return string A MEDIATYPE_* code, MEDIATYPE_UNKNOWN if nothing matched
 */
function getMediaType( $path = null, $mime = null ) {
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If MIME type is unknown, guess it
	if ( !$mime ) {
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	if ( $mime == 'application/ogg' && file_exists( $path ) ) {

		// Read a chunk of the file. Binary mode: the data must reach us
		// without any platform text-mode translation.
		$f = fopen( $path, "rb" );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		// "ffmpeg2theora" is dropped first so that this name alone does not
		// count as a theora stream
		$head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

		// This is an UGLY HACK, file should be parsed correctly
		if ( strpos( $head, 'theora' ) !== false ) {
			return MEDIATYPE_VIDEO;
		}
		foreach ( [ 'vorbis', 'flac', 'speex' ] as $audioCodec ) {
			if ( strpos( $head, $audioCodec ) !== false ) {
				return MEDIATYPE_AUDIO;
			}
		}
		return MEDIATYPE_MULTIMEDIA;
	}

	// Check for entry for full MIME type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major MIME type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$major = substr( $mime, 0, $i );
			$type = $this->findMediaType( $major );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Every lookup above returns as soon as it finds something other than
	// MEDIATYPE_UNKNOWN, so nothing matched if we get here.
	return MEDIATYPE_UNKNOWN;
}
1087 
/**
 * Returns a media code matching the given MIME type or file extension.
 * File extensions are represented by a string starting with a dot (.) to
 * distinguish them from MIME types.
 *
 * An extension is expanded to all MIME types registered for it, which are
 * tried in order; a MIME type is first resolved through the alias table.
 *
 * @param string $extMime A MIME type, or an extension with a leading dot
 * @return int|string The media type code, MEDIATYPE_UNKNOWN if none matched
 */
function findMediaType( $extMime ) {
	if ( strpos( $extMime, '.' ) === 0 ) {
		// If it's an extension, look up the MIME types
		$typeList = $this->getTypesForExtension( substr( $extMime, 1 ) );
		if ( !$typeList ) {
			return MEDIATYPE_UNKNOWN;
		}

		$candidates = explode( ' ', $typeList );
	} else {
		// Normalize MIME type
		$candidates = [
			isset( $this->mMimeTypeAliases[$extMime] )
				? $this->mMimeTypeAliases[$extMime]
				: $extMime
		];
	}

	foreach ( $candidates as $candidate ) {
		foreach ( $this->mMediaTypes as $mediaType => $mimes ) {
			if ( in_array( $candidate, $mimes, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
1126 
/**
 * Get the MIME types that various versions of Internet Explorer would
 * detect from a chunk of the content.
 *
 * The work is delegated to the cached IEContentAnalyzer, whose
 * getRealMimesFromData() result is returned unchanged.
 *
 * @param string $fileName The file name (unused by this method itself)
 * @param string $chunk The first part of the file's content
 * @param string $proposed The MIME type proposed by the server
 * @return mixed Whatever IEContentAnalyzer::getRealMimesFromData() returns
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
	return $this->getIEContentAnalyzer()->getRealMimesFromData( $fileName, $chunk, $proposed );
}
1140 
/**
 * Get a cached instance of IEContentAnalyzer.
 *
 * The analyzer is created on the first call and reused afterwards.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
	if ( $this->mIEAnalyzer === null ) {
		$this->mIEAnalyzer = new IEContentAnalyzer();
	}

	return $this->mIEAnalyzer;
}
1152 }
const MEDIATYPE_MULTIMEDIA
Definition: Defines.php:124
const MM_WELL_KNOWN_MIME_INFO
Defines a set of well-known MIME info entries. This is used as a fallback to mime.info files...
Definition: MimeMagic.php:94
addExtraInfo($info)
Adds to the list mapping MIME to media type.
Definition: MimeMagic.php:391
guessTypesForExtension($ext)
Returns a single MIME type for a given file extension or null if unknown.
Definition: MimeMagic.php:443
string $mExtraInfo
Extra MIME info, set for example by media handling extensions.
Definition: MimeMagic.php:173
improveTypeFromExtension($mime, $ext)
Improves a MIME type using the file extension.
Definition: MimeMagic.php:543
$IP
Definition: WebStart.php:58
static singleton()
Get an instance of this class.
Definition: MimeMagic.php:366
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:1932
isMatchingExtension($extension, $mime)
Tests if the extension matches the given MIME type.
Definition: MimeMagic.php:465
doGuessMimeType($file, $ext)
Guess the MIME type from the file contents.
Definition: MimeMagic.php:627
IEContentAnalyzer $mIEAnalyzer
Definition: MimeMagic.php:165
const MEDIATYPE_TEXT
Definition: Defines.php:128
if($ext== 'php'||$ext== 'php5') $mime
Definition: router.php:65
array $mMimeTypeAliases
Map of MIME type aliases.
Definition: MimeMagic.php:153
const MEDIATYPE_VIDEO
Definition: Defines.php:122
Implements functions related to MIME types such as detection and mapping to file extension.
Definition: MimeMagic.php:144
wfShellExec($cmd, &$retval=null, $environ=[], $limits=[], $options=[])
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
const MM_WELL_KNOWN_MIME_TYPES
Defines a set of well-known MIME types. This is used as a fallback to mime.types files.
Definition: MimeMagic.php:42
getMediaType($path=null, $mime=null)
Determine the media type code for a file, using its MIME type, name and possibly its contents...
Definition: MimeMagic.php:1011
wfDebug($text, $dest= 'all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
if($line===false) $args
Definition: cdb.php:64
Support for detecting/validating DjVu image files and getting some basic file metadata (resolution et...
Definition: DjVuImage.php:36
string $mExtraTypes
Extra MIME types, set for example by media handling extensions.
Definition: MimeMagic.php:169
findMediaType($extMime)
Returns a media code matching the given MIME type or file extension.
Definition: MimeMagic.php:1098
getIEMimeTypes($fileName, $chunk, $proposed)
Get the MIME types that various versions of Internet Explorer would detect from a chunk of the conten...
Definition: MimeMagic.php:1136
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add etc
Definition: design.txt:12
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection a...
array $mMimeToExt
Map of MIME types to file extensions (as a space separated list)
Definition: MimeMagic.php:157
const MEDIATYPE_UNKNOWN
Definition: Defines.php:113
static MimeMagic $instance
The singleton instance.
Definition: MimeMagic.php:180
guessMimeType($file, $ext=true)
MIME type detection.
Definition: MimeMagic.php:596
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
getExtensionsForType($mime)
Returns a list of file extensions for a given MIME type as a space separated string or null if the MI...
Definition: MimeMagic.php:403
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
detectZipType($header, $tail=null, $ext=false)
Detect application-specific file type of a given ZIP file from its header data.
Definition: MimeMagic.php:828
detectMimeType($file, $ext=true)
Internal MIME type detection.
Definition: MimeMagic.php:933
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
static getDefaultInstance()
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
getIEContentAnalyzer()
Get a cached instance of IEContentAnalyzer.
Definition: MimeMagic.php:1146
$lines
Definition: router.php:66
Config $mConfig
Definition: MimeMagic.php:176
isPHPImageType($mime)
Returns true if the MIME type is known to represent an image format supported by the PHP GD library...
Definition: MimeMagic.php:486
wfEscapeShellArg()
Windows-compatible version of escapeshellarg() Windows doesn't recognise single-quotes in the shell...
isRecognizableExtension($extension)
Returns true if the extension represents a type which can be reliably detected from its content...
Definition: MimeMagic.php:513
__construct(Config $config=null)
Initializes the MimeMagic object.
Definition: MimeMagic.php:189
$extensions
array $mExtToMime
Map of file extensions types to MIME types (as a space separated list)
Definition: MimeMagic.php:161
getTypesForExtension($ext)
Returns a list of MIME types for a given file extension as a space separated string or null if the ex...
Definition: MimeMagic.php:429
array $mMediaTypes
Mapping of media types to arrays of MIME types.
Definition: MimeMagic.php:149
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2338
addExtraTypes($types)
Adds to the list mapping MIME to file extensions.
Definition: MimeMagic.php:381
$matches
const MEDIATYPE_AUDIO
Definition: Defines.php:119