MediaWiki  1.28.0
MimeAnalyzer.php
Go to the documentation of this file.
1 <?php
24 
30 class MimeAnalyzer implements LoggerAwareInterface {
/** @var string Path of the MIME types file read by loadFiles(); a falsy value means built-ins only */
protected $typeFile;
/** @var string Path of the MIME info file read by loadFiles(); a falsy value means built-ins only */
protected $infoFile;
/**
 * @var mixed XML type configuration taken from $params['xmlTypes'].
 * NOTE(review): presumably a map of XML root element name => MIME type; confirm against the caller.
 */
protected $xmlTypes;
/** @var callable|null Invoked with this object at the start of loadFiles() */
protected $initCallback;
/** @var callable|null Invoked with the file path by detectMimeType(); its result is used as the raw MIME type */
protected $detectCallback;
/** @var callable|null Invoked by doGuessMimeType() with ( $this, $head, $tail, $file, $mime ) */
protected $guessCallback;
/** @var callable|null Invoked by improveTypeFromExtension() with ( $this, $ext, $mime ) */
protected $extCallback;
/** @var array|null Media type code => list of MIME types; filled by loadFiles() */
protected $mediaTypes = null;
/** @var array|null MIME type alias => main MIME type; filled by loadFiles() */
protected $mimeTypeAliases = null;
/** @var array|null MIME type => space separated list of file extensions; filled by loadFiles() */
protected $mimetoExt = null;

/** @var array|null File extension => space separated list of MIME types; filled by loadFiles() */
public $mExtToMime = null; // legacy name; field accessed by hooks

/** @var IEContentAnalyzer|null Created on first use by getIEContentAnalyzer() */
protected $IEAnalyzer;

/** @var string Extra MIME type lines accumulated by addExtraTypes(), appended by loadFiles() */
private $extraTypes = '';
/** @var string Extra MIME info lines accumulated by addExtraInfo(), appended by loadFiles() */
private $extraInfo = '';

/** @var LoggerInterface Receives the diagnostic messages emitted by this class */
private $logger;
65 
/**
 * Built-in MIME type table. loadFiles() starts from this text and appends
 * the configured type file and any extra types to it.
 *
 * Format: one entry per line, a MIME type followed by a space separated
 * list of file extensions.
 */
protected static $wellKnownTypes = <<<EOT
application/ogg ogx ogg ogm ogv oga spx
application/pdf pdf
application/vnd.oasis.opendocument.chart odc
application/vnd.oasis.opendocument.chart-template otc
application/vnd.oasis.opendocument.database odb
application/vnd.oasis.opendocument.formula odf
application/vnd.oasis.opendocument.formula-template otf
application/vnd.oasis.opendocument.graphics odg
application/vnd.oasis.opendocument.graphics-template otg
application/vnd.oasis.opendocument.image odi
application/vnd.oasis.opendocument.image-template oti
application/vnd.oasis.opendocument.presentation odp
application/vnd.oasis.opendocument.presentation-template otp
application/vnd.oasis.opendocument.spreadsheet ods
application/vnd.oasis.opendocument.spreadsheet-template ots
application/vnd.oasis.opendocument.text odt
application/vnd.oasis.opendocument.text-master otm
application/vnd.oasis.opendocument.text-template ott
application/vnd.oasis.opendocument.text-web oth
application/javascript js
application/x-shockwave-flash swf
audio/midi mid midi kar
audio/mpeg mpga mpa mp2 mp3
audio/x-aiff aif aiff aifc
audio/x-wav wav
audio/ogg oga spx ogg
image/x-bmp bmp
image/gif gif
image/jpeg jpeg jpg jpe
image/png png
image/svg+xml svg
image/svg svg
image/tiff tiff tif
image/vnd.djvu djvu
image/x.djvu djvu
image/x-djvu djvu
image/x-portable-pixmap ppm
image/x-xcf xcf
text/plain txt
text/html html htm
video/ogg ogv ogm ogg
video/mpeg mpg mpeg
EOT;
129 
/**
 * Built-in MIME info table. loadFiles() starts from this text and appends
 * the configured info file and any extra info to it.
 *
 * Format: one entry per line. The first MIME type on a line is the main
 * type, further MIME types on the same line are registered as its aliases,
 * and the bracketed tag names the media type of all of them.
 */
protected static $wellKnownInfo = <<<EOT
application/pdf [OFFICE]
application/vnd.oasis.opendocument.chart [OFFICE]
application/vnd.oasis.opendocument.chart-template [OFFICE]
application/vnd.oasis.opendocument.database [OFFICE]
application/vnd.oasis.opendocument.formula [OFFICE]
application/vnd.oasis.opendocument.formula-template [OFFICE]
application/vnd.oasis.opendocument.graphics [OFFICE]
application/vnd.oasis.opendocument.graphics-template [OFFICE]
application/vnd.oasis.opendocument.image [OFFICE]
application/vnd.oasis.opendocument.image-template [OFFICE]
application/vnd.oasis.opendocument.presentation [OFFICE]
application/vnd.oasis.opendocument.presentation-template [OFFICE]
application/vnd.oasis.opendocument.spreadsheet [OFFICE]
application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
application/vnd.oasis.opendocument.text [OFFICE]
application/vnd.oasis.opendocument.text-template [OFFICE]
application/vnd.oasis.opendocument.text-master [OFFICE]
application/vnd.oasis.opendocument.text-web [OFFICE]
application/javascript text/javascript application/x-javascript [EXECUTABLE]
application/x-shockwave-flash [MULTIMEDIA]
audio/midi [AUDIO]
audio/x-aiff [AUDIO]
audio/x-wav [AUDIO]
audio/mp3 audio/mpeg [AUDIO]
application/ogg audio/ogg video/ogg [MULTIMEDIA]
image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
image/gif [BITMAP]
image/jpeg [BITMAP]
image/png [BITMAP]
image/svg+xml [DRAWING]
image/tiff [BITMAP]
image/vnd.djvu [BITMAP]
image/x-xcf [BITMAP]
image/x-portable-pixmap [BITMAP]
text/plain [TEXT]
text/html [TEXT]
video/ogg [VIDEO]
video/mpeg [VIDEO]
unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
EOT;
177 
/**
 * Store the configuration and immediately build the MIME lookup tables.
 *
 * @param array $params Configuration map:
 *  - typeFile: path of the MIME types file, or a falsy value (key is read unconditionally)
 *  - infoFile: path of the MIME info file, or a falsy value (key is read unconditionally)
 *  - xmlTypes: XML type configuration (key is read unconditionally)
 *  - initCallback, detectCallback, guessCallback, extCallback: optional callables,
 *    each defaulting to null when not set
 *  - logger: optional logger; a \Psr\Log\NullLogger is used when not set
 */
public function __construct( array $params ) {
	$this->typeFile = $params['typeFile'];
	$this->infoFile = $params['infoFile'];
	$this->xmlTypes = $params['xmlTypes'];

	// Every callback is optional and falls back to null.
	$callbackNames = [ 'initCallback', 'detectCallback', 'guessCallback', 'extCallback' ];
	foreach ( $callbackNames as $name ) {
		if ( isset( $params[$name] ) ) {
			$this->$name = $params[$name];
		} else {
			$this->$name = null;
		}
	}

	if ( isset( $params['logger'] ) ) {
		$this->logger = $params['logger'];
	} else {
		$this->logger = new \Psr\Log\NullLogger();
	}

	// Must run last: it reads the files and callbacks configured above.
	$this->loadFiles();
}
215 
/**
 * Build the four lookup tables (MIME => extensions, extension => MIMEs,
 * MIME alias => main MIME, media type => MIMEs).
 *
 * Each table source is the built-in text, followed by the configured file
 * (when it is readable) and the text added through addExtraTypes() /
 * addExtraInfo(). The init callback runs first so it can add extra
 * entries before anything is parsed.
 */
protected function loadFiles() {
	# Allow media handling extensions adding MIME-types and MIME-info
	if ( $this->initCallback ) {
		call_user_func( $this->initCallback, $this );
	}

	// Line-break sequences collapsed to a single "\n" before parsing.
	$lineBreaks = [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ];

	/*
	 * Part 1: MIME type <=> file extension tables
	 */
	$typeText = self::$wellKnownTypes;
	$typePath = $this->typeFile;

	if ( !$typePath ) {
		$this->logger->info( __METHOD__ .
			": no mime types file defined, using built-ins only.\n" );
	} elseif ( is_file( $typePath ) && is_readable( $typePath ) ) {
		$this->logger->info( __METHOD__ . ": loading mime types from $typePath\n" );
		$typeText .= "\n" . file_get_contents( $typePath );
	} else {
		$this->logger->info( __METHOD__ . ": can't load mime types from $typePath\n" );
	}

	$typeText .= "\n" . $this->extraTypes;
	$typeText = str_replace( $lineBreaks, "\n", $typeText );
	$typeText = str_replace( "\t", " ", $typeText );

	$this->mimetoExt = [];
	$this->mExtToMime = [];

	foreach ( explode( "\n", $typeText ) as $line ) {
		$line = trim( $line );

		// Skip blank lines and comment lines.
		if ( empty( $line ) || strpos( $line, '#' ) === 0 ) {
			continue;
		}

		$line = strtolower( $line );
		$split = strpos( $line, ' ' );

		// A MIME type without any extension carries no mapping.
		if ( $split === false ) {
			continue;
		}

		$mime = substr( $line, 0, $split );
		$extList = trim( substr( $line, $split + 1 ) );

		if ( empty( $extList ) ) {
			continue;
		}

		// The same MIME type may appear on several lines; accumulate.
		if ( empty( $this->mimetoExt[$mime] ) ) {
			$this->mimetoExt[$mime] = $extList;
		} else {
			$this->mimetoExt[$mime] .= ' ' . $extList;
		}

		foreach ( explode( ' ', $extList ) as $extension ) {
			$extension = trim( $extension );
			if ( empty( $extension ) ) {
				continue;
			}

			if ( empty( $this->mExtToMime[$extension] ) ) {
				$this->mExtToMime[$extension] = $mime;
			} else {
				$this->mExtToMime[$extension] .= ' ' . $mime;
			}
		}
	}

	/*
	 * Part 2: MIME alias and media type tables
	 */
	$infoPath = $this->infoFile;
	$infoText = self::$wellKnownInfo;

	if ( !$infoPath ) {
		$this->logger->info( __METHOD__ .
			": no mime info file defined, using built-ins only.\n" );
	} elseif ( is_file( $infoPath ) && is_readable( $infoPath ) ) {
		$this->logger->info( __METHOD__ . ": loading mime info from $infoPath\n" );
		$infoText .= "\n" . file_get_contents( $infoPath );
	} else {
		$this->logger->info( __METHOD__ . ": can't load mime info from $infoPath\n" );
	}

	$infoText .= "\n" . $this->extraInfo;
	$infoText = str_replace( $lineBreaks, "\n", $infoText );
	$infoText = str_replace( "\t", " ", $infoText );

	$this->mimeTypeAliases = [];
	$this->mediaTypes = [];

	// Matches the bracketed media type tag, e.g. "[BITMAP]".
	$tagPattern = '!\[\s*(\w+)\s*\]!';

	foreach ( explode( "\n", $infoText ) as $line ) {
		$line = trim( $line );

		if ( empty( $line ) || strpos( $line, '#' ) === 0 ) {
			continue;
		}

		$line = strtolower( $line );

		if ( strpos( $line, ' ' ) === false ) {
			continue;
		}

		$found = [];
		if ( preg_match( $tagPattern, $line, $found ) ) {
			$line = preg_replace( $tagPattern, '', $line );
			$mediaType = trim( strtoupper( $found[1] ) );
		} else {
			$mediaType = MEDIATYPE_UNKNOWN;
		}

		$tokens = explode( ' ', $line );

		if ( !isset( $this->mediaTypes[$mediaType] ) ) {
			$this->mediaTypes[$mediaType] = [];
		}

		foreach ( $tokens as $token ) {
			$token = trim( $token );
			if ( empty( $token ) ) {
				continue;
			}

			$this->mediaTypes[$mediaType][] = $token;
		}

		// Every token after the first is an alias of the first one. Tokens
		// are deliberately taken as-is here (no trimming or filtering).
		$canonical = $tokens[0];
		foreach ( $tokens as $position => $alias ) {
			if ( $position === 0 ) {
				continue;
			}
			$this->mimeTypeAliases[$alias] = $canonical;
		}
	}
}
377 
/**
 * Replace the logger that receives this object's diagnostic messages.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
381 
/**
 * Queue additional MIME type lines (same format as the MIME types file).
 * The queued text is parsed by loadFiles(); the init callback runs before
 * parsing, so it may call this method.
 *
 * @param string $types One or more "mime ext ext ..." lines
 */
public function addExtraTypes( $types ) {
	$this->extraTypes = $this->extraTypes . "\n" . $types;
}
391 
/**
 * Queue additional MIME info lines (same format as the MIME info file).
 * The queued text is parsed by loadFiles(); the init callback runs before
 * parsing, so it may call this method.
 *
 * @param string $info One or more "mime [alias ...] [MEDIATYPE]" lines
 */
public function addExtraInfo( $info ) {
	$this->extraInfo = $this->extraInfo . "\n" . $info;
}
401 
/**
 * Look up the file extensions registered for a MIME type.
 *
 * The type itself is tried first; if it has no entry but is a known alias,
 * the main type it points to is tried instead.
 *
 * @param string $mime MIME type (case-insensitive)
 * @return string|null Space separated extension list, or null if unknown
 */
public function getExtensionsForType( $mime ) {
	$key = strtolower( $mime );

	// Fall back to the main type only when the given type has no own entry.
	if ( !isset( $this->mimetoExt[$key] ) && isset( $this->mimeTypeAliases[$key] ) ) {
		$key = $this->mimeTypeAliases[$key];
	}

	return isset( $this->mimetoExt[$key] ) ? $this->mimetoExt[$key] : null;
}
428 
/**
 * Look up the MIME types registered for a file extension.
 *
 * @param string $ext File extension without the dot (case-insensitive)
 * @return string|null Space separated MIME type list, or null if unknown
 */
public function getTypesForExtension( $ext ) {
	$key = strtolower( $ext );

	if ( isset( $this->mExtToMime[$key] ) ) {
		return $this->mExtToMime[$key];
	}

	return null;
}
442 
/**
 * Return the first MIME type registered for a file extension.
 *
 * @param string $ext File extension without the dot
 * @return string|null A single MIME type, or null if the extension is unknown
 */
public function guessTypesForExtension( $ext ) {
	$allTypes = $this->getTypesForExtension( $ext );
	if ( $allTypes === null ) {
		return null;
	}

	// Keep only what precedes the first whitespace, i.e. the first type.
	// TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
	return preg_replace( '/\s.*$/', '', trim( $allTypes ) );
}
462 
/**
 * Tests if the extension matches the given MIME type.
 *
 * @param string $extension File extension without the dot (case-insensitive)
 * @param string $mime MIME type
 * @return bool|null True if the extension is registered for the MIME type,
 *  false if it is not, null if no extensions are known for the MIME type
 */
public function isMatchingExtension( $extension, $mime ) {
	$ext = $this->getExtensionsForType( $mime );

	if ( !$ext ) {
		return null; // Unknown MIME type
	}

	$ext = explode( ' ', $ext );

	$extension = strtolower( $extension );

	// Strict comparison: with loose comparison numeric-looking extensions
	// are compared as numbers, so e.g. "01" would wrongly match "1".
	return in_array( $extension, $ext, true );
}
484 
/**
 * Returns true if the MIME type is one of the image types listed below,
 * i.e. the types reported by getimagesize() / image_type_to_mime_type().
 *
 * @param string $mime MIME type; compared case-sensitively
 * @return bool
 */
public function isPHPImageType( $mime ) {
	// As defined by getimagesize and image_type_to_mime_type
	static $types = [
		'image/gif', 'image/jpeg', 'image/png',
		'image/x-bmp', 'image/xbm', 'image/tiff',
		'image/jp2', 'image/jpeg2000', 'image/iff',
		'image/x-xbitmap',
		'image/vnd.wap.wbmp', 'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	];

	// Strict comparison so that non-string input such as true (or 0 on
	// PHP < 8) is not reported as an image type.
	return in_array( $mime, $types, true );
}
507 
/**
 * Returns true if the extension belongs to a format this class expects to
 * identify from file content, so the extension alone should not be trusted.
 *
 * @param string $extension File extension without the dot (case-insensitive)
 * @return bool
 */
function isRecognizableExtension( $extension ) {
	static $recognizable = [
		// Types recognized by getimagesize()
		'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
		'bmp', 'tiff', 'tif', 'jpc', 'jp2',
		'jpx', 'jb2', 'swc', 'iff', 'wbmp',
		'xbm',

		// Formats we recognize magic numbers for
		'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx',
		'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
		'webp',

		// XML formats we sure hope we recognize reliably
		'svg',
	];

	$needle = strtolower( $extension );

	// None of the entries is a numeric string, so a strict match gives the
	// same answer as a loose one for the lower-cased needle.
	return in_array( $needle, $recognizable, true );
}
538 
/**
 * Improves a MIME type using the file extension.
 *
 * Three detected types are refined: 'unknown/unknown' (the extension is
 * trusted unless the format should have been recognized from content),
 * 'application/x-opc+zip' (mapped to the extension's type, or downgraded
 * to 'application/zip' for unknown extensions) and 'text/plain' (mapped to
 * the extension's type when that extension is textual). Afterwards the
 * extension callback may adjust the result and aliases are resolved.
 *
 * @param string $mime MIME type detected from the file content
 * @param string $ext File extension without the dot
 * @return string|null Improved MIME type; comes from guessTypesForExtension()
 *  in some branches, which can return null
 */
public function improveTypeFromExtension( $mime, $ext ) {
	if ( $mime === 'unknown/unknown' ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->guessTypesForExtension( $ext );
		} else {
			$this->logger->info( __METHOD__ . ': refusing to guess mime type for .' .
				"$ext file, we should have recognized it\n" );
		}
	} elseif ( $mime === 'application/x-opc+zip' ) {
		if ( !$this->isMatchingExtension( $ext, $mime ) ) {
			$this->logger->info( __METHOD__ .
				": refusing to guess better type for $mime file, " .
				".$ext is not a known OPC extension.\n" );
			$mime = 'application/zip';
		} else {
			// A known file extension for an OPC file,
			// find the proper MIME type for that file extension
			$mime = $this->guessTypesForExtension( $ext );
		}
	} elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
		// Textual formats such as csv or json are often detected as plain
		// text; prefer the more specific type the extension maps to.
		$mime = $this->guessTypesForExtension( $ext );
	}

	# Media handling extensions can improve the MIME detected
	if ( $this->extCallback ) {
		$hook = $this->extCallback;
		$hook( $this, $ext, $mime /* by reference */ );
	}

	// Resolve an alias to its main type.
	if ( isset( $this->mimeTypeAliases[$mime] ) ) {
		$mime = $this->mimeTypeAliases[$mime];
	}

	$this->logger->info( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
	return $mime;
}
592 
/**
 * MIME type detection. Tries the built-in content checks first and falls
 * back to detectMimeType() when they give no result; the final type is
 * resolved through the alias table.
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Deprecated; forwarded to the internal detectors. Use
 *  improveTypeFromExtension( $mime, $ext ) instead.
 * @return string Detected MIME type
 */
public function guessMimeType( $file, $ext = true ) {
	if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$guess = $this->doGuessMimeType( $file, $ext );

	if ( !$guess ) {
		$this->logger->info( __METHOD__ .
			": internal type detection failed for $file (.$ext)...\n" );
		$guess = $this->detectMimeType( $file, $ext );
	}

	// Resolve an alias to its main type.
	$guess = isset( $this->mimeTypeAliases[$guess] )
		? $this->mimeTypeAliases[$guess]
		: $guess;

	$this->logger->info( __METHOD__ . ": guessed mime type of $file: $guess\n" );
	return $guess;
}
629 
/**
 * Guess the MIME type from the file contents.
 *
 * Checks, in order: hard-coded magic numbers, EBML (Matroska/WebM), WebP,
 * embedded PHP code, well-formed XML, shebang scripts, ZIP variants,
 * getimagesize() and finally the guess callback.
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Forwarded to detectZipType() for ZIP archives
 * @return string|bool Detected MIME type, 'unknown/unknown' if the file
 *  cannot be read, or false (or whatever the guess callback assigned) when
 *  no check matched
 * @throws UnexpectedValueException If seeking to the tail of the file fails
 */
private function doGuessMimeType( $file, $ext ) {
	// Read a chunk of the file
	MediaWiki\suppressWarnings();
	$f = fopen( $file, 'rb' );
	MediaWiki\restoreWarnings();

	if ( !$f ) {
		return 'unknown/unknown';
	}

	$fsize = filesize( $file );
	if ( $fsize === false ) {
		// Do not leak the handle on the early exit.
		fclose( $f );
		return 'unknown/unknown';
	}

	$head = fread( $f, 1024 );
	$tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
	if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
		// Do not leak the handle when bailing out.
		fclose( $f );
		throw new UnexpectedValueException(
			"Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
	}
	$tail = $tailLength ? fread( $f, $tailLength ) : '';
	fclose( $f );

	$this->logger->info( __METHOD__ .
		": analyzing head and tail of $file for magic numbers.\n" );

	// Hardcode a few magic number checks...
	$headers = [
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	];

	foreach ( $headers as $magic => $candidate ) {
		if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
			$this->logger->info( __METHOD__ .
				": magic header in $file recognized as $candidate\n" );
			return $candidate;
		}
	}

	/* Look for WebM and Matroska files */
	if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
		$doctype = strpos( $head, "\x42\x82" );
		if ( $doctype ) {
			// Next byte is datasize, then data (sizes larger than 1 byte are stupid muxers)
			$data = substr( $head, $doctype + 3, 8 );
			if ( strncmp( $data, "matroska", 8 ) == 0 ) {
				$this->logger->info( __METHOD__ . ": recognized file as video/x-matroska\n" );
				return "video/x-matroska";
			} elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
				$this->logger->info( __METHOD__ . ": recognized file as video/webm\n" );
				return "video/webm";
			}
		}
		$this->logger->info( __METHOD__ . ": unknown EBML file\n" );
		return "unknown/unknown";
	}

	/* Look for WebP */
	if ( strncmp( $head, "RIFF", 4 ) == 0 &&
		strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0
	) {
		$this->logger->info( __METHOD__ . ": recognized file as image/webp\n" );
		return "image/webp";
	}

	// Look for PHP open tags in the head: the plain "<?php" form, and
	// NUL-interleaved forms of "<?php" and of "<?" followed by a space,
	// newline, tab or "=".
	if ( ( strpos( $head, '<?php' ) !== false ) ||
		( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
		( strpos( $head, "<\x00?\x00 " ) !== false ) ||
		( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
		( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
		( strpos( $head, "<\x00?\x00=" ) !== false ) ) {

		$this->logger->info( __METHOD__ . ": recognized $file as application/x-php\n" );
		return 'application/x-php';
	}

	// Look for XML formats: a known root element selects a specific type,
	// any other well-formed document is generic XML.
	$xml = new XmlTypeCheck( $file );
	if ( $xml->wellFormed ) {
		// The map lives on the object; there is no local of that name.
		$xmlTypes = $this->xmlTypes;
		if ( isset( $xmlTypes[$xml->getRootElement()] ) ) {
			return $xmlTypes[$xml->getRootElement()];
		} else {
			return 'application/xml';
		}
	}

	// Look for shell scripts
	$script_type = null;

	# detect by shebang
	// Each prefix length must equal the length of the signature it is
	// compared with, otherwise the comparison can never succeed.
	if ( substr( $head, 0, 2 ) === "#!" ) {
		$script_type = "ASCII";
	} elseif ( substr( $head, 0, 5 ) === "\xef\xbb\xbf#!" ) {
		$script_type = "UTF-8";
	} elseif ( substr( $head, 0, 6 ) === "\xfe\xff\x00#\x00!" ) {
		$script_type = "UTF-16BE";
	} elseif ( substr( $head, 0, 6 ) === "\xff\xfe#\x00!\x00" ) {
		$script_type = "UTF-16LE";
	}

	if ( $script_type ) {
		if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
			// Quick and dirty fold down to ASCII!
			$pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
			// Skip the two-byte BOM, then decode 16-bit code units.
			$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
			$head = '';
			foreach ( $chars as $codepoint ) {
				if ( $codepoint < 128 ) {
					$head .= chr( $codepoint );
				} else {
					$head .= '?';
				}
			}
		}

		$match = [];

		// The word following the interpreter's directory names the type.
		if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
			$mime = "application/x-{$match[2]}";
			$this->logger->info( __METHOD__ . ": shell script recognized as $mime\n" );
			return $mime;
		}
	}

	// Check for ZIP variants (before getimagesize)
	if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
		$this->logger->info( __METHOD__ . ": ZIP header present in $file\n" );
		return $this->detectZipType( $head, $tail, $ext );
	}

	MediaWiki\suppressWarnings();
	$gis = getimagesize( $file );
	MediaWiki\restoreWarnings();

	if ( $gis && isset( $gis['mime'] ) ) {
		$mime = $gis['mime'];
		$this->logger->info( __METHOD__ . ": getimagesize detected $file as $mime\n" );
		return $mime;
	}

	# Media handling extensions can guess the MIME by content
	# It's intentionally here so that if core is wrong about a type (false positive),
	# people will hopefully nag and submit patches :)
	$mime = false;
	# Some strings by reference for performance - assuming well-behaved hooks
	$callback = $this->guessCallback;
	if ( $callback ) {
		$callback( $this, $head, $tail, $file, $mime /* by reference */ );
	}

	return $mime;
}
824 
/**
 * Detect application-specific file type of a given ZIP file from its
 * header data.
 *
 * Recognizes OpenDocument files (by their leading "mimetype" entry), Open
 * Packaging Conventions archives (by their leading "[Content_Types].xml"
 * entry) and legacy MS Office documents that carry an OPC trailer.
 *
 * @param string $header Leading bytes of the file
 * @param string|null $tail Trailing bytes of the file
 * @param string|bool $ext Deprecated; file extension, used to refine OPC
 *  archives. Use improveTypeFromExtension( $mime, $ext ) instead.
 * @return string MIME type; 'application/zip' when nothing more specific matched
 */
public function detectZipType( $header, $tail = null, $ext = false ) {
	if ( $ext ) { # TODO: remove $ext param
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$mime = 'application/zip';
	$opendocTypes = [
		'chart-template',
		'chart',
		'formula-template',
		'formula',
		'graphics-template',
		'graphics',
		'image-template',
		'image',
		'presentation-template',
		'presentation',
		'spreadsheet-template',
		'spreadsheet',
		'text-template',
		'text-master',
		'text-web',
		'text' ];

	// http://lists.oasis-open.org/archives/office/200505/msg00006.html
	$types = '(?:' . implode( '|', $opendocTypes ) . ')';
	$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

	// The dot is escaped so that only the literal file name matches.
	$openxmlRegex = '/^\[Content_Types\]\.xml/';

	$matches = [];

	// The regexes are applied 30 bytes into the data, which is where the
	// first entry's name is expected to start.
	if ( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
		$mime = $matches[1];
		$this->logger->info( __METHOD__ . ": detected $mime from ZIP archive\n" );
	} elseif ( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) {
		$mime = "application/x-opc+zip";
		# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
		if ( $ext !== true && $ext !== false ) {
			if ( $this->isMatchingExtension( $ext, $mime ) ) {
				/* A known file extension for an OPC file,
				 * find the proper mime type for that file extension
				 */
				$mime = $this->guessTypesForExtension( $ext );
			} else {
				$mime = "application/zip";
			}
		}
		$this->logger->info( __METHOD__ .
			": detected an Open Packaging Conventions archive: $mime\n" );
	} elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
		( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
		preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
		// Legacy Office signature up front with an OPC package in the tail:
		// pick the Office flavour from the bytes at offset 512.
		if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
			$mime = "application/msword";
		}
		switch ( substr( $header, 512, 6 ) ) {
			case "\xEC\xA5\xC1\x00\x0E\x00":
			case "\xEC\xA5\xC1\x00\x1C\x00":
			case "\xEC\xA5\xC1\x00\x43\x00":
				$mime = "application/vnd.ms-powerpoint";
				break;
			case "\xFD\xFF\xFF\xFF\x10\x00":
			case "\xFD\xFF\xFF\xFF\x1F\x00":
			case "\xFD\xFF\xFF\xFF\x22\x00":
			case "\xFD\xFF\xFF\xFF\x23\x00":
			case "\xFD\xFF\xFF\xFF\x28\x00":
			case "\xFD\xFF\xFF\xFF\x29\x00":
			case "\xFD\xFF\xFF\xFF\x10\x02":
			case "\xFD\xFF\xFF\xFF\x1F\x02":
			case "\xFD\xFF\xFF\xFF\x22\x02":
			case "\xFD\xFF\xFF\xFF\x23\x02":
			case "\xFD\xFF\xFF\xFF\x28\x02":
			case "\xFD\xFF\xFF\xFF\x29\x02":
				$mime = "application/vnd.msexcel";
				break;
		}

		$this->logger->info( __METHOD__ .
			": detected a MS Office document with OPC trailer\n" );
	} else {
		$this->logger->info( __METHOD__ . ": unable to identify type of ZIP archive\n" );
	}
	return $mime;
}
927 
/**
 * Internal MIME type detection. Asks the detect callback if one is set,
 * otherwise the fileinfo extension if it is available, and finally falls
 * back to the file extension.
 *
 * @param string $file Path of the file to inspect
 * @param string|bool $ext Deprecated; true to take the extension from
 *  $file, a string to use that extension, false to skip the fallback
 * @return string Detected MIME type, or 'unknown/unknown'
 */
private function detectMimeType( $file, $ext = true ) {
	if ( $ext ) {
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. "
			. "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$detected = null;
	$detector = $this->detectCallback;

	if ( $detector ) {
		$detected = $detector( $file );
	} elseif ( !function_exists( "finfo_open" ) || !function_exists( "finfo_file" ) ) {
		$this->logger->info( __METHOD__ . ": no magic mime detector found!\n" );
	} else {
		$finfo = finfo_open( FILEINFO_MIME );

		if ( !$finfo ) {
			$this->logger->info( __METHOD__ .
				": finfo_open failed on " . FILEINFO_MIME . "!\n" );
		} else {
			$detected = finfo_file( $finfo, $file );
			finfo_close( $finfo );
		}
	}

	if ( $detected ) {
		# normalize
		$detected = preg_replace( '![;, ].*$!', '', $detected ); # strip charset, etc
		$detected = strtolower( trim( $detected ) );

		// A result that mentions "unknown" is treated as no result.
		if ( strpos( $detected, 'unknown' ) === false ) {
			$this->logger->info( __METHOD__ . ": magic mime type of $file: $detected\n" );
			return $detected;
		}
		$detected = null;
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$dot = strrpos( $file, '.' );
		$ext = strtolower( $dot ? substr( $file, $dot + 1 ) : '' );
	}

	if ( $ext ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			$detected = $this->guessTypesForExtension( $ext );
			if ( $detected ) {
				$this->logger->info( __METHOD__ . ": extension mime type of $file: $detected\n" );
				return $detected;
			}
		} else {
			$this->logger->info( __METHOD__ . ": refusing to guess mime type for .$ext file, "
				. "we should have recognized it\n" );
		}
	}

	// Unknown type
	$this->logger->info( __METHOD__ . ": failed to guess mime type for $file!\n" );
	return 'unknown/unknown';
}
1007 
/**
 * Determine the media type code for a file, using its MIME type, name and
 * possibly its contents.
 *
 * Lookup order: Ogg content sniffing (for 'application/ogg'), the full
 * MIME type, the file extension, then the major MIME type.
 *
 * @param string|null $path Path of the file; may be omitted if $mime is given
 * @param string|null $mime MIME type; guessed from $path when omitted
 * @return string One of the MEDIATYPE_* codes; MEDIATYPE_UNKNOWN if
 *  nothing matched
 */
public function getMediaType( $path = null, $mime = null ) {
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If MIME type is unknown, guess it
	if ( !$mime ) {
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	if ( $mime == 'application/ogg' && file_exists( $path ) ) {

		// Read a chunk of the file. Ogg is binary data, so open it in
		// binary mode to avoid text-mode translation on some platforms.
		$f = fopen( $path, "rb" );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		// Drop the encoder name so that it does not count as a "theora" hit.
		$head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

		// This is an UGLY HACK, file should be parsed correctly
		if ( strpos( $head, 'theora' ) !== false ) {
			return MEDIATYPE_VIDEO;
		} elseif ( strpos( $head, 'vorbis' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} elseif ( strpos( $head, 'flac' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} elseif ( strpos( $head, 'speex' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} else {
			return MEDIATYPE_MULTIMEDIA;
		}
	}

	$type = null;
	// Check for entry for full MIME type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major MIME type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$major = substr( $mime, 0, $i );
			$type = $this->findMediaType( $major );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Never hand an empty value back to the caller.
	if ( !$type ) {
		$type = MEDIATYPE_UNKNOWN;
	}

	return $type;
}
1101 
/**
 * Returns a media code matching the given MIME type or file extension.
 * File extensions are represented by a string starting with a dot (.) to
 * distinguish them from MIME types.
 *
 * @param string $extMime MIME type, or file extension prefixed with a dot
 * @return string A MEDIATYPE_* code; MEDIATYPE_UNKNOWN if there is no match
 */
function findMediaType( $extMime ) {
	if ( strpos( $extMime, '.' ) === 0 ) {
		// If it's an extension, look up the MIME types
		$registered = $this->getTypesForExtension( substr( $extMime, 1 ) );
		if ( !$registered ) {
			return MEDIATYPE_UNKNOWN;
		}

		$candidates = explode( ' ', $registered );
	} else {
		// Normalize MIME type
		$candidates = [
			isset( $this->mimeTypeAliases[$extMime] )
				? $this->mimeTypeAliases[$extMime]
				: $extMime
		];
	}

	// The first candidate listed under any media type wins.
	foreach ( $candidates as $candidate ) {
		foreach ( $this->mediaTypes as $mediaType => $members ) {
			if ( in_array( $candidate, $members, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
1140 
/**
 * Get the MIME types that the IEContentAnalyzer reports for a chunk of
 * file content.
 *
 * @param string $fileName The file name (unused at present)
 * @param string $chunk The first 256 bytes of the file
 * @param string $proposed The MIME type proposed by the server
 * @return mixed Whatever IEContentAnalyzer::getRealMimesFromData() returns
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
	return $this->getIEContentAnalyzer()
		->getRealMimesFromData( $fileName, $chunk, $proposed );
}
1154 
/**
 * Get a cached instance of IEContentAnalyzer, creating it on first use.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
	if ( $this->IEAnalyzer === null ) {
		$this->IEAnalyzer = new IEContentAnalyzer();
	}

	return $this->IEAnalyzer;
}
1166 }
getTypesForExtension($ext)
Returns a list of MIME types for a given file extension as a space separated string or null if the ex...
improveTypeFromExtension($mime, $ext)
Improves a MIME type using the file extension.
setLogger(LoggerInterface $logger)
string $xmlTypes
the array() calling protocol came about after MediaWiki 1.4rc1.
either a plain
Definition: hooks.txt:1987
const MEDIATYPE_TEXT
Definition: defines.php:41
callable $initCallback
doGuessMimeType($file, $ext)
Guess the MIME type from the file contents.
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException'returning false will NOT prevent logging $e
Definition: hooks.txt:2102
const MEDIATYPE_MULTIMEDIA
Definition: defines.php:37
if($ext== 'php'||$ext== 'php5') $mime
Definition: router.php:65
getExtensionsForType($mime)
Returns a list of file extensions for a given MIME type as a space separated string or null if the MI...
guessMimeType($file, $ext=true)
MIME type detection.
IEContentAnalyzer $IEAnalyzer
__construct(array $params)
guessTypesForExtension($ext)
Returns a single MIME type for a given file extension or null if unknown.
detectMimeType($file, $ext=true)
Internal MIME type detection.
const MEDIATYPE_UNKNOWN
Definition: defines.php:26
findMediaType($extMime)
Returns a media code matching the given MIME type or file extension.
array $mediaTypes
Mapping of media types to arrays of MIME types.
const MEDIATYPE_VIDEO
Definition: defines.php:35
callable $detectCallback
callable $extCallback
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add etc
Definition: design.txt:12
getMediaType($path=null, $mime=null)
Determine the media type code for a file, using its MIME type, name and possibly its contents...
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection a...
static $wellKnownInfo
Defines a set of well known MIME info entries This is used as a fallback to mime.info files...
string $extraTypes
Extra MIME types, set for example by media handling extensions.
$params
isPHPImageType($mime)
Returns true if the MIME type is known to represent an image format supported by the PHP GD library...
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
detectZipType($header, $tail=null, $ext=false)
Detect application-specific file type of a given ZIP file from its header data.
$header
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
LoggerInterface $logger
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
string $infoFile
getIEMimeTypes($fileName, $chunk, $proposed)
Get the MIME types that various versions of Internet Explorer would detect from a chunk of the conten...
$lines
Definition: router.php:67
const MEDIATYPE_AUDIO
Definition: defines.php:32
array $mExtToMime
Map of file extensions types to MIME types (as a space separated list)
array $mimeTypeAliases
Map of MIME type aliases.
array $mimetoExt
Map of MIME types to file extensions (as a space separated list)
string $typeFile
getIEContentAnalyzer()
Get a cached instance of IEContentAnalyzer.
isMatchingExtension($extension, $mime)
Tests if the extension matches the given MIME type.
isRecognizableExtension($extension)
Returns true if the extension represents a type which can be reliably detected from its content...
callable $guessCallback
$extensions
static $wellKnownTypes
Defines a set of well known MIME types This is used as a fallback to mime.types files.
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2491
addExtraTypes($types)
Adds to the list mapping MIME to file extensions.
string $extraInfo
Extra MIME info, set for example by media handling extensions.
addExtraInfo($info)
Adds to the list mapping MIME to media type.
$matches