MediaWiki  1.29.1
MimeAnalyzer.php
Go to the documentation of this file.
1 <?php
22 use Psr\Log\LoggerAwareInterface;
23 use Psr\Log\LoggerInterface;
24 
30 class MimeAnalyzer implements LoggerAwareInterface {
/** @var string Path of the MIME types file that loadFiles() merges into the built-in list; falsy means built-ins only */
32  protected $typeFile;
/** @var string Path of the MIME info file that loadFiles() merges into the built-in list; falsy means built-ins only */
34  protected $infoFile;
/** @var array Taken from $params['xmlTypes']; presumably maps XML root element names to MIME types - TODO confirm */
36  protected $xmlTypes;
/** @var callable|null Called as ( $this ) at the start of loadFiles() */
38  protected $initCallback;
/** @var callable|null Called as ( $file ) by detectMimeType(); replaces the finfo lookup when set */
40  protected $detectCallback;
/** @var callable|null Called as ( $this, $head, $tail, $file, $mime ) at the end of doGuessMimeType() */
42  protected $guessCallback;
/** @var callable|null Called as ( $this, $ext, $mime ) by improveTypeFromExtension() */
44  protected $extCallback;
/** @var array Mapping of media types to arrays of MIME types */
46  protected $mediaTypes = null;
/** @var array Map of alias MIME types to the first MIME type on the same info line */
48  protected $mimeTypeAliases = null;
/** @var array Map of MIME types to file extensions (as a space separated list) */
50  protected $mimetoExt = null;
51 
/** @var array Map of file extensions to MIME types (as a space separated list) */
53  public $mExtToMime = null; // legacy name; field accessed by hooks
54 
/** @var IEContentAnalyzer Created lazily by getIEContentAnalyzer() */
56  protected $IEAnalyzer;
57 
/** @var string Extra MIME type lines collected by addExtraTypes() */
59  private $extraTypes = '';
/** @var string Extra MIME info lines collected by addExtraInfo() */
61  private $extraInfo = '';
62 
/** @var LoggerInterface Set by the constructor (NullLogger by default) or by setLogger() */
64  private $logger;
65 
/**
 * Defines a set of well known MIME types.
 * This is used as a fallback to mime.types files.
 *
 * Each line is a MIME type followed by a space separated list of file
 * extensions; loadFiles() parses this text before any configured types file.
 */
85  protected static $wellKnownTypes = <<<EOT
86 application/ogg ogx ogg ogm ogv oga spx opus
87 application/pdf pdf
88 application/vnd.oasis.opendocument.chart odc
89 application/vnd.oasis.opendocument.chart-template otc
90 application/vnd.oasis.opendocument.database odb
91 application/vnd.oasis.opendocument.formula odf
92 application/vnd.oasis.opendocument.formula-template otf
93 application/vnd.oasis.opendocument.graphics odg
94 application/vnd.oasis.opendocument.graphics-template otg
95 application/vnd.oasis.opendocument.image odi
96 application/vnd.oasis.opendocument.image-template oti
97 application/vnd.oasis.opendocument.presentation odp
98 application/vnd.oasis.opendocument.presentation-template otp
99 application/vnd.oasis.opendocument.spreadsheet ods
100 application/vnd.oasis.opendocument.spreadsheet-template ots
101 application/vnd.oasis.opendocument.text odt
102 application/vnd.oasis.opendocument.text-master otm
103 application/vnd.oasis.opendocument.text-template ott
104 application/vnd.oasis.opendocument.text-web oth
105 application/javascript js
106 application/x-shockwave-flash swf
107 audio/midi mid midi kar
108 audio/mpeg mpga mpa mp2 mp3
109 audio/x-aiff aif aiff aifc
110 audio/x-wav wav
111 audio/ogg oga spx ogg opus
112 audio/opus opus ogg oga ogg spx
113 image/x-bmp bmp
114 image/gif gif
115 image/jpeg jpeg jpg jpe
116 image/png png
117 image/svg+xml svg
118 image/svg svg
119 image/tiff tiff tif
120 image/vnd.djvu djvu
121 image/x.djvu djvu
122 image/x-djvu djvu
123 image/x-portable-pixmap ppm
124 image/x-xcf xcf
125 text/plain txt
126 text/html html htm
127 video/ogg ogv ogm ogg
128 video/mpeg mpg mpeg
129 EOT;
130 
/**
 * Built-in MIME info list, parsed by loadFiles() before any configured
 * info file.
 *
 * Each line holds one or more MIME types separated by spaces, followed by a
 * media type in square brackets. The first MIME type on a line is the main
 * one; loadFiles() records the remaining ones as aliases of it.
 */
137  protected static $wellKnownInfo = <<<EOT
138 application/pdf [OFFICE]
139 application/vnd.oasis.opendocument.chart [OFFICE]
140 application/vnd.oasis.opendocument.chart-template [OFFICE]
141 application/vnd.oasis.opendocument.database [OFFICE]
142 application/vnd.oasis.opendocument.formula [OFFICE]
143 application/vnd.oasis.opendocument.formula-template [OFFICE]
144 application/vnd.oasis.opendocument.graphics [OFFICE]
145 application/vnd.oasis.opendocument.graphics-template [OFFICE]
146 application/vnd.oasis.opendocument.image [OFFICE]
147 application/vnd.oasis.opendocument.image-template [OFFICE]
148 application/vnd.oasis.opendocument.presentation [OFFICE]
149 application/vnd.oasis.opendocument.presentation-template [OFFICE]
150 application/vnd.oasis.opendocument.spreadsheet [OFFICE]
151 application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
152 application/vnd.oasis.opendocument.text [OFFICE]
153 application/vnd.oasis.opendocument.text-template [OFFICE]
154 application/vnd.oasis.opendocument.text-master [OFFICE]
155 application/vnd.oasis.opendocument.text-web [OFFICE]
156 application/javascript text/javascript application/x-javascript [EXECUTABLE]
157 application/x-shockwave-flash [MULTIMEDIA]
158 audio/midi [AUDIO]
159 audio/x-aiff [AUDIO]
160 audio/x-wav [AUDIO]
161 audio/mp3 audio/mpeg [AUDIO]
162 application/ogg audio/ogg video/ogg [MULTIMEDIA]
163 image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
164 image/gif [BITMAP]
165 image/jpeg [BITMAP]
166 image/png [BITMAP]
167 image/svg+xml [DRAWING]
168 image/tiff [BITMAP]
169 image/vnd.djvu [BITMAP]
170 image/x-xcf [BITMAP]
171 image/x-portable-pixmap [BITMAP]
172 text/plain [TEXT]
173 text/html [TEXT]
174 video/ogg [VIDEO]
175 video/mpeg [VIDEO]
176 unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
177 EOT;
178 
/**
 * Store the configuration and build the lookup tables.
 *
 * @param array $params Configuration map:
 *   - typeFile: path of the MIME types file (required key)
 *   - infoFile: path of the MIME info file (required key)
 *   - xmlTypes: stored in $this->xmlTypes (required key)
 *   - initCallback, detectCallback, guessCallback, extCallback: optional
 *     callables, each defaulting to null
 *   - logger: optional logger; a NullLogger is used when absent
 */
public function __construct( array $params ) {
    $this->typeFile = $params['typeFile'];
    $this->infoFile = $params['infoFile'];
    $this->xmlTypes = $params['xmlTypes'];

    // The optional callbacks all default to null when not supplied
    $callbackNames = [ 'initCallback', 'detectCallback', 'guessCallback', 'extCallback' ];
    foreach ( $callbackNames as $name ) {
        $this->$name = isset( $params[$name] ) ? $params[$name] : null;
    }

    if ( isset( $params['logger'] ) ) {
        $this->logger = $params['logger'];
    } else {
        $this->logger = new \Psr\Log\NullLogger();
    }

    $this->loadFiles();
}
216 
/**
 * Build the MIME lookup tables.
 *
 * Runs the init callback (if any), then parses the built-in type list, the
 * optional types file and the extra types into $mimetoExt / $mExtToMime, and
 * the built-in info list, the optional info file and the extra info into
 * $mediaTypes / $mimeTypeAliases.
 */
protected function loadFiles() {
    # Allow media handling extensions adding MIME-types and MIME-info
    if ( $this->initCallback ) {
        call_user_func( $this->initCallback, $this );
    }

    // ---- MIME type <-> file extension tables ----

    $typeText = self::$wellKnownTypes;
    $typePath = $this->typeFile;

    if ( !$typePath ) {
        $this->logger->info( __METHOD__ .
            ": no mime types file defined, using built-ins only.\n" );
    } elseif ( is_file( $typePath ) && is_readable( $typePath ) ) {
        $this->logger->info( __METHOD__ . ": loading mime types from $typePath\n" );
        $typeText .= "\n" . file_get_contents( $typePath );
    } else {
        $this->logger->info( __METHOD__ . ": can't load mime types from $typePath\n" );
    }

    $typeText .= "\n" . $this->extraTypes;

    // Normalize line endings and turn tabs into spaces before splitting
    $typeText = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $typeText );
    $typeText = str_replace( "\t", " ", $typeText );

    $this->mimetoExt = [];
    $this->mExtToMime = [];

    foreach ( explode( "\n", $typeText ) as $line ) {
        $line = trim( $line );

        // Skip blank lines and '#' comments
        if ( empty( $line ) || strpos( $line, '#' ) === 0 ) {
            continue;
        }

        // "<mime> <ext> <ext> ..."; a line without a space carries no extensions
        $pieces = explode( ' ', strtolower( $line ), 2 );
        if ( count( $pieces ) < 2 ) {
            continue;
        }

        $mime = $pieces[0];
        $extList = trim( $pieces[1] );
        if ( empty( $extList ) ) {
            continue;
        }

        // A MIME type seen more than once accumulates its extensions
        if ( empty( $this->mimetoExt[$mime] ) ) {
            $this->mimetoExt[$mime] = $extList;
        } else {
            $this->mimetoExt[$mime] .= ' ' . $extList;
        }

        foreach ( explode( ' ', $extList ) as $extension ) {
            $extension = trim( $extension );
            if ( empty( $extension ) ) {
                continue;
            }

            // An extension shared by several MIME types accumulates them
            if ( empty( $this->mExtToMime[$extension] ) ) {
                $this->mExtToMime[$extension] = $mime;
            } else {
                $this->mExtToMime[$extension] .= ' ' . $mime;
            }
        }
    }

    // ---- media type and alias tables ----

    $infoPath = $this->infoFile;
    $infoText = self::$wellKnownInfo;

    if ( !$infoPath ) {
        $this->logger->info( __METHOD__ .
            ": no mime info file defined, using built-ins only.\n" );
    } elseif ( is_file( $infoPath ) && is_readable( $infoPath ) ) {
        $this->logger->info( __METHOD__ . ": loading mime info from $infoPath\n" );
        $infoText .= "\n" . file_get_contents( $infoPath );
    } else {
        $this->logger->info( __METHOD__ . ": can't load mime info from $infoPath\n" );
    }

    $infoText .= "\n" . $this->extraInfo;

    $infoText = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $infoText );
    $infoText = str_replace( "\t", " ", $infoText );

    $this->mimeTypeAliases = [];
    $this->mediaTypes = [];

    foreach ( explode( "\n", $infoText ) as $line ) {
        $line = trim( $line );

        if ( empty( $line ) || strpos( $line, '#' ) === 0 ) {
            continue;
        }

        $line = strtolower( $line );
        if ( strpos( $line, ' ' ) === false ) {
            continue;
        }

        // The media type is the bracketed word; it is cut out of the line
        $bracket = [];
        if ( preg_match( '!\[\s*(\w+)\s*\]!', $line, $bracket ) ) {
            $line = preg_replace( '!\[\s*(\w+)\s*\]!', '', $line );
            $mediaType = trim( strtoupper( $bracket[1] ) );
        } else {
            $mediaType = MEDIATYPE_UNKNOWN;
        }

        $tokens = explode( ' ', $line );

        if ( !isset( $this->mediaTypes[$mediaType] ) ) {
            $this->mediaTypes[$mediaType] = [];
        }

        foreach ( $tokens as $token ) {
            $token = trim( $token );
            if ( empty( $token ) ) {
                continue;
            }

            $this->mediaTypes[$mediaType][] = $token;
        }

        // Every token after the first is an alias of the first one. Tokens
        // are taken as split, without trimming or dropping empty ones.
        $canonical = $tokens[0];
        foreach ( array_slice( $tokens, 1 ) as $alias ) {
            $this->mimeTypeAliases[$alias] = $canonical;
        }
    }
}
378 
/**
 * Replace the logger used for diagnostic messages.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
    $this->logger = $logger;
}
382 
/**
 * Append lines to the extra MIME type list that loadFiles() parses after the
 * built-in list and the types file.
 *
 * loadFiles() runs the init callback before reading this list, so calls made
 * from that callback are picked up.
 *
 * @param string $types One or more lines in the "mime ext ext ..." format
 */
public function addExtraTypes( $types ) {
    $this->extraTypes = $this->extraTypes . "\n" . $types;
}
392 
/**
 * Append lines to the extra MIME info list that loadFiles() parses after the
 * built-in list and the info file.
 *
 * loadFiles() runs the init callback before reading this list, so calls made
 * from that callback are picked up.
 *
 * @param string $info One or more lines in the "mime [alias ...] [MEDIATYPE]" format
 */
public function addExtraInfo( $info ) {
    $this->extraInfo = $this->extraInfo . "\n" . $info;
}
402 
/**
 * Returns a list of file extensions for a given MIME type as a space
 * separated string, or null if the MIME type is not known.
 *
 * The type is looked up directly first; only when that fails is it resolved
 * through the alias table and looked up again.
 *
 * @param string $mime
 * @return string|null
 */
public function getExtensionsForType( $mime ) {
    $key = strtolower( $mime );

    // Fall back to the canonical type only when there is no direct entry
    if ( !isset( $this->mimetoExt[$key] ) && isset( $this->mimeTypeAliases[$key] ) ) {
        $key = $this->mimeTypeAliases[$key];
    }

    return isset( $this->mimetoExt[$key] ) ? $this->mimetoExt[$key] : null;
}
429 
/**
 * Returns a list of MIME types for a given file extension as a space
 * separated string, or null if the extension is not known.
 *
 * @param string $ext File extension without the dot; matched case-insensitively
 * @return string|null
 */
public function getTypesForExtension( $ext ) {
    $key = strtolower( $ext );

    if ( isset( $this->mExtToMime[$key] ) ) {
        return $this->mExtToMime[$key];
    }

    return null;
}
443 
/**
 * Returns a single MIME type for a given file extension, or null if unknown.
 * This is the first entry of the list returned by getTypesForExtension().
 *
 * @param string $ext
 * @return string|null
 */
public function guessTypesForExtension( $ext ) {
    $types = $this->getTypesForExtension( $ext );
    if ( $types === null ) {
        return null;
    }

    // TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
    // Keep only what precedes the first whitespace character
    return preg_replace( '/\s.*$/', '', trim( $types ) );
}
463 
/**
 * Tests if the extension matches the given MIME type.
 *
 * @param string $extension File extension without the dot; matched case-insensitively
 * @param string $mime
 * @return bool|null True if the extension is listed for the MIME type,
 *   false if it is not, null if no extensions are known for the MIME type
 */
public function isMatchingExtension( $extension, $mime ) {
    $ext = $this->getExtensionsForType( $mime );

    if ( !$ext ) {
        return null; // Unknown MIME type
    }

    $ext = explode( ' ', $ext );

    $extension = strtolower( $extension );

    // Strict comparison: with loose comparison numeric-looking extensions
    // such as '1e1' and '10' would be treated as equal.
    return in_array( $extension, $ext, true );
}
485 
/**
 * Returns true if the MIME type is in the list of image types PHP's image
 * functions report.
 *
 * @param string $mime
 * @return bool
 */
public function isPHPImageType( $mime ) {
    // As defined by getimagesize and image_type_to_mime
    static $types = [
        'image/gif', 'image/jpeg', 'image/png',
        'image/x-bmp', 'image/xbm', 'image/tiff',
        'image/jp2', 'image/jpeg2000', 'image/iff',
        'image/x-xbitmap',
        'image/vnd.wap.wbmp', 'image/vnd.xiff',
        'image/x-photoshop',
        'application/x-shockwave-flash',
    ];

    // Strict comparison: a loose in_array() reports non-string input such as
    // true (or 0 before PHP 8) as a match for the first string in the list.
    return in_array( $mime, $types, true );
}
508 
/**
 * Returns true if the extension represents a type which can be reliably
 * detected from its content.
 *
 * @param string $extension File extension without the dot; matched case-insensitively
 * @return bool
 */
function isRecognizableExtension( $extension ) {
    // Built once per request; keys are the recognizable extensions
    static $recognizable = null;

    if ( $recognizable === null ) {
        $recognizable = array_flip( [
            // Types recognized by getimagesize()
            'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
            'bmp', 'tiff', 'tif', 'jpc', 'jp2',
            'jpx', 'jb2', 'swc', 'iff', 'wbmp',
            'xbm',

            // Formats we recognize magic numbers for
            'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx', 'opus',
            'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
            'webp',

            // XML formats we sure hope we recognize reliably
            'svg',
        ] );
    }

    return isset( $recognizable[strtolower( $extension )] );
}
539 
/**
 * Refine a content-detected MIME type using the file extension.
 *
 * - 'unknown/unknown' becomes the extension's type, unless the extension is
 *   one whose content should have been recognized.
 * - 'application/x-opc+zip' becomes the extension's type when the extension
 *   is listed for that MIME type, and 'application/zip' otherwise.
 * - 'text/plain' becomes the extension's type when the extension maps to the
 *   TEXT media type.
 *
 * The ext callback (if any) is then invoked, and the result is resolved
 * through the alias table.
 *
 * @param string $mime MIME type detected from the file content
 * @param string $ext File extension without the dot
 * @return string|null Improved MIME type; null when the extension lookup
 *   found nothing and neither the callback nor the alias table replaced it
 */
public function improveTypeFromExtension( $mime, $ext ) {
    if ( $mime === 'unknown/unknown' ) {
        if ( !$this->isRecognizableExtension( $ext ) ) {
            // Not something we can detect, so simply
            // trust the file extension
            $mime = $this->guessTypesForExtension( $ext );
        } else {
            $this->logger->info( __METHOD__ . ': refusing to guess mime type for .' .
                "$ext file, we should have recognized it\n" );
        }
    } elseif ( $mime === 'application/x-opc+zip' ) {
        if ( !$this->isMatchingExtension( $ext, $mime ) ) {
            $this->logger->info( __METHOD__ .
                ": refusing to guess better type for $mime file, " .
                ".$ext is not a known OPC extension.\n" );
            $mime = 'application/zip';
        } else {
            // A known file extension for an OPC file,
            // find the proper MIME type for that file extension
            $mime = $this->guessTypesForExtension( $ext );
        }
    } elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
        // Textual types are sometimes not recognized properly.
        // If detected as text/plain, and has an extension which is textual
        // improve to the extension's type. For example, csv and json are often
        // misdetected as text/plain.
        $mime = $this->guessTypesForExtension( $ext );
    }

    # Media handling extensions can improve the MIME detected
    if ( $this->extCallback ) {
        $hook = $this->extCallback;
        $hook( $this, $ext, $mime /* by reference */ );
    }

    // Collapse aliases to their canonical type
    if ( isset( $this->mimeTypeAliases[$mime] ) ) {
        $mime = $this->mimeTypeAliases[$mime];
    }

    $this->logger->info( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
    return $mime;
}
593 
/**
 * MIME type detection.
 *
 * Tries the built-in content checks first (doGuessMimeType) and falls back
 * to detectMimeType() when they yield nothing. The result is resolved
 * through the alias table.
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Deprecated; forwarded to the detection helpers.
 *   Use improveTypeFromExtension( $mime, $ext ) instead.
 * @return string MIME type
 */
public function guessMimeType( $file, $ext = true ) {
    if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
        $this->logger->info( __METHOD__ .
            ": WARNING: use of the \$ext parameter is deprecated. " .
            "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
    }

    $guessed = $this->doGuessMimeType( $file, $ext );

    if ( !$guessed ) {
        $this->logger->info( __METHOD__ .
            ": internal type detection failed for $file (.$ext)...\n" );
        $guessed = $this->detectMimeType( $file, $ext );
    }

    // Collapse aliases to their canonical type
    if ( isset( $this->mimeTypeAliases[$guessed] ) ) {
        $guessed = $this->mimeTypeAliases[$guessed];
    }

    $this->logger->info( __METHOD__ . ": guessed mime type of $file: $guessed\n" );
    return $guessed;
}
630 
/**
 * Guess the MIME type of a file from its content.
 *
 * Reads the first 1024 bytes and up to the last 65558 bytes of the file and
 * checks, in order: hard-coded magic numbers, EBML (Matroska/WebM), WebP,
 * embedded PHP, well-formed XML, shebang scripts, ZIP archives and
 * getimagesize(). If none match, the guess callback (if any) gets a chance
 * to set the type.
 *
 * @param string $file Path of the file to inspect
 * @param mixed $ext Forwarded to detectZipType() for ZIP archives
 * @return bool|string MIME type, 'unknown/unknown' if the file cannot be
 *   opened or sized, or false if nothing matched
 * @throws UnexpectedValueException If seeking to the tail of the file fails
 */
private function doGuessMimeType( $file, $ext ) {
    // Read a chunk of the file
    MediaWiki\suppressWarnings();
    $f = fopen( $file, 'rb' );
    MediaWiki\restoreWarnings();

    if ( !$f ) {
        return 'unknown/unknown';
    }

    $fsize = filesize( $file );
    if ( $fsize === false ) {
        // Do not leak the handle on the early exit
        fclose( $f );
        return 'unknown/unknown';
    }

    $head = fread( $f, 1024 );
    $tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
    if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
        // Do not leak the handle when bailing out with an exception
        fclose( $f );
        throw new UnexpectedValueException(
            "Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
    }
    $tail = $tailLength ? fread( $f, $tailLength ) : '';
    fclose( $f );

    $this->logger->info( __METHOD__ .
        ": analyzing head and tail of $file for magic numbers.\n" );

    // Hardcode a few magic number checks...
    $headers = [
        // Multimedia...
        'MThd' => 'audio/midi',
        'OggS' => 'application/ogg',

        // Image formats...
        // Note that WMF may have a bare header, no magic number.
        "\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
        "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
        '%PDF' => 'application/pdf',
        'gimp xcf' => 'image/x-xcf',

        // Some forbidden fruit...
        'MZ' => 'application/octet-stream', // DOS/Windows executable
        "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
        "\x7fELF" => 'application/octet-stream', // ELF binary
    ];

    foreach ( $headers as $magic => $candidate ) {
        if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
            $this->logger->info( __METHOD__ .
                ": magic header in $file recognized as $candidate\n" );
            return $candidate;
        }
    }

    /* Look for WebM and Matroska files */
    if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
        $doctype = strpos( $head, "\x42\x82" );
        if ( $doctype ) {
            // Next byte is datasize, then data (sizes larger than 1 byte are stupid muxers)
            $data = substr( $head, $doctype + 3, 8 );
            if ( strncmp( $data, "matroska", 8 ) == 0 ) {
                $this->logger->info( __METHOD__ . ": recognized file as video/x-matroska\n" );
                return "video/x-matroska";
            } elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
                $this->logger->info( __METHOD__ . ": recognized file as video/webm\n" );
                return "video/webm";
            }
        }
        $this->logger->info( __METHOD__ . ": unknown EBML file\n" );
        return "unknown/unknown";
    }

    /* Look for WebP */
    if ( strncmp( $head, "RIFF", 4 ) == 0 &&
        strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0
    ) {
        $this->logger->info( __METHOD__ . ": recognized file as image/webp\n" );
        return "image/webp";
    }

    // Look for PHP open tags, both as plain bytes and with a NUL byte after
    // each character (how UTF-16LE text appears in the raw bytes).
    if ( ( strpos( $head, '<?php' ) !== false ) ||
        ( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
        ( strpos( $head, "<\x00?\x00 " ) !== false ) ||
        ( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
        ( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
        ( strpos( $head, "<\x00?\x00=" ) !== false ) ) {

        $this->logger->info( __METHOD__ . ": recognized $file as application/x-php\n" );
        return 'application/x-php';
    }

    // Look for well-formed XML. The configured map is read from the
    // property; a bare local $xmlTypes would be undefined here and every
    // XML document would be reported as application/xml.
    $xml = new XmlTypeCheck( $file );
    if ( $xml->wellFormed ) {
        $xmlTypes = $this->xmlTypes;
        $rootElement = $xml->getRootElement();
        if ( isset( $xmlTypes[$rootElement] ) ) {
            return $xmlTypes[$rootElement];
        } else {
            return 'application/xml';
        }
    }

    // Look for shell scripts
    $script_type = null;

    # detect by shebang
    # The UTF-16 patterns are BOM + "#!" (six bytes each); they are matched
    # as prefixes with their real length so that they can actually match.
    $utf16be = "\xfe\xff\x00#\x00!";
    $utf16le = "\xff\xfe#\x00!\x00";
    if ( substr( $head, 0, 2 ) == "#!" ) {
        $script_type = "ASCII";
    } elseif ( substr( $head, 0, 5 ) == "\xef\xbb\xbf#!" ) {
        $script_type = "UTF-8";
    } elseif ( strncmp( $head, $utf16be, strlen( $utf16be ) ) === 0 ) {
        $script_type = "UTF-16BE";
    } elseif ( strncmp( $head, $utf16le, strlen( $utf16le ) ) === 0 ) {
        $script_type = "UTF-16LE";
    }

    if ( $script_type ) {
        if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
            // Quick and dirty fold down to ASCII!
            $pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
            // Skip the two BOM bytes, then decode 16-bit code units
            $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
            $head = '';
            foreach ( $chars as $codepoint ) {
                if ( $codepoint < 128 ) {
                    $head .= chr( $codepoint );
                } else {
                    $head .= '?';
                }
            }
        }

        $match = [];

        // The word after the last slash of the interpreter path names the type
        if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
            $mime = "application/x-{$match[2]}";
            $this->logger->info( __METHOD__ . ": shell script recognized as $mime\n" );
            return $mime;
        }
    }

    // Check for ZIP variants (before getimagesize)
    if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
        $this->logger->info( __METHOD__ . ": ZIP header present in $file\n" );
        return $this->detectZipType( $head, $tail, $ext );
    }

    MediaWiki\suppressWarnings();
    $gis = getimagesize( $file );
    MediaWiki\restoreWarnings();

    if ( $gis && isset( $gis['mime'] ) ) {
        $mime = $gis['mime'];
        $this->logger->info( __METHOD__ . ": getimagesize detected $file as $mime\n" );
        return $mime;
    }

    # Media handling extensions can guess the MIME by content
    # It's intentionally here so that if core is wrong about a type (false positive),
    # people will hopefully nag and submit patches :)
    $mime = false;
    # Some strings by reference for performance - assuming well-behaved hooks
    $callback = $this->guessCallback;
    if ( $callback ) {
        $callback( $this, $head, $tail, $file, $mime /* by reference */ );
    }

    return $mime;
}
825 
/**
 * Detect the application-specific file type of a ZIP archive from the start
 * (and optionally the end) of the file.
 *
 * Recognizes OpenDocument packages, Open Packaging Conventions (OPC)
 * archives, and old MS Office documents that carry an OPC trailer. Anything
 * else is reported as plain 'application/zip'.
 *
 * @param string $header Beginning of the file
 * @param string|null $tail End of the file
 * @param string|bool $ext Deprecated file extension; when it is neither true
 *   nor false it is used to refine the type of an OPC archive
 * @return string MIME type
 */
function detectZipType( $header, $tail = null, $ext = false ) {
    if ( $ext ) { # TODO: remove $ext param
        $this->logger->info( __METHOD__ .
            ": WARNING: use of the \$ext parameter is deprecated. " .
            "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
    }

    $mime = 'application/zip';
    // Longer names come before their prefixes so that the alternation
    // matches e.g. "text-template" rather than just "text"
    $opendocTypes = [
        'chart-template',
        'chart',
        'formula-template',
        'formula',
        'graphics-template',
        'graphics',
        'image-template',
        'image',
        'presentation-template',
        'presentation',
        'spreadsheet-template',
        'spreadsheet',
        'text-template',
        'text-master',
        'text-web',
        'text' ];

    // https://lists.oasis-open.org/archives/office/200505/msg00006.html
    $types = '(?:' . implode( '|', $opendocTypes ) . ')';
    $opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

    // The dot is escaped so that only the literal name "[Content_Types].xml" matches
    $openxmlRegex = "/^\[Content_Types\]\.xml/";

    // Both regexes are applied from byte offset 30 of the data
    if ( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
        $mime = $matches[1];
        $this->logger->info( __METHOD__ . ": detected $mime from ZIP archive\n" );
    } elseif ( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) {
        $mime = "application/x-opc+zip";
        # TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
        if ( $ext !== true && $ext !== false ) {
            if ( $this->isMatchingExtension( $ext, $mime ) ) {
                /* A known file extension for an OPC file,
                 * find the proper mime type for that file extension
                 */
                $mime = $this->guessTypesForExtension( $ext );
            } else {
                $mime = "application/zip";
            }
        }
        $this->logger->info( __METHOD__ .
            ": detected an Open Packaging Conventions archive: $mime\n" );
    } elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
        ( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
        preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
        // File starts with the 8-byte signature above and has a ZIP local
        // file header in its tail whose first entry is [Content_Types].xml
        if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
            $mime = "application/msword";
        }
        switch ( substr( $header, 512, 6 ) ) {
            case "\xEC\xA5\xC1\x00\x0E\x00":
            case "\xEC\xA5\xC1\x00\x1C\x00":
            case "\xEC\xA5\xC1\x00\x43\x00":
                $mime = "application/vnd.ms-powerpoint";
                break;
            case "\xFD\xFF\xFF\xFF\x10\x00":
            case "\xFD\xFF\xFF\xFF\x1F\x00":
            case "\xFD\xFF\xFF\xFF\x22\x00":
            case "\xFD\xFF\xFF\xFF\x23\x00":
            case "\xFD\xFF\xFF\xFF\x28\x00":
            case "\xFD\xFF\xFF\xFF\x29\x00":
            case "\xFD\xFF\xFF\xFF\x10\x02":
            case "\xFD\xFF\xFF\xFF\x1F\x02":
            case "\xFD\xFF\xFF\xFF\x22\x02":
            case "\xFD\xFF\xFF\xFF\x23\x02":
            case "\xFD\xFF\xFF\xFF\x28\x02":
            case "\xFD\xFF\xFF\xFF\x29\x02":
                $mime = "application/vnd.msexcel";
                break;
        }

        $this->logger->info( __METHOD__ .
            ": detected a MS Office document with OPC trailer\n" );
    } else {
        $this->logger->info( __METHOD__ . ": unable to identify type of ZIP archive\n" );
    }
    return $mime;
}
928 
/**
 * Internal MIME type detection.
 *
 * Uses the detect callback when one is configured, otherwise the fileinfo
 * functions when available. If that yields nothing usable, the file
 * extension is consulted as a fallback.
 *
 * @param string $file Path of the file to inspect
 * @param string|bool $ext Deprecated. True takes the extension from $file,
 *   a string is used as the extension, false disables the extension fallback.
 * @return string MIME type, or 'unknown/unknown' if nothing could be determined
 */
private function detectMimeType( $file, $ext = true ) {
    if ( $ext ) {
        $this->logger->info( __METHOD__ .
            ": WARNING: use of the \$ext parameter is deprecated. "
            . "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
    }

    $detected = null;
    $hook = $this->detectCallback;

    if ( $hook ) {
        $detected = $hook( $file );
    } elseif ( !function_exists( "finfo_open" ) || !function_exists( "finfo_file" ) ) {
        $this->logger->info( __METHOD__ . ": no magic mime detector found!\n" );
    } else {
        $finfo = finfo_open( FILEINFO_MIME );

        if ( !$finfo ) {
            $this->logger->info( __METHOD__ .
                ": finfo_open failed on " . FILEINFO_MIME . "!\n" );
        } else {
            $detected = finfo_file( $finfo, $file );
            finfo_close( $finfo );
        }
    }

    if ( $detected ) {
        # normalize: strip charset, etc; then trim and lower-case
        $detected = strtolower( trim( preg_replace( '![;, ].*$!', '', $detected ) ) );

        if ( strpos( $detected, 'unknown' ) === false ) {
            $this->logger->info( __METHOD__ . ": magic mime type of $file: $detected\n" );
            return $detected;
        }

        // An "unknown" answer counts as no answer
        $detected = null;
    }

    // If desired, look at extension as a fallback.
    if ( $ext === true ) {
        $dot = strrpos( $file, '.' );
        $ext = strtolower( $dot ? substr( $file, $dot + 1 ) : '' );
    }

    if ( $ext ) {
        if ( !$this->isRecognizableExtension( $ext ) ) {
            $detected = $this->guessTypesForExtension( $ext );
            if ( $detected ) {
                $this->logger->info( __METHOD__ . ": extension mime type of $file: $detected\n" );
                return $detected;
            }
        } else {
            $this->logger->info( __METHOD__ . ": refusing to guess mime type for .$ext file, "
                . "we should have recognized it\n" );
        }
    }

    // Unknown type
    $this->logger->info( __METHOD__ . ": failed to guess mime type for $file!\n" );
    return 'unknown/unknown';
}
1008 
/**
 * Determine the media type code for a file, using its MIME type, name and
 * possibly its contents.
 *
 * Lookup order: the full MIME type, then the file extension taken from
 * $path, then the major part of the MIME type. 'application/ogg' files that
 * exist on disk are inspected directly to tell video from audio.
 *
 * @param string|null $path Path of the file; used for content checks and for
 *   its extension
 * @param string|null $mime MIME type; guessed from $path when empty
 * @return string A MEDIATYPE_* constant; MEDIATYPE_UNKNOWN if neither
 *   argument is given or nothing matches
 */
function getMediaType( $path = null, $mime = null ) {
    if ( !$mime && !$path ) {
        return MEDIATYPE_UNKNOWN;
    }

    // If MIME type is unknown, guess it
    if ( !$mime ) {
        $mime = $this->guessMimeType( $path, false );
    }

    // Special code for ogg - detect if it's video (theora),
    // else label it as sound.
    if ( $mime == 'application/ogg' && file_exists( $path ) ) {

        // Read a chunk of the file. Binary mode: the file is not text, and
        // text-mode translation must not alter the bytes being scanned.
        $f = fopen( $path, "rb" );
        if ( !$f ) {
            return MEDIATYPE_UNKNOWN;
        }
        $head = fread( $f, 256 );
        fclose( $f );

        // Drop the encoder name so that its "theora" substring is not
        // mistaken for the codec marker
        $head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

        // This is an UGLY HACK, file should be parsed correctly
        if ( strpos( $head, 'theora' ) !== false ) {
            return MEDIATYPE_VIDEO;
        } elseif ( strpos( $head, 'vorbis' ) !== false ) {
            return MEDIATYPE_AUDIO;
        } elseif ( strpos( $head, 'flac' ) !== false ) {
            return MEDIATYPE_AUDIO;
        } elseif ( strpos( $head, 'speex' ) !== false ) {
            return MEDIATYPE_AUDIO;
        } elseif ( strpos( $head, 'opus' ) !== false ) {
            return MEDIATYPE_AUDIO;
        } else {
            return MEDIATYPE_MULTIMEDIA;
        }
    }

    $type = null;
    // Check for entry for full MIME type
    if ( $mime ) {
        $type = $this->findMediaType( $mime );
        if ( $type !== MEDIATYPE_UNKNOWN ) {
            return $type;
        }
    }

    // Check for entry for file extension
    if ( $path ) {
        $i = strrpos( $path, '.' );
        $e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

        // TODO: look at multi-extension if this fails, parse from full path
        $type = $this->findMediaType( '.' . $e );
        if ( $type !== MEDIATYPE_UNKNOWN ) {
            return $type;
        }
    }

    // Check major MIME type
    if ( $mime ) {
        $i = strpos( $mime, '/' );
        if ( $i !== false ) {
            $major = substr( $mime, 0, $i );
            $type = $this->findMediaType( $major );
            if ( $type !== MEDIATYPE_UNKNOWN ) {
                return $type;
            }
        }
    }

    // Never hand back null: callers get a MEDIATYPE_* value in every case
    if ( !$type ) {
        $type = MEDIATYPE_UNKNOWN;
    }

    return $type;
}
1104 
/**
 * Returns the media type for a MIME type or a file extension.
 *
 * A value starting with a dot is treated as a file extension and expanded to
 * its MIME types; anything else is treated as a MIME type (getMediaType()
 * also passes the major part of a MIME type) and resolved through the alias
 * table. The first candidate found in the media type table wins.
 *
 * @param string $extMime MIME type, or file extension with a leading dot
 * @return int|string Key of $this->mediaTypes under which a candidate is
 *   listed, or MEDIATYPE_UNKNOWN if none is
 */
function findMediaType( $extMime ) {
    if ( strpos( $extMime, '.' ) === 0 ) {
        // If it's an extension, look up the MIME types
        $listed = $this->getTypesForExtension( substr( $extMime, 1 ) );
        if ( !$listed ) {
            return MEDIATYPE_UNKNOWN;
        }

        $candidates = explode( ' ', $listed );
    } else {
        // Normalize MIME type
        $candidates = [
            isset( $this->mimeTypeAliases[$extMime] )
                ? $this->mimeTypeAliases[$extMime]
                : $extMime
        ];
    }

    foreach ( $candidates as $candidate ) {
        foreach ( $this->mediaTypes as $mediaType => $mimeList ) {
            if ( in_array( $candidate, $mimeList, true ) ) {
                return $mediaType;
            }
        }
    }

    return MEDIATYPE_UNKNOWN;
}
1143 
/**
 * Pass the file name, data chunk and proposed type to the IEContentAnalyzer
 * and return what its getRealMimesFromData() reports.
 *
 * @param string $fileName
 * @param string $chunk
 * @param string $proposed
 * @return mixed Result of IEContentAnalyzer::getRealMimesFromData()
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
    return $this->getIEContentAnalyzer()->getRealMimesFromData( $fileName, $chunk, $proposed );
}
1157 
/**
 * Get a cached instance of IEContentAnalyzer.
 * The instance is created on first use.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
    if ( $this->IEAnalyzer === null ) {
        $this->IEAnalyzer = new IEContentAnalyzer();
    }

    return $this->IEAnalyzer;
}
1169 }
MimeAnalyzer\$guessCallback
callable $guessCallback
Definition: MimeAnalyzer.php:42
MimeAnalyzer\getIEContentAnalyzer
getIEContentAnalyzer()
Get a cached instance of IEContentAnalyzer.
Definition: MimeAnalyzer.php:1163
MimeAnalyzer\$wellKnownTypes
static $wellKnownTypes
Defines a set of well-known MIME types. This is used as a fallback to mime.types files.
Definition: MimeAnalyzer.php:85
MimeAnalyzer\guessTypesForExtension
guessTypesForExtension( $ext)
Returns a single MIME type for a given file extension or null if unknown.
Definition: MimeAnalyzer.php:451
MimeAnalyzer\addExtraInfo
addExtraInfo( $info)
Adds to the list mapping MIME to media type.
Definition: MimeAnalyzer.php:399
MimeAnalyzer\isRecognizableExtension
isRecognizableExtension( $extension)
Returns true if the extension represents a type which can be reliably detected from its content.
Definition: MimeAnalyzer.php:521
MEDIATYPE_AUDIO
const MEDIATYPE_AUDIO
Definition: defines.php:32
MimeAnalyzer\getExtensionsForType
getExtensionsForType( $mime)
Returns a list of file extensions for a given MIME type as a space separated string or null if the MI...
Definition: MimeAnalyzer.php:411
MimeAnalyzer\$initCallback
callable $initCallback
Definition: MimeAnalyzer.php:38
MimeAnalyzer\getMediaType
getMediaType( $path=null, $mime=null)
Determine the media type code for a file, using its MIME type, name and possibly its contents.
Definition: MimeAnalyzer.php:1025
MimeAnalyzer
Implements functions related to MIME types such as detection and mapping to file extension.
Definition: MimeAnalyzer.php:30
captcha-old.count
count
Definition: captcha-old.py:225
MimeAnalyzer\$mimetoExt
array $mimetoExt
Map of MIME types to file extensions (as a space separated list)
Definition: MimeAnalyzer.php:50
text
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
MimeAnalyzer\$mExtToMime
array $mExtToMime
Map of file extensions to MIME types (as a space-separated list)
Definition: MimeAnalyzer.php:53
MimeAnalyzer\guessMimeType
guessMimeType( $file, $ext=true)
MIME type detection.
Definition: MimeAnalyzer.php:608
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
MimeAnalyzer\$infoFile
string $infoFile
Definition: MimeAnalyzer.php:34
txt
This document describes how event hooks work in the Renameuser extension For a more comprehensive guide to navigate to your root MediaWiki directory and read docs hooks txt
Definition: hooks.txt:6
MimeAnalyzer\__construct
__construct(array $params)
Definition: MimeAnalyzer.php:194
MimeAnalyzer\getTypesForExtension
getTypesForExtension( $ext)
Returns a list of MIME types for a given file extension as a space separated string or null if the ex...
Definition: MimeAnalyzer.php:437
$params
$params
Definition: styleTest.css.php:40
MEDIATYPE_UNKNOWN
const MEDIATYPE_UNKNOWN
Definition: defines.php:26
MimeAnalyzer\$mediaTypes
array $mediaTypes
Mapping of media types to arrays of MIME types.
Definition: MimeAnalyzer.php:46
$s
$s
Definition: mergeMessageFileList.php:188
$type
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2536
MimeAnalyzer\setLogger
setLogger(LoggerInterface $logger)
Definition: MimeAnalyzer.php:379
MimeAnalyzer\detectMimeType
detectMimeType( $file, $ext=true)
Internal MIME type detection.
Definition: MimeAnalyzer.php:946
MimeAnalyzer\$IEAnalyzer
IEContentAnalyzer $IEAnalyzer
Definition: MimeAnalyzer.php:56
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
MimeAnalyzer\getIEMimeTypes
getIEMimeTypes( $fileName, $chunk, $proposed)
Get the MIME types that various versions of Internet Explorer would detect from a chunk of the content.
Definition: MimeAnalyzer.php:1153
MimeAnalyzer\$typeFile
string $typeFile
Definition: MimeAnalyzer.php:32
$matches
$matches
Definition: NoLocalSettings.php:24
$lines
$lines
Definition: router.php:67
MimeAnalyzer\detectZipType
detectZipType( $header, $tail=null, $ext=false)
Detect application-specific file type of a given ZIP file from its header data.
Definition: MimeAnalyzer.php:839
IEContentAnalyzer
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection algorithm.
Definition: IEContentAnalyzer.php:27
MimeAnalyzer\$wellKnownInfo
static $wellKnownInfo
Defines a set of well-known MIME info entries. This is used as a fallback to mime.info files.
Definition: MimeAnalyzer.php:137
$mime
if( $ext=='php'|| $ext=='php5') $mime
Definition: router.php:65
MEDIATYPE_MULTIMEDIA
const MEDIATYPE_MULTIMEDIA
Definition: defines.php:37
$e
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2122
MimeAnalyzer\isMatchingExtension
isMatchingExtension( $extension, $mime)
Tests if the extension matches the given MIME type.
Definition: MimeAnalyzer.php:473
$header
$header
Definition: updateCredits.php:35
MimeAnalyzer\findMediaType
findMediaType( $extMime)
Returns a media code matching the given MIME type or file extension.
Definition: MimeAnalyzer.php:1115
etc
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add etc
Definition: design.txt:12
MimeAnalyzer\$logger
LoggerInterface $logger
Definition: MimeAnalyzer.php:64
MimeAnalyzer\$extraInfo
string $extraInfo
Extra MIME info, set for example by media handling extensions.
Definition: MimeAnalyzer.php:61
MimeAnalyzer\$mimeTypeAliases
array $mimeTypeAliases
Map of MIME type aliases.
Definition: MimeAnalyzer.php:48
MimeAnalyzer\isPHPImageType
isPHPImageType( $mime)
Returns true if the MIME type is known to represent an image format supported by the PHP GD library.
Definition: MimeAnalyzer.php:494
XmlTypeCheck
Definition: XmlTypeCheck.php:28
MimeAnalyzer\loadFiles
loadFiles()
Definition: MimeAnalyzer.php:217
plain
either a plain
Definition: hooks.txt:2007
MEDIATYPE_TEXT
const MEDIATYPE_TEXT
Definition: defines.php:41
MimeAnalyzer\addExtraTypes
addExtraTypes( $types)
Adds to the list mapping MIME to file extensions.
Definition: MimeAnalyzer.php:389
MEDIATYPE_VIDEO
const MEDIATYPE_VIDEO
Definition: defines.php:35
MimeAnalyzer\improveTypeFromExtension
improveTypeFromExtension( $mime, $ext)
Improves a MIME type using the file extension.
Definition: MimeAnalyzer.php:551
MimeAnalyzer\$extCallback
callable $extCallback
Definition: MimeAnalyzer.php:44
$ext
$ext
Definition: NoLocalSettings.php:25
$path
$path
Definition: NoLocalSettings.php:26
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
MimeAnalyzer\$extraTypes
string $extraTypes
Extra MIME types, set for example by media handling extensions.
Definition: MimeAnalyzer.php:59
MimeAnalyzer\$xmlTypes
string $xmlTypes
Definition: MimeAnalyzer.php:36
MimeAnalyzer\$detectCallback
callable $detectCallback
Definition: MimeAnalyzer.php:40
MimeAnalyzer\doGuessMimeType
doGuessMimeType( $file, $ext)
Guess the MIME type from the file contents.
Definition: MimeAnalyzer.php:641
array
the array() calling protocol came about after MediaWiki 1.4rc1.