MediaWiki  1.30.0
MimeAnalyzer.php
Go to the documentation of this file.
1 <?php
22 use Psr\Log\LoggerAwareInterface;
23 use Psr\Log\LoggerInterface;
24 
30 class MimeAnalyzer implements LoggerAwareInterface {
/** @var string Path of the MIME types file; a falsy value means "built-ins only" */
protected $typeFile;

/** @var string Path of the MIME info file; a falsy value means "built-ins only" */
protected $infoFile;

/** @var array Taken from $params['xmlTypes']; presumably XML root element => MIME type (confirm) */
protected $xmlTypes;

/** @var callable|null Invoked with ( $this ) at the start of loadFiles() */
protected $initCallback;

/** @var callable|null Invoked with ( $file ) by detectMimeType(); replaces mime_content_type() */
protected $detectCallback;

/** @var callable|null Invoked with ( $this, $head, $tail, $file, $mime ) by doGuessMimeType() */
protected $guessCallback;

/** @var callable|null Invoked with ( $this, $ext, $mime ) by improveTypeFromExtension() */
protected $extCallback;

/** @var array|null Mapping of media types to arrays of MIME types; filled by loadFiles() */
protected $mediaTypes;

/** @var array|null Map of alias MIME type => main MIME type; filled by loadFiles() */
protected $mimeTypeAliases;

/** @var array|null Map of MIME types to file extensions (as a space separated list) */
protected $mimetoExt;

/** @var array|null Map of file extensions to MIME types (as a space separated list) */
public $mExtToMime; // legacy name; field accessed by hooks

/** @var IEContentAnalyzer|null Created lazily by getIEContentAnalyzer() */
protected $IEAnalyzer;

/** @var string Extra MIME type definitions collected through addExtraTypes() */
private $extraTypes = '';

/** @var string Extra MIME info definitions collected through addExtraInfo() */
private $extraInfo = '';

/** @var LoggerInterface */
private $logger;
65 
/**
 * Defines a set of well known MIME types.
 * This is used as a fallback to mime.types files.
 *
 * One definition per line: the MIME type followed by the file extensions
 * registered for it, separated by spaces. Parsed by loadFiles().
 *
 * A nowdoc is used because the text contains nothing to interpolate.
 */
protected static $wellKnownTypes = <<<'EOT'
application/ogg ogx ogg ogm ogv oga spx opus
application/pdf pdf
application/vnd.oasis.opendocument.chart odc
application/vnd.oasis.opendocument.chart-template otc
application/vnd.oasis.opendocument.database odb
application/vnd.oasis.opendocument.formula odf
application/vnd.oasis.opendocument.formula-template otf
application/vnd.oasis.opendocument.graphics odg
application/vnd.oasis.opendocument.graphics-template otg
application/vnd.oasis.opendocument.image odi
application/vnd.oasis.opendocument.image-template oti
application/vnd.oasis.opendocument.presentation odp
application/vnd.oasis.opendocument.presentation-template otp
application/vnd.oasis.opendocument.spreadsheet ods
application/vnd.oasis.opendocument.spreadsheet-template ots
application/vnd.oasis.opendocument.text odt
application/vnd.oasis.opendocument.text-master otm
application/vnd.oasis.opendocument.text-template ott
application/vnd.oasis.opendocument.text-web oth
application/javascript js
application/x-shockwave-flash swf
audio/midi mid midi kar
audio/mpeg mpga mpa mp2 mp3
audio/x-aiff aif aiff aifc
audio/x-wav wav
audio/ogg oga spx ogg opus
audio/opus opus ogg oga ogg spx
image/x-bmp bmp
image/gif gif
image/jpeg jpeg jpg jpe
image/png png
image/svg+xml svg
image/svg svg
image/tiff tiff tif
image/vnd.djvu djvu
image/x.djvu djvu
image/x-djvu djvu
image/x-portable-pixmap ppm
image/x-xcf xcf
text/plain txt
text/html html htm
video/ogg ogv ogm ogg
video/mpeg mpg mpeg
EOT;
130 
/**
 * Built-in MIME info definitions, used in addition to any MIME info file.
 *
 * One definition per line: a main MIME type, optionally followed by
 * aliases for it, and a media type in square brackets. Parsed by
 * loadFiles().
 *
 * A nowdoc is used because the text contains nothing to interpolate.
 */
protected static $wellKnownInfo = <<<'EOT'
application/pdf [OFFICE]
application/vnd.oasis.opendocument.chart [OFFICE]
application/vnd.oasis.opendocument.chart-template [OFFICE]
application/vnd.oasis.opendocument.database [OFFICE]
application/vnd.oasis.opendocument.formula [OFFICE]
application/vnd.oasis.opendocument.formula-template [OFFICE]
application/vnd.oasis.opendocument.graphics [OFFICE]
application/vnd.oasis.opendocument.graphics-template [OFFICE]
application/vnd.oasis.opendocument.image [OFFICE]
application/vnd.oasis.opendocument.image-template [OFFICE]
application/vnd.oasis.opendocument.presentation [OFFICE]
application/vnd.oasis.opendocument.presentation-template [OFFICE]
application/vnd.oasis.opendocument.spreadsheet [OFFICE]
application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
application/vnd.oasis.opendocument.text [OFFICE]
application/vnd.oasis.opendocument.text-template [OFFICE]
application/vnd.oasis.opendocument.text-master [OFFICE]
application/vnd.oasis.opendocument.text-web [OFFICE]
application/javascript text/javascript application/x-javascript [EXECUTABLE]
application/x-shockwave-flash [MULTIMEDIA]
audio/midi [AUDIO]
audio/x-aiff [AUDIO]
audio/x-wav [AUDIO]
audio/mp3 audio/mpeg [AUDIO]
application/ogg audio/ogg video/ogg [MULTIMEDIA]
image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
image/gif [BITMAP]
image/jpeg [BITMAP]
image/png [BITMAP]
image/svg+xml [DRAWING]
image/tiff [BITMAP]
image/vnd.djvu [BITMAP]
image/x-xcf [BITMAP]
image/x-portable-pixmap [BITMAP]
text/plain [TEXT]
text/html [TEXT]
video/ogg [VIDEO]
video/mpeg [VIDEO]
unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
EOT;
178 
/**
 * Stores the configuration and immediately builds the lookup tables.
 *
 * @param array $params Configuration map:
 *  - typeFile: (required key) path of the MIME types file, or a falsy value
 *  - infoFile: (required key) path of the MIME info file, or a falsy value
 *  - xmlTypes: (required key) stored as-is in $this->xmlTypes
 *  - initCallback, detectCallback, guessCallback, extCallback: optional
 *    callables; a missing key is stored as null
 *  - logger: optional logger; a NullLogger is used when absent
 */
public function __construct( array $params ) {
	$this->typeFile = $params['typeFile'];
	$this->infoFile = $params['infoFile'];
	$this->xmlTypes = $params['xmlTypes'];

	// The four optional hooks share the same "use it if given" rule.
	$hookNames = [ 'initCallback', 'detectCallback', 'guessCallback', 'extCallback' ];
	foreach ( $hookNames as $hookName ) {
		$this->$hookName = isset( $params[$hookName] ) ? $params[$hookName] : null;
	}

	if ( isset( $params['logger'] ) ) {
		$this->logger = $params['logger'];
	} else {
		$this->logger = new \Psr\Log\NullLogger();
	}

	$this->loadFiles();
}
216 
/**
 * Builds the four lookup tables from the built-in definitions, the
 * configured files and the text added through addExtraTypes() and
 * addExtraInfo():
 *  - $mimetoExt and $mExtToMime from the MIME type definitions
 *  - $mediaTypes and $mimeTypeAliases from the MIME info definitions
 *
 * The init callback runs first, so it is the place where extensions can
 * still contribute extra definitions.
 */
protected function loadFiles() {
	# Allow media handling extensions adding MIME-types and MIME-info
	if ( $this->initCallback ) {
		call_user_func( $this->initCallback, $this );
	}

	/*
	 * Part 1: MIME type <-> file extension maps
	 */
	$typeText = self::$wellKnownTypes;
	$typePath = $this->typeFile;

	if ( !$typePath ) {
		$this->logger->info( __METHOD__ .
			": no mime types file defined, using built-ins only.\n" );
	} elseif ( is_file( $typePath ) && is_readable( $typePath ) ) {
		$this->logger->info( __METHOD__ . ": loading mime types from $typePath\n" );
		$typeText .= "\n";
		$typeText .= file_get_contents( $typePath );
	} else {
		$this->logger->info( __METHOD__ . ": can't load mime types from $typePath\n" );
	}

	$typeText .= "\n" . $this->extraTypes;

	$this->mimetoExt = [];
	$this->mExtToMime = [];

	foreach ( $this->splitDefinitionLines( $typeText ) as $line ) {
		// First token is the MIME type, the rest is the extension list.
		$sep = strpos( $line, ' ' );
		$mime = substr( $line, 0, $sep );
		$extList = trim( substr( $line, $sep + 1 ) );

		if ( empty( $extList ) ) {
			continue;
		}

		// A MIME type may be defined on several lines; the lists accumulate.
		$this->mimetoExt[$mime] = empty( $this->mimetoExt[$mime] )
			? $extList
			: $this->mimetoExt[$mime] . ' ' . $extList;

		foreach ( explode( ' ', $extList ) as $oneExt ) {
			$oneExt = trim( $oneExt );
			if ( empty( $oneExt ) ) {
				continue;
			}

			$this->mExtToMime[$oneExt] = empty( $this->mExtToMime[$oneExt] )
				? $mime
				: $this->mExtToMime[$oneExt] . ' ' . $mime;
		}
	}

	/*
	 * Part 2: media type and alias maps
	 */
	$infoPath = $this->infoFile;
	$infoText = self::$wellKnownInfo;

	if ( !$infoPath ) {
		$this->logger->info( __METHOD__ .
			": no mime info file defined, using built-ins only.\n" );
	} elseif ( is_file( $infoPath ) && is_readable( $infoPath ) ) {
		$this->logger->info( __METHOD__ . ": loading mime info from $infoPath\n" );
		$infoText .= "\n";
		$infoText .= file_get_contents( $infoPath );
	} else {
		$this->logger->info( __METHOD__ . ": can't load mime info from $infoPath\n" );
	}

	$infoText .= "\n" . $this->extraInfo;

	$this->mimeTypeAliases = [];
	$this->mediaTypes = [];

	$markerPattern = '!\[\s*(\w+)\s*\]!';
	foreach ( $this->splitDefinitionLines( $infoText ) as $line ) {
		# print "processing MIME INFO line $s<br>";

		// Pull the "[MEDIATYPE]" marker out of the line, if there is one.
		$found = [];
		if ( preg_match( $markerPattern, $line, $found ) ) {
			$line = preg_replace( $markerPattern, '', $line );
			$mediaType = trim( strtoupper( $found[1] ) );
		} else {
			$mediaType = MEDIATYPE_UNKNOWN;
		}

		$names = explode( ' ', $line );

		if ( !isset( $this->mediaTypes[$mediaType] ) ) {
			$this->mediaTypes[$mediaType] = [];
		}

		foreach ( $names as $name ) {
			$name = trim( $name );
			if ( empty( $name ) ) {
				continue;
			}

			$this->mediaTypes[$mediaType][] = $name;
		}

		// Every name after the first is an alias of the first one.
		// NOTE(review): $names is used untrimmed here, so the blank left
		// behind by a removed marker is registered as an alias with the
		// key ''. Other methods look up aliases with isset(), which then
		// also succeeds for '' and null - confirm whether that is relied on
		// before cleaning it up.
		$main = $names[0];
		foreach ( array_slice( $names, 1 ) as $alias ) {
			$this->mimeTypeAliases[$alias] = $main;
		}
	}
}

/**
 * Splits definition text into the lines that can hold a definition.
 *
 * Line endings are unified and tabs become spaces; each line is trimmed
 * and lower-cased. Blank lines, lines starting with "#" and lines without
 * a space (a single token) are left out.
 *
 * @param string $text Raw definition text
 * @return string[] Usable lines, in their original order
 */
private function splitDefinitionLines( $text ) {
	$text = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $text );
	$text = str_replace( "\t", " ", $text );

	$usable = [];
	foreach ( explode( "\n", $text ) as $line ) {
		$line = trim( $line );
		if ( empty( $line ) || strpos( $line, '#' ) === 0 ) {
			continue;
		}

		$line = strtolower( $line );
		if ( strpos( $line, ' ' ) === false ) {
			continue;
		}

		$usable[] = $line;
	}

	return $usable;
}
378 
/**
 * Replaces the logger used for all diagnostic messages of this object.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
382 
/**
 * Queues additional MIME type definitions, in the same line format as
 * the built-in list ("mime ext ext ...").
 *
 * The queued text is only read by loadFiles(), which the constructor
 * runs, so in practice this is meant to be called from the init callback.
 *
 * @param string $types One or more definition lines
 */
public function addExtraTypes( $types ) {
	$this->extraTypes = $this->extraTypes . "\n" . $types;
}
392 
/**
 * Adds to the list mapping MIME to media type, using the same line
 * format as the built-in info ("mime alias ... [MEDIATYPE]").
 *
 * The queued text is only read by loadFiles(), which the constructor
 * runs, so in practice this is meant to be called from the init callback.
 *
 * @param string $info One or more definition lines
 */
public function addExtraInfo( $info ) {
	$this->extraInfo = $this->extraInfo . "\n" . $info;
}
402 
/**
 * Returns a list of file extensions for a given MIME type as a space
 * separated string, or null if the MIME type was unrecognized.
 *
 * The type itself is tried first; only when it has no extensions of its
 * own is it resolved through the alias table and tried again.
 *
 * @param string $mime
 * @return string|null
 */
public function getExtensionsForType( $mime ) {
	$key = strtolower( $mime );

	// Fall back to the canonical type when the given one has no entry
	if ( !isset( $this->mimetoExt[$key] ) && isset( $this->mimeTypeAliases[$key] ) ) {
		$key = $this->mimeTypeAliases[$key];
	}

	return isset( $this->mimetoExt[$key] ) ? $this->mimetoExt[$key] : null;
}
429 
/**
 * Returns a list of MIME types for a given file extension as a space
 * separated string, or null if the extension was unrecognized.
 *
 * @param string $ext Extension without the leading dot; case-insensitive
 * @return string|null
 */
public function getTypesForExtension( $ext ) {
	$key = strtolower( $ext );

	if ( isset( $this->mExtToMime[$key] ) ) {
		return $this->mExtToMime[$key];
	}

	return null;
}
443 
/**
 * Returns a single MIME type for a given file extension, or null if
 * unknown. This is the first entry of the list that
 * getTypesForExtension() returns.
 *
 * @param string $ext
 * @return string|null
 */
public function guessTypesForExtension( $ext ) {
	$allTypes = $this->getTypesForExtension( $ext );
	if ( $allTypes === null ) {
		return null;
	}

	// TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
	// Cut the list at its first whitespace character.
	return preg_replace( '/\s.*$/', '', trim( $allTypes ) );
}
463 
/**
 * Tests if the extension matches the given MIME type.
 *
 * @param string $extension File extension without the dot; case-insensitive
 * @param string $mime MIME type, resolved by getExtensionsForType()
 * @return bool|null True if the extension is registered for the type,
 *   false if it is not, null if no extensions are known for the type
 */
public function isMatchingExtension( $extension, $mime ) {
	$known = $this->getExtensionsForType( $mime );

	if ( !$known ) {
		return null; // Unknown MIME type
	}

	// Strict comparison: a loose in_array() compares numeric-looking
	// strings as numbers, so e.g. "1e1" would match a registered "10".
	return in_array( strtolower( $extension ), explode( ' ', $known ), true );
}
485 
/**
 * Returns true if the MIME type is one of the image types listed below,
 * which follow getimagesize() and image_type_to_mime_type().
 *
 * @param string $mime Exact MIME type; the comparison is case-sensitive
 * @return bool
 */
public function isPHPImageType( $mime ) {
	// As defined by imagegetsize and image_type_to_mime
	static $types = [
		'image/gif', 'image/jpeg', 'image/png',
		'image/x-bmp', 'image/xbm', 'image/tiff',
		'image/jp2', 'image/jpeg2000', 'image/iff',
		'image/x-xbitmap',
		'image/vnd.wap.wbmp', 'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	];

	// Strict comparison: with a loose in_array(), non-string input such as
	// true (and 0 before PHP 8) compares equal to the first list entry.
	return in_array( $mime, $types, true );
}
508 
/**
 * Returns true if the extension represents a type which can be reliably
 * detected from its content. The check is case-insensitive.
 *
 * @param string $extension File extension without the dot
 * @return bool
 */
function isRecognizableExtension( $extension ) {
	// Built once per request; keys are the recognizable extensions.
	static $recognizable = null;

	if ( $recognizable === null ) {
		$recognizable = array_flip( [
			// Types recognized by getimagesize()
			'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
			'bmp', 'tiff', 'tif', 'jpc', 'jp2',
			'jpx', 'jb2', 'swc', 'iff', 'wbmp',
			'xbm',

			// Formats we recognize magic numbers for
			'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx', 'opus',
			'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
			'webp', 'mp3',

			// XML formats we sure hope we recognize reliably
			'svg',

			// 3D formats
			'stl',
		] );
	}

	return isset( $recognizable[strtolower( $extension )] );
}
542 
/**
 * Improves a MIME type using the file extension. Some file formats are
 * very generic, so their MIME type is not very meaningful. A more useful
 * MIME type can be derived by looking at the file extension.
 *
 * Three detected types are refined: 'unknown/unknown',
 * 'application/x-opc+zip' and 'text/plain'. The result is then offered to
 * the extension callback and finally resolved through the alias table.
 *
 * @param string $mime The MIME type, typically from guessMimeType()
 * @param string $ext The file extension, without the dot
 * @return string|null The value left in $mime after the steps above;
 *   guessTypesForExtension() can yield null for an unmapped extension
 */
public function improveTypeFromExtension( $mime, $ext ) {
	if ( $mime === 'unknown/unknown' ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->guessTypesForExtension( $ext );
		} else {
			$this->logger->info( __METHOD__ . ': refusing to guess mime type for .' .
				"$ext file, we should have recognized it\n" );
		}
	} elseif ( $mime === 'application/x-opc+zip' ) {
		if ( !$this->isMatchingExtension( $ext, $mime ) ) {
			$this->logger->info( __METHOD__ .
				": refusing to guess better type for $mime file, " .
				".$ext is not a known OPC extension.\n" );
			$mime = 'application/zip';
		} else {
			// A known file extension for an OPC file,
			// find the proper MIME type for that file extension
			$mime = $this->guessTypesForExtension( $ext );
		}
	} elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
		// Textual types are sometimes not recognized properly.
		// If detected as text/plain, and has an extension which is textual
		// improve to the extension's type. For example, csv and json are often
		// misdetected as text/plain.
		$mime = $this->guessTypesForExtension( $ext );
	}

	# Media handling extensions can improve the MIME detected
	$improver = $this->extCallback;
	if ( $improver ) {
		// Called directly (not through call_user_func) so that a callback
		// declaring its last parameter by reference can change $mime.
		$improver( $this, $ext, $mime /* by reference */ );
	}

	if ( isset( $this->mimeTypeAliases[$mime] ) ) {
		$mime = $this->mimeTypeAliases[$mime];
	}

	$this->logger->info( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
	return $mime;
}
596 
/**
 * MIME type detection. The file content is examined first
 * (doGuessMimeType()); only when that yields nothing is
 * detectMimeType() consulted. The result is resolved through the alias
 * table before it is returned.
 *
 * @param string $file Path of the file to examine
 * @param string|bool $ext Deprecated; any truthy value only triggers a
 *   deprecation message here and is handed on to the two detectors
 * @return string The detected MIME type
 */
public function guessMimeType( $file, $ext = true ) {
	if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$detected = $this->doGuessMimeType( $file, $ext );

	if ( !$detected ) {
		$this->logger->info( __METHOD__ .
			": internal type detection failed for $file (.$ext)...\n" );
		$detected = $this->detectMimeType( $file, $ext );
	}

	// Report the canonical name when the detected one is an alias
	$mime = isset( $this->mimeTypeAliases[$detected] )
		? $this->mimeTypeAliases[$detected]
		: $detected;

	$this->logger->info( __METHOD__ . ": guessed mime type of $file: $mime\n" );
	return $mime;
}
633 
/**
 * Guesses the MIME type from the content of a file: magic numbers in the
 * first 1024 bytes and the last (up to) 65558 bytes, XML well-formedness,
 * shebang lines, ZIP and STL structure, getimagesize(), and finally the
 * guess callback.
 *
 * @param string $file Path of the file to examine
 * @param string|bool $ext Only handed on to detectZipType()
 * @return string|bool The MIME type, 'unknown/unknown' if the file cannot
 *   be read or is unrecognized EBML, or false if nothing matched (unless
 *   the guess callback supplied a value)
 * @throws UnexpectedValueException If seeking to the tail of the file fails
 */
private function doGuessMimeType( $file, $ext ) {
	// Read a chunk of the file
	MediaWiki\suppressWarnings();
	$f = fopen( $file, 'rb' );
	MediaWiki\restoreWarnings();

	if ( !$f ) {
		return 'unknown/unknown';
	}

	$fsize = filesize( $file );
	if ( $fsize === false ) {
		// Do not leak the handle on the early exit
		fclose( $f );
		return 'unknown/unknown';
	}

	$head = fread( $f, 1024 );
	$tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
	if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
		// Do not leak the handle when bailing out
		fclose( $f );
		throw new UnexpectedValueException(
			"Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
	}
	$tail = $tailLength ? fread( $f, $tailLength ) : '';
	fclose( $f );

	$this->logger->info( __METHOD__ .
		": analyzing head and tail of $file for magic numbers.\n" );

	// Hardcode a few magic number checks...
	$headers = [
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',
		'ID3' => 'audio/mpeg',
		"\xff\xfb" => 'audio/mpeg', // MPEG-1 layer 3
		"\xff\xf3" => 'audio/mpeg', // MPEG-2 layer 3 (lower sample rates)
		"\xff\xe3" => 'audio/mpeg', // MPEG-2.5 layer 3 (very low sample rates)

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	];

	foreach ( $headers as $magic => $candidate ) {
		if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
			$this->logger->info( __METHOD__ .
				": magic header in $file recognized as $candidate\n" );
			return $candidate;
		}
	}

	/* Look for WebM and Matroska files */
	if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
		$doctype = strpos( $head, "\x42\x82" );
		if ( $doctype ) {
			// Next byte is datasize, then data (sizes larger than 1 byte are stupid muxers)
			$data = substr( $head, $doctype + 3, 8 );
			if ( strncmp( $data, "matroska", 8 ) == 0 ) {
				$this->logger->info( __METHOD__ . ": recognized file as video/x-matroska\n" );
				return "video/x-matroska";
			} elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
				// XXX HACK look for a video track, if we don't find it, this is an audio file
				$videotrack = strpos( $head, "\x86\x85V_VP" );

				if ( $videotrack ) {
					// There is a video track, so this is a video file.
					$this->logger->info( __METHOD__ . ": recognized file as video/webm\n" );
					return "video/webm";
				}

				$this->logger->info( __METHOD__ . ": recognized file as audio/webm\n" );
				return "audio/webm";
			}
		}
		$this->logger->info( __METHOD__ . ": unknown EBML file\n" );
		return "unknown/unknown";
	}

	/* Look for WebP */
	if ( strncmp( $head, "RIFF", 4 ) == 0 &&
		strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0
	) {
		$this->logger->info( __METHOD__ . ": recognized file as image/webp\n" );
		return "image/webp";
	}

	// Look for PHP open tags anywhere in the head: the plain "<?php", and
	// "<?" variants where every character is followed by a NUL byte (how
	// such a tag looks in a 16-bit little-endian encoding).
	if ( ( strpos( $head, '<?php' ) !== false ) ||
		( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
		( strpos( $head, "<\x00?\x00 " ) !== false ) ||
		( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
		( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
		( strpos( $head, "<\x00?\x00=" ) !== false )
	) {
		$this->logger->info( __METHOD__ . ": recognized $file as application/x-php\n" );
		return 'application/x-php';
	}

	// Look for XML formats: a well-formed document is mapped through the
	// configured table of root elements, with a generic XML fallback.
	$xml = new XmlTypeCheck( $file );
	if ( $xml->wellFormed ) {
		// The table lives on the object; there is no local of that name.
		$xmlTypes = $this->xmlTypes;
		if ( isset( $xmlTypes[$xml->getRootElement()] ) ) {
			return $xmlTypes[$xml->getRootElement()];
		} else {
			return 'application/xml';
		}
	}

	// Look for shell scripts
	$script_type = null;

	# detect by shebang
	// Each literal is "#!" in the named encoding, with its BOM where one
	// is used; the compared prefix has exactly the literal's length.
	if ( substr( $head, 0, 2 ) == "#!" ) {
		$script_type = "ASCII";
	} elseif ( substr( $head, 0, 5 ) == "\xef\xbb\xbf#!" ) {
		$script_type = "UTF-8";
	} elseif ( substr( $head, 0, 6 ) == "\xfe\xff\x00#\x00!" ) {
		$script_type = "UTF-16BE";
	} elseif ( substr( $head, 0, 6 ) == "\xff\xfe#\x00!\x00" ) {
		$script_type = "UTF-16LE";
	}

	if ( $script_type ) {
		if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
			// Quick and dirty fold down to ASCII!
			$pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
			// Skip the two BOM bytes, then read 16-bit units
			$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
			$head = '';
			foreach ( $chars as $codepoint ) {
				if ( $codepoint < 128 ) {
					$head .= chr( $codepoint );
				} else {
					$head .= '?';
				}
			}
		}

		$match = [];

		// The interpreter's base name becomes the subtype
		if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
			$mime = "application/x-{$match[2]}";
			$this->logger->info( __METHOD__ . ": shell script recognized as $mime\n" );
			return $mime;
		}
	}

	// Check for ZIP variants (before getimagesize)
	if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
		$this->logger->info( __METHOD__ . ": ZIP header present in $file\n" );
		return $this->detectZipType( $head, $tail, $ext );
	}

	// Check for STL (3D) files
	// @see https://en.wikipedia.org/wiki/STL_(file_format)
	if ( $fsize >= 15 &&
		stripos( $head, 'SOLID ' ) === 0 &&
		preg_match( '/\RENDSOLID .*$/i', $tail ) ) {
		// ASCII STL file
		return 'application/sla';
	} elseif ( $fsize > 84 ) {
		// binary STL file: the size must be exactly 84 bytes plus 50 bytes
		// for each triangle counted in the 32-bit value at offset 80
		$triangles = substr( $head, 80, 4 );
		$triangles = unpack( 'V', $triangles );
		$triangles = reset( $triangles );
		if ( $triangles !== false && $fsize === 84 + ( $triangles * 50 ) ) {
			return 'application/sla';
		}
	}

	MediaWiki\suppressWarnings();
	$gis = getimagesize( $file );
	MediaWiki\restoreWarnings();

	if ( $gis && isset( $gis['mime'] ) ) {
		$mime = $gis['mime'];
		$this->logger->info( __METHOD__ . ": getimagesize detected $file as $mime\n" );
		return $mime;
	}

	# Media handling extensions can guess the MIME by content
	# It's intentionally here so that if core is wrong about a type (false positive),
	# people will hopefully nag and submit patches :)
	$mime = false;
	# Some strings by reference for performance - assuming well-behaved hooks
	$callback = $this->guessCallback;
	if ( $callback ) {
		$callback( $this, $head, $tail, $file, $mime /* by reference */ );
	}

	return $mime;
}
858 
/**
 * Detects application-specific file types among ZIP based formats, from
 * the start and (optionally) the end of the file.
 *
 * @param string $header Start of the file; the name of the first ZIP
 *   entry is read from offset 30
 * @param string|null $tail End of the file; used to find an OPC archive
 *   appended to an OLE2 document
 * @param string|bool $ext Deprecated file extension; true and false both
 *   mean "not given"
 * @return string The detected MIME type, 'application/zip' by default
 */
public function detectZipType( $header, $tail = null, $ext = false ) {
	if ( $ext ) { # TODO: remove $ext param
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	// The default null must not reach the string functions below
	if ( $tail === null ) {
		$tail = '';
	}

	$mime = 'application/zip';

	// Order matters: the pattern built from this list has no end anchor,
	// so a longer name has to come before the name it starts with.
	$opendocTypes = [
		'chart-template',
		'chart',
		'formula-template',
		'formula',
		'graphics-template',
		'graphics',
		'image-template',
		'image',
		'presentation-template',
		'presentation',
		'spreadsheet-template',
		'spreadsheet',
		'text-template',
		'text-master',
		'text-web',
		'text' ];

	// https://lists.oasis-open.org/archives/office/200505/msg00006.html
	$types = '(?:' . implode( '|', $opendocTypes ) . ')';
	$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

	// The dot is escaped so that only a literal "." matches
	$openxmlRegex = '/^\[Content_Types\]\.xml/';

	if ( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
		$mime = $matches[1];
		$this->logger->info( __METHOD__ . ": detected $mime from ZIP archive\n" );
	} elseif ( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) {
		$mime = "application/x-opc+zip";
		# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
		if ( $ext !== true && $ext !== false ) {
			if ( $this->isMatchingExtension( $ext, $mime ) ) {
				/* A known file extension for an OPC file,
				 * find the proper mime type for that file extension
				 */
				$mime = $this->guessTypesForExtension( $ext );
			} else {
				$mime = "application/zip";
			}
		}
		$this->logger->info( __METHOD__ .
			": detected an Open Packaging Conventions archive: $mime\n" );
	} elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
		( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
		preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
		// The bytes at offset 512 select the document type; when none of
		// the values below matches, $mime stays 'application/zip'.
		if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
			$mime = "application/msword";
		}
		switch ( substr( $header, 512, 6 ) ) {
			case "\xEC\xA5\xC1\x00\x0E\x00":
			case "\xEC\xA5\xC1\x00\x1C\x00":
			case "\xEC\xA5\xC1\x00\x43\x00":
				$mime = "application/vnd.ms-powerpoint";
				break;
			case "\xFD\xFF\xFF\xFF\x10\x00":
			case "\xFD\xFF\xFF\xFF\x1F\x00":
			case "\xFD\xFF\xFF\xFF\x22\x00":
			case "\xFD\xFF\xFF\xFF\x23\x00":
			case "\xFD\xFF\xFF\xFF\x28\x00":
			case "\xFD\xFF\xFF\xFF\x29\x00":
			case "\xFD\xFF\xFF\xFF\x10\x02":
			case "\xFD\xFF\xFF\xFF\x1F\x02":
			case "\xFD\xFF\xFF\xFF\x22\x02":
			case "\xFD\xFF\xFF\xFF\x23\x02":
			case "\xFD\xFF\xFF\xFF\x28\x02":
			case "\xFD\xFF\xFF\xFF\x29\x02":
				$mime = "application/vnd.msexcel";
				break;
		}

		$this->logger->info( __METHOD__ .
			": detected a MS Office document with OPC trailer\n" );
	} else {
		$this->logger->info( __METHOD__ . ": unable to identify type of ZIP archive\n" );
	}
	return $mime;
}
961 
/**
 * Internal MIME type detection. Asks the detect callback when one is
 * configured, mime_content_type() otherwise, and falls back to the file
 * extension when that gives no usable answer.
 *
 * @param string $file Path of the file to examine
 * @param string|bool $ext Deprecated. A string is used as the extension
 *   for the fallback, true derives it from $file, a falsy value disables
 *   the fallback
 * @return string The MIME type, or 'unknown/unknown' if nothing was found
 */
private function detectMimeType( $file, $ext = true ) {
	if ( $ext ) {
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. "
			. "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$detector = $this->detectCallback;
	$m = $detector ? $detector( $file ) : mime_content_type( $file );

	if ( $m ) {
		# normalize
		$m = preg_replace( '![;, ].*$!', '', $m ); # strip charset, etc
		$m = strtolower( trim( $m ) );

		// An answer containing "unknown" counts as no answer at all
		if ( strpos( $m, 'unknown' ) === false ) {
			$this->logger->info( __METHOD__ . ": magic mime type of $file: $m\n" );
			return $m;
		}
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$dot = strrpos( $file, '.' );
		$ext = strtolower( $dot ? substr( $file, $dot + 1 ) : '' );
	}

	if ( $ext ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			$byExtension = $this->guessTypesForExtension( $ext );
			if ( $byExtension ) {
				$this->logger->info( __METHOD__ . ": extension mime type of $file: $byExtension\n" );
				return $byExtension;
			}
		} else {
			$this->logger->info( __METHOD__ . ": refusing to guess mime type for .$ext file, "
				. "we should have recognized it\n" );
		}
	}

	// Unknown type
	$this->logger->info( __METHOD__ . ": failed to guess mime type for $file!\n" );
	return 'unknown/unknown';
}
1031 
/**
 * Determine the media type code for a file, using its MIME type, name
 * and possibly its contents.
 *
 * Lookups are tried in this order: the full MIME type, the file
 * extension, the major part of the MIME type. Ogg containers are handled
 * separately by looking for codec names at the start of the file.
 *
 * @param string|null $path Full path to the file, if available
 * @param string|null $mime MIME type; guessed from $path when empty
 * @return string One of the MEDIATYPE_* constants or another key of the
 *   media type table; MEDIATYPE_UNKNOWN when nothing matches
 */
public function getMediaType( $path = null, $mime = null ) {
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If MIME type is unknown, guess it
	if ( !$mime ) {
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	if ( $mime == 'application/ogg' && file_exists( $path ) ) {
		// Read a chunk of the file. The content is binary, so no
		// text-mode translation must be applied.
		$f = fopen( $path, "rb" );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		// The encoder name "ffmpeg2theora" must not count as a theora stream
		$head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

		// This is an UGLY HACK, file should be parsed correctly
		if ( strpos( $head, 'theora' ) !== false ) {
			return MEDIATYPE_VIDEO;
		}
		foreach ( [ 'vorbis', 'flac', 'speex', 'opus' ] as $audioCodec ) {
			if ( strpos( $head, $audioCodec ) !== false ) {
				return MEDIATYPE_AUDIO;
			}
		}
		return MEDIATYPE_MULTIMEDIA;
	}

	// Check for entry for full MIME type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major MIME type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$major = substr( $mime, 0, $i );
			$type = $this->findMediaType( $major );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Every lookup that ran came back as MEDIATYPE_UNKNOWN, or none ran
	// at all; never hand null to the caller.
	return MEDIATYPE_UNKNOWN;
}
1126 
/**
 * Returns a media code matching the given MIME type or file extension.
 * File extensions are represented by a string starting with a dot (.) to
 * distinguish them from MIME types.
 *
 * @param string $extMime A MIME type, or a file extension with a leading dot
 * @return string The first media type that lists one of the candidates,
 *   or MEDIATYPE_UNKNOWN
 */
function findMediaType( $extMime ) {
	if ( strpos( $extMime, '.' ) === 0 ) {
		// If it's an extension, look up the MIME types
		$listed = $this->getTypesForExtension( substr( $extMime, 1 ) );
		if ( !$listed ) {
			return MEDIATYPE_UNKNOWN;
		}

		$candidates = explode( ' ', $listed );
	} elseif ( isset( $this->mimeTypeAliases[$extMime] ) ) {
		// Normalize MIME type
		$candidates = [ $this->mimeTypeAliases[$extMime] ];
	} else {
		$candidates = [ $extMime ];
	}

	// Candidates are tried in order; the first hit wins
	foreach ( $candidates as $candidate ) {
		foreach ( $this->mediaTypes as $mediaType => $members ) {
			if ( in_array( $candidate, $members, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
1165 
/**
 * Returns the media types that have at least been seen in the MIME info
 * definitions, i.e. the keys of the media type table.
 *
 * @return array
 */
public function getMediaTypes() {
	$known = array_keys( $this->mediaTypes );
	return $known;
}
1174 
/**
 * Get the MIME types that various versions of Internet Explorer would
 * detect from a chunk of the content. The three arguments are handed
 * unchanged to IEContentAnalyzer::getRealMimesFromData().
 *
 * @param string $fileName
 * @param string $chunk
 * @param string $proposed
 * @return mixed Whatever getRealMimesFromData() returns
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
	return $this->getIEContentAnalyzer()
		->getRealMimesFromData( $fileName, $chunk, $proposed );
}
1188 
/**
 * Get a cached instance of IEContentAnalyzer. The instance is created on
 * first use and reused afterwards.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
	if ( $this->IEAnalyzer === null ) {
		$this->IEAnalyzer = new IEContentAnalyzer();
	}

	return $this->IEAnalyzer;
}
1200 }
MimeAnalyzer\$guessCallback
callable $guessCallback
Definition: MimeAnalyzer.php:42
MimeAnalyzer\getIEContentAnalyzer
getIEContentAnalyzer()
Get a cached instance of IEContentAnalyzer.
Definition: MimeAnalyzer.php:1194
MimeAnalyzer\$wellKnownTypes
static $wellKnownTypes
Defines a set of well known MIME types This is used as a fallback to mime.types files.
Definition: MimeAnalyzer.php:85
MimeAnalyzer\guessTypesForExtension
guessTypesForExtension( $ext)
Returns a single MIME type for a given file extension or null if unknown.
Definition: MimeAnalyzer.php:451
MimeAnalyzer\addExtraInfo
addExtraInfo( $info)
Adds to the list mapping MIME to media type.
Definition: MimeAnalyzer.php:399
MimeAnalyzer\isRecognizableExtension
isRecognizableExtension( $extension)
Returns true if the extension represents a type which can be reliably detected from its content.
Definition: MimeAnalyzer.php:521
MEDIATYPE_AUDIO
const MEDIATYPE_AUDIO
Definition: defines.php:32
MimeAnalyzer\getExtensionsForType
getExtensionsForType( $mime)
Returns a list of file extensions for a given MIME type as a space separated string or null if the MI...
Definition: MimeAnalyzer.php:411
MimeAnalyzer\$initCallback
callable $initCallback
Definition: MimeAnalyzer.php:38
MimeAnalyzer\getMediaType
getMediaType( $path=null, $mime=null)
Determine the media type code for a file, using its MIME type, name and possibly its contents.
Definition: MimeAnalyzer.php:1048
MimeAnalyzer
Implements functions related to MIME types such as detection and mapping to file extension.
Definition: MimeAnalyzer.php:30
captcha-old.count
count
Definition: captcha-old.py:249
MimeAnalyzer\$mimetoExt
array $mimetoExt
Map of MIME types to file extensions (as a space separated list)
Definition: MimeAnalyzer.php:50
text
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
MimeAnalyzer\$mExtToMime
array $mExtToMime
Map of file extensions types to MIME types (as a space separated list)
Definition: MimeAnalyzer.php:53
MimeAnalyzer\guessMimeType
guessMimeType( $file, $ext=true)
MIME type detection.
Definition: MimeAnalyzer.php:611
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
MimeAnalyzer\$infoFile
string $infoFile
Definition: MimeAnalyzer.php:34
txt
This document describes how event hooks work in the Renameuser extension For a more comprehensive guide to navigate to your root MediaWiki directory and read docs hooks txt
Definition: hooks.txt:6
MimeAnalyzer\__construct
__construct(array $params)
Definition: MimeAnalyzer.php:194
MimeAnalyzer\getTypesForExtension
getTypesForExtension( $ext)
Returns a list of MIME types for a given file extension as a space separated string or null if the ex...
Definition: MimeAnalyzer.php:437
$params
$params
Definition: styleTest.css.php:40
MEDIATYPE_UNKNOWN
const MEDIATYPE_UNKNOWN
Definition: defines.php:26
MimeAnalyzer\$mediaTypes
array $mediaTypes
Mapping of media types to arrays of MIME types.
Definition: MimeAnalyzer.php:46
$s
$s
Definition: mergeMessageFileList.php:188
MimeAnalyzer\setLogger
setLogger(LoggerInterface $logger)
Definition: MimeAnalyzer.php:379
MimeAnalyzer\detectMimeType
detectMimeType( $file, $ext=true)
Internal MIME type detection.
Definition: MimeAnalyzer.php:979
MimeAnalyzer\$IEAnalyzer
IEContentAnalyzer $IEAnalyzer
Definition: MimeAnalyzer.php:56
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
MimeAnalyzer\getIEMimeTypes
getIEMimeTypes( $fileName, $chunk, $proposed)
Get the MIME types that various versions of Internet Explorer would detect from a chunk of the content.
Definition: MimeAnalyzer.php:1184
MimeAnalyzer\$typeFile
string $typeFile
Definition: MimeAnalyzer.php:32
$matches
$matches
Definition: NoLocalSettings.php:24
$lines
$lines
Definition: router.php:67
MimeAnalyzer\detectZipType
detectZipType( $header, $tail=null, $ext=false)
Detect application-specific file type of a given ZIP file from its header data.
Definition: MimeAnalyzer.php:872
IEContentAnalyzer
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection algorithm.
Definition: IEContentAnalyzer.php:27
MimeAnalyzer\$wellKnownInfo
static $wellKnownInfo
Defines a set of well-known MIME info entries. This is used as a fallback to mime.info files.
Definition: MimeAnalyzer.php:137
$mime
if( $ext=='php'|| $ext=='php5') $mime
Definition: router.php:65
MEDIATYPE_MULTIMEDIA
const MEDIATYPE_MULTIMEDIA
Definition: defines.php:37
$e
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2141
MimeAnalyzer\isMatchingExtension
isMatchingExtension( $extension, $mime)
Tests if the extension matches the given MIME type.
Definition: MimeAnalyzer.php:473
$header
$header
Definition: updateCredits.php:35
MimeAnalyzer\findMediaType
findMediaType( $extMime)
Returns a media code matching the given MIME type or file extension.
Definition: MimeAnalyzer.php:1137
etc
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add etc
Definition: design.txt:12
MimeAnalyzer\getMediaTypes
getMediaTypes()
Returns an array of media types (MEDIATYPE_xxx constants)
Definition: MimeAnalyzer.php:1171
MimeAnalyzer\$logger
LoggerInterface $logger
Definition: MimeAnalyzer.php:64
MimeAnalyzer\$extraInfo
string $extraInfo
Extra MIME info, set for example by media handling extensions.
Definition: MimeAnalyzer.php:61
MimeAnalyzer\$mimeTypeAliases
array $mimeTypeAliases
Map of MIME type aliases.
Definition: MimeAnalyzer.php:48
MimeAnalyzer\isPHPImageType
isPHPImageType( $mime)
Returns true if the MIME type is known to represent an image format supported by the PHP GD library.
Definition: MimeAnalyzer.php:494
XmlTypeCheck
Definition: XmlTypeCheck.php:28
MimeAnalyzer\loadFiles
loadFiles()
Definition: MimeAnalyzer.php:217
plain
either a plain
Definition: hooks.txt:2026
MEDIATYPE_TEXT
const MEDIATYPE_TEXT
Definition: defines.php:41
MimeAnalyzer\addExtraTypes
addExtraTypes( $types)
Adds to the list mapping MIME to file extensions.
Definition: MimeAnalyzer.php:389
MEDIATYPE_VIDEO
const MEDIATYPE_VIDEO
Definition: defines.php:35
MimeAnalyzer\improveTypeFromExtension
improveTypeFromExtension( $mime, $ext)
Improves a MIME type using the file extension.
Definition: MimeAnalyzer.php:554
MimeAnalyzer\$extCallback
callable $extCallback
Definition: MimeAnalyzer.php:44
$ext
$ext
Definition: NoLocalSettings.php:25
$path
$path
Definition: NoLocalSettings.php:26
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
MimeAnalyzer\$extraTypes
string $extraTypes
Extra MIME types, set for example by media handling extensions.
Definition: MimeAnalyzer.php:59
MimeAnalyzer\$xmlTypes
string $xmlTypes
Definition: MimeAnalyzer.php:36
MimeAnalyzer\$detectCallback
callable $detectCallback
Definition: MimeAnalyzer.php:40
MimeAnalyzer\doGuessMimeType
doGuessMimeType( $file, $ext)
Guess the MIME type from the file contents.
Definition: MimeAnalyzer.php:644
array
the array() calling protocol came about after MediaWiki 1.4rc1.
$type
$type
Definition: testCompression.php:48