MediaWiki  REL1_31
MimeAnalyzer.php
Go to the documentation of this file.
1 <?php
22 use Psr\Log\LoggerAwareInterface;
23 use Psr\Log\LoggerInterface;
24 
30 class MimeAnalyzer implements LoggerAwareInterface {
/** @var string Path of the MIME types file read by loadFiles(); falsy means built-ins only */
32  protected $typeFile;
/** @var string Path of the MIME info file read by loadFiles(); falsy means built-ins only */
34  protected $infoFile;
/** @var array Taken from $params['xmlTypes']; presumably maps XML root elements to MIME types - TODO confirm */
36  protected $xmlTypes;
/** @var callable|null Invoked with this object at the start of loadFiles() */
38  protected $initCallback;
/** @var callable|null Invoked with the file path by detectMimeType() instead of mime_content_type() */
40  protected $detectCallback;
/** @var callable|null Invoked by doGuessMimeType() with ( $this, $head, $tail, $file, $mime ) */
42  protected $guessCallback;
/** @var callable|null Invoked by improveTypeFromExtension() with ( $this, $ext, $mime ) */
44  protected $extCallback;
/** @var array Mapping of media types to arrays of MIME types */
46  protected $mediaTypes = null;
/** @var array Map of MIME type aliases to the first type listed on the same info line */
48  protected $mimeTypeAliases = null;
/** @var array Map of MIME types to file extensions (as a space separated list) */
50  protected $mimetoExt = null;
51 
/** @var array Map of file extensions to MIME types (as a space separated list) */
53  public $mExtToMime = null; // legacy name; field accessed by hooks
54 
/** @var IEContentAnalyzer Created lazily by getIEContentAnalyzer() */
56  protected $IEAnalyzer;
57 
/** @var string Extra MIME type lines collected by addExtraTypes() */
59  private $extraTypes = '';
/** @var string Extra MIME info lines collected by addExtraInfo() */
61  private $extraInfo = '';
62 
/** @var LoggerInterface Receives the informational messages emitted by this class */
64  private $logger;
65 
/**
 * Built-in MIME type table, always loaded by loadFiles() before any
 * configured types file. Each line is a MIME type followed by the
 * space separated file extensions that map to it.
 *
 * NOTE(review): the audio/opus line lists "ogg" twice.
 */
85  protected static $wellKnownTypes = <<<EOT
86 application/ogg ogx ogg ogm ogv oga spx opus
87 application/pdf pdf
88 application/vnd.oasis.opendocument.chart odc
89 application/vnd.oasis.opendocument.chart-template otc
90 application/vnd.oasis.opendocument.database odb
91 application/vnd.oasis.opendocument.formula odf
92 application/vnd.oasis.opendocument.formula-template otf
93 application/vnd.oasis.opendocument.graphics odg
94 application/vnd.oasis.opendocument.graphics-template otg
95 application/vnd.oasis.opendocument.image odi
96 application/vnd.oasis.opendocument.image-template oti
97 application/vnd.oasis.opendocument.presentation odp
98 application/vnd.oasis.opendocument.presentation-template otp
99 application/vnd.oasis.opendocument.spreadsheet ods
100 application/vnd.oasis.opendocument.spreadsheet-template ots
101 application/vnd.oasis.opendocument.text odt
102 application/vnd.oasis.opendocument.text-master otm
103 application/vnd.oasis.opendocument.text-template ott
104 application/vnd.oasis.opendocument.text-web oth
105 application/javascript js
106 application/x-shockwave-flash swf
107 audio/midi mid midi kar
108 audio/mpeg mpga mpa mp2 mp3
109 audio/x-aiff aif aiff aifc
110 audio/x-wav wav
111 audio/ogg oga spx ogg opus
112 audio/opus opus ogg oga ogg spx
113 image/x-bmp bmp
114 image/gif gif
115 image/jpeg jpeg jpg jpe
116 image/png png
117 image/svg+xml svg
118 image/svg svg
119 image/tiff tiff tif
120 image/vnd.djvu djvu
121 image/x.djvu djvu
122 image/x-djvu djvu
123 image/x-portable-pixmap ppm
124 image/x-xcf xcf
125 text/plain txt
126 text/html html htm
127 video/ogg ogv ogm ogg
128 video/mpeg mpg mpeg
129 EOT;
130 
/**
 * Built-in MIME info table, always loaded by loadFiles() before any
 * configured info file. Each line lists one or more MIME types followed
 * by a media type in square brackets; loadFiles() records every type
 * after the first on a line as an alias of the first.
 */
137  protected static $wellKnownInfo = <<<EOT
138 application/pdf [OFFICE]
139 application/vnd.oasis.opendocument.chart [OFFICE]
140 application/vnd.oasis.opendocument.chart-template [OFFICE]
141 application/vnd.oasis.opendocument.database [OFFICE]
142 application/vnd.oasis.opendocument.formula [OFFICE]
143 application/vnd.oasis.opendocument.formula-template [OFFICE]
144 application/vnd.oasis.opendocument.graphics [OFFICE]
145 application/vnd.oasis.opendocument.graphics-template [OFFICE]
146 application/vnd.oasis.opendocument.image [OFFICE]
147 application/vnd.oasis.opendocument.image-template [OFFICE]
148 application/vnd.oasis.opendocument.presentation [OFFICE]
149 application/vnd.oasis.opendocument.presentation-template [OFFICE]
150 application/vnd.oasis.opendocument.spreadsheet [OFFICE]
151 application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
152 application/vnd.oasis.opendocument.text [OFFICE]
153 application/vnd.oasis.opendocument.text-template [OFFICE]
154 application/vnd.oasis.opendocument.text-master [OFFICE]
155 application/vnd.oasis.opendocument.text-web [OFFICE]
156 application/javascript text/javascript application/x-javascript [EXECUTABLE]
157 application/x-shockwave-flash [MULTIMEDIA]
158 audio/midi [AUDIO]
159 audio/x-aiff [AUDIO]
160 audio/x-wav [AUDIO]
161 audio/mp3 audio/mpeg [AUDIO]
162 application/ogg audio/ogg video/ogg [MULTIMEDIA]
163 image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
164 image/gif [BITMAP]
165 image/jpeg [BITMAP]
166 image/png [BITMAP]
167 image/svg+xml [DRAWING]
168 image/tiff [BITMAP]
169 image/vnd.djvu [BITMAP]
170 image/x-xcf [BITMAP]
171 image/x-portable-pixmap [BITMAP]
172 text/plain [TEXT]
173 text/html [TEXT]
174 video/ogg [VIDEO]
175 video/mpeg [VIDEO]
176 unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
177 EOT;
178 
/**
 * Store the configuration and immediately build the lookup tables.
 *
 * @param array $params Configuration map:
 *  - typeFile, infoFile, xmlTypes: read unconditionally
 *  - initCallback, detectCallback, guessCallback, extCallback: optional, default null
 *  - logger: optional, defaults to a NullLogger
 */
public function __construct( array $params ) {
	// Settings that are read unconditionally
	foreach ( [ 'typeFile', 'infoFile', 'xmlTypes' ] as $name ) {
		$this->$name = $params[$name];
	}

	// Optional hooks fall back to null when not supplied
	foreach ( [ 'initCallback', 'detectCallback', 'guessCallback', 'extCallback' ] as $name ) {
		$this->$name = isset( $params[$name] ) ? $params[$name] : null;
	}

	if ( isset( $params['logger'] ) ) {
		$this->logger = $params['logger'];
	} else {
		$this->logger = new \Psr\Log\NullLogger();
	}

	$this->loadFiles();
}
216 
/**
 * Build the four lookup tables (mimetoExt, mExtToMime, mediaTypes and
 * mimeTypeAliases) from the built-in tables, the optional type and info
 * files, and any extra lines registered through addExtraTypes() and
 * addExtraInfo().
 */
protected function loadFiles() {
	// Let media handling extensions register extra MIME types and MIME info first
	if ( $this->initCallback ) {
		call_user_func( $this->initCallback, $this );
	}

	// --- MIME type => extensions table ---
	$types = self::$wellKnownTypes;
	$mimeTypeFile = $this->typeFile;
	if ( !$mimeTypeFile ) {
		$this->logger->info( __METHOD__ .
			": no mime types file defined, using built-ins only.\n" );
	} elseif ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
		$this->logger->info( __METHOD__ . ": loading mime types from $mimeTypeFile\n" );
		$types .= "\n" . file_get_contents( $mimeTypeFile );
	} else {
		$this->logger->info( __METHOD__ . ": can't load mime types from $mimeTypeFile\n" );
	}
	$types .= "\n" . $this->extraTypes;

	// Normalize line endings and tabs so that a plain split on "\n" and " " works
	$types = str_replace(
		"\t",
		" ",
		str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $types )
	);

	$this->mimetoExt = [];
	$this->mExtToMime = [];

	foreach ( explode( "\n", $types ) as $line ) {
		$line = trim( $line );
		// Skip blank lines and '#' comments
		if ( empty( $line ) || strpos( $line, '#' ) === 0 ) {
			continue;
		}

		$line = strtolower( $line );
		$split = strpos( $line, ' ' );
		if ( $split === false ) {
			// A MIME type without any extension
			continue;
		}

		$mime = substr( $line, 0, $split );
		$ext = trim( substr( $line, $split + 1 ) );
		if ( empty( $ext ) ) {
			continue;
		}

		// Repeated MIME types accumulate their extensions
		$this->mimetoExt[$mime] = empty( $this->mimetoExt[$mime] )
			? $ext
			: $this->mimetoExt[$mime] . ' ' . $ext;

		foreach ( explode( ' ', $ext ) as $e ) {
			$e = trim( $e );
			if ( empty( $e ) ) {
				continue;
			}

			// Likewise, an extension may map to several MIME types
			$this->mExtToMime[$e] = empty( $this->mExtToMime[$e] )
				? $mime
				: $this->mExtToMime[$e] . ' ' . $mime;
		}
	}

	// --- MIME type => media type table, plus aliases ---
	$mimeInfoFile = $this->infoFile;
	$info = self::$wellKnownInfo;
	if ( !$mimeInfoFile ) {
		$this->logger->info( __METHOD__ .
			": no mime info file defined, using built-ins only.\n" );
	} elseif ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
		$this->logger->info( __METHOD__ . ": loading mime info from $mimeInfoFile\n" );
		$info .= "\n" . file_get_contents( $mimeInfoFile );
	} else {
		$this->logger->info( __METHOD__ . ": can't load mime info from $mimeInfoFile\n" );
	}
	$info .= "\n" . $this->extraInfo;

	$info = str_replace(
		"\t",
		" ",
		str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $info )
	);

	$this->mimeTypeAliases = [];
	$this->mediaTypes = [];

	$mediaTagRegex = '!\[\s*(\w+)\s*\]!';
	foreach ( explode( "\n", $info ) as $line ) {
		$line = trim( $line );
		if ( empty( $line ) || strpos( $line, '#' ) === 0 ) {
			continue;
		}

		$line = strtolower( $line );
		if ( strpos( $line, ' ' ) === false ) {
			continue;
		}

		// The media type is the bracketed tag; strip it from the line once captured
		$match = [];
		if ( preg_match( $mediaTagRegex, $line, $match ) ) {
			$line = preg_replace( $mediaTagRegex, '', $line );
			$mtype = trim( strtoupper( $match[1] ) );
		} else {
			$mtype = MEDIATYPE_UNKNOWN;
		}

		$names = explode( ' ', $line );

		if ( !isset( $this->mediaTypes[$mtype] ) ) {
			$this->mediaTypes[$mtype] = [];
		}

		foreach ( $names as $name ) {
			$name = trim( $name );
			if ( !empty( $name ) ) {
				$this->mediaTypes[$mtype][] = $name;
			}
		}

		// Every further entry on the line is an alias of the first one.
		// NOTE(review): entries are used untrimmed and unfiltered here, so an
		// empty-string key can be registered when the line had extra spaces.
		$main = $names[0];
		foreach ( array_slice( $names, 1 ) as $alias ) {
			$this->mimeTypeAliases[$alias] = $main;
		}
	}
}
378 
/**
 * Replace the logger used for this object's informational messages.
 *
 * @param LoggerInterface $logger
 */
379  public function setLogger( LoggerInterface $logger ) {
380  $this->logger = $logger;
381  }
382 
/**
 * Append lines to the extra MIME types buffer, separated by a newline.
 * loadFiles() adds this buffer to the type table text after running the
 * init callback, so the lines only take effect if added before that point.
 *
 * @param string $types Lines in the same format as the built-in types table
 */
389  public function addExtraTypes( $types ) {
390  $this->extraTypes .= "\n" . $types;
391  }
392 
/**
 * Adds to the list mapping MIME to media type, by appending lines to the
 * extra MIME info buffer. loadFiles() adds this buffer to the info table
 * text after running the init callback, so the lines only take effect if
 * added before that point.
 *
 * @param string $info Lines in the same format as the built-in info table
 */
399  public function addExtraInfo( $info ) {
400  $this->extraInfo .= "\n" . $info;
401  }
402 
/**
 * Returns a list of file extensions for a given MIME type as a space
 * separated string, or null if neither the type nor its alias target has
 * an entry.
 *
 * @param string $mime
 * @return string|null
 */
public function getExtensionsForType( $mime ) {
	$mime = strtolower( $mime );

	// Try the type as given first, then the type it is an alias of
	$lookups = [ $mime ];
	if ( isset( $this->mimeTypeAliases[$mime] ) ) {
		$lookups[] = $this->mimeTypeAliases[$mime];
	}

	foreach ( $lookups as $name ) {
		if ( isset( $this->mimetoExt[$name] ) ) {
			return $this->mimetoExt[$name];
		}
	}

	return null;
}
429 
/**
 * Returns a list of MIME types for a given file extension as a space
 * separated string, or null if the extension has no entry.
 *
 * @param string $ext Extension without the leading dot; matched case-insensitively
 * @return string|null
 */
public function getTypesForExtension( $ext ) {
	$key = strtolower( $ext );

	if ( !isset( $this->mExtToMime[$key] ) ) {
		return null;
	}

	return $this->mExtToMime[$key];
}
443 
/**
 * Returns a single MIME type for a given file extension, or null if the
 * extension has no entry. When several types are mapped, the first one
 * in the list is returned.
 *
 * @param string $ext
 * @return string|null
 */
public function guessTypesForExtension( $ext ) {
	$all = $this->getTypesForExtension( $ext );
	if ( $all === null ) {
		return null;
	}

	// Keep only the first entry: drop everything from the first whitespace on
	return preg_replace( '/\s.*$/', '', trim( $all ) );
}
463 
/**
 * Tests if the extension matches the given MIME type.
 *
 * @param string $extension Extension without the leading dot; matched case-insensitively
 * @param string $mime
 * @return bool|null True or false for a match or mismatch, null if no
 *  extensions are known for the MIME type
 */
public function isMatchingExtension( $extension, $mime ) {
	$known = $this->getExtensionsForType( $mime );

	if ( !$known ) {
		return null; // Unknown MIME type
	}

	// Strict comparison: loose string equality would treat numeric-looking
	// extensions such as "1e1" and "10" as the same extension.
	return in_array( strtolower( $extension ), explode( ' ', $known ), true );
}
485 
/**
 * Returns true if the MIME type is one of the image types listed below
 * (as defined by getimagesize() and image_type_to_mime()).
 *
 * @param string $mime
 * @return bool
 */
public function isPHPImageType( $mime ) {
	static $types = [
		'image/gif', 'image/jpeg', 'image/png',
		'image/x-bmp', 'image/xbm', 'image/tiff',
		'image/jp2', 'image/jpeg2000', 'image/iff',
		'image/x-xbitmap',
		'image/vnd.wap.wbmp', 'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	];

	// Strict comparison so that non-string input such as true (or 0 on PHP 7)
	// does not loosely equal the first list entry.
	return in_array( $mime, $types, true );
}
508 
/**
 * Returns true if the extension represents a type which can be reliably
 * detected from its content. Used to decide whether a file's extension
 * may be trusted when content sniffing failed.
 *
 * @param string $extension Matched case-insensitively
 * @return bool
 */
public function isRecognizableExtension( $extension ) {
	static $recognizable = [
		// Types recognized by getimagesize()
		'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
		'bmp', 'tiff', 'tif', 'jpc', 'jp2',
		'jpx', 'jb2', 'swc', 'iff', 'wbmp',
		'xbm',

		// Formats we recognize magic numbers for
		'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx', 'opus',
		'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
		'webp', 'mp3',

		// XML formats we sure hope we recognize reliably
		'svg',

		// 3D formats
		'stl',
	];

	$needle = strtolower( $extension );

	return in_array( $needle, $recognizable, true );
}
542 
/**
 * Refine a content-detected MIME type using the file extension.
 *
 * The extension is consulted in three cases: the type is unknown and the
 * extension is not one that content detection should have recognized;
 * the type is the generic OPC container type; or the type is text/plain
 * and the extension belongs to a textual media type. The ext callback
 * may then adjust the result, and aliases are resolved last.
 *
 * @param string $mime Detected MIME type
 * @param string $ext File extension without the leading dot
 * @return string|null Improved MIME type
 */
public function improveTypeFromExtension( $mime, $ext ) {
	if ( $mime === 'unknown/unknown' ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->guessTypesForExtension( $ext );
		} else {
			$this->logger->info( __METHOD__ . ': refusing to guess mime type for .' .
				"$ext file, we should have recognized it\n" );
		}
	} elseif ( $mime === 'application/x-opc+zip' ) {
		if ( !$this->isMatchingExtension( $ext, $mime ) ) {
			$this->logger->info( __METHOD__ .
				": refusing to guess better type for $mime file, " .
				".$ext is not a known OPC extension.\n" );
			$mime = 'application/zip';
		} else {
			// A known file extension for an OPC file,
			// find the proper MIME type for that file extension
			$mime = $this->guessTypesForExtension( $ext );
		}
	} elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
		// Textual types are sometimes not recognized properly. If the content
		// looked like text/plain and the extension is textual, prefer the
		// extension's type (csv and json are often misdetected as text/plain).
		$mime = $this->guessTypesForExtension( $ext );
	}

	// Media handling extensions can improve the MIME detected
	if ( $this->extCallback ) {
		$improve = $this->extCallback;
		$improve( $this, $ext, $mime /* by reference */ );
	}

	$mime = isset( $this->mimeTypeAliases[$mime] )
		? $this->mimeTypeAliases[$mime]
		: $mime;

	$this->logger->info( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
	return $mime;
}
596 
/**
 * MIME type detection. Runs the internal content checks first and falls
 * back to detectMimeType() if they yield nothing; the result is then
 * resolved through the alias table.
 *
 * @param string $file Path of the file to examine
 * @param string|bool $ext Deprecated; passed through to the detection helpers
 * @return string Detected MIME type
 */
public function guessMimeType( $file, $ext = true ) {
	// TODO: make $ext default to false. Or better, remove it.
	if ( $ext ) {
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$mime = $this->doGuessMimeType( $file, $ext );
	if ( !$mime ) {
		$this->logger->info( __METHOD__ .
			": internal type detection failed for $file (.$ext)...\n" );
		$mime = $this->detectMimeType( $file, $ext );
	}

	// Report the canonical name when the detected type is an alias
	$mime = isset( $this->mimeTypeAliases[$mime] )
		? $this->mimeTypeAliases[$mime]
		: $mime;

	$this->logger->info( __METHOD__ . ": guessed mime type of $file: $mime\n" );
	return $mime;
}
633 
/**
 * Guess the MIME type by looking at the file's content.
 *
 * Reads up to 1024 bytes from the start and up to 65558 bytes from the
 * end of the file, then applies the checks below in order; the first
 * match wins.
 *
 * @param string $file Path of the file to examine
 * @param string|bool $ext Passed on to detectZipType()
 * @return string|bool MIME type, or false if nothing matched and the
 *  guess callback did not set one
 * @throws UnexpectedValueException If seeking to the tail of the file fails
 */
private function doGuessMimeType( $file, $ext ) {
	// Read a chunk of the file
	Wikimedia\suppressWarnings();
	$f = fopen( $file, 'rb' );
	Wikimedia\restoreWarnings();

	if ( !$f ) {
		return 'unknown/unknown';
	}

	$fsize = filesize( $file );
	if ( $fsize === false ) {
		// Do not leak the handle on the early exit
		fclose( $f );
		return 'unknown/unknown';
	}

	$head = fread( $f, 1024 );
	$tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
	if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
		fclose( $f );
		throw new UnexpectedValueException(
			"Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
	}
	$tail = $tailLength ? fread( $f, $tailLength ) : '';
	fclose( $f );

	$this->logger->info( __METHOD__ .
		": analyzing head and tail of $file for magic numbers.\n" );

	// Hardcode a few magic number checks...
	$headers = [
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',
		'ID3' => 'audio/mpeg',
		"\xff\xfb" => 'audio/mpeg', // MPEG-1 layer 3
		"\xff\xf3" => 'audio/mpeg', // MPEG-2 layer 3 (lower sample rates)
		"\xff\xe3" => 'audio/mpeg', // MPEG-2.5 layer 3 (very low sample rates)

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	];

	foreach ( $headers as $magic => $candidate ) {
		if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
			$this->logger->info( __METHOD__ .
				": magic header in $file recognized as $candidate\n" );
			return $candidate;
		}
	}

	/* Look for WebM and Matroska files */
	if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
		$doctype = strpos( $head, "\x42\x82" );
		if ( $doctype ) {
			// Next byte is datasize, then data (sizes larger than 1 byte are stupid muxers)
			$data = substr( $head, $doctype + 3, 8 );
			if ( strncmp( $data, "matroska", 8 ) == 0 ) {
				$this->logger->info( __METHOD__ . ": recognized file as video/x-matroska\n" );
				return "video/x-matroska";
			} elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
				// XXX HACK look for a video track, if we don't find it, this is an audio file
				$videotrack = strpos( $head, "\x86\x85V_VP" );

				if ( $videotrack ) {
					// There is a video track, so this is a video file.
					$this->logger->info( __METHOD__ . ": recognized file as video/webm\n" );
					return "video/webm";
				}

				$this->logger->info( __METHOD__ . ": recognized file as audio/webm\n" );
				return "audio/webm";
			}
		}
		$this->logger->info( __METHOD__ . ": unknown EBML file\n" );
		return "unknown/unknown";
	}

	/* Look for WebP */
	if ( strncmp( $head, "RIFF", 4 ) == 0 &&
		strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0
	) {
		$this->logger->info( __METHOD__ . ": recognized file as image/webp\n" );
		return "image/webp";
	}

	// Look for PHP open tags, either as plain bytes or with a NUL byte
	// after each character.
	if ( ( strpos( $head, '<?php' ) !== false ) ||
		( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
		( strpos( $head, "<\x00?\x00 " ) !== false ) ||
		( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
		( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
		( strpos( $head, "<\x00?\x00=" ) !== false )
	) {
		$this->logger->info( __METHOD__ . ": recognized $file as application/x-php\n" );
		return 'application/x-php';
	}

	// Look for XML formats: map the root element through the configured
	// xmlTypes table, falling back to generic XML.
	Wikimedia\suppressWarnings();
	$xml = new XmlTypeCheck( $file );
	Wikimedia\restoreWarnings();
	if ( $xml->wellFormed ) {
		$xmlTypes = $this->xmlTypes;
		if ( isset( $xmlTypes[$xml->getRootElement()] ) ) {
			return $xmlTypes[$xml->getRootElement()];
		} else {
			return 'application/xml';
		}
	}

	// Look for shell scripts
	$script_type = null;

	# detect by shebang, optionally preceded by a byte order mark
	if ( substr( $head, 0, 2 ) == "#!" ) {
		$script_type = "ASCII";
	} elseif ( substr( $head, 0, 5 ) == "\xef\xbb\xbf#!" ) {
		$script_type = "UTF-8";
	} elseif ( substr( $head, 0, 6 ) == "\xfe\xff\x00#\x00!" ) {
		// BOM plus "#!" is six bytes in UTF-16
		$script_type = "UTF-16BE";
	} elseif ( substr( $head, 0, 6 ) == "\xff\xfe#\x00!\x00" ) {
		$script_type = "UTF-16LE";
	}

	if ( $script_type ) {
		if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
			// Quick and dirty fold down to ASCII!
			$pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
			$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
			$head = '';
			foreach ( $chars as $codepoint ) {
				if ( $codepoint < 128 ) {
					$head .= chr( $codepoint );
				} else {
					$head .= '?';
				}
			}
		}

		$match = [];

		if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
			$mime = "application/x-{$match[2]}";
			$this->logger->info( __METHOD__ . ": shell script recognized as $mime\n" );
			return $mime;
		}
	}

	// Check for ZIP variants (before getimagesize)
	if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
		$this->logger->info( __METHOD__ . ": ZIP header present in $file\n" );
		return $this->detectZipType( $head, $tail, $ext );
	}

	// Check for STL (3D) files
	// @see https://en.wikipedia.org/wiki/STL_(file_format)
	if ( $fsize >= 15 &&
		stripos( $head, 'SOLID ' ) === 0 &&
		preg_match( '/\RENDSOLID .*$/i', $tail ) ) {
		// ASCII STL file
		return 'application/sla';
	} elseif ( $fsize > 84 ) {
		// binary STL file: 80 byte header, 4 byte triangle count, 50 bytes per triangle
		$triangles = substr( $head, 80, 4 );
		$triangles = unpack( 'V', $triangles );
		$triangles = reset( $triangles );
		if ( $triangles !== false && $fsize === 84 + ( $triangles * 50 ) ) {
			return 'application/sla';
		}
	}

	Wikimedia\suppressWarnings();
	$gis = getimagesize( $file );
	Wikimedia\restoreWarnings();

	if ( $gis && isset( $gis['mime'] ) ) {
		$mime = $gis['mime'];
		$this->logger->info( __METHOD__ . ": getimagesize detected $file as $mime\n" );
		return $mime;
	}

	# Media handling extensions can guess the MIME by content
	# It's intentionally here so that if core is wrong about a type (false positive),
	# people will hopefully nag and submit patches :)
	$mime = false;
	# Some strings by reference for performance - assuming well-behaved hooks
	$callback = $this->guessCallback;
	if ( $callback ) {
		$callback( $this, $head, $tail, $file, $mime /* by reference */ );
	}

	return $mime;
}
860 
/**
 * Detect the application-specific file type of a given ZIP file from its
 * header data. Recognizes OpenDocument files, Open Packaging Conventions
 * (OPC) archives, and MS Office documents that carry an OPC trailer.
 *
 * @param string $header Some reasonably-sized chunk of the file's start
 * @param string|null $tail The tail of the file
 * @param string|bool $ext Deprecated. A string extension refines an OPC
 *  archive to that extension's MIME type, or to application/zip if the
 *  extension is not a known OPC one.
 * @return string MIME type; application/zip if nothing more specific was found
 */
public function detectZipType( $header, $tail = null, $ext = false ) {
	if ( $ext ) { # TODO: remove $ext param
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$mime = 'application/zip';
	// Longer names precede their prefixes so the alternation matches the full name
	$opendocTypes = [
		'chart-template',
		'chart',
		'formula-template',
		'formula',
		'graphics-template',
		'graphics',
		'image-template',
		'image',
		'presentation-template',
		'presentation',
		'spreadsheet-template',
		'spreadsheet',
		'text-template',
		'text-master',
		'text-web',
		'text' ];

	// https://lists.oasis-open.org/archives/office/200505/msg00006.html
	$types = '(?:' . implode( '|', $opendocTypes ) . ')';
	$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

	// The dot is escaped so that only the literal part name matches
	$openxmlRegex = '/^\[Content_Types\]\.xml/';

	// The checks below look at the header from byte offset 30 onwards
	if ( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
		$mime = $matches[1];
		$this->logger->info( __METHOD__ . ": detected $mime from ZIP archive\n" );
	} elseif ( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) {
		$mime = "application/x-opc+zip";
		# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
		if ( $ext !== true && $ext !== false ) {
			if ( $this->isMatchingExtension( $ext, $mime ) ) {
				/* A known file extension for an OPC file,
				 * find the proper mime type for that file extension
				 */
				$mime = $this->guessTypesForExtension( $ext );
			} else {
				$mime = "application/zip";
			}
		}
		$this->logger->info( __METHOD__ .
			": detected an Open Packaging Conventions archive: $mime\n" );
	} elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
		( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
		preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
		// The byte signature at offset 512 selects the Office application
		if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
			$mime = "application/msword";
		}
		switch ( substr( $header, 512, 6 ) ) {
			case "\xEC\xA5\xC1\x00\x0E\x00":
			case "\xEC\xA5\xC1\x00\x1C\x00":
			case "\xEC\xA5\xC1\x00\x43\x00":
				$mime = "application/vnd.ms-powerpoint";
				break;
			case "\xFD\xFF\xFF\xFF\x10\x00":
			case "\xFD\xFF\xFF\xFF\x1F\x00":
			case "\xFD\xFF\xFF\xFF\x22\x00":
			case "\xFD\xFF\xFF\xFF\x23\x00":
			case "\xFD\xFF\xFF\xFF\x28\x00":
			case "\xFD\xFF\xFF\xFF\x29\x00":
			case "\xFD\xFF\xFF\xFF\x10\x02":
			case "\xFD\xFF\xFF\xFF\x1F\x02":
			case "\xFD\xFF\xFF\xFF\x22\x02":
			case "\xFD\xFF\xFF\xFF\x23\x02":
			case "\xFD\xFF\xFF\xFF\x28\x02":
			case "\xFD\xFF\xFF\xFF\x29\x02":
				$mime = "application/vnd.msexcel";
				break;
		}

		$this->logger->info( __METHOD__ .
			": detected a MS Office document with OPC trailer\n" );
	} else {
		$this->logger->info( __METHOD__ . ": unable to identify type of ZIP archive\n" );
	}
	return $mime;
}
963 
/**
 * Internal MIME type detection. Asks the detect callback if one is
 * configured, otherwise mime_content_type(). If that yields nothing
 * usable, the file extension is used as a fallback, unless it is an
 * extension whose content should have been recognized.
 *
 * @param string $file Path of the file to examine
 * @param string|bool $ext Extension to fall back on; true derives it from
 *  $file, a falsy value disables the fallback. Deprecated.
 * @return string MIME type, or 'unknown/unknown' if detection failed
 */
private function detectMimeType( $file, $ext = true ) {
	if ( $ext ) {
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. "
			. "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$detector = $this->detectCallback;
	$m = $detector ? $detector( $file ) : mime_content_type( $file );

	if ( $m ) {
		# normalize: strip charset etc., surrounding whitespace and case
		$m = strtolower( trim( preg_replace( '![;, ].*$!', '', $m ) ) );

		if ( strpos( $m, 'unknown' ) === false ) {
			$this->logger->info( __METHOD__ . ": magic mime type of $file: $m\n" );
			return $m;
		}
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$dot = strrpos( $file, '.' );
		$ext = strtolower( $dot ? substr( $file, $dot + 1 ) : '' );
	}

	if ( $ext && $this->isRecognizableExtension( $ext ) ) {
		$this->logger->info( __METHOD__ . ": refusing to guess mime type for .$ext file, "
			. "we should have recognized it\n" );
	} elseif ( $ext ) {
		$m = $this->guessTypesForExtension( $ext );
		if ( $m ) {
			$this->logger->info( __METHOD__ . ": extension mime type of $file: $m\n" );
			return $m;
		}
	}

	// Unknown type
	$this->logger->info( __METHOD__ . ": failed to guess mime type for $file!\n" );
	return 'unknown/unknown';
}
1033 
/**
 * Determine the media type code for a file, using its MIME type, name
 * and possibly its contents.
 *
 * Lookup order: the full MIME type, then the file extension taken from
 * $path, then the major part of the MIME type. An application/ogg file
 * that exists on disk is classified by inspecting its first 256 bytes.
 *
 * @param string|null $path Full path to the file; may be omitted if $mime is given
 * @param string|null $mime MIME type; guessed from $path when not given
 * @return string One of the MEDIATYPE_* constants, or a media type name
 *  defined in the MIME info table
 */
public function getMediaType( $path = null, $mime = null ) {
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If MIME type is unknown, guess it
	if ( !$mime ) {
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	if ( $mime == 'application/ogg' && file_exists( $path ) ) {
		// Read a chunk of the file. Binary mode: the content is not text,
		// and the "t" flag would apply line-ending translation on Windows.
		$f = fopen( $path, 'rb' );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		// Remove the encoder name first so that it is not mistaken for the theora codec
		$head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

		// This is an UGLY HACK, file should be parsed correctly
		if ( strpos( $head, 'theora' ) !== false ) {
			return MEDIATYPE_VIDEO;
		}
		foreach ( [ 'vorbis', 'flac', 'speex', 'opus' ] as $audioCodec ) {
			if ( strpos( $head, $audioCodec ) !== false ) {
				return MEDIATYPE_AUDIO;
			}
		}
		return MEDIATYPE_MULTIMEDIA;
	}

	$type = null;
	// Check for entry for full MIME type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major MIME type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$major = substr( $mime, 0, $i );
			$type = $this->findMediaType( $major );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Never hand back null or an empty value: fall back to the unknown media type
	if ( !$type ) {
		$type = MEDIATYPE_UNKNOWN;
	}

	return $type;
}
1128 
/**
 * Returns a media code matching the given MIME type or file extension.
 * A value starting with a dot is treated as a file extension and every
 * MIME type mapped to it is tried in order; any other value is treated
 * as a MIME type and resolved through the alias table first.
 *
 * @param string $extMime A MIME type or an extension with a leading dot
 * @return int|string Matching media type key, or MEDIATYPE_UNKNOWN
 */
public function findMediaType( $extMime ) {
	if ( strpos( $extMime, '.' ) === 0 ) {
		// If it's an extension, look up the MIME types
		$mapped = $this->getTypesForExtension( substr( $extMime, 1 ) );
		if ( !$mapped ) {
			return MEDIATYPE_UNKNOWN;
		}

		$candidates = explode( ' ', $mapped );
	} else {
		// Normalize MIME type
		$candidates = [
			isset( $this->mimeTypeAliases[$extMime] )
				? $this->mimeTypeAliases[$extMime]
				: $extMime
		];
	}

	foreach ( $candidates as $candidate ) {
		foreach ( $this->mediaTypes as $mediaType => $mimeList ) {
			if ( in_array( $candidate, $mimeList, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
1167 
/**
 * Returns the keys of the media type table built by loadFiles().
 *
 * @return array
 */
1173  public function getMediaTypes() {
1174  return array_keys( $this->mediaTypes );
1175  }
1176 
/**
 * Get the MIME types that various versions of Internet Explorer would
 * detect, by delegating to IEContentAnalyzer::getRealMimesFromData().
 *
 * @param string $fileName
 * @param string $chunk Chunk of the file's content
 * @param string $proposed Proposed MIME type
 * @return mixed Whatever getRealMimesFromData() returns
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
	return $this->getIEContentAnalyzer()
		->getRealMimesFromData( $fileName, $chunk, $proposed );
}
1190 
/**
 * Get a cached instance of IEContentAnalyzer, creating it on first use.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
	if ( $this->IEAnalyzer === null ) {
		$this->IEAnalyzer = new IEContentAnalyzer();
	}

	return $this->IEAnalyzer;
}
1202 }
MimeAnalyzer\$guessCallback
callable $guessCallback
Definition: MimeAnalyzer.php:42
MimeAnalyzer\getIEContentAnalyzer
getIEContentAnalyzer()
Get a cached instance of IEContentAnalyzer.
Definition: MimeAnalyzer.php:1196
MimeAnalyzer\$wellKnownTypes
static $wellKnownTypes
Defines a set of well known MIME types. This is used as a fallback to mime.types files.
Definition: MimeAnalyzer.php:85
MimeAnalyzer\guessTypesForExtension
guessTypesForExtension( $ext)
Returns a single MIME type for a given file extension or null if unknown.
Definition: MimeAnalyzer.php:451
MimeAnalyzer\addExtraInfo
addExtraInfo( $info)
Adds to the list mapping MIME to media type.
Definition: MimeAnalyzer.php:399
MimeAnalyzer\isRecognizableExtension
isRecognizableExtension( $extension)
Returns true if the extension represents a type which can be reliably detected from its content.
Definition: MimeAnalyzer.php:521
MEDIATYPE_AUDIO
const MEDIATYPE_AUDIO
Definition: defines.php:32
use
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
Definition: APACHE-LICENSE-2.0.txt:10
MimeAnalyzer\getExtensionsForType
getExtensionsForType( $mime)
Returns a list of file extensions for a given MIME type as a space separated string or null if the MIME type is not known.
Definition: MimeAnalyzer.php:411
array
the array() calling protocol came about after MediaWiki 1.4rc1.
MimeAnalyzer\$initCallback
callable $initCallback
Definition: MimeAnalyzer.php:38
plain
either a plain
Definition: hooks.txt:2056
MimeAnalyzer\getMediaType
getMediaType( $path=null, $mime=null)
Determine the media type code for a file, using its MIME type, name and possibly its contents.
Definition: MimeAnalyzer.php:1050
MimeAnalyzer
Implements functions related to MIME types such as detection and mapping to file extension.
Definition: MimeAnalyzer.php:30
MimeAnalyzer\$mimetoExt
array $mimetoExt
Map of MIME types to file extensions (as a space separated list)
Definition: MimeAnalyzer.php:50
text
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:18
MimeAnalyzer\$mExtToMime
array $mExtToMime
Map of file extensions to MIME types (as a space separated list)
Definition: MimeAnalyzer.php:53
MimeAnalyzer\guessMimeType
guessMimeType( $file, $ext=true)
MIME type detection.
Definition: MimeAnalyzer.php:611
MimeAnalyzer\$infoFile
string $infoFile
Definition: MimeAnalyzer.php:34
MimeAnalyzer\__construct
__construct(array $params)
Definition: MimeAnalyzer.php:194
MimeAnalyzer\getTypesForExtension
getTypesForExtension( $ext)
Returns a list of MIME types for a given file extension as a space separated string or null if the extension is not known.
Definition: MimeAnalyzer.php:437
$params
$params
Definition: styleTest.css.php:40
MEDIATYPE_UNKNOWN
const MEDIATYPE_UNKNOWN
Definition: defines.php:26
MimeAnalyzer\$mediaTypes
array $mediaTypes
Mapping of media types to arrays of MIME types.
Definition: MimeAnalyzer.php:46
$s
$s
Definition: mergeMessageFileList.php:187
MimeAnalyzer\setLogger
setLogger(LoggerInterface $logger)
Definition: MimeAnalyzer.php:379
MimeAnalyzer\detectMimeType
detectMimeType( $file, $ext=true)
Internal MIME type detection.
Definition: MimeAnalyzer.php:981
MimeAnalyzer\$IEAnalyzer
IEContentAnalyzer $IEAnalyzer
Definition: MimeAnalyzer.php:56
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:37
MimeAnalyzer\getIEMimeTypes
getIEMimeTypes( $fileName, $chunk, $proposed)
Get the MIME types that various versions of Internet Explorer would detect from a chunk of the conten...
Definition: MimeAnalyzer.php:1186
MimeAnalyzer\$typeFile
string $typeFile
Definition: MimeAnalyzer.php:32
$matches
$matches
Definition: NoLocalSettings.php:24
$lines
$lines
Definition: router.php:61
MimeAnalyzer\detectZipType
detectZipType( $header, $tail=null, $ext=false)
Detect application-specific file type of a given ZIP file from its header data.
Definition: MimeAnalyzer.php:874
IEContentAnalyzer
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection a...
Definition: IEContentAnalyzer.php:27
MimeAnalyzer\$wellKnownInfo
static $wellKnownInfo
Defines a set of well known MIME info entries. This is used as a fallback to mime.info files.
Definition: MimeAnalyzer.php:137
$mime
if( $ext=='php'|| $ext=='php5') $mime
Definition: router.php:59
MEDIATYPE_MULTIMEDIA
const MEDIATYPE_MULTIMEDIA
Definition: defines.php:37
MimeAnalyzer\isMatchingExtension
isMatchingExtension( $extension, $mime)
Tests if the extension matches the given MIME type.
Definition: MimeAnalyzer.php:473
$header
$header
Definition: updateCredits.php:35
MimeAnalyzer\findMediaType
findMediaType( $extMime)
Returns a media code matching the given MIME type or file extension.
Definition: MimeAnalyzer.php:1139
etc
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add etc
Definition: design.txt:19
MimeAnalyzer\getMediaTypes
getMediaTypes()
Returns an array of media types (MEDIATYPE_xxx constants)
Definition: MimeAnalyzer.php:1173
MimeAnalyzer\$logger
LoggerInterface $logger
Definition: MimeAnalyzer.php:64
MimeAnalyzer\$extraInfo
string $extraInfo
Extra MIME info, set for example by media handling extensions.
Definition: MimeAnalyzer.php:61
MimeAnalyzer\$mimeTypeAliases
array $mimeTypeAliases
Map of MIME type aliases.
Definition: MimeAnalyzer.php:48
MimeAnalyzer\isPHPImageType
isPHPImageType( $mime)
Returns true if the MIME type is known to represent an image format supported by the PHP GD library.
Definition: MimeAnalyzer.php:494
XmlTypeCheck
Definition: XmlTypeCheck.php:28
MimeAnalyzer\loadFiles
loadFiles()
Definition: MimeAnalyzer.php:217
MEDIATYPE_TEXT
const MEDIATYPE_TEXT
Definition: defines.php:41
MimeAnalyzer\addExtraTypes
addExtraTypes( $types)
Adds to the list mapping MIME to file extensions.
Definition: MimeAnalyzer.php:389
MEDIATYPE_VIDEO
const MEDIATYPE_VIDEO
Definition: defines.php:35
MimeAnalyzer\improveTypeFromExtension
improveTypeFromExtension( $mime, $ext)
Improves a MIME type using the file extension.
Definition: MimeAnalyzer.php:554
MimeAnalyzer\$extCallback
callable $extCallback
Definition: MimeAnalyzer.php:44
$path
$path
Definition: NoLocalSettings.php:25
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:22
MimeAnalyzer\$extraTypes
string $extraTypes
Extra MIME types, set for example by media handling extensions.
Definition: MimeAnalyzer.php:59
MimeAnalyzer\$xmlTypes
string $xmlTypes
Definition: MimeAnalyzer.php:36
$ext
$ext
Definition: router.php:55
MimeAnalyzer\$detectCallback
callable $detectCallback
Definition: MimeAnalyzer.php:40
MimeAnalyzer\doGuessMimeType
doGuessMimeType( $file, $ext)
Guess the MIME type from the file contents.
Definition: MimeAnalyzer.php:644
$e
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2171
$type
$type
Definition: testCompression.php:48