MediaWiki  1.32.5
MimeAnalyzer.php
Go to the documentation of this file.
1 <?php
22 use Psr\Log\LoggerAwareInterface;
23 use Psr\Log\LoggerInterface;
24 
30 class MimeAnalyzer implements LoggerAwareInterface {
32  protected $typeFile;
34  protected $infoFile;
36  protected $xmlTypes;
38  protected $initCallback;
40  protected $detectCallback;
42  protected $guessCallback;
44  protected $extCallback;
46  protected $mediaTypes = null;
48  protected $mimeTypeAliases = null;
50  protected $mimetoExt = null;
51 
53  public $mExtToMime = null; // legacy name; field accessed by hooks
54 
56  protected $IEAnalyzer;
57 
59  private $extraTypes = '';
61  private $extraInfo = '';
62 
64  private $logger;
65 
85  protected static $wellKnownTypes = <<<EOT
86 application/ogg ogx ogg ogm ogv oga spx opus
87 application/pdf pdf
88 application/vnd.oasis.opendocument.chart odc
89 application/vnd.oasis.opendocument.chart-template otc
90 application/vnd.oasis.opendocument.database odb
91 application/vnd.oasis.opendocument.formula odf
92 application/vnd.oasis.opendocument.formula-template otf
93 application/vnd.oasis.opendocument.graphics odg
94 application/vnd.oasis.opendocument.graphics-template otg
95 application/vnd.oasis.opendocument.image odi
96 application/vnd.oasis.opendocument.image-template oti
97 application/vnd.oasis.opendocument.presentation odp
98 application/vnd.oasis.opendocument.presentation-template otp
99 application/vnd.oasis.opendocument.spreadsheet ods
100 application/vnd.oasis.opendocument.spreadsheet-template ots
101 application/vnd.oasis.opendocument.text odt
102 application/vnd.oasis.opendocument.text-master otm
103 application/vnd.oasis.opendocument.text-template ott
104 application/vnd.oasis.opendocument.text-web oth
105 application/javascript js
106 application/x-shockwave-flash swf
107 audio/midi mid midi kar
108 audio/mpeg mpga mpa mp2 mp3
109 audio/x-aiff aif aiff aifc
110 audio/x-wav wav
111 audio/ogg oga spx ogg opus
112 audio/opus opus ogg oga ogg spx
113 image/x-bmp bmp
114 image/gif gif
115 image/jpeg jpeg jpg jpe
116 image/png png
117 image/svg+xml svg
118 image/svg svg
119 image/tiff tiff tif
120 image/vnd.djvu djvu
121 image/x.djvu djvu
122 image/x-djvu djvu
123 image/x-portable-pixmap ppm
124 image/x-xcf xcf
125 text/plain txt
126 text/html html htm
127 video/ogg ogv ogm ogg
128 video/mpeg mpg mpeg
129 EOT;
130 
137  protected static $wellKnownInfo = <<<EOT
138 application/pdf [OFFICE]
139 application/vnd.oasis.opendocument.chart [OFFICE]
140 application/vnd.oasis.opendocument.chart-template [OFFICE]
141 application/vnd.oasis.opendocument.database [OFFICE]
142 application/vnd.oasis.opendocument.formula [OFFICE]
143 application/vnd.oasis.opendocument.formula-template [OFFICE]
144 application/vnd.oasis.opendocument.graphics [OFFICE]
145 application/vnd.oasis.opendocument.graphics-template [OFFICE]
146 application/vnd.oasis.opendocument.image [OFFICE]
147 application/vnd.oasis.opendocument.image-template [OFFICE]
148 application/vnd.oasis.opendocument.presentation [OFFICE]
149 application/vnd.oasis.opendocument.presentation-template [OFFICE]
150 application/vnd.oasis.opendocument.spreadsheet [OFFICE]
151 application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
152 application/vnd.oasis.opendocument.text [OFFICE]
153 application/vnd.oasis.opendocument.text-template [OFFICE]
154 application/vnd.oasis.opendocument.text-master [OFFICE]
155 application/vnd.oasis.opendocument.text-web [OFFICE]
156 application/javascript text/javascript application/x-javascript [EXECUTABLE]
157 application/x-shockwave-flash [MULTIMEDIA]
158 audio/midi [AUDIO]
159 audio/x-aiff [AUDIO]
160 audio/x-wav [AUDIO]
161 audio/mp3 audio/mpeg [AUDIO]
162 application/ogg audio/ogg video/ogg [MULTIMEDIA]
163 image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
164 image/gif [BITMAP]
165 image/jpeg [BITMAP]
166 image/png [BITMAP]
167 image/svg+xml [DRAWING]
168 image/tiff [BITMAP]
169 image/vnd.djvu [BITMAP]
170 image/x-xcf [BITMAP]
171 image/x-portable-pixmap [BITMAP]
172 text/plain [TEXT]
173 text/html [TEXT]
174 video/ogg [VIDEO]
175 video/mpeg [VIDEO]
176 unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
177 EOT;
178 
/**
 * Set up the analyzer from a configuration array and load the MIME tables.
 *
 * @param array $params Configuration. The keys read here are:
 *   - typeFile: path of a mime.types file (falsy means built-ins only)
 *   - infoFile: path of a mime.info file (falsy means built-ins only)
 *   - xmlTypes: map consulted when classifying well-formed XML files
 *   - initCallback, detectCallback, guessCallback, extCallback: optional
 *     callables, each defaulting to null when absent
 *   - logger: optional logger; a NullLogger is used when absent
 */
public function __construct( array $params ) {
	// Mandatory settings are read directly from the array.
	foreach ( [ 'typeFile', 'infoFile', 'xmlTypes' ] as $required ) {
		$this->$required = $params[$required];
	}

	// Optional hooks fall back to null.
	foreach ( [ 'initCallback', 'detectCallback', 'guessCallback', 'extCallback' ] as $hook ) {
		$this->$hook = $params[$hook] ?? null;
	}

	$this->logger = $params['logger'] ?? new \Psr\Log\NullLogger();

	// Build the lookup tables right away so the instance is ready for use.
	$this->loadFiles();
}
206 
/**
 * Build the lookup tables from the built-in definitions, the optional
 * mime.types / mime.info files and any text registered through
 * addExtraTypes() / addExtraInfo().
 *
 * Populates $mimetoExt, $mExtToMime, $mimeTypeAliases and $mediaTypes.
 */
protected function loadFiles() {
	# Allow media handling extensions adding MIME-types and MIME-info
	if ( $this->initCallback ) {
		call_user_func( $this->initCallback, $this );
	}

	/* --- MIME type <-> file extension tables (mime.types) --- */

	$typeText = self::$wellKnownTypes;

	$typePath = $this->typeFile;
	if ( !$typePath ) {
		$this->logger->info( __METHOD__ . ": no mime types file defined, using built-ins only.\n" );
	} elseif ( is_file( $typePath ) && is_readable( $typePath ) ) {
		$this->logger->info( __METHOD__ . ": loading mime types from $typePath\n" );
		$typeText .= "\n" . file_get_contents( $typePath );
	} else {
		$this->logger->info( __METHOD__ . ": can't load mime types from $typePath\n" );
	}

	$typeText .= "\n" . $this->extraTypes;

	// Normalise line endings and tabs before splitting into lines
	$typeText = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $typeText );
	$typeText = str_replace( "\t", " ", $typeText );

	$this->mimetoExt = [];
	$this->mExtToMime = [];

	foreach ( explode( "\n", $typeText ) as $line ) {
		$line = trim( $line );
		// Skip blank lines and comment lines
		if ( empty( $line ) || strpos( $line, '#' ) === 0 ) {
			continue;
		}

		$line = strtolower( $line );
		$sep = strpos( $line, ' ' );
		if ( $sep === false ) {
			// A MIME type without any extension carries no mapping
			continue;
		}

		$mime = substr( $line, 0, $sep );
		$extList = trim( substr( $line, $sep + 1 ) );
		if ( empty( $extList ) ) {
			continue;
		}

		// Repeated MIME types accumulate their extensions, space separated
		$this->mimetoExt[$mime] = empty( $this->mimetoExt[$mime] )
			? $extList
			: $this->mimetoExt[$mime] . ' ' . $extList;

		foreach ( explode( ' ', $extList ) as $oneExt ) {
			$oneExt = trim( $oneExt );
			if ( empty( $oneExt ) ) {
				continue;
			}

			// Likewise an extension may map to several MIME types
			$this->mExtToMime[$oneExt] = empty( $this->mExtToMime[$oneExt] )
				? $mime
				: $this->mExtToMime[$oneExt] . ' ' . $mime;
		}
	}

	/* --- MIME type aliases and media types (mime.info) --- */

	$infoPath = $this->infoFile;

	$infoText = self::$wellKnownInfo;

	if ( !$infoPath ) {
		$this->logger->info( __METHOD__ . ": no mime info file defined, using built-ins only.\n" );
	} elseif ( is_file( $infoPath ) && is_readable( $infoPath ) ) {
		$this->logger->info( __METHOD__ . ": loading mime info from $infoPath\n" );
		$infoText .= "\n" . file_get_contents( $infoPath );
	} else {
		$this->logger->info( __METHOD__ . ": can't load mime info from $infoPath\n" );
	}

	$infoText .= "\n" . $this->extraInfo;

	$infoText = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $infoText );
	$infoText = str_replace( "\t", " ", $infoText );

	$this->mimeTypeAliases = [];
	$this->mediaTypes = [];

	$tagPattern = '!\[\s*(\w+)\s*\]!';

	foreach ( explode( "\n", $infoText ) as $line ) {
		$line = trim( $line );
		if ( empty( $line ) || strpos( $line, '#' ) === 0 ) {
			continue;
		}

		$line = strtolower( $line );
		if ( strpos( $line, ' ' ) === false ) {
			continue;
		}

		// A "[TYPE]" tag names the media type; it is cut out of the line
		// so that only MIME type names remain.
		$tag = [];
		if ( preg_match( $tagPattern, $line, $tag ) ) {
			$line = preg_replace( $tagPattern, '', $line );
			$mediaType = trim( strtoupper( $tag[1] ) );
		} else {
			$mediaType = MEDIATYPE_UNKNOWN;
		}

		$tokens = explode( ' ', $line );

		if ( !isset( $this->mediaTypes[$mediaType] ) ) {
			$this->mediaTypes[$mediaType] = [];
		}

		foreach ( $tokens as $token ) {
			$token = trim( $token );
			if ( empty( $token ) ) {
				continue;
			}

			$this->mediaTypes[$mediaType][] = $token;
		}

		// The first token is the canonical type and every later token is
		// recorded as an alias of it. Tokens are used exactly as split
		// here, without trimming or skipping empty ones.
		$canonical = $tokens[0];
		foreach ( array_slice( $tokens, 1 ) as $alias ) {
			$this->mimeTypeAliases[$alias] = $canonical;
		}
	}
}
368 
/**
 * Replace the logger used for diagnostic messages.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
372 
/**
 * Queue additional MIME type => extension definitions.
 *
 * The text is appended, on its own line, to the extra definitions that
 * loadFiles() merges after the built-in list and the types file.
 *
 * @param string $types Definitions in the same line format as the built-in list
 */
public function addExtraTypes( $types ) {
	$this->extraTypes = $this->extraTypes . "\n" . $types;
}
382 
/**
 * Queue additional MIME type => media type definitions.
 *
 * The text is appended, on its own line, to the extra definitions that
 * loadFiles() merges after the built-in list and the info file.
 *
 * @param string $info Definitions in the same line format as the built-in list
 */
public function addExtraInfo( $info ) {
	$this->extraInfo = $this->extraInfo . "\n" . $info;
}
392 
/**
 * Look up the file extensions registered for a MIME type.
 *
 * The type is tried as given (lower-cased) first; if it has no entry
 * but is a known alias, the canonical type is tried instead.
 *
 * @param string $mime
 * @return string|null Space separated extensions, or null if none are known
 */
public function getExtensionsForType( $mime ) {
	$key = strtolower( $mime );

	// Only fall back to the canonical type when there is no direct entry
	if ( !isset( $this->mimetoExt[$key] ) && isset( $this->mimeTypeAliases[$key] ) ) {
		$key = $this->mimeTypeAliases[$key];
	}

	return $this->mimetoExt[$key] ?? null;
}
419 
/**
 * Look up the MIME types registered for a file extension.
 *
 * @param string $ext Extension without the leading dot; case is ignored
 * @return string|null Space separated MIME types, or null if the extension is unknown
 */
public function getTypesForExtension( $ext ) {
	return $this->mExtToMime[strtolower( $ext )] ?? null;
}
433 
/**
 * Pick a single MIME type for a file extension: the first one in the
 * list returned by getTypesForExtension().
 *
 * @param string $ext
 * @return string|null The first registered MIME type, or null if the extension is unknown
 */
public function guessTypesForExtension( $ext ) {
	$allTypes = $this->getTypesForExtension( $ext );
	if ( $allTypes === null ) {
		return null;
	}

	// TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
	// Drop everything from the first whitespace character onwards
	return preg_replace( '/\s.*$/', '', trim( $allTypes ) );
}
453 
/**
 * Tell whether a file extension is one of those registered for a MIME type.
 *
 * @param string $extension Extension without the leading dot; case is ignored
 * @param string $mime
 * @return bool|null True or false for a known MIME type, null when no
 *   extensions are registered for the type
 */
public function isMatchingExtension( $extension, $mime ) {
	$registered = $this->getExtensionsForType( $mime );

	if ( !$registered ) {
		return null; // Unknown MIME type
	}

	return in_array( strtolower( $extension ), explode( ' ', $registered ) );
}
475 
/**
 * Tell whether a MIME type is on the fixed list of image types that
 * PHP's own image functions report.
 *
 * @param string $mime
 * @return bool
 */
public function isPHPImageType( $mime ) {
	// As defined by getimagesize and image_type_to_mime
	static $phpTypes = [
		'image/gif',
		'image/jpeg',
		'image/png',
		'image/x-bmp',
		'image/xbm',
		'image/tiff',
		'image/jp2',
		'image/jpeg2000',
		'image/iff',
		'image/x-xbitmap',
		'image/vnd.wap.wbmp',
		'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	];

	return in_array( $mime, $phpTypes );
}
498 
/**
 * Tell whether an extension is on the fixed list of types that content
 * detection is expected to recognize by itself.
 *
 * @param string $extension Extension without the leading dot; case is ignored
 * @return bool
 */
function isRecognizableExtension( $extension ) {
	static $recognizable = [
		// Types recognized by getimagesize()
		'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd', 'bmp', 'tiff', 'tif',
		'jpc', 'jp2', 'jpx', 'jb2', 'swc', 'iff', 'wbmp', 'xbm',

		// Formats we recognize magic numbers for
		'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx', 'opus', 'mid', 'pdf',
		'wmf', 'xcf', 'webm', 'mkv', 'mka', 'webp', 'mp3',

		// XML formats we sure hope we recognize reliably
		'svg',

		// 3D formats
		'stl',
	];

	$needle = strtolower( $extension );
	return in_array( $needle, $recognizable );
}
532 
/**
 * Refine a content-detected MIME type using the file extension.
 *
 * Three detected types are refined: 'unknown/unknown',
 * 'application/x-opc+zip' and 'text/plain'. Afterwards the optional
 * extension callback may adjust the result, and a known alias is
 * replaced by its canonical type.
 *
 * @param string $mime The MIME type found by content detection
 * @param string $ext The file extension, without the leading dot
 * @return string|null The refined MIME type
 */
public function improveTypeFromExtension( $mime, $ext ) {
	if ( $mime === 'unknown/unknown' ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->guessTypesForExtension( $ext );
		} else {
			$this->logger->info( __METHOD__ . ': refusing to guess mime type for .' .
				"$ext file, we should have recognized it\n" );
		}
	} elseif ( $mime === 'application/x-opc+zip' ) {
		if ( !$this->isMatchingExtension( $ext, $mime ) ) {
			$this->logger->info( __METHOD__ .
				": refusing to guess better type for $mime file, " .
				".$ext is not a known OPC extension.\n" );
			$mime = 'application/zip';
		} else {
			// A known file extension for an OPC file,
			// find the proper MIME type for that file extension
			$mime = $this->guessTypesForExtension( $ext );
		}
	} elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
		// Textual types are sometimes not recognized properly.
		// If detected as text/plain, and has an extension which is textual
		// improve to the extension's type. For example, csv and json are often
		// misdetected as text/plain.
		$mime = $this->guessTypesForExtension( $ext );
	}

	# Media handling extensions can improve the MIME detected
	$improver = $this->extCallback;
	if ( $improver ) {
		$improver( $this, $ext, $mime /* by reference */ );
	}

	// Collapse a known alias to its canonical type
	$mime = $this->mimeTypeAliases[$mime] ?? $mime;

	$this->logger->info( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
	return $mime;
}
586 
/**
 * Determine the MIME type of a file.
 *
 * The built-in content checks of doGuessMimeType() run first; if they
 * yield nothing, detectMimeType() is used. A known alias is replaced
 * by its canonical type before returning.
 *
 * @param string $file Path of the file to inspect
 * @param string|bool $ext Deprecated; handed through to the detection
 *   helpers. Use improveTypeFromExtension() instead.
 * @return string The detected MIME type
 */
public function guessMimeType( $file, $ext = true ) {
	if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$detected = $this->doGuessMimeType( $file, $ext );

	if ( !$detected ) {
		$this->logger->info( __METHOD__ .
			": internal type detection failed for $file (.$ext)...\n" );
		$detected = $this->detectMimeType( $file, $ext );
	}

	// Collapse a known alias to its canonical type
	$detected = $this->mimeTypeAliases[$detected] ?? $detected;

	$this->logger->info( __METHOD__ . ": guessed mime type of $file: $detected\n" );
	return $detected;
}
623 
/**
 * Guess the MIME type of a file from its content.
 *
 * Reads the first 1024 bytes and up to the last 65558 bytes of the file
 * and runs a fixed sequence of checks: magic numbers, EBML (Matroska /
 * WebM), WebP, PHP code, well-formed XML, shebang scripts, ZIP variants,
 * STL, getimagesize() and finally the optional guess callback.
 *
 * @param string $file Path of the file to inspect
 * @param string|bool $ext Handed through to detectZipType()
 * @return string|bool The detected MIME type, or false when no check
 *   matched and the guess callback did not set one
 * @throws UnexpectedValueException When seeking to the tail of the file fails
 */
private function doGuessMimeType( $file, $ext ) {
	// Read a chunk of the file
	Wikimedia\suppressWarnings();
	$f = fopen( $file, 'rb' );
	Wikimedia\restoreWarnings();

	if ( !$f ) {
		return 'unknown/unknown';
	}

	$fsize = filesize( $file );
	if ( $fsize === false ) {
		// Do not leak the handle on this early exit
		fclose( $f );
		return 'unknown/unknown';
	}

	$head = fread( $f, 1024 );
	$tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
	if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
		// Release the handle before reporting the failure
		fclose( $f );
		throw new UnexpectedValueException(
			"Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
	}
	$tail = $tailLength ? fread( $f, $tailLength ) : '';
	fclose( $f );

	$this->logger->info( __METHOD__ .
		": analyzing head and tail of $file for magic numbers.\n" );

	// Hardcode a few magic number checks...
	$headers = [
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',
		'ID3' => 'audio/mpeg',
		"\xff\xfb" => 'audio/mpeg', // MPEG-1 layer 3
		"\xff\xf3" => 'audio/mpeg', // MPEG-2 layer 3 (lower sample rates)
		"\xff\xe3" => 'audio/mpeg', // MPEG-2.5 layer 3 (very low sample rates)

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	];

	foreach ( $headers as $magic => $candidate ) {
		if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
			$this->logger->info( __METHOD__ .
				": magic header in $file recognized as $candidate\n" );
			return $candidate;
		}
	}

	/* Look for WebM and Matroska files */
	if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
		$doctype = strpos( $head, "\x42\x82" );
		if ( $doctype ) {
			// Next byte is datasize, then data (sizes larger than 1 byte are stupid muxers)
			$data = substr( $head, $doctype + 3, 8 );
			if ( strncmp( $data, "matroska", 8 ) == 0 ) {
				$this->logger->info( __METHOD__ . ": recognized file as video/x-matroska\n" );
				return "video/x-matroska";
			} elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
				// XXX HACK look for a video track, if we don't find it, this is an audio file
				$videotrack = strpos( $head, "\x86\x85V_VP" );

				if ( $videotrack ) {
					// There is a video track, so this is a video file.
					$this->logger->info( __METHOD__ . ": recognized file as video/webm\n" );
					return "video/webm";
				}

				$this->logger->info( __METHOD__ . ": recognized file as audio/webm\n" );
				return "audio/webm";
			}
		}
		$this->logger->info( __METHOD__ . ": unknown EBML file\n" );
		return "unknown/unknown";
	}

	/* Look for WebP */
	if ( strncmp( $head, "RIFF", 4 ) == 0 &&
		strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0
	) {
		$this->logger->info( __METHOD__ . ": recognized file as image/webp\n" );
		return "image/webp";
	}

	// Look for PHP code anywhere in the head. This runs before the XML
	// check below. Besides the plain "<?php" marker, the remaining
	// patterns are a NUL-interleaved "<?php" and a NUL-interleaved "<?"
	// followed by a space, newline, tab or "=".
	if ( ( strpos( $head, '<?php' ) !== false ) ||
		( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
		( strpos( $head, "<\x00?\x00 " ) !== false ) ||
		( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
		( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
		( strpos( $head, "<\x00?\x00=" ) !== false )
	) {
		$this->logger->info( __METHOD__ . ": recognized $file as application/x-php\n" );
		return 'application/x-php';
	}

	// Look for well-formed XML. The root element is looked up in the
	// configured map; anything else is plain application/xml.
	$xml = new XmlTypeCheck( $file );
	if ( $xml->wellFormed ) {
		// The map lives on the instance; there is no such local variable
		$xmlTypes = $this->xmlTypes;
		$rootElement = $xml->getRootElement();
		if ( isset( $xmlTypes[$rootElement] ) ) {
			return $xmlTypes[$rootElement];
		} else {
			return 'application/xml';
		}
	}

	// Look for shell scripts
	$script_type = null;

	# detect by shebang
	// Each comparison length equals the marker length: the UTF-16
	// markers are a 2-byte BOM followed by "#!" at two bytes per character.
	if ( substr( $head, 0, 2 ) === "#!" ) {
		$script_type = "ASCII";
	} elseif ( substr( $head, 0, 5 ) === "\xef\xbb\xbf#!" ) {
		$script_type = "UTF-8";
	} elseif ( substr( $head, 0, 6 ) === "\xfe\xff\x00#\x00!" ) {
		$script_type = "UTF-16BE";
	} elseif ( substr( $head, 0, 6 ) === "\xff\xfe#\x00!\x00" ) {
		$script_type = "UTF-16LE";
	}

	if ( $script_type ) {
		if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
			// Quick and dirty fold down to ASCII!
			$pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
			// Skip the 2-byte BOM, then decode 16-bit code units
			$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
			$head = '';
			foreach ( $chars as $codepoint ) {
				if ( $codepoint < 128 ) {
					$head .= chr( $codepoint );
				} else {
					$head .= '?';
				}
			}
		}

		$match = [];

		// The word after the last "/" of the interpreter path names the type
		if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
			$mime = "application/x-{$match[2]}";
			$this->logger->info( __METHOD__ . ": shell script recognized as $mime\n" );
			return $mime;
		}
	}

	// Check for ZIP variants (before getimagesize)
	if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
		$this->logger->info( __METHOD__ . ": ZIP header present in $file\n" );
		return $this->detectZipType( $head, $tail, $ext );
	}

	// Check for STL (3D) files
	// @see https://en.wikipedia.org/wiki/STL_(file_format)
	if ( $fsize >= 15 &&
		stripos( $head, 'SOLID ' ) === 0 &&
		preg_match( '/\RENDSOLID .*$/i', $tail ) ) {
		// ASCII STL file
		return 'application/sla';
	} elseif ( $fsize > 84 ) {
		// binary STL file
		$triangles = substr( $head, 80, 4 );
		$triangles = unpack( 'V', $triangles );
		$triangles = reset( $triangles );
		// 84 header bytes plus 50 bytes per triangle must match the file size
		if ( $triangles !== false && $fsize === 84 + ( $triangles * 50 ) ) {
			return 'application/sla';
		}
	}

	Wikimedia\suppressWarnings();
	$gis = getimagesize( $file );
	Wikimedia\restoreWarnings();

	if ( $gis && isset( $gis['mime'] ) ) {
		$mime = $gis['mime'];
		$this->logger->info( __METHOD__ . ": getimagesize detected $file as $mime\n" );
		return $mime;
	}

	# Media handling extensions can guess the MIME by content
	# It's intentionally here so that if core is wrong about a type (false positive),
	# people will hopefully nag and submit patches :)
	$mime = false;
	# Some strings by reference for performance - assuming well-behaved hooks
	$callback = $this->guessCallback;
	if ( $callback ) {
		$callback( $this, $head, $tail, $file, $mime /* by reference */ );
	}

	return $mime;
}
848 
/**
 * Classify a ZIP-based file by looking at its first local file entry.
 *
 * Distinguishes OpenDocument files, Open Packaging Conventions (OPC)
 * archives and legacy MS Office documents that carry an OPC trailer.
 * Anything else stays 'application/zip'.
 *
 * @param string $header The beginning of the file
 * @param string|null $tail The end of the file
 * @param string|bool $ext Deprecated. When it is a real extension (neither
 *   true nor false) it is used to refine an OPC archive's type.
 * @return string The detected MIME type
 */
function detectZipType( $header, $tail = null, $ext = false ) {
	if ( $ext ) { # TODO: remove $ext param
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$detected = 'application/zip';

	// Longer names precede their prefixes so the alternation prefers them
	$opendocTypes = [
		'chart-template', 'chart',
		'formula-template', 'formula',
		'graphics-template', 'graphics',
		'image-template', 'image',
		'presentation-template', 'presentation',
		'spreadsheet-template', 'spreadsheet',
		'text-template', 'text-master', 'text-web', 'text',
	];

	// https://lists.oasis-open.org/archives/office/200505/msg00006.html
	$types = '(?:' . implode( '|', $opendocTypes ) . ')';
	$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

	$openxmlRegex = "/^\[Content_Types\].xml/";

	// Both patterns are matched from byte offset 30 of the header
	$firstEntry = substr( $header, 30 );

	if ( preg_match( $opendocRegex, $firstEntry, $matches ) ) {
		$detected = $matches[1];
		$this->logger->info( __METHOD__ . ": detected $detected from ZIP archive\n" );
	} elseif ( preg_match( $openxmlRegex, $firstEntry ) ) {
		$detected = "application/x-opc+zip";
		# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
		if ( $ext !== true && $ext !== false ) {
			// A real extension was given: a known OPC extension selects
			// its own MIME type, anything else is a plain ZIP
			$detected = $this->isMatchingExtension( $ext, $detected )
				? $this->guessTypesForExtension( $ext )
				: "application/zip";
		}
		$this->logger->info( __METHOD__ .
			": detected an Open Packaging Conventions archive: $detected\n" );
	} elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
		( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
		preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
		// Signatures are read at byte offset 512 of the header
		if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
			$detected = "application/msword";
		}

		$powerpointSignatures = [
			"\xEC\xA5\xC1\x00\x0E\x00",
			"\xEC\xA5\xC1\x00\x1C\x00",
			"\xEC\xA5\xC1\x00\x43\x00",
		];
		$excelSignatures = [
			"\xFD\xFF\xFF\xFF\x10\x00",
			"\xFD\xFF\xFF\xFF\x1F\x00",
			"\xFD\xFF\xFF\xFF\x22\x00",
			"\xFD\xFF\xFF\xFF\x23\x00",
			"\xFD\xFF\xFF\xFF\x28\x00",
			"\xFD\xFF\xFF\xFF\x29\x00",
			"\xFD\xFF\xFF\xFF\x10\x02",
			"\xFD\xFF\xFF\xFF\x1F\x02",
			"\xFD\xFF\xFF\xFF\x22\x02",
			"\xFD\xFF\xFF\xFF\x23\x02",
			"\xFD\xFF\xFF\xFF\x28\x02",
			"\xFD\xFF\xFF\xFF\x29\x02",
		];

		$signature = substr( $header, 512, 6 );
		if ( in_array( $signature, $powerpointSignatures, true ) ) {
			$detected = "application/vnd.ms-powerpoint";
		} elseif ( in_array( $signature, $excelSignatures, true ) ) {
			$detected = "application/vnd.msexcel";
		}

		$this->logger->info( __METHOD__ .
			": detected a MS Office document with OPC trailer\n" );
	} else {
		$this->logger->info( __METHOD__ . ": unable to identify type of ZIP archive\n" );
	}

	return $detected;
}
951 
/**
 * Fallback MIME type detection.
 *
 * Asks the detect callback if one is configured, otherwise
 * mime_content_type(). If that gives no usable answer, the file
 * extension may be used, unless it belongs to a type that content
 * detection should have recognized.
 *
 * @param string $file Path of the file to inspect
 * @param string|bool $ext Deprecated. True means "take the extension from
 *   the file name", a string is used as the extension, a falsy value
 *   disables the extension fallback.
 * @return string The detected MIME type, or 'unknown/unknown'
 */
private function detectMimeType( $file, $ext = true ) {
	// TODO: make $ext default to false. Or better, remove it.
	if ( $ext ) {
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. "
			. "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$detector = $this->detectCallback;
	$m = $detector ? $detector( $file ) : mime_content_type( $file );

	if ( $m ) {
		# normalize
		$m = strtolower( trim( preg_replace( '![;, ].*$!', '', $m ) ) ); # strip charset, etc

		if ( strpos( $m, 'unknown' ) === false ) {
			$this->logger->info( __METHOD__ . ": magic mime type of $file: $m\n" );
			return $m;
		}
		// Any answer mentioning "unknown" counts as no answer
		$m = null;
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$dot = strrpos( $file, '.' );
		$ext = strtolower( $dot ? substr( $file, $dot + 1 ) : '' );
	}

	if ( $ext ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			$m = $this->guessTypesForExtension( $ext );
			if ( $m ) {
				$this->logger->info( __METHOD__ . ": extension mime type of $file: $m\n" );
				return $m;
			}
		} else {
			$this->logger->info( __METHOD__ . ": refusing to guess mime type for .$ext file, "
				. "we should have recognized it\n" );
		}
	}

	// Unknown type
	$this->logger->info( __METHOD__ . ": failed to guess mime type for $file!\n" );
	return 'unknown/unknown';
}
1021 
/**
 * Determine the media type code for a file, using its MIME type, its
 * name and, for Ogg files, its contents.
 *
 * Lookups are tried in this order: the full MIME type, the file
 * extension, then the major part of the MIME type. Ogg containers are
 * special-cased by scanning the first 256 bytes for codec names.
 *
 * @param string|null $path Path of the file; may be null if $mime is given
 * @param string|null $mime MIME type; guessed from $path when empty
 * @return string One of the MEDIATYPE_* constants; MEDIATYPE_UNKNOWN when
 *   nothing matches or neither argument is given
 */
public function getMediaType( $path = null, $mime = null ) {
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If MIME type is unknown, guess it
	if ( !$mime ) {
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	if ( $mime == 'application/ogg' && file_exists( $path ) ) {
		// Read a chunk of the file. Binary mode: the container is not
		// text, so no platform newline translation must be applied.
		$f = fopen( $path, 'rb' );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		// Drop the encoder name so that it cannot be mistaken for the codec
		$head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

		// This is an UGLY HACK, file should be parsed correctly
		if ( strpos( $head, 'theora' ) !== false ) {
			return MEDIATYPE_VIDEO;
		} elseif ( strpos( $head, 'vorbis' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} elseif ( strpos( $head, 'flac' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} elseif ( strpos( $head, 'speex' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} elseif ( strpos( $head, 'opus' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} else {
			return MEDIATYPE_MULTIMEDIA;
		}
	}

	$type = null;
	// Check for entry for full MIME type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major MIME type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$major = substr( $mime, 0, $i );
			$type = $this->findMediaType( $major );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Never hand back null or another empty value: fall back to "unknown"
	if ( !$type ) {
		$type = MEDIATYPE_UNKNOWN;
	}

	return $type;
}
1116 
/**
 * Return the media type registered for a MIME type or a file extension.
 *
 * @param string $extMime A MIME type, or a file extension prefixed with
 *   a dot. An extension is checked through all MIME types registered
 *   for it, in order; a MIME type is first resolved through the alias map.
 * @return string The media type name, or MEDIATYPE_UNKNOWN if none is registered
 */
function findMediaType( $extMime ) {
	if ( strpos( $extMime, '.' ) === 0 ) {
		// If it's an extension, look up the MIME types
		$registered = $this->getTypesForExtension( substr( $extMime, 1 ) );
		if ( !$registered ) {
			return MEDIATYPE_UNKNOWN;
		}

		$candidates = explode( ' ', $registered );
	} else {
		// Normalize MIME type
		$candidates = [ $this->mimeTypeAliases[$extMime] ?? $extMime ];
	}

	// The first candidate listed under any media type decides the result
	foreach ( $candidates as $candidate ) {
		foreach ( $this->mediaTypes as $mediaType => $members ) {
			if ( in_array( $candidate, $members, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
1155 
/**
 * List the names of all media types present in the loaded media type table.
 *
 * @return array The keys of the media type table
 */
public function getMediaTypes() {
	$names = array_keys( $this->mediaTypes );
	return $names;
}
1164 
/**
 * Delegate to IEContentAnalyzer::getRealMimesFromData() using the
 * cached analyzer instance.
 *
 * @param string $fileName Passed through unchanged
 * @param string $chunk Passed through unchanged
 * @param string $proposed Passed through unchanged
 * @return mixed Whatever getRealMimesFromData() returns
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
	return $this->getIEContentAnalyzer()->getRealMimesFromData( $fileName, $chunk, $proposed );
}
1178 
/**
 * Return the IEContentAnalyzer instance, creating it on first use and
 * keeping it on the object for later calls.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
	if ( $this->IEAnalyzer === null ) {
		$this->IEAnalyzer = new IEContentAnalyzer;
	}

	return $this->IEAnalyzer;
}
1190 }
MimeAnalyzer\$guessCallback
callable $guessCallback
Definition: MimeAnalyzer.php:42
MimeAnalyzer\getIEContentAnalyzer
getIEContentAnalyzer()
Get a cached instance of IEContentAnalyzer.
Definition: MimeAnalyzer.php:1184
MimeAnalyzer\$wellKnownTypes
static $wellKnownTypes
Defines a set of well known MIME types This is used as a fallback to mime.types files.
Definition: MimeAnalyzer.php:85
MimeAnalyzer\guessTypesForExtension
guessTypesForExtension( $ext)
Returns a single MIME type for a given file extension or null if unknown.
Definition: MimeAnalyzer.php:441
MimeAnalyzer\addExtraInfo
addExtraInfo( $info)
Adds to the list mapping MIME to media type.
Definition: MimeAnalyzer.php:389
MimeAnalyzer\isRecognizableExtension
isRecognizableExtension( $extension)
Returns true if the extension represents a type which can be reliably detected from its content.
Definition: MimeAnalyzer.php:511
MEDIATYPE_AUDIO
const MEDIATYPE_AUDIO
Definition: defines.php:32
MimeAnalyzer\getExtensionsForType
getExtensionsForType( $mime)
Returns a list of file extensions for a given MIME type as a space separated string or null if the MI...
Definition: MimeAnalyzer.php:401
MimeAnalyzer\$initCallback
callable $initCallback
Definition: MimeAnalyzer.php:38
MimeAnalyzer\getMediaType
getMediaType( $path=null, $mime=null)
Determine the media type code for a file, using its MIME type, name and possibly its contents.
Definition: MimeAnalyzer.php:1038
MimeAnalyzer
Implements functions related to MIME types such as detection and mapping to file extension.
Definition: MimeAnalyzer.php:30
captcha-old.count
count
Definition: captcha-old.py:249
MimeAnalyzer\$mimetoExt
array $mimetoExt
Map of MIME types to file extensions (as a space separated list)
Definition: MimeAnalyzer.php:50
MimeAnalyzer\$mExtToMime
array $mExtToMime
Map of file extensions types to MIME types (as a space separated list)
Definition: MimeAnalyzer.php:53
MimeAnalyzer\guessMimeType
guessMimeType( $file, $ext=true)
MIME type detection.
Definition: MimeAnalyzer.php:601
MimeAnalyzer\$infoFile
string $infoFile
Definition: MimeAnalyzer.php:34
txt
This document describes how event hooks work in the Renameuser extension For a more comprehensive guide to navigate to your root MediaWiki directory and read docs hooks txt
Definition: hooks.txt:6
MimeAnalyzer\__construct
__construct(array $params)
Definition: MimeAnalyzer.php:194
MimeAnalyzer\getTypesForExtension
getTypesForExtension( $ext)
Returns a list of MIME types for a given file extension as a space separated string or null if the ex...
Definition: MimeAnalyzer.php:427
$params
$params
Definition: styleTest.css.php:44
MEDIATYPE_UNKNOWN
const MEDIATYPE_UNKNOWN
Definition: defines.php:26
MimeAnalyzer\$mediaTypes
array $mediaTypes
Mapping of media types to arrays of MIME types.
Definition: MimeAnalyzer.php:46
$s
$s
Definition: mergeMessageFileList.php:187
MimeAnalyzer\setLogger
setLogger(LoggerInterface $logger)
Definition: MimeAnalyzer.php:369
MimeAnalyzer\detectMimeType
detectMimeType( $file, $ext=true)
Internal MIME type detection.
Definition: MimeAnalyzer.php:969
MimeAnalyzer\$IEAnalyzer
IEContentAnalyzer $IEAnalyzer
Definition: MimeAnalyzer.php:56
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
MimeAnalyzer\getIEMimeTypes
getIEMimeTypes( $fileName, $chunk, $proposed)
Get the MIME types that various versions of Internet Explorer would detect from a chunk of the content.
Definition: MimeAnalyzer.php:1174
MimeAnalyzer\$typeFile
string $typeFile
Definition: MimeAnalyzer.php:32
$matches
$matches
Definition: NoLocalSettings.php:24
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
$lines
$lines
Definition: router.php:61
etc
Using a hook running we can avoid having all this option specific stuff in our mainline code Using the function We ve cleaned up the code here by removing clumps of infrequently used code and moving them off somewhere else It s much easier for someone working with this code to see what s _really_ going and make changes or fix bugs In we can take all the code that deals with the little used title reversing etc
Definition: hooks.txt:91
MimeAnalyzer\detectZipType
detectZipType( $header, $tail=null, $ext=false)
Detect application-specific file type of a given ZIP file from its header data.
Definition: MimeAnalyzer.php:862
array
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
IEContentAnalyzer
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection algorithm.
Definition: IEContentAnalyzer.php:27
MimeAnalyzer\$wellKnownInfo
static $wellKnownInfo
Defines a set of well-known MIME info entries. This is used as a fallback to mime.info files.
Definition: MimeAnalyzer.php:137
$mime
if( $ext=='php'|| $ext=='php5') $mime
Definition: router.php:59
MEDIATYPE_MULTIMEDIA
const MEDIATYPE_MULTIMEDIA
Definition: defines.php:37
$e
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2221
MimeAnalyzer\isMatchingExtension
isMatchingExtension( $extension, $mime)
Tests if the extension matches the given MIME type.
Definition: MimeAnalyzer.php:463
$header
$header
Definition: updateCredits.php:35
MimeAnalyzer\findMediaType
findMediaType( $extMime)
Returns a media code matching the given MIME type or file extension.
Definition: MimeAnalyzer.php:1127
MimeAnalyzer\getMediaTypes
getMediaTypes()
Returns an array of media types (MEDIATYPE_xxx constants)
Definition: MimeAnalyzer.php:1161
MimeAnalyzer\$logger
LoggerInterface $logger
Definition: MimeAnalyzer.php:64
MimeAnalyzer\$extraInfo
string $extraInfo
Extra MIME info, set for example by media handling extensions.
Definition: MimeAnalyzer.php:61
MimeAnalyzer\$mimeTypeAliases
array $mimeTypeAliases
Map of MIME type aliases.
Definition: MimeAnalyzer.php:48
MimeAnalyzer\isPHPImageType
isPHPImageType( $mime)
Returns true if the MIME type is known to represent an image format supported by the PHP GD library.
Definition: MimeAnalyzer.php:484
XmlTypeCheck
Definition: XmlTypeCheck.php:28
MimeAnalyzer\loadFiles
loadFiles()
Definition: MimeAnalyzer.php:207
plain
either a plain
Definition: hooks.txt:2105
text
This list may contain false positives That usually means there is additional text with links below the first Each row contains links to the first and second as well as the first line of the second redirect text
Definition: All_system_messages.txt:1267
MEDIATYPE_TEXT
const MEDIATYPE_TEXT
Definition: defines.php:41
MimeAnalyzer\addExtraTypes
addExtraTypes( $types)
Adds to the list mapping MIME to file extensions.
Definition: MimeAnalyzer.php:379
MEDIATYPE_VIDEO
const MEDIATYPE_VIDEO
Definition: defines.php:35
MimeAnalyzer\improveTypeFromExtension
improveTypeFromExtension( $mime, $ext)
Improves a MIME type using the file extension.
Definition: MimeAnalyzer.php:544
MimeAnalyzer\$extCallback
callable $extCallback
Definition: MimeAnalyzer.php:44
$path
$path
Definition: NoLocalSettings.php:25
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
MimeAnalyzer\$extraTypes
string $extraTypes
Extra MIME types, set for example by media handling extensions.
Definition: MimeAnalyzer.php:59
MimeAnalyzer\$xmlTypes
string $xmlTypes
Definition: MimeAnalyzer.php:36
$ext
$ext
Definition: router.php:55
MimeAnalyzer\$detectCallback
callable $detectCallback
Definition: MimeAnalyzer.php:40
MimeAnalyzer\doGuessMimeType
doGuessMimeType( $file, $ext)
Guess the MIME type from the file contents.
Definition: MimeAnalyzer.php:634
$type
$type
Definition: testCompression.php:48