MediaWiki  1.23.0
MimeMagic.php
Go to the documentation of this file.
1 <?php
/**
 * Built-in table of well known MIME types and their file extensions.
 *
 * Each line has the form "<mime type> <extension> [<extension> ...]".
 * MimeMagic::__construct() always parses this table first and appends the
 * contents of the file named by $wgMimeTypeFile, if that file is readable.
 */
42 define( 'MM_WELL_KNOWN_MIME_TYPES', <<<END_STRING
43 application/ogg ogx ogg ogm ogv oga spx
44 application/pdf pdf
45 application/vnd.oasis.opendocument.chart odc
46 application/vnd.oasis.opendocument.chart-template otc
47 application/vnd.oasis.opendocument.database odb
48 application/vnd.oasis.opendocument.formula odf
49 application/vnd.oasis.opendocument.formula-template otf
50 application/vnd.oasis.opendocument.graphics odg
51 application/vnd.oasis.opendocument.graphics-template otg
52 application/vnd.oasis.opendocument.image odi
53 application/vnd.oasis.opendocument.image-template oti
54 application/vnd.oasis.opendocument.presentation odp
55 application/vnd.oasis.opendocument.presentation-template otp
56 application/vnd.oasis.opendocument.spreadsheet ods
57 application/vnd.oasis.opendocument.spreadsheet-template ots
58 application/vnd.oasis.opendocument.text odt
59 application/vnd.oasis.opendocument.text-master otm
60 application/vnd.oasis.opendocument.text-template ott
61 application/vnd.oasis.opendocument.text-web oth
62 application/x-javascript js
63 application/x-shockwave-flash swf
64 audio/midi mid midi kar
65 audio/mpeg mpga mpa mp2 mp3
66 audio/x-aiff aif aiff aifc
67 audio/x-wav wav
68 audio/ogg oga spx ogg
69 image/x-bmp bmp
70 image/gif gif
71 image/jpeg jpeg jpg jpe
72 image/png png
73 image/svg+xml svg
74 image/svg svg
75 image/tiff tiff tif
76 image/vnd.djvu djvu
77 image/x.djvu djvu
78 image/x-djvu djvu
79 image/x-portable-pixmap ppm
80 image/x-xcf xcf
81 text/plain txt
82 text/html html htm
83 video/ogg ogv ogm ogg
84 video/mpeg mpg mpeg
85 END_STRING
86 );
87 
/**
 * Built-in table of well known MIME info entries.
 *
 * Each line has the form "<mime type> [<alias> ...] [MEDIATYPE]".
 * MimeMagic::__construct() treats the first type on a line as the main type,
 * records every further type as an alias of it, and files all of them under
 * the bracketed media type. The table is always parsed first; the contents of
 * the file named by $wgMimeInfoFile are appended if that file is readable.
 */
94 define( 'MM_WELL_KNOWN_MIME_INFO', <<<END_STRING
95 application/pdf [OFFICE]
96 application/vnd.oasis.opendocument.chart [OFFICE]
97 application/vnd.oasis.opendocument.chart-template [OFFICE]
98 application/vnd.oasis.opendocument.database [OFFICE]
99 application/vnd.oasis.opendocument.formula [OFFICE]
100 application/vnd.oasis.opendocument.formula-template [OFFICE]
101 application/vnd.oasis.opendocument.graphics [OFFICE]
102 application/vnd.oasis.opendocument.graphics-template [OFFICE]
103 application/vnd.oasis.opendocument.image [OFFICE]
104 application/vnd.oasis.opendocument.image-template [OFFICE]
105 application/vnd.oasis.opendocument.presentation [OFFICE]
106 application/vnd.oasis.opendocument.presentation-template [OFFICE]
107 application/vnd.oasis.opendocument.spreadsheet [OFFICE]
108 application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
109 application/vnd.oasis.opendocument.text [OFFICE]
110 application/vnd.oasis.opendocument.text-template [OFFICE]
111 application/vnd.oasis.opendocument.text-master [OFFICE]
112 application/vnd.oasis.opendocument.text-web [OFFICE]
113 text/javascript application/x-javascript [EXECUTABLE]
114 application/x-shockwave-flash [MULTIMEDIA]
115 audio/midi [AUDIO]
116 audio/x-aiff [AUDIO]
117 audio/x-wav [AUDIO]
118 audio/mp3 audio/mpeg [AUDIO]
119 application/ogg audio/ogg video/ogg [MULTIMEDIA]
120 image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
121 image/gif [BITMAP]
122 image/jpeg [BITMAP]
123 image/png [BITMAP]
124 image/svg+xml [DRAWING]
125 image/tiff [BITMAP]
126 image/vnd.djvu [BITMAP]
127 image/x-xcf [BITMAP]
128 image/x-portable-pixmap [BITMAP]
129 text/plain [TEXT]
130 text/html [TEXT]
131 video/ogg [VIDEO]
132 video/mpeg [VIDEO]
133 unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
134 END_STRING
135 );
136 
144 class MimeMagic {
145 
150  var $mMediaTypes = null;
151 
154  var $mMimeTypeAliases = null;
155 
158  var $mMimeToExt = null;
159 
162  var $mExtToMime = null;
163 
166  var $mIEAnalyzer;
167 
170  private static $instance = null;
171 
/**
 * Build the lookup tables used by the rest of this class.
 *
 * Two line-oriented tables are parsed, each consisting of the built-in
 * defaults followed by the contents of an optional file:
 *  - MM_WELL_KNOWN_MIME_TYPES plus $wgMimeTypeFile fill $mMimeToExt and
 *    $mExtToMime (both map to space-separated strings);
 *  - MM_WELL_KNOWN_MIME_INFO plus $wgMimeInfoFile fill $mMimeTypeAliases
 *    and $mMediaTypes.
 *
 * Blank lines, lines starting with '#' and lines without a space are
 * skipped; everything is lower-cased before it is stored.
 *
 * Side effect: when $wgMimeTypeFile / $wgMimeInfoFile hold the stock relative
 * values 'includes/mime.types' / 'includes/mime.info', the globals themselves
 * are rewritten to be prefixed with $IP.
 */
function __construct() {
	global $wgMimeTypeFile, $wgMimeInfoFile, $IP;

	/*
	 * --- load mime types and file extensions ---
	 */
	$types = MM_WELL_KNOWN_MIME_TYPES;

	if ( $wgMimeTypeFile == 'includes/mime.types' ) {
		$wgMimeTypeFile = "$IP/$wgMimeTypeFile";
	}

	if ( $wgMimeTypeFile ) {
		if ( is_file( $wgMimeTypeFile ) && is_readable( $wgMimeTypeFile ) ) {
			wfDebug( __METHOD__ . ": loading mime types from $wgMimeTypeFile\n" );
			$types .= "\n";
			$types .= file_get_contents( $wgMimeTypeFile );
		} else {
			wfDebug( __METHOD__ . ": can't load mime types from $wgMimeTypeFile\n" );
		}
	} else {
		wfDebug( __METHOD__ . ": no mime types file defined, using build-ins only.\n" );
	}

	// Normalise line endings and turn tabs into spaces so that a single
	// explode() per level is enough to tokenise the table.
	$types = str_replace( array( "\r\n", "\n\r", "\n\n", "\r\r", "\r" ), "\n", $types );
	$types = str_replace( "\t", " ", $types );

	$this->mMimeToExt = array();
	// Reset the declared extension map. The previous code assigned to an
	// undeclared property named mToMime, leaving $mExtToMime as null.
	$this->mExtToMime = array();

	foreach ( explode( "\n", $types ) as $s ) {
		$s = trim( $s );
		if ( empty( $s ) ) {
			continue;
		}
		if ( strpos( $s, '#' ) === 0 ) {
			continue; // comment line
		}

		$s = strtolower( $s );
		$i = strpos( $s, ' ' );

		if ( $i === false ) {
			continue; // a type without any extension
		}

		$mime = substr( $s, 0, $i );
		$ext = trim( substr( $s, $i + 1 ) );

		if ( empty( $ext ) ) {
			continue;
		}

		// A type may appear on several lines: accumulate its extensions
		if ( !empty( $this->mMimeToExt[$mime] ) ) {
			$this->mMimeToExt[$mime] .= ' ' . $ext;
		} else {
			$this->mMimeToExt[$mime] = $ext;
		}

		foreach ( explode( ' ', $ext ) as $e ) {
			$e = trim( $e );
			if ( empty( $e ) ) {
				continue;
			}

			// An extension may belong to several types: accumulate them,
			// the first one listed stays first.
			if ( !empty( $this->mExtToMime[$e] ) ) {
				$this->mExtToMime[$e] .= ' ' . $mime;
			} else {
				$this->mExtToMime[$e] = $mime;
			}
		}
	}

	/*
	 * --- load mime info (aliases and media types) ---
	 */
	if ( $wgMimeInfoFile == 'includes/mime.info' ) {
		$wgMimeInfoFile = "$IP/$wgMimeInfoFile";
	}

	$info = MM_WELL_KNOWN_MIME_INFO;

	if ( $wgMimeInfoFile ) {
		if ( is_file( $wgMimeInfoFile ) && is_readable( $wgMimeInfoFile ) ) {
			wfDebug( __METHOD__ . ": loading mime info from $wgMimeInfoFile\n" );
			$info .= "\n";
			$info .= file_get_contents( $wgMimeInfoFile );
		} else {
			wfDebug( __METHOD__ . ": can't load mime info from $wgMimeInfoFile\n" );
		}
	} else {
		wfDebug( __METHOD__ . ": no mime info file defined, using build-ins only.\n" );
	}

	$info = str_replace( array( "\r\n", "\n\r", "\n\n", "\r\r", "\r" ), "\n", $info );
	$info = str_replace( "\t", " ", $info );

	$this->mMimeTypeAliases = array();
	$this->mMediaTypes = array();

	foreach ( explode( "\n", $info ) as $s ) {
		$s = trim( $s );
		if ( empty( $s ) ) {
			continue;
		}
		if ( strpos( $s, '#' ) === 0 ) {
			continue; // comment line
		}

		$s = strtolower( $s );

		if ( strpos( $s, ' ' ) === false ) {
			continue;
		}

		// The media type is the bracketed word, e.g. "[BITMAP]"; it is cut
		// out of the line so that only mime types remain.
		$match = array();
		if ( preg_match( '!\[\s*(\w+)\s*\]!', $s, $match ) ) {
			$s = preg_replace( '!\[\s*(\w+)\s*\]!', '', $s );
			$mtype = trim( strtoupper( $match[1] ) );
		} else {
			$mtype = MEDIATYPE_UNKNOWN;
		}

		$m = explode( ' ', $s );

		if ( !isset( $this->mMediaTypes[$mtype] ) ) {
			$this->mMediaTypes[$mtype] = array();
		}

		foreach ( $m as $mime ) {
			$mime = trim( $mime );
			if ( empty( $mime ) ) {
				continue;
			}

			$this->mMediaTypes[$mtype][] = $mime;
		}

		// The first type on the line is the main type, the others alias it.
		// NOTE(review): $m can contain empty strings (e.g. the space left
		// behind by the removed "[TYPE]" marker), so an alias keyed by ''
		// is registered as well. Kept as-is because lookups with an empty
		// or null type currently resolve through it.
		$mCount = count( $m );
		if ( $mCount > 1 ) {
			$main = $m[0];
			for ( $i = 1; $i < $mCount; $i += 1 ) {
				$this->mMimeTypeAliases[$m[$i]] = $main;
			}
		}
	}
}
334 
/**
 * Return the shared MimeMagic instance, constructing it on first use.
 *
 * @return MimeMagic
 */
public static function singleton() {
	if ( self::$instance !== null ) {
		return self::$instance;
	}

	self::$instance = new self();

	return self::$instance;
}
345 
/**
 * Look up the file extensions registered for a MIME type.
 *
 * The type is lower-cased and tried directly first; if it is unknown, it is
 * resolved through the alias table once and tried again.
 *
 * @param string $mime MIME type
 * @return string|null Space-separated list of extensions, or null if none
 *  are registered for the type or its canonical form
 */
public function getExtensionsForType( $mime ) {
	$key = strtolower( $mime );

	if ( !isset( $this->mMimeToExt[$key] ) ) {
		// Not listed directly: fall back to the canonical type, if any
		if ( !isset( $this->mMimeTypeAliases[$key] ) ) {
			return null;
		}

		$key = $this->mMimeTypeAliases[$key];

		if ( !isset( $this->mMimeToExt[$key] ) ) {
			return null;
		}
	}

	return $this->mMimeToExt[$key];
}
372 
/**
 * Look up the MIME types registered for a file extension.
 *
 * @param string $ext File extension, without the dot; matched case-insensitively
 * @return string|null Space-separated list of MIME types, or null if the
 *  extension is not registered
 */
public function getTypesForExtension( $ext ) {
	$key = strtolower( $ext );

	if ( isset( $this->mExtToMime[$key] ) ) {
		return $this->mExtToMime[$key];
	}

	return null;
}
386 
/**
 * Return a single MIME type for a file extension: the first entry of the
 * list that getTypesForExtension() yields.
 *
 * @param string $ext File extension, without the dot
 * @return string|null The first registered MIME type, or null if the
 *  extension is not registered
 */
public function guessTypesForExtension( $ext ) {
	$all = $this->getTypesForExtension( $ext );

	if ( $all === null ) {
		return null;
	}

	// Cut everything from the first whitespace on, keeping the first type.
	// TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
	return preg_replace( '/\s.*$/', '', trim( $all ) );
}
406 
/**
 * Check whether a file extension is one of those registered for a MIME type.
 *
 * @param string $extension File extension, without the dot; compared lower-cased
 * @param string $mime MIME type
 * @return bool|null True or false for a known type, null if no extensions
 *  are registered for the type
 */
public function isMatchingExtension( $extension, $mime ) {
	$registered = $this->getExtensionsForType( $mime );

	if ( !$registered ) {
		return null; // Unknown mime type
	}

	$candidates = explode( ' ', $registered );

	return in_array( strtolower( $extension ), $candidates );
}
428 
/**
 * Check whether a MIME type is one of the image types PHP itself reports.
 *
 * The comparison is strict, so only an exact string match counts;
 * non-string input such as true or 0 never matches.
 *
 * @param string $mime MIME type, compared as given (no case folding)
 * @return bool
 */
public function isPHPImageType( $mime ) {
	// As defined by getimagesize and image_type_to_mime
	static $types = array(
		'image/gif', 'image/jpeg', 'image/png',
		'image/x-bmp', 'image/xbm', 'image/tiff',
		'image/jp2', 'image/jpeg2000', 'image/iff',
		'image/x-xbitmap',
		'image/vnd.wap.wbmp', 'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	);

	return in_array( $mime, $types, true );
}
451 
/**
 * Check whether an extension belongs to a file format that the content-based
 * detection is expected to recognize on its own.
 *
 * @param string $extension File extension, without the dot; matched case-insensitively
 * @return bool
 */
function isRecognizableExtension( $extension ) {
	static $types = array(
		// Types recognized by getimagesize()
		'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd', 'bmp', 'tiff', 'tif',
		'jpc', 'jp2', 'jpx', 'jb2', 'swc', 'iff', 'wbmp', 'xbm',

		// Formats we recognize magic numbers for
		'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx', 'mid', 'pdf', 'wmf',
		'xcf', 'webm', 'mkv', 'mka', 'webp',

		// XML formats we sure hope we recognize reliably
		'svg',
	);

	$needle = strtolower( $extension );

	return in_array( $needle, $types );
}
481 
/**
 * Refine a content-detected MIME type using the file extension.
 *
 * Only two detected types are refined:
 *  - 'unknown/unknown': the extension is trusted, unless it belongs to a
 *    format that content detection should have recognized;
 *  - 'application/x-opc+zip': replaced by the type registered for the
 *    extension if the extension is registered for OPC files, otherwise
 *    downgraded to 'application/zip'.
 * The result is finally resolved through the alias table.
 *
 * @param string $mime MIME type found by looking at the file content
 * @param string $ext File extension, without the dot
 * @return string|null Improved MIME type; null when the type was unknown and
 *  nothing usable is registered for the extension
 */
public function improveTypeFromExtension( $mime, $ext ) {
	if ( $mime === 'application/x-opc+zip' ) {
		if ( !$this->isMatchingExtension( $ext, $mime ) ) {
			wfDebug( __METHOD__ . ": refusing to guess better type for $mime file, " .
				".$ext is not a known OPC extension.\n" );
			$mime = 'application/zip';
		} else {
			// A known file extension for an OPC file,
			// find the proper mime type for that file extension
			$mime = $this->guessTypesForExtension( $ext );
		}
	} elseif ( $mime === 'unknown/unknown' ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->guessTypesForExtension( $ext );
		} else {
			wfDebug( __METHOD__ . ': refusing to guess mime type for .' .
				"$ext file, we should have recognized it\n" );
		}
	}

	$canonical = isset( $this->mMimeTypeAliases[$mime] )
		? $this->mMimeTypeAliases[$mime]
		: $mime;

	wfDebug( __METHOD__ . ": improved mime type for .$ext: $canonical\n" );

	return $canonical;
}
530 
/**
 * Guess the MIME type of a file from its content.
 *
 * The built-in checks of doGuessMimeType() run first; if they yield nothing,
 * detectMimeType() is consulted. The result is resolved through the alias
 * table before it is returned.
 *
 * @param string $file Path of the file to examine
 * @param bool|string $ext Deprecated, see improveTypeFromExtension(); passed
 *  through to the two detection methods
 * @return string MIME type
 */
public function guessMimeType( $file, $ext = true ) {
	if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
		wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$guess = $this->doGuessMimeType( $file, $ext );

	if ( !$guess ) {
		wfDebug( __METHOD__ . ": internal type detection failed for $file (.$ext)...\n" );
		$guess = $this->detectMimeType( $file, $ext );
	}

	$result = isset( $this->mMimeTypeAliases[$guess] )
		? $this->mMimeTypeAliases[$guess]
		: $guess;

	wfDebug( __METHOD__ . ": guessed mime type of $file: $result\n" );

	return $result;
}
565 
/**
 * Guess the MIME type of a file using the checks built into this class.
 *
 * Checks run in this order and the first hit wins: hard-coded magic numbers,
 * EBML (Matroska/WebM), WebP, embedded PHP, well-formed XML, shebang scripts,
 * ZIP variants, getimagesize() and finally DjVu.
 *
 * @param string $file Path of the file to examine
 * @param bool|string $ext Only handed on to detectZipType()
 * @return string|bool MIME type; 'unknown/unknown' if the file cannot be
 *  opened or is an unidentified EBML file; false if no check matched
 */
private function doGuessMimeType( $file, $ext ) { // TODO: remove $ext param
	// Read the file in binary mode: text mode ('t') translates line endings
	// on Windows, which can corrupt the magic numbers compared below.
	$f = fopen( $file, 'rb' );

	if ( !$f ) {
		return 'unknown/unknown';
	}

	$stat = fstat( $f );
	$fsize = $stat ? $stat['size'] : 0;

	$head = (string)fread( $f, 1024 );

	// 65558 = maximum size of a zip EOCDR. Never seek back further than the
	// file is long: such a seek fails and leaves the pointer right behind
	// the head, so the tail of a short file would miss its first 1024 bytes.
	$tailLength = min( 65558, $fsize );
	if ( $tailLength > 0 && fseek( $f, -$tailLength, SEEK_END ) === 0 ) {
		$tail = (string)fread( $f, $tailLength );
	} else {
		$tail = '';
	}
	fclose( $f );

	wfDebug( __METHOD__ . ": analyzing head and tail of $file for magic numbers.\n" );

	// Hardcode a few magic number checks...
	$headers = array(
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	);

	foreach ( $headers as $magic => $candidate ) {
		if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
			wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" );
			return $candidate;
		}
	}

	/* Look for WebM and Matroska files */
	if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
		$doctype = strpos( $head, "\x42\x82" );
		if ( $doctype ) {
			// Next byte is datasize, then data (sizes larger than 1 byte are very stupid muxers)
			$data = substr( $head, $doctype + 3, 8 );
			if ( strncmp( $data, "matroska", 8 ) == 0 ) {
				wfDebug( __METHOD__ . ": recognized file as video/x-matroska\n" );
				return "video/x-matroska";
			} elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
				wfDebug( __METHOD__ . ": recognized file as video/webm\n" );
				return "video/webm";
			}
		}
		wfDebug( __METHOD__ . ": unknown EBML file\n" );
		return "unknown/unknown";
	}

	/* Look for WebP */
	if ( strncmp( $head, "RIFF", 4 ) == 0 && strncmp( substr( $head, 8, 8 ), "WEBPVP8 ", 8 ) == 0 ) {
		wfDebug( __METHOD__ . ": recognized file as image/webp\n" );
		return "image/webp";
	}

	// Look for PHP code anywhere in the head: the plain "<?php" opener, and
	// the UTF-16LE encoded forms of "<?php" and of "<?" followed by a space,
	// newline, tab or "=".
	if ( ( strpos( $head, '<?php' ) !== false ) ||
		( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
		( strpos( $head, "<\x00?\x00 " ) !== false ) ||
		( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
		( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
		( strpos( $head, "<\x00?\x00=" ) !== false ) ) {

		wfDebug( __METHOD__ . ": recognized $file as application/x-php\n" );
		return 'application/x-php';
	}

	// Look for XML formats; the root element selects the type
	$xml = new XmlTypeCheck( $file );
	if ( $xml->wellFormed ) {
		global $wgXMLMimeTypes;
		if ( isset( $wgXMLMimeTypes[$xml->getRootElement()] ) ) {
			return $wgXMLMimeTypes[$xml->getRootElement()];
		} else {
			return 'application/xml';
		}
	}

	// Look for shell scripts
	$script_type = null;

	# detect by shebang, optionally preceded by a byte order mark.
	# Each substring length equals the length of the literal it is compared
	# with; both UTF-16 signatures (BOM, '#', '!') are 6 bytes long.
	if ( substr( $head, 0, 2 ) === "#!" ) {
		$script_type = "ASCII";
	} elseif ( substr( $head, 0, 5 ) === "\xef\xbb\xbf#!" ) {
		$script_type = "UTF-8";
	} elseif ( substr( $head, 0, 6 ) === "\xfe\xff\x00#\x00!" ) {
		$script_type = "UTF-16BE";
	} elseif ( substr( $head, 0, 6 ) === "\xff\xfe#\x00!\x00" ) {
		$script_type = "UTF-16LE";
	}

	if ( $script_type ) {
		if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
			// Quick and dirty fold down to ASCII!
			$pack = array( 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' );
			// Skip the 2-byte byte order mark, then decode 16-bit units
			$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
			$head = '';
			foreach ( $chars as $codepoint ) {
				if ( $codepoint < 128 ) {
					$head .= chr( $codepoint );
				} else {
					$head .= '?';
				}
			}
		}

		$match = array();

		// The last path component of the interpreter names the type
		if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
			$mime = "application/x-{$match[2]}";
			wfDebug( __METHOD__ . ": shell script recognized as $mime\n" );
			return $mime;
		}
	}

	// Check for ZIP variants (before getimagesize)
	if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
		wfDebug( __METHOD__ . ": ZIP header present in $file\n" );
		return $this->detectZipType( $head, $tail, $ext );
	}

	$gis = getimagesize( $file );

	if ( $gis && isset( $gis['mime'] ) ) {
		$mime = $gis['mime'];
		wfDebug( __METHOD__ . ": getimagesize detected $file as $mime\n" );
		return $mime;
	}

	// Also test DjVu
	$deja = new DjVuImage( $file );
	if ( $deja->isValid() ) {
		wfDebug( __METHOD__ . ": detected $file as image/vnd.djvu\n" );
		return 'image/vnd.djvu';
	}

	return false;
}
741 
/**
 * Detect the application-specific type of a file already known to be a ZIP
 * archive.
 *
 * Recognized are OpenDocument files (via their leading "mimetype" entry),
 * Open Packaging Conventions archives (via a leading "[Content_Types].xml"
 * entry) and OLE2 compound files that carry an OPC archive in their tail.
 *
 * @param string $header Beginning of the file; the name of the first ZIP
 *  entry is expected at offset 30
 * @param string|null $tail End of the file, searched for an embedded ZIP
 *  local file header
 * @param bool|string $ext Deprecated, see improveTypeFromExtension(). A
 *  value other than true/false is used as the file extension to refine an
 *  OPC archive.
 * @return string MIME type, 'application/zip' if nothing more specific applies
 */
function detectZipType( $header, $tail = null, $ext = false ) {
	if ( $ext ) { # TODO: remove $ext param
		wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	// http://lists.oasis-open.org/archives/office/200505/msg00006.html
	// Longer names precede their prefixes so the alternation prefers them.
	$odfSubtypes = implode( '|', array(
		'chart-template',
		'chart',
		'formula-template',
		'formula',
		'graphics-template',
		'graphics',
		'image-template',
		'image',
		'presentation-template',
		'presentation',
		'spreadsheet-template',
		'spreadsheet',
		'text-template',
		'text-master',
		'text-web',
		'text',
	) );
	$opendocPattern = '/^mimetype(application\/vnd\.oasis\.opendocument\.(?:' . $odfSubtypes . '))/';
	$openxmlPattern = '/^\[Content_Types\].xml/';

	// Signatures compared against the 6 bytes at offset 512 of an OLE2 file
	$powerpointSignatures = array(
		"\xEC\xA5\xC1\x00\x0E\x00",
		"\xEC\xA5\xC1\x00\x1C\x00",
		"\xEC\xA5\xC1\x00\x43\x00",
	);
	$excelSignatures = array(
		"\xFD\xFF\xFF\xFF\x10\x00",
		"\xFD\xFF\xFF\xFF\x1F\x00",
		"\xFD\xFF\xFF\xFF\x22\x00",
		"\xFD\xFF\xFF\xFF\x23\x00",
		"\xFD\xFF\xFF\xFF\x28\x00",
		"\xFD\xFF\xFF\xFF\x29\x00",
		"\xFD\xFF\xFF\xFF\x10\x02",
		"\xFD\xFF\xFF\xFF\x1F\x02",
		"\xFD\xFF\xFF\xFF\x22\x02",
		"\xFD\xFF\xFF\xFF\x23\x02",
		"\xFD\xFF\xFF\xFF\x28\x02",
		"\xFD\xFF\xFF\xFF\x29\x02",
	);

	$result = 'application/zip';
	// Skip the 30 bytes in front of the first entry's file name
	$firstEntry = substr( $header, 30 );
	$found = array();

	if ( preg_match( $opendocPattern, $firstEntry, $found ) ) {
		$result = $found[1];
		wfDebug( __METHOD__ . ": detected $result from ZIP archive\n" );
	} elseif ( preg_match( $openxmlPattern, $firstEntry ) ) {
		$result = "application/x-opc+zip";
		# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
		if ( $ext !== true && $ext !== false ) {
			if ( $this->isMatchingExtension( $ext, $result ) ) {
				/* A known file extension for an OPC file,
				 * find the proper mime type for that file extension
				 */
				$result = $this->guessTypesForExtension( $ext );
			} else {
				$result = "application/zip";
			}
		}
		wfDebug( __METHOD__ . ": detected an Open Packaging Conventions archive: $result\n" );
	} elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
		( $localHeaderAt = strpos( $tail, "PK\x03\x04" ) ) !== false &&
		preg_match( $openxmlPattern, substr( $tail, $localHeaderAt + 30 ) )
	) {
		if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
			$result = "application/msword";
		}

		// A full 6-byte signature match overrides the 4-byte match above
		$signature = substr( $header, 512, 6 );
		if ( in_array( $signature, $powerpointSignatures ) ) {
			$result = "application/vnd.ms-powerpoint";
		} elseif ( in_array( $signature, $excelSignatures ) ) {
			$result = "application/vnd.msexcel";
		}

		wfDebug( __METHOD__ . ": detected a MS Office document with OPC trailer\n" );
	} else {
		wfDebug( __METHOD__ . ": unable to identify type of ZIP archive\n" );
	}

	return $result;
}
841 
/**
 * Detect the MIME type of a file with an external detector.
 *
 * The first available of these is used: the shell command configured in
 * $wgMimeDetectorCommand, the fileinfo functions, mime_content_type().
 * If none of them yields a usable type, the file extension may be used as a
 * fallback.
 *
 * @param string $file Path of the file to examine
 * @param bool|string $ext Deprecated, see improveTypeFromExtension().
 *  true: take the extension from the file name; string: use it as the
 *  extension; false: do not fall back to the extension.
 * @return string MIME type, 'unknown/unknown' if detection failed
 */
private function detectMimeType( $file, $ext = true ) {
	global $wgMimeDetectorCommand;

	if ( $ext ) { # TODO: make $ext default to false. Or better, remove it.
		wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
	}

	$m = null;
	if ( $wgMimeDetectorCommand ) {
		// The file name is the only argument of the command. It must be
		// defined before it is interpolated, and shell-escaped since it is
		// not under our control.
		$args = wfEscapeShellArg( $file );
		$m = wfShellExec( "$wgMimeDetectorCommand $args" );
	} elseif ( function_exists( "finfo_open" ) && function_exists( "finfo_file" ) ) {

		# This required the fileinfo extension by PECL,
		# see http://pecl.php.net/package/fileinfo
		# This must be compiled into PHP
		#
		# finfo is the official replacement for the deprecated
		# mime_content_type function, see below.
		#
		# If you may need to load the fileinfo extension at runtime, set
		# $wgLoadFileinfoExtension in LocalSettings.php

		$mime_magic_resource = finfo_open( FILEINFO_MIME ); /* return mime type ala mimetype extension */

		if ( $mime_magic_resource ) {
			$m = finfo_file( $mime_magic_resource, $file );
			finfo_close( $mime_magic_resource );
		} else {
			wfDebug( __METHOD__ . ": finfo_open failed on " . FILEINFO_MIME . "!\n" );
		}
	} elseif ( function_exists( "mime_content_type" ) ) {

		# NOTE: this function is available since PHP 4.3.0, but only if
		# PHP was compiled with --with-mime-magic or, before 4.3.2, with --enable-mime-magic.
		#
		# On Windows, you must set mime_magic.magicfile in php.ini to point to the mime.magic file bundled with PHP;
		# sometimes, this may even be needed under Linux/Unix.
		#
		# Also note that this has been DEPRECATED in favor of the fileinfo extension by PECL, see above.
		# see http://www.php.net/manual/en/ref.mime-magic.php for details.

		$m = mime_content_type( $file );
	} else {
		wfDebug( __METHOD__ . ": no magic mime detector found!\n" );
	}

	if ( $m ) {
		# normalize
		$m = preg_replace( '![;, ].*$!', '', $m ); #strip charset, etc
		$m = trim( $m );
		$m = strtolower( $m );

		if ( strpos( $m, 'unknown' ) !== false ) {
			$m = null;
		} else {
			wfDebug( __METHOD__ . ": magic mime type of $file: $m\n" );
			return $m;
		}
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$i = strrpos( $file, '.' );
		$ext = strtolower( $i ? substr( $file, $i + 1 ) : '' );
	}
	if ( $ext ) {
		if ( $this->isRecognizableExtension( $ext ) ) {
			wfDebug( __METHOD__ . ": refusing to guess mime type for .$ext file, we should have recognized it\n" );
		} else {
			$m = $this->guessTypesForExtension( $ext );
			if ( $m ) {
				wfDebug( __METHOD__ . ": extension mime type of $file: $m\n" );
				return $m;
			}
		}
	}

	// Unknown type
	wfDebug( __METHOD__ . ": failed to guess mime type for $file!\n" );
	return 'unknown/unknown';
}
942 
/**
 * Determine the media type (one of the MEDIATYPE_xxx constants) of a file.
 *
 * The lookups are tried in this order: the full MIME type, the file
 * extension, then the major part of the MIME type (the text before "/").
 * Ogg files are special-cased by sniffing the codec name in the file head.
 *
 * @param string|null $path Path of the file; used to guess the MIME type if
 *  none is given, to sniff Ogg files and for the extension lookup
 * @param string|null $mime MIME type; guessed from $path if not given
 * @return string A MEDIATYPE_xxx constant; MEDIATYPE_UNKNOWN if nothing matched
 */
function getMediaType( $path = null, $mime = null ) {
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If mime type is unknown, guess it
	if ( !$mime ) {
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	if ( $mime == 'application/ogg' && file_exists( $path ) ) {

		// Read a chunk of the file, in binary mode so that no line ending
		// translation is applied to the data
		$f = fopen( $path, "rb" );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		$head = strtolower( $head );

		// This is an UGLY HACK, file should be parsed correctly
		if ( strpos( $head, 'theora' ) !== false ) {
			return MEDIATYPE_VIDEO;
		} elseif ( strpos( $head, 'vorbis' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} elseif ( strpos( $head, 'flac' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} elseif ( strpos( $head, 'speex' ) !== false ) {
			return MEDIATYPE_AUDIO;
		} else {
			return MEDIATYPE_MULTIMEDIA;
		}
	}

	// Check for entry for full mime type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major mime type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$major = substr( $mime, 0, $i );
			$type = $this->findMediaType( $major );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Every lookup above returns as soon as it finds something, so nothing
	// matched if we get here. Say so explicitly; this replaces an empty
	// "if ( !$type )" block that was followed by "return $type".
	return MEDIATYPE_UNKNOWN;
}
1035 
/**
 * Look up the media type registered for a MIME type or a file extension.
 *
 * @param string $extMime A MIME type, a major MIME type, or a file extension
 *  marked by a leading dot. An extension is translated to its registered
 *  MIME types; a MIME type is resolved through the alias table.
 * @return string The first media type that lists one of the candidates,
 *  MEDIATYPE_UNKNOWN if there is none
 */
function findMediaType( $extMime ) {
	if ( strpos( $extMime, '.' ) === 0 ) {
		// If it's an extension, look up the mime types
		$registered = $this->getTypesForExtension( substr( $extMime, 1 ) );
		if ( !$registered ) {
			return MEDIATYPE_UNKNOWN;
		}

		$candidates = explode( ' ', $registered );
	} elseif ( isset( $this->mMimeTypeAliases[$extMime] ) ) {
		// Normalize mime type
		$candidates = array( $this->mMimeTypeAliases[$extMime] );
	} else {
		$candidates = array( $extMime );
	}

	foreach ( $candidates as $candidate ) {
		foreach ( $this->mMediaTypes as $mediaType => $members ) {
			if ( in_array( $candidate, $members, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
1073 
/**
 * Ask the IE content analyzer for the MIME types of a file.
 *
 * This is a plain delegation to IEContentAnalyzer::getRealMimesFromData();
 * the arguments are passed through unchanged.
 *
 * @param string $fileName Name of the file
 * @param string $chunk Chunk of the file's content
 * @param string $proposed Proposed MIME type
 * @return mixed Whatever IEContentAnalyzer::getRealMimesFromData() returns
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
	$analyzer = $this->getIEContentAnalyzer();

	return $analyzer->getRealMimesFromData( $fileName, $chunk, $proposed );
}
1087 
/**
 * Return the IEContentAnalyzer of this object, creating it on first use.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
	if ( $this->mIEAnalyzer === null ) {
		$this->mIEAnalyzer = new IEContentAnalyzer();
	}

	return $this->mIEAnalyzer;
}
1099 }
wfShellExec
wfShellExec( $cmd, &$retval=null, $environ=array(), $limits=array(), $options=array())
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
Definition: GlobalFunctions.php:2804
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
$mime
usually copyright or history_copyright This message must be in HTML not wikitext $subpages will be ignored and the rest of subPageSubtitle() will run. 'SkinTemplateBuildNavUrlsNav_urlsAfterPermalink' whether MediaWiki currently thinks this is a CSS JS page Hooks may change this value to override the return value of Title::isCssOrJsPage(). 'TitleIsAlwaysKnown' whether MediaWiki currently thinks this page is known isMovable() always returns false. $title whether MediaWiki currently thinks this page is movable Hooks may change this value to override the return value of Title::isMovable(). 'TitleIsWikitextPage' whether MediaWiki currently thinks this is a wikitext page Hooks may change this value to override the return value of Title::isWikitextPage() 'TitleMove' use UploadVerification and UploadVerifyFile instead where the first element is the message key and the remaining elements are used as parameters to the message based on mime etc Preferred in most cases over UploadVerification object with all info about the upload string $mime
Definition: hooks.txt:2573
$extensions
$extensions
Definition: importImages.php:62
text
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
$f
$f
Definition: UtfNormalTest2.php:38
wfSuppressWarnings
wfSuppressWarnings( $end=false)
Reference-counted warning suppression.
Definition: GlobalFunctions.php:2387
MEDIATYPE_UNKNOWN
const MEDIATYPE_UNKNOWN
Definition: Defines.php:123
$s
$s
Definition: mergeMessageFileList.php:156
wfRestoreWarnings
wfRestoreWarnings()
Restore error level to previous value.
Definition: GlobalFunctions.php:2417
$lines
$lines
Definition: router.php:65
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
MM_WELL_KNOWN_MIME_INFO
const MM_WELL_KNOWN_MIME_INFO
Defines a set of well known mime info entries. This is used as a fallback to mime.info files.
Definition: MimeMagic.php:94
IEContentAnalyzer
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection a...
Definition: IEContentAnalyzer.php:27
wfDebug
wfDebug( $text, $dest='all')
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:933
MM_WELL_KNOWN_MIME_TYPES
const MM_WELL_KNOWN_MIME_TYPES
Defines a set of well known mime types. This is used as a fallback to mime.types files.
Definition: MimeMagic.php:42
$matches
if(!defined( 'MEDIAWIKI')) if(!isset( $wgVersion)) $matches
Definition: NoLocalSettings.php:33
wfEscapeShellArg
wfEscapeShellArg()
Windows-compatible version of escapeshellarg() Windows doesn't recognise single-quotes in the shell,...
Definition: GlobalFunctions.php:2705
etc
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add etc
Definition: design.txt:12
XmlTypeCheck
Definition: XmlTypeCheck.php:23
$file
if(PHP_SAPI !='cli') $file
Definition: UtfNormalTest2.php:30
it
=Architecture==Two class hierarchies are used to provide the functionality associated with the different content models:*Content interface(and AbstractContent base class) define functionality that acts on the concrete content of a page, and *ContentHandler base class provides functionality specific to a content model, but not acting on concrete content. The most important function of ContentHandler is to act as a factory for the appropriate implementation of Content. These Content objects are to be used by MediaWiki everywhere, instead of passing page content around as text. All manipulation and analysis of page content must be done via the appropriate methods of the Content object. For each content model, a subclass of ContentHandler has to be registered with $wgContentHandlers. The ContentHandler object for a given content model can be obtained using ContentHandler::getForModelID($id). Also Title, WikiPage and Revision now have getContentHandler() methods for convenience. ContentHandler objects are singletons that provide functionality specific to the content type, but not directly acting on the content of some page. ContentHandler::makeEmptyContent() and ContentHandler::unserializeContent() can be used to create a Content object of the appropriate type. However, it is recommended to instead use WikiPage::getContent() resp. Revision::getContent() to get a page 's content as a Content object. These two methods should be the ONLY way in which page content is accessed. Another important function of ContentHandler objects is to define custom action handlers for a content model, see ContentHandler::getActionOverrides(). This is similar to what WikiPage::getActionOverrides() was already doing.==Serialization==With the ContentHandler facility, page content no longer has to be text based. Objects implementing the Content interface are used to represent and handle the content internally. 
For storage and data exchange, each content model supports at least one serialization format via ContentHandler::serializeContent($content). The list of supported formats for a given content model can be accessed using ContentHandler::getSupportedFormats(). Content serialization formats are identified using MIME type like strings. The following formats are built in:*text/x-wiki - wikitext *text/javascript - for js pages *text/css - for css pages *text/plain - for future use, e.g. with plain text messages. *text/html - for future use, e.g. with plain html messages. *application/vnd.php.serialized - for future use with the api and for extensions *application/json - for future use with the api, and for use by extensions *application/xml - for future use with the api, and for use by extensions In PHP, use the corresponding CONTENT_FORMAT_XXX constant. Note that when using the API to access page content, especially action=edit, action=parse and action=query &prop=revisions, the model and format of the content should always be handled explicitly. Without that information, interpretation of the provided content is not reliable. The same applies to XML dumps generated via maintenance/dumpBackup.php or Special:Export. Also note that the API will provide encapsulated, serialized content - so if the API was called with format=json, and contentformat is also json(or rather, application/json), the page content is represented as a string containing an escaped json structure. Extensions that use JSON to serialize some types of page content may provide specialized API modules that allow access to that content in a more natural form.==Compatibility==The ContentHandler facility is introduced in a way that should allow all existing code to keep functioning at least for pages that contain wikitext or other text based content. 
However, a number of functions and hooks have been deprecated in favor of new versions that are aware of the page's content model, and will now generate warnings when used. Most importantly, the following functions have been deprecated:*Revision::getText() and Revision::getRawText() are deprecated in favor of Revision::getContent() *WikiPage::getText() is deprecated in favor of WikiPage::getContent() Also, the old Article::getContent() (which returns text) is superseded by Article::getContentObject(). However, both methods should be avoided since they do not provide clean access to the page's actual content. For instance, they may return a system message for non-existing pages. Use WikiPage::getContent() instead. Code that relies on a textual representation of the page content should eventually be rewritten. However, ContentHandler::getContentText() provides a stop-gap that can be used to get text for a page. Its behavior is controlled by $wgContentHandlerTextFallback it
Definition: contenthandler.txt:107
$args
if( $line===false) $args
Definition: cdb.php:62
$ext
$ext
Definition: NoLocalSettings.php:34
$path
$path
Definition: NoLocalSettings.php:35
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
MEDIATYPE_VIDEO
const MEDIATYPE_VIDEO
Definition: Defines.php:132
MEDIATYPE_MULTIMEDIA
const MEDIATYPE_MULTIMEDIA
Definition: Defines.php:134
$e
if( $useReadline) $e
Definition: eval.php:66
$IP
$IP
Definition: WebStart.php:88
$type
$type
Definition: testCompression.php:46
MEDIATYPE_AUDIO
const MEDIATYPE_AUDIO
Definition: Defines.php:129