MediaWiki  1.23.5
MimeMagic.php
Go to the documentation of this file.
1 <?php
/**
 * Built-in table mapping MIME types to file extensions.
 *
 * One entry per line: the MIME type followed by a space separated list of
 * extensions. MimeMagic::__construct() parses this text first and then
 * appends the contents of $wgMimeTypeFile, if that file is readable.
 */
define( 'MM_WELL_KNOWN_MIME_TYPES', <<<'END_STRING'
application/ogg ogx ogg ogm ogv oga spx
application/pdf pdf
application/vnd.oasis.opendocument.chart odc
application/vnd.oasis.opendocument.chart-template otc
application/vnd.oasis.opendocument.database odb
application/vnd.oasis.opendocument.formula odf
application/vnd.oasis.opendocument.formula-template otf
application/vnd.oasis.opendocument.graphics odg
application/vnd.oasis.opendocument.graphics-template otg
application/vnd.oasis.opendocument.image odi
application/vnd.oasis.opendocument.image-template oti
application/vnd.oasis.opendocument.presentation odp
application/vnd.oasis.opendocument.presentation-template otp
application/vnd.oasis.opendocument.spreadsheet ods
application/vnd.oasis.opendocument.spreadsheet-template ots
application/vnd.oasis.opendocument.text odt
application/vnd.oasis.opendocument.text-master otm
application/vnd.oasis.opendocument.text-template ott
application/vnd.oasis.opendocument.text-web oth
application/x-javascript js
application/x-shockwave-flash swf
audio/midi mid midi kar
audio/mpeg mpga mpa mp2 mp3
audio/x-aiff aif aiff aifc
audio/x-wav wav
audio/ogg oga spx ogg
image/x-bmp bmp
image/gif gif
image/jpeg jpeg jpg jpe
image/png png
image/svg+xml svg
image/svg svg
image/tiff tiff tif
image/vnd.djvu djvu
image/x.djvu djvu
image/x-djvu djvu
image/x-portable-pixmap ppm
image/x-xcf xcf
text/plain txt
text/html html htm
video/ogg ogv ogm ogg
video/mpeg mpg mpeg
END_STRING
);
87 
/**
 * Built-in table of MIME type aliases and media type classifications.
 *
 * One entry per line: a MIME type, optionally followed by further MIME
 * types that MimeMagic::__construct() records as aliases of the first one,
 * and a media type in square brackets. The constructor parses this text
 * first and then appends the contents of $wgMimeInfoFile, if readable.
 */
define( 'MM_WELL_KNOWN_MIME_INFO', <<<'END_STRING'
application/pdf [OFFICE]
application/vnd.oasis.opendocument.chart [OFFICE]
application/vnd.oasis.opendocument.chart-template [OFFICE]
application/vnd.oasis.opendocument.database [OFFICE]
application/vnd.oasis.opendocument.formula [OFFICE]
application/vnd.oasis.opendocument.formula-template [OFFICE]
application/vnd.oasis.opendocument.graphics [OFFICE]
application/vnd.oasis.opendocument.graphics-template [OFFICE]
application/vnd.oasis.opendocument.image [OFFICE]
application/vnd.oasis.opendocument.image-template [OFFICE]
application/vnd.oasis.opendocument.presentation [OFFICE]
application/vnd.oasis.opendocument.presentation-template [OFFICE]
application/vnd.oasis.opendocument.spreadsheet [OFFICE]
application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
application/vnd.oasis.opendocument.text [OFFICE]
application/vnd.oasis.opendocument.text-template [OFFICE]
application/vnd.oasis.opendocument.text-master [OFFICE]
application/vnd.oasis.opendocument.text-web [OFFICE]
text/javascript application/x-javascript [EXECUTABLE]
application/x-shockwave-flash [MULTIMEDIA]
audio/midi [AUDIO]
audio/x-aiff [AUDIO]
audio/x-wav [AUDIO]
audio/mp3 audio/mpeg [AUDIO]
application/ogg audio/ogg video/ogg [MULTIMEDIA]
image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
image/gif [BITMAP]
image/jpeg [BITMAP]
image/png [BITMAP]
image/svg+xml [DRAWING]
image/tiff [BITMAP]
image/vnd.djvu [BITMAP]
image/x-xcf [BITMAP]
image/x-portable-pixmap [BITMAP]
text/plain [TEXT]
text/html [TEXT]
video/ogg [VIDEO]
video/mpeg [VIDEO]
unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
END_STRING
);
136 
144 class MimeMagic {
145 
150  var $mMediaTypes = null;
151 
154  var $mMimeTypeAliases = null;
155 
158  var $mMimeToExt = null;
159 
162  var $mExtToMime = null;
163 
166  var $mIEAnalyzer;
167 
170  private static $instance = null;
171 
/**
 * Builds the lookup tables used by this class.
 *
 * Two line-oriented tables are parsed, each seeded from a built-in constant
 * and optionally extended by a file named in a global variable:
 *  - MM_WELL_KNOWN_MIME_TYPES plus $wgMimeTypeFile fill $mMimeToExt and
 *    $mExtToMime (line format: "mime/type ext1 ext2 ...").
 *  - MM_WELL_KNOWN_MIME_INFO plus $wgMimeInfoFile fill $mMediaTypes and
 *    $mMimeTypeAliases (line format: "mime/type other/type ... [MEDIATYPE]").
 *
 * Lines that are empty, start with '#', or contain no space are skipped.
 * All entries are lower-cased; media type names are upper-cased.
 *
 * Side effect: when a global still holds its relative default
 * ('includes/mime.types' / 'includes/mime.info') it is rewritten in place
 * to a path below $IP.
 */
function __construct() {
    global $wgMimeTypeFile, $wgMimeInfoFile, $IP;

    /*
     * --- load mime.types ---
     */
    $types = MM_WELL_KNOWN_MIME_TYPES;

    if ( $wgMimeTypeFile == 'includes/mime.types' ) {
        $wgMimeTypeFile = "$IP/$wgMimeTypeFile";
    }

    if ( $wgMimeTypeFile ) {
        if ( is_file( $wgMimeTypeFile ) && is_readable( $wgMimeTypeFile ) ) {
            wfDebug( __METHOD__ . ": loading mime types from $wgMimeTypeFile\n" );
            $types .= "\n";
            $types .= file_get_contents( $wgMimeTypeFile );
        } else {
            wfDebug( __METHOD__ . ": can't load mime types from $wgMimeTypeFile\n" );
        }
    } else {
        wfDebug( __METHOD__ . ": no mime types file defined, using build-ins only.\n" );
    }

    // Normalise line endings and tabs so the parser only sees "\n" and " ".
    $types = str_replace( array( "\r\n", "\n\r", "\n\n", "\r\r", "\r" ), "\n", $types );
    $types = str_replace( "\t", " ", $types );

    $this->mMimeToExt = array();
    // The ext->mime map must be initialised here as well; previously an
    // unrelated, undeclared property was initialised by mistake.
    $this->mExtToMime = array();

    $lines = explode( "\n", $types );
    foreach ( $lines as $s ) {
        $s = trim( $s );
        if ( empty( $s ) ) {
            continue;
        }
        if ( strpos( $s, '#' ) === 0 ) {
            // comment line
            continue;
        }

        $s = strtolower( $s );
        $i = strpos( $s, ' ' );

        if ( $i === false ) {
            // a mime type without any extension
            continue;
        }

        $mime = substr( $s, 0, $i );
        $ext = trim( substr( $s, $i + 1 ) );

        if ( empty( $ext ) ) {
            continue;
        }

        // Repeated entries for the same mime type accumulate extensions.
        if ( !empty( $this->mMimeToExt[$mime] ) ) {
            $this->mMimeToExt[$mime] .= ' ' . $ext;
        } else {
            $this->mMimeToExt[$mime] = $ext;
        }

        $extensions = explode( ' ', $ext );

        foreach ( $extensions as $e ) {
            $e = trim( $e );
            if ( empty( $e ) ) {
                continue;
            }

            // An extension may map to several mime types, space separated,
            // in the order the entries were read.
            if ( !empty( $this->mExtToMime[$e] ) ) {
                $this->mExtToMime[$e] .= ' ' . $mime;
            } else {
                $this->mExtToMime[$e] = $mime;
            }
        }
    }

    /*
     * --- load mime.info ---
     */
    if ( $wgMimeInfoFile == 'includes/mime.info' ) {
        $wgMimeInfoFile = "$IP/$wgMimeInfoFile";
    }

    $info = MM_WELL_KNOWN_MIME_INFO;

    if ( $wgMimeInfoFile ) {
        if ( is_file( $wgMimeInfoFile ) && is_readable( $wgMimeInfoFile ) ) {
            wfDebug( __METHOD__ . ": loading mime info from $wgMimeInfoFile\n" );
            $info .= "\n";
            $info .= file_get_contents( $wgMimeInfoFile );
        } else {
            wfDebug( __METHOD__ . ": can't load mime info from $wgMimeInfoFile\n" );
        }
    } else {
        wfDebug( __METHOD__ . ": no mime info file defined, using build-ins only.\n" );
    }

    $info = str_replace( array( "\r\n", "\n\r", "\n\n", "\r\r", "\r" ), "\n", $info );
    $info = str_replace( "\t", " ", $info );

    $this->mMimeTypeAliases = array();
    $this->mMediaTypes = array();

    $lines = explode( "\n", $info );
    foreach ( $lines as $s ) {
        $s = trim( $s );
        if ( empty( $s ) ) {
            continue;
        }
        if ( strpos( $s, '#' ) === 0 ) {
            continue;
        }

        $s = strtolower( $s );
        $i = strpos( $s, ' ' );

        if ( $i === false ) {
            continue;
        }

        // Pull the "[MEDIATYPE]" marker out of the line; what remains is
        // the list of mime types the marker applies to.
        $match = array();
        if ( preg_match( '!\[\s*(\w+)\s*\]!', $s, $match ) ) {
            $s = preg_replace( '!\[\s*(\w+)\s*\]!', '', $s );
            $mtype = trim( strtoupper( $match[1] ) );
        } else {
            $mtype = MEDIATYPE_UNKNOWN;
        }

        $m = explode( ' ', $s );

        if ( !isset( $this->mMediaTypes[$mtype] ) ) {
            $this->mMediaTypes[$mtype] = array();
        }

        foreach ( $m as $mime ) {
            $mime = trim( $mime );
            if ( empty( $mime ) ) {
                continue;
            }

            $this->mMediaTypes[$mtype][] = $mime;
        }

        // The first type on the line is the main one; every following
        // token is registered as an alias pointing to it.
        if ( count( $m ) > 1 ) {
            $main = $m[0];
            for ( $i = 1; $i < count( $m ); $i += 1 ) {
                $mime = $m[$i];
                $this->mMimeTypeAliases[$mime] = $main;
            }
        }
    }
}
334 
/**
 * Returns the shared MimeMagic object, creating it on the first call.
 *
 * @return MimeMagic
 */
public static function singleton() {
    if ( is_null( self::$instance ) ) {
        self::$instance = new MimeMagic();
    }

    return self::$instance;
}
345 
/**
 * Returns the extensions registered for a MIME type.
 *
 * The type is lower-cased and looked up directly first; if it has no
 * entry of its own but is a known alias, the aliased type is looked up.
 *
 * @param string $mime MIME type
 * @return string|null Space separated list of extensions, or null if none is known
 */
public function getExtensionsForType( $mime ) {
    $key = strtolower( $mime );

    // Only fall back to the canonical type when there is no direct entry
    if ( !isset( $this->mMimeToExt[$key] ) && isset( $this->mMimeTypeAliases[$key] ) ) {
        $key = $this->mMimeTypeAliases[$key];
    }

    return isset( $this->mMimeToExt[$key] ) ? $this->mMimeToExt[$key] : null;
}
372 
/**
 * Returns the MIME types registered for a file extension.
 *
 * @param string $ext File extension, without the dot; matched case-insensitively
 * @return string|null Space separated list of MIME types, or null if the extension is unknown
 */
public function getTypesForExtension( $ext ) {
    $key = strtolower( $ext );

    if ( isset( $this->mExtToMime[$key] ) ) {
        return $this->mExtToMime[$key];
    }

    return null;
}
386 
/**
 * Returns a single MIME type for a file extension: the first one in the
 * list that getTypesForExtension() reports for it.
 *
 * @param string $ext File extension, without the dot
 * @return string|null The first registered MIME type, or null if the extension is unknown
 */
public function guessTypesForExtension( $ext ) {
    $all = $this->getTypesForExtension( $ext );

    if ( $all === null ) {
        return null;
    }

    // TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
    // Cut everything from the first whitespace on, leaving the first type.
    return preg_replace( '/\s.*$/', '', trim( $all ) );
}
406 
/**
 * Tells whether a file extension is one of those registered for a MIME type.
 *
 * @param string $extension File extension, without the dot; matched case-insensitively
 * @param string $mime MIME type
 * @return bool|null True or false for a known MIME type, null if no
 *   extensions are registered for the MIME type at all
 */
public function isMatchingExtension( $extension, $mime ) {
    $registered = $this->getExtensionsForType( $mime );

    if ( !$registered ) {
        // Unknown mime type
        return null;
    }

    return in_array( strtolower( $extension ), explode( ' ', $registered ) );
}
428 
/**
 * Tells whether a MIME type is in the fixed list of image types kept by
 * this method (the list follows getimagesize() / image_type_to_mime()).
 *
 * @param string $mime MIME type, compared as given (no case folding)
 * @return bool
 */
public function isPHPImageType( $mime ) {
    static $phpImageTypes = array(
        'image/gif',
        'image/jpeg',
        'image/png',
        'image/x-bmp',
        'image/xbm',
        'image/tiff',
        'image/jp2',
        'image/jpeg2000',
        'image/iff',
        'image/x-xbitmap',
        'image/vnd.wap.wbmp',
        'image/vnd.xiff',
        'image/x-photoshop',
        'application/x-shockwave-flash',
    );

    return in_array( $mime, $phpImageTypes );
}
451 
/**
 * Tells whether an extension belongs to a file format that the content
 * based detection in this class is expected to identify. Callers use this
 * to refuse guessing a type from such an extension when content detection
 * came up empty.
 *
 * @param string $extension File extension, without the dot; matched case-insensitively
 * @return bool
 */
function isRecognizableExtension( $extension ) {
    static $recognizable = array(
        // Types recognized by getimagesize()
        'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd', 'bmp', 'tiff', 'tif',
        'jpc', 'jp2', 'jpx', 'jb2', 'swc', 'iff', 'wbmp', 'xbm',

        // Formats we recognize magic numbers for
        'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx', 'mid', 'pdf', 'wmf',
        'xcf', 'webm', 'mkv', 'mka', 'webp',

        // XML formats we sure hope we recognize reliably
        'svg',
    );

    $needle = strtolower( $extension );

    return in_array( $needle, $recognizable );
}
481 
/**
 * Refines a content-detected MIME type using the file extension.
 *
 * - 'unknown/unknown' is replaced by the type guessed from the extension,
 *   unless the extension is one isRecognizableExtension() accepts (such a
 *   file should have been identified by its content, so no guess is made).
 * - 'application/x-opc+zip' is replaced by the type guessed from the
 *   extension if the extension is registered for OPC files, and by
 *   'application/zip' otherwise.
 * - Any other type is kept.
 * Finally the result is mapped through the alias table.
 *
 * @param string $mime MIME type detected from the file content
 * @param string $ext File extension, without the dot
 * @return string|null Improved MIME type; null when the extension guess yields nothing
 */
public function improveTypeFromExtension( $mime, $ext ) {
    if ( $mime === 'unknown/unknown' ) {
        if ( !$this->isRecognizableExtension( $ext ) ) {
            // Not something we can detect, so simply
            // trust the file extension
            $mime = $this->guessTypesForExtension( $ext );
        } else {
            wfDebug( __METHOD__ . ': refusing to guess mime type for .' .
                "$ext file, we should have recognized it\n" );
        }
    } elseif ( $mime === 'application/x-opc+zip' ) {
        if ( !$this->isMatchingExtension( $ext, $mime ) ) {
            wfDebug( __METHOD__ . ": refusing to guess better type for $mime file, " .
                ".$ext is not a known OPC extension.\n" );
            $mime = 'application/zip';
        } else {
            // A known file extension for an OPC file,
            // find the proper mime type for that file extension
            $mime = $this->guessTypesForExtension( $ext );
        }
    }

    // Resolve aliases to the main type
    $mime = isset( $this->mMimeTypeAliases[$mime] ) ? $this->mMimeTypeAliases[$mime] : $mime;

    wfDebug( __METHOD__ . ": improved mime type for .$ext: $mime\n" );

    return $mime;
}
530 
/**
 * Determines the MIME type of a file.
 *
 * The built-in checks of doGuessMimeType() run first; if they produce no
 * result, detectMimeType() is used. The result is mapped through the
 * alias table before it is returned.
 *
 * @param string $file Path of the file to examine
 * @param mixed $ext Deprecated, passed through to the detection methods.
 *   Any true-ish value triggers a deprecation notice in the debug log.
 * @return string MIME type
 */
public function guessMimeType( $file, $ext = true ) {
    if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
        wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
            "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
    }

    $detected = $this->doGuessMimeType( $file, $ext );

    if ( !$detected ) {
        wfDebug( __METHOD__ . ": internal type detection failed for $file (.$ext)...\n" );
        $detected = $this->detectMimeType( $file, $ext );
    }

    $mime = isset( $this->mMimeTypeAliases[$detected] )
        ? $this->mMimeTypeAliases[$detected]
        : $detected;

    wfDebug( __METHOD__ . ": guessed mime type of $file: $mime\n" );

    return $mime;
}
565 
/**
 * Guesses the MIME type of a file from its content, using checks built
 * into this class.
 *
 * The first 1024 bytes and up to the last 65558 bytes of the file are
 * read and examined, in this order: hardcoded magic numbers, EBML
 * (Matroska/WebM), WebP, PHP open tags, well-formed XML, shebang lines,
 * ZIP based formats, getimagesize(), and DjVu.
 *
 * @param string $file Path of the file to examine
 * @param mixed $ext Deprecated; only handed on to detectZipType()
 * @return string|bool The MIME type, 'unknown/unknown' if the file cannot
 *   be read or is an unidentified EBML file, or false if no check matched
 * @throws MWException If seeking to the tail of the file fails
 */
private function doGuessMimeType( $file, $ext ) { // TODO: remove $ext param
    // Read a chunk of the file. A missing or unreadable file is an expected
    // condition here, so the PHP warning from fopen() is suppressed.
    wfSuppressWarnings();
    $f = fopen( $file, 'rb' );
    wfRestoreWarnings();

    if ( !$f ) {
        return 'unknown/unknown';
    }

    $fsize = filesize( $file );
    if ( $fsize === false ) {
        // Do not leak the handle on the early exit
        fclose( $f );
        return 'unknown/unknown';
    }

    $head = fread( $f, 1024 );
    $tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
    if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
        fclose( $f );
        throw new MWException(
            "Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
    }
    // fread() must not be asked for zero bytes (empty file)
    $tail = $tailLength ? fread( $f, $tailLength ) : '';
    fclose( $f );

    wfDebug( __METHOD__ . ": analyzing head and tail of $file for magic numbers.\n" );

    // Hardcode a few magic number checks...
    $headers = array(
        // Multimedia...
        'MThd' => 'audio/midi',
        'OggS' => 'application/ogg',

        // Image formats...
        // Note that WMF may have a bare header, no magic number.
        "\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
        "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
        '%PDF' => 'application/pdf',
        'gimp xcf' => 'image/x-xcf',

        // Some forbidden fruit...
        'MZ' => 'application/octet-stream', // DOS/Windows executable
        "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
        "\x7fELF" => 'application/octet-stream', // ELF binary
    );

    foreach ( $headers as $magic => $candidate ) {
        if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
            wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" );
            return $candidate;
        }
    }

    /* Look for WebM and Matroska files */
    if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
        $doctype = strpos( $head, "\x42\x82" );
        if ( $doctype ) {
            // Next byte is datasize, then data (sizes larger than 1 byte are very stupid muxers)
            $data = substr( $head, $doctype + 3, 8 );
            if ( strncmp( $data, "matroska", 8 ) == 0 ) {
                wfDebug( __METHOD__ . ": recognized file as video/x-matroska\n" );
                return "video/x-matroska";
            } elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
                wfDebug( __METHOD__ . ": recognized file as video/webm\n" );
                return "video/webm";
            }
        }
        wfDebug( __METHOD__ . ": unknown EBML file\n" );
        return "unknown/unknown";
    }

    /* Look for WebP */
    if ( strncmp( $head, "RIFF", 4 ) == 0 && strncmp( substr( $head, 8, 8 ), "WEBPVP8 ", 8 ) == 0 ) {
        wfDebug( __METHOD__ . ": recognized file as image/webp\n" );
        return "image/webp";
    }

    // Look for a PHP open tag anywhere in the head: the plain ASCII form,
    // and byte patterns with a NUL after each character (16-bit encodings)
    // for "<?php" and for "<?" followed by space, newline, tab or "=".
    if ( ( strpos( $head, '<?php' ) !== false ) ||
        ( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
        ( strpos( $head, "<\x00?\x00 " ) !== false ) ||
        ( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
        ( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
        ( strpos( $head, "<\x00?\x00=" ) !== false ) ) {

        wfDebug( __METHOD__ . ": recognized $file as application/x-php\n" );
        return 'application/x-php';
    }

    // Well-formed XML: map the root element to a type via $wgXMLMimeTypes
    $xml = new XmlTypeCheck( $file );
    if ( $xml->wellFormed ) {
        global $wgXMLMimeTypes;
        if ( isset( $wgXMLMimeTypes[$xml->getRootElement()] ) ) {
            return $wgXMLMimeTypes[$xml->getRootElement()];
        } else {
            return 'application/xml';
        }
    }

    // Look for a script: "#!" at the very start, optionally behind a BOM
    $script_type = null;

    # detect by shebang
    // Each prefix is compared over exactly its own length. The UTF-16
    // checks used to compare a 7 byte slice of the head against shorter
    // literals and therefore could never match a real file.
    $shebangs = array(
        'ASCII' => "#!",
        'UTF-8' => "\xef\xbb\xbf#!",
        'UTF-16BE' => "\xfe\xff\x00#\x00!",
        'UTF-16LE' => "\xff\xfe#\x00!\x00",
    );
    foreach ( $shebangs as $encoding => $prefix ) {
        if ( strncmp( $head, $prefix, strlen( $prefix ) ) === 0 ) {
            $script_type = $encoding;
            break;
        }
    }

    if ( $script_type ) {
        if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
            // Quick and dirty fold down to ASCII!
            $pack = array( 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' );
            $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
            $head = '';
            foreach ( $chars as $codepoint ) {
                if ( $codepoint < 128 ) {
                    $head .= chr( $codepoint );
                } else {
                    $head .= '?';
                }
            }
        }

        $match = array();

        // The word after the last slash of the interpreter path names the type
        if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
            $mime = "application/x-{$match[2]}";
            wfDebug( __METHOD__ . ": shell script recognized as $mime\n" );
            return $mime;
        }
    }

    // Check for ZIP variants (before getimagesize)
    if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
        wfDebug( __METHOD__ . ": ZIP header present in $file\n" );
        return $this->detectZipType( $head, $tail, $ext );
    }

    // getimagesize() complains about anything that is not an image it
    // knows; that is an expected outcome here, so silence it.
    wfSuppressWarnings();
    $gis = getimagesize( $file );
    wfRestoreWarnings();

    if ( $gis && isset( $gis['mime'] ) ) {
        $mime = $gis['mime'];
        wfDebug( __METHOD__ . ": getimagesize detected $file as $mime\n" );
        return $mime;
    }

    // Also test DjVu
    $deja = new DjVuImage( $file );
    if ( $deja->isValid() ) {
        wfDebug( __METHOD__ . ": detected $file as image/vnd.djvu\n" );
        return 'image/vnd.djvu';
    }

    return false;
}
751 
/**
 * Identifies the document format of a file already known to be a ZIP
 * archive (or to carry a ZIP trailer), by looking at the name of the
 * first archive entry.
 *
 * Recognized are OpenDocument files (a "mimetype" entry followed by the
 * type), Open Packaging Conventions files (a "[Content_Types].xml" entry),
 * and files that start with the 8 byte signature checked below and carry
 * an OPC archive in their tail.
 *
 * @param string $header The first bytes of the file; the entry name is
 *   expected at offset 30
 * @param string|null $tail The last bytes of the file
 * @param string|bool $ext Deprecated. If a string, it is used to narrow an
 *   OPC archive down to a concrete type.
 * @return string MIME type, 'application/zip' if nothing more specific was found
 */
function detectZipType( $header, $tail = null, $ext = false ) {
    if ( $ext ) { # TODO: remove $ext param
        wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
            "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
    }

    // Longer names come before their prefixes so the alternation below
    // picks the most specific one.
    $opendocTypes = array(
        'chart-template', 'chart',
        'formula-template', 'formula',
        'graphics-template', 'graphics',
        'image-template', 'image',
        'presentation-template', 'presentation',
        'spreadsheet-template', 'spreadsheet',
        'text-template', 'text-master', 'text-web', 'text',
    );

    // http://lists.oasis-open.org/archives/office/200505/msg00006.html
    $types = '(?:' . implode( '|', $opendocTypes ) . ')';
    $opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";

    $openxmlRegex = "/^\[Content_Types\].xml/";

    // Name of the first archive entry (follows the 30 byte local file header)
    $firstEntry = substr( $header, 30 );

    $mime = 'application/zip';
    $found = array();

    if ( preg_match( $opendocRegex, $firstEntry, $found ) ) {
        $mime = $found[1];
        wfDebug( __METHOD__ . ": detected $mime from ZIP archive\n" );
    } elseif ( preg_match( $openxmlRegex, $firstEntry ) ) {
        $mime = "application/x-opc+zip";
        # TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
        if ( $ext !== true && $ext !== false ) {
            if ( !$this->isMatchingExtension( $ext, $mime ) ) {
                $mime = "application/zip";
            } else {
                /* A known file extension for an OPC file,
                 * find the proper mime type for that file extension
                 */
                $mime = $this->guessTypesForExtension( $ext );
            }
        }
        wfDebug( __METHOD__ . ": detected an Open Packaging Conventions archive: $mime\n" );
    } elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
        ( $localHeaderAt = strpos( $tail, "PK\x03\x04" ) ) !== false &&
        preg_match( $openxmlRegex, substr( $tail, $localHeaderAt + 30 ) ) ) {

        // Signatures at offset 512 select the concrete type
        $powerpointMarks = array(
            "\xEC\xA5\xC1\x00\x0E\x00",
            "\xEC\xA5\xC1\x00\x1C\x00",
            "\xEC\xA5\xC1\x00\x43\x00",
        );
        $excelMarks = array(
            "\xFD\xFF\xFF\xFF\x10\x00",
            "\xFD\xFF\xFF\xFF\x1F\x00",
            "\xFD\xFF\xFF\xFF\x22\x00",
            "\xFD\xFF\xFF\xFF\x23\x00",
            "\xFD\xFF\xFF\xFF\x28\x00",
            "\xFD\xFF\xFF\xFF\x29\x00",
            "\xFD\xFF\xFF\xFF\x10\x02",
            "\xFD\xFF\xFF\xFF\x1F\x02",
            "\xFD\xFF\xFF\xFF\x22\x02",
            "\xFD\xFF\xFF\xFF\x23\x02",
            "\xFD\xFF\xFF\xFF\x28\x02",
            "\xFD\xFF\xFF\xFF\x29\x02",
        );

        if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
            $mime = "application/msword";
        }

        // May override the assignment just made above
        $mark = substr( $header, 512, 6 );
        if ( in_array( $mark, $powerpointMarks ) ) {
            $mime = "application/vnd.ms-powerpoint";
        } elseif ( in_array( $mark, $excelMarks ) ) {
            $mime = "application/vnd.msexcel";
        }

        wfDebug( __METHOD__ . ": detected a MS Office document with OPC trailer\n" );
    } else {
        wfDebug( __METHOD__ . ": unable to identify type of ZIP archive\n" );
    }

    return $mime;
}
851 
/**
 * Detects the MIME type of a file with an external facility.
 *
 * The first available of the following is used: the shell command in
 * $wgMimeDetectorCommand, the fileinfo functions, mime_content_type().
 * If none of them yields a usable type, the file extension may be used
 * as a fallback (see $ext).
 *
 * @param string $file Path of the file to examine
 * @param string|bool $ext Deprecated. Extension to fall back to; true means
 *   "take it from the file name", false disables the fallback.
 * @return string MIME type, 'unknown/unknown' if detection failed
 */
private function detectMimeType( $file, $ext = true ) {
    global $wgMimeDetectorCommand;

    if ( $ext ) { # TODO: make $ext default to false. Or better, remove it.
        wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
    }

    $m = null;
    if ( $wgMimeDetectorCommand ) {
        // The file name is the command's argument. It must be shell-escaped;
        // previously the variable was never set, so the command ran
        // without any file name.
        $args = wfEscapeShellArg( $file );
        $m = wfShellExec( "$wgMimeDetectorCommand $args" );
    } elseif ( function_exists( "finfo_open" ) && function_exists( "finfo_file" ) ) {

        # This required the fileinfo extension by PECL,
        # see http://pecl.php.net/package/fileinfo
        # This must be compiled into PHP
        #
        # finfo is the official replacement for the deprecated
        # mime_content_type function, see below.
        #
        # If you may need to load the fileinfo extension at runtime, set
        # $wgLoadFileinfoExtension in LocalSettings.php

        $mime_magic_resource = finfo_open( FILEINFO_MIME ); /* return mime type ala mimetype extension */

        if ( $mime_magic_resource ) {
            $m = finfo_file( $mime_magic_resource, $file );
            finfo_close( $mime_magic_resource );
        } else {
            wfDebug( __METHOD__ . ": finfo_open failed on " . FILEINFO_MIME . "!\n" );
        }
    } elseif ( function_exists( "mime_content_type" ) ) {

        # NOTE: this function is available since PHP 4.3.0, but only if
        # PHP was compiled with --with-mime-magic or, before 4.3.2, with --enable-mime-magic.
        #
        # On Windows, you must set mime_magic.magicfile in php.ini to point to the mime.magic file bundled with PHP;
        # sometimes, this may even be needed under linux/unix.
        #
        # Also note that this has been DEPRECATED in favor of the fileinfo extension by PECL, see above.
        # see http://www.php.net/manual/en/ref.mime-magic.php for details.

        $m = mime_content_type( $file );
    } else {
        wfDebug( __METHOD__ . ": no magic mime detector found!\n" );
    }

    if ( $m ) {
        # normalize
        $m = preg_replace( '![;, ].*$!', '', $m ); #strip charset, etc
        $m = trim( $m );
        $m = strtolower( $m );

        if ( strpos( $m, 'unknown' ) !== false ) {
            // Treat any "unknown" answer as no answer and go on to the fallback
            $m = null;
        } else {
            wfDebug( __METHOD__ . ": magic mime type of $file: $m\n" );
            return $m;
        }
    }

    // If desired, look at extension as a fallback.
    if ( $ext === true ) {
        $i = strrpos( $file, '.' );
        $ext = strtolower( $i ? substr( $file, $i + 1 ) : '' );
    }
    if ( $ext ) {
        if ( $this->isRecognizableExtension( $ext ) ) {
            wfDebug( __METHOD__ . ": refusing to guess mime type for .$ext file, we should have recognized it\n" );
        } else {
            $m = $this->guessTypesForExtension( $ext );
            if ( $m ) {
                wfDebug( __METHOD__ . ": extension mime type of $file: $m\n" );
                return $m;
            }
        }
    }

    // Unknown type
    wfDebug( __METHOD__ . ": failed to guess mime type for $file!\n" );
    return 'unknown/unknown';
}
952 
/**
 * Determines the media type (one of the MEDIATYPE_xxx constants) of a
 * file, from its path and/or its MIME type.
 *
 * If no MIME type is given it is guessed from the file content. Ogg files
 * get special treatment: the start of the file is searched for codec
 * names to tell video from audio. Otherwise the media type is looked up
 * by the full MIME type, then by the file extension, then by the major
 * part of the MIME type.
 *
 * @param string|null $path Full path to the file; if null, $mime must be given
 * @param string|null $mime MIME type; if null it is guessed from $path
 * @return string|int A MEDIATYPE_xxx constant (or whatever key the media
 *   type table holds for the match); MEDIATYPE_UNKNOWN if nothing matched
 */
function getMediaType( $path = null, $mime = null ) {
    if ( !$mime && !$path ) {
        return MEDIATYPE_UNKNOWN;
    }

    // If mime type is unknown, guess it
    if ( !$mime ) {
        $mime = $this->guessMimeType( $path, false );
    }

    // Special code for ogg - detect if it's video (theora),
    // else label it as sound.
    if ( $mime == 'application/ogg' && file_exists( $path ) ) {

        // Read a chunk of the file. Ogg is a binary format, so open it in
        // binary mode rather than text mode.
        $f = fopen( $path, "rb" );
        if ( !$f ) {
            return MEDIATYPE_UNKNOWN;
        }
        $head = fread( $f, 256 );
        fclose( $f );

        $head = strtolower( $head );

        // This is an UGLY HACK, file should be parsed correctly
        if ( strpos( $head, 'theora' ) !== false ) {
            return MEDIATYPE_VIDEO;
        } elseif ( strpos( $head, 'vorbis' ) !== false ) {
            return MEDIATYPE_AUDIO;
        } elseif ( strpos( $head, 'flac' ) !== false ) {
            return MEDIATYPE_AUDIO;
        } elseif ( strpos( $head, 'speex' ) !== false ) {
            return MEDIATYPE_AUDIO;
        } else {
            return MEDIATYPE_MULTIMEDIA;
        }
    }

    // Set up front so the fallback at the end never reads an unset variable
    $type = MEDIATYPE_UNKNOWN;

    // Check for entry for full mime type
    if ( $mime ) {
        $type = $this->findMediaType( $mime );
        if ( $type !== MEDIATYPE_UNKNOWN ) {
            return $type;
        }
    }

    // Check for entry for file extension
    if ( $path ) {
        $i = strrpos( $path, '.' );
        $e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

        // TODO: look at multi-extension if this fails, parse from full path
        $type = $this->findMediaType( '.' . $e );
        if ( $type !== MEDIATYPE_UNKNOWN ) {
            return $type;
        }
    }

    // Check major mime type
    if ( $mime ) {
        $i = strpos( $mime, '/' );
        if ( $i !== false ) {
            $major = substr( $mime, 0, $i );
            $type = $this->findMediaType( $major );
            if ( $type !== MEDIATYPE_UNKNOWN ) {
                return $type;
            }
        }
    }

    // Nothing matched: always report the unknown type. This branch used to
    // have an empty body.
    if ( !$type ) {
        $type = MEDIATYPE_UNKNOWN;
    }

    return $type;
}
1045 
/**
 * Looks up the media type for a MIME type or a file extension in the
 * media type table.
 *
 * @param string $extMime A MIME type, a major MIME type such as "image",
 *   or a file extension with a leading dot such as ".png". For an
 *   extension, each MIME type registered for it is tried in turn.
 * @return string|int Key of the first media type table entry listing one
 *   of the candidates, or MEDIATYPE_UNKNOWN if there is none
 */
function findMediaType( $extMime ) {
    if ( strpos( $extMime, '.' ) === 0 ) {
        // If it's an extension, look up the mime types
        $registered = $this->getTypesForExtension( substr( $extMime, 1 ) );
        if ( !$registered ) {
            return MEDIATYPE_UNKNOWN;
        }

        $candidates = explode( ' ', $registered );
    } elseif ( isset( $this->mMimeTypeAliases[$extMime] ) ) {
        // Normalize mime type
        $candidates = array( $this->mMimeTypeAliases[$extMime] );
    } else {
        $candidates = array( $extMime );
    }

    foreach ( $candidates as $candidate ) {
        foreach ( $this->mMediaTypes as $mediaType => $members ) {
            if ( in_array( $candidate, $members, true ) ) {
                return $mediaType;
            }
        }
    }

    return MEDIATYPE_UNKNOWN;
}
1083 
/**
 * Asks the IEContentAnalyzer which MIME types it derives for a file.
 *
 * All three arguments are handed unchanged to
 * IEContentAnalyzer::getRealMimesFromData(), whose result is returned.
 *
 * @param string $fileName File name
 * @param string $chunk Chunk of the file's content
 * @param string $proposed Proposed MIME type
 * @return mixed Result of IEContentAnalyzer::getRealMimesFromData()
 */
public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
    return $this->getIEContentAnalyzer()
        ->getRealMimesFromData( $fileName, $chunk, $proposed );
}
1097 
/**
 * Returns the IEContentAnalyzer of this object, creating it on first use.
 *
 * @return IEContentAnalyzer
 */
protected function getIEContentAnalyzer() {
    if ( $this->mIEAnalyzer === null ) {
        $this->mIEAnalyzer = new IEContentAnalyzer();
    }

    return $this->mIEAnalyzer;
}
1109 }
wfShellExec
wfShellExec( $cmd, &$retval=null, $environ=array(), $limits=array(), $options=array())
Execute a shell command, with time and memory limits mirrored from the PHP configuration if supported...
Definition: GlobalFunctions.php:2804
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
$mime
usually copyright or history_copyright This message must be in HTML not wikitext $subpages will be ignored and the rest of subPageSubtitle() will run. 'SkinTemplateBuildNavUrlsNav_urlsAfterPermalink' whether MediaWiki currently thinks this is a CSS JS page Hooks may change this value to override the return value of Title::isCssOrJsPage(). 'TitleIsAlwaysKnown' whether MediaWiki currently thinks this page is known isMovable() always returns false. $title whether MediaWiki currently thinks this page is movable Hooks may change this value to override the return value of Title::isMovable(). 'TitleIsWikitextPage' whether MediaWiki currently thinks this is a wikitext page Hooks may change this value to override the return value of Title::isWikitextPage() 'TitleMove' use UploadVerification and UploadVerifyFile instead where the first element is the message key and the remaining elements are used as parameters to the message based on mime etc Preferred in most cases over UploadVerification object with all info about the upload string $mime
Definition: hooks.txt:2573
$extensions
$extensions
Definition: importImages.php:62
text
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add text
Definition: design.txt:12
$f
$f
Definition: UtfNormalTest2.php:38
wfSuppressWarnings
wfSuppressWarnings( $end=false)
Reference-counted warning suppression.
Definition: GlobalFunctions.php:2387
MEDIATYPE_UNKNOWN
const MEDIATYPE_UNKNOWN
Definition: Defines.php:123
$s
$s
Definition: mergeMessageFileList.php:156
MWException
MediaWiki exception.
Definition: MWException.php:26
wfRestoreWarnings
wfRestoreWarnings()
Restore error level to previous value.
Definition: GlobalFunctions.php:2417
$lines
$lines
Definition: router.php:65
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
global
when a variable name is used in a it is silently declared as a new masking the global
Definition: design.txt:93
MM_WELL_KNOWN_MIME_INFO
const MM_WELL_KNOWN_MIME_INFO
Defines a set of well known mime info entries This is used as a fallback to mime.info files.
Definition: MimeMagic.php:94
IEContentAnalyzer
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection a...
Definition: IEContentAnalyzer.php:27
wfDebug
wfDebug( $text, $dest='all')
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:933
MM_WELL_KNOWN_MIME_TYPES
const MM_WELL_KNOWN_MIME_TYPES
Defines a set of well known mime types This is used as a fallback to mime.types files.
Definition: MimeMagic.php:42
$matches
if(!defined( 'MEDIAWIKI')) if(!isset( $wgVersion)) $matches
Definition: NoLocalSettings.php:33
wfEscapeShellArg
wfEscapeShellArg()
Windows-compatible version of escapeshellarg() Windows doesn't recognise single-quotes in the shell,...
Definition: GlobalFunctions.php:2705
etc
design txt This is a brief overview of the new design More thorough and up to date information is available on the documentation wiki at etc Handles the details of getting and saving to the user table of the and dealing with sessions and cookies OutputPage Encapsulates the entire HTML page that will be sent in response to any server request It is used by calling its functions to add etc
Definition: design.txt:12
XmlTypeCheck
Definition: XmlTypeCheck.php:23
$file
if(PHP_SAPI !='cli') $file
Definition: UtfNormalTest2.php:30
it
=Architecture==Two class hierarchies are used to provide the functionality associated with the different content models:*Content interface(and AbstractContent base class) define functionality that acts on the concrete content of a page, and *ContentHandler base class provides functionality specific to a content model, but not acting on concrete content. The most important function of ContentHandler is to act as a factory for the appropriate implementation of Content. These Content objects are to be used by MediaWiki everywhere, instead of passing page content around as text. All manipulation and analysis of page content must be done via the appropriate methods of the Content object. For each content model, a subclass of ContentHandler has to be registered with $wgContentHandlers. The ContentHandler object for a given content model can be obtained using ContentHandler::getForModelID($id). Also Title, WikiPage and Revision now have getContentHandler() methods for convenience. ContentHandler objects are singletons that provide functionality specific to the content type, but not directly acting on the content of some page. ContentHandler::makeEmptyContent() and ContentHandler::unserializeContent() can be used to create a Content object of the appropriate type. However, it is recommended to instead use WikiPage::getContent() resp. Revision::getContent() to get a page 's content as a Content object. These two methods should be the ONLY way in which page content is accessed. Another important function of ContentHandler objects is to define custom action handlers for a content model, see ContentHandler::getActionOverrides(). This is similar to what WikiPage::getActionOverrides() was already doing.==Serialization==With the ContentHandler facility, page content no longer has to be text based. Objects implementing the Content interface are used to represent and handle the content internally. 
For storage and data exchange, each content model supports at least one serialization format via ContentHandler::serializeContent($content). The list of supported formats for a given content model can be accessed using ContentHandler::getSupportedFormats(). Content serialization formats are identified using MIME type like strings. The following formats are built in:*text/x-wiki - wikitext *text/javascript - for js pages *text/css - for css pages *text/plain - for future use, e.g. with plain text messages. *text/html - for future use, e.g. with plain html messages. *application/vnd.php.serialized - for future use with the api and for extensions *application/json - for future use with the api, and for use by extensions *application/xml - for future use with the api, and for use by extensions In PHP, use the corresponding CONTENT_FORMAT_XXX constant. Note that when using the API to access page content, especially action=edit, action=parse and action=query &prop=revisions, the model and format of the content should always be handled explicitly. Without that information, interpretation of the provided content is not reliable. The same applies to XML dumps generated via maintenance/dumpBackup.php or Special:Export. Also note that the API will provide encapsulated, serialized content - so if the API was called with format=json, and contentformat is also json(or rather, application/json), the page content is represented as a string containing an escaped json structure. Extensions that use JSON to serialize some types of page content may provide specialized API modules that allow access to that content in a more natural form.==Compatibility==The ContentHandler facility is introduced in a way that should allow all existing code to keep functioning at least for pages that contain wikitext or other text based content. 
However, a number of functions and hooks have been deprecated in favor of new versions that are aware of the page's content model, and will now generate warnings when used. Most importantly, the following functions have been deprecated:*Revision::getText() and Revision::getRawText() are deprecated in favor of Revision::getContent() *WikiPage::getText() is deprecated in favor of WikiPage::getContent() Also, the old Article::getContent()(which returns text) is superseded by Article::getContentObject(). However, both methods should be avoided since they do not provide clean access to the page's actual content. For instance, they may return a system message for non-existing pages. Use WikiPage::getContent() instead. Code that relies on a textual representation of the page content should eventually be rewritten. However, ContentHandler::getContentText() provides a stop-gap that can be used to get text for a page. Its behavior is controlled by $wgContentHandlerTextFallback it
Definition: contenthandler.txt:107
$args
if( $line===false) $args
Definition: cdb.php:62
$ext
$ext
Definition: NoLocalSettings.php:34
$path
$path
Definition: NoLocalSettings.php:35
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users. It's targeted particularly at maintainers for Linux, since it's been observed that distribution packages of MediaWiki often break. We've consistently had to recommend that users seeking support use official tarballs instead of their distribution's, and this often solves whatever problem the user is having. It would be nice if this could such as
Definition: distributors.txt:9
MEDIATYPE_VIDEO
const MEDIATYPE_VIDEO
Definition: Defines.php:132
MEDIATYPE_MULTIMEDIA
const MEDIATYPE_MULTIMEDIA
Definition: Defines.php:134
$e
if( $useReadline) $e
Definition: eval.php:66
$IP
$IP
Definition: WebStart.php:88
$type
$type
Definition: testCompression.php:46
MEDIATYPE_AUDIO
const MEDIATYPE_AUDIO
Definition: Defines.php:129