MediaWiki REL1_32
IEContentAnalyzer.php
Go to the documentation of this file.
1<?php
/**
 * Relevant data taken from the type table in IE 5.
 *
 * Each key is a data format name and each value is the list of MIME types
 * that getDataFormat() reports as belonging to that format.
 */
protected $baseTypeTable = [
	'ambiguous' /*1*/ => [
		'text/plain',
		'application/octet-stream',
		'application/x-netcdf', // [sic]
	],
	'text' /*3*/ => [
		'text/richtext',
		'image/x-bitmap',
		'application/postscript',
		'application/base64',
		'application/macbinhex40',
		'application/x-cdf',
		'text/scriptlet'
	],
	'binary' /*4*/ => [
		'application/pdf',
		'audio/x-aiff',
		'audio/basic',
		'audio/wav',
		'image/gif',
		'image/pjpeg',
		'image/jpeg',
		'image/tiff',
		'image/x-png',
		'image/png',
		'image/bmp',
		'image/x-jg',
		'image/x-art',
		'image/x-emf',
		'image/x-wmf',
		'video/avi',
		'video/x-msvideo',
		'video/mpeg',
		'application/x-compressed',
		'application/x-zip-compressed',
		'application/x-gzip-compressed',
		'application/java',
		'application/x-msdownload'
	],
	'html' /*5*/ => [
		'text/html',
	],
];
51
/**
 * Changes to the type table in later versions of IE.
 *
 * Indexed by version name, then by data format name; the constructor merges
 * these MIME types into the running copy of $baseTypeTable.
 */
protected $addedTypes = [
	'ie07' => [
		'text' => [
			'text/xml',
			'application/xml',
		],
	],
];
60
/**
 * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a
 * typical Windows installation.
 *
 * Maps a file extension (including the leading dot) to a MIME type. It is
 * consulted by getMimeTypeForVersion() as a last resort.
 */
protected $registry = [
	'.323' => 'text/h323',
	'.3g2' => 'video/3gpp2',
	'.3gp' => 'video/3gpp',
	'.3gp2' => 'video/3gpp2',
	'.3gpp' => 'video/3gpp',
	'.aac' => 'audio/aac',
	'.ac3' => 'audio/ac3',
	'.accda' => 'application/msaccess',
	'.accdb' => 'application/msaccess',
	'.accdc' => 'application/msaccess',
	'.accde' => 'application/msaccess',
	'.accdr' => 'application/msaccess',
	'.accdt' => 'application/msaccess',
	'.ade' => 'application/msaccess',
	'.adp' => 'application/msaccess',
	'.adts' => 'audio/aac',
	'.ai' => 'application/postscript',
	'.aif' => 'audio/aiff',
	'.aifc' => 'audio/aiff',
	'.aiff' => 'audio/aiff',
	'.amc' => 'application/x-mpeg',
	'.application' => 'application/x-ms-application',
	'.asf' => 'video/x-ms-asf',
	'.asx' => 'video/x-ms-asf',
	'.au' => 'audio/basic',
	'.avi' => 'video/avi',
	'.bmp' => 'image/bmp',
	'.caf' => 'audio/x-caf',
	'.cat' => 'application/vnd.ms-pki.seccat',
	'.cbo' => 'application/sha',
	'.cdda' => 'audio/aiff',
	'.cer' => 'application/x-x509-ca-cert',
	'.conf' => 'text/plain',
	'.crl' => 'application/pkix-crl',
	'.crt' => 'application/x-x509-ca-cert',
	'.css' => 'text/css',
	'.csv' => 'application/vnd.ms-excel',
	'.der' => 'application/x-x509-ca-cert',
	'.dib' => 'image/bmp',
	'.dif' => 'video/x-dv',
	'.dll' => 'application/x-msdownload',
	'.doc' => 'application/msword',
	'.docm' => 'application/vnd.ms-word.document.macroEnabled.12',
	'.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
	'.dot' => 'application/msword',
	'.dotm' => 'application/vnd.ms-word.template.macroEnabled.12',
	'.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
	'.dv' => 'video/x-dv',
	'.dwfx' => 'model/vnd.dwfx+xps',
	'.edn' => 'application/vnd.adobe.edn',
	'.eml' => 'message/rfc822',
	'.eps' => 'application/postscript',
	'.etd' => 'application/x-ebx',
	'.exe' => 'application/x-msdownload',
	'.fdf' => 'application/vnd.fdf',
	'.fif' => 'application/fractals',
	'.gif' => 'image/gif',
	'.gsm' => 'audio/x-gsm',
	'.hqx' => 'application/mac-binhex40',
	'.hta' => 'application/hta',
	'.htc' => 'text/x-component',
	'.htm' => 'text/html',
	'.html' => 'text/html',
	'.htt' => 'text/webviewhtml',
	'.hxa' => 'application/xml',
	'.hxc' => 'application/xml',
	'.hxd' => 'application/octet-stream',
	'.hxe' => 'application/xml',
	'.hxf' => 'application/xml',
	'.hxh' => 'application/octet-stream',
	'.hxi' => 'application/octet-stream',
	'.hxk' => 'application/xml',
	'.hxq' => 'application/octet-stream',
	'.hxr' => 'application/octet-stream',
	'.hxs' => 'application/octet-stream',
	'.hxt' => 'application/xml',
	'.hxv' => 'application/xml',
	'.hxw' => 'application/octet-stream',
	'.ico' => 'image/x-icon',
	'.iii' => 'application/x-iphone',
	'.ins' => 'application/x-internet-signup',
	'.iqy' => 'text/x-ms-iqy',
	'.isp' => 'application/x-internet-signup',
	'.jfif' => 'image/jpeg',
	'.jnlp' => 'application/x-java-jnlp-file',
	'.jpe' => 'image/jpeg',
	'.jpeg' => 'image/jpeg',
	'.jpg' => 'image/jpeg',
	'.jtx' => 'application/x-jtx+xps',
	'.latex' => 'application/x-latex',
	'.log' => 'text/plain',
	'.m1v' => 'video/mpeg',
	'.m2v' => 'video/mpeg',
	'.m3u' => 'audio/x-mpegurl',
	'.mac' => 'image/x-macpaint',
	'.man' => 'application/x-troff-man',
	'.mda' => 'application/msaccess',
	'.mdb' => 'application/msaccess',
	'.mde' => 'application/msaccess',
	'.mfp' => 'application/x-shockwave-flash',
	'.mht' => 'message/rfc822',
	'.mhtml' => 'message/rfc822',
	'.mid' => 'audio/mid',
	'.midi' => 'audio/mid',
	'.mod' => 'video/mpeg',
	'.mov' => 'video/quicktime',
	'.mp2' => 'video/mpeg',
	'.mp2v' => 'video/mpeg',
	'.mp3' => 'audio/mpeg',
	'.mp4' => 'video/mp4',
	'.mpa' => 'video/mpeg',
	'.mpe' => 'video/mpeg',
	'.mpeg' => 'video/mpeg',
	'.mpf' => 'application/vnd.ms-mediapackage',
	'.mpg' => 'video/mpeg',
	'.mpv2' => 'video/mpeg',
	'.mqv' => 'video/quicktime',
	'.NMW' => 'application/nmwb',
	'.nws' => 'message/rfc822',
	'.odc' => 'text/x-ms-odc',
	'.ols' => 'application/vnd.ms-publisher',
	'.p10' => 'application/pkcs10',
	'.p12' => 'application/x-pkcs12',
	'.p7b' => 'application/x-pkcs7-certificates',
	'.p7c' => 'application/pkcs7-mime',
	'.p7m' => 'application/pkcs7-mime',
	'.p7r' => 'application/x-pkcs7-certreqresp',
	'.p7s' => 'application/pkcs7-signature',
	'.pct' => 'image/pict',
	'.pdf' => 'application/pdf',
	'.pdx' => 'application/vnd.adobe.pdx',
	'.pfx' => 'application/x-pkcs12',
	'.pic' => 'image/pict',
	'.pict' => 'image/pict',
	'.pinstall' => 'application/x-picasa-detect',
	'.pko' => 'application/vnd.ms-pki.pko',
	'.png' => 'image/png',
	'.pnt' => 'image/x-macpaint',
	'.pntg' => 'image/x-macpaint',
	'.pot' => 'application/vnd.ms-powerpoint',
	'.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
	'.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
	'.ppa' => 'application/vnd.ms-powerpoint',
	'.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
	'.pps' => 'application/vnd.ms-powerpoint',
	'.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
	'.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
	'.ppt' => 'application/vnd.ms-powerpoint',
	'.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
	'.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
	'.prf' => 'application/pics-rules',
	'.ps' => 'application/postscript',
	'.pub' => 'application/vnd.ms-publisher',
	'.pwz' => 'application/vnd.ms-powerpoint',
	'.py' => 'text/plain',
	'.pyw' => 'text/plain',
	'.qht' => 'text/x-html-insertion',
	'.qhtm' => 'text/x-html-insertion',
	'.qt' => 'video/quicktime',
	'.qti' => 'image/x-quicktime',
	'.qtif' => 'image/x-quicktime',
	'.qtl' => 'application/x-quicktimeplayer',
	'.rat' => 'application/rat-file',
	'.rmf' => 'application/vnd.adobe.rmf',
	'.rmi' => 'audio/mid',
	'.rqy' => 'text/x-ms-rqy',
	'.rtf' => 'application/msword',
	'.sct' => 'text/scriptlet',
	'.sd2' => 'audio/x-sd2',
	'.sdp' => 'application/sdp',
	'.shtml' => 'text/html',
	'.sit' => 'application/x-stuffit',
	'.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',
	'.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
	'.slk' => 'application/vnd.ms-excel',
	'.snd' => 'audio/basic',
	'.so' => 'application/x-apachemodule',
	'.sol' => 'text/plain',
	'.sor' => 'text/plain',
	'.spc' => 'application/x-pkcs7-certificates',
	'.spl' => 'application/futuresplash',
	'.sst' => 'application/vnd.ms-pki.certstore',
	'.stl' => 'application/vnd.ms-pki.stl',
	'.swf' => 'application/x-shockwave-flash',
	'.thmx' => 'application/vnd.ms-officetheme',
	'.tif' => 'image/tiff',
	'.tiff' => 'image/tiff',
	'.txt' => 'text/plain',
	'.uls' => 'text/iuls',
	'.vcf' => 'text/x-vcard',
	'.vdx' => 'application/vnd.ms-visio.viewer',
	'.vsd' => 'application/vnd.ms-visio.viewer',
	'.vss' => 'application/vnd.ms-visio.viewer',
	'.vst' => 'application/vnd.ms-visio.viewer',
	'.vsx' => 'application/vnd.ms-visio.viewer',
	'.vtx' => 'application/vnd.ms-visio.viewer',
	'.wav' => 'audio/wav',
	'.wax' => 'audio/x-ms-wax',
	'.wbk' => 'application/msword',
	'.wdp' => 'image/vnd.ms-photo',
	'.wiz' => 'application/msword',
	'.wm' => 'video/x-ms-wm',
	'.wma' => 'audio/x-ms-wma',
	'.wmd' => 'application/x-ms-wmd',
	'.wmv' => 'video/x-ms-wmv',
	'.wmx' => 'video/x-ms-wmx',
	'.wmz' => 'application/x-ms-wmz',
	'.wpl' => 'application/vnd.ms-wpl',
	'.wsc' => 'text/scriptlet',
	'.wvx' => 'video/x-ms-wvx',
	'.xaml' => 'application/xaml+xml',
	'.xbap' => 'application/x-ms-xbap',
	'.xdp' => 'application/vnd.adobe.xdp+xml',
	'.xfdf' => 'application/vnd.adobe.xfdf',
	'.xht' => 'application/xhtml+xml',
	'.xhtml' => 'application/xhtml+xml',
	'.xla' => 'application/vnd.ms-excel',
	'.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',
	'.xlk' => 'application/vnd.ms-excel',
	'.xll' => 'application/vnd.ms-excel',
	'.xlm' => 'application/vnd.ms-excel',
	'.xls' => 'application/vnd.ms-excel',
	'.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
	'.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',
	'.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
	'.xlt' => 'application/vnd.ms-excel',
	'.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',
	'.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
	'.xlw' => 'application/vnd.ms-excel',
	'.xml' => 'text/xml',
	'.xps' => 'application/vnd.ms-xpsdocument',
	'.xsl' => 'text/xml',
];
301
/**
 * Names of the IE versions (and version variants) this class produces a
 * result for. getMimesFromData() returns one entry per name, and the
 * constructor builds one type table per name, in this order.
 */
protected $versions = [
	'ie05',
	'ie06',
	'ie07',
	'ie07.strict',
	'ie07.nohtml',
];
308
/**
 * Type table with versions expanded.
 *
 * Filled in by the constructor: version name => format name => MIME types.
 */
protected $typeTable = [];
313
/**
 * Build the per-version type tables.
 *
 * Starts from $baseTypeTable and walks $versions in order, merging in any
 * $addedTypes registered for each version. The running table is not reset
 * between versions, so additions made for one version carry over to every
 * version listed after it.
 */
public function __construct() {
	$types = $this->baseTypeTable;
	foreach ( $this->versions as $version ) {
		if ( isset( $this->addedTypes[$version] ) ) {
			foreach ( $this->addedTypes[$version] as $format => $addedTypes ) {
				// A format that is missing from the base table starts out empty, so
				// array_merge() is never handed an undefined (null) operand.
				$existing = isset( $types[$format] ) ? $types[$format] : [];
				$types[$format] = array_merge( $existing, $addedTypes );
			}
		}
		$this->typeTable[$version] = $types;
	}
}
327
/**
 * Get the MIME types from getMimesFromData(), with every entry passed
 * through translateMimeType() so that IE's private type names are replaced
 * by their more commonly understood equivalents.
 *
 * @param string $fileName The file name (unused if a type is detected)
 * @param string $chunk The leading bytes of the file
 * @param string $proposed The proposed MIME type
 * @return array Map of IE version name to translated MIME type
 */
public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
	return array_map(
		[ $this, 'translateMimeType' ],
		$this->getMimesFromData( $fileName, $chunk, $proposed )
	);
}
343
/**
 * Translate a MIME type from IE's idiosyncratic private types into a more
 * commonly understood type string.
 *
 * @param string $type
 * @return string The translated type, or $type itself when no translation
 *   is listed for it
 */
public function translateMimeType( $type ) {
	static $ieToCommon = [
		'image/pjpeg' => 'image/jpeg',
		'image/x-png' => 'image/png',
		'image/x-wmf' => 'application/x-msmetafile',
		'image/bmp' => 'image/x-bmp',
		'application/x-zip-compressed' => 'application/zip',
		'application/x-compressed' => 'application/x-compress',
		'application/x-gzip-compressed' => 'application/x-gzip',
		'audio/mid' => 'audio/midi',
	];

	return isset( $ieToCommon[$type] ) ? $ieToCommon[$type] : $type;
}
366
/**
 * Get the untranslated MIME types for all known versions.
 *
 * @param string $fileName The file name (unused if a type is detected)
 * @param string $chunk The leading bytes of the file
 * @param string $proposed The proposed MIME type
 * @return array Map of IE version name (as listed in $versions) to the
 *   MIME type computed for that version
 */
public function getMimesFromData( $fileName, $chunk, $proposed ) {
	$mimeByVersion = [];
	foreach ( $this->versions as $ieVersion ) {
		$mimeByVersion[$ieVersion] =
			$this->getMimeTypeForVersion( $ieVersion, $fileName, $chunk, $proposed );
	}

	return $mimeByVersion;
}
383
/**
 * Get the MIME type for a given named version.
 *
 * The decision runs in this order:
 *  - a proposed type of unknown format (other than the two multipart
 *    types) and an empty chunk both short-circuit to $proposed;
 *  - a proposed HTML/GIF/JPEG/PNG type that is confirmed by the data wins;
 *  - markers found by sampleData() (CDF, RSS, Atom, XML, HTML, XBM, BinHex,
 *    scriptlet) are tried next;
 *  - otherwise a text/binary heuristic combined with the magic-number
 *    checks picks a type, and if that type is still "ambiguous" the
 *    proposed type or the file extension registry may override it.
 *
 * @param string $version One of the names in $versions
 * @param string $fileName The file name; only its last extension is used,
 *   and only when everything else fails
 * @param string $chunk The leading bytes of the file; only the first 255
 *   bytes are examined
 * @param string $proposed The proposed MIME type; anything from the first
 *   semicolon onwards is ignored
 * @return string The MIME type
 */
protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
	// Strip text after a semicolon
	$semiPos = strpos( $proposed, ';' );
	if ( $semiPos !== false ) {
		$proposed = substr( $proposed, 0, $semiPos );
	}

	$proposedFormat = $this->getDataFormat( $version, $proposed );
	if ( $proposedFormat == 'unknown'
		&& $proposed != 'multipart/mixed'
		&& $proposed != 'multipart/x-mixed-replace'
	) {
		return $proposed;
	}
	if ( strval( $chunk ) === '' ) {
		return $proposed;
	}

	// Truncate chunk at 255 bytes
	$chunk = substr( $chunk, 0, 255 );

	// IE does the Check*Headers() calls last, and instead does the following image
	// type checks by directly looking for the magic numbers. What I do here should
	// have the same effect since the magic number checks are identical in both cases.
	$result = $this->sampleData( $version, $chunk );
	$sampleFound = $result['found'];
	$counters = $result['counters'];
	$binaryType = $this->checkBinaryHeaders( $version, $chunk );
	$textType = $this->checkTextHeaders( $version, $chunk );

	if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) {
		return 'text/html';
	}
	if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) {
		return 'image/gif';
	}
	if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
		&& $binaryType == 'image/pjpeg'
	) {
		return $proposed;
	}
	// PNG check added in IE 7
	if ( $version >= 'ie07'
		&& ( $proposed == 'image/x-png' || $proposed == 'image/png' )
		&& $binaryType == 'image/x-png'
	) {
		return $proposed;
	}

	// CDF was removed in IE 7 so it won't be in $sampleFound for later versions
	if ( isset( $sampleFound['cdf'] ) ) {
		return 'application/x-cdf';
	}

	// RSS and Atom were added in IE 7 so they won't be in $sampleFound for
	// previous versions
	if ( isset( $sampleFound['rss'] ) ) {
		return 'application/rss+xml';
	}
	if ( isset( $sampleFound['rdf-tag'] )
		&& isset( $sampleFound['rdf-url'] )
		&& isset( $sampleFound['rdf-purl'] )
	) {
		return 'application/rss+xml';
	}
	if ( isset( $sampleFound['atom'] ) ) {
		return 'application/atom+xml';
	}

	if ( isset( $sampleFound['xml'] ) ) {
		// TODO: I'm not sure under what circumstances this flag is enabled
		if ( strpos( $version, 'strict' ) !== false ) {
			if ( $proposed == 'text/html' || $proposed == 'text/xml' ) {
				return 'text/xml';
			}
		} else {
			return 'text/xml';
		}
	}
	if ( isset( $sampleFound['html'] ) ) {
		// TODO: I'm not sure under what circumstances this flag is enabled
		if ( strpos( $version, 'nohtml' ) !== false ) {
			if ( $proposed == 'text/plain' ) {
				return 'text/html';
			}
		} else {
			return 'text/html';
		}
	}
	if ( isset( $sampleFound['xbm'] ) ) {
		return 'image/x-bitmap';
	}
	if ( isset( $sampleFound['binhex'] ) ) {
		return 'application/macbinhex40';
	}
	if ( isset( $sampleFound['scriptlet'] ) ) {
		if ( strpos( $version, 'strict' ) !== false ) {
			if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) {
				return 'text/scriptlet';
			}
		} else {
			return 'text/scriptlet';
		}
	}

	// Freaky heuristics to determine if the data is text or binary
	// The heuristic is of course broken for non-ASCII text
	if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
		< ( $counters['ctrl'] + $counters['high'] ) * 16
	) {
		$kindOfBinary = true;
		$type = $binaryType ?: $textType;
		if ( $type === false ) {
			$type = 'application/octet-stream';
		}
	} else {
		$kindOfBinary = false;
		$type = $textType ?: $binaryType;
		if ( $type === false ) {
			$type = 'text/plain';
		}
	}

	// Check if the output format is ambiguous
	// This generally means that detection failed, real types aren't ambiguous
	$detectedFormat = $this->getDataFormat( $version, $type );
	if ( $detectedFormat != 'ambiguous' ) {
		return $type;
	}

	if ( $proposedFormat != 'ambiguous' ) {
		// FormatAgreesWithData()
		if ( $proposedFormat == 'text' && !$kindOfBinary ) {
			return $proposed;
		}
		if ( $proposedFormat == 'binary' && $kindOfBinary ) {
			return $proposed;
		}
		if ( $proposedFormat == 'html' ) {
			return $proposed;
		}
	}

	// Find a MIME type by searching the registry for the file extension.
	$dotPos = strrpos( $fileName, '.' );
	if ( $dotPos === false ) {
		return $type;
	}
	$ext = substr( $fileName, $dotPos );
	if ( isset( $this->registry[$ext] ) ) {
		return $this->registry[$ext];
	}
	// Windows registry key names are not case-sensitive, so fall back to a
	// case-insensitive scan. Without it "FOO.TXT" would miss ".txt", and the
	// ".NMW" entry would miss "foo.nmw".
	foreach ( $this->registry as $registeredExt => $registeredType ) {
		if ( strcasecmp( $registeredExt, $ext ) === 0 ) {
			return $registeredType;
		}
	}

	// TODO: If the extension has an application registered to it, IE will return
	// application/octet-stream. We'll skip that, so we could erroneously
	// return text/plain or application/x-netcdf where application/octet-stream
	// would be correct.

	return $type;
}
552
/**
 * Check for text headers at the start of the chunk.
 * Confirmed same in 5 and 7.
 *
 * @param string $version Unused; the checks do not vary by version
 * @param string $chunk
 * @return string|bool The MIME type for the first matching signature, or
 *   false when the chunk starts with none of them
 */
private function checkTextHeaders( $version, $chunk ) {
	// Leading signature => MIME type, tried in this order
	$signatures = [
		'%PDF' => 'application/pdf',
		'%!' => 'application/postscript',
		'{\\rtf' => 'text/richtext',
		'begin' => 'application/base64',
	];

	foreach ( $signatures as $magic => $mime ) {
		if ( substr( $chunk, 0, strlen( $magic ) ) === $magic ) {
			return $mime;
		}
	}

	return false;
}
578
/**
 * Check for binary headers at the start of the chunk.
 * Confirmed same in 5 and 7.
 *
 * The signatures are tried in a fixed order and the first match wins.
 *
 * @param string $version Unused; the checks do not vary by version
 * @param string $chunk
 * @return string|bool The (IE-flavoured) MIME type, or false when no
 *   signature matches
 */
private function checkBinaryHeaders( $version, $chunk ) {
	// True when the bytes of $chunk starting at $pos are exactly $magic
	$has = function ( $magic, $pos = 0 ) use ( $chunk ) {
		return substr( $chunk, $pos, strlen( $magic ) ) === $magic;
	};

	// The GIF signature is matched without regard to case
	$gifTag = strtoupper( substr( $chunk, 0, 5 ) );
	if ( $gifTag === 'GIF87' || $gifTag === 'GIF89' ) {
		return 'image/gif';
	}
	if ( $has( "\xff\xd8" ) ) {
		return 'image/pjpeg'; // actually plain JPEG but this is what IE returns
	}

	if ( $has( 'BM' ) && $has( "\000\000", 6 ) && $has( "\000\000", 8 ) ) {
		return 'image/bmp'; // another non-standard MIME
	}
	if ( $has( 'RIFF' ) && $has( 'WAVE', 8 ) ) {
		return 'audio/wav';
	}
	// These were integer literals in IE
	// Perhaps the author was not sure what the target endianness was
	if ( $has( ".sd\000" ) || $has( ".snd" ) || $has( "\000ds." ) || $has( "dns." ) ) {
		return 'audio/basic';
	}
	if ( $has( "MM\000" ) ) {
		return 'image/tiff';
	}
	if ( $has( 'MZ' ) ) {
		return 'application/x-msdownload';
	}
	if ( $has( "\x89PNG\x0d\x0a\x1a\x0a" ) ) {
		return 'image/x-png'; // [sic]
	}
	if ( strlen( $chunk ) >= 5 ) {
		$thirdByte = ord( $chunk[2] );
		$fifthByte = ord( $chunk[4] );
		if ( $has( 'JG' ) && $fifthByte == 0 && $thirdByte >= 3 && $thirdByte <= 31 ) {
			return 'image/x-jg';
		}
	}
	// More endian confusion?
	if ( $has( 'MROF' ) ) {
		return 'audio/x-aiff';
	}
	if ( $has( 'FORM' ) && ( $has( 'AIFF', 8 ) || $has( 'AIFC', 8 ) ) ) {
		return 'audio/x-aiff';
	}
	if ( $has( 'RIFF' ) && $has( 'AVI ', 8 ) ) {
		return 'video/avi';
	}
	if ( $has( "\x00\x00\x01\xb3" ) || $has( "\x00\x00\x01\xba" ) ) {
		return 'video/mpeg';
	}
	if ( $has( "\001\000\000\000" ) && $has( ' EMF', 40 ) ) {
		return 'image/x-emf';
	}
	if ( $has( "\xd7\xcd\xc6\x9a" ) ) {
		return 'image/x-wmf';
	}
	if ( $has( "\xca\xfe\xba\xbe" ) ) {
		return 'application/java';
	}
	if ( $has( 'PK' ) ) {
		return 'application/x-zip-compressed';
	}
	if ( $has( "\x1f\x9d" ) ) {
		return 'application/x-compressed';
	}
	if ( $has( "\x1f\x8b" ) ) {
		return 'application/x-gzip-compressed';
	}
	// Skip redundant check for ZIP
	if ( $has( "MThd\000" ) ) {
		return 'audio/mid';
	}
	if ( $has( '%PDF' ) ) {
		return 'application/pdf';
	}

	return false;
}
679
/**
 * Do heuristic checks on the bulk of the data sample.
 *
 * Walks the chunk one byte at a time, classifying every byte into one of
 * the counters and, for printable ASCII bytes, looking for known markers
 * that start at that position.
 *
 * @param string $version One of the names in $versions; versions that sort
 *   before 'ie07' look for CHANNEL (CDF), later ones for RSS, RDF and FEED
 * @param string $chunk
 * @return array With two keys:
 *   - 'found': set of marker names that were seen (marker name => true).
 *     'xbm1' and 'xbm2' are intermediate states of the XBM check.
 *   - 'counters': byte counts keyed by 'ctrl', 'high', 'low', 'lf', 'cr'
 *     and 'ff'. Scanning stops early at some markers, so the counters only
 *     cover the bytes up to that point.
 */
protected function sampleData( $version, $chunk ) {
	$found = [];
	$counters = [
		'ctrl' => 0,
		'high' => 0,
		'low' => 0,
		'lf' => 0,
		'cr' => 0,
		'ff' => 0
	];
	$htmlTags = [
		'html',
		'head',
		'title',
		'body',
		'script',
		'a href',
		'pre',
		'img',
		'plaintext',
		'table'
	];
	$rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
	$rdfPurl = 'http://purl.org/rss/1.0/';
	$xbmMagic1 = '#define';
	$xbmMagic2 = '_width';
	$xbmMagic3 = '_bits';
	$binhexMagic = 'converted with BinHex';
	$chunkLength = strlen( $chunk );

	for ( $offset = 0; $offset < $chunkLength; $offset++ ) {
		$curChar = $chunk[$offset];
		if ( $curChar == "\x0a" ) {
			$counters['lf']++;
			continue;
		} elseif ( $curChar == "\x0d" ) {
			$counters['cr']++;
			continue;
		} elseif ( $curChar == "\x0c" ) {
			$counters['ff']++;
			continue;
		} elseif ( $curChar == "\t" ) {
			$counters['low']++;
			continue;
		} elseif ( ord( $curChar ) < 32 ) {
			$counters['ctrl']++;
			continue;
		} elseif ( ord( $curChar ) >= 128 ) {
			$counters['high']++;
			continue;
		}

		// Only printable ASCII reaches this point
		$counters['low']++;
		if ( $curChar == '<' ) {
			// XML
			$remainder = substr( $chunk, $offset + 1 );
			if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
				$nextChar = substr( $chunk, $offset + 5, 1 );
				if ( $nextChar == ':' || $nextChar == ' ' || $nextChar == "\t" ) {
					$found['xml'] = true;
				}
			}
			// Scriptlet (JSP)
			if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
				$found['scriptlet'] = true;
				break;
			}
			// HTML
			foreach ( $htmlTags as $tag ) {
				if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
					$found['html'] = true;
				}
			}
			// Skip broken check for additional tags (HR etc.)

			// CHANNEL replaced by RSS, RDF and FEED in IE 7
			if ( $version < 'ie07' ) {
				if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
					$found['cdf'] = true;
				}
			} else {
				// RSS
				if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
					$found['rss'] = true;
					break; // return from SampleData
				}
				if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
					$found['rdf-tag'] = true;
					// no break
				}
				if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
					$found['atom'] = true;
					break;
				}
			}
			continue;
		}
		// Skip broken check for -->

		// RSS URL checks
		// For some reason both URLs must appear before it is recognised
		$remainder = substr( $chunk, $offset );
		if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
			$found['rdf-url'] = true;
			if ( isset( $found['rdf-tag'] )
				&& isset( $found['rdf-purl'] ) // [sic]
			) {
				break;
			}
			continue;
		}

		if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
			// Record the purl.org namespace. Without this flag the caller's
			// rdf-tag + rdf-url + rdf-purl test could never succeed.
			$found['rdf-purl'] = true;
			if ( isset( $found['rdf-tag'] )
				&& isset( $found['rdf-url'] ) // [sic]
			) {
				break;
			}
			continue;
		}

		// XBM checks
		if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
			$found['xbm1'] = true;
			continue;
		}
		if ( $curChar == '_' ) {
			// "_width" only counts after "#define", and "_bits" only after "_width"
			if ( isset( $found['xbm2'] ) ) {
				if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
					$found['xbm'] = true;
					break;
				}
			} elseif ( isset( $found['xbm1'] ) ) {
				if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
					$found['xbm2'] = true;
				}
			}
		}

		// BinHex (this one is matched case-sensitively)
		if ( !strncmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
			$found['binhex'] = true;
		}
	}
	return [ 'found' => $found, 'counters' => $counters ];
}
833
/**
 * Look up the data format that a MIME type belongs to in the type table
 * of the given version.
 *
 * @param string $version One of the names in $versions
 * @param string $type The MIME type to classify
 * @return string 'ambiguous' for an empty type or the literal '(null)';
 *   otherwise the name of the first format whose list contains the type,
 *   or 'unknown' when no list contains it
 */
protected function getDataFormat( $version, $type ) {
	$types = $this->typeTable[$version];
	// Normalise to a string so that the comparisons below can be strict. With
	// loose comparison a non-string such as 0 or true could match any entry.
	$type = strval( $type );
	if ( $type === '' || $type === '(null)' ) {
		return 'ambiguous';
	}
	foreach ( $types as $format => $list ) {
		if ( in_array( $type, $list, true ) ) {
			return $format;
		}
	}
	return 'unknown';
}
851}
This class simulates Microsoft Internet Explorer's terribly broken and insecure MIME type detection a...
$versions
IE versions which have been analysed to bring you this class, and for which some substantive differen...
getDataFormat( $version, $type)
checkBinaryHeaders( $version, $chunk)
Check for binary headers at the start of the chunk Confirmed same in 5 and 7.
sampleData( $version, $chunk)
Do heuristic checks on the bulk of the data sample.
getMimesFromData( $fileName, $chunk, $proposed)
Get the untranslated MIME types for all known versions.
$addedTypes
Changes to the type table in later versions of IE.
$registry
An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a typical Windows installation.
checkTextHeaders( $version, $chunk)
Check for text headers at the start of the chunk Confirmed same in 5 and 7.
$baseTypeTable
Relevant data taken from the type table in IE 5.
getMimeTypeForVersion( $version, $fileName, $chunk, $proposed)
Get the MIME type for a given named version.
$typeTable
Type table with versions expanded.
translateMimeType( $type)
Translate a MIME type from IE's idiosyncratic private types into more commonly understood type string...
getRealMimesFromData( $fileName, $chunk, $proposed)
Get the MIME types from getMimesFromData(), but convert the result from IE's idiosyncratic private ty...
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users. It's targeted particularly at maintainers for Linux, since it's been observed that distribution packages of MediaWiki often break. We've consistently had to recommend that users seeking support use official tarballs instead of their distribution's, and this often solves whatever problem the user is having. It would be nice if this could such as
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message. Please note the header message cannot receive/use parameters. 'ImgAuthModifyHeaders':Executed just before a file is streamed to a user via img_auth.php, allowing headers to be modified beforehand. $title:LinkTarget object & $headers:HTTP headers(name=> value, names are case insensitive). Two headers get special handling:If-Modified-Since(value must be a valid HTTP date) and Range(must be of the form "bytes=(\d*-\d*)") will be honored when streaming the file. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item. Return false to stop further processing of the tag $reader:XMLReader object $logInfo:Array of information 'ImportHandlePageXMLTag':When parsing a XML tag in a page. Return false to stop further processing of the tag $reader:XMLReader object & $pageInfo:Array of information 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision. Return false to stop further processing of the tag $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information 'ImportHandleToplevelXMLTag':When parsing a top level XML tag. Return false to stop further processing of the tag $reader:XMLReader object 'ImportHandleUnknownUser':When a user doesn 't exist locally, this hook is called to give extensions an opportunity to auto-create it. If the auto-creation is successful, return false. $name:User name 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload. Return false to stop further processing of the tag $reader:XMLReader object $revisionInfo:Array of information 'ImportLogInterwikiLink':Hook to change the interwiki link used in log entries and edit summaries for transwiki imports. & $fullInterwikiPrefix:Interwiki prefix, may contain colons. & $pageTitle:String that contains page title. 
'ImportSources':Called when reading from the $wgImportSources configuration variable. Can be used to lazy-load the import sources list. & $importSources:The value of $wgImportSources. Modify as necessary. See the comment in DefaultSettings.php for the detail of how to structure this array. 'InfoAction':When building information to display on the action=info page. $context:IContextSource object & $pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect. & $title:Title object for the current page & $request:WebRequest & $ignoreRedirect:boolean to skip redirect check & $target:Title/string of redirect target & $article:Article object 'InternalParseBeforeLinks':during Parser 's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InternalParseBeforeSanitize':during Parser 's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings. Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not. Return true without providing an interwiki to continue interwiki search. $prefix:interwiki prefix we are looking for. & $iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 'InvalidateEmailComplete':Called after a user 's email has been invalidated successfully. $user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification. 
Callee may modify $url and $query, URL will be constructed as $url . $query & $url:URL to index.php & $query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) & $article:article(object) being checked 'IsTrustedProxy':Override the result of IP::isTrustedProxy() & $ip:IP being check & $result:Change this value to override the result of IP::isTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from & $allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of Sanitizer::validateEmail(), for instance to return false if the domain name doesn 't match your organization. $addr:The e-mail address entered by the user & $result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user & $result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we 're looking for a messages file for & $file:The messages file path, you can override this to change the location. 'LanguageGetMagic':DEPRECATED since 1.16! Use $magicWords in a file listed in $wgExtensionMessagesFiles instead. Use this to define synonyms of magic words depending of the language & $magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces. Do not use this hook to add namespaces. Use CanonicalNamespaces for that. & $namespaces:Array of namespaces indexed by their numbers 'LanguageGetSpecialPageAliases':DEPRECATED! Use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead. 
Use to define aliases of special pages names depending of the language & $specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names. & $names:array of language code=> language name $code:language of the preferred translations 'LanguageLinks':Manipulate a page 's language links. This is called in various places to allow extensions to define the effective language links for a page. $title:The page 's Title. & $links:Array with elements of the form "language:title" in the order that they will be output. & $linkFlags:Associative array mapping prefixed links to arrays of flags. Currently unused, but planned to provide support for marking individual language links in the UI, e.g. for featured articles. 'LanguageSelector':Hook to change the language selector available on a page. $out:The output page. $cssClassName:CSS class name of the language selector. 'LinkBegin':DEPRECATED since 1.28! Use HtmlPageLinkRendererBegin instead. Used when generating internal and interwiki links in Linker::link(), before processing starts. Return false to skip default processing and return $ret. See documentation for Linker::link() for details on the expected meanings of parameters. $skin:the Skin object $target:the Title that the link is pointing to & $html:the contents that the< a > tag should have(raw HTML) $result
Definition hooks.txt:2042
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
if(!is_readable( $file)) $ext
Definition router.php:55