Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
68.13% |
295 / 433 |
|
44.44% |
12 / 27 |
CRAP | |
0.00% |
0 / 1 |
MimeAnalyzer | |
68.29% |
295 / 432 |
|
44.44% |
12 / 27 |
1068.18 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
1 | |||
loadFiles | |
73.08% |
19 / 26 |
|
0.00% |
0 / 1 |
14.81 | |||
parseMimeTypes | |
89.47% |
17 / 19 |
|
0.00% |
0 / 1 |
7.06 | |||
parseMimeInfo | |
70.00% |
21 / 30 |
|
0.00% |
0 / 1 |
14.27 | |||
setLogger | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
addExtraTypes | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
addExtraInfo | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getExtensionsForType | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
getExtensionsFromMimeType | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
getMimeTypesFromExtension | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getMimeTypeFromExtensionOrNull | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
guessTypesForExtension | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTypesForExtension | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
getExtensionFromMimeTypeOrNull | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
isMatchingExtension | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
isPHPImageType | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
2 | |||
isRecognizableExtension | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
improveTypeFromExtension | |
95.45% |
21 / 22 |
|
0.00% |
0 / 1 |
11 | |||
guessMimeType | |
53.85% |
7 / 13 |
|
0.00% |
0 / 1 |
5.57 | |||
doGuessMimeType | |
61.59% |
85 / 138 |
|
0.00% |
0 / 1 |
205.18 | |||
detectZipTypeFromFile | |
92.00% |
23 / 25 |
|
0.00% |
0 / 1 |
8.03 | |||
detectMicrosoftBinaryType | |
50.00% |
4 / 8 |
|
0.00% |
0 / 1 |
4.12 | |||
detectMimeType | |
0.00% |
0 / 28 |
|
0.00% |
0 / 1 |
110 | |||
getMediaType | |
81.40% |
35 / 43 |
|
0.00% |
0 / 1 |
25.12 | |||
findMediaType | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
7 | |||
getMediaTypes | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isValidMajorMimeType | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | |
21 | namespace Wikimedia\Mime; |
22 | |
23 | use Psr\Log\LoggerAwareInterface; |
24 | use Psr\Log\LoggerInterface; |
25 | use Psr\Log\NullLogger; |
26 | use UnexpectedValueException; |
27 | use Wikimedia\AtEase\AtEase; |
28 | use ZipDirectoryReader; |
29 | |
30 | /** |
31 | * @defgroup Mime Mime |
32 | * |
33 | * @ingroup Media |
34 | */ |
35 | |
36 | /** |
37 | * Detect MIME types of a file by mapping file extensions or parsing file contents. |
38 | * |
39 | * @since 1.28 |
40 | * @ingroup Mime |
41 | */ |
42 | class MimeAnalyzer implements LoggerAwareInterface { |
43 | /** @var string */ |
44 | protected $typeFile; |
45 | /** @var string */ |
46 | protected $infoFile; |
47 | /** @var string */ |
48 | protected $xmlTypes; |
49 | /** @var callable */ |
50 | protected $initCallback; |
51 | /** @var callable */ |
52 | protected $detectCallback; |
53 | /** @var callable */ |
54 | protected $guessCallback; |
55 | /** @var callable */ |
56 | protected $extCallback; |
57 | /** @var array Mapping of media types to arrays of MIME types */ |
58 | protected $mediaTypes = null; |
59 | /** @var array Map of MIME type aliases */ |
60 | protected $mimeTypeAliases = null; |
61 | /** @var array<string,string[]> Map of MIME types to file extensions */ |
62 | protected $mimeToExts = []; |
63 | /** @var array<string,string[]> Map of file extensions to MIME types */ |
64 | protected $extToMimes = []; |
65 | |
66 | /** @var array Map of file extensions types to MIME types (as a space separated list) */ |
67 | public $mExtToMime = []; // legacy name; field accessed by hooks |
68 | |
69 | /** @var string Extra MIME types, set for example by media handling extensions */ |
70 | private $extraTypes = ''; |
71 | /** @var string Extra MIME info, set for example by media handling extensions */ |
72 | private $extraInfo = ''; |
73 | |
74 | /** @var LoggerInterface */ |
75 | private $logger; |
76 | |
77 | /** @var string Use the full, built-in MIME mapping rather than load from a file */ |
78 | public const USE_INTERNAL = 'internal'; |
79 | |
/**
 * Store the configuration and immediately load the MIME tables.
 *
 * @param array $params Configuration map, includes:
 *   - typeFile: path to file with the list of known MIME types, or
 *      self::USE_INTERNAL to use the full built-in map
 *   - infoFile: path to file with the MIME type info, or
 *      self::USE_INTERNAL to use the full built-in map
 *   - xmlTypes: map of root element names to XML MIME types
 *   - initCallback: initialization callback that is passed this object [optional]
 *   - detectCallback: alternative to finfo that returns the mime type for a file.
 *      For example, the callback can return the output of "file -bi". [optional]
 *   - guessCallback: callback to improve the guessed MIME type using the file data.
 *      This is intended for fixing mistakes in fileinfo or "detectCallback". [optional]
 *   - extCallback: callback to improve the guessed MIME type using the extension. [optional]
 *   - logger: PSR-3 logger; a NullLogger is used when omitted [optional]
 * @note Constructing these instances is expensive due to file reads.
 *  A service or singleton pattern should be used to avoid creating instances again and again.
 */
public function __construct( array $params ) {
	// Required settings: these keys are read unconditionally.
	[
		'typeFile' => $this->typeFile,
		'infoFile' => $this->infoFile,
		'xmlTypes' => $this->xmlTypes,
	] = $params;

	// Optional hooks: each one stays null when the caller did not supply it.
	foreach ( [ 'initCallback', 'detectCallback', 'guessCallback', 'extCallback' ] as $hook ) {
		$this->$hook = $params[$hook] ?? null;
	}

	$this->logger = $params['logger'] ?? new NullLogger();

	// The logger and initCallback must be in place before the tables are built.
	$this->loadFiles();
}
107 | |
/**
 * Populate the MIME lookup tables.
 *
 * Runs the optional initCallback first, so that it can register extra
 * types/info, then builds:
 *  - $mimeToExts from the built-in map plus the type file and extra types,
 *  - $extToMimes as the reverse of $mimeToExts, plus the legacy $mExtToMime entries,
 *  - $mimeTypeAliases and $mediaTypes from the built-in map plus the info file
 *    and extra info.
 *
 * When typeFile/infoFile is self::USE_INTERNAL the full built-in map is used
 * and no file is read; otherwise the minimal built-in map is used and the
 * file (if one is configured) is parsed on top of it.
 */
protected function loadFiles(): void {
	# Allow media handling extensions adding MIME-types and MIME-info
	if ( $this->initCallback ) {
		( $this->initCallback )( $this );
	}

	$rawTypes = $this->extraTypes;
	if ( $this->typeFile === self::USE_INTERNAL ) {
		$this->mimeToExts = MimeMap::MIME_EXTENSIONS;
	} else {
		$this->mimeToExts = MimeMapMinimal::MIME_EXTENSIONS;
		if ( $this->typeFile ) {
			$rawTypes = $this->readDefinitionFile( $this->typeFile ) . "\n" . $this->extraTypes;
		}
	}
	if ( $rawTypes ) {
		$this->parseMimeTypes( $rawTypes );
	}

	// Build the reverse mapping (extension => MIME type).
	foreach ( $this->mimeToExts as $mime => $exts ) {
		foreach ( $exts as $ext ) {
			$this->extToMimes[$ext][] = $mime;
		}
	}

	// Migrate items from the legacy $this->mExtToMime field.
	// TODO: Remove this when mExtToMime is finally removed.
	foreach ( $this->mExtToMime as $ext => $mimes ) {
		foreach ( explode( ' ', $mimes ) as $mime ) {
			$this->extToMimes[$ext][] = $mime;
		}
	}

	$rawInfo = $this->extraInfo;
	if ( $this->infoFile === self::USE_INTERNAL ) {
		$this->mimeTypeAliases = MimeMap::MIME_TYPE_ALIASES;
		$this->mediaTypes = MimeMap::MEDIA_TYPES;
	} else {
		$this->mimeTypeAliases = MimeMapMinimal::MIME_TYPE_ALIASES;
		$this->mediaTypes = MimeMapMinimal::MEDIA_TYPES;
		if ( $this->infoFile ) {
			$rawInfo = $this->readDefinitionFile( $this->infoFile ) . "\n" . $this->extraInfo;
		}
	}
	if ( $rawInfo ) {
		$this->parseMimeInfo( $rawInfo );
	}
}

/**
 * Read a MIME definition file, treating an unreadable file as empty.
 *
 * A failed read is logged instead of being silently coerced to an empty
 * string, so a misconfigured path is visible in the logs while the
 * built-in map and extension-provided entries still load.
 *
 * @param string $path
 * @return string File contents, or '' if the file could not be read
 */
private function readDefinitionFile( string $path ): string {
	$contents = file_get_contents( $path );
	if ( $contents === false ) {
		$this->logger->warning( __METHOD__ . ": could not read MIME definition file $path" );
		return '';
	}
	return $contents;
}
157 | |
/**
 * Parse "mime.types"-style text and merge it into $this->mimeToExts.
 *
 * Each non-empty line that does not start with '#' has the form
 * "<mime type> <ext> [<ext> ...]". Lines are lowercased; lines that name a
 * MIME type without any extension are ignored. Extensions are appended to
 * whatever is already registered for the MIME type, without duplicates.
 *
 * @param string $rawMimeTypes
 */
protected function parseMimeTypes( string $rawMimeTypes ): void {
	$rawMimeTypes = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $rawMimeTypes );
	$rawMimeTypes = str_replace( "\t", " ", $rawMimeTypes );

	foreach ( explode( "\n", $rawMimeTypes ) as $line ) {
		$line = strtolower( trim( $line ) );
		if ( $line === '' || str_starts_with( $line, '#' ) ) {
			continue;
		}

		// A usable line needs a space between the MIME type and its first extension.
		if ( !str_contains( $line, ' ' ) ) {
			continue;
		}

		$tokens = preg_split( '/\s+/', $line, -1, PREG_SPLIT_NO_EMPTY );
		// Compare the count rather than testing truthiness of the extension text,
		// so that an extension such as "0" is not mistaken for "no extension".
		if ( !$tokens || count( $tokens ) < 2 ) {
			continue;
		}

		$mime = array_shift( $tokens );
		$this->mimeToExts[$mime] = array_values( array_unique(
			array_merge( $this->mimeToExts[$mime] ?? [], $tokens ) ) );
	}
}
190 | |
/**
 * Parse "mime.info"-style text into $this->mediaTypes and $this->mimeTypeAliases.
 *
 * Each non-empty line that does not start with '#' lists one or more MIME
 * types and may carry a "[MEDIATYPE]" tag. All listed MIME types are filed
 * under that media type (MEDIATYPE_UNKNOWN when there is no tag), and every
 * MIME type after the first is registered as an alias of the first.
 * Lines without any space are ignored.
 *
 * @param string $rawMimeInfo
 */
protected function parseMimeInfo( string $rawMimeInfo ): void {
	$normalized = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $rawMimeInfo );
	$normalized = str_replace( "\t", " ", $normalized );

	$tagPattern = '!\[\s*(\w+)\s*\]!';

	foreach ( explode( "\n", $normalized ) as $line ) {
		$line = trim( $line );
		if ( $line === '' || str_starts_with( $line, '#' ) ) {
			continue;
		}

		$line = strtolower( $line );
		if ( strpos( $line, ' ' ) === false ) {
			continue;
		}

		// Pull out the "[mediatype]" tag; the first tag names the media type,
		// and every tag is stripped from the line before it is tokenized.
		$found = [];
		if ( preg_match( $tagPattern, $line, $found ) ) {
			$line = preg_replace( $tagPattern, '', $line );
			$mediaType = trim( strtoupper( $found[1] ) );
		} else {
			$mediaType = MEDIATYPE_UNKNOWN;
		}

		$mimes = preg_split( '/\s+/', $line, -1, PREG_SPLIT_NO_EMPTY );

		$this->mediaTypes[$mediaType] ??= [];
		foreach ( $mimes as $entry ) {
			$entry = trim( $entry );
			if ( $entry ) {
				$this->mediaTypes[$mediaType][] = $entry;
			}
		}

		// The first MIME type on the line is canonical; the rest are aliases of it.
		foreach ( array_slice( $mimes, 1 ) as $alias ) {
			$this->mimeTypeAliases[$alias] = $mimes[0];
		}
	}
}
244 | |
/**
 * Replace the logger used for diagnostic messages.
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
248 | |
/**
 * Queue additional "MIME type => file extensions" definitions.
 *
 * The text is appended, on a new line, to the extra types that loadFiles()
 * parses together with the type file.
 *
 * As an extension author, you are encouraged to submit patches to
 * MediaWiki's core to add new MIME types to MimeMap.php.
 *
 * @param string $types
 */
public function addExtraTypes( string $types ): void {
	$this->extraTypes = "{$this->extraTypes}\n{$types}";
}
260 | |
/**
 * Queue additional "MIME type => media type" definitions.
 *
 * The text is appended, on a new line, to the extra info that loadFiles()
 * parses together with the info file.
 *
 * As an extension author, you are encouraged to submit patches to
 * MediaWiki's core to add new MIME info to MimeMap.php.
 *
 * @param string $info
 */
public function addExtraInfo( string $info ): void {
	$this->extraInfo = "{$this->extraInfo}\n{$info}";
}
272 | |
/**
 * Returns the file extensions for a given MIME type as a single
 * space-separated string, or null if no extensions are known for it.
 * MIME type aliases are resolved.
 *
 * @deprecated since 1.35 Use getExtensionsFromMimeType() instead.
 * @param string $mime
 * @return string|null
 */
public function getExtensionsForType( $mime ) {
	$extensions = $this->getExtensionsFromMimeType( $mime );
	if ( !$extensions ) {
		return null;
	}
	return implode( ' ', $extensions );
}
286 | |
/**
 * Returns the file extensions associated with a given MIME type.
 *
 * The lookup is case-insensitive. A MIME type that has no extensions of its
 * own but is a known alias is resolved to its canonical type first. An empty
 * array is returned when nothing is known about the MIME type.
 *
 * @since 1.35
 * @param string $mime
 * @return string[]
 */
public function getExtensionsFromMimeType( string $mime ): array {
	$key = strtolower( $mime );

	// A direct hit wins over alias resolution.
	if ( isset( $this->mimeToExts[$key] ) ) {
		return $this->mimeToExts[$key];
	}

	$canonical = $this->mimeTypeAliases[$key] ?? null;
	if ( $canonical === null ) {
		return [];
	}

	return $this->mimeToExts[$canonical] ?? [];
}
303 | |
/**
 * Returns the MIME types associated with a given file extension.
 *
 * The lookup is case-insensitive. An empty array is returned when the
 * extension is not associated with any MIME type.
 *
 * @since 1.35
 * @param string $ext
 * @return string[]
 */
public function getMimeTypesFromExtension( string $ext ): array {
	$key = strtolower( $ext );
	if ( isset( $this->extToMimes[$key] ) ) {
		return $this->extToMimes[$key];
	}
	return [];
}
317 | |
/**
 * Returns a single MIME type for a given file extension, or null if the
 * extension is unknown. This is always the first entry of the list returned
 * by getMimeTypesFromExtension( $ext ).
 *
 * @since 1.35
 * @param string $ext
 * @return string|null
 */
public function getMimeTypeFromExtensionOrNull( string $ext ): ?string {
	return $this->getMimeTypesFromExtension( $ext )[0] ?? null;
}
330 | |
/**
 * Returns a single MIME type for a given file extension, or null if the
 * extension is unknown. Thin legacy wrapper that delegates to
 * getMimeTypeFromExtensionOrNull().
 *
 * @deprecated since 1.35 Use getMimeTypeFromExtensionOrNull() instead.
 * @param string $ext
 * @return string|null
 */
public function guessTypesForExtension( $ext ) {
	$type = $this->getMimeTypeFromExtensionOrNull( $ext );
	return $type;
}
342 | |
/**
 * Returns the MIME types for a given file extension as a single
 * space-separated string, or null if the extension was unrecognized.
 *
 * @deprecated since 1.35 Use getMimeTypesFromExtension() instead.
 * @param string $ext
 * @return string|null
 */
public function getTypesForExtension( $ext ) {
	$mimeTypes = $this->getMimeTypesFromExtension( $ext );
	if ( !$mimeTypes ) {
		return null;
	}
	return implode( ' ', $mimeTypes );
}
355 | |
/**
 * Returns a single file extension for a given MIME type, or null if the
 * MIME type is unknown. This is always the first extension of the list
 * returned by getExtensionsFromMimeType( $mime ).
 *
 * @since 1.35
 * @param string $mime
 * @return string|null
 */
public function getExtensionFromMimeTypeOrNull( string $mime ): ?string {
	return $this->getExtensionsFromMimeType( $mime )[0] ?? null;
}
368 | |
/**
 * Tests if the extension matches the given MIME type. Returns true if a
 * match was found, null if the MIME type is unknown, and false if the
 * MIME type is known but no matches were found.
 *
 * The extension is compared case-insensitively against the list returned
 * by getExtensionsFromMimeType().
 *
 * @param string $extension
 * @param string $mime
 * @return bool|null
 */
public function isMatchingExtension( string $extension, string $mime ): ?bool {
	$exts = $this->getExtensionsFromMimeType( $mime );

	if ( !$exts ) {
		return null; // Unknown MIME type
	}

	// Strict comparison: with loose comparison PHP compares numeric-looking
	// strings by value, so e.g. "1e1" would be treated as matching "10".
	return in_array( strtolower( $extension ), $exts, true );
}
387 | |
/**
 * Returns true if the MIME type is known to represent an image format
 * supported by the PHP GD library.
 *
 * Emits a deprecation notice through wfDeprecated() on every call.
 *
 * @deprecated since 1.40
 * @param string $mime
 * @return bool
 */
public function isPHPImageType( string $mime ): bool {
	wfDeprecated( __METHOD__, '1.40' );

	// As defined by getimagesize and image_type_to_mime
	static $gdTypes = [
		'image/gif',
		'image/jpeg',
		'image/png',
		'image/x-bmp',
		'image/xbm',
		'image/tiff',
		'image/jp2',
		'image/jpeg2000',
		'image/iff',
		'image/x-xbitmap',
		'image/vnd.wap.wbmp',
		'image/vnd.xiff',
		'image/x-photoshop',
		'application/x-shockwave-flash',
	];

	return in_array( $mime, $gdTypes, true );
}
411 | |
/**
 * Returns true if the extension represents a type which can
 * be reliably detected from its content. Use this to determine
 * whether strict content checks should be applied to reject
 * invalid uploads; if we can't identify the type we won't
 * be able to say if it's invalid.
 *
 * The check is case-insensitive.
 *
 * @todo Be more accurate when using fancy MIME detector plugins;
 *   right now this is the bare minimum getimagesize() list.
 * @param string $extension
 * @return bool
 */
public function isRecognizableExtension( string $extension ): bool {
	// Built once per request and kept as a key lookup table.
	static $recognizable = null;

	if ( $recognizable === null ) {
		$recognizable = array_fill_keys( [
			// Types recognized by getimagesize()
			'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
			'bmp', 'tiff', 'tif', 'jpc', 'jp2',
			'jpx', 'jb2', 'swc', 'iff', 'wbmp',
			'xbm',

			// Formats we recognize magic numbers for
			'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx', 'opus',
			'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
			'webp', 'mp3',

			// XML formats we sure hope we recognize reliably
			'svg',

			// 3D formats
			'stl',
		], true );
	}

	return isset( $recognizable[strtolower( $extension )] );
}
445 | |
/**
 * Improves a MIME type using the file extension. Some file formats are very generic,
 * so their MIME type is not very meaningful. A more useful MIME type can be derived
 * by looking at the file extension. Typically, this method would be called on the
 * result of guessMimeType().
 *
 * Three cases are refined: an undetected type ('unknown/unknown'), the generic
 * OPC/ODF container types, and 'text/plain' paired with a textual extension.
 * Afterwards the optional extCallback may adjust the result, and a known alias
 * is replaced by its canonical type.
 *
 * XXX: Null-returning behavior is probably an accident and definitely confusing (T253483).
 *
 * @param string $mime The MIME type, typically guessed from a file's content.
 * @param string $ext The file extension, as taken from the file name
 * @return string|null The improved MIME type, or null if the extension has no
 *   known MIME type in a case where the extension's type is used.
 */
public function improveTypeFromExtension( string $mime, string $ext ): ?string {
	$genericContainers = [ 'application/x-opc+zip', 'application/vnd.oasis.opendocument' ];

	if ( $mime === 'unknown/unknown' ) {
		if ( !$this->isRecognizableExtension( $ext ) ) {
			// Not something we can detect, so simply
			// trust the file extension
			$mime = $this->getMimeTypeFromExtensionOrNull( $ext );
		} else {
			// Content detection should have worked for this extension, so the
			// file is suspicious; keep 'unknown/unknown'.
			$this->logger->info( __METHOD__ . ': refusing to guess mime type for .' .
				"$ext file, we should have recognized it" );
		}
	} elseif ( in_array( $mime, $genericContainers, true ) ) {
		if ( !$this->isMatchingExtension( $ext, $mime ) ) {
			$this->logger->info( __METHOD__ .
				": refusing to guess better type for $mime file, " .
				".$ext is not a known OPC/ODF extension." );
			$mime = 'application/zip';
		} else {
			// A known file extension for an OPC/ODF file,
			// find the proper MIME type for that file extension
			$mime = $this->getMimeTypeFromExtensionOrNull( $ext );
		}
	} elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
		// Textual types are sometimes not recognized properly.
		// If detected as text/plain, and has an extension which is textual
		// improve to the extension's type. For example, csv and json are often
		// misdetected as text/plain.
		$mime = $this->getMimeTypeFromExtensionOrNull( $ext );
	}

	# Media handling extensions can improve the MIME detected
	$extensionHook = $this->extCallback;
	if ( $extensionHook ) {
		$extensionHook( $this, $ext, $mime /* by reference */ );
	}

	// Collapse a known alias to its canonical MIME type.
	if ( $mime !== null ) {
		$mime = $this->mimeTypeAliases[$mime] ?? $mime;
	}

	$this->logger->info( __METHOD__ . ": improved mime type for .$ext: $mime" );
	return $mime;
}
503 | |
/**
 * MIME type detection. This first tries the built-in content checks, which
 * determine some well known file formats that may be missed or misinterpreted
 * by the default MIME detection (namely XML based formats like XHTML or SVG,
 * as well as ZIP based formats like OPC/ODF files), and falls back to
 * detectMimeType() when they yield nothing. A known alias is replaced by its
 * canonical type before returning.
 *
 * @param string $file The file to check
 * @param string|bool $ext The file extension, or true (default) to extract
 *   it from the filename. Set it to false to ignore the extension. DEPRECATED!
 *   Set to false, use improveTypeFromExtension($mime, $ext) later to improve MIME type.
 * @return string The MIME type of $file
 */
public function guessMimeType( string $file, $ext = true ): string {
	$self = __METHOD__;

	if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
		$this->logger->info( $self .
			": WARNING: use of the \$ext parameter is deprecated. " .
			"Use improveTypeFromExtension(\$mime, \$ext) instead." );
	}

	$guess = $this->doGuessMimeType( $file );

	if ( !$guess ) {
		// The built-in checks found nothing; hand over to the generic detector.
		$this->logger->info( $self .
			": internal type detection failed for $file (.$ext)..." );
		$guess = $this->detectMimeType( $file, $ext );
	}

	// Collapse a known alias to its canonical MIME type.
	if ( isset( $this->mimeTypeAliases[$guess] ) ) {
		$guess = $this->mimeTypeAliases[$guess];
	}

	$this->logger->info( $self . ": guessed mime type of $file: $guess" );
	return $guess;
}
539 | |
/**
 * Guess the MIME type from the file contents.
 *
 * Reads the first 16 KiB and up to the last 65558 bytes of the file and runs
 * a fixed sequence of checks, returning at the first one that matches:
 * magic-number prefixes, EBML (WebM/Matroska), WebP, JPEG2000, MS compound
 * binary files, PHP, XML, shebang scripts, ZIP, STL, getimagesize(), and
 * finally the optional guessCallback. The order of the checks matters.
 *
 * NOTE(review): the file handle is not explicitly fclose()d in this method;
 * presumably it is released when $f goes out of scope - confirm.
 *
 * @param string $file Path of the file to inspect
 * @return bool|string The detected MIME type; 'unknown/unknown' if the file
 *   cannot be opened, its size cannot be determined, or it is an unrecognized
 *   EBML file; false if no check matched and guessCallback did not set a type
 * @throws UnexpectedValueException If seeking to the tail of the file fails
 */
private function doGuessMimeType( string $file ) {
	// Read a chunk of the file
	AtEase::suppressWarnings();
	$f = fopen( $file, 'rb' );
	AtEase::restoreWarnings();

	if ( !$f ) {
		return 'unknown/unknown';
	}

	$fsize = filesize( $file );
	if ( $fsize === false ) {
		return 'unknown/unknown';
	}

	// $head is the first 1024 bytes; $head16k extends it to the first 16 KiB.
	$head = fread( $f, 1024 );
	$head16k = $head . fread( $f, 16384 - 1024 ); // some WebM files have big headers
	$tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
	if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
		throw new UnexpectedValueException(
			"Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
	}
	// An empty file has no tail to read.
	$tail = $tailLength ? fread( $f, $tailLength ) : '';

	$this->logger->info( __METHOD__ .
		": analyzing head and tail of $file for magic numbers." );

	// Hardcode a few magic number checks...
	// Keys are byte prefixes of the file; the first matching entry wins.
	$headers = [
		// Multimedia...
		'MThd' => 'audio/midi',
		'OggS' => 'application/ogg',
		'ID3' => 'audio/mpeg',
		"\xff\xfb" => 'audio/mpeg', // MPEG-1 layer 3
		"\xff\xf3" => 'audio/mpeg', // MPEG-2 layer 3 (lower sample rates)
		"\xff\xe3" => 'audio/mpeg', // MPEG-2.5 layer 3 (very low sample rates)

		// Image formats...
		// Note that WMF may have a bare header, no magic number.
		"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
		"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
		'%PDF' => 'application/pdf',
		'gimp xcf' => 'image/x-xcf',

		// Some forbidden fruit...
		'MZ' => 'application/octet-stream', // DOS/Windows executable
		"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
		"\x7fELF" => 'application/octet-stream', // ELF binary
	];

	foreach ( $headers as $magic => $candidate ) {
		if ( str_starts_with( $head, $magic ) ) {
			$this->logger->info( __METHOD__ .
				": magic header in $file recognized as $candidate" );
			return $candidate;
		}
	}

	/* Look for WebM and Matroska files */
	if ( str_starts_with( $head16k, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ) ) ) {
		// Locate the two-byte marker that precedes the doctype string.
		$doctype = strpos( $head16k, "\x42\x82" );
		if ( $doctype ) {
			// Next byte is datasize, then data (sizes larger than 1 byte are stupid muxers)
			$data = substr( $head16k, $doctype + 3, 8 );
			if ( str_starts_with( $data, "matroska" ) ) {
				$this->logger->info( __METHOD__ . ": recognized file as video/x-matroska" );
				return "video/x-matroska";
			}

			if ( str_starts_with( $data, "webm" ) ) {
				// XXX HACK look for a video track, if we don't find it, this is an audio file
				// This detection is very naive and doesn't parse the actual fields
				// 0x86 byte indicates start of codecname field
				// next byte is a variable length integer (vint) for the size of the value following it
				// 8 (first bit is 1) indicates the smallest size vint, a single byte
				// (unlikely we see larger vints here)
				// 5 indicates a length of 5 ( V_VP8 or V_VP9 or V_AV1 )
				// Sometimes we see 0x86 instead of 0x85 because a
				// non-conforming muxer wrote a null terminated string
				$videotrack = str_contains( $head16k, "\x86\x85V_VP8" ) ||
					str_contains( $head16k, "\x86\x85V_VP9" ) ||
					str_contains( $head16k, "\x86\x85V_AV1" ) ||
					str_contains( $head16k, "\x86\x86V_VP8\x0" ) ||
					str_contains( $head16k, "\x86\x86V_VP9\x0" ) ||
					str_contains( $head16k, "\x86\x86V_AV1\x0" );

				if ( $videotrack ) {
					// There is a video track, so this is a video file.
					$this->logger->info( __METHOD__ . ": recognized file as video/webm" );
					return "video/webm";
				}

				$this->logger->info( __METHOD__ . ": recognized file as audio/webm" );
				return "audio/webm";
			}
		}
		// EBML magic matched, but the doctype is missing or is neither matroska nor webm.
		$this->logger->info( __METHOD__ . ": unknown EBML file" );
		return "unknown/unknown";
	}

	/* Look for WebP */
	if ( str_starts_with( $head, "RIFF" ) && substr( $head, 8, 7 ) === "WEBPVP8" ) {
		$this->logger->info( __METHOD__ . ": recognized file as image/webp" );
		return "image/webp";
	}

	/* Look for JPEG2000 */
	if ( str_starts_with( $head, "\x00\x00\x00\x0cjP\x20\x20\x0d\x0a\x87\x0a" ) ) {
		$this->logger->info( __METHOD__ . ": recognized as JPEG2000" );
		// we skip 4 bytes
		if ( substr( $head, 16, 8 ) === "ftypjp2 " ) {
			$this->logger->info( __METHOD__ . ": recognized file as image/jp2" );
			return 'image/jp2';
		} elseif ( substr( $head, 16, 8 ) === "ftypjpx " ) {
			$this->logger->info( __METHOD__ . ": recognized file as image/jpx" );
			return 'image/jpx';
		}
		// Any other brand falls through to the remaining checks.
	}

	/* Look for MS Compound Binary (OLE) files */
	if ( str_starts_with( $head, "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" ) ) {
		$this->logger->info( __METHOD__ . ': recognized MS CFB (OLE) file' );
		// The open handle is handed over for further inspection.
		return $this->detectMicrosoftBinaryType( $f );
	}

	/**
	 * Look for PHP. Check for this before HTML/XML... Warning: this is a
	 * heuristic, and won't match a file with a lot of non-PHP before. It
	 * will also match text files which could be PHP. :)
	 *
	 * @todo FIXME: For this reason, the check is probably useless -- an attacker
	 * could almost certainly just pad the file with a lot of nonsense to
	 * circumvent the check in any case where it would be a security
	 * problem. On the other hand, it causes harmful false positives (bug
	 * 16583). The heuristic has been cut down to exclude three-character
	 * strings like "<? ", but should it be axed completely?
	 */
	// Only the first 1024 bytes are searched; the patterns with NUL bytes
	// interleaved cover the same openers in a two-byte-per-character encoding.
	if ( ( strpos( $head, '<?php' ) !== false ) ||
		( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
		( strpos( $head, "<\x00?\x00 " ) !== false ) ||
		( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
		( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
		( strpos( $head, "<\x00?\x00=" ) !== false )
	) {
		$this->logger->info( __METHOD__ . ": recognized $file as application/x-php" );
		return 'application/x-php';
	}

	/**
	 * look for XML formats (XHTML and SVG)
	 */
	AtEase::suppressWarnings();
	$xml = new XmlTypeCheck( $file );
	AtEase::restoreWarnings();
	if ( $xml->wellFormed ) {
		// Map the root element to a MIME type, defaulting to generic XML.
		$xmlTypes = $this->xmlTypes;
		// @phan-suppress-next-line PhanTypeMismatchDimFetch False positive
		return $xmlTypes[$xml->getRootElement()] ?? 'application/xml';
	}

	/**
	 * look for shell scripts
	 */
	$script_type = null;

	# detect by shebang
	# (optionally preceded by a UTF-8 or UTF-16 byte order mark)
	if ( str_starts_with( $head, "#!" ) ) {
		$script_type = "ASCII";
	} elseif ( str_starts_with( $head, "\xef\xbb\xbf#!" ) ) {
		$script_type = "UTF-8";
	} elseif ( str_starts_with( $head, "\xfe\xff\x00#\x00!" ) ) {
		$script_type = "UTF-16BE";
	} elseif ( str_starts_with( $head, "\xff\xfe#\x00!" ) ) {
		$script_type = "UTF-16LE";
	}

	if ( $script_type ) {
		if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
			// Quick and dirty fold down to ASCII!
			// Skip the 2-byte BOM, decode 16-bit units, and replace anything
			// outside the ASCII range with '?'. Note that this overwrites $head.
			$pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
			$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
			$head = '';
			foreach ( $chars as $codepoint ) {
				if ( $codepoint < 128 ) {
					$head .= chr( $codepoint );
				} else {
					$head .= '?';
				}
			}
		}

		$match = [];

		// Capture the word following the first path-like segment; it is used
		// to build the "application/x-<name>" type.
		if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
			$mime = "application/x-{$match[2]}";
			$this->logger->info( __METHOD__ . ": shell script recognized as $mime" );
			return $mime;
		}
	}

	// Check for ZIP variants (before getimagesize)
	// The end-of-central-directory record (EOCDR) is at least 22 bytes long.
	$eocdrPos = strpos( $tail, "PK\x05\x06" );
	if ( $eocdrPos !== false && $eocdrPos <= strlen( $tail ) - 22 ) {
		$this->logger->info( __METHOD__ . ": ZIP signature present in $file" );
		// Check if it really is a ZIP file, make sure the EOCDR is at the end (T40432)
		// NOTE(review): the ZIP format stores this length little-endian, but "n"
		// unpacks big-endian - confirm whether this is intended.
		$commentLength = unpack( "n", substr( $tail, $eocdrPos + 20 ) )[1];
		if ( $eocdrPos + 22 + $commentLength !== strlen( $tail ) ) {
			$this->logger->info( __METHOD__ . ": ZIP EOCDR not at end. Not a ZIP file." );
		} else {
			// The open handle is handed over for further inspection.
			return $this->detectZipTypeFromFile( $f );
		}
	}

	// Check for STL (3D) files
	// @see https://en.wikipedia.org/wiki/STL_(file_format)
	if ( $fsize >= 15 &&
		stripos( $head, 'SOLID ' ) === 0 &&
		preg_match( '/\RENDSOLID .*$/i', $tail ) ) {
		// ASCII STL file
		return 'application/sla';
	} elseif ( $fsize > 84 ) {
		// binary STL file
		// Bytes 80-83 are read as a little-endian triangle count; the file size
		// must equal the 84 bytes before the triangle data plus 50 bytes per triangle.
		$triangles = substr( $head, 80, 4 );
		$triangles = unpack( 'V', $triangles );
		$triangles = reset( $triangles );
		if ( $triangles !== false && $fsize === 84 + ( $triangles * 50 ) ) {
			return 'application/sla';
		}
	}

	AtEase::suppressWarnings();
	$gis = getimagesize( $file );
	AtEase::restoreWarnings();

	if ( $gis && isset( $gis['mime'] ) ) {
		$mime = $gis['mime'];
		$this->logger->info( __METHOD__ . ": getimagesize detected $file as $mime" );
		return $mime;
	}

	# Media handling extensions can guess the MIME by content
	# It's intentionally here so that if core is wrong about a type (false positive),
	# people will hopefully nag and submit patches :)
	$mime = false;
	# Some strings by reference for performance - assuming well-behaved hooks
	$callback = $this->guessCallback;
	if ( $callback ) {
		$callback( $this, $head, $tail, $file, $mime /* by reference */ );
	}

	// Still false here if no callback is configured or it did not set a type.
	return $mime;
}
799 | |
/**
 * Work out which application-specific format a ZIP archive holds.
 *
 * Every entry name reported by ZipDirectoryReader is inspected for
 * well-known marker files. When no marker is found, or the archive cannot
 * be read, the generic 'application/zip' is returned.
 *
 * @internal
 * @param resource $handle
 * @return string
 */
public function detectZipTypeFromFile( $handle ) {
	$found = [];

	$collect = static function ( $entry ) use ( &$found ) {
		$fullName = $entry['name'];
		$candidates = [ $fullName ];

		// JDK's ZIP_GetEntry() falls back to strcmp() when the name hashes
		// match, so a name of the form ".class\0<padding>" whose hash collides
		// with the truncated name could be served as the .class file. Check the
		// name as cut off at the first null character as well.
		$nul = strpos( $fullName, "\000" );
		if ( $nul !== false ) {
			$candidates[] = substr( $fullName, 0, $nul );
		}

		// ZIP_GetEntry() strips a trailing slash, so tolerate one here too.
		if ( preg_grep( '!\.class/?$!', $candidates ) ) {
			$found[] = 'application/java';
		}

		if ( $fullName === '[Content_Types].xml' ) {
			$found[] = 'application/x-opc+zip';
		} elseif ( $fullName === 'mimetype' ) {
			$found[] = 'application/vnd.oasis.opendocument';
		}
	};

	$status = ZipDirectoryReader::readHandle( $handle, $collect );

	if ( !$status->isOK() ) {
		$this->logger->info( "Error reading zip file: " . (string)$status );
		// This could be unknown/unknown but we have some weird phpunit test cases
		return 'application/zip';
	}

	// For security, java detection takes precedence over any other marker
	if ( in_array( 'application/java', $found, true ) ) {
		return 'application/java';
	}

	// Otherwise the first marker seen wins; plain ZIP when there was none
	return $found[0] ?? 'application/zip';
}
853 | |
/**
 * Identify a Microsoft Compound Binary (OLE) document, i.e. one of the
 * old style pre-ODF formats such as .doc and .xls.
 *
 * The handle is passed to MSCompoundFileReader; 'unknown/unknown' is
 * returned when the reader flags the file as invalid or reports no MIME type.
 *
 * @param resource $handle An opened seekable file handle
 * @return string The detected MIME type
 */
private function detectMicrosoftBinaryType( $handle ): string {
	$result = MSCompoundFileReader::readHandle( $handle );

	// Success path first: a valid container with a known subtype
	if ( $result['valid'] && $result['mime'] ) {
		return $result['mime'];
	}

	// Log why detection failed before giving up
	if ( !$result['valid'] ) {
		$this->logger->info( __METHOD__ . ': invalid file format' );
	} else {
		$this->logger->info( __METHOD__ . ": unrecognised document subtype" );
	}

	return 'unknown/unknown';
}
873 | |
/**
 * Internal MIME type detection.
 *
 * The configured detect callback is used when one is set; otherwise PHP's
 * mime_content_type() (fileinfo) is called. The result is normalized
 * (parameters such as the charset are stripped, then it is trimmed and
 * lower-cased) and returned unless it contains "unknown".
 *
 * If that fails and $ext is not false, the type is looked up from the file
 * extension using getMimeTypeFromExtensionOrNull(), except for extensions
 * that isRecognizableExtension() says should have been detected by content.
 * When nothing matches, 'unknown/unknown' is returned.
 *
 * @param string $file The file to check
 * @param string|bool $ext The file extension, or true (default) to extract it from the filename.
 *   Set it to false to ignore the extension. DEPRECATED! Set to false, use
 *   improveTypeFromExtension($mime, $ext) later to improve MIME type.
 * @return string The MIME type of $file
 */
private function detectMimeType( string $file, $ext = true ): string {
	/** @todo Make $ext default to false. Or better, remove it. */
	if ( $ext ) {
		$this->logger->info( __METHOD__ .
			": WARNING: use of the \$ext parameter is deprecated. "
			. "Use improveTypeFromExtension(\$mime, \$ext) instead." );
	}

	// Content-based detection: custom callback first, fileinfo otherwise
	$detector = $this->detectCallback;
	$detected = $detector ? $detector( $file ) : mime_content_type( $file );

	if ( $detected ) {
		# normalize: strip charset etc., trim and lower-case
		$detected = strtolower( trim( preg_replace( '![;, ].*$!', '', $detected ) ) );

		if ( !str_contains( $detected, 'unknown' ) ) {
			$this->logger->info( __METHOD__ . ": magic mime type of $file: $detected" );
			return $detected;
		}
	}

	// If desired, look at extension as a fallback.
	if ( $ext === true ) {
		$dot = strrpos( $file, '.' );
		$ext = strtolower( $dot ? substr( $file, $dot + 1 ) : '' );
	}

	if ( $ext && $this->isRecognizableExtension( $ext ) ) {
		$this->logger->info( __METHOD__ . ": refusing to guess mime type for .$ext file, "
			. "we should have recognized it" );
	} elseif ( $ext ) {
		$fromExt = $this->getMimeTypeFromExtensionOrNull( $ext );
		if ( $fromExt ) {
			$this->logger->info( __METHOD__ . ": extension mime type of $file: $fromExt" );
			return $fromExt;
		}
	}

	// Unknown type
	$this->logger->info( __METHOD__ . ": failed to guess mime type for $file!" );
	return 'unknown/unknown';
}
939 | |
/**
 * Determine the media type code for a file, using its MIME type, name and
 * possibly its contents.
 *
 * Lookups are delegated to findMediaType() and tried in this order: the
 * full MIME type, the file extension taken from $path, and finally the
 * major part of the MIME type. 'application/ogg' is special-cased: the
 * start of the file is sniffed for codec names to tell video from audio.
 *
 * @todo analyse file if need be
 * @todo look at multiple extension, separately and together.
 *
 * @param string|null $path Full path to the image file, in case we have to look at the contents
 *   (if null, only the MIME type is used to determine the media type code).
 * @param string|null $mime MIME type. If null it will be guessed using guessMimeType.
 * @return string A value to be used with the MEDIATYPE_xxx constants.
 */
public function getMediaType( ?string $path = null, ?string $mime = null ): string {
	if ( !$mime && !$path ) {
		return MEDIATYPE_UNKNOWN;
	}

	// If MIME type is unknown, guess it
	if ( !$mime ) {
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable False positive
		$mime = $this->guessMimeType( $path, false );
	}

	// Special code for ogg - detect if it's video (theora),
	// else label it as sound.
	if ( $mime === 'application/ogg' && is_string( $path ) && file_exists( $path ) ) {
		// Read a chunk of the file. Ogg is a binary container, so open it in
		// binary mode; text mode may translate bytes on some platforms.
		$f = fopen( $path, 'rb' );
		if ( !$f ) {
			return MEDIATYPE_UNKNOWN;
		}
		$head = fread( $f, 256 );
		fclose( $f );

		// A failed read is treated like an empty header (no codec recognised)
		if ( $head === false ) {
			$head = '';
		}

		// Drop the encoder name so it is not mistaken for the theora codec
		$head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );

		// This is an UGLY HACK, file should be parsed correctly
		if ( str_contains( $head, 'theora' ) ) {
			return MEDIATYPE_VIDEO;
		}
		foreach ( [ 'vorbis', 'flac', 'speex', 'opus' ] as $audioCodec ) {
			if ( str_contains( $head, $audioCodec ) ) {
				return MEDIATYPE_AUDIO;
			}
		}
		return MEDIATYPE_MULTIMEDIA;
	}

	// Check for entry for full MIME type
	if ( $mime ) {
		$type = $this->findMediaType( $mime );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check for entry for file extension
	if ( $path ) {
		$i = strrpos( $path, '.' );
		$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );

		// TODO: look at multi-extension if this fails, parse from full path
		$type = $this->findMediaType( '.' . $e );
		if ( $type !== MEDIATYPE_UNKNOWN ) {
			return $type;
		}
	}

	// Check major MIME type
	if ( $mime ) {
		$i = strpos( $mime, '/' );
		if ( $i !== false ) {
			$type = $this->findMediaType( substr( $mime, 0, $i ) );
			if ( $type !== MEDIATYPE_UNKNOWN ) {
				return $type;
			}
		}
	}

	// Every lookup above either returned or yielded MEDIATYPE_UNKNOWN
	return MEDIATYPE_UNKNOWN;
}
1034 | |
/**
 * Look up the media code for a MIME type or a file extension.
 *
 * A leading dot (.) marks the argument as a file extension; anything else
 * is treated as a MIME type and is first resolved through the alias table.
 * The first media type whose list contains one of the candidate MIME types
 * is returned, MEDIATYPE_UNKNOWN if none does.
 *
 * @param string $extMime
 * @return int|string
 */
private function findMediaType( string $extMime ) {
	if ( str_starts_with( $extMime, '.' ) ) {
		// If it's an extension, look up the MIME types
		$candidates = $this->getMimeTypesFromExtension( substr( $extMime, 1 ) );
		if ( !$candidates ) {
			return MEDIATYPE_UNKNOWN;
		}
	} else {
		// Normalize MIME type: replace an alias by its canonical name
		$candidates = [ $this->mimeTypeAliases[$extMime] ?? $extMime ];
	}

	foreach ( $candidates as $candidate ) {
		foreach ( $this->mediaTypes as $mediaType => $mimeList ) {
			if ( in_array( $candidate, $mimeList, true ) ) {
				return $mediaType;
			}
		}
	}

	return MEDIATYPE_UNKNOWN;
}
1070 | |
/**
 * List the media types (MEDIATYPE_xxx constants) known to this analyzer,
 * i.e. the keys of the internal media type map.
 *
 * @return string[]
 */
public function getMediaTypes(): array {
	$codes = array_keys( $this->mediaTypes );

	return $codes;
}
1079 | |
/**
 * Tell whether a major MIME type is one of the values accepted by the
 * enum in the database schema.
 *
 * @since 1.42.0 (also backported to 1.39.7, 1.40.3 and 1.41.1)
 *
 * @param string $type
 * @return bool
 */
public function isValidMajorMimeType( string $type ): bool {
	// From maintenance/tables-generated.sql => img_major_mime
	$accepted = [
		'unknown',
		'application',
		'audio',
		'image',
		'text',
		'video',
		'message',
		'model',
		'multipart',
		'chemical',
	];

	$isAccepted = in_array( $type, $accepted, true );

	return $isAccepted;
}
1105 | } |
1106 | |
1107 | /** @deprecated class alias since 1.43 */ |
1108 | class_alias( MimeAnalyzer::class, 'MimeAnalyzer' ); |