MediaWiki master
UploadVerification.php
Go to the documentation of this file.
1<?php
10namespace MediaWiki\Upload;
11
18use Wikimedia\Mime\MimeAnalyzer;
20
34
/**
 * Encoding names that may appear in an XML declaration without the file
 * being flagged by checkXMLEncodingMismatch(). The declared name is
 * upper-cased before it is looked up here.
 */
private const SAFE_XML_ENCODINGS = [
	// Unicode and plain ASCII
	'UTF-8', 'UTF-16', 'UTF-32', 'US-ASCII',
	// ISO Latin
	'ISO-8859-1', 'ISO-8859-2',
	// Windows code pages
	'WINDOWS-1250', 'WINDOWS-1251', 'WINDOWS-1252',
	'WINDOWS-1253', 'WINDOWS-1254', 'WINDOWS-1255',
	'WINDOWS-1256', 'WINDOWS-1257', 'WINDOWS-1258',
];
52
/**
 * Configuration settings this service reads through its ServiceOptions.
 * The constructor asserts that the supplied options match this list, so
 * every setting fetched with $this->config->get() must be named here.
 */
public const CONSTRUCTOR_OPTIONS = [
	MainConfigNames::Antivirus,
	MainConfigNames::AntivirusRequired,
	MainConfigNames::AntivirusSetup,
	MainConfigNames::DisableUploadScriptChecks,
	MainConfigNames::MimeTypeExclusions,
	MainConfigNames::VerifyMimeType,
];
61
/**
 * @param ServiceOptions $config Options holder; validated against self::CONSTRUCTOR_OPTIONS
 * @param MimeAnalyzer $mimeAnalyzer Used to relate MIME types to file extensions
 * @param SVGCSSChecker $SVGCSSChecker Used to vet CSS found inside SVG files
 */
public function __construct(
	private ServiceOptions $config,
	private MimeAnalyzer $mimeAnalyzer,
	private SVGCSSChecker $SVGCSSChecker
) {
	// The promoted properties are already assigned here; a failed assertion
	// still aborts construction, exactly as it would before assignment.
	$this->config->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
}
80
/**
 * Check a MIME type against the configured exclusion list.
 *
 * Does nothing (and passes) when the VerifyMimeType setting is off.
 *
 * @param string $mime MIME type to check
 * @return array|true True if the type is acceptable, otherwise
 *  [ 'filetype-badmime', $mime ]
 */
private function verifyMimeType( $mime ) {
	$verifyMimeType = $this->config->get( MainConfigNames::VerifyMimeType );
	if ( $verifyMimeType ) {
		wfDebug( "mime: <$mime>" );
		$mimeTypeExclusions = $this->config
			->get( MainConfigNames::MimeTypeExclusions );
		if ( UploadBase::checkFileExtension( $mime, $mimeTypeExclusions ) ) {
			return [ 'filetype-badmime', $mime ];
		}
	}

	return true;
}
102
/**
 * Run every safety check on an uploaded file.
 *
 * Runs verifyPartialFile() first, then the checks that need the whole
 * file: MIME/extension agreement, strict SVG parsing, and the media
 * handler's own upload verification.
 *
 * @param string $path Filesystem path of the file to check
 * @param string $ext File extension; '' skips the MIME/extension comparison
 * @param array $fileProps File properties; 'mime' is read here, and the
 *  array is passed on to verifyPartialFile()
 * @return array|true True if all checks pass, otherwise an error array
 *  whose first element is a message key
 */
public function verifyFile( string $path, string $ext, array $fileProps ) {
	$checkMimeAgainstExt = $this->config->get( MainConfigNames::VerifyMimeType );
	$skipScriptChecks = $this->config->get( MainConfigNames::DisableUploadScriptChecks );

	$partialResult = $this->verifyPartialFile( $path, $ext, $fileProps );
	if ( $partialResult !== true ) {
		return $partialResult;
	}

	$mime = $fileProps['mime'];

	# XXX: Missing extension will be caught by validateName() via getTitle()
	if ( $checkMimeAgainstExt && $ext !== '' && !$this->verifyExtension( $mime, $ext ) ) {
		return [ 'filetype-mime-mismatch', $ext, $mime ];
	}

	# check for htmlish code and javascript
	if ( !$skipScriptChecks && ( $ext === 'svg' || $mime === 'image/svg+xml' ) ) {
		// Non-partial mode: malformed XML is itself an error
		$svgResult = $this->detectScriptInSvg( $path, false );
		if ( $svgResult !== false ) {
			return $svgResult;
		}
	}

	if ( $mime !== null ) {
		$handler = MediaHandler::getHandler( $mime );
		if ( $handler ) {
			$uploadStatus = $handler->verifyUpload( $path );
			if ( !$uploadStatus->isOK() ) {
				$errors = $uploadStatus->getErrorsArray();

				// Report only the first error
				return reset( $errors );
			}
		}
	}

	// TODO: Perhaps we should have a hook here akin to UploadVerifyFile
	// except that it doesn't pass an UploadBase, and it would run
	// even when someone is verifying a file not going through UploadBase.

	wfDebug( __METHOD__ . ": all clear; passing." );

	return true;
}
166
/**
 * Run the checks that remain meaningful on an incomplete file.
 *
 * Covers the MIME exclusion list, the script/HTML heuristics, a lenient
 * SVG scan (malformed XML is tolerated) and the virus scanner.
 *
 * @param string $path Filesystem path of the file to check
 * @param string $ext File extension
 * @param array $fileProps File properties; 'file-mime' is read here
 * @return array|true True if all checks pass, otherwise an error array
 *  whose first element is a message key
 */
public function verifyPartialFile( string $path, string $ext, array $fileProps ) {
	$skipScriptChecks = $this->config->get( MainConfigNames::DisableUploadScriptChecks );

	# check MIME type, if desired
	$mime = $fileProps['file-mime'];
	$mimeResult = $this->verifyMimeType( $mime );
	if ( $mimeResult !== true ) {
		return $mimeResult;
	}

	# check for htmlish code and javascript
	if ( !$skipScriptChecks ) {
		if ( $this->detectScript( $path, $mime, $ext ) ) {
			return [ 'uploadscripted' ];
		}

		$looksLikeSvg = $ext === 'svg' || $mime === 'image/svg+xml';
		if ( $looksLikeSvg ) {
			$svgResult = $this->detectScriptInSvg( $path, true );
			if ( $svgResult !== false ) {
				return $svgResult;
			}
		}
	}

	# Scan the uploaded file for viruses
	$virusReport = $this->detectVirus( $path );

	return $virusReport ? [ 'uploadvirus', $virusReport ] : true;
}
214
/**
 * Decide whether a detected MIME type is compatible with a file extension.
 *
 * @param string|null $mime Detected MIME type; empty, 'unknown' and
 *  'unknown/unknown' are treated as "not detected"
 * @param string $extension File extension to compare against
 * @return bool True if the file should pass, false if it should be rejected
 */
public function verifyExtension( $mime, $extension ) {
	$analyzer = $this->mimeAnalyzer;

	$mimeUnknown = !$mime || $mime === 'unknown' || $mime === 'unknown/unknown';
	if ( $mimeUnknown ) {
		// Without a detected type we can only judge by whether the
		// extension is one whose type we would expect to recognise.
		if ( $analyzer->isRecognizableExtension( $extension ) ) {
			wfDebug( __METHOD__ . ": rejecting file with unknown detected mime type; " .
				"recognized extension '$extension', so probably invalid file" );

			return false;
		}

		wfDebug( __METHOD__ . ": passing file with unknown detected mime type; " .
			"unrecognized extension '$extension', can't verify" );

		return true;
	}

	$matches = $analyzer->isMatchingExtension( $extension, $mime );

	if ( $matches === null ) {
		// No extension is registered for this MIME type
		if ( $analyzer->getMimeTypesFromExtension( $extension ) === [] ) {
			wfDebug( __METHOD__ . ": no file extension known for mime type $mime, passing file" );

			return true;
		}

		wfDebug( __METHOD__ . ": No extension known for $mime, but we know a mime for $extension" );

		return false;
	}

	if ( !$matches ) {
		wfDebug( __METHOD__
			. ": mime type $mime mismatches file extension $extension, rejecting file" );

		return false;
	}

	wfDebug( __METHOD__ . ": mime type $mime matches extension $extension, passing file" );

	return true;
}
264
/**
 * Heuristic check for content a browser might interpret as HTML or script.
 *
 * For 'text/*' MIME types the whole file is examined; otherwise only the
 * first 1024 bytes are read. The content is lower-cased, decoded from
 * UTF-16 when a byte-order mark is present, and then searched for an HTML
 * doctype, a handful of HTML tags, script MIME types and script URLs.
 *
 * @param string $file Path of the file to inspect
 * @param string|null $mime MIME type of the file
 * @param string|null $extension File extension; 'svg' (or an 'image/svg'
 *  MIME type) additionally triggers the XML encoding check
 * @return bool True if the file looks like it contains script or HTML,
 *  false otherwise (including when the file is empty or unreadable)
 */
public function detectScript( $file, $mime, $extension ) {
	# ugly hack: for text files, always look at the entire file.
	# For binary files, just check the first K.

	if ( str_starts_with( $mime ?? '', 'text/' ) ) {
		$chunk = file_get_contents( $file );
	} else {
		$fp = fopen( $file, 'rb' );
		if ( !$fp ) {
			return false;
		}
		$chunk = fread( $fp, 1024 );
		fclose( $fp );
	}

	if ( $chunk === false ) {
		// Read failure: nothing to inspect. Same outcome as before, but
		// without relying on false being coerced to an empty string.
		return false;
	}

	$chunk = strtolower( $chunk );

	if ( !$chunk ) {
		return false;
	}

	# decode from UTF-16 if needed (could be used for obfuscation).
	if ( str_starts_with( $chunk, "\xfe\xff" ) ) {
		$enc = 'UTF-16BE';
	} elseif ( str_starts_with( $chunk, "\xff\xfe" ) ) {
		$enc = 'UTF-16LE';
	} else {
		$enc = null;
	}

	if ( $enc !== null ) {
		// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
		$chunk = @iconv( $enc, "ASCII//IGNORE", $chunk );
	}

	// iconv() yields false on failure; the cast makes that an empty string
	$chunk = trim( (string)$chunk );

	wfDebug( __METHOD__ . ": checking for embedded scripts and HTML stuff" );

	# check for HTML doctype
	if ( preg_match( "/<!DOCTYPE *X?HTML/i", $chunk ) ) {
		return true;
	}

	// Some browsers will interpret obscure xml encodings as UTF-8, while
	// PHP/expat will interpret the given encoding in the xml declaration (T49304)
	if ( $extension === 'svg' || str_starts_with( $mime ?? '', 'image/svg' ) ) {
		if ( $this->checkXMLEncodingMismatch( $file ) ) {
			return true;
		}
	}

	// Quick check for HTML heuristics in old IE and Safari.
	//
	// The exact heuristics IE uses are checked separately via verifyMimeType(), so we
	// don't need them all here as it can cause many false positives.
	//
	// Check for `<script` and such still to forbid script tags and embedded HTML in SVG:
	$tags = [
		'<body',
		'<head',
		'<html', # also in safari
		'<script', # also in safari
	];

	foreach ( $tags as $tag ) {
		if ( str_contains( $chunk, $tag ) ) {
			wfDebug( __METHOD__ . ": found something that may make it be mistaken for html: $tag" );

			return true;
		}
	}

	/*
	 * look for JavaScript
	 */

	# resolve entity-refs to look at attributes. may be harsh on big files... cache result?
	$chunk = Sanitizer::decodeCharReferences( $chunk );

	# look for script-types
	if ( preg_match( '!type\s*=\s*[\'"]?\s*(?:\w*/)?(?:ecma|java)!im', $chunk ) ) {
		wfDebug( __METHOD__ . ": found script types" );

		return true;
	}

	# look for html-style script-urls
	if ( preg_match( '!(?:href|src|data)\s*=\s*[\'"]?\s*(?:ecma|java)script:!im', $chunk ) ) {
		wfDebug( __METHOD__ . ": found html-style script urls" );

		return true;
	}

	# look for css-style script-urls
	# (the parenthesis must be escaped directly by the backslash, with no
	# invisible character in between, or the pattern fails to compile)
	if ( preg_match( '!url\s*\(\s*[\'"]?\s*(?:ecma|java)script:!im', $chunk ) ) {
		wfDebug( __METHOD__ . ": found css-style script urls" );

		return true;
	}

	wfDebug( __METHOD__ . ": no scripts found" );

	return false;
}
386
/**
 * Check whether a file's XML declaration could make browsers and the XML
 * parser disagree about the document encoding.
 *
 * Only the first 4096 bytes are examined, both as-is and after
 * re-encoding from UTF-16/UTF-32 to UTF-8.
 *
 * @param string $file Path of the file to inspect
 * @return bool True if the declaration is unsafe (encoding not in
 *  self::SAFE_XML_ENCODINGS, unterminated declaration, or EBCDIC content),
 *  false otherwise
 */
private function checkXMLEncodingMismatch( $file ) {
	// https://mimesniff.spec.whatwg.org/#resource-header says browsers
	// should read the first 1445 bytes. Do 4096 bytes for good measure.
	// XML Spec says XML declaration if present must be first thing in file
	// other than BOM
	$contents = file_get_contents( $file, false, null, 0, 4096 );
	if ( $contents === false ) {
		// Unreadable file: nothing to inspect (same result as before,
		// without feeding false into the string functions below)
		return false;
	}

	$verdict = $this->inspectXmlDeclaration( $contents );
	if ( $verdict === true ) {
		return true;
	}
	// The EBCDIC signature is only consulted when no declaration start
	// was recognised in the raw bytes.
	if ( $verdict === null && str_starts_with( $contents, "\x4C\x6F\xA7\x94" ) ) {
		// EBCDIC encoded XML
		wfDebug( __METHOD__ . ": EBCDIC Encoded XML" );

		return true;
	}

	// It's possible the file is encoded with multibyte encoding, so re-encode attempt to
	// detect the encoding in case it specifies an encoding not allowed in self::SAFE_XML_ENCODINGS
	$attemptEncodings = [ 'UTF-16', 'UTF-16BE', 'UTF-32', 'UTF-32BE' ];
	foreach ( $attemptEncodings as $encoding ) {
		// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
		$str = @iconv( $encoding, 'UTF-8', $contents );
		if ( $str !== false && $str !== '' && $this->inspectXmlDeclaration( $str ) === true ) {
			return true;
		}
	}

	return false;
}

/**
 * Inspect the XML declaration, if any, in a chunk of text.
 *
 * @param string $text Leading portion of a document
 * @return bool|null True if the declaration is unsafe (names an encoding
 *  outside self::SAFE_XML_ENCODINGS, or is started but never closed),
 *  false if a complete declaration was found and is acceptable, null if
 *  no declaration start was found at all
 */
private function inspectXmlDeclaration( string $text ): ?bool {
	if ( preg_match( "!<\?xml\b(.*?)\?>!si", $text, $matches ) ) {
		$encodingRegex = '!encoding[ \t\n\r]*=[ \t\n\r]*[\'"](.*?)[\'"]!si';
		if ( preg_match( $encodingRegex, $matches[1], $encMatch )
			&& !in_array( strtoupper( $encMatch[1] ), self::SAFE_XML_ENCODINGS, true )
		) {
			wfDebug( __METHOD__ . ": Found unsafe XML encoding '{$encMatch[1]}'" );

			return true;
		}

		return false;
	}

	if ( preg_match( "!<\?xml\b!i", $text ) ) {
		// Start of XML declaration without an end in the first 4096
		// bytes. There shouldn't be a legitimate reason for this to happen.
		wfDebug( __METHOD__ . ": Unmatched XML declaration start" );

		return true;
	}

	return null;
}
448
/**
 * Parse a file as XML and run the SVG safety callbacks over it.
 *
 * @param string $filename Path of the file to parse
 * @param bool $partial True when the file may be incomplete; malformed
 *  XML is then tolerated instead of reported
 * @return array|false False if nothing objectionable was found, otherwise
 *  an error array ([ 'uploadinvalidxml' ] or whatever a callback returned)
 */
private function detectScriptInSvg( $filename, $partial ) {
	$extraHandlers = [
		'processing_instruction_handler' => $this->checkSvgPICallback( ... ),
		'external_dtd_handler' => $this->checkSvgExternalDTD( ... ),
	];
	$xmlCheck = new XmlTypeCheck(
		$filename,
		$this->checkSvgScriptCallback( ... ),
		true,
		$extraHandlers
	);

	if ( $xmlCheck->wellFormed === true ) {
		return $xmlCheck->filterMatch ? $xmlCheck->filterMatchType : false;
	}

	// Invalid xml (T60553)
	// But only when non-partial (T67724)
	return $partial ? false : [ 'uploadinvalidxml' ];
}
479
/**
 * Processing-instruction callback for the SVG XML check.
 *
 * @param string $target Processing instruction target
 * @param string $data Processing instruction content (not inspected)
 * @return array|false Error array if the instruction is an xml-stylesheet
 *  one, false otherwise
 */
private function checkSvgPICallback( $target, $data ) {
	// Don't allow external stylesheets (T59550)
	$isStylesheetPI = preg_match( '/xml-stylesheet/i', $target );

	return $isStylesheetPI ? [ 'upload-scripted-pi-callback' ] : false;
}
495
/**
 * External-DTD callback for the SVG XML check.
 *
 * @param string $type Doctype kind; only 'PUBLIC' is accepted
 * @param string $publicId Public identifier; must start with "-//W3C//"
 * @param string $systemId System identifier; must be one of the known SVG DTD URLs
 * @return array|false False if the DTD reference is acceptable, otherwise
 *  [ 'upload-scripted-dtd' ]
 */
private function checkSvgExternalDTD( $type, $publicId, $systemId ) {
	// This doesn't include the XHTML+MathML+SVG doctype since we don't
	// allow XHTML anyway.
	static $allowedDTDs = [
		'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd',
		'http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd',
		'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd',
		'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd',
		// https://phabricator.wikimedia.org/T168856
		'http://www.w3.org/TR/2001/PR-SVG-20010719/DTD/svg10.dtd',
	];

	$isKnownW3cDtd = $type === 'PUBLIC'
		&& in_array( $systemId, $allowedDTDs )
		&& str_starts_with( $publicId, "-//W3C//" );
	if ( $isKnownW3cDtd ) {
		return false;
	}

	return [ 'upload-scripted-dtd' ];
}
527
/**
 * Element callback for the SVG XML check: reject elements and attributes
 * that could run script or pull in remote resources.
 *
 * The checks run in a fixed order and the first one that fires decides
 * the returned error, so their order must not be changed casually.
 *
 * @param string $element Element name, prefixed with its namespace and a colon
 * @param array $attribs Attributes of the element, name => value; names
 *  may likewise carry a namespace prefix
 * @param string|null $data Text content of the element; only used for <style>
 * @return array|false False if the element is acceptable, otherwise an
 *  error array whose first element is a message key
 */
private function checkSvgScriptCallback( $element, $attribs, $data = null ) {
	[ $namespace, $strippedElement ] = self::splitXmlNamespace( $element );

	// We specifically don't include:
	// http://www.w3.org/1999/xhtml (T62771)
	static $validNamespaces = [
		'',
		'adobe:ns:meta/',
		'http://cipa.jp/exif/1.0/',
		'http://creativecommons.org/ns#',
		'http://developer.sonyericsson.com/cell/1.0/',
		'http://inkscape.sourceforge.net/dtd/sodipodi-0.dtd',
		'http://iptc.org/std/iptc4xmpcore/1.0/xmlns/',
		'http://iptc.org/std/iptc4xmpext/2008-02-29/',
		'http://leica-camera.com/digital-shift-assistant/1.0/',
		'http://ns.acdsee.com/iptc/1.0/',
		'http://ns.acdsee.com/regions/',
		'http://ns.adobe.com/adobeillustrator/10.0/',
		'http://ns.adobe.com/adobesvgviewerextensions/3.0/',
		'http://ns.adobe.com/album/1.0/',
		'http://ns.adobe.com/camera-raw-defaults/1.0/',
		'http://ns.adobe.com/camera-raw-embedded-lens-profile/1.0/',
		'http://ns.adobe.com/camera-raw-saved-settings/1.0/',
		'http://ns.adobe.com/camera-raw-settings/1.0/',
		'http://ns.adobe.com/creatoratom/1.0/',
		'http://ns.adobe.com/dicom/',
		'http://ns.adobe.com/exif/1.0/',
		'http://ns.adobe.com/exif/1.0/aux/',
		'http://ns.adobe.com/extensibility/1.0/',
		'http://ns.adobe.com/flows/1.0/',
		'http://ns.adobe.com/hdr-gain-map/1.0/',
		'http://ns.adobe.com/hdr-metadata/1.0/',
		'http://ns.adobe.com/ix/1.0/',
		'http://ns.adobe.com/lightroom/1.0/',
		'http://ns.adobe.com/illustrator/1.0/',
		'http://ns.adobe.com/imagereplacement/1.0/',
		'http://ns.adobe.com/pdf/1.3/',
		'http://ns.adobe.com/pdfx/1.3/',
		'http://ns.adobe.com/photoshop/1.0/',
		'http://ns.adobe.com/photoshop/1.0/camera-profile',
		'http://ns.adobe.com/photoshop/1.0/panorama-profile',
		'http://ns.adobe.com/raw/1.0/',
		'http://ns.adobe.com/swf/1.0/',
		'http://ns.adobe.com/saveforweb/1.0/',
		'http://ns.adobe.com/tiff/1.0/',
		'http://ns.adobe.com/variables/1.0/',
		'http://ns.adobe.com/xap/1.0/',
		'http://ns.adobe.com/xap/1.0/bj/',
		'http://ns.adobe.com/xap/1.0/g/',
		'http://ns.adobe.com/xap/1.0/g/img/',
		'http://ns.adobe.com/xap/1.0/mm/',
		'http://ns.adobe.com/xap/1.0/plus/',
		'http://ns.adobe.com/xap/1.0/rights/',
		'http://ns.adobe.com/xap/1.0/stype/dimensions#',
		'http://ns.adobe.com/xap/1.0/stype/font#',
		'http://ns.adobe.com/xap/1.0/stype/manifestitem#',
		'http://ns.adobe.com/xap/1.0/stype/resourceevent#',
		'http://ns.adobe.com/xap/1.0/stype/resourceref#',
		'http://ns.adobe.com/xap/1.0/stype/version#',
		'http://ns.adobe.com/xap/1.0/t/pg/',
		'http://ns.adobe.com/xmp/1.0/dynamicmedia/',
		'http://ns.adobe.com/xmp/identifier/qual/1.0/',
		'http://ns.adobe.com/xmp/note/',
		'http://ns.adobe.com/xmp/stype/area#',
		'http://ns.apple.com/adjustment-settings/1.0/',
		'http://ns.apple.com/faceinfo/1.0/',
		'http://ns.apple.com/hdrgainmap/1.0/',
		'http://ns.apple.com/pixeldatainfo/1.0/',
		'http://ns.exiftool.org/1.0/',
		'http://ns.extensis.com/extensis/1.0/',
		'http://ns.fastpictureviewer.com/fpv/1.0/',
		'http://ns.google.com/photos/1.0/audio/',
		'http://ns.google.com/photos/1.0/camera/',
		'http://ns.google.com/photos/1.0/container/',
		'http://ns.google.com/photos/1.0/creations/',
		'http://ns.google.com/photos/1.0/depthmap/',
		'http://ns.google.com/photos/1.0/focus/',
		'http://ns.google.com/photos/1.0/image/',
		'http://ns.google.com/photos/1.0/panorama/',
		'http://ns.google.com/photos/dd/1.0/profile/',
		'http://ns.google.com/videos/1.0/spherical/',
		'http://ns.idimager.com/ics/1.0/',
		'http://ns.iview-multimedia.com/mediapro/1.0/',
		'http://ns.leiainc.com/photos/1.0/image/',
		'http://ns.microsoft.com/expressionmedia/1.0/',
		'http://ns.microsoft.com/photo/1.0',
		'http://ns.microsoft.com/photo/1.1',
		'http://ns.microsoft.com/photo/1.2/',
		'http://ns.microsoft.com/photo/1.2/t/region#',
		'http://ns.microsoft.com/photo/1.2/t/regioninfo#',
		'http://ns.nikon.com/asteroid/1.0/',
		'http://ns.nikon.com/nine/1.0/',
		'http://ns.nikon.com/sdc/1.0/',
		'http://ns.optimasc.com/dex/1.0/',
		'http://ns.seal/2024/1.0/',
		'http://ns.useplus.org/ldf/xmp/1.0/',
		'http://prismstandard.org/namespaces/basic/2.0/',
		'http://prismstandard.org/namespaces/pmi/2.2/',
		'http://prismstandard.org/namespaces/prismusagerights/2.1/',
		'http://prismstandard.org/namespaces/prl/2.1/',
		'http://prismstandard.org/namespaces/prm/3.0/',
		'http://purl.org/dc/elements/1.1/',
		'http://purl.org/dc/elements/1.1',
		'http://rs.tdwg.org/dwc/index.htm',
		'http://schemas.microsoft.com/visio/2003/svgextensions/',
		'http://sodipodi.sourceforge.net/dtd/sodipodi-0.dtd',
		'http://taptrix.com/inkpad/svg_extensions',
		'http://www.digikam.org/ns/1.0/',
		'http://www.dji.com/drone-dji/1.0/',
		'http://www.metadataworkinggroup.com/schemas/collections/',
		'http://www.metadataworkinggroup.com/schemas/keywords/',
		'http://www.metadataworkinggroup.com/schemas/regions/',
		'http://web.resource.org/cc/',
		'http://www.freesoftware.fsf.org/bkchem/cdml',
		'http://www.inkscape.org/namespaces/inkscape',
		'http://www.opengis.net/gml',
		'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
		'http://www.w3.org/2000/01/rdf-schema#',
		'http://www.w3.org/2000/svg',
		'http://www.w3.org/2000/02/svg/testsuite/description/', // T278044
		'http://www.w3.org/tr/rec-rdf-syntax/',
		'http://xmp.gettyimages.com/gift/1.0/',
	];

	// Inkscape mangles namespace definitions created by Adobe Illustrator.
	// This is nasty but harmless. (T144827)
	$isBuggyInkscape = preg_match( '/^&(#38;)*ns_[a-z_]+;$/', $namespace );

	// $namespace is always a string here, so a strict lookup is safe
	if ( !( $isBuggyInkscape || in_array( $namespace, $validNamespaces, true ) ) ) {
		wfDebug( __METHOD__ . ": Non-svg namespace '$namespace' in uploaded file." );
		return [ 'uploadscriptednamespace', $namespace ];
	}

	// check for elements that can contain javascript
	if ( $strippedElement === 'script' ) {
		wfDebug( __METHOD__ . ": Found script element '$element' in uploaded file." );

		return [ 'uploaded-script-svg', $strippedElement ];
	}

	// e.g., <svg xmlns="http://www.w3.org/2000/svg">
	// <handler xmlns:ev="http://www.w3.org/2001/xml-events" ev:event="load">alert(1)</handler> </svg>
	if ( $strippedElement === 'handler' ) {
		wfDebug( __METHOD__ . ": Found scriptable element '$element' in uploaded file." );

		return [ 'uploaded-script-svg', $strippedElement ];
	}

	// SVG reported in Feb '12 that used xml:stylesheet to generate javascript block
	if ( $strippedElement === 'stylesheet' ) {
		wfDebug( __METHOD__ . ": Found scriptable element '$element' in uploaded file." );

		return [ 'uploaded-script-svg', $strippedElement ];
	}

	// Block iframes, in case they pass the namespace check
	if ( $strippedElement === 'iframe' ) {
		wfDebug( __METHOD__ . ": iframe in uploaded file." );

		return [ 'uploaded-script-svg', $strippedElement ];
	}

	// Check <style> css
	if ( $strippedElement === 'style' ) {
		$cssCheck = $this->SVGCSSChecker->checkStyleTag( $data );
		if ( $cssCheck !== true ) {
			wfDebug( __METHOD__ . ": hostile css in style element. " . $cssCheck[0] );

			return [ 'uploaded-hostile-svg', $cssCheck[0], $cssCheck[1], $cssCheck[2] ];
		}
	}

	static $cssAttrs = [ 'font', 'clip-path', 'fill', 'filter', 'marker',
		'marker-end', 'marker-mid', 'marker-start', 'mask', 'stroke', 'cursor' ];

	foreach ( $attribs as $attrib => $value ) {
		// If attributeNamespace is '', it is relative to its element's namespace
		[ $attributeNamespace, $stripped ] = self::splitXmlNamespace( $attrib );
		// All value checks below operate on the lower-cased value
		$value = strtolower( $value );

		if ( !(
				// Inkscape element's have valid attribs that start with on and are safe, fail all others
				// We are assuming here that the SVG will be interpreted
				// under XML serialization. This is not safe for SVGs
				// embedded directly in HTML.
				$namespace === 'http://www.inkscape.org/namespaces/inkscape' &&
				$attributeNamespace === ''
			) && str_starts_with( $stripped, 'on' )
		) {
			wfDebug( __METHOD__
				. ": Found event-handler attribute '$attrib'='$value' in uploaded file." );

			return [ 'uploaded-event-handler-on-svg', $attrib, $value ];
		}

		// Do not allow relative links, or unsafe url schemas.
		// For <a> tags, only data:, http: and https: and same-document
		// fragment links are allowed.
		// For all other tags, only 'data:' and fragments (#) are allowed.
		if (
			$stripped === 'href'
			&& $value !== ''
			&& !str_starts_with( $value, 'data:' )
			&& !str_starts_with( $value, '#' )
			&& !( $strippedElement === 'a' && preg_match( '!^https?://!i', $value ) )
		) {
			wfDebug( __METHOD__ . ": Found href attribute <$strippedElement "
				. "'$attrib'='$value' in uploaded file." );

			return [ 'uploaded-href-attribute-svg', $strippedElement, $attrib, $value ];
		}

		// Only allow 'data:\' targets that should be safe.
		// This prevents vectors like image/svg, text/xml, application/xml, and text/html, which can contain scripts
		if ( $stripped === 'href' && strncasecmp( 'data:', $value, 5 ) === 0 ) {
			// RFC2397 parameters.
			// This is only slightly slower than (;[\w;]+)*.
			// phpcs:ignore Generic.Files.LineLength
			$parameters = '(?>;[a-zA-Z0-9\!#$&\'*+.^_`{|}~-]+=(?>[a-zA-Z0-9\!#$&\'*+.^_`{|}~-]+|"(?>[\0-\x0c\x0e-\x21\x23-\x5b\x5d-\x7f]+|\\\\[\0-\x7f])*"))*(?:;base64)?';

			if ( !preg_match( "!^data:\s*image/(gif|jpeg|jpg|a?png|webp|avif)$parameters,!i", $value ) ) {
				wfDebug( __METHOD__ . ": Found href with data URI with MIME type that is not allowed "
					. "\"<$strippedElement '$attrib'='$value'...\" in uploaded file." );
				return [ 'uploaded-href-unsafe-target-svg', $strippedElement, $attrib, $value ];
			}
		}

		// Change href with animate from (http://html5sec.org/#137).
		if ( $stripped === 'attributename'
			&& $strippedElement === 'animate'
			&& $this->stripXmlNamespace( $value ) === 'href'
		) {
			wfDebug( __METHOD__ . ": Found animate that might be changing href using from "
				. "\"<$strippedElement '$attrib'='$value'...\" in uploaded file." );

			return [ 'uploaded-animate-svg', $strippedElement, $attrib, $value ];
		}

		// Use set/animate to add event-handler attribute to parent.
		if ( ( $strippedElement === 'set' || $strippedElement === 'animate' )
			&& $stripped === 'attributename'
			&& str_starts_with( $value, 'on' )
		) {
			wfDebug( __METHOD__ . ": Found svg setting event-handler attribute with "
				. "\"<$strippedElement $stripped='$value'...\" in uploaded file." );

			return [ 'uploaded-setting-event-handler-svg', $strippedElement, $stripped, $value ];
		}

		// use set to add href attribute to parent element.
		if ( $strippedElement === 'set'
			&& $stripped === 'attributename'
			&& str_contains( $value, 'href' )
		) {
			wfDebug( __METHOD__ . ": Found svg setting href attribute '$value' in uploaded file." );

			return [ 'uploaded-setting-href-svg' ];
		}

		// use set to add a remote / data / script target to an element.
		if ( $strippedElement === 'set'
			&& $stripped === 'to'
			&& preg_match( '!(http|https|data|script):!im', $value )
		) {
			wfDebug( __METHOD__ . ": Found svg setting attribute to '$value' in uploaded file." );

			return [ 'uploaded-wrong-setting-svg', $value ];
		}

		// use handler attribute with remote / data / script.
		if ( $stripped === 'handler' && preg_match( '!(http|https|data|script):!im', $value ) ) {
			wfDebug( __METHOD__ . ": Found svg setting handler with remote/data/script "
				. "'$attrib'='$value' in uploaded file." );

			return [ 'uploaded-setting-handler-svg', $attrib, $value ];
		}

		// use CSS styles to bring in remote code.
		if ( $stripped === 'style'
			&& $this->SVGCSSChecker->checkStyleAttribute( $value ) !== true
		) {
			wfDebug( __METHOD__ . ": Found svg setting a style with "
				. "remote url '$attrib'='$value' in uploaded file." );
			return [ 'uploaded-remote-url-svg', $attrib, $value ];
		}

		// Several attributes can include css, css character escaping isn't allowed.
		if ( in_array( $stripped, $cssAttrs, true )
			&& $this->SVGCSSChecker->checkPresentationalAttribute( $value ) !== true
		) {
			wfDebug( __METHOD__ . ": Found svg setting a style with "
				. "remote url '$attrib'='$value' in uploaded file." );
			return [ 'uploaded-remote-url-svg', $attrib, $value ];
		}

		// image filters can pull in url, which could be svg that executes scripts.
		// Only allow url( "#foo" ).
		// Do not allow url( http://example.com )
		// TODO: It seems like the line above already does this check.
		// The parenthesis must be escaped directly by the backslash, with no
		// invisible character in between, or the pattern fails to compile.
		if ( $strippedElement === 'image'
			&& $stripped === 'filter'
			&& preg_match( '!url\s*\(\s*["\']?[^#]!im', $value )
		) {
			wfDebug( __METHOD__ . ": Found image filter with url: "
				. "\"<$strippedElement $stripped='$value'...\" in uploaded file." );

			return [ 'uploaded-image-filter-svg', $strippedElement, $stripped, $value ];
		}
	}

	return false; // No scripts detected
}
850
/**
 * Split a namespace-qualified XML name into namespace and local name.
 *
 * The name is lower-cased first and split at its last colon; without a
 * colon the namespace is ''.
 *
 * @param string $element Qualified name, e.g. 'http://www.w3.org/2000/svg:script'
 * @return string[] [ namespace, local name ], e.g.
 *  [ 'http://www.w3.org/2000/svg', 'script' ]
 */
private function splitXmlNamespace( $element ) {
	$qualified = strtolower( $element );
	$lastColon = strrpos( $qualified, ':' );

	if ( $lastColon === false ) {
		return [ '', $qualified ];
	}

	return [
		substr( $qualified, 0, $lastColon ),
		substr( $qualified, $lastColon + 1 ),
	];
}
865
/**
 * Return the lower-cased local part of a namespace-qualified XML name.
 *
 * @param string $element Qualified name, e.g. 'http://www.w3.org/2000/svg:script'
 * @return string Local name, e.g. 'script'
 */
private function stripXmlNamespace( $element ) {
	[ , $localName ] = $this->splitXmlNamespace( $element );

	return $localName;
}
874
/**
 * Run the configured virus scanner on a file.
 *
 * The scanner command, its exit-code map and an optional message pattern
 * are taken from the AntivirusSetup entry named by the Antivirus setting.
 *
 * @param string $file Path of the file to scan
 * @return string|bool|null
 *  - null if scanning is disabled, the scanner aborted on the file, or
 *    the scan failed and AntivirusRequired is off
 *  - false if the scanner reported no virus
 *  - a 'virus-scanfailed' message text if the scan failed and
 *    AntivirusRequired is on
 *  - otherwise the scanner's (possibly pattern-extracted) output, or true
 *    when it produced no output, meaning a virus was found
 * @throws ConfigException If the configured scanner has no setup entry
 */
public function detectVirus( $file ) {
	$mainConfig = $this->config;
	$antivirus = $mainConfig->get( MainConfigNames::Antivirus );
	$antivirusSetup = $mainConfig->get( MainConfigNames::AntivirusSetup );
	$antivirusRequired = $mainConfig->get( MainConfigNames::AntivirusRequired );
	if ( !$antivirus ) {
		wfDebug( __METHOD__ . ": virus scanner disabled" );

		return null;
	}

	if ( !( $antivirusSetup[$antivirus] ?? false ) ) {
		throw new ConfigException( "Unknown virus scanner: $antivirus" );
	}

	# look up scanner configuration
	$command = $antivirusSetup[$antivirus]['command'];
	$exitCodeMap = $antivirusSetup[$antivirus]['codemap'];
	$msgPattern = $antivirusSetup[$antivirus]['messagepattern'] ?? null;

	if ( !str_contains( $command, "%f" ) ) {
		# simple pattern: append file to scan
		$command .= " " . Shell::escape( $file );
	} else {
		# complex pattern: replace "%f" with file to scan
		$command = str_replace( "%f", Shell::escape( $file ), $command );
	}

	wfDebug( __METHOD__ . ": running virus scan: $command " );

	# execute virus scanner
	$exitCode = false;

	# NOTE: there's a 50-line workaround to make stderr redirection work on windows, too.
	# that does not seem to be worth the pain.
	# Ask me (Duesentrieb) about it if it's ever needed.
	$output = wfShellExecWithStderr( $command, $exitCode );

	# map exit code to AV_xxx constants.
	$mappedCode = $exitCode;
	if ( $exitCodeMap ) {
		if ( isset( $exitCodeMap[$exitCode] ) ) {
			$mappedCode = $exitCodeMap[$exitCode];
		} elseif ( isset( $exitCodeMap["*"] ) ) {
			$mappedCode = $exitCodeMap["*"];
		}
	}

	# NB: AV_NO_VIRUS is 0, but AV_SCAN_FAILED is false,
	# so we need the strict equalities === and thus can't use a switch here
	if ( $mappedCode === AV_SCAN_FAILED ) {
		# scan failed (code was mapped to false by $exitCodeMap)
		wfDebug( __METHOD__ . ": failed to scan $file (code $exitCode)." );

		$output = $antivirusRequired
			? wfMessage( 'virus-scanfailed', [ $exitCode ] )->text()
			: null;
	} elseif ( $mappedCode === AV_SCAN_ABORTED ) {
		# scan failed because filetype is unknown (probably immune)
		wfDebug( __METHOD__ . ": unsupported file type $file (code $exitCode)." );
		$output = null;
	} elseif ( $mappedCode === AV_NO_VIRUS ) {
		# no virus found
		wfDebug( __METHOD__ . ": file passed virus scan." );
		$output = false;
	} else {
		$output = trim( $output );

		if ( !$output ) {
			$output = true; # if there's no output, return true
		} elseif ( $msgPattern ) {
			$groups = [];
			// !empty() keeps the original truthiness test but tolerates a
			// configured pattern that has no first capture group.
			if ( preg_match( $msgPattern, $output, $groups ) && !empty( $groups[1] ) ) {
				$output = $groups[1];
			}
		}

		wfDebug( __METHOD__ . ": FOUND VIRUS! scanner feedback: $output" );
	}

	return $output;
}
969}
const AV_SCAN_FAILED
Definition Defines.php:86
const AV_SCAN_ABORTED
Definition Defines.php:85
const AV_NO_VIRUS
Definition Defines.php:83
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfShellExecWithStderr( $cmd, &$retval=null, $environ=[], $limits=[])
Execute a shell command, returning both stdout and stderr.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Exceptions for config failures.
A class for passing options to services.
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
A class containing constants representing the names of configuration variables.
const MimeTypeExclusions
Name constant for the MimeTypeExclusions setting, for use with Config::get()
const AntivirusSetup
Name constant for the AntivirusSetup setting, for use with Config::get()
const VerifyMimeType
Name constant for the VerifyMimeType setting, for use with Config::get()
const DisableUploadScriptChecks
Name constant for the DisableUploadScriptChecks setting, for use with Config::get()
const Antivirus
Name constant for the Antivirus setting, for use with Config::get()
const AntivirusRequired
Name constant for the AntivirusRequired setting, for use with Config::get()
Base media handler class.
HTML sanitizer for MediaWiki.
Definition Sanitizer.php:32
Executes shell commands.
Definition Shell.php:32
Ensure SVG files cannot load external resources via URLs in CSS.
static checkFileExtension( $ext, $list)
Perform case-insensitive match against a list of file extensions.
Service to verify file uploads are safe.
detectVirus( $file)
Generic wrapper function for a virus scanner program.
verifyFile(string $path, string $ext, array $fileProps)
Verifies that the upload file is safe.
verifyExtension( $mime, $extension)
Checks if the MIME type of the uploaded file matches the file extension.
__construct(ServiceOptions $config, MimeAnalyzer $mimeAnalyzer, SVGCSSChecker $SVGCSSChecker)
verifyPartialFile(string $path, string $ext, array $fileProps)
A verification routine suitable for partial files.
detectScript( $file, $mime, $extension)
Heuristic for detecting files that could contain JavaScript instructions or things that may look like...
XML syntax and type checker.