MediaWiki master
UploadVerification.php
Go to the documentation of this file.
1<?php
10namespace MediaWiki\Upload;
11
18use Wikimedia\Mime\MimeAnalyzer;
20
34
/**
 * Encoding names that may appear in the XML declaration of an uploaded file.
 *
 * checkXMLEncodingMismatch() upper-cases the declared encoding and reports a
 * mismatch when the result is not one of these names.
 */
private const SAFE_XML_ENCODINGS = [
    'UTF-8', 'US-ASCII',
    'ISO-8859-1', 'ISO-8859-2',
    'UTF-16', 'UTF-32',
    'WINDOWS-1250', 'WINDOWS-1251', 'WINDOWS-1252',
    'WINDOWS-1253', 'WINDOWS-1254', 'WINDOWS-1255',
    'WINDOWS-1256', 'WINDOWS-1257', 'WINDOWS-1258',
];
52
/**
 * Configuration settings this service reads through its ServiceOptions.
 *
 * The constructor passes this list to ServiceOptions::assertRequiredOptions().
 *
 * NOTE(review): the entries were missing from the listing this was restored
 * from; the list below is every MainConfigNames constant the file refers to.
 * Confirm against the canonical source.
 */
public const CONSTRUCTOR_OPTIONS = [
    MainConfigNames::Antivirus,
    MainConfigNames::AntivirusRequired,
    MainConfigNames::AntivirusSetup,
    MainConfigNames::DisableUploadScriptChecks,
    MainConfigNames::MimeTypeExclusions,
    MainConfigNames::VerifyMimeType,
];
61
/**
 * The three collaborators are stored as private properties through
 * constructor promotion.
 *
 * @param ServiceOptions $config Options holder; checked against self::CONSTRUCTOR_OPTIONS
 * @param MimeAnalyzer $mimeAnalyzer Used to relate MIME types and file extensions
 * @param SVGCSSChecker $SVGCSSChecker Used to vet CSS found inside SVG uploads
 */
public function __construct(
    private ServiceOptions $config,
    private MimeAnalyzer $mimeAnalyzer,
    private SVGCSSChecker $SVGCSSChecker
) {
    $this->config->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
}
80
/**
 * Check a detected MIME type against the configured exclusion list.
 *
 * Nothing is checked when the VerifyMimeType setting is off.
 *
 * @param string $mime Detected MIME type of the upload
 * @return array|true True when the type is acceptable, otherwise
 *   [ 'filetype-badmime', $mime ]
 */
private function verifyMimeType( $mime ) {
    if ( !$this->config->get( MainConfigNames::VerifyMimeType ) ) {
        return true;
    }

    wfDebug( "mime: <$mime>" );

    // NOTE(review): the lookup below completes a statement that was cut off
    // after "$this->config"; confirm the setting name against upstream.
    $mimeTypeExclusions = $this->config->get( MainConfigNames::MimeTypeExclusions );
    if ( UploadBase::checkFileExtension( $mime, $mimeTypeExclusions ) ) {
        return [ 'filetype-badmime', $mime ];
    }

    return true;
}
102
/**
 * Run every check on a completely uploaded file.
 *
 * Starts with verifyPartialFile(), then compares MIME type and extension,
 * runs the strict SVG script check and finally asks the media handler for
 * the MIME type (if there is one) to verify the upload.
 *
 * @param string $path Path of the file on disk
 * @param string $ext File extension; may be the empty string
 * @param array $fileProps File properties; the 'mime' and 'file-mime' keys are read
 * @return array|true True when every check passed, otherwise an error array
 *   whose first element is a message key
 */
public function verifyFile( string $path, string $ext, array $fileProps ) {
    $mustMatchExtension = $this->config->get( MainConfigNames::VerifyMimeType );
    $skipScriptChecks = $this->config->get( MainConfigNames::DisableUploadScriptChecks );

    $partialResult = $this->verifyPartialFile( $path, $ext, $fileProps );
    if ( $partialResult !== true ) {
        return $partialResult;
    }

    $mime = $fileProps['mime'];

    # XXX: Missing extension will be caught by validateName() via getTitle()
    if ( $mustMatchExtension && $ext !== '' && !$this->verifyExtension( $mime, $ext ) ) {
        return [ 'filetype-mime-mismatch', $ext, $mime ];
    }

    # check for htmlish code and javascript
    $looksLikeSvg = $ext === 'svg' || $mime === 'image/svg+xml';
    if ( !$skipScriptChecks && $looksLikeSvg ) {
        $svgResult = $this->detectScriptInSvg( $path, false );
        if ( $svgResult !== false ) {
            return $svgResult;
        }
    }

    $handler = $mime === null ? null : MediaHandler::getHandler( $mime );
    if ( $handler ) {
        $verdict = $handler->verifyUpload( $path );
        if ( !$verdict->isOK() ) {
            $problems = $verdict->getErrorsArray();

            // Only the first reported problem is returned
            return reset( $problems );
        }
    }

    // TODO: Perhaps we should have a hook here akin to UploadVerifyFile
    // except that it doesn't pass an UploadBase, and it would run
    // even when someone is verifying a file not going through UploadBase.

    wfDebug( __METHOD__ . ": all clear; passing." );

    return true;
}
166
/**
 * Checks that can be applied to a file that is not complete yet.
 *
 * Runs the MIME exclusion check, the script heuristics (unless
 * DisableUploadScriptChecks is set) and the virus scan, in that order.
 * SVG files are parsed in partial mode here.
 *
 * @param string $path Path of the file on disk
 * @param string $ext File extension
 * @param array $fileProps File properties; the 'file-mime' key is read
 * @return array|true True when nothing was found, otherwise an error array
 *   whose first element is a message key
 */
public function verifyPartialFile( string $path, string $ext, array $fileProps ) {
    $skipScriptChecks = $this->config->get( MainConfigNames::DisableUploadScriptChecks );

    # check MIME type, if desired
    $mime = $fileProps['file-mime'];
    $mimeResult = $this->verifyMimeType( $mime );
    if ( $mimeResult !== true ) {
        return $mimeResult;
    }

    # check for htmlish code and javascript
    if ( !$skipScriptChecks ) {
        if ( $this->detectScript( $path, $mime, $ext ) ) {
            return [ 'uploadscripted' ];
        }

        $looksLikeSvg = $ext === 'svg' || $mime === 'image/svg+xml';
        if ( $looksLikeSvg ) {
            $svgResult = $this->detectScriptInSvg( $path, true );
            if ( $svgResult !== false ) {
                return $svgResult;
            }
        }
    }

    # Scan the uploaded file for viruses
    $virus = $this->detectVirus( $path );

    return $virus ? [ 'uploadvirus', $virus ] : true;
}
214
/**
 * Decide whether a detected MIME type is compatible with a file extension.
 *
 * @param string|null $mime Detected MIME type; empty, 'unknown' and
 *   'unknown/unknown' all count as "not detected"
 * @param string $extension File extension to compare against
 * @return bool True to let the file pass, false to reject it
 */
public function verifyExtension( $mime, $extension ) {
    $analyzer = $this->mimeAnalyzer;

    $mimeIsUnknown = !$mime || $mime === 'unknown' || $mime === 'unknown/unknown';
    if ( $mimeIsUnknown ) {
        // Without a detected type the extension decides: a recognizable
        // extension should have produced a type, so the file is rejected.
        if ( $analyzer->isRecognizableExtension( $extension ) ) {
            wfDebug( __METHOD__ . ": rejecting file with unknown detected mime type; " .
                "recognized extension '$extension', so probably invalid file" );

            return false;
        }

        wfDebug( __METHOD__ . ": passing file with unknown detected mime type; " .
            "unrecognized extension '$extension', can't verify" );

        return true;
    }

    $match = $analyzer->isMatchingExtension( $extension, $mime );

    if ( $match === null ) {
        // The analyzer knows no extension for this MIME type
        $extensionHasKnownMime = $analyzer->getMimeTypesFromExtension( $extension ) !== [];
        if ( $extensionHasKnownMime ) {
            wfDebug( __METHOD__ . ": No extension known for $mime, but we know a mime for $extension" );

            return false;
        }

        wfDebug( __METHOD__ . ": no file extension known for mime type $mime, passing file" );

        return true;
    }

    if ( !$match ) {
        wfDebug( __METHOD__
            . ": mime type $mime mismatches file extension $extension, rejecting file" );

        return false;
    }

    wfDebug( __METHOD__ . ": mime type $mime matches extension $extension, passing file" );

    return true;
}
264
/**
 * Heuristic scan for content that could be treated as HTML or script.
 *
 * Files whose MIME type starts with "text/" are read completely; for all
 * other files only the first 1024 bytes are examined.
 *
 * @param string $file Path of the file on disk
 * @param string|null $mime Detected MIME type
 * @param string|null $extension File extension
 * @return bool True when something suspicious was found; false otherwise,
 *   including when the file cannot be read or is empty
 */
public function detectScript( $file, $mime, $extension ) {
    # ugly hack: for text files, always look at the entire file.
    # For binary field, just check the first K.

    if ( str_starts_with( $mime ?? '', 'text/' ) ) {
        $chunk = file_get_contents( $file );
    } else {
        $fp = fopen( $file, 'rb' );
        if ( !$fp ) {
            return false;
        }
        $chunk = fread( $fp, 1024 );
        fclose( $fp );
    }

    // A failed read yields false; stop here instead of passing it on
    if ( $chunk === false ) {
        return false;
    }

    $chunk = strtolower( $chunk );

    if ( !$chunk ) {
        return false;
    }

    # decode from UTF-16 if needed (could be used for obfuscation).
    if ( str_starts_with( $chunk, "\xfe\xff" ) ) {
        $enc = 'UTF-16BE';
    } elseif ( str_starts_with( $chunk, "\xff\xfe" ) ) {
        $enc = 'UTF-16LE';
    } else {
        $enc = null;
    }

    if ( $enc !== null ) {
        // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
        $decoded = @iconv( $enc, "ASCII//IGNORE", $chunk );
        // A failed conversion is treated as empty content
        $chunk = $decoded === false ? '' : $decoded;
    }

    $chunk = trim( $chunk );

    wfDebug( __METHOD__ . ": checking for embedded scripts and HTML stuff" );

    # check for HTML doctype
    if ( preg_match( "/<!DOCTYPE *X?HTML/i", $chunk ) ) {
        return true;
    }

    // Some browsers will interpret obscure xml encodings as UTF-8, while
    // PHP/expat will interpret the given encoding in the xml declaration (T49304)
    if ( $extension === 'svg' || str_starts_with( $mime ?? '', 'image/svg' ) ) {
        if ( $this->checkXMLEncodingMismatch( $file ) ) {
            return true;
        }
    }

    // Quick check for HTML heuristics in old IE and Safari.
    //
    // The exact heuristics IE uses are checked separately via verifyMimeType(), so we
    // don't need them all here as it can cause many false positives.
    //
    // Check for `<script` and such still to forbid script tags and embedded HTML in SVG:
    $tags = [
        '<body',
        '<head',
        '<html', # also in safari
        '<script', # also in safari
    ];

    foreach ( $tags as $tag ) {
        if ( str_contains( $chunk, $tag ) ) {
            wfDebug( __METHOD__ . ": found something that may make it be mistaken for html: $tag" );

            return true;
        }
    }

    /*
     * look for JavaScript
     */

    # resolve entity-refs to look at attributes. may be harsh on big files... cache result?
    $chunk = Sanitizer::decodeCharReferences( $chunk );

    # look for script-types
    if ( preg_match( '!type\s*=\s*[\'"]?\s*(?:\w*/)?(?:ecma|java)!im', $chunk ) ) {
        wfDebug( __METHOD__ . ": found script types" );

        return true;
    }

    # look for html-style script-urls
    if ( preg_match( '!(?:href|src|data)\s*=\s*[\'"]?\s*(?:ecma|java)script:!im', $chunk ) ) {
        wfDebug( __METHOD__ . ": found html-style script urls" );

        return true;
    }

    # look for css-style script-urls
    if ( preg_match( '!url\s*\(\s*[\'"]?\s*(?:ecma|java)script:!im', $chunk ) ) {
        wfDebug( __METHOD__ . ": found css-style script urls" );

        return true;
    }

    wfDebug( __METHOD__ . ": no scripts found" );

    return false;
}
386
/**
 * Look for an XML declaration that names an encoding outside
 * self::SAFE_XML_ENCODINGS, or that is otherwise suspicious.
 *
 * The first 4096 bytes are inspected as they are and again after
 * converting them from UTF-16, UTF-16BE, UTF-32 and UTF-32BE.
 *
 * @param string $file Path of the file on disk
 * @return bool True when the declaration is unsafe, unterminated, or the
 *   file starts with the EBCDIC byte sequence; false otherwise
 */
private function checkXMLEncodingMismatch( $file ) {
    // https://mimesniff.spec.whatwg.org/#resource-header says browsers
    // should read the first 1445 bytes. Do 4096 bytes for good measure.
    // XML Spec says XML declaration if present must be first thing in file
    // other than BOM
    $contents = file_get_contents( $file, false, null, 0, 4096 );
    if ( $contents === false ) {
        // Nothing could be read, so there is no declaration to inspect
        return false;
    }

    $encodingRegex = '!encoding[ \t\n\r]*=[ \t\n\r]*[\'"](.*?)[\'"]!si';
    // Captured so that log lines from the closure keep this method's name
    $method = __METHOD__;

    // Result: true = declaration must be rejected, false = complete
    // declaration that passed, null = no declaration start found.
    $inspect = static function ( string $text ) use ( $encodingRegex, $method ): ?bool {
        if ( preg_match( "!<\?xml\b(.*?)\?>!si", $text, $matches ) ) {
            if ( preg_match( $encodingRegex, $matches[1], $encMatch )
                && !in_array( strtoupper( $encMatch[1] ), self::SAFE_XML_ENCODINGS, true )
            ) {
                wfDebug( $method . ": Found unsafe XML encoding '{$encMatch[1]}'" );

                return true;
            }

            return false;
        }

        if ( preg_match( "!<\?xml\b!i", $text ) ) {
            // Start of XML declaration without an end in the first 4096
            // bytes. There shouldn't be a legitimate reason for this to happen.
            wfDebug( $method . ": Unmatched XML declaration start" );

            return true;
        }

        return null;
    };

    $verdict = $inspect( $contents );
    if ( $verdict === true ) {
        return true;
    }

    // The EBCDIC test only applies when no declaration start was seen
    if ( $verdict === null && str_starts_with( $contents, "\x4C\x6F\xA7\x94" ) ) {
        // EBCDIC encoded XML
        wfDebug( __METHOD__ . ": EBCDIC Encoded XML" );

        return true;
    }

    // It's possible the file is encoded with multibyte encoding, so re-encode attempt to
    // detect the encoding in case it specifies an encoding not allowed in self::SAFE_XML_ENCODINGS
    foreach ( [ 'UTF-16', 'UTF-16BE', 'UTF-32', 'UTF-32BE' ] as $encoding ) {
        // phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
        $str = @iconv( $encoding, 'UTF-8', $contents );
        if ( $str !== false && $str !== '' && $inspect( $str ) === true ) {
            return true;
        }
    }

    return false;
}
448
/**
 * Parse an SVG file with XmlTypeCheck, using this class's callbacks for
 * elements, processing instructions and external DTDs.
 *
 * @param string $filename Path of the SVG file
 * @param bool $partial True when the file may be incomplete; malformed XML
 *   is then not reported as an error (T67724)
 * @return array|false False when nothing was found, otherwise an error
 *   array ([ 'uploadinvalidxml' ] for malformed XML in non-partial mode,
 *   or whatever the matching callback returned)
 */
private function detectScriptInSvg( $filename, $partial ) {
    $extraHandlers = [
        'processing_instruction_handler' => $this->checkSvgPICallback( ... ),
        'external_dtd_handler' => $this->checkSvgExternalDTD( ... ),
    ];
    $check = new XmlTypeCheck(
        $filename,
        $this->checkSvgScriptCallback( ... ),
        true,
        $extraHandlers
    );

    if ( $check->wellFormed !== true ) {
        // Invalid xml (T60553)
        // But only when non-partial (T67724)
        if ( $partial ) {
            return false;
        }

        return [ 'uploadinvalidxml' ];
    }

    return $check->filterMatch ? $check->filterMatchType : false;
}
479
/**
 * Processing-instruction callback for the SVG check.
 *
 * @param string $target Target of the processing instruction
 * @return array|false [ 'upload-scripted-pi-callback' ] when the target
 *   contains "xml-stylesheet" (case-insensitive), false otherwise
 */
private function checkSvgPICallback( $target ) {
    // Don't allow external stylesheets (T59550)
    $isStylesheetPI = preg_match( '/xml-stylesheet/i', $target );

    return $isStylesheetPI ? [ 'upload-scripted-pi-callback' ] : false;
}
494
/**
 * External-DTD callback for the SVG check.
 *
 * A DTD reference is accepted only when it is of type PUBLIC, its system
 * identifier is one of the listed W3C SVG DTD URLs and its public
 * identifier starts with "-//W3C//".
 *
 * @param string $type Type of the external DTD reference, e.g. 'PUBLIC'
 * @param string|null $publicId Public identifier of the DTD
 * @param string|null $systemId System identifier (URL) of the DTD
 * @return array|false [ 'upload-scripted-dtd' ] when the reference is not
 *   allowed, false when it is
 */
private function checkSvgExternalDTD( $type, $publicId, $systemId ) {
    // This doesn't include the XHTML+MathML+SVG doctype since we don't
    // allow XHTML anyway.
    static $allowedDTDs = [
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd',
        'http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd',
        'http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-tiny.dtd',
        // https://phabricator.wikimedia.org/T168856
        'http://www.w3.org/TR/2001/PR-SVG-20010719/DTD/svg10.dtd',
    ];

    // Strict membership test: only an exact string match may pass the allow-list
    $isAllowed = $type === 'PUBLIC'
        && in_array( $systemId, $allowedDTDs, true )
        && str_starts_with( $publicId ?? '', "-//W3C//" );

    return $isAllowed ? false : [ 'upload-scripted-dtd' ];
}
526
/**
 * Element callback for the SVG check: inspects one element and its
 * attributes for constructs that could run script or load remote content.
 *
 * @param string $element Element name with its namespace prepended, e.g.
 *   'http://www.w3.org/2000/svg:script'
 * @param array $attribs Attribute name => attribute value
 * @param string|null $data Passed to the CSS checker when the element is
 *   a <style> element; presumably the element's text content (confirm
 *   against XmlTypeCheck)
 * @return array|false False when nothing objectionable was found,
 *   otherwise an error array whose first element is a message key
 */
private function checkSvgScriptCallback( $element, $attribs, $data = null ) {
    [ $namespace, $strippedElement ] = $this->splitXmlNamespace( $element );

    // We specifically don't include:
    // http://www.w3.org/1999/xhtml (T62771)
    static $validNamespaces = [
        '',
        'adobe:ns:meta/',
        'http://cipa.jp/exif/1.0/',
        'http://creativecommons.org/ns#',
        'http://developer.sonyericsson.com/cell/1.0/',
        'http://inkscape.sourceforge.net/dtd/sodipodi-0.dtd',
        'http://iptc.org/std/iptc4xmpcore/1.0/xmlns/',
        'http://iptc.org/std/iptc4xmpext/2008-02-29/',
        'http://leica-camera.com/digital-shift-assistant/1.0/',
        'http://ns.acdsee.com/iptc/1.0/',
        'http://ns.acdsee.com/regions/',
        'http://ns.adobe.com/adobeillustrator/10.0/',
        'http://ns.adobe.com/adobesvgviewerextensions/3.0/',
        'http://ns.adobe.com/album/1.0/',
        'http://ns.adobe.com/camera-raw-defaults/1.0/',
        'http://ns.adobe.com/camera-raw-embedded-lens-profile/1.0/',
        'http://ns.adobe.com/camera-raw-saved-settings/1.0/',
        'http://ns.adobe.com/camera-raw-settings/1.0/',
        'http://ns.adobe.com/creatoratom/1.0/',
        'http://ns.adobe.com/dicom/',
        'http://ns.adobe.com/exif/1.0/',
        'http://ns.adobe.com/exif/1.0/aux/',
        'http://ns.adobe.com/extensibility/1.0/',
        'http://ns.adobe.com/flows/1.0/',
        'http://ns.adobe.com/hdr-gain-map/1.0/',
        'http://ns.adobe.com/hdr-metadata/1.0/',
        'http://ns.adobe.com/ix/1.0/',
        'http://ns.adobe.com/lightroom/1.0/',
        'http://ns.adobe.com/illustrator/1.0/',
        'http://ns.adobe.com/imagereplacement/1.0/',
        'http://ns.adobe.com/pdf/1.3/',
        'http://ns.adobe.com/pdfx/1.3/',
        'http://ns.adobe.com/photoshop/1.0/',
        'http://ns.adobe.com/photoshop/1.0/camera-profile',
        'http://ns.adobe.com/photoshop/1.0/panorama-profile',
        'http://ns.adobe.com/raw/1.0/',
        'http://ns.adobe.com/swf/1.0/',
        'http://ns.adobe.com/saveforweb/1.0/',
        'http://ns.adobe.com/tiff/1.0/',
        'http://ns.adobe.com/variables/1.0/',
        'http://ns.adobe.com/xap/1.0/',
        'http://ns.adobe.com/xap/1.0/bj/',
        'http://ns.adobe.com/xap/1.0/g/',
        'http://ns.adobe.com/xap/1.0/g/img/',
        'http://ns.adobe.com/xap/1.0/mm/',
        'http://ns.adobe.com/xap/1.0/plus/',
        'http://ns.adobe.com/xap/1.0/rights/',
        'http://ns.adobe.com/xap/1.0/stype/dimensions#',
        'http://ns.adobe.com/xap/1.0/stype/font#',
        'http://ns.adobe.com/xap/1.0/stype/manifestitem#',
        'http://ns.adobe.com/xap/1.0/stype/resourceevent#',
        'http://ns.adobe.com/xap/1.0/stype/resourceref#',
        'http://ns.adobe.com/xap/1.0/stype/version#',
        'http://ns.adobe.com/xap/1.0/t/pg/',
        'http://ns.adobe.com/xmp/1.0/dynamicmedia/',
        'http://ns.adobe.com/xmp/identifier/qual/1.0/',
        'http://ns.adobe.com/xmp/note/',
        'http://ns.adobe.com/xmp/stype/area#',
        'http://ns.apple.com/adjustment-settings/1.0/',
        'http://ns.apple.com/faceinfo/1.0/',
        'http://ns.apple.com/hdrgainmap/1.0/',
        'http://ns.apple.com/pixeldatainfo/1.0/',
        'http://ns.exiftool.org/1.0/',
        'http://ns.extensis.com/extensis/1.0/',
        'http://ns.fastpictureviewer.com/fpv/1.0/',
        'http://ns.google.com/photos/1.0/audio/',
        'http://ns.google.com/photos/1.0/camera/',
        'http://ns.google.com/photos/1.0/container/',
        'http://ns.google.com/photos/1.0/creations/',
        'http://ns.google.com/photos/1.0/depthmap/',
        'http://ns.google.com/photos/1.0/focus/',
        'http://ns.google.com/photos/1.0/image/',
        'http://ns.google.com/photos/1.0/panorama/',
        'http://ns.google.com/photos/dd/1.0/profile/',
        'http://ns.google.com/videos/1.0/spherical/',
        'http://ns.idimager.com/ics/1.0/',
        'http://ns.iview-multimedia.com/mediapro/1.0/',
        'http://ns.leiainc.com/photos/1.0/image/',
        'http://ns.microsoft.com/expressionmedia/1.0/',
        'http://ns.microsoft.com/photo/1.0',
        'http://ns.microsoft.com/photo/1.1',
        'http://ns.microsoft.com/photo/1.2/',
        'http://ns.microsoft.com/photo/1.2/t/region#',
        'http://ns.microsoft.com/photo/1.2/t/regioninfo#',
        'http://ns.nikon.com/asteroid/1.0/',
        'http://ns.nikon.com/nine/1.0/',
        'http://ns.nikon.com/sdc/1.0/',
        'http://ns.optimasc.com/dex/1.0/',
        'http://ns.seal/2024/1.0/',
        'http://ns.useplus.org/ldf/xmp/1.0/',
        'http://prismstandard.org/namespaces/basic/2.0/',
        'http://prismstandard.org/namespaces/pmi/2.2/',
        'http://prismstandard.org/namespaces/prismusagerights/2.1/',
        'http://prismstandard.org/namespaces/prl/2.1/',
        'http://prismstandard.org/namespaces/prm/3.0/',
        'http://purl.org/dc/elements/1.1/',
        'http://purl.org/dc/elements/1.1',
        'http://rs.tdwg.org/dwc/index.htm',
        'http://schemas.microsoft.com/visio/2003/svgextensions/',
        'http://sodipodi.sourceforge.net/dtd/sodipodi-0.dtd',
        'http://taptrix.com/inkpad/svg_extensions',
        'http://www.digikam.org/ns/1.0/',
        'http://www.dji.com/drone-dji/1.0/',
        'http://www.metadataworkinggroup.com/schemas/collections/',
        'http://www.metadataworkinggroup.com/schemas/keywords/',
        'http://www.metadataworkinggroup.com/schemas/regions/',
        'http://web.resource.org/cc/',
        'http://www.freesoftware.fsf.org/bkchem/cdml',
        'http://www.inkscape.org/namespaces/inkscape',
        'http://www.opengis.net/gml',
        'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'http://www.w3.org/2000/01/rdf-schema#',
        'http://www.w3.org/2000/svg',
        'http://www.w3.org/2000/02/svg/testsuite/description/', // T278044
        'http://www.w3.org/tr/rec-rdf-syntax/',
        'http://xmp.gettyimages.com/gift/1.0/',
    ];

    // Inkscape mangles namespace definitions created by Adobe Illustrator.
    // This is nasty but harmless. (T144827)
    $isBuggyInkscape = preg_match( '/^&(#38;)*ns_[a-z_]+;$/', $namespace );

    // Strict membership test: the namespace must be exactly one of the listed strings
    if ( !( $isBuggyInkscape || in_array( $namespace, $validNamespaces, true ) ) ) {
        wfDebug( __METHOD__ . ": Non-svg namespace '$namespace' in uploaded file." );
        return [ 'uploadscriptednamespace', $namespace ];
    }

    // check for elements that can contain javascript
    if ( $strippedElement === 'script' ) {
        wfDebug( __METHOD__ . ": Found script element '$element' in uploaded file." );

        return [ 'uploaded-script-svg', $strippedElement ];
    }

    // e.g., <svg xmlns="http://www.w3.org/2000/svg">
    // <handler xmlns:ev="http://www.w3.org/2001/xml-events" ev:event="load">alert(1)</handler> </svg>
    if ( $strippedElement === 'handler' ) {
        wfDebug( __METHOD__ . ": Found scriptable element '$element' in uploaded file." );

        return [ 'uploaded-script-svg', $strippedElement ];
    }

    // SVG reported in Feb '12 that used xml:stylesheet to generate javascript block
    if ( $strippedElement === 'stylesheet' ) {
        wfDebug( __METHOD__ . ": Found scriptable element '$element' in uploaded file." );

        return [ 'uploaded-script-svg', $strippedElement ];
    }

    // Block iframes, in case they pass the namespace check
    if ( $strippedElement === 'iframe' ) {
        wfDebug( __METHOD__ . ": iframe in uploaded file." );

        return [ 'uploaded-script-svg', $strippedElement ];
    }

    // Check <style> css
    if ( $strippedElement === 'style' ) {
        $cssCheck = $this->SVGCSSChecker->checkStyleTag( $data );
        if ( $cssCheck !== true ) {
            wfDebug( __METHOD__ . ": hostile css in style element. " . $cssCheck[0] );

            return [ 'uploaded-hostile-svg', $cssCheck[0], $cssCheck[1], $cssCheck[2] ];
        }
    }

    static $cssAttrs = [ 'font', 'clip-path', 'fill', 'filter', 'marker',
        'marker-end', 'marker-mid', 'marker-start', 'mask', 'stroke', 'cursor' ];

    foreach ( $attribs as $attrib => $value ) {
        // If attributeNamespace is '', it is relative to its element's namespace
        [ $attributeNamespace, $stripped ] = $this->splitXmlNamespace( $attrib );
        $value = strtolower( $value );

        if ( !(
                // Inkscape element's have valid attribs that start with on and are safe, fail all others
                // We are assuming here that the SVG will be interpreted
                // under XML serialization. This is not safe for SVGs
                // embedded directly in HTML.
                $namespace === 'http://www.inkscape.org/namespaces/inkscape' &&
                $attributeNamespace === ''
            ) && str_starts_with( $stripped, 'on' )
        ) {
            wfDebug( __METHOD__
                . ": Found event-handler attribute '$attrib'='$value' in uploaded file." );

            return [ 'uploaded-event-handler-on-svg', $attrib, $value ];
        }

        // Do not allow relative links, or unsafe url schemas.
        // For <a> tags, only data:, http: and https: and same-document
        // fragment links are allowed.
        // For all other tags, only 'data:' and fragments (#) are allowed.
        if (
            $stripped === 'href'
            && $value !== ''
            && !str_starts_with( $value, 'data:' )
            && !str_starts_with( $value, '#' )
            && !( $strippedElement === 'a' && preg_match( '!^https?://!i', $value ) )
        ) {
            wfDebug( __METHOD__ . ": Found href attribute <$strippedElement "
                . "'$attrib'='$value' in uploaded file." );

            return [ 'uploaded-href-attribute-svg', $strippedElement, $attrib, $value ];
        }

        // Only allow 'data:\' targets that should be safe.
        // This prevents vectors like image/svg, text/xml, application/xml, and text/html, which can contain scripts
        if ( $stripped === 'href' && strncasecmp( 'data:', $value, 5 ) === 0 ) {
            // RFC2397 parameters.
            // This is only slightly slower than (;[\w;]+)*.
            // phpcs:ignore Generic.Files.LineLength
            $parameters = '(?>;[a-zA-Z0-9\!#$&\'*+.^_`{|}~-]+=(?>[a-zA-Z0-9\!#$&\'*+.^_`{|}~-]+|"(?>[\0-\x0c\x0e-\x21\x23-\x5b\x5d-\x7f]+|\\\\[\0-\x7f])*"))*(?:;base64)?';

            if ( !preg_match( "!^data:\s*image/(gif|jpeg|jpg|a?png|webp|avif)$parameters,!i", $value ) ) {
                wfDebug( __METHOD__ . ": Found href with data URI with MIME type that is not allowed "
                    . "\"<$strippedElement '$attrib'='$value'...\" in uploaded file." );
                return [ 'uploaded-href-unsafe-target-svg', $strippedElement, $attrib, $value ];
            }
        }

        // Change href with animate from (http://html5sec.org/#137).
        if ( $stripped === 'attributename'
            && $strippedElement === 'animate'
            && $this->stripXmlNamespace( $value ) === 'href'
        ) {
            wfDebug( __METHOD__ . ": Found animate that might be changing href using from "
                . "\"<$strippedElement '$attrib'='$value'...\" in uploaded file." );

            return [ 'uploaded-animate-svg', $strippedElement, $attrib, $value ];
        }

        // Use set/animate to add event-handler attribute to parent.
        if ( ( $strippedElement === 'set' || $strippedElement === 'animate' )
            && $stripped === 'attributename'
            && str_starts_with( $value, 'on' )
        ) {
            wfDebug( __METHOD__ . ": Found svg setting event-handler attribute with "
                . "\"<$strippedElement $stripped='$value'...\" in uploaded file." );

            return [ 'uploaded-setting-event-handler-svg', $strippedElement, $stripped, $value ];
        }

        // use set to add href attribute to parent element.
        if ( $strippedElement === 'set'
            && $stripped === 'attributename'
            && str_contains( $value, 'href' )
        ) {
            wfDebug( __METHOD__ . ": Found svg setting href attribute '$value' in uploaded file." );

            return [ 'uploaded-setting-href-svg' ];
        }

        // use set to add a remote / data / script target to an element.
        if ( $strippedElement === 'set'
            && $stripped === 'to'
            && preg_match( '!(http|https|data|script):!im', $value )
        ) {
            wfDebug( __METHOD__ . ": Found svg setting attribute to '$value' in uploaded file." );

            return [ 'uploaded-wrong-setting-svg', $value ];
        }

        // use handler attribute with remote / data / script.
        if ( $stripped === 'handler' && preg_match( '!(http|https|data|script):!im', $value ) ) {
            wfDebug( __METHOD__ . ": Found svg setting handler with remote/data/script "
                . "'$attrib'='$value' in uploaded file." );

            return [ 'uploaded-setting-handler-svg', $attrib, $value ];
        }

        // use CSS styles to bring in remote code.
        if ( $stripped === 'style'
            && $this->SVGCSSChecker->checkStyleAttribute( $value ) !== true
        ) {
            wfDebug( __METHOD__ . ": Found svg setting a style with "
                . "remote url '$attrib'='$value' in uploaded file." );
            return [ 'uploaded-remote-url-svg', $attrib, $value ];
        }

        // Several attributes can include css, css character escaping isn't allowed.
        if ( in_array( $stripped, $cssAttrs, true )
            && $this->SVGCSSChecker->checkPresentationalAttribute( $value ) !== true
        ) {
            wfDebug( __METHOD__ . ": Found svg setting a style with "
                . "remote url '$attrib'='$value' in uploaded file." );
            return [ 'uploaded-remote-url-svg', $attrib, $value ];
        }

        // image filters can pull in url, which could be svg that executes scripts.
        // Only allow url( "#foo" ).
        // Do not allow url( http://example.com )
        // TODO: It seems like the line above already does this check.
        if ( $strippedElement === 'image'
            && $stripped === 'filter'
            && preg_match( '!url\s*\(\s*["\']?[^#]!im', $value )
        ) {
            wfDebug( __METHOD__ . ": Found image filter with url: "
                . "\"<$strippedElement $stripped='$value'...\" in uploaded file." );

            return [ 'uploaded-image-filter-svg', $strippedElement, $stripped, $value ];
        }
    }

    return false; // No scripts detected
}
849
/**
 * Split an expanded XML name at its last colon, after lower-casing it.
 *
 * 'http://www.w3.org/2000/svg:script' -> [ 'http://www.w3.org/2000/svg', 'script' ]
 *
 * @param string $element Expanded element or attribute name
 * @return string[] [ namespace, local name ]; the namespace is '' when
 *   the name contains no colon
 */
private function splitXmlNamespace( $element ) {
    $lowered = strtolower( $element );
    $lastColon = strrpos( $lowered, ':' );

    if ( $lastColon === false ) {
        return [ '', $lowered ];
    }

    return [
        substr( $lowered, 0, $lastColon ),
        substr( $lowered, $lastColon + 1 ),
    ];
}
864
/**
 * Return only the lower-cased local part of an expanded XML name.
 *
 * 'http://www.w3.org/2000/svg:script' -> 'script'
 *
 * @param string $element Expanded element or attribute name
 * @return string Local name without its namespace
 */
private function stripXmlNamespace( $element ) {
    [ , $localName ] = $this->splitXmlNamespace( $element );

    return $localName;
}
873
/**
 * Run the configured virus scanner on a file.
 *
 * The scanner command, its exit-code map and an optional message pattern
 * come from the AntivirusSetup entry selected by the Antivirus setting.
 *
 * @param string $file Path of the file to scan
 * @return string|bool|null Null when scanning is disabled, when the
 *   scanner does not support the file type, or when the scan failed and
 *   AntivirusRequired is off; false when no virus was found; otherwise the
 *   scanner feedback (true when the scanner printed nothing), or the
 *   'virus-scanfailed' message text when a required scan failed
 * @throws ConfigException When the Antivirus setting names a scanner that
 *   has no entry in AntivirusSetup
 */
public function detectVirus( $file ) {
    $mainConfig = $this->config;
    $antivirus = $mainConfig->get( MainConfigNames::Antivirus );
    $antivirusSetup = $mainConfig->get( MainConfigNames::AntivirusSetup );
    $antivirusRequired = $mainConfig->get( MainConfigNames::AntivirusRequired );
    if ( !$antivirus ) {
        wfDebug( __METHOD__ . ": virus scanner disabled" );

        return null;
    }

    if ( !( $antivirusSetup[$antivirus] ?? false ) ) {
        throw new ConfigException( "Unknown virus scanner: $antivirus" );
    }

    # look up scanner configuration
    $scanner = $antivirusSetup[$antivirus];
    $command = $scanner['command'];
    // A scanner entry without a code map uses the raw exit code
    $exitCodeMap = $scanner['codemap'] ?? [];
    $msgPattern = $scanner['messagepattern'] ?? null;

    if ( !str_contains( $command, "%f" ) ) {
        # simple pattern: append file to scan
        $command .= " " . Shell::escape( $file );
    } else {
        # complex pattern: replace "%f" with file to scan
        $command = str_replace( "%f", Shell::escape( $file ), $command );
    }

    wfDebug( __METHOD__ . ": running virus scan: $command " );

    # execute virus scanner
    $exitCode = false;

    # NOTE: there's a 50-line workaround to make stderr redirection work on windows, too.
    # that does not seem to be worth the pain.
    # Ask me (Duesentrieb) about it if it's ever needed.
    $output = wfShellExecWithStderr( $command, $exitCode );

    # map exit code to AV_xxx constants.
    $mappedCode = $exitCode;
    if ( $exitCodeMap ) {
        if ( isset( $exitCodeMap[$exitCode] ) ) {
            $mappedCode = $exitCodeMap[$exitCode];
        } elseif ( isset( $exitCodeMap["*"] ) ) {
            $mappedCode = $exitCodeMap["*"];
        }
    }

    # NB: AV_NO_VIRUS is 0, but AV_SCAN_FAILED is false,
    # so we need the strict equalities === and thus can't use a switch here
    if ( $mappedCode === AV_SCAN_FAILED ) {
        # scan failed (code was mapped to false by $exitCodeMap)
        wfDebug( __METHOD__ . ": failed to scan $file (code $exitCode)." );

        $output = $antivirusRequired
            ? wfMessage( 'virus-scanfailed', [ $exitCode ] )->text()
            : null;
    } elseif ( $mappedCode === AV_SCAN_ABORTED ) {
        # scan failed because filetype is unknown (probably immune)
        wfDebug( __METHOD__ . ": unsupported file type $file (code $exitCode)." );
        $output = null;
    } elseif ( $mappedCode === AV_NO_VIRUS ) {
        # no virus found
        wfDebug( __METHOD__ . ": file passed virus scan." );
        $output = false;
    } else {
        $output = trim( $output );

        if ( !$output ) {
            $output = true; # if there's no output, return true
        } elseif ( $msgPattern ) {
            $groups = [];
            // !empty() also covers a pattern whose first capture group is
            // absent from the match, without raising an undefined-offset warning
            if ( preg_match( $msgPattern, $output, $groups ) && !empty( $groups[1] ) ) {
                $output = $groups[1];
            }
        }

        wfDebug( __METHOD__ . ": FOUND VIRUS! scanner feedback: $output" );
    }

    return $output;
}
968}
const AV_SCAN_FAILED
Definition Defines.php:86
const AV_SCAN_ABORTED
Definition Defines.php:85
const AV_NO_VIRUS
Definition Defines.php:83
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfShellExecWithStderr( $cmd, &$retval=null, $environ=[], $limits=[])
Execute a shell command, returning both stdout and stderr.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Exceptions for config failures.
A class for passing options to services.
assertRequiredOptions(array $expectedKeys)
Assert that the list of options provided in this instance exactly match $expectedKeys,...
A class containing constants representing the names of configuration variables.
const MimeTypeExclusions
Name constant for the MimeTypeExclusions setting, for use with Config::get()
const AntivirusSetup
Name constant for the AntivirusSetup setting, for use with Config::get()
const VerifyMimeType
Name constant for the VerifyMimeType setting, for use with Config::get()
const DisableUploadScriptChecks
Name constant for the DisableUploadScriptChecks setting, for use with Config::get()
const Antivirus
Name constant for the Antivirus setting, for use with Config::get()
const AntivirusRequired
Name constant for the AntivirusRequired setting, for use with Config::get()
Base media handler class.
HTML sanitizer for MediaWiki.
Definition Sanitizer.php:34
Executes shell commands.
Definition Shell.php:32
Ensure SVG files cannot load external resources via URLs in CSS.
static checkFileExtension( $ext, $list)
Perform case-insensitive match against a list of file extensions.
Service to verify file uploads are safe.
detectVirus( $file)
Generic wrapper function for a virus scanner program.
verifyFile(string $path, string $ext, array $fileProps)
Verifies that the upload file is safe.
verifyExtension( $mime, $extension)
Checks if the MIME type of the uploaded file matches the file extension.
__construct(ServiceOptions $config, MimeAnalyzer $mimeAnalyzer, SVGCSSChecker $SVGCSSChecker)
verifyPartialFile(string $path, string $ext, array $fileProps)
A verification routine suitable for partial files.
detectScript( $file, $mime, $extension)
Heuristic for detecting files that could contain JavaScript instructions or things that may look like...
XML syntax and type checker.