MediaWiki master
SVGCSSChecker.php
Go to the documentation of this file.
1<?php
2namespace MediaWiki\Upload;
3
5use Wikimedia\CSS\Objects\AtRule;
6use Wikimedia\CSS\Objects\Token;
7use Wikimedia\CSS\Parser\Parser as CSSParser;
8
19
/**
 * Lower-case at-rule names that validateAtRule() always rejects.
 */
private const BANNED_AT_RULE = [
	'charset',
	'import',
];
29
/**
 * Lower-case CSS function names that validateTokens() always rejects.
 *
 * image() and image-set() can carry a URL without using url();
 * src() is banned for forwards-compatibility.
 */
private const BANNED_FUNCS = [
	'src',
	'image',
	'image-set',
];
39
/**
 * Entry point to check the contents of a style="..." attribute.
 *
 * @param string $value Raw value of the style attribute
 * @return true|array True when nothing objectionable was found. Otherwise a
 *  three-element array: an error code followed by two position values
 *  (presumably line and offset as reported by the CSS parser; both are 0
 *  for the control-character error).
 */
public function checkStyleAttribute( string $value ) {
	// Reject control characters before handing anything to the parser.
	if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
		return [ 'invalid-control-character', 0, 0 ];
	}

	$parser = CSSParser::newFromString( $value );
	$declarations = $parser->parseDeclarationList();
	$parseErrors = $parser->getParseErrors();

	if ( !$parseErrors ) {
		// Clean parse: the verdict is whatever the token check says.
		return $this->validateTokens( $declarations->toTokenArray() );
	}

	// The attribute has syntax errors. As a fallback, ask MW's wikitext
	// sanitizer whether it would change the value at all. If it would not,
	// the value is assumed to be safe: plenty of real files have broken
	// style attributes that never touch risky features such as CSS
	// comments or url(), so rejecting them all is not worth it.
	if ( Sanitizer::checkCss( $value ) === $value ) {
		// Nothing sketchy in there, so it's OK despite the parse errors.
		return true;
	}

	// Report the first parse error only.
	$first = $parseErrors[0];
	return [ $first[0], $first[1], $first[2] ];
}
74
/**
 * Entry point to check the value of a presentational attribute such as fill.
 *
 * The value is parsed as a list of CSS component values; any parse error
 * is reported directly.
 *
 * @param string $value Raw attribute value
 * @return true|array True when nothing objectionable was found. Otherwise a
 *  three-element array: an error code followed by two position values
 *  (presumably line and offset as reported by the CSS parser).
 */
public function checkPresentationalAttribute( $value ) {
	// Reject control characters before handing anything to the parser.
	if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
		return [ 'invalid-control-character', 0, 0 ];
	}

	$parser = CSSParser::newFromString( $value );
	$componentValues = $parser->parseComponentValueList();
	$parseErrors = $parser->getParseErrors();

	if ( $parseErrors ) {
		// Report the first parse error only.
		$first = $parseErrors[0];
		return [ $first[0], $first[1], $first[2] ];
	}

	// Clean parse: the verdict is whatever the token check says.
	return $this->validateTokens( $componentValues->toTokenArray() );
}
100
/**
 * Entry point to check the contents of a <style> tag.
 *
 * The value is parsed as a stylesheet. Every top-level rule has its tokens
 * validated, and at-rules are additionally checked against the list of
 * banned at-rules. @font-face rules are validated with the laxer setting
 * that permits embedded font data: URLs.
 *
 * @param string $value Raw text content of the <style> element
 * @return true|array True when nothing objectionable was found. Otherwise a
 *  three-element array: an error code followed by two position values
 *  (presumably line and offset as reported by the CSS parser).
 */
public function checkStyleTag( $value ) {
	// Reject control characters before handing anything to the parser.
	if ( preg_match( '/[\000-\010\013\016-\037\177]/', $value ) ) {
		return [ 'invalid-control-character', 0, 0 ];
	}
	$cssParser = CSSParser::newFromString( $value );
	$stylesheet = $cssParser->parseStylesheet();

	$errors = $cssParser->getParseErrors();
	if ( $errors ) {
		// Report the first parse error only.
		return [ $errors[0][0], $errors[0][1], $errors[0][2] ];
	}

	foreach ( $stylesheet->getRuleList() as $rule ) {
		$allowDataFonts = false;
		if ( $rule instanceof AtRule ) {
			$res = $this->validateAtRule( $rule );
			if ( $res !== true ) {
				return $res;
			}
			// @font-face has laxer rules. At-rule names are ASCII
			// case-insensitive in CSS, so compare the lower-cased name,
			// the same way validateAtRule() does; otherwise @FONT-FACE
			// would be held to the strict rules while @font-face is not.
			$allowDataFonts = strtolower( $rule->getName() ) === 'font-face';
		}
		// Note, this incidentally bans @namespace foo url( 'https://example.com' );
		// We don't care about that, but it's super obscure so it doesn't matter.
		$res = $this->validateTokens( $rule->toTokenArray(), $allowDataFonts );
		if ( $res !== true ) {
			return $res;
		}
	}
	return true;
}
146
/**
 * Decide whether an at-rule is acceptable, judging by its name alone.
 *
 * The name is lower-cased first. It is rejected if it is on the
 * BANNED_AT_RULE list or contains anything other than a-z and '-'.
 *
 * @param AtRule $rule The at-rule to inspect
 * @return true|array True if the at-rule may be used, otherwise
 *  [ "banned-at-rule-<name>", position[0], position[1] ].
 */
private function validateAtRule( AtRule $rule ) {
	$lowerName = strtolower( $rule->getName() );

	$explicitlyBanned = in_array( $lowerName, self::BANNED_AT_RULE );
	if ( !$explicitlyBanned && !preg_match( '/[^-a-z]/', $lowerName ) ) {
		return true;
	}

	$position = $rule->getPosition();
	return [ 'banned-at-rule-' . $lowerName, $position[0], $position[1] ];
}
163
/**
 * Scan a flat token list for anything that could reference a URL.
 *
 * Only URLs pointing into the current document (starting with '#') are
 * allowed. data: URLs are refused because the predecessor of this class
 * refused them; the reason is unclear, possibly to stop an SVG being
 * smuggled past the sanitizer inside a data URL. image() and image-set()
 * are refused because they accept a URL without url(), and src() is
 * refused for forwards-compatibility.
 *
 * @param Token[] $tokens Tokens to inspect
 * @param bool $allowDataFonts When true, URLs starting with 'data:font/'
 *  or 'data:;base64,' are also accepted (T71008#717580)
 * @return true|array True if every token is acceptable, otherwise
 *  [ error code, position[0], position[1] ] for the first offending token.
 */
private function validateTokens( array $tokens, $allowDataFonts = false ) {
	// Whether a URL target is one we are willing to accept.
	$isPermittedTarget = static function ( $target ) use ( $allowDataFonts ) {
		if ( str_starts_with( $target, '#' ) ) {
			return true;
		}
		return $allowDataFonts && (
			str_starts_with( $target, 'data:font/' )
			|| str_starts_with( $target, 'data:;base64,' ) /* T71008#717580 */
		);
	};
	// Build the error triple for an offending token.
	$fail = static function ( $code, $offender ) {
		return [ $code, $offender->getPosition()[0], $offender->getPosition()[1] ];
	};

	$total = count( $tokens );
	for ( $idx = 0; $idx < $total; $idx++ ) {
		$current = $tokens[$idx];
		$type = $current->type();

		if ( $type === Token::T_BAD_URL ) {
			// Browsers should in theory ignore this, but when something
			// weird is going on it is better to err on the side of failing.
			return $fail( 'banned-url', $current );
		}

		// Unquoted urls arrive as T_URL, quoted ones as a T_FUNCTION.
		if ( $type === Token::T_URL ) {
			if ( !$isPermittedTarget( $current->value() ) ) {
				return $fail( 'banned-url', $current );
			}
			continue;
		}

		if ( $type !== Token::T_FUNCTION ) {
			continue;
		}

		$funcName = strtolower( $current->value() );
		if ( $funcName === 'url' ) {
			// Skip any whitespace between "url(" and its argument.
			$next = $idx + 1;
			while ( $next < $total && $tokens[$next]->type() === Token::T_WHITESPACE ) {
				$next++;
			}
			// The argument must be a string with a permitted target.
			if (
				$next < $total &&
				$tokens[$next]->type() === Token::T_STRING &&
				$isPermittedTarget( $tokens[$next]->value() )
			) {
				continue;
			}
			return $fail( 'banned-url', $current );
		}

		if ( in_array( $funcName, self::BANNED_FUNCS ) ) {
			// The error code carries the function name exactly as written.
			return $fail( 'banned-function-' . $current->value(), $current );
		}
	}
	return true;
}
221}
HTML sanitizer for MediaWiki.
Definition Sanitizer.php:32
Ensure SVG files cannot load external resources via URLs in CSS.
checkStyleAttribute(string $value)
Entrypoint to check style="..." attributes.
checkStyleTag( $value)
Entrypoint to check <style> tags.
checkPresentationalAttribute( $value)
Entrypoint to check presentational attributes like fill.