Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
96.52% |
471 / 488 |
|
40.00% |
6 / 15 |
CRAP | |
0.00% |
0 / 1 |
SecurityCheckPlugin | |
96.52% |
471 / 488 |
|
40.00% |
6 / 15 |
83 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
assertRequiredConfig | |
50.00% |
1 / 2 |
|
0.00% |
0 / 1 |
2.50 | |||
getMergeVariableInfoClosure | |
92.68% |
38 / 41 |
|
0.00% |
0 / 1 |
18.13 | |||
analyzeStringLiteralStatement | |
97.30% |
36 / 37 |
|
0.00% |
0 / 1 |
8 | |||
taintToString | |
98.08% |
51 / 52 |
|
0.00% |
0 / 1 |
5 | |||
builtinFuncHasTaint | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getBuiltinFuncTaint | |
100.00% |
35 / 35 |
|
100.00% |
1 / 1 |
7 | |||
assertFunctionTaintArrayWellFormed | |
55.56% |
5 / 9 |
|
0.00% |
0 / 1 |
13.62 | |||
getCustomFuncTaints | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
isFalsePositive | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
parseTaintLine | |
100.00% |
40 / 40 |
|
100.00% |
1 / 1 |
12 | |||
modifyParamSinkTaint | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
modifyArgTaint | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
convertTaintNameToConstant | |
85.71% |
24 / 28 |
|
0.00% |
0 / 1 |
16.75 | |||
getPHPFuncTaints | |
100.00% |
237 / 237 |
|
100.00% |
1 / 1 |
1 | |||
getBeforeLoopBodyAnalysisVisitorClassName | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php declare( strict_types=1 ); |
2 | |
3 | /** |
4 | * Base class for SecurityCheckPlugin. Extend if you want to customize. |
5 | * |
6 | * Copyright (C) 2017 Brian Wolff <bawolff@gmail.com> |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
21 | */ |
22 | |
23 | namespace SecurityCheckPlugin; |
24 | |
25 | use AssertionError; |
26 | use ast\Node; |
27 | use Closure; |
28 | use Error; |
29 | use Phan\CodeBase; |
30 | use Phan\Config; |
31 | use Phan\Language\Context; |
32 | use Phan\Language\Element\Comment\Builder; |
33 | use Phan\Language\Element\FunctionInterface; |
34 | use Phan\Language\Element\Variable; |
35 | use Phan\Language\FQSEN\FullyQualifiedFunctionLikeName; |
36 | use Phan\Language\FQSEN\FullyQualifiedMethodName; |
37 | use Phan\Language\Scope; |
38 | use Phan\PluginV3; |
39 | use Phan\PluginV3\AnalyzeLiteralStatementCapability; |
40 | use Phan\PluginV3\BeforeLoopBodyAnalysisCapability; |
41 | use Phan\PluginV3\MergeVariableInfoCapability; |
42 | use Phan\PluginV3\PostAnalyzeNodeCapability; |
43 | use Phan\PluginV3\PreAnalyzeNodeCapability; |
44 | |
45 | /** |
46 | * Base class used by the Generic and MediaWiki flavours of the plugin. |
47 | */ |
48 | abstract class SecurityCheckPlugin extends PluginV3 implements |
49 | PostAnalyzeNodeCapability, |
50 | PreAnalyzeNodeCapability, |
51 | BeforeLoopBodyAnalysisCapability, |
52 | MergeVariableInfoCapability, |
53 | AnalyzeLiteralStatementCapability |
54 | { |
55 | use TaintednessAccessorsTrait; |
56 | |
57 | // Various taint flags. The _EXEC_ varieties mean |
58 | // that it is unsafe to assign that type of taint |
59 | // to the variable in question. |
60 | |
61 | public const NO_TAINT = 0; |
62 | |
63 | // Flag to denote that we don't know |
64 | public const UNKNOWN_TAINT = 1 << 0; |
65 | |
66 | // Flag for function parameters and the like, where it |
67 | // preserves whatever taint the function is given. |
68 | public const PRESERVE_TAINT = 1 << 1; |
69 | |
70 | // In future might separate out different types of html quoting. |
71 | // e.g. "<div data-foo='" . htmlspecialchars( $bar ) . "'>"; |
72 | // is unsafe. |
73 | public const HTML_TAINT = 1 << 2; |
74 | public const HTML_EXEC_TAINT = 1 << 3; |
75 | |
76 | public const SQL_TAINT = 1 << 4; |
77 | public const SQL_EXEC_TAINT = 1 << 5; |
78 | |
79 | public const SHELL_TAINT = 1 << 6; |
80 | public const SHELL_EXEC_TAINT = 1 << 7; |
81 | |
82 | public const SERIALIZE_TAINT = 1 << 8; |
83 | public const SERIALIZE_EXEC_TAINT = 1 << 9; |
84 | |
85 | // Tainted paths, as input to include(), require() and some FS functions (path traversal) |
86 | public const PATH_TAINT = 1 << 10; |
87 | public const PATH_EXEC_TAINT = 1 << 11; |
88 | |
89 | // User-controlled code, for RCE |
90 | public const CODE_TAINT = 1 << 12; |
91 | public const CODE_EXEC_TAINT = 1 << 13; |
92 | |
93 | // User-controlled regular expressions, for ReDoS |
94 | public const REGEX_TAINT = 1 << 14; |
95 | public const REGEX_EXEC_TAINT = 1 << 15; |
96 | |
97 | // To allow people to add other application specific taints. |
98 | public const CUSTOM1_TAINT = 1 << 16; |
99 | public const CUSTOM1_EXEC_TAINT = 1 << 17; |
100 | public const CUSTOM2_TAINT = 1 << 18; |
101 | public const CUSTOM2_EXEC_TAINT = 1 << 19; |
102 | |
103 | // Special purpose for supporting MediaWiki's IDatabase::select |
104 | // and friends. Like SQL_TAINT, but only applies to the numeric |
105 | // keys of an array. Note: These are not included in YES_TAINT/EXEC_TAINT. |
106 | // e.g. given $f = [ $_GET['foo'] ]; $f would have the flag, but |
107 | // $g = $_GET['foo']; or $h = [ 's' => $_GET['foo'] ] would not. |
108 | // The associative keys also have this flag if they are tainted. |
109 | // It is also assumed anything with this flag will also have |
110 | // the SQL_TAINT flag set. |
111 | public const SQL_NUMKEY_TAINT = 1 << 20; |
112 | public const SQL_NUMKEY_EXEC_TAINT = 1 << 21; |
113 | |
114 | // For double escaped variables |
115 | public const ESCAPED_TAINT = 1 << 22; |
116 | public const ESCAPED_EXEC_TAINT = 1 << 23; |
117 | |
118 | // Special purpose flags (Starting at 2^28) |
119 | // TODO Renumber these. Requires changing format of the hardcoded arrays |
120 | // Cancels out all EXEC flags on a function arg if arg is array. |
121 | public const ARRAY_OK = 1 << 28; |
122 | |
123 | // Do not allow autodetected taint info override given taint. |
124 | // TODO Store this and other special flags somewhere else in the FunctionTaintedness object, not |
125 | // as normal taint flags. |
126 | public const NO_OVERRIDE = 1 << 29; |
127 | |
128 | public const VARIADIC_PARAM = 1 << 30; |
129 | |
130 | // *All* function flags |
131 | //TODO Add a structure test for this |
132 | public const FUNCTION_FLAGS = self::ARRAY_OK | self::NO_OVERRIDE; |
133 | |
134 | // Combination flags. |
135 | |
136 | // YES_TAINT denotes all taint a user controlled variable would have |
137 | public const YES_TAINT = self::HTML_TAINT | self::SQL_TAINT | self::SHELL_TAINT | self::SERIALIZE_TAINT | |
138 | self::PATH_TAINT | self::CODE_TAINT | self::REGEX_TAINT | self::CUSTOM1_TAINT | self::CUSTOM2_TAINT; |
139 | public const EXEC_TAINT = self::YES_TAINT << 1; |
140 | // @phan-suppress-next-line PhanUnreferencedPublicClassConstant |
141 | public const YES_EXEC_TAINT = self::YES_TAINT | self::EXEC_TAINT; |
142 | |
143 | // ALL taint is YES + special purpose taints, but not including special flags. |
144 | public const ALL_TAINT = self::YES_TAINT | self::SQL_NUMKEY_TAINT | self::ESCAPED_TAINT; |
145 | public const ALL_EXEC_TAINT = |
146 | self::EXEC_TAINT | self::SQL_NUMKEY_EXEC_TAINT | self::ESCAPED_EXEC_TAINT; |
147 | public const ALL_YES_EXEC_TAINT = self::ALL_TAINT | self::ALL_EXEC_TAINT; |
148 | |
149 | // Taints that support backpropagation. |
150 | public const BACKPROP_TAINTS = self::ALL_EXEC_TAINT; |
151 | |
152 | public const ESCAPES_HTML = ( self::YES_TAINT & ~self::HTML_TAINT ) | self::ESCAPED_EXEC_TAINT; |
153 | |
154 | // As the name would suggest, this must include *ALL* possible taint flags. |
155 | public const ALL_TAINT_FLAGS = self::ALL_YES_EXEC_TAINT | self::FUNCTION_FLAGS | |
156 | self::UNKNOWN_TAINT | self::PRESERVE_TAINT | self::VARIADIC_PARAM; |
157 | |
158 | /** |
159 | * Used to print taint debug data, see BlockAnalysisVisitor::PHAN_DEBUG_VAR_REGEX |
160 | */ |
161 | private const DEBUG_TAINTEDNESS_REGEXP = |
162 | '/@phan-debug-var-taintedness\s+\$(' . Builder::WORD_REGEX . '(,\s*\$' . Builder::WORD_REGEX . ')*)/'; |
163 | // @phan-suppress-previous-line PhanAccessClassConstantInternal It's just perfect for use here |
164 | |
165 | public const PARAM_ANNOTATION_REGEX = |
166 | '/@param-taint\s+&?(?P<variadic>\.\.\.)?\$(?P<paramname>\S+)\s+(?P<taint>.*)$/'; |
167 | |
168 | /** |
169 | * @var self Passed to the visitor for context |
170 | */ |
171 | public static $pluginInstance; |
172 | |
173 | /** |
174 | * @var array<array<FunctionTaintedness|MethodLinks>> Cache of parsed docblocks. This is declared here (as opposed |
175 | * to the BaseVisitor) so that PHPUnit can snapshot and restore it. |
176 | * @phan-var array<array{0:FunctionTaintedness,1:MethodLinks}> |
177 | */ |
178 | public static $docblockCache = []; |
179 | |
180 | /** @var FunctionTaintedness[] Cache of taintedness of builtin functions */ |
181 | private static $builtinFuncTaintCache = []; |
182 | |
183 | /** |
184 | * Check the required Phan configuration, then save the subclass instance in self::$pluginInstance to make it accessible from the visitor |
185 | */ |
186 | public function __construct() { |
187 | $this->assertRequiredConfig(); |
188 | self::$pluginInstance = $this; |
189 | } |
190 | |
/**
 * Verify that Phan is configured in a way that taint-check can work with.
 *
 * @throws AssertionError If Phan's quick mode is enabled
 */
private function assertRequiredConfig(): void {
	if ( !Config::get_quick_mode() ) {
		return;
	}
	throw new AssertionError( 'Quick mode must be disabled to run taint-check' );
}
199 | |
200 | /** |
201 | * @inheritDoc |
202 | */ |
203 | public function getMergeVariableInfoClosure(): Closure { |
204 | /** |
205 | * For branches that are not guaranteed to be executed, merge the taint data (taintedness, method links and |
206 | * original error) of the involved variable across all branch scopes, and store the result on $variable. |
207 | * |
208 | * @note This method is HOT, so keep it optimized |
209 | * |
210 | * @param Variable $variable |
211 | * @param Scope[] $scopeList |
212 | * @param bool $varExistsInAllScopes @phan-unused-param |
213 | * @suppress PhanUnreferencedClosure, PhanUndeclaredProperty, UnusedSuppression |
214 | */ |
215 | return static function ( Variable $variable, array $scopeList, bool $varExistsInAllScopes ) { |
216 | $varName = $variable->getName(); |
217 | |
218 | $vars = []; |
219 | $firstVar = null; |
220 | foreach ( $scopeList as $scope ) { |
221 | $localVar = $scope->getVariableByNameOrNull( $varName ); |
222 | if ( $localVar ) { |
223 | if ( !$firstVar ) { |
224 | $firstVar = $localVar; |
225 | } else { |
226 | $vars[] = $localVar; |
227 | } |
228 | } |
229 | } |
230 | |
231 | if ( !$firstVar ) { |
232 | return; |
233 | } |
234 | |
235 | $taintedness = $prevTaint = $firstVar->taintedness ?? null; |
236 | $methodLinks = $prevLinks = $firstVar->taintedMethodLinks ?? null; |
237 | $error = $prevErr = $firstVar->taintedOriginalError ?? null; |
238 | |
239 | foreach ( $vars as $localVar ) { |
240 | // Data is only merged when it is set in the current scope and is not the very same value seen in the |
241 | // previous branch. Using arrays to save all previous values and then in_array seems useless on MW core, |
242 | // since >99% cases of duplication are already covered by these simple checks. |
243 | |
244 | $taintOrNull = $localVar->taintedness ?? null; |
245 | if ( $taintOrNull && $taintOrNull !== $prevTaint ) { |
246 | $prevTaint = $taintOrNull; |
247 | if ( $taintedness ) { |
248 | $taintedness->mergeWith( $taintOrNull ); |
249 | } else { |
250 | $taintedness = $taintOrNull; |
251 | } |
252 | } |
253 | |
254 | $variableObjLinksOrNull = $localVar->taintedMethodLinks ?? null; |
255 | if ( $variableObjLinksOrNull && $variableObjLinksOrNull !== $prevLinks ) { |
256 | $prevLinks = $variableObjLinksOrNull; |
257 | if ( $methodLinks ) { |
258 | $methodLinks->mergeWith( $variableObjLinksOrNull ); |
259 | } else { |
260 | $methodLinks = $variableObjLinksOrNull; |
261 | } |
262 | } |
263 | |
264 | $varErrorOrNull = $localVar->taintedOriginalError ?? null; |
265 | if ( $varErrorOrNull && $varErrorOrNull !== $prevErr ) { |
266 | $prevErr = $varErrorOrNull; |
267 | if ( $error ) { |
268 | $error->mergeWith( $varErrorOrNull ); |
269 | } else { |
270 | $error = $varErrorOrNull; |
271 | } |
272 | } |
273 | } |
274 | |
275 | if ( $taintedness ) { |
276 | self::setTaintednessRaw( $variable, $taintedness ); |
277 | } |
278 | if ( $methodLinks ) { |
279 | self::setMethodLinks( $variable, $methodLinks ); |
280 | } |
281 | if ( $error ) { |
282 | self::setCausedByRaw( $variable, $error ); |
283 | } |
284 | }; |
285 | } |
286 | |
/**
 * Handle the debug annotations that can appear inside string literal statements.
 *
 * Two annotations are recognised:
 *  - `@phan-debug-var-taintedness $a, $b`: emits a SecurityCheckDebugTaintedness issue for every
 *    listed variable, reporting either its taintedness or the fact that it isn't in scope.
 *  - `@taint-check-debug-method-first-arg Some::method`: INTERNAL, only meant for the
 *    backpropoffsets-blowup test. Emits an issue with the sink taintedness of the first parameter
 *    of the given method, if the method has taint data attached.
 *
 * @see BlockAnalysisVisitor::analyzeSubstituteVarAssert()
 * @inheritDoc
 * @suppress PhanUndeclaredProperty, UnusedSuppression
 */
public function analyzeStringLiteralStatement( CodeBase $codeBase, Context $context, string $statement ): bool {
	$allMatches = [];
	if ( preg_match_all( self::DEBUG_TAINTEDNESS_REGEXP, $statement, $allMatches, PREG_SET_ORDER ) ) {
		$emitted = false;
		foreach ( $allMatches as $match ) {
			// $match[1] is the whole comma-separated variable list, e.g. "foo, $bar"
			foreach ( explode( ',', $match[1] ) as $rawName ) {
				$name = ltrim( trim( $rawName ), '$' );
				$scope = $context->getScope();
				if ( !$scope->hasVariableWithName( $name ) ) {
					$template = "Variable {CODE} doesn't exist in scope";
					$args = [ "\$$name" ];
				} else {
					$rawTaint = self::getTaintednessRaw( $scope->getVariableByName( $name ) );
					$template = "Variable {CODE} has taintedness: {DETAILS}";
					$args = [ "\$$name", $rawTaint ? $rawTaint->toShortString() : 'unset' ];
				}
				self::emitIssue(
					$codeBase,
					$context,
					'SecurityCheckDebugTaintedness',
					$template,
					$args
				);
				$emitted = true;
			}
		}
		return $emitted;
	}

	if ( strpos( $statement, '@taint-check-debug-method-first-arg' ) === false ) {
		return false;
	}

	// FIXME This is a hack. The annotation is INTERNAL, for use only in the backpropoffsets-blowup
	// test. We should either find a better way to test that, or maybe add a public annotation
	// for debugging taintedness of a method (probably unreadable on a single line).
	$methodName = preg_replace( '/@taint-check-debug-method-first-arg ([a-z:]+)\b.*/i', '$1', $statement );
	// Any exception is left to bubble up here, since the annotation is only used internally in tests
	$methodFqsen = FullyQualifiedMethodName::fromStringInContext( $methodName, $context );
	/** @var FunctionTaintedness|null $methodTaint */
	$methodTaint = $codeBase->getMethodByFQSEN( $methodFqsen )->funcTaint ?? null;
	if ( !$methodTaint ) {
		return false;
	}
	self::emitIssue(
		$codeBase,
		$context,
		'SecurityCheckDebugTaintedness',
		"Method {CODE} has first param with taintedness: {DETAILS}",
		[ $methodName, $methodTaint->getParamSinkTaint( 0 )->toShortString() ]
	);
	return true;
}
343 | |
/**
 * Build a human-readable, comma-separated description of a taint bitmask.
 *
 * A name prefixed with ~ stands for "all input taints except the named one".
 * A name prefixed with * stands for the EXEC version of the taint.
 * Bits that don't correspond to any known name are reported as "Unrecognized: <bits>".
 *
 * @param int $taint
 * @return string 'NONE' when $taint is self::NO_TAINT
 */
public static function taintToString( int $taint ): string {
	if ( $taint === self::NO_TAINT ) {
		return 'NONE';
	}

	// Order matters: every combined mask is listed before the single flags it is made of, so
	// that the most compact name wins and its bits are consumed before the flags are examined.
	static $mapping = [
		self::UNKNOWN_TAINT => 'UNKNOWN',
		self::PRESERVE_TAINT => 'PRESERVE',
		self::ALL_TAINT => 'ALL',
		self::YES_TAINT => 'YES',
		self::YES_TAINT &
			( ~self::HTML_TAINT ) => '~HTML',
		self::YES_TAINT &
			( ~self::SQL_TAINT ) => '~SQL',
		self::YES_TAINT &
			( ~self::SHELL_TAINT ) => '~SHELL',
		self::YES_TAINT &
			( ~self::SERIALIZE_TAINT ) => '~SERIALIZE',
		self::YES_TAINT &
			( ~self::CUSTOM1_TAINT ) => '~CUSTOM1',
		self::YES_TAINT &
			( ~self::CUSTOM2_TAINT ) => '~CUSTOM2',
		// We skip ~ versions of flags which shouldn't be possible.
		self::HTML_TAINT => 'HTML',
		self::SQL_TAINT => 'SQL',
		self::SHELL_TAINT => 'SHELL',
		self::ESCAPED_TAINT => 'ESCAPED',
		self::SERIALIZE_TAINT => 'SERIALIZE',
		self::CUSTOM1_TAINT => 'CUSTOM1',
		self::CUSTOM2_TAINT => 'CUSTOM2',
		self::CODE_TAINT => 'CODE',
		self::PATH_TAINT => 'PATH',
		self::REGEX_TAINT => 'REGEX',
		self::SQL_NUMKEY_TAINT => 'SQL_NUMKEY',
		self::ARRAY_OK => 'ARRAY_OK',
		self::ALL_EXEC_TAINT => '*ALL',
		self::HTML_EXEC_TAINT => '*HTML',
		self::SQL_EXEC_TAINT => '*SQL',
		self::SHELL_EXEC_TAINT => '*SHELL',
		self::ESCAPED_EXEC_TAINT => '*ESCAPED',
		self::SERIALIZE_EXEC_TAINT => '*SERIALIZE',
		self::CUSTOM1_EXEC_TAINT => '*CUSTOM1',
		self::CUSTOM2_EXEC_TAINT => '*CUSTOM2',
		self::CODE_EXEC_TAINT => '*CODE',
		self::PATH_EXEC_TAINT => '*PATH',
		self::REGEX_EXEC_TAINT => '*REGEX',
		self::SQL_NUMKEY_EXEC_TAINT => '*SQL_NUMKEY',
	];

	$remaining = $taint;
	$names = [];
	foreach ( $mapping as $mask => $label ) {
		if ( ( $remaining & $mask ) !== $mask ) {
			// Not every bit of this mask is (still) set
			continue;
		}
		$names[] = $label;
		// Consume the bits, so that they aren't reported again under another name
		$remaining &= ~$mask;
	}
	if ( $remaining !== 0 ) {
		$names[] = "Unrecognized: $remaining";
	}
	return implode( ', ', $names );
}
415 | |
/**
 * Check whether hardcoded taint data is available for the given function or method.
 *
 * @param FullyQualifiedFunctionLikeName $fqsen
 * @return bool True if getBuiltinFuncTaint() returns taint data for $fqsen
 */
public function builtinFuncHasTaint( FullyQualifiedFunctionLikeName $fqsen ): bool {
	$hardcodedTaint = $this->getBuiltinFuncTaint( $fqsen );
	return $hardcodedTaint !== null;
}
423 | |
/**
 * Get the hardcoded taintedness of a function, if any
 *
 * The data comes from getCustomFuncTaints() and getPHPFuncTaints(), with the former taking
 * precedence. Raw arrays are validated and converted to FunctionTaintedness objects, and the
 * result is cached per function name.
 *
 * If you want to provide custom taint hints for your application,
 * override the getCustomFuncTaints()
 *
 * @param FullyQualifiedFunctionLikeName $fqsen The function/method in question
 * @return FunctionTaintedness|null Null to autodetect taintedness
 */
public function getBuiltinFuncTaint( FullyQualifiedFunctionLikeName $fqsen ): ?FunctionTaintedness {
	$key = (string)$fqsen;

	if ( isset( self::$builtinFuncTaintCache[$key] ) ) {
		return self::$builtinFuncTaintCache[$key];
	}

	// The merged list of hardcoded taints is only computed on first use
	static $funcTaints = null;
	if ( $funcTaints === null ) {
		$funcTaints = $this->getCustomFuncTaints() + $this->getPHPFuncTaints();
	}

	if ( !isset( $funcTaints[$key] ) ) {
		return null;
	}

	$raw = $funcTaints[$key];
	if ( $raw instanceof FunctionTaintedness ) {
		$result = $raw;
	} else {
		self::assertFunctionTaintArrayWellFormed( $raw );
		// Note: for backcompat, we set NO_OVERRIDE everywhere.
		$overallFlags = ( $raw['overall'] & self::FUNCTION_FLAGS ) | self::NO_OVERRIDE;
		$result = new FunctionTaintedness( new Taintedness( $raw['overall'] & ~$overallFlags ) );
		$result->addOverallFlags( $overallFlags );
		// Whatever is left after removing 'overall' is per-parameter data
		unset( $raw['overall'] );
		foreach ( $raw as $paramIdx => $rawParamTaint ) {
			assert( ( $rawParamTaint & self::UNKNOWN_TAINT ) === 0, 'Cannot set UNKNOWN' );
			$paramFlags = ( $rawParamTaint & self::FUNCTION_FLAGS ) | self::NO_OVERRIDE;
			// Clearing VARIADIC_PARAM is a no-op for non-variadic parameters, where the bit is not set
			$paramTaint = new Taintedness( $rawParamTaint & ~( self::VARIADIC_PARAM | $paramFlags ) );
			// TODO Split sink and preserve in the hardcoded arrays
			if ( $rawParamTaint & self::VARIADIC_PARAM ) {
				$result->setVariadicParamSinkTaint( $paramIdx, $paramTaint->withOnly( self::ALL_EXEC_TAINT ) );
				$result->setVariadicParamPreservedTaint(
					$paramIdx,
					$paramTaint->without( self::ALL_EXEC_TAINT )->asPreservedTaintedness()
				);
				$result->addVariadicParamFlags( $paramFlags );
			} else {
				$result->setParamSinkTaint( $paramIdx, $paramTaint->withOnly( self::ALL_EXEC_TAINT ) );
				$result->setParamPreservedTaint(
					$paramIdx,
					$paramTaint->without( self::ALL_EXEC_TAINT )->asPreservedTaintedness()
				);
				$result->addParamFlags( $paramIdx, $paramFlags );
			}
		}
	}

	self::$builtinFuncTaintCache[$key] = $result;
	return $result;
}
486 | |
/**
 * Assert that a taintedness array is well-formed, and fail hard if it isn't.
 *
 * A well-formed array has an 'overall' element, no keys other than 'overall' and integers,
 * and only integer values made up of known taint flags (self::ALL_TAINT_FLAGS).
 *
 * @param int[] $taint
 * @throws Error If the array is malformed
 */
private static function assertFunctionTaintArrayWellFormed( array $taint ): void {
	if ( !isset( $taint['overall'] ) ) {
		throw new Error( 'Overall taint must be set' );
	}

	foreach ( $taint as $i => $t ) {
		if ( !is_int( $i ) && $i !== 'overall' ) {
			throw new Error( "Taint indexes must be int or 'overall', got '$i'" );
		}
		// Only the type is checked here. Unknown flags are reported separately below with a more
		// specific message; checking them here as well would make that branch unreachable.
		if ( !is_int( $t ) ) {
			throw new Error( "Wrong taint index $i, got: " . var_export( $t, true ) );
		}
		if ( $t & ~self::ALL_TAINT_FLAGS ) {
			throw new Error( "Taint index $i has unknown flags: " . decbin( $t ) );
		}
	}
}
509 | |
510 | /** |
511 | * Get an array of function taints custom for the application. These take precedence over the PHP builtin ones. |
512 | * |
513 | * @return array<string,int[]|FunctionTaintedness> Array of function taints. The keys are FQSENs. The values can be |
514 | * either FunctionTaintedness objects, or arrays with 'overall' string key and numeric keys for parameters. |
515 | * |
516 | * For example: [ self::YES_TAINT, 'overall' => self::NO_TAINT ] |
517 | * means that the taint of the return value is the same as the taint |
518 | * of the first arg, and all other args are ignored. |
519 | * [ self::HTML_EXEC_TAINT, 'overall' => self::NO_TAINT ] |
520 | * Means that the first arg is output in an html context (e.g. like echo) |
521 | * [ self::YES_TAINT & ~self::HTML_TAINT, 'overall' => self::NO_TAINT ] |
522 | * Means that the function removes html taint (escapes) e.g. htmlspecialchars |
523 | * [ 'overall' => self::YES_TAINT ] |
524 | * Means that it returns a tainted value (e.g. return $_POST['foo']; ) |
525 | * @see FunctionTaintedness for more details |
526 | */ |
527 | abstract protected function getCustomFuncTaints(): array; |
528 | |
529 | /** |
530 | * Hook that can be used to force specific issues to be marked false positives. This implementation never does. |
531 | * |
532 | * For example, a specific application might be able to recognize |
533 | * that we are in a CLI context, and thus the XSS is really a false positive. |
534 | * |
535 | * @param int $combinedTaint Combined and adjusted taint of LHS+RHS |
536 | * @param string &$msg Issue description (so plugin can modify to state why false) |
537 | * @param Context $context |
538 | * @param CodeBase $code_base |
539 | * @return bool Is this a false positive? |
540 | * @suppress PhanUnusedPublicMethodParameter No param is used |
541 | */ |
542 | public function isFalsePositive( |
543 | int $combinedTaint, |
544 | string &$msg, |
545 | Context $context, |
546 | CodeBase $code_base |
547 | ): bool { |
548 | return false; |
549 | } |
550 | |
/**
 * Given a param description line, extract taint
 *
 * This is to allow putting taint information in method docblocks.
 * If a function has a docblock comment like:
 *  * @param-taint $foo escapes_html
 * This converts that line into:
 *   ( self::YES_TAINT & ~self::HTML_TAINT ) | self::ESCAPED_EXEC_TAINT
 * Multiple taint types are separated by commas, which are interpreted
 * as bitwise OR ( "|" ). Keywords that aren't recognised are ignored.
 *
 * The following keywords are supported where {type} can be
 * html, htmlnoent, sql, shell, serialize, custom1, custom2, code, path,
 * regex, sql_numkey, escaped.
 *  * {type} - just set the flag. 99% you should only use 'none' or 'tainted'
 *  * exec_{type} - sets the exec flag.
 *  * escapes_{type} - self::YES_TAINT & ~self::{type}_TAINT.
 *     Note: escapes_html adds the exec_escaped flag, use
 *     escapes_htmlnoent if the value is safe to double encode.
 *  * onlysafefor_{type}
 *     Same as above, intended for return type declarations.
 *     Only difference is that onlysafefor_html sets ESCAPED_TAINT instead
 *     of ESCAPED_EXEC_TAINT
 *  * none - self::NO_TAINT
 *  * tainted - self::YES_TAINT
 *  * array_ok - sets self::ARRAY_OK
 *  * allow_override - Allow autodetected taints to override annotation
 *     (i.e. self::NO_OVERRIDE, which is otherwise always set, is removed)
 *
 * @todo What about ~ operator?
 * @note The special casing to have escapes_html always add exec_escaped
 * (and having htmlnoent exist) is "experimental" and may change in
 * future versions (Maybe all types should set exec_escaped. Maybe it
 * should be explicit)
 * @note Excluding UNKNOWN here on purpose, as if we're setting it, it's not unknown
 * @param string $line A line from the docblock
 * @return array|null Array of [taintedness, flags], or null if no keyword was recognised
 * @phan-return array{0:Taintedness,1:int}|null
 */
public static function parseTaintLine( string $line ): ?array {
	$typeRe = '(?P<type>htmlnoent|html|sql|shell|serialize|custom1|'
		. 'custom2|code|path|regex|sql_numkey|escaped|none|tainted)';
	$prefixRe = '(?P<prefix>escapes|onlysafefor|exec)';
	$keywordRe = "(?P<taint>(?:{$prefixRe}_)?$typeRe|array_ok|allow_override)";

	// Keep the list of keywords and drop whatever free text follows it
	$keywordList = preg_replace( "/((?:$keywordRe,? *)+)(?: .*)?$/", '$1', $line );
	$keywords = array_map( 'trim', explode( ',', strtolower( $keywordList ) ) );

	$parsedTaint = new Taintedness( self::NO_TAINT );
	$parsedFlags = self::NO_OVERRIDE;
	$recognisedCount = 0;
	foreach ( $keywords as $keyword ) {
		$parts = [];
		if ( !preg_match( "/^$keywordRe$/", $keyword, $parts ) ) {
			continue;
		}
		$recognisedCount++;

		$fullName = $parts['taint'];
		if ( $fullName === 'array_ok' ) {
			$parsedFlags |= self::ARRAY_OK;
			continue;
		}
		if ( $fullName === 'allow_override' ) {
			$parsedFlags &= ~self::NO_OVERRIDE;
			continue;
		}

		$typeBits = self::convertTaintNameToConstant( $parts['type'] );
		$prefix = $parts['prefix'];
		if ( $prefix === '' ) {
			$parsedTaint->add( $typeBits );
		} elseif ( $prefix === 'exec' ) {
			$parsedTaint->add( Taintedness::flagsAsYesToExecTaint( $typeBits ) );
		} elseif ( $prefix === 'escapes' || $prefix === 'onlysafefor' ) {
			$parsedTaint->add( self::YES_TAINT & ~$typeBits );
			// Only plain 'html' is special-cased, 'htmlnoent' is not
			if ( $parts['type'] === 'html' ) {
				$parsedTaint->add(
					$prefix === 'escapes' ? self::ESCAPED_EXEC_TAINT : self::ESCAPED_TAINT
				);
			}
		}
	}

	return $recognisedCount === 0 ? null : [ $parsedTaint, $parsedFlags ];
}
644 | |
645 | /** |
646 | * Hook to override the sink taintedness of a method parameter depending on the current argument. By default, $paramSinkTaint is returned unchanged. |
647 | * |
648 | * @internal This method is unstable and may be removed without prior notice. |
649 | * |
650 | * @param Taintedness $paramSinkTaint |
651 | * @param Taintedness $curArgTaintedness |
652 | * @param Node $argument Note: This hook is not called on literals |
653 | * @param int $argIndex Which argument number is this |
654 | * @param FunctionInterface $func The function/method being called |
655 | * @param FunctionTaintedness $funcTaint Taint of method formal parameters |
656 | * @param Context $context Context object |
657 | * @param CodeBase $code_base CodeBase object |
658 | * @return Taintedness The taint to use for actual parameter |
659 | * @suppress PhanUnusedPublicMethodParameter |
660 | */ |
661 | public function modifyParamSinkTaint( |
662 | Taintedness $paramSinkTaint, |
663 | Taintedness $curArgTaintedness, |
664 | Node $argument, |
665 | int $argIndex, |
666 | FunctionInterface $func, |
667 | FunctionTaintedness $funcTaint, |
668 | Context $context, |
669 | CodeBase $code_base |
670 | ): Taintedness { |
671 | // no-op |
672 | return $paramSinkTaint; |
673 | } |
674 | |
675 | /** |
676 | * Hook to override how taint of an argument to method call is calculated. By default, $curArgTaintedness is returned unchanged. |
677 | * |
678 | * @param Taintedness $curArgTaintedness |
679 | * @param Node $argument Note: This hook is not called on literals |
680 | * @param int $argIndex Which argument number is this |
681 | * @param FunctionInterface $func The function/method being called |
682 | * @param FunctionTaintedness $funcTaint Taint of method formal parameters |
683 | * @param Context $context Context object |
684 | * @param CodeBase $code_base CodeBase object |
685 | * @return Taintedness The taint to use for actual parameter |
686 | * @suppress PhanUnusedPublicMethodParameter |
687 | */ |
688 | public function modifyArgTaint( |
689 | Taintedness $curArgTaintedness, |
690 | Node $argument, |
691 | int $argIndex, |
692 | FunctionInterface $func, |
693 | FunctionTaintedness $funcTaint, |
694 | Context $context, |
695 | CodeBase $code_base |
696 | ): Taintedness { |
697 | // no-op |
698 | return $curArgTaintedness; |
699 | } |
700 | |
/**
 * Translate a taint name, as used in annotations, to the corresponding taint constant.
 * For instance, 'html' becomes self::HTML_TAINT.
 *
 * @note htmlnoent treated like self::HTML_TAINT.
 * @param string $name one of:
 *   html, htmlnoent, sql, shell, serialize, custom1, custom2, code, path, regex, sql_numkey,
 *   escaped, none (= self::NO_TAINT), tainted (= self::YES_TAINT)
 * @return int One of the TAINT constants
 * @throws AssertionError If $name is not one of the names listed above
 */
public static function convertTaintNameToConstant( string $name ): int {
	static $constantsByName = [
		'html' => self::HTML_TAINT,
		'htmlnoent' => self::HTML_TAINT,
		'sql' => self::SQL_TAINT,
		'shell' => self::SHELL_TAINT,
		'serialize' => self::SERIALIZE_TAINT,
		'custom1' => self::CUSTOM1_TAINT,
		'custom2' => self::CUSTOM2_TAINT,
		'code' => self::CODE_TAINT,
		'path' => self::PATH_TAINT,
		'regex' => self::REGEX_TAINT,
		'sql_numkey' => self::SQL_NUMKEY_TAINT,
		'escaped' => self::ESCAPED_TAINT,
		'tainted' => self::YES_TAINT,
		'none' => self::NO_TAINT,
	];

	// isset() is fine here even for 'none': it only treats null as unset, not 0
	if ( !isset( $constantsByName[$name] ) ) {
		throw new AssertionError( "$name not valid taint" );
	}
	return $constantsByName[$name];
}
743 | |
744 | /**
745 | * Taints for builtin php functions, keyed by function/method name with a leading backslash.
746 | * Positional entries line up with the parameters in order (e.g. the query is mysqli_query()'s 2nd argument).
747 | * @return int[][] List of func taints (See getBuiltinFuncTaint())
748 | * @phan-return array<string,int[]>
749 | */
750 | private function getPHPFuncTaints(): array {
751 | $pregMatchTaint = [ // Shared by preg_match_all and preg_match below
752 | self::REGEX_EXEC_TAINT,
753 | self::YES_TAINT,
754 | // TODO: Possibly unsafe pass-by-ref
755 | self::NO_TAINT,
756 | self::NO_TAINT,
757 | self::NO_TAINT,
758 | 'overall' => self::NO_TAINT,
759 | ];
760 | $pregReplaceTaint = [ // Shared by preg_replace and preg_replace_callback below
761 | self::REGEX_EXEC_TAINT,
762 | // TODO: This is used for strings (in preg_replace) and callbacks (in preg_replace_callback)
763 | self::YES_TAINT,
764 | self::YES_TAINT,
765 | self::NO_TAINT,
766 | self::NO_TAINT,
767 | 'overall' => self::NO_TAINT
768 | ];
769 | return [
770 | '\htmlentities' => [
771 | self::ESCAPES_HTML,
772 | 'overall' => self::ESCAPED_TAINT
773 | ],
774 | '\htmlspecialchars' => [
775 | self::ESCAPES_HTML,
776 | 'overall' => self::ESCAPED_TAINT
777 | ],
778 | '\escapeshellarg' => [
779 | ~self::SHELL_TAINT & self::YES_TAINT,
780 | 'overall' => self::NO_TAINT
781 | ],
782 | // TODO: Perhaps we should distinguish arguments escape vs command escape
783 | '\escapeshellcmd' => [
784 | ~self::SHELL_TAINT & self::YES_TAINT,
785 | 'overall' => self::NO_TAINT
786 | ],
787 | '\shell_exec' => [
788 | self::SHELL_EXEC_TAINT,
789 | 'overall' => self::YES_TAINT
790 | ],
791 | '\passthru' => [
792 | self::SHELL_EXEC_TAINT,
793 | self::NO_TAINT,
794 | 'overall' => self::NO_TAINT
795 | ],
796 | '\exec' => [
797 | self::SHELL_EXEC_TAINT,
798 | // TODO: This is an unsafe passbyref
799 | self::NO_TAINT,
800 | self::NO_TAINT,
801 | 'overall' => self::YES_TAINT
802 | ],
803 | '\system' => [
804 | self::SHELL_EXEC_TAINT,
805 | self::NO_TAINT,
806 | 'overall' => self::YES_TAINT
807 | ],
808 | '\proc_open' => [
809 | self::SHELL_EXEC_TAINT,
810 | self::NO_TAINT,
811 | // TODO: Unsafe passbyref
812 | self::NO_TAINT,
813 | self::NO_TAINT,
814 | self::NO_TAINT,
815 | self::NO_TAINT,
816 | // TODO: Perhaps not so safe
817 | 'overall' => self::NO_TAINT
818 | ],
819 | '\popen' => [
820 | self::SHELL_EXEC_TAINT,
821 | self::NO_TAINT,
822 | // TODO: Perhaps not so safe
823 | 'overall' => self::NO_TAINT
824 | ],
825 | // Or any time the serialized data comes from a trusted source.
826 | '\serialize' => [
827 | 'overall' => self::YES_TAINT & ~self::SERIALIZE_TAINT,
828 | ],
829 | '\unserialize' => [
830 | self::SERIALIZE_EXEC_TAINT,
831 | 'overall' => self::NO_TAINT,
832 | ],
833 | '\mysql_query' => [
834 | self::SQL_EXEC_TAINT,
835 | 'overall' => self::UNKNOWN_TAINT
836 | ],
837 | '\mysqli_query' => [
838 | self::NO_TAINT,
839 | self::SQL_EXEC_TAINT,
840 | 'overall' => self::UNKNOWN_TAINT
841 | ],
842 | '\mysqli::query' => [
843 | self::SQL_EXEC_TAINT,
844 | 'overall' => self::UNKNOWN_TAINT
845 | ],
846 | '\mysqli_real_query' => [
847 | self::NO_TAINT,
848 | self::SQL_EXEC_TAINT,
849 | 'overall' => self::UNKNOWN_TAINT
850 | ],
851 | '\mysqli::real_query' => [
852 | self::SQL_EXEC_TAINT,
853 | 'overall' => self::UNKNOWN_TAINT
854 | ],
855 | '\sqlite_query' => [
856 | self::NO_TAINT,
857 | self::SQL_EXEC_TAINT,
858 | self::NO_TAINT,
859 | self::NO_TAINT,
860 | 'overall' => self::UNKNOWN_TAINT
861 | ],
862 | '\sqlite_single_query' => [
863 | self::NO_TAINT,
864 | self::SQL_EXEC_TAINT,
865 | self::NO_TAINT,
866 | self::NO_TAINT,
867 | 'overall' => self::UNKNOWN_TAINT
868 | ],
869 | // Note: addslashes, addcslashes etc. intentionally omitted because they're not
870 | // enough to avoid SQLi.
871 | '\mysqli_escape_string' => [
872 | self::NO_TAINT,
873 | self::YES_TAINT & ~self::SQL_TAINT,
874 | 'overall' => self::NO_TAINT
875 | ],
876 | '\mysqli_real_escape_string' => [
877 | self::NO_TAINT,
878 | self::YES_TAINT & ~self::SQL_TAINT,
879 | 'overall' => self::NO_TAINT
880 | ],
881 | '\mysqli::escape_string' => [
882 | self::YES_TAINT & ~self::SQL_TAINT,
883 | 'overall' => self::NO_TAINT
884 | ],
885 | '\mysqli::real_escape_string' => [
886 | self::YES_TAINT & ~self::SQL_TAINT,
887 | 'overall' => self::NO_TAINT
888 | ],
889 | '\sqlite_escape_string' => [
890 | self::YES_TAINT & ~self::SQL_TAINT,
891 | 'overall' => self::NO_TAINT
892 | ],
893 | '\PDO::query' => [
894 | self::SQL_EXEC_TAINT,
895 | self::NO_TAINT,
896 | self::NO_TAINT,
897 | self::NO_TAINT,
898 | 'overall' => self::UNKNOWN_TAINT
899 | ],
900 | '\PDO::prepare' => [
901 | self::SQL_EXEC_TAINT,
902 | self::NO_TAINT,
903 | 'overall' => self::UNKNOWN_TAINT
904 | ],
905 | '\PDO::exec' => [
906 | self::SQL_EXEC_TAINT,
907 | 'overall' => self::NO_TAINT
908 | ],
909 | '\base64_encode' => [
910 | self::YES_TAINT & ~self::HTML_TAINT,
911 | 'overall' => self::NO_TAINT
912 | ],
913 | '\file_put_contents' => [
914 | self::PATH_EXEC_TAINT,
915 | self::NO_TAINT,
916 | self::NO_TAINT,
917 | self::NO_TAINT,
918 | 'overall' => self::NO_TAINT
919 | ],
920 | // TODO: What about file_get_contents() and file() ?
921 | '\fopen' => [
922 | self::PATH_EXEC_TAINT,
923 | self::NO_TAINT,
924 | self::NO_TAINT,
925 | self::NO_TAINT,
926 | // TODO: Perhaps not so safe
927 | 'overall' => self::NO_TAINT
928 | ],
929 | '\opendir' => [
930 | self::PATH_EXEC_TAINT,
931 | self::NO_TAINT,
932 | // TODO: Perhaps not so safe
933 | 'overall' => self::NO_TAINT
934 | ],
935 | '\rawurlencode' => [
936 | self::YES_TAINT & ~self::PATH_TAINT,
937 | 'overall' => self::NO_TAINT
938 | ],
939 | '\urlencode' => [
940 | self::YES_TAINT & ~self::PATH_TAINT,
941 | 'overall' => self::NO_TAINT
942 | ],
943 | '\printf' => [
944 | self::HTML_EXEC_TAINT,
945 | // TODO We could check if the respective specifiers are safe
946 | self::HTML_EXEC_TAINT | self::VARIADIC_PARAM,
947 | 'overall' => self::NO_TAINT
948 | ],
949 | '\preg_filter' => [
950 | self::REGEX_EXEC_TAINT,
951 | self::YES_TAINT,
952 | self::YES_TAINT,
953 | self::NO_TAINT,
954 | self::NO_TAINT,
955 | 'overall' => self::NO_TAINT
956 | ],
957 | '\preg_grep' => [
958 | self::REGEX_EXEC_TAINT,
959 | self::YES_TAINT,
960 | self::NO_TAINT,
961 | 'overall' => self::NO_TAINT
962 | ],
963 | '\preg_match_all' => $pregMatchTaint,
964 | '\preg_match' => $pregMatchTaint,
965 | '\preg_quote' => [
966 | self::YES_TAINT & ~self::REGEX_TAINT,
967 | self::NO_TAINT,
968 | 'overall' => self::NO_TAINT
969 | ],
970 | '\preg_replace' => $pregReplaceTaint,
971 | '\preg_replace_callback' => $pregReplaceTaint,
972 | '\preg_replace_callback_array' => [
973 | self::REGEX_EXEC_TAINT,
974 | self::YES_TAINT,
975 | self::NO_TAINT,
976 | self::NO_TAINT,
977 | self::NO_TAINT,
978 | 'overall' => self::NO_TAINT
979 | ],
980 | '\preg_split' => [
981 | self::REGEX_EXEC_TAINT,
982 | self::YES_TAINT,
983 | self::NO_TAINT,
984 | self::NO_TAINT,
985 | 'overall' => self::NO_TAINT
986 | ],
987 | // We assume that hashing functions are safe, see T272492
988 | '\md5' => [
989 | self::NO_TAINT,
990 | self::NO_TAINT,
991 | 'overall' => self::NO_TAINT
992 | ],
993 | '\sha1' => [
994 | self::NO_TAINT,
995 | self::NO_TAINT,
996 | 'overall' => self::NO_TAINT
997 | ],
998 | '\crc32' => [
999 | self::NO_TAINT,
1000 | 'overall' => self::NO_TAINT
1001 | ],
1002 | ];
1003 | }
1004 | |
1005 | /** @inheritDoc */
1006 | public static function getBeforeLoopBodyAnalysisVisitorClassName(): string {
1007 | // Class name of the visitor this plugin supplies for before-loop-body analysis
1008 | $loopVisitorClass = TaintednessLoopVisitor::class;
1009 | return $loopVisitorClass;
1010 | }
1011 | } |