Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
88.43% |
1032 / 1167 |
|
67.80% |
40 / 59 |
CRAP | |
0.00% |
0 / 1 |
TaintednessBaseVisitor | |
88.43% |
1032 / 1167 |
|
67.80% |
40 / 59 |
811.97 | |
0.00% |
0 / 1 |
addFuncTaint | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
ensureFuncTaintIsSet | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
maybeAddFuncError | |
100.00% |
40 / 40 |
|
100.00% |
1 / 1 |
22 | |||
mergeTaintError | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
mergeFuncError | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
addTaintError | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
7.10 | |||
ensureTaintednessIsSet | |
50.00% |
3 / 6 |
|
0.00% |
0 / 1 |
6.00 | |||
setTaintedness | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
5 | |||
getDefiningFuncIfDifferent | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
4 | |||
getPossibleFuncDefinitions | |
87.50% |
21 / 24 |
|
0.00% |
0 / 1 |
9.16 | |||
getTaintOfFunction | |
100.00% |
24 / 24 |
|
100.00% |
1 / 1 |
8 | |||
getSetKnownTaintOfFunctionWithoutAnalysis | |
100.00% |
24 / 24 |
|
100.00% |
1 / 1 |
8 | |||
analyzeFunc | |
94.74% |
18 / 19 |
|
0.00% |
0 / 1 |
9.01 | |||
getDocBlockTaintOfFunc | |
100.00% |
87 / 87 |
|
100.00% |
1 / 1 |
20 | |||
getTaintByType | |
91.94% |
57 / 62 |
|
0.00% |
0 / 1 |
33.57 | |||
getTaintMaskForTypedElement | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
getTaintMaskForType | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getPossibleFutureTaintOfElement | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getCurrentMethod | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
getTaintedness | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
getTaintednessNode | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
getTaintednessPhanObj | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
resolveOffset | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
resolveValue | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
getPropInCurrentScopeByName | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getCtxN | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getObjsForNodeForNumkeyBackprop | |
67.53% |
52 / 77 |
|
0.00% |
0 / 1 |
153.80 | |||
getPropFromNode | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
getDebugInfo | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
dbgInfo | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
linkParamAndFunc | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
mergeTaintDependencies | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
7 | |||
markAllDependentMethodsExec | |
97.22% |
35 / 36 |
|
0.00% |
0 / 1 |
14 | |||
markAllDependentMethodsExecForNode | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
3 | |||
markAllDependentVarsYes | |
100.00% |
24 / 24 |
|
100.00% |
1 / 1 |
6 | |||
getCausedByLinesForFunc | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getActualFuncWithCausedBy | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
debug | |
53.33% |
8 / 15 |
|
0.00% |
0 / 1 |
14.50 | |||
getCallableFromNode | |
88.89% |
16 / 18 |
|
0.00% |
0 / 1 |
10.14 | |||
getFirstElmFromArrayOrGenerator | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
4.59 | |||
taintToIssuesAndSeverities | |
96.67% |
29 / 30 |
|
0.00% |
0 / 1 |
11 | |||
maybeEmitIssueSimplified | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
maybeEmitIssue | |
97.44% |
38 / 39 |
|
0.00% |
0 / 1 |
13 | |||
isIssueSuppressedOrFalsePositive | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
4 | |||
handleMethodCall | |
100.00% |
103 / 103 |
|
100.00% |
1 / 1 |
22 | |||
translateNamedArg | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
backpropagateArgTaint | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
handlePassByRef | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
9 | |||
getPassByRefObjFromNode | |
54.55% |
6 / 11 |
|
0.00% |
0 / 1 |
14.01 | |||
getHardcodedPreservedTaintForFunc | |
70.78% |
172 / 243 |
|
0.00% |
0 / 1 |
260.00 | |||
getBinOpTaintMask | |
90.00% |
36 / 40 |
|
0.00% |
0 / 1 |
6.04 | |||
getNodeType | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
2 | |||
nodeIsArray | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
6.10 | |||
nodeCanBeArray | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
5 | |||
nodeCanBeString | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 | |||
elementCanBeNumkey | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
5 | |||
nodeCanBeIntKey | |
90.00% |
9 / 10 |
|
0.00% |
0 / 1 |
7.05 | |||
getReturnObjsOfFunc | |
91.67% |
11 / 12 |
|
0.00% |
0 / 1 |
3.01 | |||
isSubclassOf | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php declare( strict_types=1 ); |
2 | |
3 | namespace SecurityCheckPlugin; |
4 | |
5 | use ast\Node; |
6 | use Closure; |
7 | use Exception; |
8 | use Generator; |
9 | use Phan\AST\ASTReverter; |
10 | use Phan\AST\ContextNode; |
11 | use Phan\AST\UnionTypeVisitor; |
12 | use Phan\BlockAnalysisVisitor; |
13 | use Phan\CodeBase; |
14 | use Phan\Debug; |
15 | use Phan\Exception\CodeBaseException; |
16 | use Phan\Exception\FQSENException; |
17 | use Phan\Exception\IssueException; |
18 | use Phan\Exception\NodeException; |
19 | use Phan\Exception\UnanalyzableException; |
20 | use Phan\Issue; |
21 | use Phan\Language\Context; |
22 | use Phan\Language\Element\FunctionInterface; |
23 | use Phan\Language\Element\GlobalVariable; |
24 | use Phan\Language\Element\Method; |
25 | use Phan\Language\Element\PassByReferenceVariable; |
26 | use Phan\Language\Element\Property; |
27 | use Phan\Language\Element\TypedElementInterface; |
28 | use Phan\Language\Element\Variable; |
29 | use Phan\Language\FQSEN\FullyQualifiedClassName; |
30 | use Phan\Language\FQSEN\FullyQualifiedFunctionLikeName; |
31 | use Phan\Language\FQSEN\FullyQualifiedFunctionName; |
32 | use Phan\Language\FQSEN\FullyQualifiedMethodName; |
33 | use Phan\Language\Type\GenericArrayType; |
34 | use Phan\Language\Type\LiteralTypeInterface; |
35 | use Phan\Language\UnionType; |
36 | |
37 | /** |
38 |  * Trait for the Taintedness visitor subclasses. Mostly contains |
39 | * utility methods. |
40 | * |
41 | * Copyright (C) 2017 Brian Wolff <bawolff@gmail.com> |
42 | * |
43 | * This program is free software; you can redistribute it and/or modify |
44 | * it under the terms of the GNU General Public License as published by |
45 | * the Free Software Foundation; either version 2 of the License, or |
46 | * (at your option) any later version. |
47 | * |
48 | * This program is distributed in the hope that it will be useful, |
49 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
50 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
51 | * GNU General Public License for more details. |
52 | * |
53 | * You should have received a copy of the GNU General Public License along |
54 | * with this program; if not, write to the Free Software Foundation, Inc., |
55 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
56 | */ |
57 | /** |
58 | * @property-read Context $context |
59 | * @property-read \Phan\CodeBase $code_base |
60 | */ |
61 | trait TaintednessBaseVisitor { |
62 | use TaintednessAccessorsTrait; |
63 | |
64 | /** @var null|string|bool|resource filehandle to output debug messages */ |
65 | private $debugOutput; |
66 | |
67 | /** @var Context|null Override the file/line number to emit issues */ |
68 | protected $overrideContext; |
69 | |
70 | /** |
71 | * @var bool[] FQSENs of classes without __toString, map of [ (string)FQSEN => true ] |
72 | */ |
73 | protected static $fqsensWithoutToStringCache = []; |
74 | |
75 | /** |
76 | * Merge taintedness of a function/method |
77 | * |
78 | * @param FunctionInterface $func |
79 | * @param FunctionTaintedness $taint |
80 | */ |
81 | protected function addFuncTaint( FunctionInterface $func, FunctionTaintedness $taint ): void { |
82 | $curTaint = self::getFuncTaint( $func ); |
83 | if ( $curTaint ) { |
84 | $newTaint = $curTaint->asMergedWith( $taint ); |
85 | } else { |
86 | $newTaint = $taint; |
87 | } |
88 | self::doSetFuncTaint( $func, $newTaint ); |
89 | } |
90 | |
91 | /** |
92 | * Ensure a function-like has its taintedness set and not unknown |
93 | * |
94 | * @param FunctionInterface $func |
95 | */ |
96 | protected function ensureFuncTaintIsSet( FunctionInterface $func ): void { |
97 | if ( !self::getFuncTaint( $func ) ) { |
98 | self::doSetFuncTaint( $func, new FunctionTaintedness( Taintedness::newSafe() ) ); |
99 | } |
100 | } |
101 | |
102 | 	/** |
103 | 	 * @param FunctionInterface $func Function whose caused-by lines are updated |
104 | 	 * @param Context|string|null $reason To override the caused-by line; if null, the current context is used |
105 | 	 * @param FunctionTaintedness $addedTaint Taintedness whose sink, preserved and overall parts get lines added |
106 | 	 * @param FunctionTaintedness $allNewTaint Consulted for the canOverride* checks when no $reason is given |
107 | 	 * @param MethodLinks|null $returnLinks NOTE: These are only used for preserved params, since for sink params |
108 | 	 *   we're already adding a Taintedness with the expected EXEC bits. |
109 | 	 */ |
110 | 	private function maybeAddFuncError( |
111 | 		FunctionInterface $func, |
112 | 		$reason, |
113 | 		FunctionTaintedness $addedTaint, |
114 | 		FunctionTaintedness $allNewTaint, |
115 | 		?MethodLinks $returnLinks = null |
116 | 	): void { |
117 | 		if ( !is_string( $reason ) ) { |
118 | 			$newErrors = [ $this->dbgInfo( $reason ?? $this->context ) ]; |
119 | 		} else { |
120 | 			$newErrors = [ $reason ]; |
121 | 		} |
122 | 		if ( $this->overrideContext && !( $this->isHook ?? false ) ) { |
123 | 			// @phan-suppress-previous-line PhanUndeclaredProperty |
124 | 			$newErrors[] = $this->dbgInfo( $this->overrideContext ); |
125 | 		} |
126 | |
127 | 		$hasReturnLinks = $returnLinks && !$returnLinks->isEmpty(); |
128 | |
129 | 		// Future TODO: we might consider using PreservedTaintedness from the funcs instead of MethodLinks, but using |
130 | 		// links is more consistent with what we do for non-function causedby lines. |
131 | |
132 | 		$newErr = self::getFuncCausedByRawCloneOrEmpty( $func ); |
133 | |
134 | 		foreach ( $addedTaint->getSinkParamKeysNoVariadic() as $key ) { |
135 | 			if ( $reason || $allNewTaint->canOverrideNonVariadicParam( $key ) ) { |
136 | 				$curTaint = $addedTaint->getParamSinkTaint( $key ); |
137 | 				if ( $curTaint->has( SecurityCheckPlugin::ALL_EXEC_TAINT ) ) { |
138 | 					$newErr->addParamSinkLines( $key, $newErrors, $curTaint->asExecToYesTaint() ); |
139 | 				} |
140 | 			} |
141 | 		} |
142 | 		foreach ( $addedTaint->getPreserveParamKeysNoVariadic() as $key ) { |
143 | 			if ( $hasReturnLinks && ( $reason || $allNewTaint->canOverrideNonVariadicParam( $key ) ) ) { |
144 | 				$newErr->addParamPreservedLines( |
145 | 					$key, |
146 | 					$newErrors, |
147 | 					Taintedness::newSafe(), |
148 | 					$returnLinks->asFilteredForFuncAndParam( $func, $key ) |
149 | 				); |
150 | 			} |
151 | 		} |
152 | 		$variadicIndex = $addedTaint->getVariadicParamIndex(); |
153 | 		if ( $variadicIndex !== null && ( $reason || $allNewTaint->canOverrideVariadicParam() ) ) { |
154 | 			$sinkVariadic = $addedTaint->getVariadicParamSinkTaint(); |
155 | 			if ( $sinkVariadic && $sinkVariadic->has( SecurityCheckPlugin::ALL_EXEC_TAINT ) ) { |
156 | 				$newErr->addVariadicParamSinkLines( |
157 | 					$variadicIndex, |
158 | 					$newErrors, |
159 | 					$sinkVariadic->asExecToYesTaint() |
160 | 				); |
161 | 			} |
162 | 			if ( $hasReturnLinks ) { |
163 | 				$newErr->addVariadicParamPreservedLines( |
164 | 					$variadicIndex, |
165 | 					$newErrors, |
166 | 					Taintedness::newSafe(), |
167 | 					$returnLinks->asFilteredForFuncAndParam( $func, $variadicIndex ) |
168 | 				); |
169 | 			} |
170 | 		} |
171 | |
172 | 		$curTaint = $addedTaint->getOverall(); |
173 | 		if ( ( $reason || $allNewTaint->canOverrideOverall() ) && $curTaint->has( SecurityCheckPlugin::ALL_TAINT ) ) { |
174 | 			// Note, the generic error shouldn't have any link |
175 | 			$newErr->addGenericLines( $newErrors, $curTaint ); |
176 | 		} |
177 | |
178 | 		self::setFuncCausedByRaw( $func, $newErr ); |
179 | 	} |
180 | |
181 | 	/** |
182 | 	 * Merge the given caused-by lines into the ones already stored for $element. |
183 | 	 * |
184 | 	 * @param TypedElementInterface $element Must not be a function-like; use mergeFuncError for those |
185 | 	 * @param CausedByLines $rightError Lines to add; stored as-is if $element has no lines yet |
186 | 	 */ |
187 | 	protected function mergeTaintError( TypedElementInterface $element, CausedByLines $rightError ): void { |
188 | 		assert( !$element instanceof FunctionInterface, 'Should use mergeFuncError' ); |
189 | |
190 | 		$curError = self::getCausedByRaw( $element ); |
191 | |
192 | 		if ( !$curError ) { |
193 | 			$newLeftError = $rightError; |
194 | 		} else { |
195 | 			$newLeftError = $curError->asMergedWith( $rightError ); |
196 | 		} |
197 | |
198 | 		self::setCausedByRaw( $element, $newLeftError ); |
199 | 	} |
200 | |
201 | /** |
202 | * @param FunctionInterface $func |
203 | * @param FunctionCausedByLines $newError |
204 | * @param FunctionTaintedness $allFuncTaint Used to check NO_OVERRIDE |
205 | */ |
206 | protected function mergeFuncError( |
207 | FunctionInterface $func, |
208 | FunctionCausedByLines $newError, |
209 | FunctionTaintedness $allFuncTaint |
210 | ): void { |
211 | $funcError = self::getFuncCausedByRawCloneOrEmpty( $func ); |
212 | $funcError->mergeWith( $newError, $allFuncTaint ); |
213 | self::setFuncCausedByRaw( $func, $funcError ); |
214 | } |
215 | |
216 | 	/** |
217 | 	 * Add the current context to taintedOriginalError book-keeping |
218 | 	 * |
219 | 	 * This allows us to show users what line caused an issue. |
220 | 	 * |
221 | 	 * @param TypedElementInterface $elem Where to put it. Must not be a function-like |
222 | 	 * @param Taintedness $taintedness Taint being added; nothing is recorded if it has no taint and $links is empty |
223 | 	 * @param MethodLinks|null $links |
224 | 	 * @param string|null $reason To override the caused by line |
225 | 	 */ |
226 | 	protected function addTaintError( |
227 | 		TypedElementInterface $elem, |
228 | 		Taintedness $taintedness, |
229 | 		?MethodLinks $links, |
230 | 		?string $reason = null |
231 | 	): void { |
232 | 		assert( !$elem instanceof FunctionInterface, 'Should use maybeAddFuncError' ); |
233 | |
234 | 		if ( !$taintedness->has( SecurityCheckPlugin::ALL_TAINT ) && ( !$links || $links->isEmpty() ) ) { |
235 | 			// Don't add book-keeping if no actual taint was added. |
236 | 			return; |
237 | 		} |
238 | |
239 | 		$newErrors = $reason !== null ? [ $reason ] : [ $this->dbgInfo() ]; |
240 | 		if ( $this->overrideContext && !( $this->isHook ?? false ) ) { |
241 | 			// @phan-suppress-previous-line PhanUndeclaredProperty |
242 | 			$newErrors[] = $this->dbgInfo( $this->overrideContext ); |
243 | 		} |
244 | |
245 | 		$newErr = self::getCausedByRawCloneOrEmpty( $elem ); |
246 | 		$newErr->addLines( $newErrors, $taintedness, $links ); |
247 | 		self::setCausedByRaw( $elem, $newErr ); |
248 | 	} |
249 | |
250 | /** |
251 | * Ensures that the given variable obj has some taintedness set, initializing to safe if it doesn't. |
252 | * |
253 | * @param TypedElementInterface $varObj |
254 | */ |
255 | protected function ensureTaintednessIsSet( TypedElementInterface $varObj ): void { |
256 | if ( !self::getTaintednessRaw( $varObj ) ) { |
257 | self::setTaintednessRaw( $varObj, Taintedness::newSafe() ); |
258 | } |
259 | if ( $varObj instanceof GlobalVariable ) { |
260 | $gVarObj = $varObj->getElement(); |
261 | if ( !self::getTaintednessRaw( $gVarObj ) ) { |
262 | self::setTaintednessRaw( $gVarObj, Taintedness::newSafe() ); |
263 | } |
264 | } |
265 | } |
266 | |
267 | /** |
268 | * Change the taintedness of $variableObj. |
269 | * |
270 | * @param TypedElementInterface $variableObj |
271 | * @param Taintedness $taintedness |
272 | * @param bool $override |
273 | */ |
274 | private function setTaintedness( |
275 | TypedElementInterface $variableObj, |
276 | Taintedness $taintedness, |
277 | bool $override |
278 | ): void { |
279 | assert( !$variableObj instanceof FunctionInterface, 'Must use setFuncTaint for functions' ); |
280 | |
281 | if ( |
282 | $variableObj instanceof Property && |
283 | $variableObj->getClassFQSEN() === FullyQualifiedClassName::getStdClassFQSEN() |
284 | ) { |
285 | // Phan conflates all stdClass props, see https://github.com/phan/phan/issues/3869 |
286 | // Avoid doing the same with taintedness, as that would cause weird issues (see |
287 | // 'stdclassconflation' test). |
288 | // TODO Is it possible to store prop taintedness in the Variable object? |
289 | // that would be similar to a fine-grained handling of arrays. |
290 | return; |
291 | } |
292 | |
293 | if ( $override ) { |
294 | $newTaint = $taintedness; |
295 | } else { |
296 | $curTaint = self::getTaintednessRaw( $variableObj ); |
297 | if ( !$curTaint ) { |
298 | $newTaint = $taintedness; |
299 | } else { |
300 | // NOTE: Do NOT merge in place here, as that would change the taintedness for all variable |
301 | // objects of which $variableObj is a clone! |
302 | $newTaint = $curTaint->asMergedWith( $taintedness ); |
303 | } |
304 | } |
305 | self::setTaintednessRaw( $variableObj, $newTaint ); |
306 | } |
307 | |
308 | /** |
309 | * Given a func, if it has a defining func different from itself, return that defining func. Returns null otherwise. |
310 | * |
311 | * @param FunctionInterface $func |
312 | * @return FunctionInterface|null |
313 | */ |
314 | private function getDefiningFuncIfDifferent( FunctionInterface $func ): ?FunctionInterface { |
315 | if ( $func instanceof Method && $func->hasDefiningFQSEN() ) { |
316 | $definingFQSEN = $func->getDefiningFQSEN(); |
317 | if ( $definingFQSEN !== $func->getFQSEN() ) { |
318 | return $this->code_base->getMethodByFQSEN( $definingFQSEN ); |
319 | } |
320 | } |
321 | return null; |
322 | } |
323 | |
324 | /** |
325 | * Get a list of places to look for function taint info |
326 | * |
327 | * @todo How to handle multiple function definitions (phan "alternates") |
328 | * @param FunctionInterface $func |
329 | * @return Generator<FunctionInterface> |
330 | */ |
331 | private function getPossibleFuncDefinitions( FunctionInterface $func ): Generator { |
332 | yield $func; |
333 | |
334 | // If we don't have a defining func, stay with the same func. |
335 | // definingFunc is used later on during fallback processing. |
336 | $definingFunc = $this->getDefiningFuncIfDifferent( $func ); |
337 | if ( $definingFunc ) { |
338 | yield $definingFunc; |
339 | } |
340 | if ( $func instanceof Method ) { |
341 | try { |
342 | $class = $func->getClass( $this->code_base ); |
343 | } catch ( CodeBaseException $e ) { |
344 | $this->debug( __METHOD__, "Class not found for func $func: " . $this->getDebugInfo( $e ) ); |
345 | return; |
346 | } |
347 | |
348 | // Iterate through the whole hierarchy to see if the method was defined in an interface or trait. A few |
349 | // notes on this: |
350 | // - getNonParentAncestorFQSENList (and similar methods in Class and Method) only go one level up, and |
351 | // would not give us e.g. the interfaces implemented by the parent class. |
352 | // - asExpandedTypes would work, but it has a non-zero overhead, and most importantly, we would cause phan |
353 | // to emit issues like RedefinedClass in places where phan wouldn't normally emit them. |
354 | // - It's unclear whether this code should also look for method definitions in classes (and not just |
355 | // interfaces/traits). And more generally, what would the expectations for *-taint annotations be. |
356 | $curClass = $class; |
357 | // Use a safeguard in case this goes out of control (e.g., broken code with circular inheritance). |
358 | $depth = 0; |
359 | do { |
360 | $depth++; |
361 | $nonParents = $curClass->getNonParentAncestorFQSENList(); |
362 | |
363 | foreach ( $nonParents as $nonParentFQSEN ) { |
364 | if ( $this->code_base->hasClassWithFQSEN( $nonParentFQSEN ) ) { |
365 | $nonParent = $this->code_base->getClassByFQSEN( $nonParentFQSEN ); |
366 | // TODO Assuming this is a direct invocation, but it doesn't always make sense |
367 | $directInvocation = true; |
368 | if ( $nonParent->hasMethodWithName( $this->code_base, $func->getName(), $directInvocation ) ) { |
369 | yield $nonParent->getMethodByName( $this->code_base, $func->getName() ); |
370 | } |
371 | } |
372 | } |
373 | if ( |
374 | !$curClass->hasParentType() || |
375 | !$this->code_base->hasClassWithFQSEN( $curClass->getParentClassFQSEN() ) |
376 | ) { |
377 | break; |
378 | } |
379 | $curClass = $curClass->getParentClass( $this->code_base ); |
380 | } while ( $depth < 20 ); |
381 | } |
382 | } |
383 | |
384 | /** |
385 | 	 * Get the taintedness of a function, method, or other function-like thing |
386 | * @note This is not guaranteed to return a clone |
387 | * |
388 | * @param FunctionInterface $func What function/method to look up |
389 | 	 * @return FunctionTaintedness Not guaranteed to be a clone |
390 | */ |
391 | protected function getTaintOfFunction( FunctionInterface $func ): FunctionTaintedness { |
392 | $funcTaint = self::getFuncTaint( $func ); |
393 | if ( $funcTaint !== null ) { |
394 | return $funcTaint; |
395 | } |
396 | |
397 | $annotatedTaint = $this->getSetKnownTaintOfFunctionWithoutAnalysis( $func ); |
398 | if ( $annotatedTaint ) { |
399 | return $annotatedTaint; |
400 | } |
401 | |
402 | $isPHPInternalFunc = $func->isPHPInternal(); |
403 | if ( !$isPHPInternalFunc ) { |
404 | // PHP internal functions cannot be analyzed because they don't have a body. |
405 | $funcToAnalyze = $this->getDefiningFuncIfDifferent( $func ) ?: $func; |
406 | $this->analyzeFunc( $funcToAnalyze ); |
407 | $analyzedFuncTaint = self::getFuncTaint( $funcToAnalyze ); |
408 | if ( $analyzedFuncTaint !== null ) { |
409 | return $analyzedFuncTaint; |
410 | } |
411 | } |
412 | |
413 | $taintFromReturnType = $this->getTaintByType( $func->getUnionType() ); |
414 | if ( !$isPHPInternalFunc ) { |
415 | // If we haven't seen this function before, first of all check the return type. If it |
416 | // returns a safe type (like int), it's safe. |
417 | $taint = new FunctionTaintedness( $taintFromReturnType ); |
418 | self::doSetFuncTaint( $func, $taint ); |
419 | $this->maybeAddFuncError( $func, null, $taint, $taint ); |
420 | } else { |
421 | // Assume that anything really dangerous we've already hardcoded. So just preserve taint. |
422 | $overall = $taintFromReturnType->isSafe() |
423 | ? $taintFromReturnType |
424 | : new Taintedness( SecurityCheckPlugin::PRESERVE_TAINT ); |
425 | $taint = new FunctionTaintedness( $overall ); |
426 | // We're not adding any error here, since it's presumably unnecessary for PHP internal stuff. |
427 | self::doSetFuncTaint( $func, $taint ); |
428 | } |
429 | return $taint; |
430 | } |
431 | |
432 | /** |
433 | * Given a function, find out if it has any hardcoded/annotated taint, or whether it should inherit its taint |
434 | * from an alternate definition. If anything was found, set that taintedness in the func object and return it. |
435 | * In particular, this does NOT cause $func to be analyzed. |
436 | * |
437 | * @param FunctionInterface $func |
438 | * @return FunctionTaintedness|null |
439 | */ |
440 | private function getSetKnownTaintOfFunctionWithoutAnalysis( FunctionInterface $func ): ?FunctionTaintedness { |
441 | $funcsToTry = $this->getPossibleFuncDefinitions( $func ); |
442 | foreach ( $funcsToTry as $trialFunc ) { |
443 | /** @var FunctionInterface $trialFunc */ |
444 | if ( !$trialFunc->isPHPInternal() ) { |
445 | // PHP internal functions can't have a docblock. |
446 | $taintData = $this->getDocBlockTaintOfFunc( $trialFunc ); |
447 | if ( $taintData !== null ) { |
448 | [ $taint, $methodLinks ] = $taintData; |
449 | self::doSetFuncTaint( $func, $taint ); |
450 | // TODO Make this more granular if possible |
451 | $errorDesc = 'annotations in ' . $trialFunc->getFQSEN()->__toString(); |
452 | $this->maybeAddFuncError( $func, $errorDesc, $taint, $taint, $methodLinks ); |
453 | return $taint; |
454 | } |
455 | } |
456 | |
457 | $trialFuncName = $trialFunc->getFQSEN(); |
458 | $taint = SecurityCheckPlugin::$pluginInstance->getBuiltinFuncTaint( $trialFuncName ); |
459 | if ( $taint !== null ) { |
460 | $taint = clone $taint; |
461 | self::doSetFuncTaint( $func, $taint ); |
462 | if ( !$func->isPHPInternal() ) { |
463 | // Caused-by lines are presumably unnecessary for PHP internal stuff. |
464 | $this->maybeAddFuncError( $func, "Builtin-$trialFuncName", $taint, $taint ); |
465 | } |
466 | return $taint; |
467 | } |
468 | } |
469 | |
470 | $definingFunc = $this->getDefiningFuncIfDifferent( $func ); |
471 | if ( $definingFunc ) { |
472 | $definingFuncTaint = self::getFuncTaint( $definingFunc ); |
473 | if ( $definingFuncTaint !== null ) { |
474 | return $definingFuncTaint; |
475 | } |
476 | } |
477 | |
478 | return null; |
479 | } |
480 | |
481 | /** |
482 | * Analyze a function. This is very similar to Analyzable::analyze, but avoids several checks |
483 | * used by phan for performance. Phan doesn't know about taintedness, so it may decide to skip |
484 | * a re-analysis which we need. |
485 | * @todo This is a bit hacky. |
486 | 	 * @todo We should implement our own perf checks, e.g. if the method was already called with |
487 | * the same taintedness, taint links, etc. for all params. |
488 | * @see \Phan\Analysis\Analyzable::analyze() |
489 | * |
490 | * @param FunctionInterface $func |
491 | */ |
492 | public function analyzeFunc( FunctionInterface $func ): void { |
493 | $node = $func->getNode(); |
494 | if ( !$node ) { |
495 | return; |
496 | } |
497 | |
498 | if ( $this->context->isInFunctionLikeScope() && $func->getFQSEN() === $this->context->getFunctionLikeFQSEN() ) { |
499 | // Avoid pointless recursion |
500 | return; |
501 | } |
502 | |
503 | static $depth = 0; |
504 | // @todo Tune the max depth. Raw benchmarking shows very little difference between e.g. |
505 | // 5 and 10. However, while with higher values we can detect more issues and avoid more |
506 | // false positives, it becomes harder to tell where an issue is coming from. |
507 | // Thus, this value should be increased only when we'll have better error reporting. |
508 | if ( $depth > 5 ) { |
509 | // $this->debug( __METHOD__, 'WARNING: aborting analysis earlier due to max depth' ); |
510 | return; |
511 | } |
512 | if ( $node->kind === \ast\AST_CLOSURE && isset( $node->children['uses'] ) ) { |
513 | return; |
514 | } |
515 | $depth++; |
516 | |
517 | // Like Analyzable::analyze, clone the context to avoid overriding anything |
518 | $context = clone $func->getContext(); |
519 | // @phan-suppress-next-line PhanUndeclaredMethod All implementations have it |
520 | if ( $func->getRecursionDepth() !== 0 ) { |
521 | // Add the arguments types to the internal scope of the function, see |
522 | // https://github.com/phan/phan/issues/3848 |
523 | foreach ( $func->getParameterList() as $parameter ) { |
524 | $context->addScopeVariable( $parameter->cloneAsNonVariadic() ); |
525 | } |
526 | } |
527 | try { |
528 | ( new BlockAnalysisVisitor( $this->code_base, $context ) )( |
529 | $node |
530 | ); |
531 | } finally { |
532 | $depth--; |
533 | } |
534 | } |
535 | |
536 | /** |
537 | * Obtain taint information from a docblock comment. |
538 | * |
539 | * @param FunctionInterface $func The function to check |
540 | * @return array<FunctionTaintedness|MethodLinks>|null null for no info |
541 | * @phan-return array{0:FunctionTaintedness,1:MethodLinks}|null |
542 | */ |
543 | protected function getDocBlockTaintOfFunc( FunctionInterface $func ): ?array { |
544 | // Note that we're not using the hashed docblock for caching, because the same docblock |
545 | // may have different meanings in different contexts. E.g. @return self |
546 | $fqsen = (string)$func->getFQSEN(); |
547 | if ( isset( SecurityCheckPlugin::$docblockCache[ $fqsen ] ) ) { |
548 | [ $taint, $links ] = SecurityCheckPlugin::$docblockCache[ $fqsen ]; |
549 | return [ clone $taint, clone $links ]; |
550 | } |
551 | |
552 | $docBlock = $func->getDocComment(); |
553 | if ( $docBlock === null ) { |
554 | return null; |
555 | } |
556 | if ( strpos( $docBlock, '-taint' ) === false ) { |
557 | // Lightweight check for methods that certainly aren't annotated |
558 | return null; |
559 | } |
560 | $lines = explode( "\n", $docBlock ); |
561 | /** @param string[] $args */ |
562 | $invalidLineIssueEmitter = function ( string $msg, array $args ) use ( $func ): void { |
563 | SecurityCheckPlugin::emitIssue( |
564 | $this->code_base, |
565 | // Emit issues at the line of the signature |
566 | $func->getContext(), |
567 | 'SecurityCheckInvalidAnnotation', |
568 | $msg, |
569 | $args |
570 | ); |
571 | }; |
572 | // Note, not forCaller, as that doesn't see variadic parameters |
573 | $calleeParamList = $func->getParameterList(); |
574 | $validTaintEncountered = false; |
575 | // Assume that if some of the taint is specified, then |
576 | // the person would specify all the dangerous taints, so |
577 | 		// don't set the unknown flag if there's no taint annotation on |
578 | // @return. |
579 | $funcTaint = new FunctionTaintedness( Taintedness::newSafe() ); |
580 | // TODO $fakeMethodLinks here is a bit hacky... |
581 | $fakeMethodLinks = new MethodLinks(); |
582 | foreach ( $lines as $line ) { |
583 | $m = []; |
584 | $trimmedLine = ltrim( rtrim( $line ), "* \t/" ); |
585 | if ( strpos( $trimmedLine, '@param-taint' ) === 0 ) { |
586 | $matched = preg_match( SecurityCheckPlugin::PARAM_ANNOTATION_REGEX, $trimmedLine, $m ); |
587 | if ( !$matched ) { |
588 | $invalidLineIssueEmitter( "Cannot parse taint line '{COMMENT}'", [ $trimmedLine ] ); |
589 | continue; |
590 | } |
591 | |
592 | $paramNumber = null; |
593 | $isVariadic = null; |
594 | foreach ( $calleeParamList as $i => $param ) { |
595 | if ( $m['paramname'] === $param->getName() ) { |
596 | $paramNumber = $i; |
597 | $isVariadic = $param->isVariadic(); |
598 | break; |
599 | } |
600 | } |
601 | if ( $paramNumber === null ) { |
602 | $invalidLineIssueEmitter( |
603 | 'Annotated parameter ${PARAMETER} not found in the signature', |
604 | [ $m['paramname'] ] |
605 | ); |
606 | continue; |
607 | } |
608 | |
609 | $annotatedAsVariadic = $m['variadic'] !== ''; |
610 | if ( $isVariadic !== $annotatedAsVariadic ) { |
611 | $msg = $isVariadic |
612 | ? 'Variadic parameter ${PARAMETER} should be annotated as `...${PARAMETER}`' |
613 | : 'Non-variadic parameter ${PARAMETER} should be annotated as `${PARAMETER}`'; |
614 | $invalidLineIssueEmitter( $msg, [ $m['paramname'], $m['paramname'] ] ); |
615 | } |
616 | $taintData = SecurityCheckPlugin::parseTaintLine( $m['taint'] ); |
617 | if ( $taintData === null ) { |
618 | $invalidLineIssueEmitter( "Invalid param taintedness '{COMMENT}'", [ $m['taint'] ] ); |
619 | continue; |
620 | } |
621 | /** @var Taintedness $taint */ |
622 | [ $taint, $flags ] = $taintData; |
623 | $sinkTaint = $taint->withOnly( SecurityCheckPlugin::ALL_EXEC_TAINT ); |
624 | $preserveTaint = $taint->without( SecurityCheckPlugin::ALL_EXEC_TAINT )->asPreservedTaintedness(); |
625 | if ( $isVariadic ) { |
626 | $funcTaint->setVariadicParamSinkTaint( $paramNumber, $sinkTaint ); |
627 | $funcTaint->setVariadicParamPreservedTaint( $paramNumber, $preserveTaint ); |
628 | $funcTaint->addVariadicParamFlags( $flags ); |
629 | } else { |
630 | $funcTaint->setParamSinkTaint( $paramNumber, $sinkTaint ); |
631 | $funcTaint->setParamPreservedTaint( $paramNumber, $preserveTaint ); |
632 | $funcTaint->addParamFlags( $paramNumber, $flags ); |
633 | } |
634 | $fakeMethodLinks->initializeParamForFunc( $func, $paramNumber ); |
635 | $validTaintEncountered = true; |
636 | if ( ( $taint->get() & SecurityCheckPlugin::ESCAPES_HTML ) === SecurityCheckPlugin::ESCAPES_HTML ) { |
637 | // Special case to auto-set anything that escapes html to detect double escaping. |
638 | $funcTaint->setOverall( $funcTaint->getOverall()->with( SecurityCheckPlugin::ESCAPED_TAINT ) ); |
639 | } |
640 | } elseif ( strpos( $trimmedLine, '@return-taint' ) === 0 ) { |
641 | $taintLine = substr( $trimmedLine, strlen( '@return-taint' ) + 1 ); |
642 | $taintData = SecurityCheckPlugin::parseTaintLine( $taintLine ); |
643 | if ( $taintData === null ) { |
644 | $invalidLineIssueEmitter( "Invalid return taintedness '{COMMENT}'", [ $taintLine ] ); |
645 | continue; |
646 | } |
647 | /** @var Taintedness $taint */ |
648 | [ $taint, $flags ] = $taintData; |
649 | if ( $taint->has( SecurityCheckPlugin::ALL_EXEC_TAINT ) ) { |
650 | $invalidLineIssueEmitter( "Return taintedness cannot be exec", [] ); |
651 | continue; |
652 | } |
653 | $funcTaint->setOverall( $taint ); |
654 | $funcTaint->addOverallFlags( $flags ); |
655 | $validTaintEncountered = true; |
656 | } |
657 | } |
658 | |
659 | if ( !$validTaintEncountered ) { |
660 | $this->debug( __METHOD__, 'Possibly wrong taint annotation in docblock: ' . json_encode( $docBlock ) ); |
661 | } |
662 | |
663 | SecurityCheckPlugin::$docblockCache[ $fqsen ] = $validTaintEncountered |
664 | ? [ clone $funcTaint, clone $fakeMethodLinks ] |
665 | : null; |
666 | return SecurityCheckPlugin::$docblockCache[ $fqsen ]; |
667 | } |
668 | |
/**
 * Guess a taintedness from type information alone.
 *
 * Numbers, booleans, null and similar types are assumed to be harmless, whereas strings,
 * arrays, generic objects etc. are reported as unknown. For a specific class, the overall
 * taint of its __toString() method (when it has one) is merged into the result.
 *
 * Only use as a fallback
 * @param UnionType $types The types
 * @return Taintedness
 */
protected function getTaintByType( UnionType $types ): Taintedness {
	// Type names assumed not to carry any taint.
	$untaintedNames = [
		'int', 'non-zero-int', 'float', 'bool', 'false', 'true', 'null', 'void',
		'class-string', 'callable-string', 'callable-object', 'callable-array',
	];
	// Type names for which nothing can be said about the taint.
	$unknownNames = [
		'string', 'non-empty-string', 'Closure', 'callable', 'array', 'iterable',
		'object', 'resource', 'mixed', 'non-empty-mixed', 'non-null-mixed',
	];

	// NOTE: This flattens intersection types
	$flattenedTypes = $types->getUniqueFlattenedTypeSet();
	if ( !$flattenedTypes ) {
		// No type info at all
		return new Taintedness( SecurityCheckPlugin::UNKNOWN_TAINT );
	}

	$result = new Taintedness( SecurityCheckPlugin::NO_TAINT );
	$mayBeUnknown = false;
	foreach ( $flattenedTypes as $type ) {
		if ( $type instanceof LiteralTypeInterface ) {
			// Literals are assumed to be untainted
			continue;
		}

		$typeName = $type->getName();
		if ( in_array( $typeName, $untaintedNames, true ) ) {
			continue;
		}
		if ( in_array( $typeName, $unknownNames, true ) ) {
			$mayBeUnknown = true;
			continue;
		}

		if ( $type->hasTemplateTypeRecursive() ) {
			// TODO Can we do better for template types?
			$mayBeUnknown = true;
			continue;
		}

		if ( !$type->isObjectWithKnownFQSEN() ) {
			// Likely some phan-specific types not included above
			$this->debug( __METHOD__, " $type (" . get_class( $type ) . ') not a class?' );
			$mayBeUnknown = true;
			continue;
		}

		$classFqsen = $type->asFQSEN()->__toString();
		if ( isset( self::$fqsensWithoutToStringCache[$classFqsen] ) ) {
			// Already known not to have a __toString() method
			$mayBeUnknown = true;
			continue;
		}

		// A specific class: look up its __toString()
		$toStringFQSEN = FullyQualifiedMethodName::fromStringInContext(
			$classFqsen . '::__toString',
			$this->context
		);
		if ( !$this->code_base->hasMethodWithFQSEN( $toStringFQSEN ) ) {
			// This is common in a void context, e.g. code like $this->foo() will reach this check.
			self::$fqsensWithoutToStringCache[$classFqsen] = true;
			$mayBeUnknown = true;
			continue;
		}

		$toStringMethod = $this->code_base->getMethodByFQSEN( $toStringFQSEN );
		$toStringOverall = $this->getTaintOfFunction( $toStringMethod )->getOverall();
		$result->mergeWith( $toStringOverall->without(
			SecurityCheckPlugin::PRESERVE_TAINT | SecurityCheckPlugin::ALL_EXEC_TAINT
		) );
	}

	if ( $mayBeUnknown ) {
		$result->add( SecurityCheckPlugin::UNKNOWN_TAINT );
	}
	return $result;
}
767 | |
/**
 * Compute which taint types a typed element may have, using its real type to rule out
 * impossible taint types.
 *
 * @param TypedElementInterface $var
 * @return Taintedness|null Null means all taints, checking for null is faster than ORing
 */
protected function getTaintMaskForTypedElement( TypedElementInterface $var ): ?Taintedness {
	// TODO Unwrapping globals wouldn't be needed if phan didn't infer real types for global variables.
	// See https://github.com/phan/phan/issues/4518
	$element = $var instanceof GlobalVariable ? $var->getElement() : $var;

	// The real union type must be used here because the non-real type:
	// 1 - might be wrong
	// 2 - might be incomplete (e.g. when analysing a func without docblock
	//     we still don't know all the possible types of the params).
	$realType = $element->getUnionType()->getRealUnionType();
	return $this->getTaintMaskForType( $realType );
}
787 | |
/**
 * Compute which taint types are allowed on an element of the given type.
 *
 * @param UnionType $type
 * @return Taintedness|null Null for all flags (returned when the type-based taint is unknown)
 */
protected function getTaintMaskForType( UnionType $type ): ?Taintedness {
	$taintFromType = $this->getTaintByType( $type );
	// An unknown taint means that nothing can be ruled out.
	return $taintFromType->has( SecurityCheckPlugin::UNKNOWN_TAINT ) ? null : $taintFromType;
}
802 | |
/**
 * Determine which taint the element might acquire later on. E.g., a func parameter may start
 * with no taint, and become tainted depending on the argument.
 * Currently this is the same as the type-based taint mask of the element.
 * @todo Ensure this won't miss any case (aside from when phan infers a wrong real type)
 *
 * @param TypedElementInterface $el
 * @return Taintedness|null Null for all taints
 */
protected function getPossibleFutureTaintOfElement( TypedElementInterface $el ): ?Taintedness {
	$typeMask = $this->getTaintMaskForTypedElement( $el );
	return $typeMask;
}
814 | |
/**
 * Name of the function-like currently being analyzed (for debugging purposes)
 *
 * @return string FQSEN of the current function-like, or "[no method]" outside of any
 */
protected function getCurrentMethod(): string {
	if ( !$this->context->isInFunctionLikeScope() ) {
		return '[no method]';
	}
	return (string)$this->context->getFunctionLikeFQSEN();
}
824 | |
/**
 * Get the taintedness of an element of the AST tree.
 *
 * Nodes are delegated to getTaintednessNode(); anything else is expected to be a scalar or
 * null, and is reported as having no taint.
 *
 * @param mixed $expr An expression from the AST tree.
 * @return TaintednessWithError
 */
protected function getTaintedness( $expr ): TaintednessWithError {
	if ( !( $expr instanceof Node ) ) {
		assert( is_scalar( $expr ) || $expr === null );
		// Optim: avoid using TaintednessWithError::newEmpty()
		$safeTaint = new Taintedness( SecurityCheckPlugin::NO_TAINT );
		return new TaintednessWithError( $safeTaint, new CausedByLines(), new MethodLinks() );
	}

	return $this->getTaintednessNode( $expr );
}
844 | |
/**
 * Find the taint of an AST node.
 *
 * If the node already has a `taint` property set, that value is returned as-is. Otherwise the
 * node is analyzed with a new TaintednessVisitor.
 *
 * @param Node $node
 * @return TaintednessWithError
 * @suppress PhanUndeclaredProperty
 */
protected function getTaintednessNode( Node $node ): TaintednessWithError {
	// Performance: use isset(), not property_exists()
	if ( !isset( $node->taint ) ) {
		// TODO This might just a return a default if no cached data.

		// The context line number must be updated, or the same issue may be reported
		// more than once on different lines (see test 'multilineissue').
		$previousLine = $this->context->getLineNumberStart();
		$this->context->setLineNumberStart( $node->lineno );

		$taintVisitor = new TaintednessVisitor( $this->code_base, $this->context );
		try {
			return $taintVisitor->analyzeNodeAndGetTaintedness( $node );
		} finally {
			// Restore the line number even if the analysis throws.
			$this->context->setLineNumberStart( $previousLine );
		}
	}

	// Cached result. Cache hit ratio should ideally be 100%, because we should never have to
	// retrieve the taintedness of a node without having analyzed it first. For now the ratio
	// is lower because we don't cache the result of cheap nodes.
	return $node->taint;
}
875 | |
/**
 * Find the taint of a phan object (not method/function).
 *
 * When taintedness was already stored on the object, the result is a new object: either a
 * clone, or the stored value restricted to the type-based mask of the element. When nothing
 * is stored, the taintedness is guessed from the union type of the element.
 *
 * @param TypedElementInterface $variableObj
 * @return Taintedness
 */
protected function getTaintednessPhanObj( TypedElementInterface $variableObj ): Taintedness {
	assert( !$variableObj instanceof FunctionInterface, "This method cannot be used with methods" );

	$storedTaint = self::getTaintednessRaw( $variableObj );
	if ( $storedTaint === null ) {
		// First time we see this element: fall back to its type.
		return $this->getTaintByType( $variableObj->getUnionType() );
	}

	$typeMask = $this->getTaintMaskForTypedElement( $variableObj );
	if ( $typeMask === null ) {
		// Null mask means that all taints are allowed
		return clone $storedTaint;
	}
	return $storedTaint->withOnly( $typeMask->get() );
}
898 | |
/**
 * Shortcut to resolve array offsets. On top of resolving the value, this:
 * - Asserts that the offset is not null: null is used for implicit dims like in `$a[] = $b`,
 *   where we can't say for sure what the offset will be; resolving it would yield null, which
 *   would be interpreted as offset 0 and is most likely wrong.
 * - Casts floats to integers, since using a float as array key raises a warning (and crashes
 *   taint-check) in PHP 8.1 (T307504)
 * - Returns the original offset when it resolves to a resource (e.g. `STDIN`), since resources
 *   are not scalar and certainly not valid offsets
 *   (see https://github.com/phan/phan/issues/4659).
 *
 * @param Node|mixed $rawOffset
 * @return Node|mixed
 */
protected function resolveOffset( $rawOffset ) {
	assert( $rawOffset !== null );
	$offsetValue = $this->resolveValue( $rawOffset );
	// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
	if ( is_resource( $offsetValue ) ) {
		return $rawOffset;
	}
	if ( is_float( $offsetValue ) ) {
		return (int)$offsetValue;
	}
	return $offsetValue;
}
921 | |
/**
 * Shortcut to try and convert an AST element (Node or already literal) to an equivalent PHP
 * scalar value. Non-Node values are returned unchanged.
 *
 * @param Node|mixed $value A Node or a scalar value from the AST
 * @return Node|mixed An equivalent scalar PHP value, or $value if it cannot be resolved
 */
protected function resolveValue( $value ) {
	return $value instanceof Node
		? $this->getCtxN( $value )->getEquivalentPHPScalarValue()
		: $value;
}
935 | |
/**
 * Look up a property of the class in scope by its name. Being in class scope and the
 * existence of the property are both asserted.
 * @param string $propName
 * @return Property
 */
private function getPropInCurrentScopeByName( string $propName ): Property {
	$ctx = $this->context;
	assert( $ctx->isInClassScope() );

	$classInScope = $ctx->getClassInScope( $this->code_base );
	assert( $classInScope->hasPropertyWithName( $this->code_base, $propName ) );

	return $classInScope->getPropertyByName( $this->code_base, $propName );
}
948 | |
/**
 * Convenience factory: build a ContextNode for the given node, using the code base and
 * context of this visitor.
 *
 * @param Node|mixed $node
 * @return ContextNode
 */
protected function getCtxN( $node ): ContextNode {
	$contextNode = new ContextNode( $this->code_base, $this->context, $node );
	return $contextNode;
}
962 | |
/**
 * Given a node, return the Phan variable objects that
 * correspond to that node to which we can backpropagate a NUMKEY taintedness.
 *
 * The node is inspected recursively: variables and properties are returned directly (when
 * elementCanBeNumkey() accepts them), compound expressions are resolved by merging the
 * results of their children, and function-like calls are only resolved when the
 * SECCHECK_NUMKEY_SPERIMENTAL environment variable is set.
 *
 * @todo This should be handled together with the non-numkey case
 *
 * @param Node $node AST node in question
 * @return TypedElementInterface[] Array of various phan objects corresponding to $node
 */
protected function getObjsForNodeForNumkeyBackprop( Node $node ): array {
	$cn = $this->getCtxN( $node );

	// TODO For now we only backprop in the simple case, to avoid tons of false positives, unless
	// the env flag is set (chiefly for tests)
	$definitelyNumkey = !getenv( 'SECCHECK_NUMKEY_SPERIMENTAL' );

	switch ( $node->kind ) {
		case \ast\AST_PROP:
		case \ast\AST_NULLSAFE_PROP:
		case \ast\AST_STATIC_PROP:
			$prop = $this->getPropFromNode( $node );
			return $prop && $this->elementCanBeNumkey( $prop, $definitelyNumkey ) ? [ $prop ] : [];
		case \ast\AST_VAR:
		case \ast\AST_CLOSURE_VAR:
			if ( Variable::isHardcodedGlobalVariableWithName( $cn->getVariableName() ) ) {
				return [];
			}
			try {
				$var = $cn->getVariable();
				return $this->elementCanBeNumkey( $var, $definitelyNumkey ) ? [ $var ] : [];
			} catch ( NodeException | IssueException $e ) {
				$this->debug( __METHOD__, "variable not in scope?? " . $this->getDebugInfo( $e ) );
				return [];
			}
		case \ast\AST_ENCAPS_LIST:
		case \ast\AST_ARRAY:
			$results = [];
			foreach ( $node->children as $child ) {
				if ( !$child instanceof Node ) {
					continue;
				}

				// Skip array elements whose key cannot be an integer. isset() is used because not
				// every child of an array node has a 'key' entry (e.g. spread elements), and
				// reading it unguarded would raise an "Undefined array key" warning.
				if (
					$node->kind === \ast\AST_ARRAY &&
					isset( $child->children['key'] ) && !$this->nodeCanBeIntKey( $child->children['key'] )
				) {
					continue;
				}
				$results = array_merge( $this->getObjsForNodeForNumkeyBackprop( $child ), $results );
			}
			return $results;
		case \ast\AST_ARRAY_ELEM:
			$results = [];
			if ( $node->children['key'] instanceof Node ) {
				$results = array_merge(
					$this->getObjsForNodeForNumkeyBackprop( $node->children['key'] ),
					$results
				);
			}
			if ( $node->children['value'] instanceof Node ) {
				$results = array_merge(
					$this->getObjsForNodeForNumkeyBackprop( $node->children['value'] ),
					$results
				);
			}
			return $results;
		case \ast\AST_CAST:
			// Future todo might be to ignore casts to ints, since
			// such things should be safe. Unclear if that makes
			// sense in all circumstances.
			if ( $node->children['expr'] instanceof Node ) {
				return $this->getObjsForNodeForNumkeyBackprop( $node->children['expr'] );
			}
			return [];
		case \ast\AST_DIM:
			if ( $node->children['expr'] instanceof Node ) {
				// For now just consider the outermost array.
				// FIXME. doesn't handle tainted array keys!
				return $this->getObjsForNodeForNumkeyBackprop( $node->children['expr'] );
			}
			return [];
		case \ast\AST_UNARY_OP:
			$var = $node->children['expr'];
			return $var instanceof Node ? $this->getObjsForNodeForNumkeyBackprop( $var ) : [];
		case \ast\AST_BINARY_OP:
			$left = $node->children['left'];
			$right = $node->children['right'];
			$leftObj = $left instanceof Node ? $this->getObjsForNodeForNumkeyBackprop( $left ) : [];
			$rightObj = $right instanceof Node ? $this->getObjsForNodeForNumkeyBackprop( $right ) : [];
			return array_merge( $leftObj, $rightObj );
		case \ast\AST_CONDITIONAL:
			$t = $node->children['true'];
			$f = $node->children['false'];
			$tObj = $t instanceof Node ? $this->getObjsForNodeForNumkeyBackprop( $t ) : [];
			$fObj = $f instanceof Node ? $this->getObjsForNodeForNumkeyBackprop( $f ) : [];
			return array_merge( $tObj, $fObj );
		case \ast\AST_CONST:
		case \ast\AST_CLASS_CONST:
		case \ast\AST_CLASS_NAME:
		case \ast\AST_MAGIC_CONST:
		case \ast\AST_ISSET:
		case \ast\AST_NEW:
			// For now we don't do methods, only variables
			// Also don't do args to function calls.
			// Unclear if this makes sense.
			return [];
		case \ast\AST_CALL:
		case \ast\AST_STATIC_CALL:
		case \ast\AST_METHOD_CALL:
		case \ast\AST_NULLSAFE_METHOD_CALL:
			if ( $definitelyNumkey ) {
				// This case is too hard for now.
				return [];
			}
			// @todo Future todo might be to still return arguments when catching an exception.
			if ( $node->kind === \ast\AST_CALL ) {
				if ( $node->children['expr']->kind !== \ast\AST_NAME ) {
					// TODO Handle this case!
					return [];
				}
				try {
					$func = $cn->getFunction( $node->children['expr']->children['name'] );
				} catch ( IssueException | FQSENException $e ) {
					$this->debug( __METHOD__, "FIXME func not found: " . $this->getDebugInfo( $e ) );
					return [];
				}
			} else {
				$methodName = $node->children['method'];
				try {
					$func = $cn->getMethod( $methodName, $node->kind === \ast\AST_STATIC_CALL, true );
				} catch ( NodeException | CodeBaseException | IssueException $e ) {
					$this->debug( __METHOD__, "FIXME method not found: " . $this->getDebugInfo( $e ) );
					return [];
				}
			}
			try {
				return $this->getReturnObjsOfFunc( $func );
			} catch ( Exception $e ) {
				$this->debug( __METHOD__, "FIXME: " . $this->getDebugInfo( $e ) );
				return [];
			}
		case \ast\AST_PRE_INC:
		case \ast\AST_PRE_DEC:
		case \ast\AST_POST_INC:
		case \ast\AST_POST_DEC:
			$children = $node->children;
			assert( count( $children ) === 1 );
			return $this->getObjsForNodeForNumkeyBackprop( reset( $children ) );
		default:
			// TODO Should probably handle AST_MATCH & friends
			// Debug::printNode( $node );
			// This should really be a visitor that recurses into
			// things.
			$this->debug( __METHOD__, "FIXME unhandled case "
				. Debug::nodeName( $node ) . "\n"
			);
			return [];
	}
}
1123 | |
/**
 * Resolve the property that a property-access node refers to.
 *
 * @param Node $node
 * @return Property|null Null (after logging a debug message) if the lookup throws one of
 *  the exceptions caught below
 */
protected function getPropFromNode( Node $node ): ?Property {
	$isStaticAccess = $node->kind === \ast\AST_STATIC_PROP;
	try {
		$property = $this->getCtxN( $node )->getProperty( $isStaticAccess );
	} catch ( NodeException | IssueException | UnanalyzableException $e ) {
		$this->debug(
			__METHOD__,
			"Cannot determine property (Maybe don't know what class) - " . $this->getDebugInfo( $e )
		);
		return null;
	}
	return $property;
}
1139 | |
/**
 * Build a short debug string for an exception: the string form of the issue instance for
 * IssueException, the class name followed by the message for anything else.
 * @param Exception $e
 * @return string
 */
protected function getDebugInfo( Exception $e ): string {
	if ( $e instanceof IssueException ) {
		return $e->getIssueInstance()->__toString();
	}
	return get_class( $e ) . ' ' . $e->getMessage();
}
1150 | |
/**
 * Get the current filename and line.
 *
 * @param Context|null $context Override the context to make debug info for; when null, the
 *  context of this visitor is used
 * @return string path/to/file +linenumber
 */
protected function dbgInfo( ?Context $context = null ): string {
	// The parameter is explicitly nullable: implicit nullability via a null default is
	// deprecated as of PHP 8.4.
	$ctx = $context ?? $this->context;
	// Using a + instead of : so that I can just copy and paste
	// into a vim command line.
	return $ctx->getFile() . ' +' . $ctx->getLineNumberStart();
}
1163 | |
/**
 * Link together a Method and one of its parameters, so that if the method gets called with
 * something evil later, we can traceback anything it might affect.
 * This is skipped for functions with hardcoded taint, where we assume that any dangerous
 * association was already hardcoded. Skipping is also good for performance, because hardcoded
 * functions tend to be used a lot (for MW, think of methods in Database or in Html).
 *
 * @param Variable $param The variable object for the parameter. This can also be
 *  instance of Parameter (subclass of Variable).
 * @param FunctionInterface $func The function/method in question
 * @param int $i Which argument number is $param
 */
protected function linkParamAndFunc( Variable $param, FunctionInterface $func, int $i ): void {
	// TODO Use $func's builtin/annotated taintedness (available in PreTaintednessVisitor) to check this per
	// parameter (looking at NO_OVERRIDE)
	if ( SecurityCheckPlugin::$pluginInstance->builtinFuncHasTaint( $func->getFQSEN() ) ) {
		return;
	}

	self::ensureVarLinksForArgExist( $func, $i );

	// Work on a clone (or a new empty object) and store it back on the parameter.
	$linksOfParam = self::getMethodLinksCloneOrEmpty( $param );
	$linksOfParam->initializeParamForFunc( $func, $i );
	self::setMethodLinks( $param, $linksOfParam );
}
1192 | |
/**
 * Given a LHS and the links of a RHS, make all the methods that can set RHS also set LHS
 *
 * For an assignment like `$lhs = $rhs`, any function/method which we marked as being able to
 * set the value of rhs gets also marked as being able to set the value of lhs. This
 * information is used to figure out what method parameter is causing the return statement
 * to be tainted.
 *
 * @warning Be careful calling this function if lhs already has taint
 * or rhs side is a compound statement. This could result in misattribution
 * of where the taint is coming from.
 *
 * @param TypedElementInterface $lhs Element that receives the links
 * @param MethodLinks $rhsLinks New links
 * @param bool $override Whether the new links replace the current ones, instead of being
 *  merged with them
 */
protected function mergeTaintDependencies(
	TypedElementInterface $lhs,
	MethodLinks $rhsLinks,
	bool $override
): void {
	// So if we have $a = $b;
	// First we find out all the methods that can set $b
	// Then we add $a to the list of variables that those methods can set.
	// Last we add these methods to $a's list of all methods that can set it.

	$existingLinks = self::getMethodLinks( $lhs );
	$mergedLinks = ( $override || !$existingLinks )
		? $rhsLinks
		: $existingLinks->asMergedWith( $rhsLinks );

	// Things like Variable and Parameter are not attached. These are local elements, and setting taint
	// on them in markAllDependentVarsYes would have no effect. Additionally, since phan creates a new
	// Parameter object for each analysis, we would end up with duplicated links that do nothing but
	// eating memory.
	$shouldAttach = $lhs instanceof Property
		|| $lhs instanceof GlobalVariable
		|| $lhs instanceof PassByReferenceVariable;
	if ( $shouldAttach ) {
		foreach ( $mergedLinks->getMethodAndParamTuples() as [ $linkedFunc, $paramIdx ] ) {
			$linksOfParam = self::getVarLinks( $linkedFunc, $paramIdx );
			assert( $linksOfParam instanceof VarLinksSet );
			$preserved = $mergedLinks->asPreservedTaintednessForFuncParam( $linkedFunc, $paramIdx );
			$linksOfParam->attach( $lhs, $preserved );
		}
	}

	self::setMethodLinks( $lhs, $mergedLinks );
}
1243 | |
/**
 * Mark any function setting a specific variable as EXEC taint
 *
 * If you do something like echo $this->foo;
 * This method is called to make all things that set $this->foo
 * as TAINT_EXEC.
 *
 * Nothing is done when the possible future taint of the variable rules out $taint, when
 * $taint has no backpropagable bits, when the issue is suppressed or a false positive, or
 * when the variable has no method links.
 *
 * @note This might have annoying false positives with widely used properties
 * that are used with different levels of escaping, which is not a good idea anyway.
 *
 * @param TypedElementInterface $var The variable in question
 * @param Taintedness $taint What taint to mark them as.
 * @param CausedByLines|null $additionalError Any extra caused-by lines to add
 */
protected function markAllDependentMethodsExec(
	TypedElementInterface $var,
	Taintedness $taint,
	// Explicitly nullable: implicit nullability via a null default is deprecated as of PHP 8.4.
	?CausedByLines $additionalError = null
): void {
	$futureTaint = $this->getPossibleFutureTaintOfElement( $var );
	if ( $futureTaint !== null && !$futureTaint->has( $taint->get() ) ) {
		return;
	}
	// Ensure we only set exec bits, not normal taint bits.
	$taint = $taint->withOnly( SecurityCheckPlugin::BACKPROP_TAINTS );
	if ( $taint->isSafe() || $this->isIssueSuppressedOrFalsePositive( $taint ) ) {
		return;
	}

	$varLinks = self::getMethodLinks( $var );
	if ( $varLinks === null || $varLinks->isEmpty() ) {
		return;
	}
	$backpropError = self::getCausedByRawCloneOrEmpty( $var );
	if ( $additionalError ) {
		$backpropError->mergeWith( $additionalError );
	}

	// $this->debug( __METHOD__, "Setting {$var->getName()} exec {$taint->toShortString()}" );
	// Peak memory is sampled before and after, to report spikes caused by the backpropagation.
	$oldMem = memory_get_peak_usage();
	foreach ( $taint->decomposeForLinks( $varLinks ) as [ $curLinks, $curTaint ] ) {
		/** @var LinksSet $curLinks */
		/** @var Taintedness $curTaint */
		foreach ( $curLinks as $method ) {
			$paramInfo = $curLinks[$method];
			// Note, not forCaller, as that doesn't see variadic parameters
			$calleeParamList = $method->getParameterList();
			// Collect the sink taint (and its caused-by lines) of every linked parameter, then
			// apply everything to the method in one go.
			$paramTaint = new FunctionTaintedness( Taintedness::newSafe() );
			$funcError = new FunctionCausedByLines();
			foreach ( $paramInfo->getParams() as $i => $paramOffsets ) {
				$curParTaint = $curTaint->asMovedAtRelevantOffsetsForBackprop( $paramOffsets );
				$curBackpropError = $backpropError
					->withTaintAddedToMethodArgLinks( $curParTaint->asExecToYesTaint(), $method, $i );
				if ( isset( $calleeParamList[$i] ) && $calleeParamList[$i]->isVariadic() ) {
					$paramTaint->setVariadicParamSinkTaint( $i, $curParTaint );
					$funcError->setVariadicParamSinkLines( $i, $curBackpropError );
				} else {
					$paramTaint->setParamSinkTaint( $i, $curParTaint );
					$funcError->setParamSinkLines( $i, $curBackpropError );
				}
				// $this->debug( __METHOD__, "Setting method $method arg $i as $taint due to dependency on $var" );
			}
			$this->addFuncTaint( $method, $paramTaint );
			$newFuncTaint = self::getFuncTaint( $method );
			assert( $newFuncTaint !== null );
			$this->maybeAddFuncError( $method, null, $paramTaint, $newFuncTaint );
			$this->mergeFuncError( $method, $funcError, $newFuncTaint );
		}
	}

	$newMem = memory_get_peak_usage();
	// Difference in MiB, rounded
	$diffMem = round( ( $newMem - $oldMem ) / ( 1024 * 1024 ) );
	if ( $diffMem > 2 ) {
		$this->debug( __METHOD__, "Memory spike $diffMem for variable " . $var->getName() );
	}
}
1320 | |
/**
 * Mark any function setting the variables of a node as EXEC taint
 *
 * If you do something like echo $this->foo;
 * This method is called to make all things that set $this->foo
 * as TAINT_EXEC.
 *
 * Unless $tempNumkey is set, the work is delegated to a TaintednessBackpropVisitor. Otherwise,
 * the objects found by getObjsForNodeForNumkeyBackprop() are passed to
 * markAllDependentMethodsExec() one by one.
 *
 * @note This might have annoying false positives with widely used properties
 * that are used with different levels of escaping, which is not a good idea anyway.
 *
 * @param Node $node
 * @param Taintedness $taint What taint to mark them as.
 * @param CausedByLines|null $additionalError Additional caused-by lines to propagate
 * @param bool $tempNumkey Temporary param
 */
protected function markAllDependentMethodsExecForNode(
	Node $node,
	Taintedness $taint,
	// Explicitly nullable: implicit nullability via a null default is deprecated as of PHP 8.4.
	?CausedByLines $additionalError = null,
	bool $tempNumkey = false
): void {
	if ( !$tempNumkey ) {
		$backpropVisitor = new TaintednessBackpropVisitor(
			$this->code_base,
			$this->context,
			$taint,
			$additionalError
		);
		$backpropVisitor( $node );
		return;
	}
	$phanObjs = $this->getObjsForNodeForNumkeyBackprop( $node );
	foreach ( array_unique( $phanObjs ) as $phanObj ) {
		$this->markAllDependentMethodsExec( $phanObj, $taint, $additionalError );
	}
}
1357 | |
/**
 * This happens when someone calls foo( $evilTaintedVar );
 *
 * Every variable linked to the given argument of foo() takes on the taint of the supplied
 * argument. Nothing is done for PHP-internal functions, or if the argument has no links.
 *
 * @param FunctionInterface $method The function or method in question
 * @param int $i The number of the argument in question.
 * @param Taintedness $taint The taint to apply.
 * @param CausedByLines $error Caused-by lines to propagate
 */
protected function markAllDependentVarsYes(
	FunctionInterface $method,
	int $i,
	Taintedness $taint,
	CausedByLines $error
): void {
	if ( $method->isPHPInternal() ) {
		return;
	}
	$linkedVars = self::getVarLinks( $method, $i );
	if ( $linkedVars === null ) {
		return;
	}

	$argTaint = $taint->withOnly( SecurityCheckPlugin::ALL_TAINT );

	foreach ( $linkedVars as $linkedVar ) {
		$preservedTaint = $linkedVars[$linkedVar];
		// TODO Unwrapping should become unnecessary once the TODO in handleMethodCall about postponing
		// handlePassByRef is resolved.
		$element = $linkedVar instanceof PassByReferenceVariable
			? $linkedVar->getElement()
			: $linkedVar;
		assert( $element instanceof TypedElementInterface );

		$propagatedTaint = $preservedTaint->asTaintednessForArgument( $argTaint );

		// For global variables, the same update is applied to the wrapped element as well.
		$targets = [ $element ];
		if ( $element instanceof GlobalVariable ) {
			$targets[] = $element->getElement();
		}
		foreach ( $targets as $target ) {
			$causedBy = self::getCausedByRawCloneOrEmpty( $target )
				->withTaintAddedToMethodArgLinks( $propagatedTaint, $method, $i );
			self::setCausedByRaw( $target, $causedBy );
			$this->setTaintedness( $target, $propagatedTaint, false );
			$this->addTaintError( $target, $propagatedTaint, null );
		}

		$this->mergeTaintError( $element, $error );
	}
}
1412 | |
/**
 * Get the caused-by lines of the given func, as a clone or a new empty object. The data is
 * read from the element returned by getActualFuncWithCausedBy().
 *
 * @param FunctionInterface $element
 * @return FunctionCausedByLines
 */
private function getCausedByLinesForFunc( FunctionInterface $element ): FunctionCausedByLines {
	return self::getFuncCausedByRawCloneOrEmpty( $this->getActualFuncWithCausedBy( $element ) );
}
1423 | |
/**
 * Resolve the element on which caused-by data for $element is stored.
 *
 * Functions for which the plugin reports builtin taint (builtinFuncHasTaint) are returned as-is.
 * For anything else, the defining function is used when getDefiningFuncIfDifferent() yields one
 * (e.g. the defining method for inherited methods); otherwise $element itself is returned.
 *
 * @param FunctionInterface $element
 * @return FunctionInterface
 */
private function getActualFuncWithCausedBy( FunctionInterface $element ): FunctionInterface {
	$hasBuiltinTaint = SecurityCheckPlugin::$pluginInstance->builtinFuncHasTaint( $element->getFQSEN() );
	if ( !$hasBuiltinTaint ) {
		$definingFunc = $this->getDefiningFuncIfDifferent( $element );
		if ( $definingFunc !== null ) {
			return $definingFunc;
		}
	}
	return $element;
}
1438 | |
/**
 * Write a debug message to the destination selected by the SECCHECK_DEBUG environment variable.
 *
 * The destination is resolved on first use and cached in $this->debugOutput:
 *  - "-" means the message is echoed;
 *  - any other non-empty (truthy) value is treated as a file path and opened with fopen() in "w" mode;
 *  - otherwise debugging is disabled and nothing is written.
 *
 * @param string $method __METHOD__ in question
 * @param string $msg debug message
 */
public function debug( $method, $msg ): void {
	if ( $this->debugOutput === null ) {
		$target = getenv( "SECCHECK_DEBUG" );
		if ( $target === '-' ) {
			$this->debugOutput = '-';
		} elseif ( $target ) {
			$this->debugOutput = fopen( $target, "w" );
		} else {
			$this->debugOutput = false;
		}
	}

	// The line is built unconditionally, exactly once per call, before picking the destination.
	$line = $method . "\33[1m " . $this->dbgInfo() . " \33[0m" . $msg . "\n";

	if ( $this->debugOutput === '-' ) {
		// @phan-suppress-next-line PhanPluginRemoveDebugEcho This is the only wanted debug echo
		echo $line;
	} elseif ( $this->debugOutput ) {
		fwrite( $this->debugOutput, $line );
	}
}
1467 | |
/**
 * Try to resolve a callable, as it appears in the AST, to the function-like element it refers to.
 *
 * This is intended for functions that register callbacks. Supported inputs:
 *  - a string without '::', looked up as a global function;
 *  - a string with '::' ('Foo::Bar'), looked up as a method;
 *  - a closure node, a variable node, or an array node with exactly two children, which are
 *    resolved via ContextNode::getFunctionFromNode() (only the first result is used).
 *
 * @param Node|mixed $node The thingy from AST expected to be a Callable
 * @return FunctionInterface|null Null if the input is unsupported or nothing could be resolved
 */
protected function getCallableFromNode( $node ): ?FunctionInterface {
	if ( $node instanceof Node ) {
		$kind = $node->kind;
		$isResolvableNode = $kind === \ast\AST_CLOSURE
			|| $kind === \ast\AST_VAR
			|| ( $kind === \ast\AST_ARRAY && count( $node->children ) === 2 );
		if ( !$isResolvableNode ) {
			return null;
		}
		// Note: intentionally emitting any issues here.
		$candidates = $this->getCtxN( $node )->getFunctionFromNode();
		return self::getFirstElmFromArrayOrGenerator( $candidates );
	}

	if ( !is_string( $node ) ) {
		return null;
	}

	// Easy case, 'Foo::Bar'
	// NOTE: ContextNode::getFunctionFromNode has a TODO about returning something here.
	// And also NOTE: 'self::methodname()' is not valid PHP.
	// TODO: We should probably emit a non-security issue in the missing case
	if ( strpos( $node, '::' ) !== false ) {
		$methodName = FullyQualifiedMethodName::fromFullyQualifiedString( $node );
		if ( $this->code_base->hasMethodWithFQSEN( $methodName ) ) {
			return $this->code_base->getMethodByFQSEN( $methodName );
		}
		return null;
	}

	$functionName = FullyQualifiedFunctionName::fromFullyQualifiedString( $node );
	if ( $this->code_base->hasFunctionWithFQSEN( $functionName ) ) {
		return $this->code_base->getFunctionByFQSEN( $functionName );
	}
	return null;
}
1507 | |
/**
 * Utility function to get the first element from an iterable that can be either an array or a generator.
 *
 * For arrays, the first element in iteration order is returned regardless of its key, so arrays that
 * do not start at key 0 are handled too. For generators, the current element is returned as long as
 * the generator is still valid; falsy elements (0, '', false, ...) are returned unchanged.
 *
 * @phan-template T
 * @param iterable $iter
 * @phan-param iterable<T> $iter
 * @return mixed|null Null if $iter is empty
 * @phan-return T|null
 */
protected static function getFirstElmFromArrayOrGenerator( iterable $iter ) {
	if ( is_array( $iter ) ) {
		// Do not assume a 0-based list: take whatever comes first in iteration order.
		foreach ( $iter as $first ) {
			return $first;
		}
		return null;
	}
	assert( $iter instanceof Generator );
	// Check valid() explicitly rather than relying on truthiness, so that a falsy first element
	// is not mistaken for "no elements".
	return $iter->valid() ? $iter->current() : null;
}
1523 | |
/**
 * Get the issue names and severities given a taint, as well as the relevant taint type for each issue.
 *
 * Each known issue type is checked against $combinedTaint in a fixed order, and an entry is added
 * for every type whose flag(s) intersect it. If none of the flags below is set, the result is an
 * empty list, so callers must be prepared for that.
 *
 * @param int $combinedTaint The taint to warn for. I.e. The exec flags
 *  from LHS shifted to non-exec bitwise AND'd with the rhs taint.
 * @return array[] List of issue type, severity, and taint type
 * @phan-return list<array{0:string,1:int,2:int}>
 */
public function taintToIssuesAndSeverities( int $combinedTaint ): array {
	// Order matters: issues are reported in the order they are listed here.
	$knownIssues = [
		[ 'SecurityCheck-XSS', Issue::SEVERITY_NORMAL, SecurityCheckPlugin::HTML_TAINT ],
		[
			'SecurityCheck-SQLInjection',
			Issue::SEVERITY_CRITICAL,
			SecurityCheckPlugin::SQL_TAINT | SecurityCheckPlugin::SQL_NUMKEY_TAINT
		],
		[ 'SecurityCheck-ShellInjection', Issue::SEVERITY_CRITICAL, SecurityCheckPlugin::SHELL_TAINT ],
		// For now this is low because it seems to have a lot of false positives.
		[
			'SecurityCheck-PHPSerializeInjection',
			Issue::SEVERITY_NORMAL,
			SecurityCheckPlugin::SERIALIZE_TAINT
		],
		[ 'SecurityCheck-DoubleEscaped', Issue::SEVERITY_NORMAL, SecurityCheckPlugin::ESCAPED_TAINT ],
		[ 'SecurityCheck-PathTraversal', Issue::SEVERITY_CRITICAL, SecurityCheckPlugin::PATH_TAINT ],
		[ 'SecurityCheck-RCE', Issue::SEVERITY_CRITICAL, SecurityCheckPlugin::CODE_TAINT ],
		[ 'SecurityCheck-ReDoS', Issue::SEVERITY_NORMAL, SecurityCheckPlugin::REGEX_TAINT ],
		[ 'SecurityCheck-CUSTOM1', Issue::SEVERITY_NORMAL, SecurityCheckPlugin::CUSTOM1_TAINT ],
		[ 'SecurityCheck-CUSTOM2', Issue::SEVERITY_NORMAL, SecurityCheckPlugin::CUSTOM2_TAINT ],
	];

	$issues = [];
	foreach ( $knownIssues as $issue ) {
		// $issue[2] is the taint mask relevant for this issue type.
		if ( $combinedTaint & $issue[2] ) {
			$issues[] = $issue;
		}
	}

	return $issues;
}
1576 | |
/**
 * Convenience wrapper around maybeEmitIssue() for the common case where:
 * - The caller would compute the RHS taint only to feed it to maybeEmitIssue
 * - The message should be followed by caused-by lines
 * - These caused-by lines should be taken from the same object passed as RHS
 * - Only caused-by lines having the LHS taint should be included
 * When all of the above applies, prefer this method.
 *
 * It computes the taintedness of $rhsElement, appends a '{DETAILS}' placeholder to $msg, and
 * appends the RHS caused-by lines to $params as the matching template argument.
 *
 * @warning DO NOT use this method if the caller already needs to compute the RHS
 * taintedness! The taint would be computed twice!
 *
 * @param Taintedness $lhsTaint
 * @param mixed $rhsElement
 * @param string $msg
 * @param array $params Additional parameters for the message template
 * @phan-param list<string|FullyQualifiedFunctionLikeName> $params
 */
public function maybeEmitIssueSimplified(
	Taintedness $lhsTaint,
	$rhsElement,
	string $msg,
	array $params = []
): void {
	$rhsTaintWithError = $this->getTaintedness( $rhsElement );
	$msgWithDetails = $msg . '{DETAILS}';
	$allParams = array_merge( $params, [ $rhsTaintWithError->getError() ] );

	$this->maybeEmitIssue(
		$lhsTaint,
		$rhsTaintWithError->getTaintedness(),
		$msgWithDetails,
		$allParams
	);
}
1608 | |
/**
 * Emit an issue using the appropriate issue type
 *
 * Nothing is emitted when the sink (LHS) and the value (RHS) have no taint in common, unless the
 * RHS is of unknown taintedness and the LHS has exec flags: that case is reported as a likely
 * false positive, as is anything the plugin's isFalsePositive() check recognises.
 *
 * If $this->overrideContext is set, it will use that for the
 * file/line number to report. This is meant as a hack, so that
 * in MW we can force hook related issues to be in the extension
 * instead of where the hook is called from in MW core.
 *
 * @param Taintedness $lhsTaint Taint of left hand side (or equivalent)
 * @param Taintedness $rhsTaint Taint of right hand side (or equivalent)
 * @param string $msg Issue description
 * @param array|Closure $msgParamsOrGetter Message parameters passed to emitIssue. Can also be a closure
 *   that returns said parameters, for performance.
 * @phan-param list|Closure():list $msgParamsOrGetter
 */
public function maybeEmitIssue(
	Taintedness $lhsTaint,
	Taintedness $rhsTaint,
	string $msg,
	$msgParamsOrGetter
): void {
	$unknownRhs = $rhsTaint->has( SecurityCheckPlugin::UNKNOWN_TAINT );
	if ( !$unknownRhs || !$lhsTaint->has( SecurityCheckPlugin::ALL_EXEC_TAINT ) ) {
		$sinkIntersection = Taintedness::intersectForSink( $lhsTaint, $rhsTaint );
		if ( $sinkIntersection->isSafe() ) {
			return;
		}
		$taintFlags = Taintedness::flagsAsExecToYesTaint( $sinkIntersection->get() );
	} else {
		$taintFlags = SecurityCheckPlugin::NO_TAINT;
	}

	$likelyFalsePositive = ( $taintFlags === SecurityCheckPlugin::NO_TAINT && $unknownRhs ) ||
		SecurityCheckPlugin::$pluginInstance->isFalsePositive(
			$taintFlags,
			$msg,
			// FIXME should this be $this->overrideContext ?
			$this->context,
			$this->code_base
		);

	if ( $likelyFalsePositive ) {
		$issues = [
			[ 'SecurityCheck-LikelyFalsePositive', Issue::SEVERITY_LOW, $taintFlags ]
		];
	} else {
		$issues = $this->taintToIssuesAndSeverities( $taintFlags );
	}

	if ( !$issues ) {
		return;
	}

	$reportContext = $this->context;
	if ( $this->overrideContext ) {
		// If we are overriding the file/line number,
		// report the original line number as well.
		$msg .= " (Originally at: $this->context)";
		$reportContext = $this->overrideContext;
	}

	// The getter (if any) is only invoked once we know that something will be emitted.
	$msgParams = $msgParamsOrGetter instanceof Closure ? $msgParamsOrGetter() : $msgParamsOrGetter;
	// Phan doesn't analyze the ternary correctly and thinks this might also be a closure.
	'@phan-var list $msgParams';

	foreach ( $issues as [ $issueType, $severity, $relevantTaint ] ) {
		// Caused-by lines are rendered separately for each issue, limited to its taint type.
		$renderedParams = [];
		foreach ( $msgParams as $key => $param ) {
			if ( $param instanceof CausedByLines ) {
				$renderedParams[$key] = $param->toStringForIssue( $relevantTaint );
			} else {
				$renderedParams[$key] = $param;
			}
		}
		SecurityCheckPlugin::emitIssue(
			$this->code_base,
			$reportContext,
			$issueType,
			$msg,
			$renderedParams,
			$severity
		);
	}
}
1691 | |
/**
 * Determine whether a potential issue would be suppressed or treated as a false positive.
 *
 * This is useful when a specific warning would have a side effect
 * and we want to know whether we should suppress the side effect in
 * addition to the warning.
 *
 * Returns true as soon as any issue type corresponding to the given taint is suppressed in the
 * reporting context (the override context, if set); otherwise defers to the plugin's
 * isFalsePositive() check.
 *
 * @param Taintedness $lhsTaint Must have at least one EXEC flag set
 * @return bool
 */
public function isIssueSuppressedOrFalsePositive( Taintedness $lhsTaint ): bool {
	$execFlags = $lhsTaint->get();
	assert( ( $execFlags & SecurityCheckPlugin::ALL_EXEC_TAINT ) !== SecurityCheckPlugin::NO_TAINT );
	$yesFlags = Taintedness::flagsAsExecToYesTaint( $execFlags );

	$reportContext = $this->overrideContext ?: $this->context;
	foreach ( $this->taintToIssuesAndSeverities( $yesFlags ) as $issue ) {
		// First element of each entry is the issue type.
		if ( $reportContext->hasSuppressIssue( $this->code_base, $issue[0] ) ) {
			return true;
		}
	}

	return SecurityCheckPlugin::$pluginInstance->isFalsePositive(
		$yesFlags,
		"[dummy msg for false positive check]",
		// not using $this->overrideContext to be consistent with maybeEmitIssue()
		$this->context,
		$this->code_base
	);
}
1724 | |
/**
 * Somebody invokes a method or function (or something similar)
 *
 * This has to figure out:
 *  Is the return value of the call tainted
 *  Are any of the arguments tainted
 *  Does the function do anything scary with its arguments
 * It also has to maintain quite a bit of book-keeping.
 *
 * Literal (non-Node) arguments are skipped. Named arguments are first translated to the index of
 * the parameter they refer to, and every per-parameter lookup (flags, sink taint, preserved
 * taint, ...) uses that index.
 *
 * @param FunctionInterface $func
 * @param FullyQualifiedFunctionLikeName $funcName
 * @param array $args Arguments to function/method
 * @phan-param array<Node|mixed> $args
 * @param bool $computePreserve Whether the caller wants to know which taintedness is preserved by this call
 * @param bool $isHookHandler Whether we're analyzing a hook handler for a Hooks::run call.
 *   FIXME This is MW-specific
 * @return TaintednessWithError|null The resulting taint of the expression, or null if
 *  $computePreserve is false
 */
public function handleMethodCall(
	FunctionInterface $func,
	FullyQualifiedFunctionLikeName $funcName,
	array $args,
	bool $computePreserve = true,
	bool $isHookHandler = false
): ?TaintednessWithError {
	$taint = $this->getTaintOfFunction( $func );
	$funcError = $this->getCausedByLinesForFunc( $func );

	$preserveArgumentsData = [];
	foreach ( $args as $i => $argument ) {
		if ( !( $argument instanceof Node ) ) {
			// Literal value
			continue;
		}

		// Resolve named arguments before anything else, so that $i is the index of the actual
		// parameter and $argument is the argument expression rather than the AST_NAMED_ARG wrapper.
		if ( $argument->kind === \ast\AST_NAMED_ARG ) {
			[ $i, $argument, $argName ] = $this->translateNamedArg( $argument, $func );
			if ( $i === null || !$argument instanceof Node ) {
				// Cannot find argument or it's literal
				continue;
			}
			$argName = "`$argName`";
		} else {
			$argName = '#' . ( $i + 1 );
		}

		$curParFlags = $taint->getParamFlags( $i );
		if ( ( $curParFlags & SecurityCheckPlugin::ARRAY_OK ) && $this->nodeIsArray( $argument ) ) {
			// This function specifies that arrays are always ok, so skip.
			continue;
		}

		$paramSinkTaint = $taint->getParamSinkTaint( $i );
		$paramSinkError = $funcError->getParamSinkLines( $i );

		$argTaintWithError = $this->getTaintednessNode( $argument );
		$curArgTaintedness = $argTaintWithError->getTaintedness();
		$baseArgError = $argTaintWithError->getError();
		if (
			$paramSinkTaint->has( SecurityCheckPlugin::SQL_NUMKEY_EXEC_TAINT )
			&& $curArgTaintedness->has( SecurityCheckPlugin::SQL_TAINT )
			&& $this->nodeCanBeString( $argument )
		) {
			// Special case to make NUMKEY work right for non-array values.
			// TODO Should consider if this is really best approach.
			$curArgTaintedness->add( SecurityCheckPlugin::SQL_NUMKEY_TAINT );
		}

		$paramSinkTaint = SecurityCheckPlugin::$pluginInstance->modifyParamSinkTaint(
			$paramSinkTaint,
			$curArgTaintedness,
			$argument,
			$i,
			$func,
			$taint,
			$this->context,
			$this->code_base
		);

		// Add a hook in order to special case for codebases. This is primarily used as a hack so that in mediawiki
		// the Message class doesn't have double escape taint if method takes Message|string.
		// TODO This is quite hacky.
		$curArgTaintedness = SecurityCheckPlugin::$pluginInstance->modifyArgTaint(
			$curArgTaintedness,
			$argument,
			$i,
			$func,
			$taint,
			$this->context,
			$this->code_base
		);

		// TODO: We also need to handle the case where someFunc( $execArg ) for pass by reference where
		// the parameter is later executed outside the func.
		if ( $curArgTaintedness->has( SecurityCheckPlugin::ALL_TAINT ) ) {
			$this->markAllDependentVarsYes( $func, $i, $curArgTaintedness, $baseArgError );
		}

		// We are doing something like evilMethod( $arg ); where $arg is a parameter to the current function.
		// So backpropagate that assigning to $arg can cause evilness.
		if ( !$paramSinkTaint->isSafe() ) {
			$this->backpropagateArgTaint( $argument, $paramSinkTaint, $paramSinkError );
		}

		$param = $func->getParameterForCaller( $i );
		// @todo Internal funcs that pass by reference. Should we assume that their variables are tainted? Most
		// common example is probably preg_match, which may very well be tainted much of the time.
		// TODO: Ideally this should happen after all args have been processed, so it would account for any
		// last-minute modification of the dependent elements (e.g. markAllDependentVarsYes) and would see the
		// "final" value for refTaint. Right now this is not possible because links tracked by
		// markAllDependentVarsYes are imprecise and would introduce false positives.
		if ( $param && $param->isPassByReference() && !$func->isPHPInternal() ) {
			$this->handlePassByRef( $func, $argument, $i, $isHookHandler );
		}

		/** @phan-return list */
		$issueArgsGetter = function () use (
			$funcName, $argName, $argument, $paramSinkError, $baseArgError
		): array {
			// Always include the ordinal (it helps for repeated arguments)
			$taintedArg = $argName;
			$argStr = ASTReverter::toShortString( $argument );
			if ( strlen( $argStr ) < 25 ) {
				// If we have a short representation of the arg, include it as well.
				$taintedArg .= " (`$argStr`)";
			}

			return [
				$funcName,
				$this->getCurrentMethod(),
				$taintedArg,
				$paramSinkError,
				$baseArgError,
			];
		};

		$this->maybeEmitIssue(
			$paramSinkTaint,
			$curArgTaintedness,
			"Calling method {FUNCTIONLIKE}() in {FUNCTIONLIKE}" .
			" that outputs using tainted argument {CODE}.{DETAILS}{DETAILS}",
			$issueArgsGetter
		);

		$preserveArgumentsData[$i] = [ $curArgTaintedness, $baseArgError ];
	}

	if ( !$computePreserve ) {
		return null;
	}

	$hardcodedPreservedTaint = $this->getHardcodedPreservedTaintForFunc( $func, $preserveArgumentsData );
	if ( $hardcodedPreservedTaint ) {
		return $hardcodedPreservedTaint;
	}
	$overallTaint = $taint->getOverall();
	$combinedArgTaint = Taintedness::newSafe();
	$combinedArgErrors = new CausedByLines();
	foreach ( $preserveArgumentsData as $i => [ $curArgTaintedness, $baseArgError ] ) {
		if ( $taint->hasParamPreserve( $i ) ) {
			$parTaint = $taint->getParamPreservedTaint( $i );
			$preservedArgTaint = $parTaint->asTaintednessForArgument( $curArgTaintedness );
			$curArgLinks = MethodLinks::newEmpty();
		} elseif (
			$overallTaint->has( SecurityCheckPlugin::PRESERVE_TAINT | SecurityCheckPlugin::UNKNOWN_TAINT )
		) {
			// No info for this specific parameter, but the overall function either preserves taint
			// when unspecified or is unknown. So just pass the taint through, destroying the shape.
			$preservedArgTaint = $curArgTaintedness->asCollapsed();
			$curArgLinks = MethodLinks::newEmpty();
		} else {
			// This parameter has no taint info. And overall this function doesn't depend on param
			// for taint and isn't unknown. So we consider this argument untainted.
			continue;
		}

		$combinedArgTaint->mergeWith( $preservedArgTaint );
		$curArgError = $baseArgError->asIntersectedWithTaintedness( $preservedArgTaint );
		$relevantParamError = $funcError->getParamPreservedLines( $i )
			->asPreservingTaintednessAndLinks( $preservedArgTaint, $curArgLinks );
		$curArgError->mergeWith( $relevantParamError );
		// NOTE: If any line inside the callee's body is responsible for preserving the taintedness of more
		// than one argument, it will appear once per preserved argument in the overall caused-by of the
		// call expression. This is probably a good thing, but can increase the length of caused-by lines.
		// TODO Something like T291379 might help here.
		$combinedArgErrors->mergeWith( $curArgError );
	}

	$callTaintedness = $overallTaint->without(
		SecurityCheckPlugin::PRESERVE_TAINT | SecurityCheckPlugin::ALL_EXEC_TAINT
	);
	$combinedArgTaint->remove( SecurityCheckPlugin::ALL_EXEC_TAINT );
	$callTaintedness->mergeWith( $combinedArgTaint );
	$callError = $funcError->getGenericLines()->asMergedWith( $combinedArgErrors );
	return new TaintednessWithError( $callTaintedness, $callError, MethodLinks::newEmpty() );
}
1921 | |
/**
 * Map a named-argument node to the position of the parameter it refers to.
 *
 * The parameter is looked up by name in the real parameter list of $func. If no parameter has
 * that name, all three elements of the returned tuple are null.
 *
 * @todo This should possibly be part of the public interface upstream
 * @see \Phan\Analysis\ArgumentType::analyzeParameterListForCallback
 * @param Node $argument
 * @param FunctionInterface $func
 * @return array Parameter index, argument expression, argument name
 * @phan-return array{0:int|null,1:Node|mixed,2:?string}
 */
private function translateNamedArg( Node $argument, FunctionInterface $func ): array {
	$argName = $argument->children['name'];
	$argExpr = $argument->children['expr'];
	assert( $argExpr !== null );

	foreach ( $func->getRealParameterList() as $position => $parameter ) {
		if ( $parameter->getName() !== $argName ) {
			continue;
		}
		return [ $position, $argExpr, $argName ];
	}

	return [ null, null, null ];
}
1941 | |
/**
 * Backpropagate the sink taintedness of a parameter to whatever the given argument depends on.
 *
 * SQL_NUMKEY_EXEC_TAINT, if present, is split off and propagated on its own with the extra flag of
 * markAllDependentMethodsExecForNode set to true; the remaining taint is propagated normally.
 *
 * @param Node $argument
 * @param Taintedness $taint
 * @param CausedByLines|null $funcError
 *
 * @todo This has false negatives, because we don't collect function arguments in
 * getPhanObjsForNode (we'd have to pass option 'all'), so we can't handle e.g. array_merge
 * right now. However, collecting all args would create false positives with functions where
 * the arg taint isn't propagated to the return value. Ideally, we'd want to include an argument
 * iff the corresponding parameter passes $taint through.
 *
 * @note It's important that we don't backpropagate taintedness to every returned object in case
 * of function calls, but just props and the like (so excluding vars). See test 'toomanydeps'.
 */
protected function backpropagateArgTaint(
	Node $argument,
	Taintedness $taint,
	?CausedByLines $funcError = null
): void {
	if ( $taint->has( SecurityCheckPlugin::SQL_NUMKEY_EXEC_TAINT ) ) {
		// Special case for numkey, we need to "filter" the argument.
		// TODO This doesn't return arrays with mixed keys. Currently, doing so would result
		// in arrays being considered as a unit, and the taint would be backpropagated to all
		// values, even ones with string keys. See TODO in elementCanBeNumkey

		// TODO This should be limited to the outer array, see TODO in backpropnumkey test
		// Note that this is true in general for NUMKEY taint, not just when backpropagating it
		$numkeyTaint = $taint->withOnly( SecurityCheckPlugin::SQL_NUMKEY_EXEC_TAINT );
		$this->markAllDependentMethodsExecForNode( $argument, $numkeyTaint, $funcError, true );
		$taint = $taint->without( SecurityCheckPlugin::SQL_NUMKEY_EXEC_TAINT );
	}

	$this->markAllDependentMethodsExecForNode( $argument, $taint, $funcError );
}
1976 | |
/**
 * Handle pass-by-ref params when examining a function call. Phan handles passbyref by reanalyzing
 * the method with PassByReferenceVariable objects instead of Parameters. These objects contain
 * the info about the param, but proxy all calls to the underlying argument object.
 * We cannot 100% copy that behaviour: inside the function body, the local variable for the pbr param
 * would have the same taintedness as the argument, and things like `echo $pbr` would emit an issue
 * inside the function, which is unwanted for now. Additionally, it's unclear how we'd add a caused-by
 * entry for the line of the function call.
 * Hence, instead of adding taintedness to the underlying argument, we put it in a separate prop, which is only
 * written but never read inside the function body. Then after the call was analyzed, this method moves
 * the taintedness from the "special" prop onto the normal taintedness prop. We do the same thing for links,
 * so as to infer which taintedness from the argument is preserved by the function.
 * Once the data has been moved, the method links of the argument are reset and its ref data is cleared.
 * TODO In the future we might want to really copy phan's approach, as that would allow us to delete some hacks,
 * and handle conditionals inside the function body more accurately.
 *
 * @param FunctionInterface $func
 * @param Node $argument
 * @param int $i Position of the param
 * @param bool $isHookHandler Whether we're analyzing a hook handler for a Hooks::run call.
 *   FIXME This is MW-specific
 */
private function handlePassByRef(
	FunctionInterface $func,
	Node $argument,
	int $i,
	bool $isHookHandler
): void {
	$target = $this->getPassByRefObjFromNode( $argument );
	if ( !$target ) {
		return;
	}
	$pendingTaint = self::getTaintednessRef( $target );
	if ( !$pendingTaint ) {
		// If no ref taint was set, it's likely due to a recursive call or another instance where phan is not
		// reanalyzing the callee with PassByReferenceVariable objects.
		return;
	}

	$underlyingGlobal = $target instanceof GlobalVariable ? $target->getElement() : null;
	// Move the ref taintedness to the "actual" taintedness of the object
	// Note: We assume that the order in which hook handlers are called is nondeterministic, thus
	// we never override arg taint for reference params in this case.
	$overrideTaint = !( $target instanceof Property ) && !$underlyingGlobal && !$isHookHandler;

	// Note, the call itself is only responsible if it adds some taintedness.
	// The snapshot must be taken before the preserved taintedness is merged in below.
	$taintForError = clone $pendingTaint;

	$pendingLinks = self::getMethodLinksRef( $target );
	if ( $pendingLinks && $pendingLinks->hasDataForFuncAndParam( $func, $i ) ) {
		$preservedByCall = $pendingLinks->asPreservedTaintednessForFuncParam( $func, $i );
		$currentArgTaint = $this->getTaintednessPhanObj( $target );
		$pendingTaint->mergeWith( $preservedByCall->asTaintednessForArgument( $currentArgTaint ) );
	}

	$this->setTaintedness( $target, $pendingTaint, $overrideTaint );
	$this->addTaintError( $target, $taintForError, null );
	if ( $underlyingGlobal ) {
		$this->setTaintedness( $underlyingGlobal, $pendingTaint, false );
		$this->addTaintError( $underlyingGlobal, $taintForError, null );
	}

	// We clear method links since the by-ref call might have modified them, and precise tracking is not
	// trivial to implement, and most probably not worth the effort.
	self::setMethodLinks( $target, MethodLinks::newEmpty() );
	self::clearRefData( $target );
}
2040 | |
/**
 * Given the node of an argument that is passed by reference, return the phan object
 * corresponding to that node, if any.
 *
 * Property and static property nodes are resolved via getPropFromNode(). Variable nodes are
 * resolved via the context node, except for hardcoded global variables, and null is returned if
 * the lookup throws. Array dimension nodes and any other node kind yield null; the latter are
 * also reported through debug().
 *
 * @param Node $node
 * @return TypedElementInterface|null
 */
private function getPassByRefObjFromNode( Node $node ): ?TypedElementInterface {
	$cn = $this->getCtxN( $node );
	$kind = $node->kind;

	if ( $kind === \ast\AST_PROP || $kind === \ast\AST_STATIC_PROP ) {
		return $this->getPropFromNode( $node );
	}

	if ( $kind === \ast\AST_VAR ) {
		if ( Variable::isHardcodedGlobalVariableWithName( $cn->getVariableName() ) ) {
			return null;
		}
		try {
			return $cn->getVariable();
		} catch ( NodeException | IssueException $_ ) {
			return null;
		}
	}

	// Phan doesn't handle AST_DIM with PassByReferenceVariable objects, so nothing we can do anyway.
	// Anything else is unexpected and gets logged.
	if ( $kind !== \ast\AST_DIM ) {
		$this->debug( __METHOD__, 'Unhandled pass-by-ref case: ' . Debug::nodeName( $node ) );
	}
	return null;
}
2072 | |
2073 | /** |
2074 | * Get the taintedness of the return value of $func (a special-cased internal PHP function) given the taintedness |
2075 | * of its arguments. Note that this doesn't handle passbyref parameters. If the function is not special-cased, |
2076 | * returns null. |
2077 | * |
2078 | * @param FunctionInterface $func |
2079 | * @param array<array<Taintedness|CausedByLines>> $preserveArgumentsData Actual taintedness and caused-by lines of |
2080 | * each argument. Literal arguments aren't included here. |
2081 | * @phan-param array<int,array{0:Taintedness,1:CausedByLines}> $preserveArgumentsData |
2082 | * @return TaintednessWithError|null |
2083 | */ |
2084 | private function getHardcodedPreservedTaintForFunc( |
2085 | FunctionInterface $func, |
2086 | array $preserveArgumentsData |
2087 | ): ?TaintednessWithError { |
2088 | switch ( ltrim( $func->getName(), '\\' ) ) { |
2089 | // Functions that return one element of the array (first and only parameter) |
2090 | case 'array_pop': |
2091 | case 'array_shift': |
2092 | case 'current': |
2093 | case 'end': |
2094 | case 'next': |
2095 | case 'pos': |
2096 | case 'prev': |
2097 | case 'reset': |
2098 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2099 | return TaintednessWithError::newEmpty(); |
2100 | } |
2101 | $taint = $preserveArgumentsData[0][0]->asValueFirstLevel(); |
2102 | $error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint ); |
2103 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2104 | case 'array_values': |
2105 | // Same taintedness as the original array (first and only param), but with safe keys and numkey. |
2106 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2107 | return TaintednessWithError::newEmpty(); |
2108 | } |
2109 | $taint = $preserveArgumentsData[0][0]->withoutKeys(); |
2110 | if ( $taint->has( SecurityCheckPlugin::SQL_TAINT ) ) { |
2111 | $taint->add( SecurityCheckPlugin::SQL_NUMKEY_TAINT ); |
2112 | } |
2113 | $error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint ); |
2114 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2115 | // Functions that return a key from the array (first and only parameter) |
2116 | case 'key': |
2117 | case 'array_key_first': |
2118 | case 'array_key_last': |
2119 | // array_keys returns all keys from the array (first param), and can also take two more parameters |
2120 | // that don't contribute to the resulting taintedness. |
2121 | case 'array_keys': |
2122 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2123 | return TaintednessWithError::newEmpty(); |
2124 | } |
2125 | $taint = $preserveArgumentsData[0][0]->asKeyForForeach(); |
2126 | $error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint ); |
2127 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2128 | case 'array_change_key_case': |
2129 | // The overall shape remains the same, but the keys of the outermost array (first param) have different |
2130 | // case. Second param (lower vs upper) is safe. |
2131 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2132 | return TaintednessWithError::newEmpty(); |
2133 | } |
2134 | // TODO: actually handle case changes! |
2135 | $taint = clone $preserveArgumentsData[0][0]; |
2136 | $error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint ); |
2137 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2138 | case 'array_flip': |
2139 | // Swaps keys and values of the array (first and only param) |
2140 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2141 | return TaintednessWithError::newEmpty(); |
2142 | } |
2143 | $taint = $preserveArgumentsData[0][0]->asKeyForForeach(); |
2144 | $taint->addKeysTaintedness( $preserveArgumentsData[0][0]->asValueFirstLevel()->get() ); |
2145 | $error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint ); |
2146 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2147 | case 'implode': |
2148 | case 'join': |
2149 | // This function can be called in three different ways: |
2150 | // - implode( $string, $array ) -> joins elements in $array using $string |
2151 | // - implode( $array ) -> joins elements in $array using the empty string |
2152 | // - implode( $array, $string ) -> same as the first one but inverted params, deprecated in PHP 7.4, |
2153 | // removed in PHP 8 |
2154 | // TODO: Right now we don't support the deprecated syntax; should we? |
2155 | if ( isset( $preserveArgumentsData[0] ) ) { |
2156 | $joinerTaint = $preserveArgumentsData[0][0]->asCollapsed(); |
2157 | $joinerError = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $joinerTaint ); |
2158 | } |
2159 | $combinedTaint = $joinerTaint ?? Taintedness::newSafe(); |
2160 | $combinedError = $joinerError ?? new CausedByLines(); |
2161 | if ( isset( $preserveArgumentsData[1] ) ) { |
2162 | $arrayTaint = $preserveArgumentsData[1][0]->withoutKeys()->asCollapsed(); |
2163 | $combinedTaint->mergeWith( $arrayTaint ); |
2164 | $combinedError->mergeWith( |
2165 | $preserveArgumentsData[1][1]->asIntersectedWithTaintedness( $arrayTaint ) |
2166 | ); |
2167 | } |
2168 | return new TaintednessWithError( $combinedTaint, $combinedError, MethodLinks::newEmpty() ); |
2169 | case 'array_fill': |
2170 | // array_fill( $start, $count, $value ) creates an array with $count copies of $value, starting |
2171 | // at key $start. The first two params are integers, and thus safe. |
2172 | if ( !isset( $preserveArgumentsData[2] ) ) { |
2173 | return TaintednessWithError::newEmpty(); |
2174 | } |
2175 | $preservedArgTaint = clone $preserveArgumentsData[2][0]; |
2176 | $taint = Taintedness::newSafe(); |
2177 | // TODO: We may actually be able to infer the actual keys, instead of setting as unknown |
2178 | $taint->setOffsetTaintedness( null, $preservedArgTaint ); |
2179 | // TODO: We should also add numkey if the argument has sql. |
2180 | $error = $preserveArgumentsData[2][1]->asIntersectedWithTaintedness( $preservedArgTaint ); |
2181 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2182 | case 'array_fill_keys': |
2183 | // array_fill_keys( $keys, $value ) creates an array whose keys are the elements in $keys, and whose |
2184 | // values are all equal to $value. |
2185 | $taint = Taintedness::newSafe(); |
2186 | $error = new CausedByLines(); |
2187 | if ( isset( $preserveArgumentsData[0] ) ) { |
2188 | $keysTaintedness = $preserveArgumentsData[0][0]->asValueFirstLevel(); |
2189 | $taint->addKeysTaintedness( $keysTaintedness->get() ); |
2190 | $error->mergeWith( $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint ) ); |
2191 | } |
2192 | if ( isset( $preserveArgumentsData[1] ) ) { |
2193 | $preservedValueTaint = $preserveArgumentsData[1][0]; |
2194 | $taint->setOffsetTaintedness( null, clone $preservedValueTaint ); |
2195 | $error->mergeWith( |
2196 | $preserveArgumentsData[1][1]->asIntersectedWithTaintedness( $preservedValueTaint ) |
2197 | ); |
2198 | } |
2199 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2200 | case 'array_combine': |
2201 | // array_combine( $keys, $values ) creates an array whose keys are the elements in $keys, and whose |
2202 | // values are the elements in $values. |
2203 | $taint = Taintedness::newSafe(); |
2204 | $error = new CausedByLines(); |
2205 | if ( isset( $preserveArgumentsData[0] ) ) { |
2206 | $keysTaintedness = $preserveArgumentsData[0][0]->asValueFirstLevel(); |
2207 | $taint->addKeysTaintedness( $keysTaintedness->get() ); |
2208 | $error->mergeWith( $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint ) ); |
2209 | } |
2210 | if ( isset( $preserveArgumentsData[1] ) ) { |
2211 | $valueTaint = $preserveArgumentsData[1][0]->withoutKeys(); |
2212 | $taint->mergeWith( $valueTaint ); |
2213 | $error->mergeWith( $preserveArgumentsData[1][1]->asIntersectedWithTaintedness( $valueTaint ) ); |
2214 | } |
2215 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2216 | case 'array_unique': |
2217 | // Removes duplicates from an array (first param). We can't tell what gets removed, or what the effect |
2218 | // of this function on array keys is. Second param is safe. |
2219 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2220 | return TaintednessWithError::newEmpty(); |
2221 | } |
2222 | $taint = $preserveArgumentsData[0][0]->asKnownKeysMadeUnknown(); |
2223 | $error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint ); |
2224 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2225 | case 'array_diff': |
2226 | case 'array_diff_assoc': |
2227 | // - array_diff( $arr, $x_1, ..., $x_n ) returns elements in $arr that are NOT in any of the $x_i. |
2228 | // The equality of two elements is determined by looking at their values. |
2229 | // Only the first argument contributes to the preserved taintedness. |
2230 | // - array_diff_assoc does the same, but two elements are considered equal if they have the same value |
2231 | // AND the same key. |
2232 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2233 | return TaintednessWithError::newEmpty(); |
2234 | } |
2235 | // We can't infer shape mutations because Taintedness doesn't keep track of the values, so just |
2236 | // return the taintedness of the first argument. |
2237 | $preservedArgTaint = clone $preserveArgumentsData[0][0]; |
2238 | return new TaintednessWithError( |
2239 | $preservedArgTaint, |
2240 | $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ), |
2241 | MethodLinks::newEmpty() |
2242 | ); |
2243 | case 'array_diff_key': |
2244 | // array_diff_key( $arr, $x_1, ..., $x_n ) is similar to array_diff, but here two elements are |
2245 | // considered equal if they have the same key (regardless of the value). |
2246 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2247 | return TaintednessWithError::newEmpty(); |
2248 | } |
2249 | /** @var Taintedness $taint */ |
2250 | [ $taint, $error ] = array_shift( $preserveArgumentsData ); |
2251 | $taint = clone $taint; |
2252 | foreach ( $preserveArgumentsData as $argData ) { |
2253 | $taint->removeKnownKeysFrom( $argData[0] ); |
2254 | // No argument besides the first one can contribute to caused-by lines, although |
2255 | // ideally we would remove the current error from $error. |
2256 | } |
2257 | // The shape is destroyed to avoid pretending that we know anything about the final shape of the array. |
2258 | return new TaintednessWithError( $taint->asKnownKeysMadeUnknown(), $error, MethodLinks::newEmpty() ); |
2259 | case 'array_intersect': |
2260 | case 'array_intersect_assoc': |
2261 | // - array_intersect( $arr_1, ..., $arr_n ) returns an array of elements that are in ALL of the $arr_i. |
2262 | // The equality of two elements is determined by looking at their values. |
2263 | // Only values from the first array are used for the return value. |
2264 | // - array_intersect_assoc does the same, but two elements are considered equal if they have the same |
2265 | // value AND the same key. |
2266 | if ( !$preserveArgumentsData ) { |
2267 | return TaintednessWithError::newEmpty(); |
2268 | } |
2269 | // Note: we can't do an actual intersect on the values because Taintedness does not store them, but |
2270 | // intersecting the taintedness flags, although not perfect, is correct and approximates that. |
2271 | // The shape is destroyed to avoid pretending that we know anything about the final shape of the array. |
2272 | /** @var Taintedness $taint */ |
2273 | [ $taint, $error ] = array_shift( $preserveArgumentsData ); |
2274 | $taint = $taint->asKnownKeysMadeUnknown(); |
2275 | foreach ( $preserveArgumentsData as $argData ) { |
2276 | $taint->keepOnly( $argData[0]->get() ); |
2277 | // No argument besides the first one can contribute to caused-by lines, although |
2278 | // ideally we would intersect $error with the current error. |
2279 | } |
2280 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2281 | case 'array_intersect_key': |
2282 | // array_intersect_key( $arr, $x_1, ..., $x_n ) is similar to array_intersect, but here two elements are |
2283 | // considered equal if they have the same key (regardless of the value). |
2284 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2285 | return TaintednessWithError::newEmpty(); |
2286 | } |
2287 | // We can't infer shape mutations because there might be unknown keys in either argument, so just |
2288 | // return the taintedness of the first argument. |
2289 | $preservedArgTaint = clone $preserveArgumentsData[0][0]; |
2290 | return new TaintednessWithError( |
2291 | $preservedArgTaint, |
2292 | $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ), |
2293 | MethodLinks::newEmpty() |
2294 | ); |
2295 | // TODO The last parameter of these functions is a callback, so probably hard to handle. They're also |
2296 | // variadic, so we'd need to know the arg type to analyze the callback. |
2297 | case 'array_diff_uassoc': |
2298 | case 'array_diff_ukey': |
2299 | case 'array_intersect_uassoc': |
2300 | case 'array_intersect_ukey': |
2301 | case 'array_udiff': |
2302 | case 'array_udiff_assoc': |
2303 | case 'array_uintersect': |
2304 | case 'array_uintersect_assoc': |
2305 | // The last two params of these are callbacks, so twice as hard |
2306 | case 'array_udiff_uassoc': |
2307 | case 'array_uintersect_uassoc': |
2308 | // Only the taintedness from first argument is preserved. |
2309 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2310 | return TaintednessWithError::newEmpty(); |
2311 | } |
2312 | $preservedArgTaint = clone $preserveArgumentsData[0][0]; |
2313 | return new TaintednessWithError( |
2314 | $preservedArgTaint, |
2315 | $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ), |
2316 | MethodLinks::newEmpty() |
2317 | ); |
2318 | case 'array_map': |
2319 | // array_map( $cb, $arr, $arr_1, ..., $arr_n ) returns the result of applying $cb to all the array |
2320 | // arguments, element by element. |
2321 | // TODO: Analyze the callback. For now we only preserve taintedness of array arguments. |
2322 | unset( $preserveArgumentsData[0] ); |
2323 | $taint = Taintedness::newSafe(); |
2324 | $error = new CausedByLines(); |
2325 | foreach ( $preserveArgumentsData as [ $argTaint, $argError ] ) { |
2326 | $preservedArgTaint = $argTaint->asCollapsed(); |
2327 | $taint->mergeWith( $preservedArgTaint ); |
2328 | $error->mergeWith( $argError->asIntersectedWithTaintedness( $preservedArgTaint ) ); |
2329 | } |
2330 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2331 | case 'array_filter': |
2332 | // array_filter( $arr, $cb, $mode ) filters the $arr by using $cb. |
2333 | // TODO: Analyze the callback. For now we preserve the whole taintedness of the array. |
2334 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2335 | return TaintednessWithError::newEmpty(); |
2336 | } |
2337 | $preservedArgTaint = $preserveArgumentsData[0][0]->asKnownKeysMadeUnknown(); |
2338 | return new TaintednessWithError( |
2339 | $preservedArgTaint, |
2340 | $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ), |
2341 | MethodLinks::newEmpty() |
2342 | ); |
2343 | case 'array_reduce': |
2344 | // array_reduce( $arr, $cb, $initial ) applies $cb to $arr to obtain a single value. |
2345 | // TODO: Analyze the callback. For now we preserve the whole taintedness of the array. |
2346 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2347 | return TaintednessWithError::newEmpty(); |
2348 | } |
2349 | $preservedArgTaint = $preserveArgumentsData[0][0]->asCollapsed(); |
2350 | return new TaintednessWithError( |
2351 | $preservedArgTaint, |
2352 | $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ), |
2353 | MethodLinks::newEmpty() |
2354 | ); |
2355 | case 'array_reverse': |
2356 | // array_reverse( $arr, $preserveKeys ) reverses the order of an array. String keys are always |
2357 | // preserved, the second param controls whether int keys are also preserved. |
2358 | // TODO: By knowing the value of the second arg, we could improve this by: |
2359 | // - Removing only int keys if false |
2360 | // - Preserving the whole shape if true |
2361 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2362 | return TaintednessWithError::newEmpty(); |
2363 | } |
2364 | $preservedArgTaint = $preserveArgumentsData[0][0]->asKnownKeysMadeUnknown(); |
2365 | return new TaintednessWithError( |
2366 | $preservedArgTaint, |
2367 | $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ), |
2368 | MethodLinks::newEmpty() |
2369 | ); |
2370 | case 'array_pad': |
2371 | // array_pad( $arr, $length, $val ) returns a copy of $arr padded to the size specified by $length |
2372 | // by adding copies of $val. |
2373 | if ( isset( $preserveArgumentsData[0] ) ) { |
2374 | $taint = clone $preserveArgumentsData[0][0]; |
2375 | $error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint ); |
2376 | } else { |
2377 | $taint = Taintedness::newSafe(); |
2378 | $error = new CausedByLines(); |
2379 | } |
2380 | if ( isset( $preserveArgumentsData[2] ) ) { |
2381 | $valArgTaint = $preserveArgumentsData[2][0]; |
2382 | $taint->setOffsetTaintedness( null, $valArgTaint ); |
2383 | $error->mergeWith( $preserveArgumentsData[2][1]->asIntersectedWithTaintedness( $valArgTaint ) ); |
2384 | } |
2385 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2386 | case 'array_slice': |
2387 | // array_slice( $arr, $offset, $len, $preserveKeys ) returns the segment of $arr starting at $offset |
2388 | // and of size $len. String keys are always preserved, $preserveKeys controls whether int keys |
2389 | // are also preserved. |
2390 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2391 | return TaintednessWithError::newEmpty(); |
2392 | } |
2393 | $preservedArgTaint = $preserveArgumentsData[0][0]->asKnownKeysMadeUnknown(); |
2394 | return new TaintednessWithError( |
2395 | $preservedArgTaint, |
2396 | $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ), |
2397 | MethodLinks::newEmpty() |
2398 | ); |
2399 | case 'array_replace': |
2400 | // array_replace( $arr, $rep_1, ..., $rep_n ) returns a copy of $arr where each element is replaced |
2401 | // with the element having the same key in the rightmost argument. |
2402 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2403 | return TaintednessWithError::newEmpty(); |
2404 | } |
2405 | $firstArgData = array_shift( $preserveArgumentsData ); |
2406 | /** @var Taintedness $taint */ |
2407 | $taint = clone $firstArgData[0]; |
2408 | $error = $firstArgData[1]->asIntersectedWithTaintedness( $taint ); |
2409 | foreach ( $preserveArgumentsData as [ $argTaint, $argError ] ) { |
2410 | $taint->arrayReplace( $argTaint ); |
2411 | // Note: we may be adding too many caused-by lines here |
2412 | $error->mergeWith( $argError->asIntersectedWithTaintedness( $argTaint ) ); |
2413 | } |
2414 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2415 | case 'array_merge': |
2416 | // array_merge( $arr_1, ... $arr_n ) merges the given array arguments. If any two (or more) input arrays |
2417 | // have the same string key, the value from the rightmost argument with that key will be used. Integer |
2418 | // keys are always appended, and never replaced. Additionally, integer keys in the resulting array |
2419 | // will be renumbered incrementally starting from 0. |
2420 | if ( !$preserveArgumentsData ) { |
2421 | return TaintednessWithError::newEmpty(); |
2422 | } |
2423 | /** @var Taintedness $taint */ |
2424 | [ $taint, $error ] = array_shift( $preserveArgumentsData ); |
2425 | foreach ( $preserveArgumentsData as [ $argTaint, $argError ] ) { |
2426 | $taint->arrayMerge( $argTaint ); |
2427 | $error->mergeWith( $argError->asIntersectedWithTaintedness( $argTaint ) ); |
2428 | } |
2429 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2430 | // TODO Handle these with recursion. |
2431 | case 'array_merge_recursive': |
2432 | case 'array_replace_recursive': |
2433 | $taint = Taintedness::newSafe(); |
2434 | $error = new CausedByLines(); |
2435 | foreach ( $preserveArgumentsData as [ $curArgTaintedness, $baseArgError ] ) { |
2436 | $preservedArgTaint = $curArgTaintedness->asKnownKeysMadeUnknown(); |
2437 | $taint->mergeWith( $preservedArgTaint ); |
2438 | $error->mergeWith( $baseArgError->asIntersectedWithTaintedness( $preservedArgTaint ) ); |
2439 | } |
2440 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2441 | case 'array_chunk': |
2442 | // array_chunk( $array, $length, $preserve_keys = false ) returns a list of chunks of $array. The keys |
2443 | // in each chunk are the same as in $array if $preserve_keys is true. Else, they're just numbers. |
2444 | if ( !isset( $preserveArgumentsData[0] ) ) { |
2445 | return TaintednessWithError::newEmpty(); |
2446 | } |
2447 | $taint = Taintedness::newSafe(); |
2448 | // TODO: Check value of $preserve_keys to determine the key taintedness more accurately. |
2449 | // For now, we just assume that keys are preserved. |
2450 | $taint->setOffsetTaintedness( null, $preserveArgumentsData[0][0]->asKnownKeysMadeUnknown() ); |
2451 | $error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint ); |
2452 | return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() ); |
2453 | default: |
2454 | return null; |
2455 | } |
2456 | } |
2457 | |
2458 | /** |
2459 | * Given a binary operator, compute which taint will be preserved. Safe ops don't preserve |
2460 | * any taint, whereas unsafe ops will preserve all taints. The taint of a binop is basically |
2461 | * ( lhs_taint | rhs_taint ) & taint_mask |
2462 | * |
2463 | * @warning This method should avoid computing the taint of $lhs and $rhs, because it might be |
2464 | * called in preorder, but it would trigger a postorder visit. |
2465 | * |
2466 | * @param Node $opNode |
2467 | * @param Node|mixed $lhs Either a Node or a scalar |
2468 | * @param Node|mixed $rhs Either a Node or a scalar |
2469 | * @return int |
2470 | */ |
2471 | protected function getBinOpTaintMask( Node $opNode, $lhs, $rhs ): int { |
2472 | static $safeBinOps = [ |
2473 | \ast\flags\BINARY_BOOL_XOR, |
2474 | \ast\flags\BINARY_DIV, |
2475 | \ast\flags\BINARY_IS_EQUAL, |
2476 | \ast\flags\BINARY_IS_IDENTICAL, |
2477 | \ast\flags\BINARY_IS_NOT_EQUAL, |
2478 | \ast\flags\BINARY_IS_NOT_IDENTICAL, |
2479 | \ast\flags\BINARY_IS_SMALLER, |
2480 | \ast\flags\BINARY_IS_SMALLER_OR_EQUAL, |
2481 | \ast\flags\BINARY_MOD, |
2482 | \ast\flags\BINARY_MUL, |
2483 | \ast\flags\BINARY_POW, |
2484 | // BINARY_ADD handled below due to array addition. |
2485 | \ast\flags\BINARY_SUB, |
2486 | \ast\flags\BINARY_BOOL_AND, |
2487 | \ast\flags\BINARY_BOOL_OR, |
2488 | \ast\flags\BINARY_IS_GREATER, |
2489 | \ast\flags\BINARY_IS_GREATER_OR_EQUAL, |
2490 | \ast\flags\BINARY_SHIFT_LEFT, |
2491 | \ast\flags\BINARY_SHIFT_RIGHT, |
2492 | \ast\flags\BINARY_SPACESHIP, |
2493 | ]; |
2494 | |
2495 | // This list is mostly used for debugging purposes |
2496 | static $knownUnsafeOps = [ |
2497 | \ast\flags\BINARY_ADD, |
2498 | \ast\flags\BINARY_CONCAT, |
2499 | \ast\flags\BINARY_COALESCE, |
2500 | // The result of bitwise ops can be a string, so we err on the side of caution. |
2501 | \ast\flags\BINARY_BITWISE_AND, |
2502 | \ast\flags\BINARY_BITWISE_OR, |
2503 | \ast\flags\BINARY_BITWISE_XOR, |
2504 | ]; |
2505 | |
2506 | if ( in_array( $opNode->flags, $safeBinOps, true ) ) { |
2507 | return SecurityCheckPlugin::NO_TAINT; |
2508 | } |
2509 | if ( |
2510 | $opNode->flags === \ast\flags\BINARY_ADD && |
2511 | ( !$this->nodeCanBeArray( $lhs ) || !$this->nodeCanBeArray( $rhs ) ) |
2512 | ) { |
2513 | // Array addition is the only way `+` can preserve taintedness; if at least one operand |
2514 | // is definitely NOT an array, then the result will be an integer, or a fatal error will |
2515 | // occur (depending on the other operand). Note that if we cannot be 100% sure that the |
2516 | // node cannot be an array (e.g. if it has mixed type), we err on the side of caution and |
2517 | // consider it potentially tainted. |
2518 | return SecurityCheckPlugin::NO_TAINT; |
2519 | } |
2520 | |
2521 | if ( !in_array( $opNode->flags, $knownUnsafeOps, true ) ) { |
2522 | $this->debug( |
2523 | __METHOD__, |
2524 | 'Unhandled binop ' . Debug::astFlagDescription( $opNode->flags, $opNode->kind ) |
2525 | ); |
2526 | } |
2527 | |
2528 | return SecurityCheckPlugin::ALL_TAINT_FLAGS; |
2529 | } |
2530 | |
2531 | /** |
2532 | * Get the possible UnionType of a node, without emitting issues. |
2533 | * |
2534 | * @param Node $node |
2535 | * @return UnionType|null |
2536 | */ |
2537 | protected function getNodeType( Node $node ): ?UnionType { |
2538 | // Don't emit issues, as this method might be called e.g. on a LHS (see T249647). |
2539 | // FIXME Improve this. Is it still necessary now that we cache taintedness? |
2540 | $catchIssueException = false; |
2541 | // And since we don't emit issues, use a cloned context so phan won't cache any union type. In particular, |
2542 | // in the event of possibly-undefined union types, the issue about a variable being possibly undeclared would |
2543 | // get lost, because we don't emit it, and phan will cache the union type without the undefined bit. |
2544 | $ctx = clone $this->context; |
2545 | try { |
2546 | return UnionTypeVisitor::unionTypeFromNode( |
2547 | $this->code_base, |
2548 | $ctx, |
2549 | $node, |
2550 | $catchIssueException |
2551 | ); |
2552 | } catch ( IssueException $e ) { |
2553 | $this->debug( __METHOD__, "Got error " . $this->getDebugInfo( $e ) ); |
2554 | return null; |
2555 | } |
2556 | } |
2557 | |
2558 | /** |
2559 | * Given a Node, is it an array? (And definitely not a string) |
2560 | * |
2561 | * @param Node|mixed $node A node object or simple value from AST tree |
2562 | * @return bool Is it an array? |
2563 | */ |
2564 | protected function nodeIsArray( $node ): bool { |
2565 | if ( !( $node instanceof Node ) ) { |
2566 | // simple literal |
2567 | return false; |
2568 | } |
2569 | if ( $node->kind === \ast\AST_ARRAY ) { |
2570 | // Exit early in the simple case. |
2571 | return true; |
2572 | } |
2573 | $type = $this->getNodeType( $node ); |
2574 | return $type && $type->hasArrayLike( $this->code_base ) && |
2575 | !$type->hasMixedOrNonEmptyMixedType() && !$type->hasStringType(); |
2576 | } |
2577 | |
2578 | /** |
2579 | * Can $node potentially be an array? |
2580 | * |
2581 | * @param Node|mixed $node |
2582 | * @return bool |
2583 | */ |
2584 | protected function nodeCanBeArray( $node ): bool { |
2585 | if ( !( $node instanceof Node ) ) { |
2586 | return is_array( $node ); |
2587 | } |
2588 | $type = $this->getNodeType( $node ); |
2589 | if ( !$type ) { |
2590 | return true; |
2591 | } |
2592 | $type = $type->getRealUnionType(); |
2593 | return $type->hasArrayLike( $this->code_base ) || $type->hasMixedOrNonEmptyMixedType() || $type->isEmpty(); |
2594 | } |
2595 | |
2596 | /** |
2597 | * Given a Node, is it a string? |
2598 | * |
2599 | * @todo Unclear if this should return true for things that can |
2600 | * autocast to a string (e.g. ints) |
2601 | * @param Node|mixed $node A node object or simple value from AST tree |
2602 | * @return bool Is it a string? |
2603 | */ |
2604 | protected function nodeCanBeString( $node ): bool { |
2605 | if ( !( $node instanceof Node ) ) { |
2606 | // simple literal |
2607 | return is_string( $node ); |
2608 | } |
2609 | $type = $this->getNodeType( $node ); |
2610 | // @todo Should having mixed type result in returning false here? |
2611 | return $type && $type->hasStringType(); |
2612 | } |
2613 | |
2614 | /** |
2615 | * @param TypedElementInterface $el |
2616 | * @param bool $definitely Whether $el is *definitely* numkey, not just possibly |
2617 | * @return bool |
2618 | */ |
2619 | protected function elementCanBeNumkey( TypedElementInterface $el, bool $definitely ): bool { |
2620 | $type = $el->getUnionType()->getRealUnionType(); |
2621 | if ( $type->hasMixedOrNonEmptyMixedType() || $type->isEmpty() ) { |
2622 | return !$definitely; |
2623 | } |
2624 | if ( !$type->hasArray() ) { |
2625 | return false; |
2626 | } |
2627 | |
2628 | $keyTypes = GenericArrayType::keyUnionTypeFromTypeSetStrict( $el->getUnionType()->getRealTypeSet() ); |
2629 | // NOTE: This might lead to false positives if the array has mixed keys, but since we're talking about |
2630 | // SQLi, we prefer false positives. Also, the mixed keys case isn't fully handled, see backpropagateArgTaint |
2631 | return $definitely |
2632 | ? $keyTypes === GenericArrayType::KEY_INT |
2633 | : ( $keyTypes & GenericArrayType::KEY_INT ) !== 0; |
2634 | } |
2635 | |
2636 | /** |
2637 | * Given a Node that is used as array key, can the key be integer? |
2638 | * Floats are not considered ints here. |
2639 | * Note: this method cannot be 100% accurate. First, we don't use the real type, so we may have a false positive |
2640 | * if e.g. a parameter is annotated as string but the argument is an int. Second, even if something has a real type |
2641 | * and is not an integer, it could be a string that gets autocast to an integer. |
2642 | * |
2643 | * @param Node|mixed $node A node object or simple value from AST tree |
2644 | * @return bool Is it an int? |
2645 | * @fixme A lot of duplication with other similar methods... |
2646 | */ |
2647 | protected function nodeCanBeIntKey( $node ): bool { |
2648 | if ( !( $node instanceof Node ) ) { |
2649 | // simple number; make sure to include float here for PHP 8.1 compat: T307504 |
2650 | if ( is_int( $node ) || is_float( $node ) ) { |
2651 | return true; |
2652 | } |
2653 | // Strings that are canonical representation of numbers are coerced to int keys. |
2654 | $testArr = [ $node => 'foo' ]; |
2655 | $key = key( $testArr ); |
2656 | return is_int( $key ); |
2657 | } |
2658 | $type = $this->getNodeType( $node ); |
2659 | if ( !$type ) { |
2660 | return true; |
2661 | } |
2662 | return $type->hasIntType() || $type->hasMixedOrNonEmptyMixedType() || $type->isEmpty(); |
2663 | } |
2664 | |
2665 | /** |
2666 | * Get the phan objects from the return line of a Func/Method |
2667 | * |
2668 | * This is primarily used to handle the case where a method |
2669 | * returns a member (e.g. return $this->foo), and then something |
2670 | * else does something evil with it - e.g. echo $someObj->getFoo(). |
2671 | * This allows keeping track that $this->foo is outputted, so if |
2672 | * somewhere else in the code someone calls $someObj->setFoo( $unsafe ) |
2673 | * we can trigger a warning. |
2674 | * |
2675 | * This of course will only work in simple cases. It may also potentially |
2676 | * have false positives if one instance is used solely for escaped stuff |
2677 | * and a different instance is used for unsafe values that are later |
2678 | * escaped, as all the different instances are treated the same. |
2679 | * |
2680 | * It needs the return statement to be trivial (e.g. return $this->foo;). It |
2681 | * will not work even with something as simple as $a = $this->foo; return $a; |
2682 | * However, this code path will only happen if the plugin encounters the |
2683 | * code to output the value prior to reading the code that sets the value to |
2684 | * something evil. The other code path where the set happens first is much |
2685 | * more robust and hopefully the more common code path. |
2686 | * |
2687 | * @param FunctionInterface $func The function/method. Must use Analyzable trait |
2688 | * @return TypedElementInterface[] An array of phan objects |
2689 | */ |
2690 | public function getReturnObjsOfFunc( FunctionInterface $func ): array { |
2691 | $retObjs = self::getRetObjs( $func ); |
2692 | if ( $retObjs === null ) { |
2693 | // We still have to see the function. Analyze it now. |
2694 | $this->analyzeFunc( $func ); |
2695 | $retObjs = self::getRetObjs( $func ); |
2696 | if ( $retObjs === null ) { |
2697 | // If it still doesn't exist, perhaps we reached the recursion limit, or it may be a recursive |
2698 | // function, or a kind of function that we can't handle. |
2699 | return []; |
2700 | } |
2701 | } |
2702 | |
2703 | // Note that if a function is recursively calling itself, this list might be incomplete. |
2704 | // This could be remediated with another dynamic property (e.g. retObjsCollected), initialized |
2705 | // inside visitMethod in preorder, and set to true inside visitMethod in postorder. |
2706 | // It would be pointless, though, as returning a partial list is better than returning no list. |
2707 | return array_filter( |
2708 | $retObjs, |
2709 | static function ( TypedElementInterface $el ): bool { |
2710 | return !( $el instanceof Variable ); |
2711 | } |
2712 | ); |
2713 | } |
2714 | |
2715 | /** |
2716 | * Shorthand to check if $child is subclass of $parent. |
2717 | * |
2718 | * @param FullyQualifiedClassName $child |
2719 | * @param FullyQualifiedClassName $parent |
2720 | * @param CodeBase $codeBase |
2721 | * @return bool |
2722 | */ |
2723 | public static function isSubclassOf( |
2724 | FullyQualifiedClassName $child, |
2725 | FullyQualifiedClassName $parent, |
2726 | CodeBase $codeBase |
2727 | ): bool { |
2728 | return $child->asType()->asExpandedTypes( $codeBase )->hasType( $parent->asType() ); |
2729 | } |
2730 | } |