Code Coverage for /src/src/TaintednessBaseVisitor.php

	Code Coverage
	Lines			Functions and Methods				Classes and Traits
Total	88.43% covered (warning)	88.43%	1032 / 1167	67.80% covered (warning)	67.80%	40 / 59	CRAP	0.00% covered (danger)	0.00%	0 / 1
TaintednessBaseVisitor	88.43% covered (warning)	88.43%	1032 / 1167	67.80% covered (warning)	67.80%	40 / 59	811.97	0.00% covered (danger)	0.00%	0 / 1
addFuncTaint	100.00% covered (success)	100.00%	5 / 5	100.00% covered (success)	100.00%	1 / 1	2
ensureFuncTaintIsSet	100.00% covered (success)	100.00%	2 / 2	100.00% covered (success)	100.00%	1 / 1	2
maybeAddFuncError	100.00% covered (success)	100.00%	40 / 40	100.00% covered (success)	100.00%	1 / 1	22
mergeTaintError	100.00% covered (success)	100.00%	5 / 5	100.00% covered (success)	100.00%	1 / 1	2
mergeFuncError	100.00% covered (success)	100.00%	3 / 3	100.00% covered (success)	100.00%	1 / 1	1
addTaintError	87.50% covered (warning)	87.50%	7 / 8	0.00% covered (danger)	0.00%	0 / 1	7.10
ensureTaintednessIsSet	50.00% covered (danger)	50.00%	3 / 6	0.00% covered (danger)	0.00%	0 / 1	6.00
setTaintedness	100.00% covered (success)	100.00%	10 / 10	100.00% covered (success)	100.00%	1 / 1	5
getDefiningFuncIfDifferent	100.00% covered (success)	100.00%	5 / 5	100.00% covered (success)	100.00%	1 / 1	4
getPossibleFuncDefinitions	87.50% covered (warning)	87.50%	21 / 24	0.00% covered (danger)	0.00%	0 / 1	9.16
getTaintOfFunction	100.00% covered (success)	100.00%	24 / 24	100.00% covered (success)	100.00%	1 / 1	8
getSetKnownTaintOfFunctionWithoutAnalysis	100.00% covered (success)	100.00%	24 / 24	100.00% covered (success)	100.00%	1 / 1	8
analyzeFunc	94.74% covered (success)	94.74%	18 / 19	0.00% covered (danger)	0.00%	0 / 1	9.01
getDocBlockTaintOfFunc	100.00% covered (success)	100.00%	87 / 87	100.00% covered (success)	100.00%	1 / 1	20
getTaintByType	91.94% covered (success)	91.94%	57 / 62	0.00% covered (danger)	0.00%	0 / 1	33.57
getTaintMaskForTypedElement	100.00% covered (success)	100.00%	3 / 3	100.00% covered (success)	100.00%	1 / 1	2
getTaintMaskForType	100.00% covered (success)	100.00%	4 / 4	100.00% covered (success)	100.00%	1 / 1	2
getPossibleFutureTaintOfElement	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
getCurrentMethod	100.00% covered (success)	100.00%	2 / 2	100.00% covered (success)	100.00%	1 / 1	2
getTaintedness	100.00% covered (success)	100.00%	7 / 7	100.00% covered (success)	100.00%	1 / 1	3
getTaintednessNode	100.00% covered (success)	100.00%	7 / 7	100.00% covered (success)	100.00%	1 / 1	2
getTaintednessPhanObj	100.00% covered (success)	100.00%	7 / 7	100.00% covered (success)	100.00%	1 / 1	3
resolveOffset	100.00% covered (success)	100.00%	4 / 4	100.00% covered (success)	100.00%	1 / 1	3
resolveValue	100.00% covered (success)	100.00%	3 / 3	100.00% covered (success)	100.00%	1 / 1	2
getPropInCurrentScopeByName	100.00% covered (success)	100.00%	2 / 2	100.00% covered (success)	100.00%	1 / 1	1
getCtxN	100.00% covered (success)	100.00%	5 / 5	100.00% covered (success)	100.00%	1 / 1	1
getObjsForNodeForNumkeyBackprop	67.53% covered (warning)	67.53%	52 / 77	0.00% covered (danger)	0.00%	0 / 1	153.80
getPropFromNode	100.00% covered (success)	100.00%	7 / 7	100.00% covered (success)	100.00%	1 / 1	2
getDebugInfo	100.00% covered (success)	100.00%	3 / 3	100.00% covered (success)	100.00%	1 / 1	2
dbgInfo	100.00% covered (success)	100.00%	2 / 2	100.00% covered (success)	100.00%	1 / 1	2
linkParamAndFunc	100.00% covered (success)	100.00%	7 / 7	100.00% covered (success)	100.00%	1 / 1	2
mergeTaintDependencies	100.00% covered (success)	100.00%	9 / 9	100.00% covered (success)	100.00%	1 / 1	7
markAllDependentMethodsExec	97.22% covered (success)	97.22%	35 / 36	0.00% covered (danger)	0.00%	0 / 1	14
markAllDependentMethodsExecForNode	100.00% covered (success)	100.00%	12 / 12	100.00% covered (success)	100.00%	1 / 1	3
markAllDependentVarsYes	100.00% covered (success)	100.00%	24 / 24	100.00% covered (success)	100.00%	1 / 1	6
getCausedByLinesForFunc	100.00% covered (success)	100.00%	2 / 2	100.00% covered (success)	100.00%	1 / 1	1
getActualFuncWithCausedBy	100.00% covered (success)	100.00%	4 / 4	100.00% covered (success)	100.00%	1 / 1	2
debug	53.33% covered (warning)	53.33%	8 / 15	0.00% covered (danger)	0.00%	0 / 1	14.50
getCallableFromNode	88.89% covered (warning)	88.89%	16 / 18	0.00% covered (danger)	0.00%	0 / 1	10.14
getFirstElmFromArrayOrGenerator	66.67% covered (warning)	66.67%	2 / 3	0.00% covered (danger)	0.00%	0 / 1	4.59
taintToIssuesAndSeverities	96.67% covered (success)	96.67%	29 / 30	0.00% covered (danger)	0.00%	0 / 1	11
maybeEmitIssueSimplified	100.00% covered (success)	100.00%	7 / 7	100.00% covered (success)	100.00%	1 / 1	1
maybeEmitIssue	97.44% covered (success)	97.44%	38 / 39	0.00% covered (danger)	0.00%	0 / 1	13
isIssueSuppressedOrFalsePositive	100.00% covered (success)	100.00%	14 / 14	100.00% covered (success)	100.00%	1 / 1	4
handleMethodCall	100.00% covered (success)	100.00%	103 / 103	100.00% covered (success)	100.00%	1 / 1	22
translateNamedArg	100.00% covered (success)	100.00%	5 / 5	100.00% covered (success)	100.00%	1 / 1	3
backpropagateArgTaint	100.00% covered (success)	100.00%	5 / 5	100.00% covered (success)	100.00%	1 / 1	2
handlePassByRef	100.00% covered (success)	100.00%	21 / 21	100.00% covered (success)	100.00%	1 / 1	9
getPassByRefObjFromNode	54.55% covered (warning)	54.55%	6 / 11	0.00% covered (danger)	0.00%	0 / 1	14.01
getHardcodedPreservedTaintForFunc	70.78% covered (warning)	70.78%	172 / 243	0.00% covered (danger)	0.00%	0 / 1	260.00
getBinOpTaintMask	90.00% covered (success)	90.00%	36 / 40	0.00% covered (danger)	0.00%	0 / 1	6.04
getNodeType	100.00% covered (success)	100.00%	11 / 11	100.00% covered (success)	100.00%	1 / 1	2
nodeIsArray	85.71% covered (warning)	85.71%	6 / 7	0.00% covered (danger)	0.00%	0 / 1	6.10
nodeCanBeArray	100.00% covered (success)	100.00%	7 / 7	100.00% covered (success)	100.00%	1 / 1	5
nodeCanBeString	75.00% covered (warning)	75.00%	3 / 4	0.00% covered (danger)	0.00%	0 / 1	3.14
elementCanBeNumkey	100.00% covered (success)	100.00%	9 / 9	100.00% covered (success)	100.00%	1 / 1	5
nodeCanBeIntKey	90.00% covered (success)	90.00%	9 / 10	0.00% covered (danger)	0.00%	0 / 1	7.05
getReturnObjsOfFunc	91.67% covered (success)	91.67%	11 / 12	0.00% covered (danger)	0.00%	0 / 1	3.01
isSubclassOf	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1

1	<?php declare( strict_types=1 );
2
3	namespace SecurityCheckPlugin;
4
5	use ast\Node;
6	use Closure;
7	use Exception;
8	use Generator;
9	use Phan\AST\ASTReverter;
10	use Phan\AST\ContextNode;
11	use Phan\AST\UnionTypeVisitor;
12	use Phan\BlockAnalysisVisitor;
13	use Phan\CodeBase;
14	use Phan\Debug;
15	use Phan\Exception\CodeBaseException;
16	use Phan\Exception\FQSENException;
17	use Phan\Exception\IssueException;
18	use Phan\Exception\NodeException;
19	use Phan\Exception\UnanalyzableException;
20	use Phan\Issue;
21	use Phan\Language\Context;
22	use Phan\Language\Element\FunctionInterface;
23	use Phan\Language\Element\GlobalVariable;
24	use Phan\Language\Element\Method;
25	use Phan\Language\Element\PassByReferenceVariable;
26	use Phan\Language\Element\Property;
27	use Phan\Language\Element\TypedElementInterface;
28	use Phan\Language\Element\Variable;
29	use Phan\Language\FQSEN\FullyQualifiedClassName;
30	use Phan\Language\FQSEN\FullyQualifiedFunctionLikeName;
31	use Phan\Language\FQSEN\FullyQualifiedFunctionName;
32	use Phan\Language\FQSEN\FullyQualifiedMethodName;
33	use Phan\Language\Type\GenericArrayType;
34	use Phan\Language\Type\LiteralTypeInterface;
35	use Phan\Language\UnionType;
36
37	/**
38	* Trait for the Taintedness visitor subclasses. Mostly contains
39	* utility methods.
40	*
41	* Copyright (C) 2017 Brian Wolff <bawolff@gmail.com>
42	*
43	* This program is free software; you can redistribute it and/or modify
44	* it under the terms of the GNU General Public License as published by
45	* the Free Software Foundation; either version 2 of the License, or
46	* (at your option) any later version.
47	*
48	* This program is distributed in the hope that it will be useful,
49	* but WITHOUT ANY WARRANTY; without even the implied warranty of
50	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
51	* GNU General Public License for more details.
52	*
53	* You should have received a copy of the GNU General Public License along
54	* with this program; if not, write to the Free Software Foundation, Inc.,
55	* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
56	*/
57	/**
58	* @property-read Context $context
59	* @property-read \Phan\CodeBase $code_base
60	*/
61	trait TaintednessBaseVisitor {
62	use TaintednessAccessorsTrait;
63
64	/** @var null\|string\|bool\|resource filehandle to output debug messages */
65	private $debugOutput;
66
67	/** @var Context\|null Override the file/line number to emit issues */
68	protected $overrideContext;
69
70	/**
71	* @var bool[] FQSENs of classes without __toString, map of [ (string)FQSEN => true ]
72	*/
73	protected static $fqsensWithoutToStringCache = [];
74
/**
 * Merge the given taintedness into whatever taintedness $func already has.
 *
 * If the function has no taintedness yet, $taint is stored as-is; otherwise the
 * result of merging the current value with $taint is stored.
 *
 * @param FunctionInterface $func
 * @param FunctionTaintedness $taint
 */
protected function addFuncTaint( FunctionInterface $func, FunctionTaintedness $taint ): void {
	$existing = self::getFuncTaint( $func );
	$merged = $existing ? $existing->asMergedWith( $taint ) : $taint;
	self::doSetFuncTaint( $func, $merged );
}
90
/**
 * Make sure a function-like has some taintedness set, defaulting to safe.
 *
 * Nothing happens if the function already has a taintedness.
 *
 * @param FunctionInterface $func
 */
protected function ensureFuncTaintIsSet( FunctionInterface $func ): void {
	if ( self::getFuncTaint( $func ) ) {
		return;
	}
	$safeTaint = new FunctionTaintedness( Taintedness::newSafe() );
	self::doSetFuncTaint( $func, $safeTaint );
}
101
/**
 * Record caused-by lines on $func for the taintedness in $addedTaint.
 *
 * The lines are added to a clone (or an empty instance) of the caused-by data currently stored for
 * $func, and the result is stored back on $func. When $reason is empty, each section (sink params,
 * preserved params, variadic param, overall) is only updated if $allNewTaint says it can be overridden.
 *
 * @param FunctionInterface $func
 * @param Context|string|null $reason To override the caused-by line. A string is used verbatim; a
 *  Context (or null, meaning the current context) is turned into a line via dbgInfo().
 * @param FunctionTaintedness $addedTaint
 * @param FunctionTaintedness $allNewTaint
 * @param MethodLinks|null $returnLinks NOTE: These are only used for preserved params, since for sink params
 * we're already adding a Taintedness with the expected EXEC bits.
 */
private function maybeAddFuncError(
	FunctionInterface $func,
	$reason,
	FunctionTaintedness $addedTaint,
	FunctionTaintedness $allNewTaint,
	?MethodLinks $returnLinks = null
): void {
	if ( !is_string( $reason ) ) {
		$newErrors = [ $this->dbgInfo( $reason ?? $this->context ) ];
	} else {
		$newErrors = [ $reason ];
	}
	if ( $this->overrideContext && !( $this->isHook ?? false ) ) {
		// @phan-suppress-previous-line PhanUndeclaredProperty
		$newErrors[] = $this->dbgInfo( $this->overrideContext );
	}

	$hasReturnLinks = $returnLinks && !$returnLinks->isEmpty();

	// Future TODO: we might consider using PreservedTaintedness from the funcs instead of MethodLinks, but using
	// links is more consistent with what we do for non-function causedby lines.

	$newErr = self::getFuncCausedByRawCloneOrEmpty( $func );

	// Sink (EXEC) taint of non-variadic parameters.
	foreach ( $addedTaint->getSinkParamKeysNoVariadic() as $key ) {
		if ( $reason || $allNewTaint->canOverrideNonVariadicParam( $key ) ) {
			$curTaint = $addedTaint->getParamSinkTaint( $key );
			if ( $curTaint->has( SecurityCheckPlugin::ALL_EXEC_TAINT ) ) {
				$newErr->addParamSinkLines( $key, $newErrors, $curTaint->asExecToYesTaint() );
			}
		}
	}
	// Preserved taint of non-variadic parameters; only recorded when there are links to attach.
	foreach ( $addedTaint->getPreserveParamKeysNoVariadic() as $key ) {
		if ( $hasReturnLinks && ( $reason || $allNewTaint->canOverrideNonVariadicParam( $key ) ) ) {
			$newErr->addParamPreservedLines(
				$key,
				$newErrors,
				Taintedness::newSafe(),
				$returnLinks->asFilteredForFuncAndParam( $func, $key )
			);
		}
	}
	// Same two steps for the variadic parameter, if any.
	$variadicIndex = $addedTaint->getVariadicParamIndex();
	if ( $variadicIndex !== null && ( $reason || $allNewTaint->canOverrideVariadicParam() ) ) {
		$sinkVariadic = $addedTaint->getVariadicParamSinkTaint();
		if ( $sinkVariadic && $sinkVariadic->has( SecurityCheckPlugin::ALL_EXEC_TAINT ) ) {
			$newErr->addVariadicParamSinkLines(
				$variadicIndex,
				$newErrors,
				$sinkVariadic->asExecToYesTaint()
			);
		}
		if ( $hasReturnLinks ) {
			$newErr->addVariadicParamPreservedLines(
				$variadicIndex,
				$newErrors,
				Taintedness::newSafe(),
				$returnLinks->asFilteredForFuncAndParam( $func, $variadicIndex )
			);
		}
	}

	$curTaint = $addedTaint->getOverall();
	if ( ( $reason || $allNewTaint->canOverrideOverall() ) && $curTaint->has( SecurityCheckPlugin::ALL_TAINT ) ) {
		// Note, the generic error shouldn't have any link
		$newErr->addGenericLines( $newErrors, $curTaint );
	}

	self::setFuncCausedByRaw( $func, $newErr );
}
180
/**
 * Add the given caused-by lines to $element.
 *
 * If $element has no caused-by lines yet, $rightError is stored as-is; otherwise the result of
 * merging the current lines with $rightError is stored. Must not be used for function-likes.
 *
 * @param TypedElementInterface $element
 * @param CausedByLines $rightError
 */
protected function mergeTaintError( TypedElementInterface $element, CausedByLines $rightError ): void {
	// The message points at the function counterpart defined in this trait.
	assert( !$element instanceof FunctionInterface, 'Should use mergeFuncError' );

	$curError = self::getCausedByRaw( $element );
	$mergedError = $curError ? $curError->asMergedWith( $rightError ) : $rightError;

	self::setCausedByRaw( $element, $mergedError );
}
200
/**
 * Merge the given caused-by lines into those of a function-like.
 *
 * Works on a clone (or an empty instance) of the function's current caused-by data and stores
 * the merged result back on $func.
 *
 * @param FunctionInterface $func
 * @param FunctionCausedByLines $newError
 * @param FunctionTaintedness $allFuncTaint Used to check NO_OVERRIDE
 */
protected function mergeFuncError(
	FunctionInterface $func,
	FunctionCausedByLines $newError,
	FunctionTaintedness $allFuncTaint
): void {
	$mergedLines = self::getFuncCausedByRawCloneOrEmpty( $func );
	$mergedLines->mergeWith( $newError, $allFuncTaint );

	self::setFuncCausedByRaw( $func, $mergedLines );
}
215
/**
 * Add the current context to taintedOriginalError book-keeping
 *
 * This allows us to show users what line caused an issue. Nothing is recorded when $taintedness
 * has no taint bits and $links is null or empty. Must not be used for function-likes.
 *
 * @param TypedElementInterface $elem Where to put it
 * @param Taintedness $taintedness
 * @param MethodLinks|null $links
 * @param string|null $reason To override the caused by line
 */
protected function addTaintError(
	TypedElementInterface $elem,
	Taintedness $taintedness,
	?MethodLinks $links,
	?string $reason = null
): void {
	assert( !$elem instanceof FunctionInterface, 'Should use addFuncTaintError' );

	if ( !$taintedness->has( SecurityCheckPlugin::ALL_TAINT ) && ( !$links || $links->isEmpty() ) ) {
		// Don't add book-keeping if no actual taint was added.
		return;
	}

	$newErrors = $reason !== null ? [ $reason ] : [ $this->dbgInfo() ];
	if ( $this->overrideContext && !( $this->isHook ?? false ) ) {
		// @phan-suppress-previous-line PhanUndeclaredProperty
		$newErrors[] = $this->dbgInfo( $this->overrideContext );
	}

	// Work on a clone (or empty instance) so that the currently stored lines are not changed in place.
	$newErr = self::getCausedByRawCloneOrEmpty( $elem );
	$newErr->addLines( $newErrors, $taintedness, $links );
	self::setCausedByRaw( $elem, $newErr );
}
249
/**
 * Make sure the given variable object has some taintedness, initializing it to safe if it has none.
 *
 * For a GlobalVariable, the same is done for the element it wraps.
 *
 * @param TypedElementInterface $varObj
 */
protected function ensureTaintednessIsSet( TypedElementInterface $varObj ): void {
	if ( !self::getTaintednessRaw( $varObj ) ) {
		self::setTaintednessRaw( $varObj, Taintedness::newSafe() );
	}

	if ( !( $varObj instanceof GlobalVariable ) ) {
		return;
	}

	$wrappedVar = $varObj->getElement();
	if ( !self::getTaintednessRaw( $wrappedVar ) ) {
		self::setTaintednessRaw( $wrappedVar, Taintedness::newSafe() );
	}
}
266
/**
 * Change the taintedness of $variableObj.
 *
 * Properties of stdClass are skipped entirely. Otherwise the new value either replaces the current
 * taintedness ($override) or is merged with it.
 *
 * @param TypedElementInterface $variableObj
 * @param Taintedness $taintedness
 * @param bool $override Whether to replace the current taintedness instead of merging into it
 */
private function setTaintedness(
	TypedElementInterface $variableObj,
	Taintedness $taintedness,
	bool $override
): void {
	assert( !$variableObj instanceof FunctionInterface, 'Must use setFuncTaint for functions' );

	$isStdClassProp = $variableObj instanceof Property &&
		$variableObj->getClassFQSEN() === FullyQualifiedClassName::getStdClassFQSEN();
	if ( $isStdClassProp ) {
		// Phan conflates all stdClass props, see https://github.com/phan/phan/issues/3869
		// Avoid doing the same with taintedness, as that would cause weird issues (see
		// 'stdclassconflation' test).
		// TODO Is it possible to store prop taintedness in the Variable object?
		// that would be similar to a fine-grained handling of arrays.
		return;
	}

	// When overriding, the current value is irrelevant and is not even looked up.
	$current = $override ? null : self::getTaintednessRaw( $variableObj );

	// NOTE: Do NOT merge in place here, as that would change the taintedness for all variable
	// objects of which $variableObj is a clone!
	$updated = $current ? $current->asMergedWith( $taintedness ) : $taintedness;

	self::setTaintednessRaw( $variableObj, $updated );
}
307
/**
 * Return the defining func of $func, but only if it is a different function-like than $func itself.
 *
 * Only methods that have a defining FQSEN are considered; in every other case null is returned.
 *
 * @param FunctionInterface $func
 * @return FunctionInterface|null
 */
private function getDefiningFuncIfDifferent( FunctionInterface $func ): ?FunctionInterface {
	if ( !( $func instanceof Method ) || !$func->hasDefiningFQSEN() ) {
		return null;
	}

	$definingFQSEN = $func->getDefiningFQSEN();
	return $definingFQSEN === $func->getFQSEN()
		? null
		: $this->code_base->getMethodByFQSEN( $definingFQSEN );
}
323
/**
 * Yield the function-likes where taint info for $func may be found.
 *
 * The order is: $func itself, its defining func (if different), and then, for methods, the
 * same-named methods of the non-parent ancestors (interfaces/traits) found while walking up
 * the class hierarchy.
 *
 * @todo How to handle multiple function definitions (phan "alternates")
 * @param FunctionInterface $func
 * @return Generator<FunctionInterface>
 */
private function getPossibleFuncDefinitions( FunctionInterface $func ): Generator {
	yield $func;

	// If we don't have a defining func, stay with the same func.
	// definingFunc is used later on during fallback processing.
	$definingFunc = $this->getDefiningFuncIfDifferent( $func );
	if ( $definingFunc ) {
		yield $definingFunc;
	}

	if ( !( $func instanceof Method ) ) {
		return;
	}

	try {
		$curClass = $func->getClass( $this->code_base );
	} catch ( CodeBaseException $e ) {
		$this->debug( __METHOD__, "Class not found for func $func: " . $this->getDebugInfo( $e ) );
		return;
	}

	// Iterate through the whole hierarchy to see if the method was defined in an interface or trait. A few
	// notes on this:
	// - getNonParentAncestorFQSENList (and similar methods in Class and Method) only go one level up, and
	// would not give us e.g. the interfaces implemented by the parent class.
	// - asExpandedTypes would work, but it has a non-zero overhead, and most importantly, we would cause phan
	// to emit issues like RedefinedClass in places where phan wouldn't normally emit them.
	// - It's unclear whether this code should also look for method definitions in classes (and not just
	// interfaces/traits). And more generally, what would the expectations for *-taint annotations be.
	//
	// The number of levels is capped as a safeguard in case this goes out of control (e.g., broken
	// code with circular inheritance).
	for ( $level = 0; $level < 20; $level++ ) {
		foreach ( $curClass->getNonParentAncestorFQSENList() as $ancestorFQSEN ) {
			if ( !$this->code_base->hasClassWithFQSEN( $ancestorFQSEN ) ) {
				continue;
			}
			$ancestor = $this->code_base->getClassByFQSEN( $ancestorFQSEN );
			// TODO Assuming this is a direct invocation, but it doesn't always make sense
			$directInvocation = true;
			if ( $ancestor->hasMethodWithName( $this->code_base, $func->getName(), $directInvocation ) ) {
				yield $ancestor->getMethodByName( $this->code_base, $func->getName() );
			}
		}

		$hasKnownParent = $curClass->hasParentType() &&
			$this->code_base->hasClassWithFQSEN( $curClass->getParentClassFQSEN() );
		if ( !$hasKnownParent ) {
			break;
		}
		$curClass = $curClass->getParentClass( $this->code_base );
	}
}
383
/**
 * Get the taintedness of a function, method or other function-like thing.
 *
 * Sources are tried in this order: taintedness already stored on $func; hardcoded/annotated
 * taint; analysis of the function body (not for PHP internal functions); and finally a guess
 * based on the return type.
 *
 * @note This is not guaranteed to return a clone: the stored taintedness may be returned directly.
 *
 * @param FunctionInterface $func What function/method to look up
 * @return FunctionTaintedness
 */
protected function getTaintOfFunction( FunctionInterface $func ): FunctionTaintedness {
	$knownTaint = self::getFuncTaint( $func ) ?? $this->getSetKnownTaintOfFunctionWithoutAnalysis( $func );
	if ( $knownTaint ) {
		return $knownTaint;
	}

	if ( $func->isPHPInternal() ) {
		// PHP internal functions cannot be analyzed because they don't have a body.
		// Assume that anything really dangerous we've already hardcoded. So just preserve taint.
		$returnTypeTaint = $this->getTaintByType( $func->getUnionType() );
		if ( !$returnTypeTaint->isSafe() ) {
			$returnTypeTaint = new Taintedness( SecurityCheckPlugin::PRESERVE_TAINT );
		}
		$internalTaint = new FunctionTaintedness( $returnTypeTaint );
		// We're not adding any error here, since it's presumably unnecessary for PHP internal stuff.
		self::doSetFuncTaint( $func, $internalTaint );
		return $internalTaint;
	}

	$funcToAnalyze = $this->getDefiningFuncIfDifferent( $func ) ?? $func;
	$this->analyzeFunc( $funcToAnalyze );
	$analyzedFuncTaint = self::getFuncTaint( $funcToAnalyze );
	if ( $analyzedFuncTaint !== null ) {
		return $analyzedFuncTaint;
	}

	// If we haven't seen this function before, first of all check the return type. If it
	// returns a safe type (like int), it's safe.
	$fallbackTaint = new FunctionTaintedness( $this->getTaintByType( $func->getUnionType() ) );
	self::doSetFuncTaint( $func, $fallbackTaint );
	$this->maybeAddFuncError( $func, null, $fallbackTaint, $fallbackTaint );
	return $fallbackTaint;
}
431
/**
 * Look for hardcoded/annotated taint of a function, or taint it should inherit from an alternate
 * definition. If anything is found via annotations or builtin data, it is stored in the func object
 * and returned. As a last resort, the taintedness already stored on the defining func is returned.
 * In particular, this does NOT cause $func to be analyzed.
 *
 * @param FunctionInterface $func
 * @return FunctionTaintedness|null Null if nothing was found
 */
private function getSetKnownTaintOfFunctionWithoutAnalysis( FunctionInterface $func ): ?FunctionTaintedness {
	foreach ( $this->getPossibleFuncDefinitions( $func ) as $candidate ) {
		/** @var FunctionInterface $candidate */
		// PHP internal functions can't have a docblock.
		$docTaintData = $candidate->isPHPInternal() ? null : $this->getDocBlockTaintOfFunc( $candidate );
		if ( $docTaintData !== null ) {
			[ $docTaint, $docLinks ] = $docTaintData;
			self::doSetFuncTaint( $func, $docTaint );
			// TODO Make this more granular if possible
			$annotationReason = 'annotations in ' . $candidate->getFQSEN()->__toString();
			$this->maybeAddFuncError( $func, $annotationReason, $docTaint, $docTaint, $docLinks );
			return $docTaint;
		}

		$candidateFQSEN = $candidate->getFQSEN();
		$builtinTaint = SecurityCheckPlugin::$pluginInstance->getBuiltinFuncTaint( $candidateFQSEN );
		if ( $builtinTaint === null ) {
			continue;
		}

		$builtinTaint = clone $builtinTaint;
		self::doSetFuncTaint( $func, $builtinTaint );
		if ( !$func->isPHPInternal() ) {
			// Caused-by lines are presumably unnecessary for PHP internal stuff.
			$this->maybeAddFuncError( $func, "Builtin-$candidateFQSEN", $builtinTaint, $builtinTaint );
		}
		return $builtinTaint;
	}

	// Nothing hardcoded or annotated: fall back to whatever is already stored for the defining func.
	$definingFunc = $this->getDefiningFuncIfDifferent( $func );
	return $definingFunc ? self::getFuncTaint( $definingFunc ) : null;
}
480
/**
 * Analyze a function. This is very similar to Analyzable::analyze, but avoids several checks
 * used by phan for performance. Phan doesn't know about taintedness, so it may decide to skip
 * a re-analysis which we need.
 *
 * The analysis is skipped when the function has no node, when it is the function-like we are
 * currently inside of, when the nesting of analyzeFunc calls is too deep, or when the node is
 * a closure with a `uses` child.
 *
 * @todo This is a bit hacky.
 * @todo We should implement our own perf checks, e.g. if the method as already called with
 * the same taintedness, taint links, etc. for all params.
 * @see \Phan\Analysis\Analyzable::analyze()
 *
 * @param FunctionInterface $func
 */
public function analyzeFunc( FunctionInterface $func ): void {
	// Number of analyzeFunc calls currently in progress.
	static $nesting = 0;

	$node = $func->getNode();
	if ( !$node ) {
		return;
	}

	$isCurrentFunc = $this->context->isInFunctionLikeScope() &&
		$func->getFQSEN() === $this->context->getFunctionLikeFQSEN();
	if ( $isCurrentFunc ) {
		// Avoid pointless recursion
		return;
	}

	// @todo Tune the max depth. Raw benchmarking shows very little difference between e.g.
	// 5 and 10. However, while with higher values we can detect more issues and avoid more
	// false positives, it becomes harder to tell where an issue is coming from.
	// Thus, this value should be increased only when we'll have better error reporting.
	$tooDeep = $nesting > 5;
	if ( $tooDeep || ( $node->kind === \ast\AST_CLOSURE && isset( $node->children['uses'] ) ) ) {
		return;
	}
	$nesting++;

	// Like Analyzable::analyze, clone the context to avoid overriding anything
	$funcContext = clone $func->getContext();
	// @phan-suppress-next-line PhanUndeclaredMethod All implementations have it
	if ( $func->getRecursionDepth() !== 0 ) {
		// Add the arguments types to the internal scope of the function, see
		// https://github.com/phan/phan/issues/3848
		foreach ( $func->getParameterList() as $param ) {
			$funcContext->addScopeVariable( $param->cloneAsNonVariadic() );
		}
	}

	try {
		$visitor = new BlockAnalysisVisitor( $this->code_base, $funcContext );
		$visitor( $node );
	} finally {
		$nesting--;
	}
}
535
/**
 * Obtain taint information from a docblock comment.
 *
 * Parses `@param-taint` and `@return-taint` lines. Lines that cannot be parsed cause a
 * SecurityCheckInvalidAnnotation issue to be emitted at the function's context and are skipped.
 * Results of a full parse (including "nothing valid found") are cached per FQSEN; callers always get
 * objects that are distinct from the cached ones.
 *
 * @param FunctionInterface $func The function to check
 * @return array<FunctionTaintedness|MethodLinks>|null null for no info
 * @phan-return array{0:FunctionTaintedness,1:MethodLinks}|null
 */
protected function getDocBlockTaintOfFunc( FunctionInterface $func ): ?array {
	// Note that we're not using the hashed docblock for caching, because the same docblock
	// may have different meanings in different contexts. E.g. @return self
	$fqsen = (string)$func->getFQSEN();
	// array_key_exists and not isset, so that cached negative results (null) are used as well,
	// instead of re-parsing the docblock (and re-emitting issues) on every call.
	if ( array_key_exists( $fqsen, SecurityCheckPlugin::$docblockCache ) ) {
		$cached = SecurityCheckPlugin::$docblockCache[ $fqsen ];
		if ( $cached === null ) {
			return null;
		}
		[ $taint, $links ] = $cached;
		return [ clone $taint, clone $links ];
	}

	$docBlock = $func->getDocComment();
	if ( $docBlock === null ) {
		return null;
	}
	if ( strpos( $docBlock, '-taint' ) === false ) {
		// Lightweight check for methods that certainly aren't annotated
		return null;
	}
	$lines = explode( "\n", $docBlock );
	/** @param string[] $args */
	$invalidLineIssueEmitter = function ( string $msg, array $args ) use ( $func ): void {
		SecurityCheckPlugin::emitIssue(
			$this->code_base,
			// Emit issues at the line of the signature
			$func->getContext(),
			'SecurityCheckInvalidAnnotation',
			$msg,
			$args
		);
	};
	// Note, not forCaller, as that doesn't see variadic parameters
	$calleeParamList = $func->getParameterList();
	$validTaintEncountered = false;
	// Assume that if some of the taint is specified, then
	// the person would specify all the dangerous taints, so
	// don't set the unknown flag if not taint annotation on
	// @return.
	$funcTaint = new FunctionTaintedness( Taintedness::newSafe() );
	// TODO $fakeMethodLinks here is a bit hacky...
	$fakeMethodLinks = new MethodLinks();
	foreach ( $lines as $line ) {
		$m = [];
		// Strip trailing whitespace and the leading comment decoration.
		$trimmedLine = ltrim( rtrim( $line ), "* \t/" );
		if ( strpos( $trimmedLine, '@param-taint' ) === 0 ) {
			$matched = preg_match( SecurityCheckPlugin::PARAM_ANNOTATION_REGEX, $trimmedLine, $m );
			if ( !$matched ) {
				$invalidLineIssueEmitter( "Cannot parse taint line '{COMMENT}'", [ $trimmedLine ] );
				continue;
			}

			// Find the annotated parameter in the signature, by name.
			$paramNumber = null;
			$isVariadic = null;
			foreach ( $calleeParamList as $i => $param ) {
				if ( $m['paramname'] === $param->getName() ) {
					$paramNumber = $i;
					$isVariadic = $param->isVariadic();
					break;
				}
			}
			if ( $paramNumber === null ) {
				$invalidLineIssueEmitter(
					'Annotated parameter ${PARAMETER} not found in the signature',
					[ $m['paramname'] ]
				);
				continue;
			}

			// A variadic-ness mismatch is reported, but the annotation is still applied.
			$annotatedAsVariadic = $m['variadic'] !== '';
			if ( $isVariadic !== $annotatedAsVariadic ) {
				$msg = $isVariadic
					? 'Variadic parameter ${PARAMETER} should be annotated as `...${PARAMETER}`'
					: 'Non-variadic parameter ${PARAMETER} should be annotated as `${PARAMETER}`';
				$invalidLineIssueEmitter( $msg, [ $m['paramname'], $m['paramname'] ] );
			}
			$taintData = SecurityCheckPlugin::parseTaintLine( $m['taint'] );
			if ( $taintData === null ) {
				$invalidLineIssueEmitter( "Invalid param taintedness '{COMMENT}'", [ $m['taint'] ] );
				continue;
			}
			/** @var Taintedness $taint */
			[ $taint, $flags ] = $taintData;
			// EXEC bits describe the parameter as a sink; everything else is preserved taint.
			$sinkTaint = $taint->withOnly( SecurityCheckPlugin::ALL_EXEC_TAINT );
			$preserveTaint = $taint->without( SecurityCheckPlugin::ALL_EXEC_TAINT )->asPreservedTaintedness();
			if ( $isVariadic ) {
				$funcTaint->setVariadicParamSinkTaint( $paramNumber, $sinkTaint );
				$funcTaint->setVariadicParamPreservedTaint( $paramNumber, $preserveTaint );
				$funcTaint->addVariadicParamFlags( $flags );
			} else {
				$funcTaint->setParamSinkTaint( $paramNumber, $sinkTaint );
				$funcTaint->setParamPreservedTaint( $paramNumber, $preserveTaint );
				$funcTaint->addParamFlags( $paramNumber, $flags );
			}
			$fakeMethodLinks->initializeParamForFunc( $func, $paramNumber );
			$validTaintEncountered = true;
			if ( ( $taint->get() & SecurityCheckPlugin::ESCAPES_HTML ) === SecurityCheckPlugin::ESCAPES_HTML ) {
				// Special case to auto-set anything that escapes html to detect double escaping.
				$funcTaint->setOverall( $funcTaint->getOverall()->with( SecurityCheckPlugin::ESCAPED_TAINT ) );
			}
		} elseif ( strpos( $trimmedLine, '@return-taint' ) === 0 ) {
			// The cast guards against substr() returning false (PHP < 8) when nothing follows the tag,
			// which would otherwise be passed on as a non-string.
			$taintLine = (string)substr( $trimmedLine, strlen( '@return-taint' ) + 1 );
			$taintData = SecurityCheckPlugin::parseTaintLine( $taintLine );
			if ( $taintData === null ) {
				$invalidLineIssueEmitter( "Invalid return taintedness '{COMMENT}'", [ $taintLine ] );
				continue;
			}
			/** @var Taintedness $taint */
			[ $taint, $flags ] = $taintData;
			if ( $taint->has( SecurityCheckPlugin::ALL_EXEC_TAINT ) ) {
				$invalidLineIssueEmitter( "Return taintedness cannot be exec", [] );
				continue;
			}
			$funcTaint->setOverall( $taint );
			$funcTaint->addOverallFlags( $flags );
			$validTaintEncountered = true;
		}
	}

	if ( !$validTaintEncountered ) {
		$this->debug( __METHOD__, 'Possibly wrong taint annotation in docblock: ' . json_encode( $docBlock ) );
		SecurityCheckPlugin::$docblockCache[ $fqsen ] = null;
		return null;
	}

	// Cache clones and hand out the freshly built objects, so that nothing the caller does with the
	// returned values can alter the cached entry (same guarantee as on a cache hit).
	SecurityCheckPlugin::$docblockCache[ $fqsen ] = [ clone $funcTaint, clone $fakeMethodLinks ];
	return [ $funcTaint, $fakeMethodLinks ];
}
668
/**
 * Guess a taintedness from type information alone.
 *
 * Numeric, boolean, null/void and class-string/callable-* types contribute no taint, whereas
 * strings, arrays, generic objects, `mixed` and the like make the result "unknown". For a type
 * that refers to a specific class, the overall taint of its __toString() method is merged in
 * (or the result is flagged as unknown if the class has no such method).
 *
 * Only use as a fallback
 * @param UnionType $types The types
 * @return Taintedness
 */
protected function getTaintByType( UnionType $types ): Taintedness {
	// Type names that add no taint at all.
	static $safeTypeNames = [
		'int' => true,
		'non-zero-int' => true,
		'float' => true,
		'bool' => true,
		'false' => true,
		'true' => true,
		'null' => true,
		'void' => true,
		'class-string' => true,
		'callable-string' => true,
		'callable-object' => true,
		'callable-array' => true,
	];
	// Type names for which we cannot tell anything.
	static $unknownTypeNames = [
		'string' => true,
		'non-empty-string' => true,
		'Closure' => true,
		'callable' => true,
		'array' => true,
		'iterable' => true,
		'object' => true,
		'resource' => true,
		'mixed' => true,
		'non-empty-mixed' => true,
		'non-null-mixed' => true,
	];

	// NOTE: This flattens intersection types
	$flattenedTypes = $types->getUniqueFlattenedTypeSet();
	if ( !$flattenedTypes ) {
		// No type info whatsoever, so the taint is unknown.
		return new Taintedness( SecurityCheckPlugin::UNKNOWN_TAINT );
	}

	$result = new Taintedness( SecurityCheckPlugin::NO_TAINT );
	$mayBeUnknown = false;
	foreach ( $flattenedTypes as $type ) {
		if ( $type instanceof LiteralTypeInterface ) {
			// We're going to assume that literals aren't tainted...
			continue;
		}

		$typeName = $type->getName();
		if ( isset( $safeTypeNames[$typeName] ) ) {
			continue;
		}

		// TODO Can we do better for template types?
		if ( isset( $unknownTypeNames[$typeName] ) || $type->hasTemplateTypeRecursive() ) {
			$mayBeUnknown = true;
			continue;
		}

		if ( !$type->isObjectWithKnownFQSEN() ) {
			// Likely some phan-specific types not included above
			$this->debug( __METHOD__, " $type (" . get_class( $type ) . ') not a class?' );
			$mayBeUnknown = true;
			continue;
		}

		$classFqsenStr = $type->asFQSEN()->__toString();
		if ( isset( self::$fqsensWithoutToStringCache[$classFqsenStr] ) ) {
			// Already known not to have a __toString() method.
			$mayBeUnknown = true;
			continue;
		}

		// This means specific class, so look up __toString()
		$toStringFQSEN = FullyQualifiedMethodName::fromStringInContext(
			$classFqsenStr . '::__toString',
			$this->context
		);
		if ( !$this->code_base->hasMethodWithFQSEN( $toStringFQSEN ) ) {
			// This is common in a void context, e.g. code like $this->foo() will reach this check.
			self::$fqsensWithoutToStringCache[$classFqsenStr] = true;
			$mayBeUnknown = true;
			continue;
		}

		$toStringMethod = $this->code_base->getMethodByFQSEN( $toStringFQSEN );
		$excludedBits = SecurityCheckPlugin::PRESERVE_TAINT | SecurityCheckPlugin::ALL_EXEC_TAINT;
		$result->mergeWith(
			$this->getTaintOfFunction( $toStringMethod )->getOverall()->without( $excludedBits )
		);
	}

	if ( $mayBeUnknown ) {
		$result->add( SecurityCheckPlugin::UNKNOWN_TAINT );
	}
	return $result;
}
767
/**
 * Compute which taint types a typed element may have, using its real type to rule out
 * impossible taint types.
 *
 * @param TypedElementInterface $var
 * @return Taintedness|null Null means all taints, checking for null is faster than ORing
 */
protected function getTaintMaskForTypedElement( TypedElementInterface $var ): ?Taintedness {
	// TODO We wouldn't need to unwrap globals if phan didn't infer real types for global variables.
	// See https://github.com/phan/phan/issues/4518
	$element = $var instanceof GlobalVariable ? $var->getElement() : $var;

	// Note, we must use the real union type because:
	// 1 - The non-real type might be wrong
	// 2 - The non-real type might be incomplete (e.g. when analysing a func without docblock
	// we still don't know all the possible types of the params).
	$realType = $element->getUnionType()->getRealUnionType();
	return $this->getTaintMaskForType( $realType );
}
787
/**
 * Compute which taint types are allowed on an element with the given type.
 *
 * The mask is the type-based taintedness, unless that contains the "unknown" bit, in which
 * case no restriction applies.
 *
 * @param UnionType $type
 * @return Taintedness|null Null for all flags
 */
protected function getTaintMaskForType( UnionType $type ): ?Taintedness {
	$taintFromType = $this->getTaintByType( $type );
	return $taintFromType->has( SecurityCheckPlugin::UNKNOWN_TAINT ) ? null : $taintFromType;
}
802
/**
 * Determine which taint the element might acquire later on. For example, a function parameter
 * may start without taint and become tainted depending on the argument passed in.
 * Currently this is the same as the type-based taint mask of the element.
 * @todo Ensure this won't miss any case (aside from when phan infers a wrong real type)
 *
 * @param TypedElementInterface $el
 * @return Taintedness|null Null for all taints
 */
protected function getPossibleFutureTaintOfElement( TypedElementInterface $el ): ?Taintedness {
	$typeMask = $this->getTaintMaskForTypedElement( $el );
	return $typeMask;
}
814
/**
 * Name of the function-like scope currently being analyzed (for debugging purposes)
 *
 * @return string FQSEN of the current function-like, or "[no method]" outside of one
 */
protected function getCurrentMethod(): string {
	if ( !$this->context->isInFunctionLikeScope() ) {
		return '[no method]';
	}
	return (string)$this->context->getFunctionLikeFQSEN();
}
824
/**
 * Get the taintedness of an element of the AST tree.
 *
 * Nodes are delegated to getTaintednessNode(); anything else must be a scalar or null and is
 * reported as having no taint, no caused-by lines and no method links.
 *
 * @param mixed $expr An expression from the AST tree.
 * @return TaintednessWithError
 */
protected function getTaintedness( $expr ): TaintednessWithError {
	if ( !( $expr instanceof Node ) ) {
		assert( $expr === null || is_scalar( $expr ) );
		// Optim: avoid using TaintednessWithError::newEmpty()
		$noTaint = new Taintedness( SecurityCheckPlugin::NO_TAINT );
		return new TaintednessWithError( $noTaint, new CausedByLines(), new MethodLinks() );
	}

	return $this->getTaintednessNode( $expr );
}
844
/**
 * Find the taint of an AST node, preferring the result cached on the node itself.
 * This always returns a copy.
 *
 * @param Node $node
 * @return TaintednessWithError
 * @suppress PhanUndeclaredProperty
 */
protected function getTaintednessNode( Node $node ): TaintednessWithError {
	// Performance: use isset(), not property_exists()
	if ( isset( $node->taint ) ) {
		// Cached result. The cache hit ratio should ideally be 100%, because we should never have to
		// retrieve the taintedness of a node without having analyzed it first. For now the ratio is
		// lower because we don't cache the result of cheap nodes.
		return $node->taint;
	}
	// TODO This might just a return a default if no cached data.

	// Temporarily move the context to the node's line, or the same issue may be reported
	// more than once on different lines (see test 'multilineissue').
	$previousLine = $this->context->getLineNumberStart();
	$this->context->setLineNumberStart( $node->lineno );

	$nodeVisitor = new TaintednessVisitor( $this->code_base, $this->context );
	try {
		$nodeTaint = $nodeVisitor->analyzeNodeAndGetTaintedness( $node );
	} finally {
		// Always restore the original line, even if the analysis throws.
		$this->context->setLineNumberStart( $previousLine );
	}
	return $nodeTaint;
}
875
/**
 * Find the taint of a phan object that is not a method/function. This always returns a copy
 * for existing objects.
 *
 * If taintedness was already stored on the object, it is returned restricted to the taint
 * types allowed by the object's type. Otherwise the taintedness is guessed from the type.
 *
 * @param TypedElementInterface $variableObj
 * @return Taintedness
 */
protected function getTaintednessPhanObj( TypedElementInterface $variableObj ): Taintedness {
	assert( !$variableObj instanceof FunctionInterface, "This method cannot be used with methods" );

	$storedTaint = self::getTaintednessRaw( $variableObj );
	if ( $storedTaint === null ) {
		// First time we see this object: fall back to its type.
		return $this->getTaintByType( $variableObj->getUnionType() );
	}

	$typeMask = $this->getTaintMaskForTypedElement( $variableObj );
	if ( $typeMask === null ) {
		// No restriction from the type; just hand out a copy.
		return clone $storedTaint;
	}
	return $storedTaint->withOnly( $typeMask->get() );
}
898
/**
 * Shortcut to resolve array offsets, which includes:
 * - Ensuring that the value is not null: null is used for implicit dims like in `$a[] = $b`; we can't say
 *   for sure what the offset will be, and this method would return null (interpreted as offset 0), which is
 *   most likely wrong.
 * - Casting floats to integers, since using a float as array key raises a warning (and crashes taint-check)
 *   in PHP 8.1 (T307504)
 * - Returning the raw offset untouched when it resolves to a resource (e.g. `STDIN`), since resources
 *   are not scalar and certainly not valid offsets (see https://github.com/phan/phan/issues/4659).
 *
 * @param Node|mixed $rawOffset
 * @return Node|mixed
 */
protected function resolveOffset( $rawOffset ) {
	assert( $rawOffset !== null );
	$value = $this->resolveValue( $rawOffset );
	// phpcs:ignore MediaWiki.Usage.ForbiddenFunctions.is_resource
	if ( is_resource( $value ) ) {
		return $rawOffset;
	}
	if ( is_float( $value ) ) {
		return (int)$value;
	}
	return $value;
}
921
/**
 * Shortcut to try and turn an AST element (Node or already literal) into an equivalent PHP
 * scalar value. Non-Node values are returned unchanged.
 *
 * @param Node|mixed $value A Node or a scalar value from the AST
 * @return Node|mixed An equivalent scalar PHP value, or $value if it cannot be resolved
 */
protected function resolveValue( $value ) {
	return $value instanceof Node
		? $this->getCtxN( $value )->getEquivalentPHPScalarValue()
		: $value;
}
935
/**
 * Look up a property of the class in the current scope by name. Both being in a class scope and
 * the existence of the property are only checked via assertions.
 * @param string $propName
 * @return Property
 */
private function getPropInCurrentScopeByName( string $propName ): Property {
	assert( $this->context->isInClassScope() );
	$classInScope = $this->context->getClassInScope( $this->code_base );
	assert( $classInScope->hasPropertyWithName( $this->code_base, $propName ) );

	return $classInScope->getPropertyByName( $this->code_base, $propName );
}
948
/**
 * Convenience factory: wrap a node in a ContextNode bound to the current code base and context.
 *
 * @param Node|mixed $node
 * @return ContextNode
 */
protected function getCtxN( $node ): ContextNode {
	$contextNode = new ContextNode( $this->code_base, $this->context, $node );
	return $contextNode;
}
962
/**
 * Given a node, return the Phan variable objects that
 * correspond to that node to which we can backpropagate a NUMKEY taintedness.
 *
 * The node is inspected recursively: properties and variables are returned directly (if they can
 * be numkey), container-like nodes (arrays, encapsed strings, casts, dims, operators,
 * conditionals, inc/dec) are unwrapped, and calls are only followed when the
 * SECCHECK_NUMKEY_SPERIMENTAL environment variable is set.
 *
 * @todo This should be handled together with the non-numkey case
 *
 * @param Node $node AST node in question
 * @return TypedElementInterface[] Array of various phan objects corresponding to $node
 */
protected function getObjsForNodeForNumkeyBackprop( Node $node ): array {
	$cn = $this->getCtxN( $node );

	// TODO For now we only backprop in the simple case, to avoid tons of false positives, unless
	// the env flag is set (chiefly for tests)
	$definitelyNumkey = !getenv( 'SECCHECK_NUMKEY_SPERIMENTAL' );

	switch ( $node->kind ) {
		case \ast\AST_PROP:
		case \ast\AST_NULLSAFE_PROP:
		case \ast\AST_STATIC_PROP:
			$prop = $this->getPropFromNode( $node );
			return $prop && $this->elementCanBeNumkey( $prop, $definitelyNumkey ) ? [ $prop ] : [];
		case \ast\AST_VAR:
		case \ast\AST_CLOSURE_VAR:
			if ( Variable::isHardcodedGlobalVariableWithName( $cn->getVariableName() ) ) {
				return [];
			}
			try {
				$var = $cn->getVariable();
				return $this->elementCanBeNumkey( $var, $definitelyNumkey ) ? [ $var ] : [];
			} catch ( NodeException | IssueException $e ) {
				$this->debug( __METHOD__, "variable not in scope?? " . $this->getDebugInfo( $e ) );
				return [];
			}
		case \ast\AST_ENCAPS_LIST:
		case \ast\AST_ARRAY:
			$results = [];
			foreach ( $node->children as $child ) {
				if ( !$child instanceof Node ) {
					continue;
				}

				// Not every child of an array literal is an AST_ARRAY_ELEM (e.g. spread elements are
				// AST_UNPACK nodes without a 'key' child), so don't assume that the key exists.
				$childKey = $child->children['key'] ?? null;
				if (
					$node->kind === \ast\AST_ARRAY &&
					$childKey !== null && !$this->nodeCanBeIntKey( $childKey )
				) {
					// Element with a key that cannot be an integer: not relevant for numkey.
					continue;
				}
				$results = array_merge( $this->getObjsForNodeForNumkeyBackprop( $child ), $results );
			}
			return $results;
		case \ast\AST_ARRAY_ELEM:
			$results = [];
			if ( $node->children['key'] instanceof Node ) {
				$results = array_merge(
					$this->getObjsForNodeForNumkeyBackprop( $node->children['key'] ),
					$results
				);
			}
			if ( $node->children['value'] instanceof Node ) {
				$results = array_merge(
					$this->getObjsForNodeForNumkeyBackprop( $node->children['value'] ),
					$results
				);
			}
			return $results;
		case \ast\AST_CAST:
			// Future todo might be to ignore casts to ints, since
			// such things should be safe. Unclear if that makes
			// sense in all circumstances.
			if ( $node->children['expr'] instanceof Node ) {
				return $this->getObjsForNodeForNumkeyBackprop( $node->children['expr'] );
			}
			return [];
		case \ast\AST_DIM:
			if ( $node->children['expr'] instanceof Node ) {
				// For now just consider the outermost array.
				// FIXME. doesn't handle tainted array keys!
				return $this->getObjsForNodeForNumkeyBackprop( $node->children['expr'] );
			}
			return [];
		case \ast\AST_UNARY_OP:
			$var = $node->children['expr'];
			return $var instanceof Node ? $this->getObjsForNodeForNumkeyBackprop( $var ) : [];
		case \ast\AST_BINARY_OP:
			$left = $node->children['left'];
			$right = $node->children['right'];
			$leftObj = $left instanceof Node ? $this->getObjsForNodeForNumkeyBackprop( $left ) : [];
			$rightObj = $right instanceof Node ? $this->getObjsForNodeForNumkeyBackprop( $right ) : [];
			return array_merge( $leftObj, $rightObj );
		case \ast\AST_CONDITIONAL:
			$t = $node->children['true'];
			$f = $node->children['false'];
			$tObj = $t instanceof Node ? $this->getObjsForNodeForNumkeyBackprop( $t ) : [];
			$fObj = $f instanceof Node ? $this->getObjsForNodeForNumkeyBackprop( $f ) : [];
			return array_merge( $tObj, $fObj );
		case \ast\AST_CONST:
		case \ast\AST_CLASS_CONST:
		case \ast\AST_CLASS_NAME:
		case \ast\AST_MAGIC_CONST:
		case \ast\AST_ISSET:
		case \ast\AST_NEW:
			// For now we don't do methods, only variables
			// Also don't do args to function calls.
			// Unclear if this makes sense.
			return [];
		case \ast\AST_CALL:
		case \ast\AST_STATIC_CALL:
		case \ast\AST_METHOD_CALL:
		case \ast\AST_NULLSAFE_METHOD_CALL:
			if ( $definitelyNumkey ) {
				// This case is too hard for now.
				return [];
			}
			// @todo Future todo might be to still return arguments when catching an exception.
			if ( $node->kind === \ast\AST_CALL ) {
				$callee = $node->children['expr'];
				// The callee is not necessarily a Node, so check before reading its kind.
				if ( !$callee instanceof Node || $callee->kind !== \ast\AST_NAME ) {
					// TODO Handle this case!
					return [];
				}
				try {
					$func = $cn->getFunction( $callee->children['name'] );
				} catch ( IssueException | FQSENException $e ) {
					$this->debug( __METHOD__, "FIXME func not found: " . $this->getDebugInfo( $e ) );
					return [];
				}
			} else {
				$methodName = $node->children['method'];
				try {
					$func = $cn->getMethod( $methodName, $node->kind === \ast\AST_STATIC_CALL, true );
				} catch ( NodeException | CodeBaseException | IssueException $e ) {
					$this->debug( __METHOD__, "FIXME method not found: " . $this->getDebugInfo( $e ) );
					return [];
				}
			}
			try {
				return $this->getReturnObjsOfFunc( $func );
			} catch ( Exception $e ) {
				$this->debug( __METHOD__, "FIXME: " . $this->getDebugInfo( $e ) );
				return [];
			}
		case \ast\AST_PRE_INC:
		case \ast\AST_PRE_DEC:
		case \ast\AST_POST_INC:
		case \ast\AST_POST_DEC:
			$children = $node->children;
			assert( count( $children ) === 1 );
			return $this->getObjsForNodeForNumkeyBackprop( reset( $children ) );
		default:
			// TODO Should probably handle AST_MATCH & friends
			// Debug::printNode( $node );
			// This should really be a visitor that recurses into
			// things.
			$this->debug( __METHOD__, "FIXME unhandled case"
				. Debug::nodeName( $node ) . "\n"
			);
			return [];
	}
}
1123
/**
 * Resolve a property-access node to the corresponding Property object.
 *
 * @param Node $node
 * @return Property|null Null (after logging a debug message) if the property cannot be determined
 */
protected function getPropFromNode( Node $node ): ?Property {
	$isStaticProp = $node->kind === \ast\AST_STATIC_PROP;
	try {
		return $this->getCtxN( $node )->getProperty( $isStaticProp );
	} catch ( NodeException | IssueException | UnanalyzableException $e ) {
		$reason = $this->getDebugInfo( $e );
		$this->debug( __METHOD__, "Cannot determine property (Maybe don't know what class) - " . $reason );
		return null;
	}
}
1139
/**
 * Build a short debug string for an exception: the string form of the issue instance for
 * IssueException, or the class name followed by the message for anything else.
 * @param Exception $e
 * @return string
 */
protected function getDebugInfo( Exception $e ): string {
	if ( $e instanceof IssueException ) {
		return $e->getIssueInstance()->__toString();
	}
	return get_class( $e ) . ' ' . $e->getMessage();
}
1150
/**
 * Get the current filename and line.
 *
 * @param Context|null $context Override the context to make debug info for; defaults to the
 *  visitor's current context
 * @return string path/to/file +linenumber
 */
protected function dbgInfo( ?Context $context = null ): string {
	// The nullable type is spelled out explicitly: implicit nullability through a null default
	// is deprecated as of PHP 8.4.
	$ctx = $context ?? $this->context;
	// Using a + instead of : so that I can just copy and paste
	// into a vim command line.
	return $ctx->getFile() . ' +' . $ctx->getLineNumberStart();
}
1163
/**
 * Link together a Method and its parameters, the idea being if the method gets called with something evil
 * later, we can traceback anything it might affect.
 * Note that we don't do this for functions with hardcoded taint, in which case we assume that any dangerous
 * association was already hardcoded. This is also good for performance, because hardcoded function tend to be
 * used a lot (for MW, think of methods in Database or in Html).
 *
 * @param Variable $param The variable object for the parameter. This can also be
 *  instance of Parameter (subclass of Variable).
 * @param FunctionInterface $func The function/method in question
 * @param int $i Which argument number is $param
 */
protected function linkParamAndFunc( Variable $param, FunctionInterface $func, int $i ): void {
	// TODO Use $func's builtin/annotated taintedness (available in PreTaintednessVisitor) to check this per
	// parameter (looking at NO_OVERRIDE)
	$hasHardcodedTaint = SecurityCheckPlugin::$pluginInstance->builtinFuncHasTaint( $func->getFQSEN() );
	if ( $hasHardcodedTaint ) {
		return;
	}

	self::ensureVarLinksForArgExist( $func, $i );

	// Work on a copy of the parameter's links (or a fresh object) and store it back on the parameter.
	$linksOfParam = self::getMethodLinksCloneOrEmpty( $param );
	$linksOfParam->initializeParamForFunc( $func, $i );
	self::setMethodLinks( $param, $linksOfParam );
}
1192
/**
 * Given a LHS and RHS make all the methods that can set RHS also for LHS
 *
 * Given 2 variables (e.g. $lhs = $rhs ), see to it that any function/method
 * which we marked as being able to set the value of rhs, is also marked
 * as being able to set the value of lhs. We use this information to figure
 * out what method parameter is causing the return statement to be tainted.
 *
 * @warning Be careful calling this function if lhs already has taint
 *  or rhs side is a compound statement. This could result in misattribution
 *  of where the taint is coming from.
 *
 * @param TypedElementInterface $lhs Element whose links are updated
 * @param MethodLinks $rhsLinks New links
 * @param bool $override Whether to replace the existing links of $lhs rather than merging with them
 */
protected function mergeTaintDependencies(
	TypedElementInterface $lhs,
	MethodLinks $rhsLinks,
	bool $override
): void {
	// So if we have $a = $b;
	// First we find out all the methods that can set $b
	// Then we add $a to the list of variables that those methods can set.
	// Last we add these methods to $a's list of all methods that can set it.

	$existingLinks = self::getMethodLinks( $lhs );
	$resultingLinks = ( $override || !$existingLinks )
		? $rhsLinks
		: $existingLinks->asMergedWith( $rhsLinks );

	// Don't attach things like Variable and Parameter. These are local elements, and setting taint
	// on them in markAllDependentVarsYes would have no effect. Additionally, since phan creates a new
	// Parameter object for each analysis, we will end up with duplicated links that do nothing but
	// eating memory.
	$shouldAttach = $lhs instanceof Property
		|| $lhs instanceof GlobalVariable
		|| $lhs instanceof PassByReferenceVariable;
	if ( $shouldAttach ) {
		foreach ( $resultingLinks->getMethodAndParamTuples() as [ $linkedFunc, $paramIdx ] ) {
			$linksOfParam = self::getVarLinks( $linkedFunc, $paramIdx );
			assert( $linksOfParam instanceof VarLinksSet );
			$linksOfParam->attach(
				$lhs,
				$resultingLinks->asPreservedTaintednessForFuncParam( $linkedFunc, $paramIdx )
			);
		}
	}

	self::setMethodLinks( $lhs, $resultingLinks );
}
1243
/**
 * Mark any function setting a specific variable as EXEC taint
 *
 * If you do something like echo $this->foo;
 * This method is called to make all things that set $this->foo
 * as TAINT_EXEC.
 *
 * Nothing is done if the element's possible future taint rules out $taint, if no backpropagable
 * taint is left, if the issue is suppressed or a false positive, or if the element has no
 * method links.
 *
 * @note This might have annoying false positives with widely used properties
 * that are used with different levels of escaping, which is not a good idea anyway.
 *
 * @param TypedElementInterface $var The variable in question
 * @param Taintedness $taint What taint to mark them as.
 * @param CausedByLines|null $additionalError Any extra caused-by lines to add
 */
protected function markAllDependentMethodsExec(
	TypedElementInterface $var,
	Taintedness $taint,
	?CausedByLines $additionalError = null
): void {
	// Note: $additionalError is explicitly nullable, as implicit nullability via a null default
	// is deprecated as of PHP 8.4.
	$futureTaint = $this->getPossibleFutureTaintOfElement( $var );
	if ( $futureTaint !== null && !$futureTaint->has( $taint->get() ) ) {
		return;
	}
	// Ensure we only set exec bits, not normal taint bits.
	$taint = $taint->withOnly( SecurityCheckPlugin::BACKPROP_TAINTS );
	if ( $taint->isSafe() || $this->isIssueSuppressedOrFalsePositive( $taint ) ) {
		return;
	}

	$varLinks = self::getMethodLinks( $var );
	if ( $varLinks === null || $varLinks->isEmpty() ) {
		return;
	}
	$backpropError = self::getCausedByRawCloneOrEmpty( $var );
	if ( $additionalError ) {
		$backpropError->mergeWith( $additionalError );
	}

	// $this->debug( __METHOD__, "Setting {$var->getName()} exec {$taint->toShortString()}" );
	// Peak memory is sampled before and after, to report suspicious spikes below.
	$oldMem = memory_get_peak_usage();
	foreach ( $taint->decomposeForLinks( $varLinks ) as [ $curLinks, $curTaint ] ) {
		/** @var LinksSet $curLinks */
		/** @var Taintedness $curTaint */
		foreach ( $curLinks as $method ) {
			$paramInfo = $curLinks[$method];
			// Note, not forCaller, as that doesn't see variadic parameters
			$calleeParamList = $method->getParameterList();
			// Collect the sink taint and caused-by lines of every linked parameter of this method.
			$paramTaint = new FunctionTaintedness( Taintedness::newSafe() );
			$funcError = new FunctionCausedByLines();
			foreach ( $paramInfo->getParams() as $i => $paramOffsets ) {
				$curParTaint = $curTaint->asMovedAtRelevantOffsetsForBackprop( $paramOffsets );
				$curBackpropError = $backpropError
					->withTaintAddedToMethodArgLinks( $curParTaint->asExecToYesTaint(), $method, $i );
				if ( isset( $calleeParamList[$i] ) && $calleeParamList[$i]->isVariadic() ) {
					$paramTaint->setVariadicParamSinkTaint( $i, $curParTaint );
					$funcError->setVariadicParamSinkLines( $i, $curBackpropError );
				} else {
					$paramTaint->setParamSinkTaint( $i, $curParTaint );
					$funcError->setParamSinkLines( $i, $curBackpropError );
				}
				// $this->debug( __METHOD__, "Setting method $method arg $i as $taint due to dependency on $var" );
			}
			$this->addFuncTaint( $method, $paramTaint );
			$newFuncTaint = self::getFuncTaint( $method );
			assert( $newFuncTaint !== null );
			$this->maybeAddFuncError( $method, null, $paramTaint, $newFuncTaint );
			$this->mergeFuncError( $method, $funcError, $newFuncTaint );
		}
	}

	$newMem = memory_get_peak_usage();
	$diffMem = round( ( $newMem - $oldMem ) / ( 1024 * 1024 ) );
	if ( $diffMem > 2 ) {
		$this->debug( __METHOD__, "Memory spike $diffMem for variable " . $var->getName() );
	}
}
1320
/**
 * Mark any function setting the elements behind a given node as EXEC taint
 *
 * If you do something like echo $this->foo;
 * This method is called to make all things that set $this->foo
 * as TAINT_EXEC.
 *
 * In the normal case the work is delegated to a TaintednessBackpropVisitor. When $tempNumkey is
 * true, the phan objects for the node are collected with getObjsForNodeForNumkeyBackprop()
 * and each of them (deduplicated) is passed to markAllDependentMethodsExec().
 *
 * @note This might have annoying false positives with widely used properties
 * that are used with different levels of escaping, which is not a good idea anyway.
 *
 * @param Node $node
 * @param Taintedness $taint What taint to mark them as.
 * @param CausedByLines|null $additionalError Additional caused-by lines to propagate
 * @param bool $tempNumkey Temporary param
 */
protected function markAllDependentMethodsExecForNode(
	Node $node,
	Taintedness $taint,
	?CausedByLines $additionalError = null,
	bool $tempNumkey = false
): void {
	// Note: $additionalError is explicitly nullable, as implicit nullability via a null default
	// is deprecated as of PHP 8.4.
	if ( !$tempNumkey ) {
		$backpropVisitor = new TaintednessBackpropVisitor(
			$this->code_base,
			$this->context,
			$taint,
			$additionalError
		);
		$backpropVisitor( $node );
		return;
	}
	$phanObjs = $this->getObjsForNodeForNumkeyBackprop( $node );
	foreach ( array_unique( $phanObjs ) as $phanObj ) {
		$this->markAllDependentMethodsExec( $phanObj, $taint, $additionalError );
	}
}
1357
/**
 * This happens when someone calls foo( $evilTaintedVar );
 *
 * It makes sure that any variable that the function foo() sets takes on
 * the taint of the supplied argument. PHP-internal functions are skipped, and so are
 * parameters without any variable links.
 *
 * @param FunctionInterface $method The function or method in question
 * @param int $i The number of the argument in question.
 * @param Taintedness $taint The taint to apply.
 * @param CausedByLines $error Caused-by lines to propagate
 */
protected function markAllDependentVarsYes(
	FunctionInterface $method,
	int $i,
	Taintedness $taint,
	CausedByLines $error
): void {
	if ( $method->isPHPInternal() ) {
		return;
	}
	$linkedVars = self::getVarLinks( $method, $i );
	if ( $linkedVars === null ) {
		return;
	}

	$argTaint = $taint->withOnly( SecurityCheckPlugin::ALL_TAINT );

	// Applies the given taint to a single element: caused-by lines first, then the taintedness
	// itself, then the taint error.
	$applyTaint = function ( $target, Taintedness $newTaint ) use ( $method, $i ): void {
		$causedBy = self::getCausedByRawCloneOrEmpty( $target )
			->withTaintAddedToMethodArgLinks( $newTaint, $method, $i );
		self::setCausedByRaw( $target, $causedBy );
		$this->setTaintedness( $target, $newTaint, false );
		$this->addTaintError( $target, $newTaint, null );
	};

	foreach ( $linkedVars as $linkedVar ) {
		$preservedTaint = $linkedVars[$linkedVar];
		// TODO Unwrapping pass-by-ref variables should become unnecessary once the TODO in
		// handleMethodCall about postponing handlePassByRef is resolved.
		$element = $linkedVar instanceof PassByReferenceVariable ? $linkedVar->getElement() : $linkedVar;
		assert( $element instanceof TypedElementInterface );

		$taintToPropagate = $preservedTaint->asTaintednessForArgument( $argTaint );

		$applyTaint( $element, $taintToPropagate );
		if ( $element instanceof GlobalVariable ) {
			// Also update the element wrapped by the GlobalVariable object.
			$applyTaint( $element->getElement(), $taintToPropagate );
		}
		$this->mergeTaintError( $element, $error );
	}
}
1412
/**
 * Get the original cause of taint for the given func, reading it from the function where
 * caused-by data is actually stored.
 *
 * @param FunctionInterface $element
 * @return FunctionCausedByLines
 */
private function getCausedByLinesForFunc( FunctionInterface $element ): FunctionCausedByLines {
	return self::getFuncCausedByRawCloneOrEmpty( $this->getActualFuncWithCausedBy( $element ) );
}
1423
/**
 * Given a phan element, get the actual element where caused-by data is stored. For instance, for methods, this
 * returns the defining methods. Functions with builtin taint are returned as-is.
 *
 * @param FunctionInterface $element
 * @return FunctionInterface
 */
private function getActualFuncWithCausedBy( FunctionInterface $element ): FunctionInterface {
	$hasBuiltinTaint = SecurityCheckPlugin::$pluginInstance->builtinFuncHasTaint( $element->getFQSEN() );
	if ( !$hasBuiltinTaint ) {
		$definingFunc = $this->getDefiningFuncIfDifferent( $element );
		if ( $definingFunc !== null ) {
			return $definingFunc;
		}
	}
	return $element;
}
1438
/**
 * Output a debug message.
 *
 * The destination is taken from the SECCHECK_DEBUG environment variable the first time this is
 * called on the object: '-' means stdout, any other non-empty value is used as the path of a
 * file to write to, and debugging is disabled otherwise.
 *
 * @param string $method __METHOD__ in question
 * @param string $msg debug message
 */
public function debug( $method, $msg ): void {
	if ( $this->debugOutput === null ) {
		$errorOutput = getenv( "SECCHECK_DEBUG" );
		if ( $errorOutput && $errorOutput !== '-' ) {
			$this->debugOutput = fopen( $errorOutput, "w" );
		} elseif ( $errorOutput === '-' ) {
			$this->debugOutput = '-';
		} else {
			$this->debugOutput = false;
		}
	}

	if ( !$this->debugOutput ) {
		// Debugging is disabled (or the file could not be opened): bail out before doing any
		// work, so that the message isn't needlessly built on every call.
		return;
	}

	$line = $method . "\33[1m " . $this->dbgInfo() . " \33[0m" . $msg . "\n";
	if ( $this->debugOutput === '-' ) {
		// @phan-suppress-next-line PhanPluginRemoveDebugEcho This is the only wanted debug echo
		echo $line;
	} else {
		fwrite( $this->debugOutput, $line );
	}
}
1467
/**
 * Given an AST element that's expected to be a callable, try and determine which
 * function or method it refers to.
 *
 * This is intended for functions that register callbacks.
 *
 * @param Node|mixed $node The thingy from AST expected to be a Callable
 * @return FunctionInterface|null Null if the callable cannot be determined
 */
protected function getCallableFromNode( $node ): ?FunctionInterface {
	if ( is_string( $node ) ) {
		// Easy case, 'Foo::Bar'
		// NOTE: ContextNode::getFunctionFromNode has a TODO about returning something here.
		// And also NOTE: 'self::methodname()' is not valid PHP.
		// TODO: We should probably emit a non-security issue in the missing case
		$refersToMethod = strpos( $node, '::' ) !== false;
		if ( $refersToMethod ) {
			$methodFqsen = FullyQualifiedMethodName::fromFullyQualifiedString( $node );
			if ( !$this->code_base->hasMethodWithFQSEN( $methodFqsen ) ) {
				return null;
			}
			return $this->code_base->getMethodByFQSEN( $methodFqsen );
		}

		$funcFqsen = FullyQualifiedFunctionName::fromFullyQualifiedString( $node );
		if ( !$this->code_base->hasFunctionWithFQSEN( $funcFqsen ) ) {
			return null;
		}
		return $this->code_base->getFunctionByFQSEN( $funcFqsen );
	}

	if ( !$node instanceof Node ) {
		return null;
	}

	// Only closures, variables and two-element arrays are looked up.
	$isSupportedKind = $node->kind === \ast\AST_CLOSURE
		|| $node->kind === \ast\AST_VAR
		|| ( $node->kind === \ast\AST_ARRAY && count( $node->children ) === 2 );
	if ( !$isSupportedKind ) {
		return null;
	}

	// Note: intentionally emitting any issues here.
	$candidates = $this->getCtxN( $node )->getFunctionFromNode();
	return self::getFirstElmFromArrayOrGenerator( $candidates );
}
1507
/**
 * Utility function to get the first element from an iterable that can be either an array or a generator.
 *
 * The first element is returned regardless of its key and of whether it is falsy; null is only
 * returned for an empty array, or for a generator that has nothing (left) to yield.
 *
 * @phan-template T
 * @param iterable $iter
 * @phan-param iterable<T> $iter
 * @return mixed|null Null if $iter is empty
 * @phan-return T|null
 */
protected static function getFirstElmFromArrayOrGenerator( iterable $iter ) {
	if ( is_array( $iter ) ) {
		// Don't assume that the first element lives at offset 0. reset() operates on our local
		// copy of the array, so the caller's internal pointer is left alone.
		return $iter ? reset( $iter ) : null;
	}
	assert( $iter instanceof Generator );
	// Check validity explicitly, so that a falsy first element is not mistaken for "empty".
	return $iter->valid() ? $iter->current() : null;
}
1523
/**
 * Get the issue names and severities given a taint, as well as the relevant taint type for each issue.
 *
 * Every taint flag set in $combinedTaint that has an associated issue type contributes one entry,
 * in a fixed order. If no such flag is set, the returned list is empty.
 *
 * @param int $combinedTaint The taint to warn for. I.e. The exec flags
 *  from LHS shifted to non-exec bitwise AND'd with the rhs taint.
 * @return array[] List of issue type, severity, and taint type. Can be empty.
 * @phan-return list<array{0:string,1:int,2:int}>
 */
public function taintToIssuesAndSeverities( int $combinedTaint ): array {
	// Ordered table of [ taint mask, issue type, severity ]. The order determines the order
	// of the returned issues.
	$issueTable = [
		[ SecurityCheckPlugin::HTML_TAINT, 'SecurityCheck-XSS', Issue::SEVERITY_NORMAL ],
		[
			// Plain SQL taint and numkey SQL taint are reported under the same issue type.
			SecurityCheckPlugin::SQL_TAINT | SecurityCheckPlugin::SQL_NUMKEY_TAINT,
			'SecurityCheck-SQLInjection',
			Issue::SEVERITY_CRITICAL
		],
		[ SecurityCheckPlugin::SHELL_TAINT, 'SecurityCheck-ShellInjection', Issue::SEVERITY_CRITICAL ],
		[
			// Not critical for now because this check seems to have a lot of false positives.
			SecurityCheckPlugin::SERIALIZE_TAINT,
			'SecurityCheck-PHPSerializeInjection',
			Issue::SEVERITY_NORMAL
		],
		[ SecurityCheckPlugin::ESCAPED_TAINT, 'SecurityCheck-DoubleEscaped', Issue::SEVERITY_NORMAL ],
		[ SecurityCheckPlugin::PATH_TAINT, 'SecurityCheck-PathTraversal', Issue::SEVERITY_CRITICAL ],
		[ SecurityCheckPlugin::CODE_TAINT, 'SecurityCheck-RCE', Issue::SEVERITY_CRITICAL ],
		[ SecurityCheckPlugin::REGEX_TAINT, 'SecurityCheck-ReDoS', Issue::SEVERITY_NORMAL ],
		[ SecurityCheckPlugin::CUSTOM1_TAINT, 'SecurityCheck-CUSTOM1', Issue::SEVERITY_NORMAL ],
		[ SecurityCheckPlugin::CUSTOM2_TAINT, 'SecurityCheck-CUSTOM2', Issue::SEVERITY_NORMAL ],
	];

	$issues = [];
	foreach ( $issueTable as [ $taintMask, $issueType, $severity ] ) {
		if ( $combinedTaint & $taintMask ) {
			$issues[] = [ $issueType, $severity, $taintMask ];
		}
	}

	return $issues;
}
1576
/**
 * Convenience wrapper around maybeEmitIssue. It is meant for callers for which all of the
 * following hold:
 *  - The RHS taint would be computed only to feed it to maybeEmitIssue
 *  - The message should be followed by caused-by lines
 *  - These caused-by lines should be taken from the same object passed as RHS
 *  - Only caused-by lines having the LHS taint should be included
 * When that is the case, this method should be preferred over maybeEmitIssue.
 *
 * @warning DO NOT use this method if the caller already needs to compute the RHS
 * taintedness! The taint would be computed twice!
 *
 * @param Taintedness $lhsTaint
 * @param mixed $rhsElement Element whose taintedness (and caused-by lines) is computed here
 * @param string $msg Issue description; a '{DETAILS}' placeholder is appended to it
 * @param array $params Additional parameters for the message template
 * @phan-param list<string|FullyQualifiedFunctionLikeName> $params
 */
public function maybeEmitIssueSimplified(
	Taintedness $lhsTaint,
	$rhsElement,
	string $msg,
	array $params = []
): void {
	$rhsTaintWithError = $this->getTaintedness( $rhsElement );
	$rhsTaintedness = $rhsTaintWithError->getTaintedness();
	// The caused-by lines of the RHS fill the trailing {DETAILS} placeholder.
	$msgWithDetails = $msg . '{DETAILS}';
	$allParams = array_merge( $params, [ $rhsTaintWithError->getError() ] );

	$this->maybeEmitIssue( $lhsTaint, $rhsTaintedness, $msgWithDetails, $allParams );
}
1608
/**
 * Emit an issue using the appropriate issue type
 *
 * If $this->overrideContext is set, it will use that for the
 * file/line number to report. This is meant as a hack, so that
 * in MW we can force hook related issues to be in the extension
 * instead of where the hook is called from in MW core.
 *
 * Nothing is emitted if the intersection of the two taints is safe, or if no issue type
 * corresponds to the resulting taint.
 *
 * @param Taintedness $lhsTaint Taint of left hand side (or equivalent)
 * @param Taintedness $rhsTaint Taint of right hand side (or equivalent)
 * @param string $msg Issue description
 * @param array|Closure $msgParamsOrGetter Message parameters passed to emitIssue. Can also be a closure
 * that returns said parameters, for performance (it is only invoked if an issue will be emitted).
 * @phan-param list|Closure():list $msgParamsOrGetter
 */
public function maybeEmitIssue(
	Taintedness $lhsTaint,
	Taintedness $rhsTaint,
	string $msg,
	$msgParamsOrGetter
): void {
	$rhsIsUnknown = $rhsTaint->has( SecurityCheckPlugin::UNKNOWN_TAINT );
	if ( !$rhsIsUnknown || !$lhsTaint->has( SecurityCheckPlugin::ALL_EXEC_TAINT ) ) {
		$sinkIntersection = Taintedness::intersectForSink( $lhsTaint, $rhsTaint );
		if ( $sinkIntersection->isSafe() ) {
			return;
		}
		$combinedTaintInt = Taintedness::flagsAsExecToYesTaint( $sinkIntersection->get() );
	} else {
		// Unknown RHS used in an exec LHS: there's no specific taint type to report.
		$combinedTaintInt = SecurityCheckPlugin::NO_TAINT;
	}

	// Note: the plugin is only asked about false positives if the first condition is false.
	$isLikelyFalsePositive = ( $rhsIsUnknown && $combinedTaintInt === SecurityCheckPlugin::NO_TAINT ) ||
		SecurityCheckPlugin::$pluginInstance->isFalsePositive(
			$combinedTaintInt,
			$msg,
			// FIXME should this be $this->overrideContext ?
			$this->context,
			$this->code_base
		);

	if ( $isLikelyFalsePositive ) {
		$issues = [
			[ 'SecurityCheck-LikelyFalsePositive', Issue::SEVERITY_LOW, $combinedTaintInt ]
		];
	} else {
		$issues = $this->taintToIssuesAndSeverities( $combinedTaintInt );
	}

	if ( !$issues ) {
		return;
	}

	if ( $this->overrideContext ) {
		// If we are overriding the file/line number,
		// report the original line number as well.
		$msg .= " (Originally at: $this->context)";
		$context = $this->overrideContext;
	} else {
		$context = $this->context;
	}

	// Resolve the parameters lazily, now that we know that something will be emitted.
	$msgParams = $msgParamsOrGetter;
	if ( $msgParams instanceof Closure ) {
		$msgParams = $msgParams();
	}
	// Make sure that phan treats this as a list from now on, and not as a possible closure.
	'@phan-var list $msgParams';

	foreach ( $issues as [ $issueType, $severity, $relevantTaint ] ) {
		// Caused-by lines are stringified separately for each issue, keeping only what
		// is relevant for the taint type of that issue.
		$curMsgParams = [];
		foreach ( $msgParams as $key => $param ) {
			if ( $param instanceof CausedByLines ) {
				$curMsgParams[$key] = $param->toStringForIssue( $relevantTaint );
			} else {
				$curMsgParams[$key] = $param;
			}
		}
		SecurityCheckPlugin::emitIssue(
			$this->code_base,
			$context,
			$issueType,
			$msg,
			$curMsgParams,
			$severity
		);
	}
}
1691
/**
 * Method to determine if a potential error isn't really real
 *
 * This is useful when a specific warning would have a side effect
 * and we want to know whether we should suppress the side effect in
 * addition to the warning.
 *
 * Returns true if any of the issue types corresponding to $lhsTaint is suppressed in the
 * reporting context (the override context, if set), or if the plugin considers it a false positive.
 *
 * @param Taintedness $lhsTaint Must have at least one EXEC flag set
 * @return bool
 */
public function isIssueSuppressedOrFalsePositive( Taintedness $lhsTaint ): bool {
	$execFlags = $lhsTaint->get();
	assert( ( $execFlags & SecurityCheckPlugin::ALL_EXEC_TAINT ) !== SecurityCheckPlugin::NO_TAINT );
	$yesTaint = Taintedness::flagsAsExecToYesTaint( $execFlags );

	$reportContext = $this->overrideContext ?: $this->context;
	foreach ( $this->taintToIssuesAndSeverities( $yesTaint ) as $issueData ) {
		// Only the issue type (first element) matters for suppressions.
		if ( $reportContext->hasSuppressIssue( $this->code_base, $issueData[0] ) ) {
			return true;
		}
	}

	return SecurityCheckPlugin::$pluginInstance->isFalsePositive(
		$yesTaint,
		"[dummy msg for false positive check]",
		// not using $this->overrideContext to be consistent with maybeEmitIssue()
		$this->context,
		$this->code_base
	);
}
1724
/**
 * Somebody invokes a method or function (or something similar)
 *
 * This has to figure out:
 *  Is the return value of the call tainted
 *  Are any of the arguments tainted
 *  Does the function do anything scary with its arguments
 * It also has to maintain quite a bit of book-keeping.
 *
 * Named arguments are first mapped to the index of the corresponding parameter in the callee,
 * and all per-parameter data (flags, sink taint, preserved taint) is then looked up by that index.
 *
 * @param FunctionInterface $func
 * @param FullyQualifiedFunctionLikeName $funcName
 * @param array $args Arguments to function/method
 * @phan-param array<Node|mixed> $args
 * @param bool $computePreserve Whether the caller wants to know which taintedness is preserved by this call
 * @param bool $isHookHandler Whether we're analyzing a hook handler for a Hooks::run call.
 *   FIXME This is MW-specific
 * @return TaintednessWithError|null Taint The resulting taint of the expression, or null if
 *   $computePreserve is false
 */
public function handleMethodCall(
	FunctionInterface $func,
	FullyQualifiedFunctionLikeName $funcName,
	array $args,
	bool $computePreserve = true,
	bool $isHookHandler = false
): ?TaintednessWithError {
	$taint = $this->getTaintOfFunction( $func );
	$funcError = $this->getCausedByLinesForFunc( $func );

	$preserveArgumentsData = [];
	foreach ( $args as $i => $argument ) {
		if ( !( $argument instanceof Node ) ) {
			// Literal value
			continue;
		}

		// Resolve named arguments before anything else, so that every lookup below uses the index of
		// the parameter in the callee (not the position of the argument in the call), and the actual
		// argument expression (not the AST_NAMED_ARG wrapper node).
		if ( $argument->kind === \ast\AST_NAMED_ARG ) {
			[ $i, $argument, $argName ] = $this->translateNamedArg( $argument, $func );
			if ( $i === null || !$argument instanceof Node ) {
				// Cannot find argument or it's literal
				continue;
			}
			$argName = "`$argName`";
		} else {
			$argName = '#' . ( $i + 1 );
		}

		$curParFlags = $taint->getParamFlags( $i );
		if ( ( $curParFlags & SecurityCheckPlugin::ARRAY_OK ) && $this->nodeIsArray( $argument ) ) {
			// This function specifies that arrays are always ok, so skip.
			continue;
		}

		$paramSinkTaint = $taint->getParamSinkTaint( $i );
		$paramSinkError = $funcError->getParamSinkLines( $i );

		$argTaintWithError = $this->getTaintednessNode( $argument );
		$curArgTaintedness = $argTaintWithError->getTaintedness();
		$baseArgError = $argTaintWithError->getError();
		if (
			$paramSinkTaint->has( SecurityCheckPlugin::SQL_NUMKEY_EXEC_TAINT )
			&& $curArgTaintedness->has( SecurityCheckPlugin::SQL_TAINT )
			&& $this->nodeCanBeString( $argument )
		) {
			// Special case to make NUMKEY work right for non-array values.
			// TODO Should consider if this is really best approach.
			$curArgTaintedness->add( SecurityCheckPlugin::SQL_NUMKEY_TAINT );
		}

		$paramSinkTaint = SecurityCheckPlugin::$pluginInstance->modifyParamSinkTaint(
			$paramSinkTaint,
			$curArgTaintedness,
			$argument,
			$i,
			$func,
			$taint,
			$this->context,
			$this->code_base
		);

		// Add a hook in order to special case for codebases. This is primarily used as a hack so that in mediawiki
		// the Message class doesn't have double escape taint if method takes Message|string.
		// TODO This is quite hacky.
		$curArgTaintedness = SecurityCheckPlugin::$pluginInstance->modifyArgTaint(
			$curArgTaintedness,
			$argument,
			$i,
			$func,
			$taint,
			$this->context,
			$this->code_base
		);

		// TODO: We also need to handle the case where someFunc( $execArg ) for pass by reference where
		// the parameter is later executed outside the func.
		if ( $curArgTaintedness->has( SecurityCheckPlugin::ALL_TAINT ) ) {
			$this->markAllDependentVarsYes( $func, $i, $curArgTaintedness, $baseArgError );
		}

		// We are doing something like evilMethod( $arg ); where $arg is a parameter to the current function.
		// So backpropagate that assigning to $arg can cause evilness.
		if ( !$paramSinkTaint->isSafe() ) {
			$this->backpropagateArgTaint( $argument, $paramSinkTaint, $paramSinkError );
		}

		$param = $func->getParameterForCaller( $i );
		// @todo Internal funcs that pass by reference. Should we assume that their variables are tainted? Most
		// common example is probably preg_match, which may very well be tainted much of the time.
		// TODO: Ideally this should happen after all args have been processed, so it would account for any
		// last-minute modification of the dependent elements (e.g. markAllDependentVarsYes) and would see the
		// "final" value for refTaint. Right now this is not possible because links tracked by
		// markAllDependentVarsYes are imprecise and would introduce false positives.
		if ( $param && $param->isPassByReference() && !$func->isPHPInternal() ) {
			$this->handlePassByRef( $func, $argument, $i, $isHookHandler );
		}

		// The message parameters are built lazily: maybeEmitIssue only invokes the closure
		// if an issue is actually going to be emitted.
		/** @phan-return list */
		$issueArgsGetter = function () use (
			$funcName, $argName, $argument, $paramSinkError, $baseArgError
		): array {
			// Always include the ordinal (it helps for repeated arguments)
			$taintedArg = $argName;
			$argStr = ASTReverter::toShortString( $argument );
			if ( strlen( $argStr ) < 25 ) {
				// If we have a short representation of the arg, include it as well.
				$taintedArg .= " (`$argStr`)";
			}

			return [
				$funcName,
				$this->getCurrentMethod(),
				$taintedArg,
				$paramSinkError,
				$baseArgError,
			];
		};

		$this->maybeEmitIssue(
			$paramSinkTaint,
			$curArgTaintedness,
			"Calling method {FUNCTIONLIKE}() in {FUNCTIONLIKE}" .
			" that outputs using tainted argument {CODE}.{DETAILS}{DETAILS}",
			$issueArgsGetter
		);

		$preserveArgumentsData[$i] = [ $curArgTaintedness, $baseArgError ];
	}

	if ( !$computePreserve ) {
		return null;
	}

	// Special-cased functions take precedence over the generic preserved-taint computation below.
	$hardcodedPreservedTaint = $this->getHardcodedPreservedTaintForFunc( $func, $preserveArgumentsData );
	if ( $hardcodedPreservedTaint ) {
		return $hardcodedPreservedTaint;
	}
	$overallTaint = $taint->getOverall();
	$combinedArgTaint = Taintedness::newSafe();
	$combinedArgErrors = new CausedByLines();
	foreach ( $preserveArgumentsData as $i => [ $curArgTaintedness, $baseArgError ] ) {
		if ( $taint->hasParamPreserve( $i ) ) {
			$parTaint = $taint->getParamPreservedTaint( $i );
			$preservedArgTaint = $parTaint->asTaintednessForArgument( $curArgTaintedness );
			$curArgLinks = MethodLinks::newEmpty();
		} elseif (
			$overallTaint->has( SecurityCheckPlugin::PRESERVE_TAINT | SecurityCheckPlugin::UNKNOWN_TAINT )
		) {
			// No info for this specific parameter, but the overall function either preserves taint
			// when unspecified or is unknown. So just pass the taint through, destroying the shape.
			$preservedArgTaint = $curArgTaintedness->asCollapsed();
			$curArgLinks = MethodLinks::newEmpty();
		} else {
			// This parameter has no taint info. And overall this function doesn't depend on param
			// for taint and isn't unknown. So we consider this argument untainted.
			continue;
		}

		$combinedArgTaint->mergeWith( $preservedArgTaint );
		$curArgError = $baseArgError->asIntersectedWithTaintedness( $preservedArgTaint );
		$relevantParamError = $funcError->getParamPreservedLines( $i )
			->asPreservingTaintednessAndLinks( $preservedArgTaint, $curArgLinks );
		$curArgError->mergeWith( $relevantParamError );
		// NOTE: If any line inside the callee's body is responsible for preserving the taintedness of more
		// than one argument, it will appear once per preserved argument in the overall caused-by of the
		// call expression. This is probably a good thing, but can increase the length of caused-by lines.
		// TODO Something like T291379 might help here.
		$combinedArgErrors->mergeWith( $curArgError );
	}

	// The taintedness of the call expression is the overall taint of the function (minus the
	// preserve and exec flags), plus whatever is preserved from the arguments (minus the exec flags).
	$callTaintedness = $overallTaint->without(
		SecurityCheckPlugin::PRESERVE_TAINT | SecurityCheckPlugin::ALL_EXEC_TAINT
	);
	$combinedArgTaint->remove( SecurityCheckPlugin::ALL_EXEC_TAINT );
	$callTaintedness->mergeWith( $combinedArgTaint );
	$callError = $funcError->getGenericLines()->asMergedWith( $combinedArgErrors );
	return new TaintednessWithError( $callTaintedness, $callError, MethodLinks::newEmpty() );
}
1921
/**
 * Map a named argument to the parameter of $func having the same name.
 *
 * @todo This should possibly be part of the public interface upstream
 * @see \Phan\Analysis\ArgumentType::analyzeParameterListForCallback
 * @param Node $argument Node of the named argument, having 'name' and 'expr' children
 * @param FunctionInterface $func
 * @return array Index of the matching parameter, argument expression, and argument name; all three
 *   are null if no parameter of $func has that name.
 * @phan-return array{0:int|null,1:Node|mixed,2:?string}
 */
private function translateNamedArg( Node $argument, FunctionInterface $func ): array {
	$children = $argument->children;
	$name = $children['name'];
	$expr = $children['expr'];
	assert( $expr !== null );

	foreach ( $func->getRealParameterList() as $position => $realParam ) {
		if ( $realParam->getName() !== $name ) {
			continue;
		}
		return [ $position, $expr, $name ];
	}

	// No parameter with this name
	return [ null, null, null ];
}
1941
/**
 * Backpropagate the given (exec) taintedness to the elements that $argument depends on, by
 * means of markAllDependentMethodsExecForNode. The numkey exec taint, if present, is
 * backpropagated separately from the rest.
 *
 * @param Node $argument
 * @param Taintedness $taint
 * @param CausedByLines|null $funcError
 *
 * @todo This has false negatives, because we don't collect function arguments in
 * getPhanObjsForNode (we'd have to pass option 'all'), so we can't handle e.g. array_merge
 * right now. However, collecting all args would create false positives with functions where
 * the arg taint isn't propagated to the return value. Ideally, we'd want to include an argument
 * iff the corresponding parameter passes $taint through.
 *
 * @note It's important that we don't backpropagate taintedness to every returned object in case
 * of function calls, but just props and the like (so excluding vars). See test 'toomanydeps'.
 */
protected function backpropagateArgTaint(
	Node $argument,
	Taintedness $taint,
	?CausedByLines $funcError = null
): void {
	if ( $taint->has( SecurityCheckPlugin::SQL_NUMKEY_EXEC_TAINT ) ) {
		// Special case for numkey, we need to "filter" the argument.
		// TODO This doesn't return arrays with mixed keys. Currently, doing so would result
		// in arrays being considered as a unit, and the taint would be backpropagated to all
		// values, even ones with string keys. See TODO in elementCanBeNumkey

		// TODO This should be limited to the outer array, see TODO in backpropnumkey test
		// Note that this is true in general for NUMKEY taint, not just when backpropagating it
		$numkeyTaint = $taint->withOnly( SecurityCheckPlugin::SQL_NUMKEY_EXEC_TAINT );
		$this->markAllDependentMethodsExecForNode( $argument, $numkeyTaint, $funcError, true );
		// The numkey part was handled above, so only the remaining flags are handled below.
		$taint = $taint->without( SecurityCheckPlugin::SQL_NUMKEY_EXEC_TAINT );
	}

	$this->markAllDependentMethodsExecForNode( $argument, $taint, $funcError );
}
1976
/**
 * Handle pass-by-ref params when examining a function call.
 *
 * Phan handles passbyref by reanalyzing the method with PassByReferenceVariable objects instead of
 * Parameters. These objects contain the info about the param, but proxy all calls to the underlying
 * argument object. We cannot 100% copy that behaviour: inside the function body, the local variable
 * for the pbr param would have the same taintedness as the argument, and things like `echo $pbr`
 * would emit an issue inside the function, which is unwanted for now. Additionally, it's unclear how
 * we'd add a caused-by entry for the line of the function call.
 *
 * Hence, instead of adding taintedness to the underlying argument, we put it in a separate prop,
 * which is only written but never read inside the function body. Then after the call was analyzed,
 * this method moves the taintedness from the "special" prop onto the normal taintedness prop. We do
 * the same thing for links, so as to infer which taintedness from the argument is preserved by the
 * function.
 *
 * TODO In the future we might want to really copy phan's approach, as that would allow us to delete
 * some hacks, and handle conditionals inside the function body more accurately.
 *
 * @param FunctionInterface $func
 * @param Node $argument
 * @param int $i Position of the param
 * @param bool $isHookHandler Whether we're analyzing a hook handler for a Hooks::run call.
 *   FIXME This is MW-specific
 */
private function handlePassByRef(
	FunctionInterface $func,
	Node $argument,
	int $i,
	bool $isHookHandler
): void {
	$argObj = $this->getPassByRefObjFromNode( $argument );
	if ( !$argObj ) {
		return;
	}

	$refTaint = self::getTaintednessRef( $argObj );
	if ( !$refTaint ) {
		// If no ref taint was set, it's likely due to a recursive call or another instance where phan is not
		// reanalyzing the callee with PassByReferenceVariable objects.
		return;
	}

	$globalVarObj = null;
	if ( $argObj instanceof GlobalVariable ) {
		$globalVarObj = $argObj->getElement();
	}

	// Move the ref taintedness to the "actual" taintedness of the object
	// Note: We assume that the order in which hook handlers are called is nondeterministic, thus
	// we never override arg taint for reference params in this case. Taintedness of properties and
	// global variables is never overridden either.
	$overrideTaint = !( $argObj instanceof Property ) && !$globalVarObj && !$isHookHandler;

	// Note, the call itself is only responsible if it adds some taintedness. Hence, the taintedness used
	// for caused-by lines is a snapshot taken before merging what is preserved from the argument.
	$errTaint = clone $refTaint;
	$refLinks = self::getMethodLinksRef( $argObj );
	if ( $refLinks && $refLinks->hasDataForFuncAndParam( $func, $i ) ) {
		$preservedByParam = $refLinks->asPreservedTaintednessForFuncParam( $func, $i );
		$curArgTaint = $this->getTaintednessPhanObj( $argObj );
		$refTaint->mergeWith( $preservedByParam->asTaintednessForArgument( $curArgTaint ) );
	}

	$this->setTaintedness( $argObj, $refTaint, $overrideTaint );
	$this->addTaintError( $argObj, $errTaint, null );
	if ( $globalVarObj ) {
		// Same for the underlying element of the global variable, never overriding.
		$this->setTaintedness( $globalVarObj, $refTaint, false );
		$this->addTaintError( $globalVarObj, $errTaint, null );
	}

	// We clear method links since the by-ref call might have modified them, and precise tracking is not
	// trivial to implement, and most probably not worth the effort.
	self::setMethodLinks( $argObj, MethodLinks::newEmpty() );
	self::clearRefData( $argObj );
}
2040
/**
 * Given the node of an argument that is passed by reference, return the phan object
 * corresponding to that node, if any.
 *
 * @param Node $node
 * @return TypedElementInterface|null Null for hardcoded global variables, variables that
 *   cannot be resolved, array elements, and any other unhandled node kind.
 */
private function getPassByRefObjFromNode( Node $node ): ?TypedElementInterface {
	$cn = $this->getCtxN( $node );
	$kind = $node->kind;

	if ( $kind === \ast\AST_PROP || $kind === \ast\AST_STATIC_PROP ) {
		return $this->getPropFromNode( $node );
	}

	if ( $kind === \ast\AST_VAR ) {
		if ( Variable::isHardcodedGlobalVariableWithName( $cn->getVariableName() ) ) {
			return null;
		}
		try {
			return $cn->getVariable();
		} catch ( NodeException | IssueException $_ ) {
			return null;
		}
	}

	// Phan doesn't handle AST_DIM with PassByReferenceVariable objects, so nothing we can do
	// anyway. Any other kind is unexpected, and is logged.
	if ( $kind !== \ast\AST_DIM ) {
		$this->debug( __METHOD__, 'Unhandled pass-by-ref case: ' . Debug::nodeName( $node ) );
	}
	return null;
}
2072
2073	/**
2074	* Get the taintedness of the return value of $func (a special-cased internal PHP function) given the taintedness
2075	* of its arguments. Note that this doesn't handle passbyref parameters. If the function is not special-cased,
2076	* returns null.
2077	*
2078	* @param FunctionInterface $func
2079	* @param array<array<Taintedness\|CausedByLines>> $preserveArgumentsData Actual taintedness and caused-by lines of
2080	* each argument. Literal arguments aren't included here.
2081	* @phan-param array<int,array{0:Taintedness,1:CausedByLines}> $preserveArgumentsData
2082	* @return TaintednessWithError\|null
2083	*/
2084	private function getHardcodedPreservedTaintForFunc(
2085	FunctionInterface $func,
2086	array $preserveArgumentsData
2087	): ?TaintednessWithError {
2088	switch ( ltrim( $func->getName(), '\\' ) ) {
2089	// Functions that return one element of the array (first and only parameter)
2090	case 'array_pop':
2091	case 'array_shift':
2092	case 'current':
2093	case 'end':
2094	case 'next':
2095	case 'pos':
2096	case 'prev':
2097	case 'reset':
2098	if ( !isset( $preserveArgumentsData[0] ) ) {
2099	return TaintednessWithError::newEmpty();
2100	}
2101	$taint = $preserveArgumentsData[0][0]->asValueFirstLevel();
2102	$error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint );
2103	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2104	case 'array_values':
2105	// Same taintedness as the original array (first and only param), but with safe keys and numkey.
2106	if ( !isset( $preserveArgumentsData[0] ) ) {
2107	return TaintednessWithError::newEmpty();
2108	}
2109	$taint = $preserveArgumentsData[0][0]->withoutKeys();
2110	if ( $taint->has( SecurityCheckPlugin::SQL_TAINT ) ) {
2111	$taint->add( SecurityCheckPlugin::SQL_NUMKEY_TAINT );
2112	}
2113	$error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint );
2114	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2115	// Functions that return a key from the array (first and only parameter)
2116	case 'key':
2117	case 'array_key_first':
2118	case 'array_key_last':
2119	// array_keys returns all keys from the array (first param), and can also take two more parameters
2120	// that don't contribute to the resulting taintedness.
2121	case 'array_keys':
2122	if ( !isset( $preserveArgumentsData[0] ) ) {
2123	return TaintednessWithError::newEmpty();
2124	}
2125	$taint = $preserveArgumentsData[0][0]->asKeyForForeach();
2126	$error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint );
2127	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2128	case 'array_change_key_case':
2129	// The overall shape remains the same, but the keys of the outermost array (first param) have different
2130	// case. Second param (lower vs upper) is safe.
2131	if ( !isset( $preserveArgumentsData[0] ) ) {
2132	return TaintednessWithError::newEmpty();
2133	}
2134	// TODO: actually handle case changes!
2135	$taint = clone $preserveArgumentsData[0][0];
2136	$error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint );
2137	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2138	case 'array_flip':
2139	// Swaps keys and values of the array (first and only param)
2140	if ( !isset( $preserveArgumentsData[0] ) ) {
2141	return TaintednessWithError::newEmpty();
2142	}
2143	$taint = $preserveArgumentsData[0][0]->asKeyForForeach();
2144	$taint->addKeysTaintedness( $preserveArgumentsData[0][0]->asValueFirstLevel()->get() );
2145	$error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint );
2146	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2147	case 'implode':
2148	case 'join':
2149	// This function can be called in three different ways:
2150	// - implode( $string, $array ) -> joins elements in $array using $string
2151	// - implode( $array ) -> joins elements in $array using the empty string
2152	// - implode( $array, $string ) -> same as the first one but inverted params, deprecated in PHP 7.4,
2153	// removed in PHP 8
2154	// TODO: Right now we don't support the deprecated syntax; should we?
2155	if ( isset( $preserveArgumentsData[0] ) ) {
2156	$joinerTaint = $preserveArgumentsData[0][0]->asCollapsed();
2157	$joinerError = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $joinerTaint );
2158	}
2159	$combinedTaint = $joinerTaint ?? Taintedness::newSafe();
2160	$combinedError = $joinerError ?? new CausedByLines();
2161	if ( isset( $preserveArgumentsData[1] ) ) {
2162	$arrayTaint = $preserveArgumentsData[1][0]->withoutKeys()->asCollapsed();
2163	$combinedTaint->mergeWith( $arrayTaint );
2164	$combinedError->mergeWith(
2165	$preserveArgumentsData[1][1]->asIntersectedWithTaintedness( $arrayTaint )
2166	);
2167	}
2168	return new TaintednessWithError( $combinedTaint, $combinedError, MethodLinks::newEmpty() );
2169	case 'array_fill':
2170	// array_fill( $start, $count, $value ) creates an array with $count copies of $value, starting
2171	// at key $start. The first two params are integers, and thus safe.
2172	if ( !isset( $preserveArgumentsData[2] ) ) {
2173	return TaintednessWithError::newEmpty();
2174	}
2175	$preservedArgTaint = clone $preserveArgumentsData[2][0];
2176	$taint = Taintedness::newSafe();
2177	// TODO: We may actually be able to infer the actual keys, instead of setting as unknown
2178	$taint->setOffsetTaintedness( null, $preservedArgTaint );
2179	// TODO: We should also add numkey if the argument has sql.
2180	$error = $preserveArgumentsData[2][1]->asIntersectedWithTaintedness( $preservedArgTaint );
2181	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2182	case 'array_fill_keys':
2183	// array_fill_keys( $keys, $value ) creates an array whose keys are the element in $keys, and whose
2184	// values are all equal to $value.
2185	$taint = Taintedness::newSafe();
2186	$error = new CausedByLines();
2187	if ( isset( $preserveArgumentsData[0] ) ) {
2188	$keysTaintedness = $preserveArgumentsData[0][0]->asValueFirstLevel();
2189	$taint->addKeysTaintedness( $keysTaintedness->get() );
2190	$error->mergeWith( $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint ) );
2191	}
2192	if ( isset( $preserveArgumentsData[1] ) ) {
2193	$preservedValueTaint = $preserveArgumentsData[1][0];
2194	$taint->setOffsetTaintedness( null, clone $preservedValueTaint );
2195	$error->mergeWith(
2196	$preserveArgumentsData[1][1]->asIntersectedWithTaintedness( $preservedValueTaint )
2197	);
2198	}
2199	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2200	case 'array_combine':
2201	// array_combine( $keys, $values ) creates an array whose keys are the elements in $keys, and whose
2202	// values are the elements in $values.
2203	$taint = Taintedness::newSafe();
2204	$error = new CausedByLines();
2205	if ( isset( $preserveArgumentsData[0] ) ) {
2206	$keysTaintedness = $preserveArgumentsData[0][0]->asValueFirstLevel();
2207	$taint->addKeysTaintedness( $keysTaintedness->get() );
2208	$error->mergeWith( $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint ) );
2209	}
2210	if ( isset( $preserveArgumentsData[1] ) ) {
2211	$valueTaint = $preserveArgumentsData[1][0]->withoutKeys();
2212	$taint->mergeWith( $valueTaint );
2213	$error->mergeWith( $preserveArgumentsData[1][1]->asIntersectedWithTaintedness( $valueTaint ) );
2214	}
2215	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2216	case 'array_unique':
2217	// Removes duplicate from an array (first param). We can't tell what gets removed, and what's the effect
2218	// of this function on array keys. Second param is safe.
2219	if ( !isset( $preserveArgumentsData[0] ) ) {
2220	return TaintednessWithError::newEmpty();
2221	}
2222	$taint = $preserveArgumentsData[0][0]->asKnownKeysMadeUnknown();
2223	$error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint );
2224	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2225	case 'array_diff':
2226	case 'array_diff_assoc':
2227	// - array_diff( $arr, $x_1, ..., $x_n ) returns elements in $arr that are NOT in any of the $x_i.
2228	// The equality of two elements is determined by looking at their values.
2229	// Only the first argument contributes to the preserved taintedness.
2230	// - array_diff_assoc does the same, but two elements are considered equal if they have the same value
2231	// AND the same key.
2232	if ( !isset( $preserveArgumentsData[0] ) ) {
2233	return TaintednessWithError::newEmpty();
2234	}
2235	// We can't infer shape mutations because Taintedness doesn't keep track of the values, so just
2236	// return the taintedness of the first argument.
2237	$preservedArgTaint = clone $preserveArgumentsData[0][0];
2238	return new TaintednessWithError(
2239	$preservedArgTaint,
2240	$preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ),
2241	MethodLinks::newEmpty()
2242	);
2243	case 'array_diff_key':
2244	// array_diff_key( $arr, $x_1, ..., $x_n ) is similar to array_diff, but here two elements are
2245	// considered equal if they have the same key (regardless of the value).
2246	if ( !isset( $preserveArgumentsData[0] ) ) {
2247	return TaintednessWithError::newEmpty();
2248	}
2249	/** @var Taintedness $taint */
2250	[ $taint, $error ] = array_shift( $preserveArgumentsData );
2251	$taint = clone $taint;
2252	foreach ( $preserveArgumentsData as $argData ) {
2253	$taint->removeKnownKeysFrom( $argData[0] );
2254	// No argument besides the first one can contribute to caused-by lines, although
2255	// ideally we would remove the current error from $error.
2256	}
2257	// The shape is destroyed to avoid pretending that we know anything about the final shape of the array.
2258	return new TaintednessWithError( $taint->asKnownKeysMadeUnknown(), $error, MethodLinks::newEmpty() );
2259	case 'array_intersect':
2260	case 'array_intersect_assoc':
2261	// - array_intersect( $arr_1, ..., $arr_n ) returns an array of elements that are in ALL of the $arr_i.
2262	// The equality of two elements is determined by looking at their values.
2263	// Only values from the first array are used for the return value.
2264	// - array_intersect_assoc does the same, but two elements are considered equal if they have the same
2265	// value AND the same key.
2266	if ( !$preserveArgumentsData ) {
2267	return TaintednessWithError::newEmpty();
2268	}
2269	// Note: we can't do an actual intersect on the values because Taintedness does not store them, but
2270	// intersecting the taintedness flags, although not perfect, is correct and approximates that.
2271	// The shape is destroyed to avoid pretending that we know anything about the final shape of the array.
2272	/** @var Taintedness $taint */
2273	[ $taint, $error ] = array_shift( $preserveArgumentsData );
2274	$taint = $taint->asKnownKeysMadeUnknown();
2275	foreach ( $preserveArgumentsData as $argData ) {
2276	$taint->keepOnly( $argData[0]->get() );
2277	// No argument besides the first one can contribute to caused-by lines, although
2278	// ideally we would intersect $error with the current error.
2279	}
2280	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2281	case 'array_intersect_key':
2282	// array_intersect_key( $arr, $x_1, ..., $x_n ) is similar to array_intersect, but here two elements are
2283	// considered equal if they have the same key (regardless of the value).
2284	if ( !isset( $preserveArgumentsData[0] ) ) {
2285	return TaintednessWithError::newEmpty();
2286	}
2287	// We can't infer shape mutations because there might be unknown keys in either argument, so just
2288	// return the taintedness of the first argument.
2289	$preservedArgTaint = clone $preserveArgumentsData[0][0];
2290	return new TaintednessWithError(
2291	$preservedArgTaint,
2292	$preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ),
2293	MethodLinks::newEmpty()
2294	);
2295	// TODO The last parameter of these functions is a callback, so probably hard to handle. They're also
2296	// variadic, so we'd need to know the arg type to analyze the callback.
2297	case 'array_diff_uassoc':
2298	case 'array_diff_ukey':
2299	case 'array_intersect_uassoc':
2300	case 'array_intersect_ukey':
2301	case 'array_udiff':
2302	case 'array_udiff_assoc':
2303	case 'array_uintersect':
2304	case 'array_uintersect_assoc':
2305	// The last two params of these are callbacks, so twice as hard
2306	case 'array_udiff_uassoc':
2307	case 'array_uintersect_uassoc':
2308	// Only the taintedness from first argument is preserved.
2309	if ( !isset( $preserveArgumentsData[0] ) ) {
2310	return TaintednessWithError::newEmpty();
2311	}
2312	$preservedArgTaint = clone $preserveArgumentsData[0][0];
2313	return new TaintednessWithError(
2314	$preservedArgTaint,
2315	$preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ),
2316	MethodLinks::newEmpty()
2317	);
2318	case 'array_map':
2319	// array_map( $cb, $arr, $arr_1, ..., $arr_n ) returns the result of applying $cb to all the array
2320	// arguments, element by element.
2321	// TODO: Analyze the callback. For now we only preserve taintedness of array arguments.
2322	unset( $preserveArgumentsData[0] );
2323	$taint = Taintedness::newSafe();
2324	$error = new CausedByLines();
2325	foreach ( $preserveArgumentsData as [ $argTaint, $argError ] ) {
2326	$preservedArgTaint = $argTaint->asCollapsed();
2327	$taint->mergeWith( $preservedArgTaint );
2328	$error->mergeWith( $argError->asIntersectedWithTaintedness( $preservedArgTaint ) );
2329	}
2330	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2331	case 'array_filter':
2332	// array_filter( $arr, $cb, $mode ) filters the $arr by using $cb.
2333	// TODO: Analyze the callback. For now we preserve the whole taintedness of the array.
2334	if ( !isset( $preserveArgumentsData[0] ) ) {
2335	return TaintednessWithError::newEmpty();
2336	}
2337	$preservedArgTaint = $preserveArgumentsData[0][0]->asKnownKeysMadeUnknown();
2338	return new TaintednessWithError(
2339	$preservedArgTaint,
2340	$preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ),
2341	MethodLinks::newEmpty()
2342	);
2343	case 'array_reduce':
2344	// array_reduce( $arr, $cb, $initial ) applies $cb to $arr to obtain a single value.
2345	// TODO: Analyze the callback. For now we preserve the whole taintedness of the array.
2346	if ( !isset( $preserveArgumentsData[0] ) ) {
2347	return TaintednessWithError::newEmpty();
2348	}
2349	$preservedArgTaint = $preserveArgumentsData[0][0]->asCollapsed();
2350	return new TaintednessWithError(
2351	$preservedArgTaint,
2352	$preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ),
2353	MethodLinks::newEmpty()
2354	);
2355	case 'array_reverse':
2356	// array_reverse( $arr, $preserveKeys ) reverses the order of an array. String keys are always
2357	// preserved, the second param controls whether int keys are also preserved.
2358	// TODO: By knowing the value of the second arg, we could improve this by:
2359	// - Removing only int keys if false
2360	// - Preserving the whole shape if true
2361	if ( !isset( $preserveArgumentsData[0] ) ) {
2362	return TaintednessWithError::newEmpty();
2363	}
2364	$preservedArgTaint = $preserveArgumentsData[0][0]->asKnownKeysMadeUnknown();
2365	return new TaintednessWithError(
2366	$preservedArgTaint,
2367	$preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ),
2368	MethodLinks::newEmpty()
2369	);
2370	case 'array_pad':
2371	// array_pad( $arr, $length, $val ) returns a copy of $arr padded to the size specified by $length
2372	// by adding copies of $val.
2373	if ( isset( $preserveArgumentsData[0] ) ) {
2374	$taint = clone $preserveArgumentsData[0][0];
2375	$error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint );
2376	} else {
2377	$taint = Taintedness::newSafe();
2378	$error = new CausedByLines();
2379	}
2380	if ( isset( $preserveArgumentsData[2] ) ) {
2381	$valArgTaint = $preserveArgumentsData[2][0];
2382	$taint->setOffsetTaintedness( null, $valArgTaint );
2383	$error->mergeWith( $preserveArgumentsData[2][1]->asIntersectedWithTaintedness( $valArgTaint ) );
2384	}
2385	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2386	case 'array_slice':
2387	// array_slice( $arr, $offset, $len, $preserveKeys ) returns the segment of $arr starting at $offset
2388	// and of size $len. String keys are always preserved, $preserveKeys controls whether int keys
2389	// are also preserved.
2390	if ( !isset( $preserveArgumentsData[0] ) ) {
2391	return TaintednessWithError::newEmpty();
2392	}
2393	$preservedArgTaint = $preserveArgumentsData[0][0]->asKnownKeysMadeUnknown();
2394	return new TaintednessWithError(
2395	$preservedArgTaint,
2396	$preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $preservedArgTaint ),
2397	MethodLinks::newEmpty()
2398	);
2399	case 'array_replace':
2400	// array_replace( $arr, $rep_1, ..., $rep_n ) returns a copy of $arr where each element is replaced
2401	// with the element having the same key in the rightmost argument.
2402	if ( !isset( $preserveArgumentsData[0] ) ) {
2403	return TaintednessWithError::newEmpty();
2404	}
2405	$firstArgData = array_shift( $preserveArgumentsData );
2406	/** @var Taintedness $taint */
2407	$taint = clone $firstArgData[0];
2408	$error = $firstArgData[1]->asIntersectedWithTaintedness( $taint );
2409	foreach ( $preserveArgumentsData as [ $argTaint, $argError ] ) {
2410	$taint->arrayReplace( $argTaint );
2411	// Note: we may be adding too many caused-by lines here
2412	$error->mergeWith( $argError->asIntersectedWithTaintedness( $argTaint ) );
2413	}
2414	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2415	case 'array_merge':
2416	// array_merge( $arr_1, ... $arr_n ) merges the given array arguments. If any two (or more) input arrays
2417	// have the same string key, the value from the rightmost argument with that key will be used. Integer
2418	// keys are always appended, and never replaced. Additionally, integer keys in the resulting array
2419	// will be renumbered incrementally starting from 0.
2420	if ( !$preserveArgumentsData ) {
2421	return TaintednessWithError::newEmpty();
2422	}
2423	/** @var Taintedness $taint */
2424	[ $taint, $error ] = array_shift( $preserveArgumentsData );
2425	foreach ( $preserveArgumentsData as [ $argTaint, $argError ] ) {
2426	$taint->arrayMerge( $argTaint );
2427	$error->mergeWith( $argError->asIntersectedWithTaintedness( $argTaint ) );
2428	}
2429	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2430	// TODO Handle these with recursion.
2431	case 'array_merge_recursive':
2432	case 'array_replace_recursive':
2433	$taint = Taintedness::newSafe();
2434	$error = new CausedByLines();
2435	foreach ( $preserveArgumentsData as [ $curArgTaintedness, $baseArgError ] ) {
2436	$preservedArgTaint = $curArgTaintedness->asKnownKeysMadeUnknown();
2437	$taint->mergeWith( $preservedArgTaint );
2438	$error->mergeWith( $baseArgError->asIntersectedWithTaintedness( $preservedArgTaint ) );
2439	}
2440	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2441	case 'array_chunk':
2442	// array_chunk( $array, $length, $preserve_keys = false ) returns a list of chunks of $array. The keys
2443	// in each chunk are the same of $array if $preserve_keys is true. Else, they're just numbers.
2444	if ( !isset( $preserveArgumentsData[0] ) ) {
2445	return TaintednessWithError::newEmpty();
2446	}
2447	$taint = Taintedness::newSafe();
2448	// TODO: Check value of $preserve_keys to determine the key taintedness more accurately.
2449	// For now, we just assume that keys are preserved.
2450	$taint->setOffsetTaintedness( null, $preserveArgumentsData[0][0]->asKnownKeysMadeUnknown() );
2451	$error = $preserveArgumentsData[0][1]->asIntersectedWithTaintedness( $taint );
2452	return new TaintednessWithError( $taint, $error, MethodLinks::newEmpty() );
2453	default:
2454	return null;
2455	}
2456	}
2457
/**
 * Compute the mask of taint flags that a binary operation lets through. Safe operators
 * preserve no taint at all, unsafe ones preserve every taint flag. The taint of a binop is
 * then basically ( lhs_taint | rhs_taint ) & taint_mask
 *
 * @warning This method should avoid computing the taint of $lhs and $rhs, because it might be
 * called in preorder, but it would trigger a postorder visit.
 *
 * @param Node $opNode The binary operation; only its flags (and its kind, for debug output) are read
 * @param Node|mixed $lhs Left operand, either a Node or a scalar
 * @param Node|mixed $rhs Right operand, either a Node or a scalar
 * @return int SecurityCheckPlugin::NO_TAINT or SecurityCheckPlugin::ALL_TAINT_FLAGS
 */
protected function getBinOpTaintMask( Node $opNode, $lhs, $rhs ): int {
	// Operators that never preserve taint. BINARY_ADD is intentionally not listed here:
	// it gets its own check below because of array addition.
	static $safeBinOps = [
		// Arithmetic
		\ast\flags\BINARY_SUB,
		\ast\flags\BINARY_MUL,
		\ast\flags\BINARY_DIV,
		\ast\flags\BINARY_MOD,
		\ast\flags\BINARY_POW,
		// Bit shifts
		\ast\flags\BINARY_SHIFT_LEFT,
		\ast\flags\BINARY_SHIFT_RIGHT,
		// Boolean logic
		\ast\flags\BINARY_BOOL_AND,
		\ast\flags\BINARY_BOOL_OR,
		\ast\flags\BINARY_BOOL_XOR,
		// Comparisons
		\ast\flags\BINARY_IS_EQUAL,
		\ast\flags\BINARY_IS_NOT_EQUAL,
		\ast\flags\BINARY_IS_IDENTICAL,
		\ast\flags\BINARY_IS_NOT_IDENTICAL,
		\ast\flags\BINARY_IS_SMALLER,
		\ast\flags\BINARY_IS_SMALLER_OR_EQUAL,
		\ast\flags\BINARY_IS_GREATER,
		\ast\flags\BINARY_IS_GREATER_OR_EQUAL,
		\ast\flags\BINARY_SPACESHIP,
	];

	// Operators that we know preserve taint. This list only serves to tell them apart from
	// operators we have never considered, which get reported via debug() below.
	static $knownUnsafeOps = [
		\ast\flags\BINARY_ADD,
		\ast\flags\BINARY_CONCAT,
		\ast\flags\BINARY_COALESCE,
		// The result of bitwise ops can be a string, so we err on the side of caution.
		\ast\flags\BINARY_BITWISE_AND,
		\ast\flags\BINARY_BITWISE_OR,
		\ast\flags\BINARY_BITWISE_XOR,
	];

	$op = $opNode->flags;

	if ( in_array( $op, $safeBinOps, true ) ) {
		return SecurityCheckPlugin::NO_TAINT;
	}

	if ( $op === \ast\flags\BINARY_ADD ) {
		// Array addition is the only way `+` can preserve taintedness; if at least one operand
		// is definitely NOT an array, then the result will be an integer, or a fatal error will
		// occur (depending on the other operand). Note that if we cannot be 100% sure that the
		// node cannot be an array (e.g. if it has mixed type), we err on the side of caution and
		// consider it potentially tainted.
		$bothCanBeArrays = $this->nodeCanBeArray( $lhs ) && $this->nodeCanBeArray( $rhs );
		if ( !$bothCanBeArrays ) {
			return SecurityCheckPlugin::NO_TAINT;
		}
	}

	if ( !in_array( $op, $knownUnsafeOps, true ) ) {
		// Not in either list: still treated as unsafe, but leave a trace for debugging.
		$this->debug(
			__METHOD__,
			'Unhandled binop ' . Debug::astFlagDescription( $op, $opNode->kind )
		);
	}

	return SecurityCheckPlugin::ALL_TAINT_FLAGS;
}
2530
/**
 * Compute the possible UnionType of a node, making sure that no issue is emitted.
 *
 * @param Node $node
 * @return UnionType|null Null if an IssueException was thrown while inferring the type
 */
protected function getNodeType( Node $node ): ?UnionType {
	// Since we don't emit issues, use a cloned context so phan won't cache any union type. In particular,
	// in the event of possibly-undefined union types, the issue about a variable being possibly undeclared
	// would get lost, because we don't emit it, and phan will cache the union type without the undefined bit.
	$clonedContext = clone $this->context;
	// Don't emit issues, as this method might be called e.g. on a LHS (see T249647). The IssueException
	// is caught (and only logged) right below instead.
	// FIXME Improve this. Is it still necessary now that we cache taintedness?
	$catchIssueException = false;

	try {
		$type = UnionTypeVisitor::unionTypeFromNode(
			$this->code_base,
			$clonedContext,
			$node,
			$catchIssueException
		);
	} catch ( IssueException $e ) {
		$this->debug( __METHOD__, "Got error " . $this->getDebugInfo( $e ) );
		$type = null;
	}

	return $type;
}
2557
/**
 * Check whether the given Node is an array (and definitely not a string).
 *
 * @param Node|mixed $node A node object or simple value from AST tree
 * @return bool True for array literals, and for nodes whose inferred type is array-like
 *  without also including mixed or string. False for anything else, scalars included.
 */
protected function nodeIsArray( $node ): bool {
	if ( !( $node instanceof Node ) ) {
		// A simple literal is never an array.
		return false;
	}
	if ( $node->kind === \ast\AST_ARRAY ) {
		// Array literal: no need to infer any type.
		return true;
	}

	$type = $this->getNodeType( $node );
	if ( !$type || !$type->hasArrayLike( $this->code_base ) ) {
		// Unknown type, or nothing array-like in it.
		return false;
	}
	// It can be an array; make sure that it cannot also be a string (or just anything).
	return !( $type->hasMixedOrNonEmptyMixedType() || $type->hasStringType() );
}
2577
/**
 * Check whether $node may be an array. When in doubt (type that cannot be inferred, mixed,
 * or empty real type), the answer is yes.
 *
 * @param Node|mixed $node A node object or simple value from AST tree
 * @return bool
 */
protected function nodeCanBeArray( $node ): bool {
	if ( !( $node instanceof Node ) ) {
		return is_array( $node );
	}

	$inferredType = $this->getNodeType( $node );
	if ( !$inferredType ) {
		// The type could not be inferred, so an array cannot be excluded.
		return true;
	}

	// Only the real part of the union type is looked at here.
	$realType = $inferredType->getRealUnionType();
	if ( $realType->hasArrayLike( $this->code_base ) ) {
		return true;
	}
	// Mixed or empty real types don't exclude arrays either.
	return $realType->hasMixedOrNonEmptyMixedType() || $realType->isEmpty();
}
2595
/**
 * Check whether the given Node (or literal) can be a string.
 *
 * @todo Unclear if this should return true for things that can
 *   autocast to a string (e.g. ints)
 * @param Node|mixed $node A node object or simple value from AST tree
 * @return bool True for string literals, and for nodes whose inferred type includes string
 */
protected function nodeCanBeString( $node ): bool {
	if ( $node instanceof Node ) {
		$type = $this->getNodeType( $node );
		// @todo Should having mixed type result in returning false here?
		return $type !== null && $type->hasStringType();
	}

	// Simple literal
	return is_string( $node );
}
2613
/**
 * Check whether the given element can be (or definitely is) an array with integer keys,
 * based on its real union type.
 *
 * @param TypedElementInterface $el
 * @param bool $definitely Whether $el is definitely numkey, not just possibly
 * @return bool
 */
protected function elementCanBeNumkey( TypedElementInterface $el, bool $definitely ): bool {
	$unionType = $el->getUnionType();
	$realType = $unionType->getRealUnionType();

	if ( $realType->hasMixedOrNonEmptyMixedType() || $realType->isEmpty() ) {
		// Nothing is known about the real type: "possibly" yes, "definitely" no.
		return !$definitely;
	}
	if ( !$realType->hasArray() ) {
		return false;
	}

	$keyTypes = GenericArrayType::keyUnionTypeFromTypeSetStrict( $unionType->getRealTypeSet() );
	if ( $definitely ) {
		// Integer keys must be the only possible key type.
		return $keyTypes === GenericArrayType::KEY_INT;
	}
	// NOTE: This might lead to false positives if the array has mixed keys, but since we're talking about
	// SQLi, we prefer false positives. Also, the mixed keys case isn't fully handled, see backpropagateArgTaint
	return ( $keyTypes & GenericArrayType::KEY_INT ) !== 0;
}
2635
/**
 * Given a Node (or literal) that is used as array key, check whether the key can be an integer.
 * Floats are not considered ints here.
 * Note: this method cannot be 100% accurate. First, we don't use the real type, so we may have a false
 * positive if e.g. a parameter is annotated as string but the argument is an int. Second, even if
 * something has a real type and is not an integer, it could be a string that gets autocast to an integer.
 *
 * @param Node|mixed $node A node object or simple value from AST tree
 * @return bool Is it an int?
 * @fixme A lot of duplication with other similar methods...
 */
protected function nodeCanBeIntKey( $node ): bool {
	if ( $node instanceof Node ) {
		$type = $this->getNodeType( $node );
		// An unknown, mixed or empty type cannot exclude integers.
		return !$type
			|| $type->hasIntType()
			|| $type->hasMixedOrNonEmptyMixedType()
			|| $type->isEmpty();
	}

	// Simple number; make sure to include float here for PHP 8.1 compat: T307504
	if ( is_int( $node ) || is_float( $node ) ) {
		return true;
	}

	// Strings that are canonical representation of numbers are coerced to int keys. Let PHP
	// itself do the coercion by using the value as a key, and then look at the resulting key.
	$coercedKey = array_keys( [ $node => 'foo' ] )[0];
	return is_int( $coercedKey );
}
2664
/**
 * Get the phan objects from the return line of a Func/Method
 *
 * The main use case is a method returning a member (e.g. return $this->foo),
 * whose result is later used for something evil, e.g. echo $someObj->getFoo().
 * Knowing the returned objects lets us remember that $this->foo is outputted,
 * so that a warning can be triggered if somewhere else in the code someone
 * calls $someObj->setFoo( $unsafe ).
 *
 * This of course will only work in simple cases. It may also potentially
 * have false positives if one instance is used solely for escaped stuff
 * and a different instance is used for unsafe values that are later
 * escaped, as all the different instances are treated the same.
 *
 * The return statement must be trivial (e.g. return $this->foo;); even
 * something as simple as $a = $this->foo; return $a; will not work.
 * However, this code path will only happen if the plugin encounters the
 * code to output the value prior to reading the code that sets the value to
 * something evil. The other code path where the set happens first is much
 * more robust and hopefully the more common code path.
 *
 * @param FunctionInterface $func The function/method. Must use Analyzable trait
 * @return TypedElementInterface[] An array of phan objects, without Variable instances.
 *  Keys are the same as in the stored list.
 */
public function getReturnObjsOfFunc( FunctionInterface $func ): array {
	$retObjs = self::getRetObjs( $func );
	if ( $retObjs === null ) {
		// We still have to see the function. Analyze it now, then look again.
		$this->analyzeFunc( $func );
		$retObjs = self::getRetObjs( $func );
	}
	if ( $retObjs === null ) {
		// If it still doesn't exist, perhaps we reached the recursion limit, or it may be a recursive
		// function, or a kind of function that we can't handle.
		return [];
	}

	// Note that if a function is recursively calling itself, this list might be incomplete.
	// This could be remediated with another dynamic property (e.g. retObjsCollected), initialized
	// inside visitMethod in preorder, and set to true inside visitMethod in postorder.
	// It would be pointless, though, as returning a partial list is better than returning no list.
	$isNotVariable = static function ( TypedElementInterface $el ): bool {
		return !( $el instanceof Variable );
	};
	return array_filter( $retObjs, $isNotVariable );
}
2714
/**
 * Shorthand to check if $child is subclass of $parent, i.e. whether the expanded types
 * of $child include the type of $parent.
 *
 * @param FullyQualifiedClassName $child
 * @param FullyQualifiedClassName $parent
 * @param CodeBase $codeBase
 * @return bool
 */
public static function isSubclassOf(
	FullyQualifiedClassName $child,
	FullyQualifiedClassName $parent,
	CodeBase $codeBase
): bool {
	$expandedChildTypes = $child->asType()->asExpandedTypes( $codeBase );
	return $expandedChildTypes->hasType( $parent->asType() );
}
2730	}