Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 539
0.00% covered (danger)
0.00%
0 / 17
CRAP
0.00% covered (danger)
0.00%
0 / 1
SecurityCheckPlugin
0.00% covered (danger)
0.00%
0 / 539
0.00% covered (danger)
0.00%
0 / 17
4830
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 assertRequiredConfig
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
6
 getMergeVariableInfoClosure
0.00% covered (danger)
0.00%
0 / 41
0.00% covered (danger)
0.00%
0 / 1
342
 analyzeStringLiteralStatement
0.00% covered (danger)
0.00%
0 / 38
0.00% covered (danger)
0.00%
0 / 1
72
 taintToString
0.00% covered (danger)
0.00%
0 / 52
0.00% covered (danger)
0.00%
0 / 1
30
 builtinFuncHasTaint
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getBuiltinFuncTaint
0.00% covered (danger)
0.00%
0 / 38
0.00% covered (danger)
0.00%
0 / 1
56
 assertFunctionTaintArrayWellFormed
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
72
 getCustomFuncTaints
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
0
 isFalsePositive
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 parseTaintLine
0.00% covered (danger)
0.00%
0 / 39
0.00% covered (danger)
0.00%
0 / 1
156
 modifyParamSinkTaint
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 modifyArgTaint
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 convertTaintNameToConstant
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
2
 getPHPFuncTaints
0.00% covered (danger)
0.00%
0 / 295
0.00% covered (danger)
0.00%
0 / 1
2
 getBeforeLoopBodyAnalysisVisitorClassName
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 clearCaches
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
1<?php declare( strict_types=1 );
2
3/**
4 * Base class for SecurityCheckPlugin. Extend if you want to customize.
5 *
6 * Copyright (C) 2017  Brian Wolff <bawolff@gmail.com>
7 *
8 * @license GPL-2.0-or-later
9 */
10
11namespace SecurityCheckPlugin;
12
13use ast\Node;
14use Closure;
15use InvalidArgumentException;
16use LogicException;
17use Phan\CodeBase;
18use Phan\Config;
19use Phan\Language\Context;
20use Phan\Language\Element\Comment\Builder;
21use Phan\Language\Element\FunctionInterface;
22use Phan\Language\Element\Variable;
23use Phan\Language\FQSEN\FullyQualifiedFunctionLikeName;
24use Phan\Language\FQSEN\FullyQualifiedMethodName;
25use Phan\Language\Scope;
26use Phan\PluginV3;
27use Phan\PluginV3\AnalyzeLiteralStatementCapability;
28use Phan\PluginV3\BeforeLoopBodyAnalysisCapability;
29use Phan\PluginV3\MergeVariableInfoCapability;
30use Phan\PluginV3\PostAnalyzeNodeCapability;
31use Phan\PluginV3\PreAnalyzeNodeCapability;
32use RuntimeException;
33
34/**
35 * Base class used by the Generic and MediaWiki flavours of the plugin.
36 */
37abstract class SecurityCheckPlugin extends PluginV3 implements
38    PostAnalyzeNodeCapability,
39    PreAnalyzeNodeCapability,
40    BeforeLoopBodyAnalysisCapability,
41    MergeVariableInfoCapability,
42    AnalyzeLiteralStatementCapability
43{
44    use TaintednessAccessorsTrait;
45
46    // Various taint flags. The _EXEC_ varieties mean
47    // that it is unsafe to assign that type of taint
48    // to the variable in question.
49
50    public const NO_TAINT = 0;
51
52    // Flag to denote that we don't know
53    public const UNKNOWN_TAINT = 1 << 0;
54
55    // Flag for function parameters and the like, where it
56    // preserves whatever taint the function is given.
57    public const PRESERVE_TAINT = 1 << 1;
58
59    // In future might separate out different types of html quoting.
60    // e.g. "<div data-foo='" . htmlspecialchars( $bar ) . "'>";
61    // is unsafe.
62    public const HTML_TAINT = 1 << 2;
63    public const HTML_EXEC_TAINT = 1 << 3;
64
65    public const SQL_TAINT = 1 << 4;
66    public const SQL_EXEC_TAINT = 1 << 5;
67
68    public const SHELL_TAINT = 1 << 6;
69    public const SHELL_EXEC_TAINT = 1 << 7;
70
71    public const SERIALIZE_TAINT = 1 << 8;
72    public const SERIALIZE_EXEC_TAINT = 1 << 9;
73
74    // Tainted paths, as input to include(), require() and some FS functions (path traversal)
75    public const PATH_TAINT = 1 << 10;
76    public const PATH_EXEC_TAINT = 1 << 11;
77
78    // User-controlled code, for RCE
79    public const CODE_TAINT = 1 << 12;
80    public const CODE_EXEC_TAINT = 1 << 13;
81
82    // User-controlled regular expressions, for ReDoS
83    public const REGEX_TAINT = 1 << 14;
84    public const REGEX_EXEC_TAINT = 1 << 15;
85
86    // To allow people to add other application specific taints.
87    public const CUSTOM1_TAINT = 1 << 16;
88    public const CUSTOM1_EXEC_TAINT = 1 << 17;
89    public const CUSTOM2_TAINT = 1 << 18;
90    public const CUSTOM2_EXEC_TAINT = 1 << 19;
91
92    // Special purpose for supporting MediaWiki's IDatabase::select
93    // and friends. Like SQL_TAINT, but only applies to the numeric
94    // keys of an array. Note: These are not included in YES_TAINT/EXEC_TAINT.
95    // e.g. given $f = [ $_GET['foo'] ]; $f would have the flag, but
96    // $g = $_GET['foo']; or $h = [ 's' => $_GET['foo'] ] would not.
97    // The associative keys also have this flag if they are tainted.
98    // It is also assumed anything with this flag will also have
99    // the SQL_TAINT flag set.
100    public const SQL_NUMKEY_TAINT = 1 << 20;
101    public const SQL_NUMKEY_EXEC_TAINT = 1 << 21;
102
103    // For double escaped variables
104    public const ESCAPED_TAINT = 1 << 22;
105    public const ESCAPED_EXEC_TAINT = 1 << 23;
106
107    // Special purpose flags (Starting at 2^28)
108    // TODO Renumber these. Requires changing format of the hardcoded arrays
109    // Cancels out all EXEC flags on a function arg if the arg is an array.
110    public const ARRAY_OK = 1 << 28;
111
112    // Do not allow autodetected taint info override given taint.
113    // TODO Store this and other special flags somewhere else in the FunctionTaintedness object, not
114    // as normal taint flags.
115    public const NO_OVERRIDE = 1 << 29;
116
117    public const VARIADIC_PARAM = 1 << 30;
118
119    // *All* function flags
120    //TODO Add a structure test for this
121    public const FUNCTION_FLAGS = self::ARRAY_OK | self::NO_OVERRIDE;
122
123    // Combination flags.
124
125    // YES_TAINT denotes all taint a user controlled variable would have
126    public const YES_TAINT = self::HTML_TAINT | self::SQL_TAINT | self::SHELL_TAINT | self::SERIALIZE_TAINT |
127        self::PATH_TAINT | self::CODE_TAINT | self::REGEX_TAINT | self::CUSTOM1_TAINT | self::CUSTOM2_TAINT;
128    public const EXEC_TAINT = self::YES_TAINT << 1;
129    // @phan-suppress-next-line PhanUnreferencedPublicClassConstant
130    public const YES_EXEC_TAINT = self::YES_TAINT | self::EXEC_TAINT;
131
132    // ALL taint is YES + special purpose taints, but not including special flags.
133    public const ALL_TAINT = self::YES_TAINT | self::SQL_NUMKEY_TAINT | self::ESCAPED_TAINT;
134    public const ALL_EXEC_TAINT =
135        self::EXEC_TAINT | self::SQL_NUMKEY_EXEC_TAINT | self::ESCAPED_EXEC_TAINT;
136    public const ALL_YES_EXEC_TAINT = self::ALL_TAINT | self::ALL_EXEC_TAINT;
137
138    // Taints that support backpropagation.
139    public const BACKPROP_TAINTS = self::ALL_EXEC_TAINT;
140
141    public const ESCAPES_HTML = ( self::YES_TAINT & ~self::HTML_TAINT ) | self::ESCAPED_EXEC_TAINT;
142
143    // As the name would suggest, this must include *ALL* possible taint flags.
144    public const ALL_TAINT_FLAGS = self::ALL_YES_EXEC_TAINT | self::FUNCTION_FLAGS |
145        self::UNKNOWN_TAINT | self::PRESERVE_TAINT | self::VARIADIC_PARAM;
146
147    /**
148     * Used to print taint debug data, see BlockAnalysisVisitor::PHAN_DEBUG_VAR_REGEX
149     */
150    private const DEBUG_TAINTEDNESS_REGEXP =
151        '/@phan-debug-var-taintedness\s+\$(' . Builder::WORD_REGEX . '(,\s*\$' . Builder::WORD_REGEX . ')*)/';
152    // @phan-suppress-previous-line PhanAccessClassConstantInternal It's just perfect for use here
153
154    public const PARAM_ANNOTATION_REGEX =
155        '/@param-taint\s+&?(?P<variadic>\.\.\.)?\$(?P<paramname>\S+)\s+(?P<taint>.*)$/';
156
157    /**
158     * @var self Passed to the visitor for context
159     */
160    public static $pluginInstance;
161
162    /**
163     * @var array<array<FunctionTaintedness|MethodLinks>> Cache of parsed docblocks. This is declared here (as opposed
164     *  to the BaseVisitor) so that PHPUnit can snapshot and restore it.
165     * @phan-var array<array{0:FunctionTaintedness,1:MethodLinks}>
166     */
167    public static $docblockCache = [];
168
169    /** @var FunctionTaintedness[] Cache of taintedness of builtin functions */
170    private static $builtinFuncTaintCache = [];
171
172    /**
173     * Save the subclass instance to make it accessible from the visitor
174     */
public function __construct() {
    // Check the Phan configuration first: if this throws, no instance is registered below.
    $this->assertRequiredConfig();
    // Publish this (subclass) instance statically so that the visitors can reach it.
    self::$pluginInstance = $this;
}
179
180    /**
181     * Ensure that the options we need are enabled.
182     */
private function assertRequiredConfig(): void {
    $quickModeEnabled = Config::get_quick_mode();
    if ( !$quickModeEnabled ) {
        // Configuration is fine, nothing to complain about.
        return;
    }

    throw new RuntimeException( 'Quick mode must be disabled to run taint-check' );
}
188
189    /**
190     * @inheritDoc
191     */
public function getMergeVariableInfoClosure(): Closure {
    /**
     * Combine the taint data (taintedness, method links, caused-by lines) that a variable
     * carries in each of the given branch scopes, and store the result on $variable.
     *
     * @note This closure is HOT, so keep it optimized
     *
     * @param Variable $variable
     * @param Scope[] $scopeList
     * @param bool $varExistsInAllScopes @phan-unused-param
     * @suppress PhanUnreferencedClosure, PhanUndeclaredProperty, UnusedSuppression
     */
    return static function ( Variable $variable, array $scopeList, bool $varExistsInAllScopes ): void {
        $name = $variable->getName();

        // Separate the first per-scope copy of the variable from all the following ones.
        $first = null;
        $others = [];
        foreach ( $scopeList as $branchScope ) {
            $candidate = $branchScope->getVariableByNameOrNull( $name );
            if ( !$candidate ) {
                continue;
            }
            if ( $first ) {
                $others[] = $candidate;
            } else {
                $first = $candidate;
            }
        }

        if ( !$first ) {
            // The variable does not exist in any of the scopes: nothing to merge.
            return;
        }

        // Seed the accumulators with the data of the first copy. The $last* variables remember
        // the value seen most recently, so that identical consecutive objects are not merged again.
        /** @var Taintedness $mergedTaint */
        $mergedTaint = $lastTaint = $first->taintedness ?? null;
        /** @var MethodLinks $mergedLinks */
        $mergedLinks = $lastLinks = $first->taintedMethodLinks ?? null;
        /** @var CausedByLines $mergedError */
        $mergedError = $lastError = $first->taintedOriginalError ?? null;

        foreach ( $others as $branchVar ) {
            // Data is merged only when it is set in this branch and is not the very same object as
            // in the previous branch. Tracking every previous value (and using in_array) seems
            // useless on MW core, since >99% of duplicates are already caught by these checks.

            $branchTaint = $branchVar->taintedness ?? null;
            if ( $branchTaint && $branchTaint !== $lastTaint ) {
                $lastTaint = $branchTaint;
                $mergedTaint = $mergedTaint ? $mergedTaint->asMergedWith( $branchTaint ) : $branchTaint;
            }

            $branchLinks = $branchVar->taintedMethodLinks ?? null;
            if ( $branchLinks && $branchLinks !== $lastLinks ) {
                $lastLinks = $branchLinks;
                $mergedLinks = $mergedLinks ? $mergedLinks->asMergedWith( $branchLinks ) : $branchLinks;
            }

            $branchError = $branchVar->taintedOriginalError ?? null;
            if ( $branchError && $branchError !== $lastError ) {
                $lastError = $branchError;
                $mergedError = $mergedError ? $mergedError->asMergedWith( $branchError ) : $branchError;
            }
        }

        // Only write back the pieces of data that were actually found.
        if ( $mergedTaint ) {
            self::setTaintednessRaw( $variable, $mergedTaint );
        }
        if ( $mergedLinks ) {
            self::setMethodLinks( $variable, $mergedLinks );
        }
        if ( $mergedError ) {
            self::setCausedByRaw( $variable, $mergedError );
        }
    };
}
278
279    /**
280     * Print the taintedness of a variable, when requested
281     * @see BlockAnalysisVisitor::analyzeSubstituteVarAssert()
282     * @inheritDoc
283     * @suppress PhanUndeclaredProperty, UnusedSuppression
284     */
public function analyzeStringLiteralStatement( CodeBase $codeBase, Context $context, string $statement ): bool {
    $issueType = 'SecurityCheckDebugTaintedness';

    // Case 1: one or more variable-taintedness debug annotations in the statement.
    if ( preg_match_all( self::DEBUG_TAINTEDNESS_REGEXP, $statement, $annotations, PREG_SET_ORDER ) ) {
        $emitted = false;
        $scope = $context->getScope();
        foreach ( $annotations as $annotation ) {
            // Group 1 holds the comma-separated variable list, without the first '$'.
            foreach ( explode( ',', $annotation[1] ) as $rawName ) {
                $name = ltrim( trim( $rawName ), '$' );
                if ( !$scope->hasVariableWithName( $name ) ) {
                    $template = "Variable {CODE} doesn't exist in scope";
                    $templateArgs = [ "\$$name" ];
                } else {
                    $rawTaint = self::getTaintednessRaw( $scope->getVariableByName( $name ) );
                    $details = $rawTaint ? $rawTaint->toShortString() : 'unset';
                    $template = "Variable {CODE} has taintedness: {DETAILS}";
                    $templateArgs = [ "\$$name", $details ];
                }
                self::emitIssue( $codeBase, $context, $issueType, $template, $templateArgs );
                $emitted = true;
            }
        }
        return $emitted;
    }

    // Case 2: the internal method-debugging annotation.
    if ( !str_contains( $statement, '@taint-check-debug-method-first-arg' ) ) {
        return false;
    }

    // FIXME This is a hack. The annotation is INTERNAL, for use only in the backpropoffsets-blowup
    // test. We should either find a better way to test that, or maybe add a public annotation
    // for debugging taintedness of a method (probably unreadable on a single line).
    $funcName = preg_replace( '/@taint-check-debug-method-first-arg ([a-z0-9:]+)\b.*/i', '$1', $statement );
    // Exceptions are deliberately not caught here: the annotation is only used internally in tests.
    $method = $codeBase->getMethodByFQSEN(
        FullyQualifiedMethodName::fromStringInContext( $funcName, $context )
    );
    /** @var FunctionTaintedness|null $methodTaint */
    $methodTaint = $method->funcTaint ?? null;
    if ( !$methodTaint ) {
        return false;
    }

    self::emitIssue(
        $codeBase,
        $context,
        $issueType,
        "Method {CODE} has first param with taintedness: {DETAILS}",
        [ $funcName, $methodTaint->getParamSinkTaint( 0 )->toShortString() ]
    );
    return true;
}
336
337    /**
338     * Get a string representation of a taint integer
339     *
340     * The prefix ~ means all input taints except the letter given.
341     * The prefix * means the EXEC version of the taint.
342     */
public static function taintToString( int $taint ): string {
    if ( $taint === self::NO_TAINT ) {
        return 'NONE';
    }

    // The order of the entries matters: combined bitmaps must be tried before the single
    // flags they are made of, because matched bits are removed as we go.
    static $labelsByBitmap = [
        self::UNKNOWN_TAINT => 'UNKNOWN',
        self::PRESERVE_TAINT => 'PRESERVE',
        self::ALL_TAINT => 'ALL',
        self::YES_TAINT => 'YES',
        self::YES_TAINT & ~self::HTML_TAINT => '~HTML',
        self::YES_TAINT & ~self::SQL_TAINT => '~SQL',
        self::YES_TAINT & ~self::SHELL_TAINT => '~SHELL',
        self::YES_TAINT & ~self::SERIALIZE_TAINT => '~SERIALIZE',
        self::YES_TAINT & ~self::CUSTOM1_TAINT => '~CUSTOM1',
        self::YES_TAINT & ~self::CUSTOM2_TAINT => '~CUSTOM2',
        // We skip ~ versions of flags which shouldn't be possible.
        self::HTML_TAINT => 'HTML',
        self::SQL_TAINT => 'SQL',
        self::SHELL_TAINT => 'SHELL',
        self::ESCAPED_TAINT => 'ESCAPED',
        self::SERIALIZE_TAINT => 'SERIALIZE',
        self::CUSTOM1_TAINT => 'CUSTOM1',
        self::CUSTOM2_TAINT => 'CUSTOM2',
        self::CODE_TAINT => 'CODE',
        self::PATH_TAINT => 'PATH',
        self::REGEX_TAINT => 'REGEX',
        self::SQL_NUMKEY_TAINT => 'SQL_NUMKEY',
        self::ARRAY_OK => 'ARRAY_OK',
        self::ALL_EXEC_TAINT => '*ALL',
        self::HTML_EXEC_TAINT => '*HTML',
        self::SQL_EXEC_TAINT => '*SQL',
        self::SHELL_EXEC_TAINT => '*SHELL',
        self::ESCAPED_EXEC_TAINT => '*ESCAPED',
        self::SERIALIZE_EXEC_TAINT => '*SERIALIZE',
        self::CUSTOM1_EXEC_TAINT => '*CUSTOM1',
        self::CUSTOM2_EXEC_TAINT => '*CUSTOM2',
        self::CODE_EXEC_TAINT => '*CODE',
        self::PATH_EXEC_TAINT => '*PATH',
        self::REGEX_EXEC_TAINT => '*REGEX',
        self::SQL_NUMKEY_EXEC_TAINT => '*SQL_NUMKEY',
    ];

    $labels = [];
    $remaining = $taint;
    foreach ( $labelsByBitmap as $bitmap => $label ) {
        if ( ( $remaining & $bitmap ) !== $bitmap ) {
            // Not every bit of this entry is (still) set.
            continue;
        }
        $labels[] = $label;
        // Drop the bits just described, so that narrower entries don't report them again.
        $remaining &= ~$bitmap;
    }

    // Whatever is left has no entry in the table above.
    if ( $remaining !== 0 ) {
        $labels[] = "Unrecognized: $remaining";
    }

    return implode( ', ', $labels );
}
405
/**
 * Check whether getBuiltinFuncTaint() returns taint data (i.e. not null) for the given function.
 *
 * @param FullyQualifiedFunctionLikeName $fqsen The function/method in question
 * @return bool
 */
public function builtinFuncHasTaint( FullyQualifiedFunctionLikeName $fqsen ): bool {
    $builtinTaint = $this->getBuiltinFuncTaint( $fqsen );
    return $builtinTaint !== null;
}
409
410    /**
411     * Get the taintedness of a function
412     *
413     * This allows overriding the default taint of a function
414     *
415     * If you want to provide custom taint hints for your application,
416     * override the getCustomFuncTaints()
417     *
418     * @param FullyQualifiedFunctionLikeName $fqsen The function/method in question
419     * @return FunctionTaintedness|null Null to autodetect taintedness
420     */
public function getBuiltinFuncTaint( FullyQualifiedFunctionLikeName $fqsen ): ?FunctionTaintedness {
    $key = (string)$fqsen;

    $cached = self::$builtinFuncTaintCache[$key] ?? null;
    if ( $cached !== null ) {
        return $cached;
    }

    // Built on first use and kept in a static variable. With the array union operator,
    // entries from getCustomFuncTaints() take precedence over those from getPHPFuncTaints().
    static $knownTaints = null;
    $knownTaints ??= $this->getCustomFuncTaints() + $this->getPHPFuncTaints();

    if ( !isset( $knownTaints[$key] ) ) {
        // No hardcoded data for this function.
        return null;
    }

    $raw = $knownTaints[$key];
    if ( $raw instanceof FunctionTaintedness ) {
        $result = $raw;
    } else {
        self::assertFunctionTaintArrayWellFormed( $raw );
        // Note: for backcompat, we set NO_OVERRIDE everywhere.
        $overallFlags = ( $raw['overall'] & self::FUNCTION_FLAGS ) | self::NO_OVERRIDE;
        $result = new FunctionTaintedness(
            new Taintedness( $raw['overall'] & ~$overallFlags ),
            $overallFlags
        );
        // What remains after removing 'overall' are the per-parameter entries.
        unset( $raw['overall'] );
        foreach ( $raw as $paramIdx => $paramBits ) {
            assert( ( $paramBits & self::UNKNOWN_TAINT ) === 0, 'Cannot set UNKNOWN' );
            $paramFlags = ( $paramBits & self::FUNCTION_FLAGS ) | self::NO_OVERRIDE;
            $isVariadic = (bool)( $paramBits & self::VARIADIC_PARAM );
            // The special flags (and the variadic marker, if present) are not part of the taint itself.
            $nonTaintBits = $isVariadic ? ( self::VARIADIC_PARAM | $paramFlags ) : $paramFlags;
            $paramTaint = new Taintedness( $paramBits & ~$nonTaintBits );
            // TODO Split sink and preserve in the hardcoded arrays
            if ( $isVariadic ) {
                $result = $result->withVariadicParamSinkTaint(
                    $paramIdx,
                    $paramTaint->withOnly( self::ALL_EXEC_TAINT ),
                    $paramFlags
                );
                $result = $result->withVariadicParamPreservedTaint(
                    $paramIdx,
                    $paramTaint->without( self::ALL_EXEC_TAINT )->asPreservedTaintedness()
                );
            } else {
                $result = $result->withParamSinkTaint(
                    $paramIdx,
                    $paramTaint->withOnly( self::ALL_EXEC_TAINT ),
                    $paramFlags
                );
                $result = $result->withParamPreservedTaint(
                    $paramIdx,
                    $paramTaint->without( self::ALL_EXEC_TAINT )->asPreservedTaintedness()
                );
            }
        }
    }

    self::$builtinFuncTaintCache[$key] = $result;
    return self::$builtinFuncTaintCache[$key];
}
474
475    /**
476     * Assert that a taintedness array is well-formed, and fail hard if it isn't.
477     *
478     * @param int[] $taint
479     */
private static function assertFunctionTaintArrayWellFormed( array $taint ): void {
    // The 'overall' entry is mandatory (note that isset() also rejects a null value).
    if ( !isset( $taint['overall'] ) ) {
        throw new LogicException( 'Overall taint must be set' );
    }

    foreach ( $taint as $i => $t ) {
        // Keys are either parameter indexes or the special 'overall' key.
        if ( !is_int( $i ) && $i !== 'overall' ) {
            throw new LogicException( "Taint indexes must be int or 'overall', got '$i'" );
        }
        // Values must be integer bitmasks. This check must look at the type only: if it also
        // tested for unknown bits, the more specific error below could never be reached.
        if ( !is_int( $t ) ) {
            throw new LogicException( "Wrong taint index $i, got: " . var_export( $t, true ) );
        }
        // Every bit set must be one of the known flags; the value is dumped in binary so that
        // the offending bit is easy to spot.
        if ( $t & ~self::ALL_TAINT_FLAGS ) {
            throw new LogicException( "Taint index $i has unknown flags: " . decbin( $t ) );
        }
    }
}
497
498    /**
499     * Get an array of function taints custom for the application
500     *
501     * @return array<string,int[]|FunctionTaintedness> Array of function taints. The keys are FQSENs. The values can be
502     *   either FunctionTaintedness objects, or arrays with 'overall' string key and numeric keys for parameters.
503     *
504     *   For example: [ self::YES_TAINT, 'overall' => self::NO_TAINT ]
505     *   means that the taint of the return value is the same as the taint
506     *   of the first arg, and all other args are ignored.
507     *   [ self::HTML_EXEC_TAINT, 'overall' => self::NO_TAINT ]
508     *   Means that the first arg is output in an html context (e.g. like echo)
509     *   [ self::YES_TAINT & ~self::HTML_TAINT, 'overall' => self::NO_TAINT ]
510     *   Means that the function removes html taint (escapes) e.g. htmlspecialchars
511     *   [ 'overall' => self::YES_TAINT ]
512     *   Means that it returns a tainted value (e.g. return $_POST['foo']; )
513     * @see FunctionTaintedness for more details
514     */
515    abstract protected function getCustomFuncTaints(): array;
516
517    /**
518     * Can be used to force specific issues to be marked false positives
519     *
520     * For example, a specific application might be able to recognize
521     * that we are in a CLI context, and thus the XSS is really a false positive.
522     *
523     * @param int $combinedTaint Combined and adjusted taint of LHS+RHS
524     * @param string &$msg Issue description (so plugin can modify to state why false)
525     * @param Context $context
526     * @param CodeBase $code_base
527     * @return bool Is this a false positive?
528     * @suppress PhanUnusedPublicMethodParameter No param is used
529     */
public function isFalsePositive( int $combinedTaint, string &$msg, Context $context, CodeBase $code_base ): bool {
    // Base implementation: no issue is ever reported as a false positive, and $msg is left alone.
    return false;
}
538
539    /**
540     * Given a param description line, extract taint
541     *
542     * This is to allow putting taint information in method docblocks.
543     * If a function has a docblock comment like:
544     *  *  @param-taint $foo escapes_html
545     * This converts that line into:
546     *   ( self::YES_TAINT & ~self::HTML_TAINT ) | self::ESCAPED_EXEC_TAINT
547     * Multiple taint types are separated by commas
548     * (which are interpreted as bitwise OR, "|"). Future versions
549     * might support more complex bitwise operators, but for now it
550     * doesn't seem needed.
551     *
552     * The following keywords are supported where {type} can be
553     * html, sql, shell, serialize, custom1, custom2, code, path, regex,
554     * sql_numkey, escaped.
555     *  * {type} - just set the flag. 99% of the time you should only use 'none' or 'tainted'
556     *  * exec_{type} - sets the exec flag.
557     *  * escapes_{type} - self::YES_TAINT & ~self::{type}_TAINT.
558     *     Note: escapes_html adds the exec_escaped flag, use
559     *     escapes_htmlnoent if the value is safe to double encode.
560     *  * onlysafefor_{type}
561     *     Same as above, intended for return type declarations.
562     *     Only difference is that onlysafefor_html sets ESCAPED_TAINT instead
563     *     of ESCAPED_EXEC_TAINT
564     *  * none - self::NO_TAINT
565     *  * tainted - self::YES_TAINT
566     *  * array_ok - sets self::ARRAY_OK
567     *  * allow_override - Allow autodetected taints to override annotation
568     *
569     * @todo What about ~ operator?
570     * @note The special casing to have escapes_html always add exec_escaped
571     *   (and having htmlnoent exist) is "experimental" and may change in
572     *   future versions (Maybe all types should set exec_escaped. Maybe it
573     *   should be explicit)
574     * @note Excluding UNKNOWN here on purpose, as if we're setting it, it's not unknown
575     * @param string $line A line from the docblock
576     * @return array|null Array of [taintedness, flags], or null on no info
577     * @phan-return array{0:Taintedness,1:int}|null
578     */
public static function parseTaintLine( string $line ): ?array {
    // Regex building blocks: an optional "<prefix>_" followed by a type, or a standalone flag keyword.
    $typeGroup = '(?P<type>htmlnoent|html|sql|shell|serialize|custom1|custom2|code|path|regex|sql_numkey|'
        . 'escaped|none|tainted)';
    $prefixGroup = '(?P<prefix>escapes|onlysafefor|exec)';
    $keywordExpr = "(?P<taint>(?:{$prefixGroup}_)?$typeGroup|array_ok|allow_override)";

    // Keep only the leading list of keywords, dropping any free text that follows it.
    $keywordList = preg_replace( "/((?:$keywordExpr,? *)+)(?: .*)?$/", '$1', $line );

    $taintedness = Taintedness::safeSingleton();
    // Annotations cannot be overridden by autodetected taint, unless 'allow_override' is given.
    $flags = self::NO_OVERRIDE;
    $recognized = 0;

    foreach ( explode( ',', strtolower( $keywordList ) ) as $chunk ) {
        $parts = [];
        if ( !preg_match( "/^$keywordExpr$/", trim( $chunk ), $parts ) ) {
            // Not a known keyword, ignore it.
            continue;
        }
        $recognized++;

        $keyword = $parts['taint'];
        if ( $keyword === 'array_ok' ) {
            $flags |= self::ARRAY_OK;
            continue;
        }
        if ( $keyword === 'allow_override' ) {
            $flags &= ~self::NO_OVERRIDE;
            continue;
        }

        $typeBits = self::convertTaintNameToConstant( $parts['type'] );
        $prefix = $parts['prefix'];
        if ( $prefix === '' ) {
            // Bare type: just add its flag.
            $taintedness = $taintedness->with( $typeBits );
        } elseif ( $prefix === 'exec' ) {
            $taintedness = $taintedness->with( Taintedness::flagsAsYesToExecTaint( $typeBits ) );
        } elseif ( $prefix === 'escapes' || $prefix === 'onlysafefor' ) {
            // Everything a user-controlled value would have, except the given type.
            $taintedness = $taintedness->with( self::YES_TAINT & ~$typeBits );
            if ( $parts['type'] === 'html' ) {
                // Only plain 'html' (not 'htmlnoent') also gets an ESCAPED flag:
                // the EXEC variety for escapes_, the normal one for onlysafefor_.
                $escapedFlag = $prefix === 'escapes' ? self::ESCAPED_EXEC_TAINT : self::ESCAPED_TAINT;
                $taintedness = $taintedness->with( $escapedFlag );
            }
        }
    }

    if ( $recognized === 0 ) {
        // The line contained no taint information at all.
        return null;
    }
    return [ $taintedness, $flags ];
}
631
632    /**
633     * Hook to override the sink taintedness of a method parameter depending on the current argument.
634     *
635     * @internal This method is unstable and may be removed without prior notice.
636     *
637     * @param Taintedness $paramSinkTaint
638     * @param Taintedness $curArgTaintedness
639     * @param Node $argument Note: This hook is not called on literals
640     * @param int $argIndex Which argument number is this
641     * @param FunctionInterface $func The function/method being called
642     * @param FunctionTaintedness $funcTaint Taint of method formal parameters
643     * @param CausedByLines $paramSinkError
644     * @param Context $context Context object
645     * @param CodeBase $code_base CodeBase object
646     * @return array<Taintedness|CausedByLines> The taint and caused-by lines to use for actual parameter
647     * @phan-return array{0:Taintedness,1:CausedByLines}
648     * @suppress PhanUnusedPublicMethodParameter
649     */
public function modifyParamSinkTaint(
    Taintedness $paramSinkTaint,
    Taintedness $curArgTaintedness,
    Node $argument,
    int $argIndex,
    FunctionInterface $func,
    FunctionTaintedness $funcTaint,
    CausedByLines $paramSinkError,
    Context $context,
    CodeBase $code_base
): array {
    // Base implementation: hand back the given sink taint and caused-by lines untouched.
    $unchanged = [ $paramSinkTaint, $paramSinkError ];
    return $unchanged;
}
664
665    /**
666     * Hook to override how taint of an argument to method call is calculated
667     *
668     * @param Taintedness $curArgTaintedness Taintedness of the argument as handed to this hook
669     * @param Node $argument Note: This hook is not called on literals
670     * @param int $argIndex Which argument number is this
671     * @param FunctionInterface $func The function/method being called
672     * @param FunctionTaintedness $funcTaint Taint of method formal parameters
673     * @param Context $context Context object
674     * @param CodeBase $code_base CodeBase object
675     * @return Taintedness The taint to use for actual parameter (here: $curArgTaintedness, unchanged)
676     * @suppress PhanUnusedPublicMethodParameter
677     */
678    public function modifyArgTaint(
679        Taintedness $curArgTaintedness,
680        Node $argument,
681        int $argIndex,
682        FunctionInterface $func,
683        FunctionTaintedness $funcTaint,
684        Context $context,
685        CodeBase $code_base
686    ): Taintedness {
687        // no-op: the argument's taintedness is handed back unmodified
688        return $curArgTaintedness;
689    }
690
691    /**
692     * Map a taint name such as 'html' onto the matching TAINT class constant.
693     *
694     * @note 'htmlnoent' is accepted too and yields self::HTML_TAINT, exactly like 'html'.
695     * @param string $name one of:
696     *   html, sql, shell, serialize, custom1, custom2, code, path, regex, sql_numkey,
697     *   escaped, none (= self::NO_TAINT), tainted (= self::YES_TAINT)
698     * @return int One of the TAINT constants
699     * @throws InvalidArgumentException If $name is not one of the names listed above
700     */
701    public static function convertTaintNameToConstant( string $name ): int {
702        $map = [
703            'html' => self::HTML_TAINT,
704            'htmlnoent' => self::HTML_TAINT,
705            'sql' => self::SQL_TAINT,
706            'shell' => self::SHELL_TAINT,
707            'serialize' => self::SERIALIZE_TAINT,
708            'custom1' => self::CUSTOM1_TAINT,
709            'custom2' => self::CUSTOM2_TAINT,
710            'code' => self::CODE_TAINT,
711            'path' => self::PATH_TAINT,
712            'regex' => self::REGEX_TAINT,
713            'sql_numkey' => self::SQL_NUMKEY_TAINT,
714            'escaped' => self::ESCAPED_TAINT,
715            'tainted' => self::YES_TAINT,
716            'none' => self::NO_TAINT,
717        ];
718        return $map[$name] ?? throw new InvalidArgumentException( "$name not valid taint" );
719    }
720
721    /**
722     * Hard-coded taint data for builtin PHP functions/methods, keyed by name with a leading backslash.
723     * Each entry lists one taint value per positional parameter plus an 'overall' value for the call.
724     * @return int[][] List of func taints (See getBuiltinFuncTaint())
725     * @phan-return array<string,int[]>
726     */
727    private function getPHPFuncTaints(): array {
728        $pregMatchTaint = [
729            self::REGEX_EXEC_TAINT,
730            self::YES_TAINT,
731            // TODO: Possibly unsafe pass-by-ref
732            self::NO_TAINT,
733            self::NO_TAINT,
734            self::NO_TAINT,
735            'overall' => self::NO_TAINT,
736        ];
737        $pregReplaceTaint = [
738            self::REGEX_EXEC_TAINT,
739            // TODO: This is used for strings (in preg_replace) and callbacks (in preg_replace_callback)
740            self::YES_TAINT,
741            self::YES_TAINT,
742            self::NO_TAINT,
743            self::NO_TAINT,
744            'overall' => self::NO_TAINT
745        ];
746        return [
747            '\htmlentities' => [
748                self::ESCAPES_HTML,
749                'overall' => self::ESCAPED_TAINT
750            ],
751            '\htmlspecialchars' => [
752                self::ESCAPES_HTML,
753                'overall' => self::ESCAPED_TAINT
754            ],
755            '\escapeshellarg' => [
756                ~self::SHELL_TAINT & self::YES_TAINT,
757                'overall' => self::NO_TAINT
758            ],
759            // TODO: Perhaps we should distinguish arguments escape vs command escape
760            '\escapeshellcmd' => [
761                ~self::SHELL_TAINT & self::YES_TAINT,
762                'overall' => self::NO_TAINT
763            ],
764            '\shell_exec' => [
765                self::SHELL_EXEC_TAINT,
766                'overall' => self::YES_TAINT
767            ],
768            '\passthru' => [
769                self::SHELL_EXEC_TAINT,
770                self::NO_TAINT,
771                'overall' => self::NO_TAINT
772            ],
773            '\exec' => [
774                self::SHELL_EXEC_TAINT,
775                // TODO: This is an unsafe passbyref
776                self::NO_TAINT,
777                self::NO_TAINT,
778                'overall' => self::YES_TAINT
779            ],
780            '\system' => [
781                self::SHELL_EXEC_TAINT,
782                self::NO_TAINT,
783                'overall' => self::YES_TAINT
784            ],
785            '\proc_open' => [
786                self::SHELL_EXEC_TAINT,
787                self::NO_TAINT,
788                // TODO: Unsafe passbyref
789                self::NO_TAINT,
790                self::NO_TAINT,
791                self::NO_TAINT,
792                self::NO_TAINT,
793                // TODO: Perhaps not so safe
794                'overall' => self::NO_TAINT
795            ],
796            '\popen' => [
797                self::SHELL_EXEC_TAINT,
798                self::NO_TAINT,
799                // TODO: Perhaps not so safe
800                'overall' => self::NO_TAINT
801            ],
802            // Or any time the serialized data comes from a trusted source.
803            '\serialize' => [
804                'overall' => self::YES_TAINT & ~self::SERIALIZE_TAINT,
805            ],
806            '\unserialize' => [
807                self::SERIALIZE_EXEC_TAINT,
808                'overall' => self::NO_TAINT,
809            ],
810            '\mysql_query' => [
811                self::SQL_EXEC_TAINT,
812                'overall' => self::UNKNOWN_TAINT
813            ],
814            '\mysqli_query' => [
815                self::NO_TAINT,
816                self::SQL_EXEC_TAINT,
817                'overall' => self::UNKNOWN_TAINT
818            ],
819            '\mysqli::query' => [
820                self::SQL_EXEC_TAINT,
821                'overall' => self::UNKNOWN_TAINT
822            ],
823            '\mysqli_real_query' => [
824                self::NO_TAINT,
825                self::SQL_EXEC_TAINT,
826                'overall' => self::UNKNOWN_TAINT
827            ],
828            '\mysqli::real_query' => [
829                self::SQL_EXEC_TAINT,
830                'overall' => self::UNKNOWN_TAINT
831            ],
832            '\mysqli_fetch_all' => [
833                'overall' => self::YES_TAINT,
834            ],
835            '\mysqli_result::fetch_all' => [
836                'overall' => self::YES_TAINT,
837            ],
838            '\mysqli_fetch_array' => [
839                'overall' => self::YES_TAINT,
840            ],
841            '\mysqli_result::fetch_array' => [
842                'overall' => self::YES_TAINT,
843            ],
844            '\mysqli_fetch_assoc' => [
845                'overall' => self::YES_TAINT,
846            ],
847            '\mysqli_result::fetch_assoc' => [
848                'overall' => self::YES_TAINT,
849            ],
850            '\mysqli_fetch_column' => [
851                'overall' => self::YES_TAINT,
852            ],
853            '\mysqli_result::fetch_column' => [
854                'overall' => self::YES_TAINT,
855            ],
856            '\mysqli_fetch_object' => [
857                'overall' => self::YES_TAINT,
858            ],
859            '\mysqli_result::fetch_object' => [
860                'overall' => self::YES_TAINT,
861            ],
862            '\mysqli_fetch_row' => [
863                'overall' => self::YES_TAINT,
864            ],
865            '\mysqli_result::fetch_row' => [
866                'overall' => self::YES_TAINT,
867            ],
868            '\sqlite_query' => [
869                self::NO_TAINT,
870                self::SQL_EXEC_TAINT,
871                self::NO_TAINT,
872                self::NO_TAINT,
873                'overall' => self::UNKNOWN_TAINT
874            ],
875            '\sqlite_single_query' => [
876                self::NO_TAINT,
877                self::SQL_EXEC_TAINT,
878                self::NO_TAINT,
879                self::NO_TAINT,
880                'overall' => self::UNKNOWN_TAINT
881            ],
882            // Note: addslashes, addcslashes etc. intentionally omitted because they're not
883            // enough to avoid SQLi.
884            '\mysqli_escape_string' => [
885                self::NO_TAINT,
886                self::YES_TAINT & ~self::SQL_TAINT,
887                'overall' => self::NO_TAINT
888            ],
889            '\mysqli_real_escape_string' => [
890                self::NO_TAINT,
891                self::YES_TAINT & ~self::SQL_TAINT,
892                'overall' => self::NO_TAINT
893            ],
894            '\mysqli::escape_string' => [
895                self::YES_TAINT & ~self::SQL_TAINT,
896                'overall' => self::NO_TAINT
897            ],
898            '\mysqli::real_escape_string' => [
899                self::YES_TAINT & ~self::SQL_TAINT,
900                'overall' => self::NO_TAINT
901            ],
902            '\sqlite_escape_string' => [
903                self::YES_TAINT & ~self::SQL_TAINT,
904                'overall' => self::NO_TAINT
905            ],
906            '\PDO::query' => [
907                self::SQL_EXEC_TAINT,
908                self::NO_TAINT,
909                self::NO_TAINT,
910                self::NO_TAINT,
911                'overall' => self::UNKNOWN_TAINT
912            ],
913            '\PDO::prepare' => [
914                self::SQL_EXEC_TAINT,
915                self::NO_TAINT,
916                'overall' => self::UNKNOWN_TAINT
917            ],
918            '\PDO::exec' => [
919                self::SQL_EXEC_TAINT,
920                'overall' => self::NO_TAINT
921            ],
922            '\base64_encode' => [
923                self::YES_TAINT & ~self::HTML_TAINT,
924                'overall' => self::NO_TAINT
925            ],
926            '\file_put_contents' => [
927                self::PATH_EXEC_TAINT,
928                self::NO_TAINT,
929                self::NO_TAINT,
930                self::NO_TAINT,
931                'overall' => self::NO_TAINT
932            ],
933            // TODO: What about file_get_contents() and file() ?
934            '\fopen' => [
935                self::PATH_EXEC_TAINT,
936                self::NO_TAINT,
937                self::NO_TAINT,
938                self::NO_TAINT,
939                // TODO: Perhaps not so safe
940                'overall' => self::NO_TAINT
941            ],
942            '\opendir' => [
943                self::PATH_EXEC_TAINT,
944                self::NO_TAINT,
945                // TODO: Perhaps not so safe
946                'overall' => self::NO_TAINT
947            ],
948            '\rawurlencode' => [
949                self::YES_TAINT & ~self::PATH_TAINT,
950                'overall' => self::NO_TAINT
951            ],
952            '\urlencode' => [
953                self::YES_TAINT & ~self::PATH_TAINT,
954                'overall' => self::NO_TAINT
955            ],
956            '\printf' => [
957                self::HTML_EXEC_TAINT,
958                // TODO We could check if the respective specifiers are safe
959                self::HTML_EXEC_TAINT | self::VARIADIC_PARAM,
960                'overall' => self::NO_TAINT
961            ],
962            '\preg_filter' => [
963                self::REGEX_EXEC_TAINT,
964                self::YES_TAINT,
965                self::YES_TAINT,
966                self::NO_TAINT,
967                self::NO_TAINT,
968                'overall' => self::NO_TAINT
969            ],
970            '\preg_grep' => [
971                self::REGEX_EXEC_TAINT,
972                self::YES_TAINT,
973                self::NO_TAINT,
974                'overall' => self::NO_TAINT
975            ],
976            '\preg_match_all' => $pregMatchTaint,
977            '\preg_match' => $pregMatchTaint,
978            '\preg_quote' => [
979                self::YES_TAINT & ~self::REGEX_TAINT,
980                self::NO_TAINT,
981                'overall' => self::NO_TAINT
982            ],
983            '\preg_replace' => $pregReplaceTaint,
984            '\preg_replace_callback' => $pregReplaceTaint,
985            '\preg_replace_callback_array' => [
986                self::REGEX_EXEC_TAINT,
987                self::YES_TAINT,
988                self::NO_TAINT,
989                self::NO_TAINT,
990                self::NO_TAINT,
991                'overall' => self::NO_TAINT
992            ],
993            '\preg_split' => [
994                self::REGEX_EXEC_TAINT,
995                self::YES_TAINT,
996                self::NO_TAINT,
997                self::NO_TAINT,
998                'overall' => self::NO_TAINT
999            ],
1000            // We assume that hashing functions are safe, see T272492
1001            '\md5' => [
1002                self::NO_TAINT,
1003                self::NO_TAINT,
1004                'overall' => self::NO_TAINT
1005            ],
1006            '\sha1' => [
1007                self::NO_TAINT,
1008                self::NO_TAINT,
1009                'overall' => self::NO_TAINT
1010            ],
1011            '\crc32' => [
1012                self::NO_TAINT,
1013                'overall' => self::NO_TAINT
1014            ],
1015            '\hash' => [
1016                self::NO_TAINT,
1017                self::NO_TAINT,
1018                self::NO_TAINT,
1019                self::NO_TAINT,
1020                'overall' => self::NO_TAINT
1021            ],
1022            '\hash_hmac' => [
1023                self::NO_TAINT,
1024                self::NO_TAINT,
1025                self::NO_TAINT,
1026                self::NO_TAINT,
1027                'overall' => self::NO_TAINT
1028            ],
1029            // exit() and die() became ordinary functions in PHP 8.4
1030            '\exit' => [
1031                self::HTML_EXEC_TAINT,
1032                'overall' => self::NO_TAINT
1033            ],
1034            '\die' => [
1035                self::HTML_EXEC_TAINT,
1036                'overall' => self::NO_TAINT
1037            ],
1038        ];
1039    }
1040
1041    /**
1042     * @inheritDoc
1043     */
1044    public static function getBeforeLoopBodyAnalysisVisitorClassName(): string {
1045        return TaintednessLoopVisitor::class; // the visitor's class name as a string, not an instance
1046    }
1047
1048    /**
1049     * Clear caches for testing: empties self::$docblockCache and TaintednessVisitor::$fqsensWithoutToStringCache.
1050     * @suppress PhanUnreferencedPublicMethod Used in tests (not analyzed by phan)
1051     */
1052    public static function clearCaches(): void {
1053        self::$docblockCache = [];
1054        TaintednessVisitor::$fqsensWithoutToStringCache = [];
1055    }
1056}