Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 540
0.00% covered (danger)
0.00%
0 / 17
CRAP
0.00% covered (danger)
0.00%
0 / 1
SecurityCheckPlugin
0.00% covered (danger)
0.00%
0 / 540
0.00% covered (danger)
0.00%
0 / 17
4830
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 assertRequiredConfig
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
6
 getMergeVariableInfoClosure
0.00% covered (danger)
0.00%
0 / 41
0.00% covered (danger)
0.00%
0 / 1
342
 analyzeStringLiteralStatement
0.00% covered (danger)
0.00%
0 / 38
0.00% covered (danger)
0.00%
0 / 1
72
 taintToString
0.00% covered (danger)
0.00%
0 / 52
0.00% covered (danger)
0.00%
0 / 1
30
 builtinFuncHasTaint
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getBuiltinFuncTaint
0.00% covered (danger)
0.00%
0 / 38
0.00% covered (danger)
0.00%
0 / 1
56
 assertFunctionTaintArrayWellFormed
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
72
 getCustomFuncTaints
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
0
 isFalsePositive
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 parseTaintLine
0.00% covered (danger)
0.00%
0 / 40
0.00% covered (danger)
0.00%
0 / 1
156
 modifyParamSinkTaint
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 modifyArgTaint
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 convertTaintNameToConstant
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
2
 getPHPFuncTaints
0.00% covered (danger)
0.00%
0 / 295
0.00% covered (danger)
0.00%
0 / 1
2
 getBeforeLoopBodyAnalysisVisitorClassName
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 clearCaches
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
1<?php declare( strict_types=1 );
2
3/**
4 * Base class for SecurityCheckPlugin. Extend if you want to customize.
5 *
6 * Copyright (C) 2017  Brian Wolff <bawolff@gmail.com>
7 *
8 * @license GPL-2.0-or-later
9 */
10
11namespace SecurityCheckPlugin;
12
13use ast\Node;
14use Closure;
15use InvalidArgumentException;
16use LogicException;
17use Phan\CodeBase;
18use Phan\Config;
19use Phan\Language\Context;
20use Phan\Language\Element\Comment\Builder;
21use Phan\Language\Element\FunctionInterface;
22use Phan\Language\Element\Variable;
23use Phan\Language\FQSEN\FullyQualifiedFunctionLikeName;
24use Phan\Language\FQSEN\FullyQualifiedMethodName;
25use Phan\Language\Scope;
26use Phan\PluginV3;
27use Phan\PluginV3\AnalyzeLiteralStatementCapability;
28use Phan\PluginV3\BeforeLoopBodyAnalysisCapability;
29use Phan\PluginV3\MergeVariableInfoCapability;
30use Phan\PluginV3\PostAnalyzeNodeCapability;
31use Phan\PluginV3\PreAnalyzeNodeCapability;
32use RuntimeException;
33
34/**
35 * Base class used by the Generic and MediaWiki flavours of the plugin.
36 */
37abstract class SecurityCheckPlugin extends PluginV3 implements
38    PostAnalyzeNodeCapability,
39    PreAnalyzeNodeCapability,
40    BeforeLoopBodyAnalysisCapability,
41    MergeVariableInfoCapability,
42    AnalyzeLiteralStatementCapability
43{
44    use TaintednessAccessorsTrait;
45
46    // Various taint flags. The _EXEC_ varieties mean
47    // that it is unsafe to assign that type of taint
48    // to the variable in question.
49
50    public const NO_TAINT = 0;
51
52    // Flag to denote that we don't know
53    public const UNKNOWN_TAINT = 1 << 0;
54
55    // Flag for function parameters and the like, where it
56    // preserves whatever taint the function is given.
57    public const PRESERVE_TAINT = 1 << 1;
58
59    // In future might separate out different types of html quoting.
60    // e.g. "<div data-foo='" . htmlspecialchars( $bar ) . "'>";
61    // is unsafe.
62    public const HTML_TAINT = 1 << 2;
63    public const HTML_EXEC_TAINT = 1 << 3;
64
65    public const SQL_TAINT = 1 << 4;
66    public const SQL_EXEC_TAINT = 1 << 5;
67
68    public const SHELL_TAINT = 1 << 6;
69    public const SHELL_EXEC_TAINT = 1 << 7;
70
71    public const SERIALIZE_TAINT = 1 << 8;
72    public const SERIALIZE_EXEC_TAINT = 1 << 9;
73
74    // Tainted paths, as input to include(), require() and some FS functions (path traversal)
75    public const PATH_TAINT = 1 << 10;
76    public const PATH_EXEC_TAINT = 1 << 11;
77
78    // User-controlled code, for RCE
79    public const CODE_TAINT = 1 << 12;
80    public const CODE_EXEC_TAINT = 1 << 13;
81
82    // User-controlled regular expressions, for ReDoS
83    public const REGEX_TAINT = 1 << 14;
84    public const REGEX_EXEC_TAINT = 1 << 15;
85
86    // To allow people to add other application specific taints.
87    public const CUSTOM1_TAINT = 1 << 16;
88    public const CUSTOM1_EXEC_TAINT = 1 << 17;
89    public const CUSTOM2_TAINT = 1 << 18;
90    public const CUSTOM2_EXEC_TAINT = 1 << 19;
91
92    // Special purpose for supporting MediaWiki's IDatabase::select
93    // and friends. Like SQL_TAINT, but only applies to the numeric
94    // keys of an array. Note: These are not included in YES_TAINT/EXEC_TAINT.
95    // e.g. given $f = [ $_GET['foo'] ]; $f would have the flag, but
96    // $g = $_GET['foo']; or $h = [ 's' => $_GET['foo'] ] would not.
97    // The associative keys also have this flag if they are tainted.
98    // It is also assumed anything with this flag will also have
99    // the SQL_TAINT flag set.
100    public const SQL_NUMKEY_TAINT = 1 << 20;
101    public const SQL_NUMKEY_EXEC_TAINT = 1 << 21;
102
103    // For double escaped variables
104    public const ESCAPED_TAINT = 1 << 22;
105    public const ESCAPED_EXEC_TAINT = 1 << 23;
106
107    // Special purpose flags (Starting at 2^28)
108    // TODO Renumber these. Requires changing format of the hardcoded arrays
109    // Cancels out all EXEC flags on a function arg if arg is array.
110    public const ARRAY_OK = 1 << 28;
111
112    // Do not allow autodetected taint info override given taint.
113    // TODO Store this and other special flags somewhere else in the FunctionTaintedness object, not
114    // as normal taint flags.
115    public const NO_OVERRIDE = 1 << 29;
116
117    public const VARIADIC_PARAM = 1 << 30;
118
119    // *All* function flags
120    //TODO Add a structure test for this
121    public const FUNCTION_FLAGS = self::ARRAY_OK | self::NO_OVERRIDE;
122
123    // Combination flags.
124
125    // YES_TAINT denotes all taint a user controlled variable would have
126    public const YES_TAINT = self::HTML_TAINT | self::SQL_TAINT | self::SHELL_TAINT | self::SERIALIZE_TAINT |
127        self::PATH_TAINT | self::CODE_TAINT | self::REGEX_TAINT | self::CUSTOM1_TAINT | self::CUSTOM2_TAINT;
128    public const EXEC_TAINT = self::YES_TAINT << 1;
129    // @phan-suppress-next-line PhanUnreferencedPublicClassConstant
130    public const YES_EXEC_TAINT = self::YES_TAINT | self::EXEC_TAINT;
131
132    // ALL taint is YES + special purpose taints, but not including special flags.
133    public const ALL_TAINT = self::YES_TAINT | self::SQL_NUMKEY_TAINT | self::ESCAPED_TAINT;
134    public const ALL_EXEC_TAINT =
135        self::EXEC_TAINT | self::SQL_NUMKEY_EXEC_TAINT | self::ESCAPED_EXEC_TAINT;
136    public const ALL_YES_EXEC_TAINT = self::ALL_TAINT | self::ALL_EXEC_TAINT;
137
138    // Taints that support backpropagation.
139    public const BACKPROP_TAINTS = self::ALL_EXEC_TAINT;
140
141    public const ESCAPES_HTML = ( self::YES_TAINT & ~self::HTML_TAINT ) | self::ESCAPED_EXEC_TAINT;
142
143    // As the name would suggest, this must include *ALL* possible taint flags.
144    public const ALL_TAINT_FLAGS = self::ALL_YES_EXEC_TAINT | self::FUNCTION_FLAGS |
145        self::UNKNOWN_TAINT | self::PRESERVE_TAINT | self::VARIADIC_PARAM;
146
147    /**
148     * Used to print taint debug data, see BlockAnalysisVisitor::PHAN_DEBUG_VAR_REGEX
149     */
150    private const DEBUG_TAINTEDNESS_REGEXP =
151        '/@phan-debug-var-taintedness\s+\$(' . Builder::WORD_REGEX . '(,\s*\$' . Builder::WORD_REGEX . ')*)/';
152    // @phan-suppress-previous-line PhanAccessClassConstantInternal It's just perfect for use here
153
154    public const PARAM_ANNOTATION_REGEX =
155        '/@param-taint\s+&?(?P<variadic>\.\.\.)?\$(?P<paramname>\S+)\s+(?P<taint>.*)$/';
156
157    /**
158     * @var self Passed to the visitor for context
159     */
160    public static $pluginInstance;
161
162    /**
163     * @var array<array<FunctionTaintedness|MethodLinks>> Cache of parsed docblocks. This is declared here (as opposed
164     *  to the BaseVisitor) so that PHPUnit can snapshot and restore it.
165     * @phan-var array<array{0:FunctionTaintedness,1:MethodLinks}>
166     */
167    public static $docblockCache = [];
168
169    /** @var FunctionTaintedness[] Cache of taintedness of builtin functions */
170    private static $builtinFuncTaintCache = [];
171
172    /**
173     * Save the subclass instance to make it accessible from the visitor
174     */
175    public function __construct() {
176        $this->assertRequiredConfig(); // runs first: if it throws, the static instance below is never replaced
177        self::$pluginInstance = $this; // static handle; the property docblock says it is passed to the visitor
178    }
179
180    /**
181     * Ensure that the options we need are enabled.
182     */
183    private function assertRequiredConfig(): void {
184        if ( Config::get_quick_mode() ) {
185            throw new RuntimeException( 'Quick mode must be disabled to run taint-check' );
186        }
187    }
188
189    /**
190     * @inheritDoc
191     */
192    public function getMergeVariableInfoClosure(): Closure {
193        /**
194         * For branches that are not guaranteed to be executed, merge taint info for any involved
195         * variable across all branches.
196         *
197         * @note This method is HOT, so keep it optimized
198         *
199         * @param Variable $variable
200         * @param Scope[] $scopeList
201         * @param bool $varExistsInAllScopes @phan-unused-param
202         * @suppress PhanUnreferencedClosure, PhanUndeclaredProperty, UnusedSuppression
203         */
204        return static function ( Variable $variable, array $scopeList, bool $varExistsInAllScopes ): void {
205            $varName = $variable->getName();
206
207            $vars = [];
208            $firstVar = null;
209            foreach ( $scopeList as $scope ) {
210                $localVar = $scope->getVariableByNameOrNull( $varName );
211                if ( $localVar ) {
212                    if ( !$firstVar ) {
213                        $firstVar = $localVar;
214                    } else {
215                        $vars[] = $localVar;
216                    }
217                }
218            }
219
220            if ( !$firstVar ) {
221                return;
222            }
223
224            /** @var Taintedness $taintedness */
225            $taintedness = $prevTaint = $firstVar->taintedness ?? null;
226            /** @var MethodLinks $methodLinks */
227            $methodLinks = $prevLinks = $firstVar->taintedMethodLinks ?? null;
228            /** @var CausedByLines $error */
229            $error = $prevErr = $firstVar->taintedOriginalError ?? null;
230
231            foreach ( $vars as $localVar ) {
232                // Below we only merge data if it's non-null in the current scope and different from the previous
233                // branch. Using arrays to save all previous values and then in_array seems useless on MW core,
234                // since >99% cases of duplication are already covered by these simple checks.
235
236                $taintOrNull = $localVar->taintedness ?? null;
237                if ( $taintOrNull && $taintOrNull !== $prevTaint ) {
238                    $prevTaint = $taintOrNull;
239                    if ( $taintedness ) {
240                        $taintedness = $taintedness->asMergedWith( $taintOrNull );
241                    } else {
242                        $taintedness = $taintOrNull;
243                    }
244                }
245
246                $variableObjLinksOrNull = $localVar->taintedMethodLinks ?? null;
247                if ( $variableObjLinksOrNull && $variableObjLinksOrNull !== $prevLinks ) {
248                    $prevLinks = $variableObjLinksOrNull;
249                    if ( $methodLinks ) {
250                        $methodLinks = $methodLinks->asMergedWith( $variableObjLinksOrNull );
251                    } else {
252                        $methodLinks = $variableObjLinksOrNull;
253                    }
254                }
255
256                $varErrorOrNull = $localVar->taintedOriginalError ?? null;
257                if ( $varErrorOrNull && $varErrorOrNull !== $prevErr ) {
258                    $prevErr = $varErrorOrNull;
259                    if ( $error ) {
260                        $error = $error->asMergedWith( $varErrorOrNull );
261                    } else {
262                        $error = $varErrorOrNull;
263                    }
264                }
265            }
266
267            if ( $taintedness ) {
268                self::setTaintednessRaw( $variable, $taintedness );
269            }
270            if ( $methodLinks ) {
271                self::setMethodLinks( $variable, $methodLinks );
272            }
273            if ( $error ) {
274                self::setCausedByRaw( $variable, $error );
275            }
276        };
277    }
278
279    /**
280     * Print the taintedness of a variable, when requested
281     * @see BlockAnalysisVisitor::analyzeSubstituteVarAssert()
282     * @inheritDoc
283     * @suppress PhanUndeclaredProperty, UnusedSuppression
284     */
285    public function analyzeStringLiteralStatement( CodeBase $codeBase, Context $context, string $statement ): bool {
286        $found = false;
287        if ( preg_match_all( self::DEBUG_TAINTEDNESS_REGEXP, $statement, $matches, PREG_SET_ORDER ) ) {
288            $scope = $context->getScope();
289            foreach ( $matches as $group ) {
290                foreach ( explode( ',', $group[1] ) as $rawVar ) {
291                    $varName = ltrim( trim( $rawVar ), '$' );
292                    if ( $scope->hasVariableWithName( $varName ) ) {
293                        $var = $scope->getVariableByName( $varName );
294                        $taintOrNull = self::getTaintednessRaw( $var );
295                        $taint = $taintOrNull ? $taintOrNull->toShortString() : 'unset';
296                        $msg = "Variable {CODE} has taintedness: {DETAILS}";
297                        $params = [ "\$$varName", $taint ];
298                    } else {
299                        $msg = "Variable {CODE} doesn't exist in scope";
300                        $params = [ "\$$varName" ];
301                    }
302                    self::emitIssue(
303                        $codeBase,
304                        $context,
305                        'SecurityCheckDebugTaintedness',
306                        $msg,
307                        $params
308                    );
309                    $found = true;
310                }
311            }
312        } elseif ( str_contains( $statement, '@taint-check-debug-method-first-arg' ) ) {
313            // FIXME This is a hack. The annotation is INTERNAL, for use only in the backpropoffsets-blowup
314            // test. We should either find a better way to test that, or maybe add a public annotation
315            // for debugging taintedness of a method (probably unreadable on a single line).
316            $funcName = preg_replace( '/@taint-check-debug-method-first-arg ([a-z0-9:]+)\b.*/i', '$1', $statement );
317            // Let any exception bubble up here, the annotation is for internal use in testing
318            $fqsen = FullyQualifiedMethodName::fromStringInContext( $funcName, $context );
319            $method = $codeBase->getMethodByFQSEN( $fqsen );
320            /** @var FunctionTaintedness|null $fTaint */
321            $fTaint = $method->funcTaint ?? null;
322            if ( !$fTaint ) {
323                return false;
324            }
325            self::emitIssue(
326                $codeBase,
327                $context,
328                'SecurityCheckDebugTaintedness',
329                "Method {CODE} has first param with taintedness: {DETAILS}",
330                [ $funcName, $fTaint->getParamSinkTaint( 0 )->toShortString() ]
331            );
332            return true;
333        }
334        return $found;
335    }
336
337    /**
338     * Get a string representation of a taint integer
339     *
340     * The prefix ~ means all input taints except the letter given.
341     * The prefix * means the EXEC version of the taint.
342     */
343    public static function taintToString( int $taint ): string {
344        if ( $taint === self::NO_TAINT ) {
345            return 'NONE';
346        }
347
348        // Note, order matters here.
349        static $mapping = [
350            self::UNKNOWN_TAINT => 'UNKNOWN',
351            self::PRESERVE_TAINT => 'PRESERVE',
352            self::ALL_TAINT => 'ALL',
353            self::YES_TAINT => 'YES',
354            self::YES_TAINT &
355            ( ~self::HTML_TAINT ) => '~HTML',
356            self::YES_TAINT &
357            ( ~self::SQL_TAINT ) => '~SQL',
358            self::YES_TAINT &
359            ( ~self::SHELL_TAINT ) => '~SHELL',
360            self::YES_TAINT &
361            ( ~self::SERIALIZE_TAINT ) => '~SERIALIZE',
362            self::YES_TAINT &
363            ( ~self::CUSTOM1_TAINT ) => '~CUSTOM1',
364            self::YES_TAINT &
365            ( ~self::CUSTOM2_TAINT ) => '~CUSTOM2',
366            // We skip ~ versions of flags which shouldn't be possible.
367            self::HTML_TAINT => 'HTML',
368            self::SQL_TAINT => 'SQL',
369            self::SHELL_TAINT => 'SHELL',
370            self::ESCAPED_TAINT => 'ESCAPED',
371            self::SERIALIZE_TAINT => 'SERIALIZE',
372            self::CUSTOM1_TAINT => 'CUSTOM1',
373            self::CUSTOM2_TAINT => 'CUSTOM2',
374            self::CODE_TAINT => 'CODE',
375            self::PATH_TAINT => 'PATH',
376            self::REGEX_TAINT => 'REGEX',
377            self::SQL_NUMKEY_TAINT => 'SQL_NUMKEY',
378            self::ARRAY_OK => 'ARRAY_OK',
379            self::ALL_EXEC_TAINT => '*ALL',
380            self::HTML_EXEC_TAINT => '*HTML',
381            self::SQL_EXEC_TAINT => '*SQL',
382            self::SHELL_EXEC_TAINT => '*SHELL',
383            self::ESCAPED_EXEC_TAINT => '*ESCAPED',
384            self::SERIALIZE_EXEC_TAINT => '*SERIALIZE',
385            self::CUSTOM1_EXEC_TAINT => '*CUSTOM1',
386            self::CUSTOM2_EXEC_TAINT => '*CUSTOM2',
387            self::CODE_EXEC_TAINT => '*CODE',
388            self::PATH_EXEC_TAINT => '*PATH',
389            self::REGEX_EXEC_TAINT => '*REGEX',
390            self::SQL_NUMKEY_EXEC_TAINT => '*SQL_NUMKEY',
391        ];
392
393        $types = [];
394        foreach ( $mapping as $bitmap => $val ) {
395            if ( ( $bitmap & $taint ) === $bitmap ) {
396                $types[] = $val;
397                $taint &= ~$bitmap;
398            }
399        }
400        if ( $taint !== 0 ) {
401            $types[] = "Unrecognized: $taint";
402        }
403        return implode( ', ', $types );
404    }
405
406    public function builtinFuncHasTaint( FullyQualifiedFunctionLikeName $fqsen ): bool {
407        return $this->getBuiltinFuncTaint( $fqsen ) !== null;
408    }
409
410    /**
411     * Get the taintedness of a function
412     *
413     * This allows overriding the default taint of a function
414     *
415     * If you want to provide custom taint hints for your application,
416     * override the getCustomFuncTaints()
417     *
418     * @param FullyQualifiedFunctionLikeName $fqsen The function/method in question
419     * @return FunctionTaintedness|null Null to autodetect taintedness
420     */
421    public function getBuiltinFuncTaint( FullyQualifiedFunctionLikeName $fqsen ): ?FunctionTaintedness {
422        $name = (string)$fqsen;
423
424        if ( isset( self::$builtinFuncTaintCache[$name] ) ) {
425            return self::$builtinFuncTaintCache[$name];
426        }
427
428        static $funcTaints = null;
429        if ( $funcTaints === null ) {
430            $funcTaints = $this->getCustomFuncTaints() + $this->getPHPFuncTaints();
431        }
432
433        if ( isset( $funcTaints[$name] ) ) {
434            $rawFuncTaint = $funcTaints[$name];
435            if ( $rawFuncTaint instanceof FunctionTaintedness ) {
436                $funcTaint = $rawFuncTaint;
437            } else {
438                self::assertFunctionTaintArrayWellFormed( $rawFuncTaint );
439                // Note: for backcompat, we set NO_OVERRIDE everywhere.
440                $overallFlags = ( $rawFuncTaint['overall'] & self::FUNCTION_FLAGS ) | self::NO_OVERRIDE;
441                $funcTaint = new FunctionTaintedness(
442                    new Taintedness( $rawFuncTaint['overall'] & ~$overallFlags ),
443                    $overallFlags
444                );
445                unset( $rawFuncTaint['overall'] );
446                foreach ( $rawFuncTaint as $i => $val ) {
447                    assert( ( $val & self::UNKNOWN_TAINT ) === 0, 'Cannot set UNKNOWN' );
448                    $paramFlags = ( $val & self::FUNCTION_FLAGS ) | self::NO_OVERRIDE;
449                    // TODO Split sink and preserve in the hardcoded arrays
450                    if ( $val & self::VARIADIC_PARAM ) {
451                        $pTaint = new Taintedness( $val & ~( self::VARIADIC_PARAM | $paramFlags ) );
452                        $funcTaint = $funcTaint
453                            ->withVariadicParamSinkTaint( $i, $pTaint->withOnly( self::ALL_EXEC_TAINT ), $paramFlags )
454                            ->withVariadicParamPreservedTaint(
455                                $i,
456                                $pTaint->without( self::ALL_EXEC_TAINT )->asPreservedTaintedness()
457                            );
458                    } else {
459                        $pTaint = new Taintedness( $val & ~$paramFlags );
460                        $funcTaint = $funcTaint
461                            ->withParamSinkTaint( $i, $pTaint->withOnly( self::ALL_EXEC_TAINT ), $paramFlags )
462                            ->withParamPreservedTaint(
463                                $i,
464                                $pTaint->without( self::ALL_EXEC_TAINT )->asPreservedTaintedness()
465                            );
466                    }
467                }
468            }
469            self::$builtinFuncTaintCache[$name] = $funcTaint;
470            return self::$builtinFuncTaintCache[$name];
471        }
472        return null;
473    }
474
475    /**
476     * Assert that a taintedness array is well-formed, and fail hard if it isn't.
477     *
478     * @param int[] $taint
479     */
480    private static function assertFunctionTaintArrayWellFormed( array $taint ): void {
481        if ( !isset( $taint['overall'] ) ) {
482            throw new LogicException( 'Overall taint must be set' );
483        }
484
485        foreach ( $taint as $i => $t ) {
486            if ( !is_int( $i ) && $i !== 'overall' ) {
487                throw new LogicException( "Taint indexes must be int or 'overall', got '$i'" );
488            }
489            if ( !is_int( $t ) || ( $t & ~self::ALL_TAINT_FLAGS ) ) {
490                throw new LogicException( "Wrong taint index $i, got: " . var_export( $t, true ) );
491            }
492            if ( $t & ~self::ALL_TAINT_FLAGS ) {
493                throw new LogicException( "Taint index $i has unknown flags: " . decbin( $t ) );
494            }
495        }
496    }
497
498    /**
499     * Get an array of function taints custom for the application
500     *
501     * @return array<string,int[]|FunctionTaintedness> Array of function taints. The keys are FQSENs. The values can be
502     *   either FunctionTaintedness objects, or arrays with 'overall' string key and numeric keys for parameters.
503     *
504     *   For example: [ self::YES_TAINT, 'overall' => self::NO_TAINT ]
505     *   means that the taint of the return value is the same as the taint
506     *   of the first arg, and all other args are ignored.
507     *   [ self::HTML_EXEC_TAINT, 'overall' => self::NO_TAINT ]
508     *   Means that the first arg is output in an html context (e.g. like echo)
509     *   [ self::YES_TAINT & ~self::HTML_TAINT, 'overall' => self::NO_TAINT ]
510     *   Means that the function removes html taint (escapes) e.g. htmlspecialchars
511     *   [ 'overall' => self::YES_TAINT ]
512     *   Means that it returns a tainted value (e.g. return $_POST['foo']; )
513     * @see FunctionTaintedness for more details
514     */
515    abstract protected function getCustomFuncTaints(): array; // getBuiltinFuncTaint() unions this with getPHPFuncTaints(); on equal keys these entries win
516
517    /**
518     * Can be used to force specific issues to be marked false positives
519     *
520     * For example, a specific application might be able to recognize
521     * that we are in a CLI context, and thus the XSS is really a false positive.
522     *
523     * @param int $combinedTaint Combined and adjusted taint of LHS+RHS
524     * @param string &$msg Issue description (so plugin can modify to state why false)
525     * @param Context $context
526     * @param CodeBase $code_base
527     * @return bool Is this a false positive?
528     * @suppress PhanUnusedPublicMethodParameter No param is used
529     */
530    public function isFalsePositive(
531        int $combinedTaint,
532        string &$msg, // by reference, so an override can rewrite the issue description
533        Context $context,
534        CodeBase $code_base
535    ): bool {
536        return false; // base implementation never reports a false positive and leaves $msg untouched
537    }
538
539    /**
540     * Given a param description line, extract taint
541     *
542     * This is to allow putting taint information in method docblocks.
543     * If a function has a docblock comment like:
544     *  *  @param-taint $foo escapes_html
545     * This converts that line into:
546     *   ( self::YES_TAINT & ~self::HTML_TAINT ) | self::ESCAPED_EXEC_TAINT
547     * Multiple taint types are separated by commas
548     * (which are interpreted as bitwise OR ( "|" ). Future versions
549     * might support more complex bitwise operators, but for now it
550     * doesn't seem needed.
551     *
552     * The following keywords are supported where {type} can be
553     * html, htmlnoent, sql, shell, serialize, custom1, custom2, code, path,
554     * regex, sql_numkey, escaped.
555     *  * {type} - just set the flag. 99% you should only use 'none' or 'tainted'
556     *  * exec_{type} - sets the exec flag.
557     *  * escapes_{type} - self::YES_TAINT & ~self::{type}_TAINT.
558     *     Note: escapes_html adds the exec_escaped flag, use
559     *     escapes_htmlnoent if the value is safe to double encode.
560     *  * onlysafefor_{type}
561     *     Same as above, intended for return type declarations.
562     *     Only difference is that onlysafefor_html sets ESCAPED_TAINT instead
563     *     of ESCAPED_EXEC_TAINT
564     *  * none - self::NO_TAINT
565     *  * tainted - self::YES_TAINT
566     *  * array_ok - sets self::ARRAY_OK
567     *  * allow_override - Allow autodetected taints to override annotation
568     *
569     * @todo What about ~ operator?
570     * @note The special casing to have escapes_html always add exec_escaped
571     *   (and having htmlnoent exist) is "experimental" and may change in
572     *   future versions (Maybe all types should set exec_escaped. Maybe it
573     *   should be explicit)
574     * @note Excluding UNKNOWN here on purpose, as if we're setting it, it's not unknown
575     * @param string $line A line from the docblock
576     * @return array|null Array of [taintedness, flags], or null on no info
577     * @phan-return array{0:Taintedness,1:int}|null
578     */
579    public static function parseTaintLine( string $line ): ?array {
580        $types = '(?P<type>htmlnoent|html|sql|shell|serialize|custom1|'
581            . 'custom2|code|path|regex|sql_numkey|escaped|none|tainted)';
582        $prefixes = '(?P<prefix>escapes|onlysafefor|exec)';
583        $taintExpr = "(?P<taint>(?:{$prefixes}_)?$types|array_ok|allow_override)";
584
585        $filteredLine = preg_replace( "/((?:$taintExpr,? *)+)(?: .*)?$/", '$1', $line );
586        $taints = explode( ',', strtolower( $filteredLine ) );
587        $taints = array_map( 'trim', $taints );
588
589        $overallTaint = Taintedness::safeSingleton();
590        $overallFlags = self::NO_OVERRIDE;
591        $numberOfTaintsProcessed = 0;
592        foreach ( $taints as $taint ) {
593            $taintParts = [];
594            if ( !preg_match( "/^$taintExpr$/", $taint, $taintParts ) ) {
595                continue;
596            }
597            $numberOfTaintsProcessed++;
598            if ( $taintParts['taint'] === 'array_ok' ) {
599                $overallFlags |= self::ARRAY_OK;
600                continue;
601            }
602            if ( $taintParts['taint'] === 'allow_override' ) {
603                $overallFlags &= ~self::NO_OVERRIDE;
604                continue;
605            }
606            $taintAsInt = self::convertTaintNameToConstant( $taintParts['type'] );
607            switch ( $taintParts['prefix'] ) {
608                case '':
609                    $overallTaint = $overallTaint->with( $taintAsInt );
610                    break;
611                case 'exec':
612                    $overallTaint = $overallTaint->with( Taintedness::flagsAsYesToExecTaint( $taintAsInt ) );
613                    break;
614                case 'escapes':
615                case 'onlysafefor':
616                    $overallTaint = $overallTaint->with( self::YES_TAINT & ~$taintAsInt );
617                    if ( $taintParts['type'] === 'html' ) {
618                        if ( $taintParts['prefix'] === 'escapes' ) {
619                            $overallTaint = $overallTaint->with( self::ESCAPED_EXEC_TAINT );
620                        } else {
621                            $overallTaint = $overallTaint->with( self::ESCAPED_TAINT );
622                        }
623                    }
624                    break;
625            }
626        }
627        if ( $numberOfTaintsProcessed === 0 ) {
628            return null;
629        }
630        return [ $overallTaint, $overallFlags ];
631    }
632
633    /**
634     * Hook to override the sink taintedness of a method parameter depending on the current argument.
635     *
636     * @internal This method is unstable and may be removed without prior notice.
637     *
638     * @param Taintedness $paramSinkTaint
639     * @param Taintedness $curArgTaintedness
640     * @param Node $argument Note: This hook is not called on literals
641     * @param int $argIndex Which argument number is this
642     * @param FunctionInterface $func The function/method being called
643     * @param FunctionTaintedness $funcTaint Taint of method formal parameters
644     * @param CausedByLines $paramSinkError
645     * @param Context $context Context object
646     * @param CodeBase $code_base CodeBase object
647     * @return array<Taintedness|CausedByLines> The taint and caused-by lines to use for actual parameter
648     * @phan-return array{0:Taintedness,1:CausedByLines}
649     * @suppress PhanUnusedPublicMethodParameter
650     */
651    public function modifyParamSinkTaint(
652        Taintedness $paramSinkTaint,
653        Taintedness $curArgTaintedness,
654        Node $argument,
655        int $argIndex,
656        FunctionInterface $func,
657        FunctionTaintedness $funcTaint,
658        CausedByLines $paramSinkError,
659        Context $context,
660        CodeBase $code_base
661    ): array {
662        // Base implementation is a no-op: the sink taint and its caused-by lines are handed back as received.
663        return [ $paramSinkTaint, $paramSinkError ];
664    }
665
666    /**
667     * Hook to override how taint of an argument to method call is calculated
668     *
669     * @param Taintedness $curArgTaintedness
670     * @param Node $argument Note: This hook is not called on literals
671     * @param int $argIndex Which argument number is this
672     * @param FunctionInterface $func The function/method being called
673     * @param FunctionTaintedness $funcTaint Taint of method formal parameters
674     * @param Context $context Context object
675     * @param CodeBase $code_base CodeBase object
676     * @return Taintedness The taint to use for actual parameter
677     * @suppress PhanUnusedPublicMethodParameter
678     */
679    public function modifyArgTaint(
680        Taintedness $curArgTaintedness,
681        Node $argument,
682        int $argIndex,
683        FunctionInterface $func,
684        FunctionTaintedness $funcTaint,
685        Context $context,
686        CodeBase $code_base
687    ): Taintedness {
688        // no-op
689        return $curArgTaintedness;
690    }
691
692    /**
693     * Convert a string like 'html' to self::HTML_TAINT.
694     *
695     * @note htmlnoent treated like self::HTML_TAINT.
696     * @param string $name one of:
697     *   html, sql, shell, serialize, custom1, custom2, code, path, regex, sql_numkey,
698     *   escaped, none (= self::NO_TAINT), tainted (= self::YES_TAINT)
699     * @return int One of the TAINT constants
700     */
701    public static function convertTaintNameToConstant( string $name ): int {
702        return match ( $name ) {
703            'html', 'htmlnoent' => self::HTML_TAINT,
704            'sql' => self::SQL_TAINT,
705            'shell' => self::SHELL_TAINT,
706            'serialize' => self::SERIALIZE_TAINT,
707            'custom1' => self::CUSTOM1_TAINT,
708            'custom2' => self::CUSTOM2_TAINT,
709            'code' => self::CODE_TAINT,
710            'path' => self::PATH_TAINT,
711            'regex' => self::REGEX_TAINT,
712            'sql_numkey' => self::SQL_NUMKEY_TAINT,
713            'escaped' => self::ESCAPED_TAINT,
714            'tainted' => self::YES_TAINT,
715            'none' => self::NO_TAINT,
716            // @codeCoverageIgnoreStart
717            default => throw new InvalidArgumentException( "$name not valid taint" )
718            // @codeCoverageIgnoreEnd
719        };
720    }
721
722    /**
723     * Taints for builtin php functions
724     *
725     * @return int[][] List of func taints (See getBuiltinFuncTaint())
726     * @phan-return array<string,int[]>
727     */
728    private function getPHPFuncTaints(): array {
729        $pregMatchTaint = [
730            self::REGEX_EXEC_TAINT,
731            self::YES_TAINT,
732            // TODO: Possibly unsafe pass-by-ref
733            self::NO_TAINT,
734            self::NO_TAINT,
735            self::NO_TAINT,
736            'overall' => self::NO_TAINT,
737        ];
738        $pregReplaceTaint = [
739            self::REGEX_EXEC_TAINT,
740            // TODO: This is used for strings (in preg_replace) and callbacks (in preg_replace_callback)
741            self::YES_TAINT,
742            self::YES_TAINT,
743            self::NO_TAINT,
744            self::NO_TAINT,
745            'overall' => self::NO_TAINT
746        ];
747        return [
748            '\htmlentities' => [
749                self::ESCAPES_HTML,
750                'overall' => self::ESCAPED_TAINT
751            ],
752            '\htmlspecialchars' => [
753                self::ESCAPES_HTML,
754                'overall' => self::ESCAPED_TAINT
755            ],
756            '\escapeshellarg' => [
757                ~self::SHELL_TAINT & self::YES_TAINT,
758                'overall' => self::NO_TAINT
759            ],
760            // TODO: Perhaps we should distinguish arguments escape vs command escape
761            '\escapeshellcmd' => [
762                ~self::SHELL_TAINT & self::YES_TAINT,
763                'overall' => self::NO_TAINT
764            ],
765            '\shell_exec' => [
766                self::SHELL_EXEC_TAINT,
767                'overall' => self::YES_TAINT
768            ],
769            '\passthru' => [
770                self::SHELL_EXEC_TAINT,
771                self::NO_TAINT,
772                'overall' => self::NO_TAINT
773            ],
774            '\exec' => [
775                self::SHELL_EXEC_TAINT,
776                // TODO: This is an unsafe passbyref
777                self::NO_TAINT,
778                self::NO_TAINT,
779                'overall' => self::YES_TAINT
780            ],
781            '\system' => [
782                self::SHELL_EXEC_TAINT,
783                self::NO_TAINT,
784                'overall' => self::YES_TAINT
785            ],
786            '\proc_open' => [
787                self::SHELL_EXEC_TAINT,
788                self::NO_TAINT,
789                // TODO: Unsafe passbyref
790                self::NO_TAINT,
791                self::NO_TAINT,
792                self::NO_TAINT,
793                self::NO_TAINT,
794                // TODO: Perhaps not so safe
795                'overall' => self::NO_TAINT
796            ],
797            '\popen' => [
798                self::SHELL_EXEC_TAINT,
799                self::NO_TAINT,
800                // TODO: Perhaps not so safe
801                'overall' => self::NO_TAINT
802            ],
803            // Or any time the serialized data comes from a trusted source.
804            '\serialize' => [
805                'overall' => self::YES_TAINT & ~self::SERIALIZE_TAINT,
806            ],
807            '\unserialize' => [
808                self::SERIALIZE_EXEC_TAINT,
809                'overall' => self::NO_TAINT,
810            ],
811            '\mysql_query' => [
812                self::SQL_EXEC_TAINT,
813                'overall' => self::UNKNOWN_TAINT
814            ],
815            '\mysqli_query' => [
816                self::NO_TAINT,
817                self::SQL_EXEC_TAINT,
818                'overall' => self::UNKNOWN_TAINT
819            ],
820            '\mysqli::query' => [
821                self::SQL_EXEC_TAINT,
822                'overall' => self::UNKNOWN_TAINT
823            ],
824            '\mysqli_real_query' => [
825                self::NO_TAINT,
826                self::SQL_EXEC_TAINT,
827                'overall' => self::UNKNOWN_TAINT
828            ],
829            '\mysqli::real_query' => [
830                self::SQL_EXEC_TAINT,
831                'overall' => self::UNKNOWN_TAINT
832            ],
833            '\mysqli_fetch_all' => [
834                'overall' => self::YES_TAINT,
835            ],
836            '\mysqli_result::fetch' => [
837                'overall' => self::YES_TAINT,
838            ],
839            '\mysqli_fetch_array' => [
840                'overall' => self::YES_TAINT,
841            ],
842            '\mysqli_result::fetch_array' => [
843                'overall' => self::YES_TAINT,
844            ],
845            '\mysqli_fetch_assoc' => [
846                'overall' => self::YES_TAINT,
847            ],
848            '\mysqli_result::fetch_assoc' => [
849                'overall' => self::YES_TAINT,
850            ],
851            '\mysqli_fetch_column' => [
852                'overall' => self::YES_TAINT,
853            ],
854            '\mysqli_result::fetch_column' => [
855                'overall' => self::YES_TAINT,
856            ],
857            '\mysqli_fetch_object' => [
858                'overall' => self::YES_TAINT,
859            ],
860            '\mysqli_result::fetch_object' => [
861                'overall' => self::YES_TAINT,
862            ],
863            '\mysqli_fetch_row' => [
864                'overall' => self::YES_TAINT,
865            ],
866            '\mysqli_result::fetch_row' => [
867                'overall' => self::YES_TAINT,
868            ],
869            '\sqlite_query' => [
870                self::NO_TAINT,
871                self::SQL_EXEC_TAINT,
872                self::NO_TAINT,
873                self::NO_TAINT,
874                'overall' => self::UNKNOWN_TAINT
875            ],
876            '\sqlite_single_query' => [
877                self::NO_TAINT,
878                self::SQL_EXEC_TAINT,
879                self::NO_TAINT,
880                self::NO_TAINT,
881                'overall' => self::UNKNOWN_TAINT
882            ],
883            // Note: addslashes, addcslashes etc. intentionally omitted because they're not
884            // enough to avoid SQLi.
885            '\mysqli_escape_string' => [
886                self::NO_TAINT,
887                self::YES_TAINT & ~self::SQL_TAINT,
888                'overall' => self::NO_TAINT
889            ],
890            '\mysqli_real_escape_string' => [
891                self::NO_TAINT,
892                self::YES_TAINT & ~self::SQL_TAINT,
893                'overall' => self::NO_TAINT
894            ],
895            '\mysqli::escape_string' => [
896                self::YES_TAINT & ~self::SQL_TAINT,
897                'overall' => self::NO_TAINT
898            ],
899            '\mysqli::real_escape_string' => [
900                self::YES_TAINT & ~self::SQL_TAINT,
901                'overall' => self::NO_TAINT
902            ],
903            '\sqlite_escape_string' => [
904                self::YES_TAINT & ~self::SQL_TAINT,
905                'overall' => self::NO_TAINT
906            ],
907            '\PDO::query' => [
908                self::SQL_EXEC_TAINT,
909                self::NO_TAINT,
910                self::NO_TAINT,
911                self::NO_TAINT,
912                'overall' => self::UNKNOWN_TAINT
913            ],
914            '\PDO::prepare' => [
915                self::SQL_EXEC_TAINT,
916                self::NO_TAINT,
917                'overall' => self::UNKNOWN_TAINT
918            ],
919            '\PDO::exec' => [
920                self::SQL_EXEC_TAINT,
921                'overall' => self::NO_TAINT
922            ],
923            '\base64_encode' => [
924                self::YES_TAINT & ~self::HTML_TAINT,
925                'overall' => self::NO_TAINT
926            ],
927            '\file_put_contents' => [
928                self::PATH_EXEC_TAINT,
929                self::NO_TAINT,
930                self::NO_TAINT,
931                self::NO_TAINT,
932                'overall' => self::NO_TAINT
933            ],
934            // TODO: What about file_get_contents() and file() ?
935            '\fopen' => [
936                self::PATH_EXEC_TAINT,
937                self::NO_TAINT,
938                self::NO_TAINT,
939                self::NO_TAINT,
940                // TODO: Perhaps not so safe
941                'overall' => self::NO_TAINT
942            ],
943            '\opendir' => [
944                self::PATH_EXEC_TAINT,
945                self::NO_TAINT,
946                // TODO: Perhaps not so safe
947                'overall' => self::NO_TAINT
948            ],
949            '\rawurlencode' => [
950                self::YES_TAINT & ~self::PATH_TAINT,
951                'overall' => self::NO_TAINT
952            ],
953            '\urlencode' => [
954                self::YES_TAINT & ~self::PATH_TAINT,
955                'overall' => self::NO_TAINT
956            ],
957            '\printf' => [
958                self::HTML_EXEC_TAINT,
959                // TODO We could check if the respective specifiers are safe
960                self::HTML_EXEC_TAINT | self::VARIADIC_PARAM,
961                'overall' => self::NO_TAINT
962            ],
963            '\preg_filter' => [
964                self::REGEX_EXEC_TAINT,
965                self::YES_TAINT,
966                self::YES_TAINT,
967                self::NO_TAINT,
968                self::NO_TAINT,
969                'overall' => self::NO_TAINT
970            ],
971            '\preg_grep' => [
972                self::REGEX_EXEC_TAINT,
973                self::YES_TAINT,
974                self::NO_TAINT,
975                'overall' => self::NO_TAINT
976            ],
977            '\preg_match_all' => $pregMatchTaint,
978            '\preg_match' => $pregMatchTaint,
979            '\preg_quote' => [
980                self::YES_TAINT & ~self::REGEX_TAINT,
981                self::NO_TAINT,
982                'overall' => self::NO_TAINT
983            ],
984            '\preg_replace' => $pregReplaceTaint,
985            '\preg_replace_callback' => $pregReplaceTaint,
986            '\preg_replace_callback_array' => [
987                self::REGEX_EXEC_TAINT,
988                self::YES_TAINT,
989                self::NO_TAINT,
990                self::NO_TAINT,
991                self::NO_TAINT,
992                'overall' => self::NO_TAINT
993            ],
994            '\preg_split' => [
995                self::REGEX_EXEC_TAINT,
996                self::YES_TAINT,
997                self::NO_TAINT,
998                self::NO_TAINT,
999                'overall' => self::NO_TAINT
1000            ],
1001            // We assume that hashing functions are safe, see T272492
1002            '\md5' => [
1003                self::NO_TAINT,
1004                self::NO_TAINT,
1005                'overall' => self::NO_TAINT
1006            ],
1007            '\sha1' => [
1008                self::NO_TAINT,
1009                self::NO_TAINT,
1010                'overall' => self::NO_TAINT
1011            ],
1012            '\crc32' => [
1013                self::NO_TAINT,
1014                'overall' => self::NO_TAINT
1015            ],
1016            '\hash' => [
1017                self::NO_TAINT,
1018                self::NO_TAINT,
1019                self::NO_TAINT,
1020                self::NO_TAINT,
1021                'overall' => self::NO_TAINT
1022            ],
1023            '\hash_hmac' => [
1024                self::NO_TAINT,
1025                self::NO_TAINT,
1026                self::NO_TAINT,
1027                self::NO_TAINT,
1028                'overall' => self::NO_TAINT
1029            ],
1030            // exit() and die() became ordinary functions in PHP 8.4
1031            '\exit' => [
1032                self::HTML_EXEC_TAINT,
1033                'overall' => self::NO_TAINT
1034            ],
1035            '\die' => [
1036                self::HTML_EXEC_TAINT,
1037                'overall' => self::NO_TAINT
1038            ],
1039        ];
1040    }
1041
1042    /**
1043     * @inheritDoc
1044     */
1045    public static function getBeforeLoopBodyAnalysisVisitorClassName(): string {
1046        return TaintednessLoopVisitor::class;
1047    }
1048
1049    /**
1050     * Clear caches for testing.
1051     * @suppress PhanUnreferencedPublicMethod Used in tests (not analyzed by phan)
1052     */
1053    public static function clearCaches(): void {
1054        self::$docblockCache = [];
1055        TaintednessVisitor::$fqsensWithoutToStringCache = [];
1056    }
1057}