Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
96.52% covered (success)
96.52%
471 / 488
40.00% covered (danger)
40.00%
6 / 15
CRAP
0.00% covered (danger)
0.00%
0 / 1
SecurityCheckPlugin
96.52% covered (success)
96.52%
471 / 488
40.00% covered (danger)
40.00%
6 / 15
83
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 assertRequiredConfig
50.00% covered (danger)
50.00%
1 / 2
0.00% covered (danger)
0.00%
0 / 1
2.50
 getMergeVariableInfoClosure
92.68% covered (success)
92.68%
38 / 41
0.00% covered (danger)
0.00%
0 / 1
18.13
 analyzeStringLiteralStatement
97.30% covered (success)
97.30%
36 / 37
0.00% covered (danger)
0.00%
0 / 1
8
 taintToString
98.08% covered (success)
98.08%
51 / 52
0.00% covered (danger)
0.00%
0 / 1
5
 builtinFuncHasTaint
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getBuiltinFuncTaint
100.00% covered (success)
100.00%
35 / 35
100.00% covered (success)
100.00%
1 / 1
7
 assertFunctionTaintArrayWellFormed
55.56% covered (warning)
55.56%
5 / 9
0.00% covered (danger)
0.00%
0 / 1
13.62
 getCustomFuncTaints
n/a
0 / 0
n/a
0 / 0
0
 isFalsePositive
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 parseTaintLine
100.00% covered (success)
100.00%
40 / 40
100.00% covered (success)
100.00%
1 / 1
12
 modifyParamSinkTaint
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 modifyArgTaint
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 convertTaintNameToConstant
85.71% covered (warning)
85.71%
24 / 28
0.00% covered (danger)
0.00%
0 / 1
16.75
 getPHPFuncTaints
100.00% covered (success)
100.00%
237 / 237
100.00% covered (success)
100.00%
1 / 1
1
 getBeforeLoopBodyAnalysisVisitorClassName
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
1<?php declare( strict_types=1 );
2
3/**
4 * Base class for SecurityCheckPlugin. Extend if you want to customize.
5 *
6 * Copyright (C) 2017  Brian Wolff <bawolff@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23namespace SecurityCheckPlugin;
24
25use AssertionError;
26use ast\Node;
27use Closure;
28use Error;
29use Phan\CodeBase;
30use Phan\Config;
31use Phan\Language\Context;
32use Phan\Language\Element\Comment\Builder;
33use Phan\Language\Element\FunctionInterface;
34use Phan\Language\Element\Variable;
35use Phan\Language\FQSEN\FullyQualifiedFunctionLikeName;
36use Phan\Language\FQSEN\FullyQualifiedMethodName;
37use Phan\Language\Scope;
38use Phan\PluginV3;
39use Phan\PluginV3\AnalyzeLiteralStatementCapability;
40use Phan\PluginV3\BeforeLoopBodyAnalysisCapability;
41use Phan\PluginV3\MergeVariableInfoCapability;
42use Phan\PluginV3\PostAnalyzeNodeCapability;
43use Phan\PluginV3\PreAnalyzeNodeCapability;
44
45/**
46 * Base class used by the Generic and MediaWiki flavours of the plugin.
47 */
48abstract class SecurityCheckPlugin extends PluginV3 implements
49    PostAnalyzeNodeCapability,
50    PreAnalyzeNodeCapability,
51    BeforeLoopBodyAnalysisCapability,
52    MergeVariableInfoCapability,
53    AnalyzeLiteralStatementCapability
54{
55    use TaintednessAccessorsTrait;
56
57    // Various taint flags. The _EXEC_ varieties mean
58    // that it is unsafe to assign that type of taint
59    // to the variable in question.
60
61    public const NO_TAINT = 0;
62
63    // Flag to denote that we don't know
64    public const UNKNOWN_TAINT = 1 << 0;
65
66    // Flag for function parameters and the like, where it
67    // preserves whatever taint the function is given.
68    public const PRESERVE_TAINT = 1 << 1;
69
70    // In future might separate out different types of html quoting.
71    // e.g. "<div data-foo='" . htmlspecialchars( $bar ) . "'>";
72    // is unsafe.
73    public const HTML_TAINT = 1 << 2;
74    public const HTML_EXEC_TAINT = 1 << 3;
75
76    public const SQL_TAINT = 1 << 4;
77    public const SQL_EXEC_TAINT = 1 << 5;
78
79    public const SHELL_TAINT = 1 << 6;
80    public const SHELL_EXEC_TAINT = 1 << 7;
81
82    public const SERIALIZE_TAINT = 1 << 8;
83    public const SERIALIZE_EXEC_TAINT = 1 << 9;
84
85    // Tainted paths, as input to include(), require() and some FS functions (path traversal)
86    public const PATH_TAINT = 1 << 10;
87    public const PATH_EXEC_TAINT = 1 << 11;
88
89    // User-controlled code, for RCE
90    public const CODE_TAINT = 1 << 12;
91    public const CODE_EXEC_TAINT = 1 << 13;
92
93    // User-controlled regular expressions, for ReDoS
94    public const REGEX_TAINT = 1 << 14;
95    public const REGEX_EXEC_TAINT = 1 << 15;
96
97    // To allow people to add other application specific taints.
98    public const CUSTOM1_TAINT = 1 << 16;
99    public const CUSTOM1_EXEC_TAINT = 1 << 17;
100    public const CUSTOM2_TAINT = 1 << 18;
101    public const CUSTOM2_EXEC_TAINT = 1 << 19;
102
103    // Special purpose for supporting MediaWiki's IDatabase::select
104    // and friends. Like SQL_TAINT, but only applies to the numeric
105    // keys of an array. Note: These are not included in YES_TAINT/EXEC_TAINT.
106    // e.g. given $f = [ $_GET['foo'] ]; $f would have the flag, but
107    // $g = $_GET['foo']; or $h = [ 's' => $_GET['foo'] ] would not.
108    // The associative keys also have this flag if they are tainted.
109    // It is also assumed anything with this flag will also have
110    // the SQL_TAINT flag set.
111    public const SQL_NUMKEY_TAINT = 1 << 20;
112    public const SQL_NUMKEY_EXEC_TAINT = 1 << 21;
113
114    // For double escaped variables
115    public const ESCAPED_TAINT = 1 << 22;
116    public const ESCAPED_EXEC_TAINT = 1 << 23;
117
118    // Special purpose flags (Starting at 2^28)
119    // TODO Renumber these. Requires changing format of the hardcoded arrays
120    // Cancels out all EXEC flags on a function arg if arg is array.
121    public const ARRAY_OK = 1 << 28;
122
123    // Do not allow autodetected taint info to override the given taint.
124    // TODO Store this and other special flags somewhere else in the FunctionTaintedness object, not
125    // as normal taint flags.
126    public const NO_OVERRIDE = 1 << 29;
127
128    public const VARIADIC_PARAM = 1 << 30;
129
130    // *All* function flags
131    //TODO Add a structure test for this
132    public const FUNCTION_FLAGS = self::ARRAY_OK | self::NO_OVERRIDE;
133
134    // Combination flags.
135
136    // YES_TAINT denotes all taint a user controlled variable would have
137    public const YES_TAINT = self::HTML_TAINT | self::SQL_TAINT | self::SHELL_TAINT | self::SERIALIZE_TAINT |
138        self::PATH_TAINT | self::CODE_TAINT | self::REGEX_TAINT | self::CUSTOM1_TAINT | self::CUSTOM2_TAINT;
139    public const EXEC_TAINT = self::YES_TAINT << 1;
140    // @phan-suppress-next-line PhanUnreferencedPublicClassConstant
141    public const YES_EXEC_TAINT = self::YES_TAINT | self::EXEC_TAINT;
142
143    // ALL taint is YES + special purpose taints, but not including special flags.
144    public const ALL_TAINT = self::YES_TAINT | self::SQL_NUMKEY_TAINT | self::ESCAPED_TAINT;
145    public const ALL_EXEC_TAINT =
146        self::EXEC_TAINT | self::SQL_NUMKEY_EXEC_TAINT | self::ESCAPED_EXEC_TAINT;
147    public const ALL_YES_EXEC_TAINT = self::ALL_TAINT | self::ALL_EXEC_TAINT;
148
149    // Taints that support backpropagation.
150    public const BACKPROP_TAINTS = self::ALL_EXEC_TAINT;
151
152    public const ESCAPES_HTML = ( self::YES_TAINT & ~self::HTML_TAINT ) | self::ESCAPED_EXEC_TAINT;
153
154    // As the name would suggest, this must include *ALL* possible taint flags.
155    public const ALL_TAINT_FLAGS = self::ALL_YES_EXEC_TAINT | self::FUNCTION_FLAGS |
156        self::UNKNOWN_TAINT | self::PRESERVE_TAINT | self::VARIADIC_PARAM;
157
158    /**
159     * Used to print taint debug data, see BlockAnalysisVisitor::PHAN_DEBUG_VAR_REGEX
160     */
161    private const DEBUG_TAINTEDNESS_REGEXP =
162        '/@phan-debug-var-taintedness\s+\$(' . Builder::WORD_REGEX . '(,\s*\$' . Builder::WORD_REGEX . ')*)/';
163    // @phan-suppress-previous-line PhanAccessClassConstantInternal It's just perfect for use here
164
165    public const PARAM_ANNOTATION_REGEX =
166        '/@param-taint\s+&?(?P<variadic>\.\.\.)?\$(?P<paramname>\S+)\s+(?P<taint>.*)$/';
167
168    /**
169     * @var self Passed to the visitor for context
170     */
171    public static $pluginInstance;
172
173    /**
174     * @var array<array<FunctionTaintedness|MethodLinks>> Cache of parsed docblocks. This is declared here (as opposed
175     *  to the BaseVisitor) so that PHPUnit can snapshot and restore it.
176     * @phan-var array<array{0:FunctionTaintedness,1:MethodLinks}>
177     */
178    public static $docblockCache = [];
179
180    /** @var FunctionTaintedness[] Cache of taintedness of builtin functions */
181    private static $builtinFuncTaintCache = [];
182
/**
 * Check the Phan configuration, then publish this (sub)class instance through the static
 * $pluginInstance property so that it can be reached from the visitor.
 *
 * @throws AssertionError If Phan is running in quick mode
 */
public function __construct() {
    // The configuration check comes first: if it throws, $pluginInstance is left untouched.
    $this->assertRequiredConfig();
    self::$pluginInstance = $this;
}
190
/**
 * Verify that Phan is configured with the options taint-check requires.
 *
 * @throws AssertionError If quick mode is enabled
 */
private function assertRequiredConfig(): void {
    if ( !Config::get_quick_mode() ) {
        return;
    }

    throw new AssertionError( 'Quick mode must be disabled to run taint-check' );
}
199
/**
 * @inheritDoc
 */
public function getMergeVariableInfoClosure(): Closure {
    /**
     * Merge the taint data (taintedness, method links, caused-by error) of a variable across
     * the scopes of branches that are not guaranteed to be executed, and store the merged
     * data on the given variable.
     *
     * @note This closure is HOT, so keep it optimized
     *
     * @param Variable $variable
     * @param Scope[] $scopeList
     * @param bool $varExistsInAllScopes @phan-unused-param
     * @suppress PhanUnreferencedClosure, PhanUndeclaredProperty, UnusedSuppression
     */
    return static function ( Variable $variable, array $scopeList, bool $varExistsInAllScopes ) {
        $name = $variable->getName();

        // Separate the first scope-local copy of the variable from all the following ones.
        $first = null;
        $others = [];
        foreach ( $scopeList as $scope ) {
            $scopeVar = $scope->getVariableByNameOrNull( $name );
            if ( !$scopeVar ) {
                continue;
            }
            if ( $first ) {
                $others[] = $scopeVar;
            } else {
                $first = $scopeVar;
            }
        }

        // The variable is not defined in any of the scopes: nothing to merge.
        if ( !$first ) {
            return;
        }

        // The "merged" values start from the first copy; the "last" values remember what the
        // previously visited branch had, so that identical consecutive values are skipped.
        $mergedTaint = $first->taintedness ?? null;
        $lastTaint = $mergedTaint;
        $mergedLinks = $first->taintedMethodLinks ?? null;
        $lastLinks = $mergedLinks;
        $mergedError = $first->taintedOriginalError ?? null;
        $lastError = $mergedError;

        foreach ( $others as $other ) {
            // Data is merged only when it is non-null in the current scope and is not the very same
            // value seen in the previous branch. Collecting all previous values and using in_array
            // seems useless on MW core: >99% of duplicates are already caught by these cheap checks.

            $curTaint = $other->taintedness ?? null;
            if ( $curTaint && $curTaint !== $lastTaint ) {
                $lastTaint = $curTaint;
                if ( !$mergedTaint ) {
                    $mergedTaint = $curTaint;
                } else {
                    $mergedTaint->mergeWith( $curTaint );
                }
            }

            $curLinks = $other->taintedMethodLinks ?? null;
            if ( $curLinks && $curLinks !== $lastLinks ) {
                $lastLinks = $curLinks;
                if ( !$mergedLinks ) {
                    $mergedLinks = $curLinks;
                } else {
                    $mergedLinks->mergeWith( $curLinks );
                }
            }

            $curError = $other->taintedOriginalError ?? null;
            if ( $curError && $curError !== $lastError ) {
                $lastError = $curError;
                if ( !$mergedError ) {
                    $mergedError = $curError;
                } else {
                    $mergedError->mergeWith( $curError );
                }
            }
        }

        // Only write back the pieces of data that at least one scope actually provided.
        if ( $mergedTaint ) {
            self::setTaintednessRaw( $variable, $mergedTaint );
        }
        if ( $mergedLinks ) {
            self::setMethodLinks( $variable, $mergedLinks );
        }
        if ( $mergedError ) {
            self::setCausedByRaw( $variable, $mergedError );
        }
    };
}
286
/**
 * Handle the debug annotations that can appear in a string literal statement, emitting a
 * SecurityCheckDebugTaintedness issue with the requested taintedness.
 *
 * Two annotations are recognized: the one matched by DEBUG_TAINTEDNESS_REGEXP (one issue per
 * listed variable) and the internal "@taint-check-debug-method-first-arg" one.
 *
 * @see BlockAnalysisVisitor::analyzeSubstituteVarAssert()
 * @inheritDoc
 * @suppress PhanUndeclaredProperty, UnusedSuppression
 */
public function analyzeStringLiteralStatement( CodeBase $codeBase, Context $context, string $statement ): bool {
    if ( preg_match_all( self::DEBUG_TAINTEDNESS_REGEXP, $statement, $matches, PREG_SET_ORDER ) ) {
        $emitted = false;
        foreach ( $matches as $match ) {
            // $match[1] is the comma-separated list of variables, without the leading '$' of the first one.
            foreach ( explode( ',', $match[1] ) as $rawName ) {
                $name = ltrim( trim( $rawName ), '$' );
                if ( !$context->getScope()->hasVariableWithName( $name ) ) {
                    $template = "Variable {CODE} doesn't exist in scope";
                    $args = [ "\$$name" ];
                } else {
                    $scopeVar = $context->getScope()->getVariableByName( $name );
                    $rawTaint = self::getTaintednessRaw( $scopeVar );
                    $template = "Variable {CODE} has taintedness: {DETAILS}";
                    $args = [ "\$$name", $rawTaint ? $rawTaint->toShortString() : 'unset' ];
                }
                self::emitIssue(
                    $codeBase,
                    $context,
                    'SecurityCheckDebugTaintedness',
                    $template,
                    $args
                );
                $emitted = true;
            }
        }
        return $emitted;
    }

    if ( strpos( $statement, '@taint-check-debug-method-first-arg' ) === false ) {
        // Not one of our annotations.
        return false;
    }

    // FIXME This is a hack. The annotation is INTERNAL, for use only in the backpropoffsets-blowup
    // test. We should either find a better way to test that, or maybe add a public annotation
    // for debugging taintedness of a method (probably unreadable on a single line).
    $funcName = preg_replace( '/@taint-check-debug-method-first-arg ([a-z:]+)\b.*/i', '$1', $statement );
    // Any exception is left to bubble up here, the annotation is for internal use in testing
    $fqsen = FullyQualifiedMethodName::fromStringInContext( $funcName, $context );
    $method = $codeBase->getMethodByFQSEN( $fqsen );
    /** @var FunctionTaintedness|null $fTaint */
    $fTaint = $method->funcTaint ?? null;
    if ( !$fTaint ) {
        return false;
    }

    self::emitIssue(
        $codeBase,
        $context,
        'SecurityCheckDebugTaintedness',
        "Method {CODE} has first param with taintedness: {DETAILS}",
        [ $funcName, $fTaint->getParamSinkTaint( 0 )->toShortString() ]
    );
    return true;
}
343
/**
 * Render a taint bitmask as a human-readable, comma-separated list of names.
 *
 * A name prefixed with ~ stands for every YES_TAINT flag except the named one.
 * A name prefixed with * stands for the EXEC version of that taint.
 * Bits that match no known name are reported as "Unrecognized: <remaining bits>".
 *
 * @param int $taint
 * @return string 'NONE' for NO_TAINT, otherwise the list of matching names
 */
public static function taintToString( int $taint ): string {
    if ( $taint === self::NO_TAINT ) {
        return 'NONE';
    }

    // Note, order matters here: combined masks must be tried before the single flags they contain.
    static $names = [
        self::UNKNOWN_TAINT => 'UNKNOWN',
        self::PRESERVE_TAINT => 'PRESERVE',
        self::ALL_TAINT => 'ALL',
        self::YES_TAINT => 'YES',
        self::YES_TAINT & ~self::HTML_TAINT => '~HTML',
        self::YES_TAINT & ~self::SQL_TAINT => '~SQL',
        self::YES_TAINT & ~self::SHELL_TAINT => '~SHELL',
        self::YES_TAINT & ~self::SERIALIZE_TAINT => '~SERIALIZE',
        self::YES_TAINT & ~self::CUSTOM1_TAINT => '~CUSTOM1',
        self::YES_TAINT & ~self::CUSTOM2_TAINT => '~CUSTOM2',
        // We skip ~ versions of flags which shouldn't be possible.
        self::HTML_TAINT => 'HTML',
        self::SQL_TAINT => 'SQL',
        self::SHELL_TAINT => 'SHELL',
        self::ESCAPED_TAINT => 'ESCAPED',
        self::SERIALIZE_TAINT => 'SERIALIZE',
        self::CUSTOM1_TAINT => 'CUSTOM1',
        self::CUSTOM2_TAINT => 'CUSTOM2',
        self::CODE_TAINT => 'CODE',
        self::PATH_TAINT => 'PATH',
        self::REGEX_TAINT => 'REGEX',
        self::SQL_NUMKEY_TAINT => 'SQL_NUMKEY',
        self::ARRAY_OK => 'ARRAY_OK',
        self::ALL_EXEC_TAINT => '*ALL',
        self::HTML_EXEC_TAINT => '*HTML',
        self::SQL_EXEC_TAINT => '*SQL',
        self::SHELL_EXEC_TAINT => '*SHELL',
        self::ESCAPED_EXEC_TAINT => '*ESCAPED',
        self::SERIALIZE_EXEC_TAINT => '*SERIALIZE',
        self::CUSTOM1_EXEC_TAINT => '*CUSTOM1',
        self::CUSTOM2_EXEC_TAINT => '*CUSTOM2',
        self::CODE_EXEC_TAINT => '*CODE',
        self::PATH_EXEC_TAINT => '*PATH',
        self::REGEX_EXEC_TAINT => '*REGEX',
        self::SQL_NUMKEY_EXEC_TAINT => '*SQL_NUMKEY',
    ];

    $labels = [];
    $remaining = $taint;
    foreach ( $names as $mask => $label ) {
        if ( ( $remaining & $mask ) !== $mask ) {
            continue;
        }
        // All bits of this mask are set: name them and clear them, so that the single
        // flags contained in a combined mask are not listed a second time.
        $labels[] = $label;
        $remaining &= ~$mask;
    }

    if ( $remaining !== 0 ) {
        $labels[] = "Unrecognized: $remaining";
    }

    return implode( ', ', $labels );
}
415
/**
 * Tell whether hardcoded taint data is available for the given function or method.
 *
 * @param FullyQualifiedFunctionLikeName $fqsen
 * @return bool True if getBuiltinFuncTaint() returns something other than null for it
 */
public function builtinFuncHasTaint( FullyQualifiedFunctionLikeName $fqsen ): bool {
    $builtinTaint = $this->getBuiltinFuncTaint( $fqsen );

    return $builtinTaint !== null;
}
423
/**
 * Get the hardcoded taintedness of a function, if any.
 *
 * This allows overriding the default taint of a function. The data comes from
 * getCustomFuncTaints() and getPHPFuncTaints(), with the custom entries winning when both
 * define the same FQSEN. Results that are found are kept in a static cache.
 *
 * If you want to provide custom taint hints for your application,
 * override getCustomFuncTaints().
 *
 * @param FullyQualifiedFunctionLikeName $fqsen The function/method in question
 * @return FunctionTaintedness|null Null to autodetect taintedness
 */
public function getBuiltinFuncTaint( FullyQualifiedFunctionLikeName $fqsen ): ?FunctionTaintedness {
    $key = (string)$fqsen;

    $cached = self::$builtinFuncTaintCache[$key] ?? null;
    if ( $cached !== null ) {
        return $cached;
    }

    // The raw definitions are computed once and reused by all later calls.
    static $definitions = null;
    if ( $definitions === null ) {
        $definitions = $this->getCustomFuncTaints() + $this->getPHPFuncTaints();
    }

    if ( !isset( $definitions[$key] ) ) {
        return null;
    }

    $definition = $definitions[$key];
    if ( $definition instanceof FunctionTaintedness ) {
        $funcTaint = $definition;
    } else {
        self::assertFunctionTaintArrayWellFormed( $definition );
        // Note: for backcompat, we set NO_OVERRIDE everywhere.
        $overallFlags = ( $definition['overall'] & self::FUNCTION_FLAGS ) | self::NO_OVERRIDE;
        $funcTaint = new FunctionTaintedness( new Taintedness( $definition['overall'] & ~$overallFlags ) );
        $funcTaint->addOverallFlags( $overallFlags );

        // What remains after dropping 'overall' are the per-parameter entries.
        unset( $definition['overall'] );
        foreach ( $definition as $index => $flags ) {
            assert( ( $flags & self::UNKNOWN_TAINT ) === 0, 'Cannot set UNKNOWN' );
            $paramFlags = ( $flags & self::FUNCTION_FLAGS ) | self::NO_OVERRIDE;
            // TODO Split sink and preserve in the hardcoded arrays
            if ( ( $flags & self::VARIADIC_PARAM ) === 0 ) {
                // EXEC bits become the sink taint, everything else the preserved taint.
                $paramTaint = new Taintedness( $flags & ~$paramFlags );
                $funcTaint->setParamSinkTaint( $index, $paramTaint->withOnly( self::ALL_EXEC_TAINT ) );
                $funcTaint->setParamPreservedTaint(
                    $index,
                    $paramTaint->without( self::ALL_EXEC_TAINT )->asPreservedTaintedness()
                );
                $funcTaint->addParamFlags( $index, $paramFlags );
                continue;
            }

            // Same split as above, but registered as the variadic parameter.
            $paramTaint = new Taintedness( $flags & ~( self::VARIADIC_PARAM | $paramFlags ) );
            $funcTaint->setVariadicParamSinkTaint( $index, $paramTaint->withOnly( self::ALL_EXEC_TAINT ) );
            $funcTaint->setVariadicParamPreservedTaint(
                $index,
                $paramTaint->without( self::ALL_EXEC_TAINT )->asPreservedTaintedness()
            );
            $funcTaint->addVariadicParamFlags( $paramFlags );
        }
    }

    self::$builtinFuncTaintCache[$key] = $funcTaint;
    return $funcTaint;
}
486
/**
 * Assert that a taintedness array is well-formed, and fail hard if it isn't.
 *
 * A well-formed array has an 'overall' element, uses only integers or 'overall' as keys, and
 * has integer values made up exclusively of flags included in ALL_TAINT_FLAGS.
 *
 * @param int[] $taint
 * @throws Error If any of the above requirements is not met
 */
private static function assertFunctionTaintArrayWellFormed( array $taint ): void {
    if ( !isset( $taint['overall'] ) ) {
        throw new Error( 'Overall taint must be set' );
    }

    foreach ( $taint as $i => $t ) {
        if ( !is_int( $i ) && $i !== 'overall' ) {
            throw new Error( "Taint indexes must be int or 'overall', got '$i'" );
        }
        // Only the type is checked here. Unknown flags are handled by the next check, which
        // was previously unreachable because this one also tested the flags.
        if ( !is_int( $t ) ) {
            throw new Error( "Wrong taint index $i, got: " . var_export( $t, true ) );
        }
        if ( $t & ~self::ALL_TAINT_FLAGS ) {
            throw new Error( "Taint index $i has unknown flags: " . decbin( $t ) );
        }
    }
}
509
/**
 * Provide the application-specific function taints.
 *
 * @return array<string,int[]|FunctionTaintedness> Function taints indexed by FQSEN. Each value is either
 *   a FunctionTaintedness object, or an array having the string key 'overall' plus numeric keys for
 *   the parameters.
 *
 *   Examples of the array form:
 *   - [ self::YES_TAINT, 'overall' => self::NO_TAINT ]
 *     The taint of the return value is the same as the taint of the first arg,
 *     and all other args are ignored.
 *   - [ self::HTML_EXEC_TAINT, 'overall' => self::NO_TAINT ]
 *     The first arg is output in an html context (e.g. like echo).
 *   - [ self::YES_TAINT & ~self::HTML_TAINT, 'overall' => self::NO_TAINT ]
 *     The function removes html taint (escapes), e.g. htmlspecialchars.
 *   - [ 'overall' => self::YES_TAINT ]
 *     The function returns a tainted value (e.g. return $_POST['foo']; ).
 * @see FunctionTaintedness for more details
 */
abstract protected function getCustomFuncTaints(): array;
528
/**
 * Hook that lets a plugin force specific issues to be treated as false positives.
 *
 * For instance, an application may be able to tell that the code runs in a CLI context,
 * so that a reported XSS is really a false positive. This base implementation never
 * marks anything as a false positive.
 *
 * @param int $combinedTaint Combined and adjusted taint of LHS+RHS
 * @param string &$msg Issue description (so plugin can modify to state why false)
 * @param Context $context
 * @param CodeBase $code_base
 * @return bool Is this a false positive?
 * @suppress PhanUnusedPublicMethodParameter No param is used
 */
public function isFalsePositive(
    int $combinedTaint,
    string &$msg,
    Context $context,
    CodeBase $code_base
): bool {
    // Default: every issue is genuine.
    return false;
}
550
/**
 * Extract taint information from the description of a taint annotation.
 *
 * This is what allows putting taint information in method docblocks.
 * For a docblock comment like:
 *  *  @param-taint $foo escapes_html
 * the "escapes_html" part is converted to:
 *   ( self::YES_TAINT & ~self::HTML_TAINT ) | self::ESCAPED_EXEC_TAINT
 * Multiple taint types are separated by commas, which are interpreted
 * as bitwise OR ( "|" ). Future versions might support more complex
 * bitwise operators, but for now it doesn't seem needed.
 *
 * The following keywords are supported, where {type} can be
 * html, htmlnoent, sql, shell, serialize, custom1, custom2, code, path,
 * regex, sql_numkey, escaped, none, tainted.
 *  * {type} - just set the flag. 99% you should only use 'none' or 'tainted'
 *  * exec_{type} - sets the exec flag.
 *  * escapes_{type} - self::YES_TAINT & ~self::{type}_TAINT.
 *     Note: escapes_html adds the exec_escaped flag, use
 *     escapes_htmlnoent if the value is safe to double encode.
 *  * onlysafefor_{type}
 *     Same as above, intended for return type declarations.
 *     Only difference is that onlysafefor_html sets ESCAPED_TAINT instead
 *     of ESCAPED_EXEC_TAINT
 *  * none - self::NO_TAINT
 *  * tainted - self::YES_TAINT
 *  * array_ok - sets self::ARRAY_OK
 *  * allow_override - Allow autodetected taints to override annotation
 *     (NO_OVERRIDE is set in the returned flags unless this keyword is given)
 *
 * @todo What about ~ operator?
 * @note The special casing to have escapes_html always add exec_escaped
 *   (and having htmlnoent exist) is "experimental" and may change in
 *   future versions (Maybe all types should set exec_escaped. Maybe it
 *   should be explicit)
 * @note Excluding UNKNOWN here on purpose, as if we're setting it, it's not unknown
 * @param string $line A line from the docblock
 * @return array|null Array of [taintedness, flags], or null if no keyword was recognized
 * @phan-return array{0:Taintedness,1:int}|null
 */
public static function parseTaintLine( string $line ): ?array {
    $typeRe = '(?P<type>htmlnoent|html|sql|shell|serialize|custom1|'
        . 'custom2|code|path|regex|sql_numkey|escaped|none|tainted)';
    $prefixRe = '(?P<prefix>escapes|onlysafefor|exec)';
    $keywordRe = "(?P<taint>(?:{$prefixRe}_)?$typeRe|array_ok|allow_override)";

    // Keep the leading list of keywords only, dropping any free-form text that follows it.
    $keywordList = preg_replace( "/((?:$keywordRe,? *)+)(?: .*)?$/", '$1', $line );

    $taintedness = new Taintedness( self::NO_TAINT );
    $flags = self::NO_OVERRIDE;
    $recognized = 0;
    foreach ( explode( ',', strtolower( $keywordList ) ) as $rawKeyword ) {
        $parts = [];
        if ( !preg_match( "/^$keywordRe$/", trim( $rawKeyword ), $parts ) ) {
            // Not a keyword we know about, ignore it.
            continue;
        }
        $recognized++;

        $keyword = $parts['taint'];
        if ( $keyword === 'array_ok' ) {
            $flags |= self::ARRAY_OK;
            continue;
        }
        if ( $keyword === 'allow_override' ) {
            $flags &= ~self::NO_OVERRIDE;
            continue;
        }

        $typeFlags = self::convertTaintNameToConstant( $parts['type'] );
        $prefix = $parts['prefix'];
        if ( $prefix === '' ) {
            $taintedness->add( $typeFlags );
        } elseif ( $prefix === 'exec' ) {
            $taintedness->add( Taintedness::flagsAsYesToExecTaint( $typeFlags ) );
        } elseif ( $prefix === 'escapes' || $prefix === 'onlysafefor' ) {
            $taintedness->add( self::YES_TAINT & ~$typeFlags );
            // Only plain 'html' gets an escaped flag; 'htmlnoent' deliberately does not.
            if ( $parts['type'] === 'html' ) {
                $escapedFlag = $prefix === 'escapes' ? self::ESCAPED_EXEC_TAINT : self::ESCAPED_TAINT;
                $taintedness->add( $escapedFlag );
            }
        }
    }

    return $recognized === 0 ? null : [ $taintedness, $flags ];
}
644
/**
 * Hook for changing the sink taintedness of a method parameter based on the argument
 * currently being passed. This base implementation returns the sink taint unchanged.
 *
 * @internal This method is unstable and may be removed without prior notice.
 *
 * @param Taintedness $paramSinkTaint
 * @param Taintedness $curArgTaintedness
 * @param Node $argument Note: This hook is not called on literals
 * @param int $argIndex Which argument number is this
 * @param FunctionInterface $func The function/method being called
 * @param FunctionTaintedness $funcTaint Taint of method formal parameters
 * @param Context $context Context object
 * @param CodeBase $code_base CodeBase object
 * @return Taintedness The taint to use for actual parameter
 * @suppress PhanUnusedPublicMethodParameter
 */
public function modifyParamSinkTaint(
    Taintedness $paramSinkTaint,
    Taintedness $curArgTaintedness,
    Node $argument,
    int $argIndex,
    FunctionInterface $func,
    FunctionTaintedness $funcTaint,
    Context $context,
    CodeBase $code_base
): Taintedness {
    // Nothing to adjust by default.
    return $paramSinkTaint;
}
674
675    /**
676     * Hook to override how taint of an argument to method call is calculated
677     *
678     * @param Taintedness $curArgTaintedness
679     * @param Node $argument Note: This hook is not called on literals
680     * @param int $argIndex Which argument number is this
681     * @param FunctionInterface $func The function/method being called
682     * @param FunctionTaintedness $funcTaint Taint of method formal parameters
683     * @param Context $context Context object
684     * @param CodeBase $code_base CodeBase object
685     * @return Taintedness The taint to use for actual parameter
686     * @suppress PhanUnusedPublicMethodParameter
687     */
688    public function modifyArgTaint(
689        Taintedness $curArgTaintedness,
690        Node $argument,
691        int $argIndex,
692        FunctionInterface $func,
693        FunctionTaintedness $funcTaint,
694        Context $context,
695        CodeBase $code_base
696    ): Taintedness {
697        // no-op
698        return $curArgTaintedness;
699    }
700
701    /**
702     * Convert a string like 'html' to self::HTML_TAINT.
703     *
704     * @note htmlnoent treated like self::HTML_TAINT.
705     * @param string $name one of:
706     *   html, sql, shell, serialize, custom1, custom2, code, path, regex, sql_numkey,
707     *   escaped, none (= self::NO_TAINT), tainted (= self::YES_TAINT)
708     * @return int One of the TAINT constants
709     */
710    public static function convertTaintNameToConstant( string $name ): int {
711        switch ( $name ) {
712            case 'html':
713            case 'htmlnoent':
714                return self::HTML_TAINT;
715            case 'sql':
716                return self::SQL_TAINT;
717            case 'shell':
718                return self::SHELL_TAINT;
719            case 'serialize':
720                return self::SERIALIZE_TAINT;
721            case 'custom1':
722                return self::CUSTOM1_TAINT;
723            case 'custom2':
724                return self::CUSTOM2_TAINT;
725            case 'code':
726                return self::CODE_TAINT;
727            case 'path':
728                return self::PATH_TAINT;
729            case 'regex':
730                return self::REGEX_TAINT;
731            case 'sql_numkey':
732                return self::SQL_NUMKEY_TAINT;
733            case 'escaped':
734                return self::ESCAPED_TAINT;
735            case 'tainted':
736                return self::YES_TAINT;
737            case 'none':
738                return self::NO_TAINT;
739            default:
740                throw new AssertionError( "$name not valid taint" );
741        }
742    }
743
744    /**
745     * Taints for builtin php functions
746     *
747     * @return int[][] List of func taints (See getBuiltinFuncTaint())
748     * @phan-return array<string,int[]>
749     */
750    private function getPHPFuncTaints(): array {
751        $pregMatchTaint = [
752            self::REGEX_EXEC_TAINT,
753            self::YES_TAINT,
754            // TODO: Possibly unsafe pass-by-ref
755            self::NO_TAINT,
756            self::NO_TAINT,
757            self::NO_TAINT,
758            'overall' => self::NO_TAINT,
759        ];
760        $pregReplaceTaint = [
761            self::REGEX_EXEC_TAINT,
762            // TODO: This is used for strings (in preg_replace) and callbacks (in preg_replace_callback)
763            self::YES_TAINT,
764            self::YES_TAINT,
765            self::NO_TAINT,
766            self::NO_TAINT,
767            'overall' => self::NO_TAINT
768        ];
769        return [
770            '\htmlentities' => [
771                self::ESCAPES_HTML,
772                'overall' => self::ESCAPED_TAINT
773            ],
774            '\htmlspecialchars' => [
775                self::ESCAPES_HTML,
776                'overall' => self::ESCAPED_TAINT
777            ],
778            '\escapeshellarg' => [
779                ~self::SHELL_TAINT & self::YES_TAINT,
780                'overall' => self::NO_TAINT
781            ],
782            // TODO: Perhaps we should distinguish arguments escape vs command escape
783            '\escapeshellcmd' => [
784                ~self::SHELL_TAINT & self::YES_TAINT,
785                'overall' => self::NO_TAINT
786            ],
787            '\shell_exec' => [
788                self::SHELL_EXEC_TAINT,
789                'overall' => self::YES_TAINT
790            ],
791            '\passthru' => [
792                self::SHELL_EXEC_TAINT,
793                self::NO_TAINT,
794                'overall' => self::NO_TAINT
795            ],
796            '\exec' => [
797                self::SHELL_EXEC_TAINT,
798                // TODO: This is an unsafe passbyref
799                self::NO_TAINT,
800                self::NO_TAINT,
801                'overall' => self::YES_TAINT
802            ],
803            '\system' => [
804                self::SHELL_EXEC_TAINT,
805                self::NO_TAINT,
806                'overall' => self::YES_TAINT
807            ],
808            '\proc_open' => [
809                self::SHELL_EXEC_TAINT,
810                self::NO_TAINT,
811                // TODO: Unsafe passbyref
812                self::NO_TAINT,
813                self::NO_TAINT,
814                self::NO_TAINT,
815                self::NO_TAINT,
816                // TODO: Perhaps not so safe
817                'overall' => self::NO_TAINT
818            ],
819            '\popen' => [
820                self::SHELL_EXEC_TAINT,
821                self::NO_TAINT,
822                // TODO: Perhaps not so safe
823                'overall' => self::NO_TAINT
824            ],
825            // Or any time the serialized data comes from a trusted source.
826            '\serialize' => [
827                'overall' => self::YES_TAINT & ~self::SERIALIZE_TAINT,
828            ],
829            '\unserialize' => [
830                self::SERIALIZE_EXEC_TAINT,
831                'overall' => self::NO_TAINT,
832            ],
833            '\mysql_query' => [
834                self::SQL_EXEC_TAINT,
835                'overall' => self::UNKNOWN_TAINT
836            ],
837            '\mysqli_query' => [
838                self::NO_TAINT,
839                self::SQL_EXEC_TAINT,
840                'overall' => self::UNKNOWN_TAINT
841            ],
842            '\mysqli::query' => [
843                self::SQL_EXEC_TAINT,
844                'overall' => self::UNKNOWN_TAINT
845            ],
846            '\mysqli_real_query' => [
847                self::NO_TAINT,
848                self::SQL_EXEC_TAINT,
849                'overall' => self::UNKNOWN_TAINT
850            ],
851            '\mysqli::real_query' => [
852                self::SQL_EXEC_TAINT,
853                'overall' => self::UNKNOWN_TAINT
854            ],
855            '\sqlite_query' => [
856                self::NO_TAINT,
857                self::SQL_EXEC_TAINT,
858                self::NO_TAINT,
859                self::NO_TAINT,
860                'overall' => self::UNKNOWN_TAINT
861            ],
862            '\sqlite_single_query' => [
863                self::NO_TAINT,
864                self::SQL_EXEC_TAINT,
865                self::NO_TAINT,
866                self::NO_TAINT,
867                'overall' => self::UNKNOWN_TAINT
868            ],
869            // Note: addslashes, addcslashes etc. intentionally omitted because they're not
870            // enough to avoid SQLi.
871            '\mysqli_escape_string' => [
872                self::NO_TAINT,
873                self::YES_TAINT & ~self::SQL_TAINT,
874                'overall' => self::NO_TAINT
875            ],
876            '\mysqli_real_escape_string' => [
877                self::NO_TAINT,
878                self::YES_TAINT & ~self::SQL_TAINT,
879                'overall' => self::NO_TAINT
880            ],
881            '\mysqli::escape_string' => [
882                self::YES_TAINT & ~self::SQL_TAINT,
883                'overall' => self::NO_TAINT
884            ],
885            '\mysqli::real_escape_string' => [
886                self::YES_TAINT & ~self::SQL_TAINT,
887                'overall' => self::NO_TAINT
888            ],
889            '\sqlite_escape_string' => [
890                self::YES_TAINT & ~self::SQL_TAINT,
891                'overall' => self::NO_TAINT
892            ],
893            '\PDO::query' => [
894                self::SQL_EXEC_TAINT,
895                self::NO_TAINT,
896                self::NO_TAINT,
897                self::NO_TAINT,
898                'overall' => self::UNKNOWN_TAINT
899            ],
900            '\PDO::prepare' => [
901                self::SQL_EXEC_TAINT,
902                self::NO_TAINT,
903                'overall' => self::UNKNOWN_TAINT
904            ],
905            '\PDO::exec' => [
906                self::SQL_EXEC_TAINT,
907                'overall' => self::NO_TAINT
908            ],
909            '\base64_encode' => [
910                self::YES_TAINT & ~self::HTML_TAINT,
911                'overall' => self::NO_TAINT
912            ],
913            '\file_put_contents' => [
914                self::PATH_EXEC_TAINT,
915                self::NO_TAINT,
916                self::NO_TAINT,
917                self::NO_TAINT,
918                'overall' => self::NO_TAINT
919            ],
920            // TODO: What about file_get_contents() and file() ?
921            '\fopen' => [
922                self::PATH_EXEC_TAINT,
923                self::NO_TAINT,
924                self::NO_TAINT,
925                self::NO_TAINT,
926                // TODO: Perhaps not so safe
927                'overall' => self::NO_TAINT
928            ],
929            '\opendir' => [
930                self::PATH_EXEC_TAINT,
931                self::NO_TAINT,
932                // TODO: Perhaps not so safe
933                'overall' => self::NO_TAINT
934            ],
935            '\rawurlencode' => [
936                self::YES_TAINT & ~self::PATH_TAINT,
937                'overall' => self::NO_TAINT
938            ],
939            '\urlencode' => [
940                self::YES_TAINT & ~self::PATH_TAINT,
941                'overall' => self::NO_TAINT
942            ],
943            '\printf' => [
944                self::HTML_EXEC_TAINT,
945                // TODO We could check if the respective specifiers are safe
946                self::HTML_EXEC_TAINT | self::VARIADIC_PARAM,
947                'overall' => self::NO_TAINT
948            ],
949            '\preg_filter' => [
950                self::REGEX_EXEC_TAINT,
951                self::YES_TAINT,
952                self::YES_TAINT,
953                self::NO_TAINT,
954                self::NO_TAINT,
955                'overall' => self::NO_TAINT
956            ],
957            '\preg_grep' => [
958                self::REGEX_EXEC_TAINT,
959                self::YES_TAINT,
960                self::NO_TAINT,
961                'overall' => self::NO_TAINT
962            ],
963            '\preg_match_all' => $pregMatchTaint,
964            '\preg_match' => $pregMatchTaint,
965            '\preg_quote' => [
966                self::YES_TAINT & ~self::REGEX_TAINT,
967                self::NO_TAINT,
968                'overall' => self::NO_TAINT
969            ],
970            '\preg_replace' => $pregReplaceTaint,
971            '\preg_replace_callback' => $pregReplaceTaint,
972            '\preg_replace_callback_array' => [
973                self::REGEX_EXEC_TAINT,
974                self::YES_TAINT,
975                self::NO_TAINT,
976                self::NO_TAINT,
977                self::NO_TAINT,
978                'overall' => self::NO_TAINT
979            ],
980            '\preg_split' => [
981                self::REGEX_EXEC_TAINT,
982                self::YES_TAINT,
983                self::NO_TAINT,
984                self::NO_TAINT,
985                'overall' => self::NO_TAINT
986            ],
987            // We assume that hashing functions are safe, see T272492
988            '\md5' => [
989                self::NO_TAINT,
990                self::NO_TAINT,
991                'overall' => self::NO_TAINT
992            ],
993            '\sha1' => [
994                self::NO_TAINT,
995                self::NO_TAINT,
996                'overall' => self::NO_TAINT
997            ],
998            '\crc32' => [
999                self::NO_TAINT,
1000                'overall' => self::NO_TAINT
1001            ],
1002        ];
1003    }
1004
1005    /**
1006     * @inheritDoc
1007     */
1008    public static function getBeforeLoopBodyAnalysisVisitorClassName(): string {
1009        return TaintednessLoopVisitor::class;
1010    }
1011}