Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
96.73% covered (success)
96.73%
473 / 489
40.00% covered (danger)
40.00%
6 / 15
CRAP
0.00% covered (danger)
0.00%
0 / 1
SecurityCheckPlugin
96.73% covered (success)
96.73%
473 / 489
40.00% covered (danger)
40.00%
6 / 15
83
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 assertRequiredConfig
50.00% covered (danger)
50.00%
1 / 2
0.00% covered (danger)
0.00%
0 / 1
2.50
 getMergeVariableInfoClosure
92.68% covered (success)
92.68%
38 / 41
0.00% covered (danger)
0.00%
0 / 1
18.13
 analyzeStringLiteralStatement
97.30% covered (success)
97.30%
36 / 37
0.00% covered (danger)
0.00%
0 / 1
8
 taintToString
98.08% covered (success)
98.08%
51 / 52
0.00% covered (danger)
0.00%
0 / 1
5
 builtinFuncHasTaint
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getBuiltinFuncTaint
100.00% covered (success)
100.00%
37 / 37
100.00% covered (success)
100.00%
1 / 1
7
 assertFunctionTaintArrayWellFormed
55.56% covered (warning)
55.56%
5 / 9
0.00% covered (danger)
0.00%
0 / 1
13.62
 getCustomFuncTaints
n/a
0 / 0
n/a
0 / 0
0
 isFalsePositive
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 parseTaintLine
100.00% covered (success)
100.00%
40 / 40
100.00% covered (success)
100.00%
1 / 1
12
 modifyParamSinkTaint
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 modifyArgTaint
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 convertTaintNameToConstant
88.89% covered (warning)
88.89%
24 / 27
0.00% covered (danger)
0.00%
0 / 1
16.35
 getPHPFuncTaints
100.00% covered (success)
100.00%
237 / 237
100.00% covered (success)
100.00%
1 / 1
1
 getBeforeLoopBodyAnalysisVisitorClassName
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
1<?php declare( strict_types=1 );
2
3/**
4 * Base class for SecurityCheckPlugin. Extend if you want to customize.
5 *
6 * Copyright (C) 2017  Brian Wolff <bawolff@gmail.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23namespace SecurityCheckPlugin;
24
25use ast\Node;
26use Closure;
27use Error;
28use InvalidArgumentException;
29use Phan\CodeBase;
30use Phan\Config;
31use Phan\Language\Context;
32use Phan\Language\Element\Comment\Builder;
33use Phan\Language\Element\FunctionInterface;
34use Phan\Language\Element\Variable;
35use Phan\Language\FQSEN\FullyQualifiedFunctionLikeName;
36use Phan\Language\FQSEN\FullyQualifiedMethodName;
37use Phan\Language\Scope;
38use Phan\PluginV3;
39use Phan\PluginV3\AnalyzeLiteralStatementCapability;
40use Phan\PluginV3\BeforeLoopBodyAnalysisCapability;
41use Phan\PluginV3\MergeVariableInfoCapability;
42use Phan\PluginV3\PostAnalyzeNodeCapability;
43use Phan\PluginV3\PreAnalyzeNodeCapability;
44use RuntimeException;
45
46/**
47 * Base class used by the Generic and MediaWiki flavours of the plugin.
48 */
49abstract class SecurityCheckPlugin extends PluginV3 implements
50    PostAnalyzeNodeCapability,
51    PreAnalyzeNodeCapability,
52    BeforeLoopBodyAnalysisCapability,
53    MergeVariableInfoCapability,
54    AnalyzeLiteralStatementCapability
55{
56    use TaintednessAccessorsTrait;
57
58    // Various taint flags. The _EXEC_ varieties mean
59    // that it is unsafe to assign that type of taint
60    // to the variable in question.
61
62    public const NO_TAINT = 0;
63
64    // Flag to denote that we don't know
65    public const UNKNOWN_TAINT = 1 << 0;
66
67    // Flag for function parameters and the like, where it
68    // preserves whatever taint the function is given.
69    public const PRESERVE_TAINT = 1 << 1;
70
71    // In future might separate out different types of html quoting.
72    // e.g. "<div data-foo='" . htmlspecialchars( $bar ) . "'>";
73    // is unsafe.
74    public const HTML_TAINT = 1 << 2;
75    public const HTML_EXEC_TAINT = 1 << 3;
76
77    public const SQL_TAINT = 1 << 4;
78    public const SQL_EXEC_TAINT = 1 << 5;
79
80    public const SHELL_TAINT = 1 << 6;
81    public const SHELL_EXEC_TAINT = 1 << 7;
82
83    public const SERIALIZE_TAINT = 1 << 8;
84    public const SERIALIZE_EXEC_TAINT = 1 << 9;
85
86    // Tainted paths, as input to include(), require() and some FS functions (path traversal)
87    public const PATH_TAINT = 1 << 10;
88    public const PATH_EXEC_TAINT = 1 << 11;
89
90    // User-controlled code, for RCE
91    public const CODE_TAINT = 1 << 12;
92    public const CODE_EXEC_TAINT = 1 << 13;
93
94    // User-controlled regular expressions, for ReDoS
95    public const REGEX_TAINT = 1 << 14;
96    public const REGEX_EXEC_TAINT = 1 << 15;
97
98    // To allow people to add other application specific taints.
99    public const CUSTOM1_TAINT = 1 << 16;
100    public const CUSTOM1_EXEC_TAINT = 1 << 17;
101    public const CUSTOM2_TAINT = 1 << 18;
102    public const CUSTOM2_EXEC_TAINT = 1 << 19;
103
104    // Special purpose for supporting MediaWiki's IDatabase::select
105    // and friends. Like SQL_TAINT, but only applies to the numeric
106    // keys of an array. Note: These are not included in YES_TAINT/EXEC_TAINT.
107    // e.g. given $f = [ $_GET['foo'] ]; $f would have the flag, but
108    // $g = $_GET['foo']; or $h = [ 's' => $_GET['foo'] ] would not.
109    // The associative keys also have this flag if they are tainted.
110    // It is also assumed anything with this flag will also have
111    // the SQL_TAINT flag set.
112    public const SQL_NUMKEY_TAINT = 1 << 20;
113    public const SQL_NUMKEY_EXEC_TAINT = 1 << 21;
114
115    // For double escaped variables
116    public const ESCAPED_TAINT = 1 << 22;
117    public const ESCAPED_EXEC_TAINT = 1 << 23;
118
119    // Special purpose flags (Starting at 2^28)
120    // TODO Renumber these. Requires changing format of the hardcoded arrays
121    // Cancels out all EXEC flags on a function arg if arg is array.
122    public const ARRAY_OK = 1 << 28;
123
124    // Do not allow autodetected taint info override given taint.
125    // TODO Store this and other special flags somewhere else in the FunctionTaintedness object, not
126    // as normal taint flags.
127    public const NO_OVERRIDE = 1 << 29;
128
129    public const VARIADIC_PARAM = 1 << 30;
130
131    // *All* function flags
132    //TODO Add a structure test for this
133    public const FUNCTION_FLAGS = self::ARRAY_OK | self::NO_OVERRIDE;
134
135    // Combination flags.
136
137    // YES_TAINT denotes all taint a user controlled variable would have
138    public const YES_TAINT = self::HTML_TAINT | self::SQL_TAINT | self::SHELL_TAINT | self::SERIALIZE_TAINT |
139        self::PATH_TAINT | self::CODE_TAINT | self::REGEX_TAINT | self::CUSTOM1_TAINT | self::CUSTOM2_TAINT;
140    public const EXEC_TAINT = self::YES_TAINT << 1;
141    // @phan-suppress-next-line PhanUnreferencedPublicClassConstant
142    public const YES_EXEC_TAINT = self::YES_TAINT | self::EXEC_TAINT;
143
144    // ALL taint is YES + special purpose taints, but not including special flags.
145    public const ALL_TAINT = self::YES_TAINT | self::SQL_NUMKEY_TAINT | self::ESCAPED_TAINT;
146    public const ALL_EXEC_TAINT =
147        self::EXEC_TAINT | self::SQL_NUMKEY_EXEC_TAINT | self::ESCAPED_EXEC_TAINT;
148    public const ALL_YES_EXEC_TAINT = self::ALL_TAINT | self::ALL_EXEC_TAINT;
149
150    // Taints that support backpropagation.
151    public const BACKPROP_TAINTS = self::ALL_EXEC_TAINT;
152
153    public const ESCAPES_HTML = ( self::YES_TAINT & ~self::HTML_TAINT ) | self::ESCAPED_EXEC_TAINT;
154
155    // As the name would suggest, this must include *ALL* possible taint flags.
156    public const ALL_TAINT_FLAGS = self::ALL_YES_EXEC_TAINT | self::FUNCTION_FLAGS |
157        self::UNKNOWN_TAINT | self::PRESERVE_TAINT | self::VARIADIC_PARAM;
158
159    /**
160     * Used to print taint debug data, see BlockAnalysisVisitor::PHAN_DEBUG_VAR_REGEX
161     */
162    private const DEBUG_TAINTEDNESS_REGEXP =
163        '/@phan-debug-var-taintedness\s+\$(' . Builder::WORD_REGEX . '(,\s*\$' . Builder::WORD_REGEX . ')*)/';
164    // @phan-suppress-previous-line PhanAccessClassConstantInternal It's just perfect for use here
165
166    public const PARAM_ANNOTATION_REGEX =
167        '/@param-taint\s+&?(?P<variadic>\.\.\.)?\$(?P<paramname>\S+)\s+(?P<taint>.*)$/';
168
169    /**
170     * @var self Passed to the visitor for context
171     */
172    public static $pluginInstance;
173
174    /**
175     * @var array<array<FunctionTaintedness|MethodLinks>> Cache of parsed docblocks. This is declared here (as opposed
176     *  to the BaseVisitor) so that PHPUnit can snapshot and restore it.
177     * @phan-var array<array{0:FunctionTaintedness,1:MethodLinks}>
178     */
179    public static $docblockCache = [];
180
181    /** @var FunctionTaintedness[] Cache of taintedness of builtin functions */
182    private static $builtinFuncTaintCache = [];
183
184    /**
185     * Save the subclass instance to make it accessible from the visitor
186     */
187    public function __construct() {
188        $this->assertRequiredConfig();
189        self::$pluginInstance = $this;
190    }
191
192    /**
193     * Ensure that the options we need are enabled.
194     */
195    private function assertRequiredConfig(): void {
196        if ( Config::get_quick_mode() ) {
197            throw new RuntimeException( 'Quick mode must be disabled to run taint-check' );
198        }
199    }
200
201    /**
202     * @inheritDoc
203     */
204    public function getMergeVariableInfoClosure(): Closure {
205        /**
206         * For branches that are not guaranteed to be executed, merge taint info for any involved
207         * variable across all branches.
208         *
209         * @note This method is HOT, so keep it optimized
210         *
211         * @param Variable $variable
212         * @param Scope[] $scopeList
213         * @param bool $varExistsInAllScopes @phan-unused-param
214         * @suppress PhanUnreferencedClosure, PhanUndeclaredProperty, UnusedSuppression
215         */
216        return static function ( Variable $variable, array $scopeList, bool $varExistsInAllScopes ) {
217            $varName = $variable->getName();
218
219            $vars = [];
220            $firstVar = null;
221            foreach ( $scopeList as $scope ) {
222                $localVar = $scope->getVariableByNameOrNull( $varName );
223                if ( $localVar ) {
224                    if ( !$firstVar ) {
225                        $firstVar = $localVar;
226                    } else {
227                        $vars[] = $localVar;
228                    }
229                }
230            }
231
232            if ( !$firstVar ) {
233                return;
234            }
235
236            /** @var Taintedness $taintedness */
237            $taintedness = $prevTaint = $firstVar->taintedness ?? null;
238            /** @var MethodLinks $methodLinks */
239            $methodLinks = $prevLinks = $firstVar->taintedMethodLinks ?? null;
240            /** @var CausedByLines $error */
241            $error = $prevErr = $firstVar->taintedOriginalError ?? null;
242
243            foreach ( $vars as $localVar ) {
244                // Below we only merge data if it's non-null in the current scope and different from the previous
245                // branch. Using arrays to save all previous values and then in_array seems useless on MW core,
246                // since >99% cases of duplication are already covered by these simple checks.
247
248                $taintOrNull = $localVar->taintedness ?? null;
249                if ( $taintOrNull && $taintOrNull !== $prevTaint ) {
250                    $prevTaint = $taintOrNull;
251                    if ( $taintedness ) {
252                        $taintedness = $taintedness->asMergedWith( $taintOrNull );
253                    } else {
254                        $taintedness = $taintOrNull;
255                    }
256                }
257
258                $variableObjLinksOrNull = $localVar->taintedMethodLinks ?? null;
259                if ( $variableObjLinksOrNull && $variableObjLinksOrNull !== $prevLinks ) {
260                    $prevLinks = $variableObjLinksOrNull;
261                    if ( $methodLinks ) {
262                        $methodLinks = $methodLinks->asMergedWith( $variableObjLinksOrNull );
263                    } else {
264                        $methodLinks = $variableObjLinksOrNull;
265                    }
266                }
267
268                $varErrorOrNull = $localVar->taintedOriginalError ?? null;
269                if ( $varErrorOrNull && $varErrorOrNull !== $prevErr ) {
270                    $prevErr = $varErrorOrNull;
271                    if ( $error ) {
272                        $error = $error->asMergedWith( $varErrorOrNull );
273                    } else {
274                        $error = $varErrorOrNull;
275                    }
276                }
277            }
278
279            if ( $taintedness ) {
280                self::setTaintednessRaw( $variable, $taintedness );
281            }
282            if ( $methodLinks ) {
283                self::setMethodLinks( $variable, $methodLinks );
284            }
285            if ( $error ) {
286                self::setCausedByRaw( $variable, $error );
287            }
288        };
289    }
290
291    /**
292     * Print the taintedness of a variable, when requested
293     * @see BlockAnalysisVisitor::analyzeSubstituteVarAssert()
294     * @inheritDoc
295     * @suppress PhanUndeclaredProperty, UnusedSuppression
296     */
297    public function analyzeStringLiteralStatement( CodeBase $codeBase, Context $context, string $statement ): bool {
298        $found = false;
299        if ( preg_match_all( self::DEBUG_TAINTEDNESS_REGEXP, $statement, $matches, PREG_SET_ORDER ) ) {
300            foreach ( $matches as $group ) {
301                foreach ( explode( ',', $group[1] ) as $rawVar ) {
302                    $varName = ltrim( trim( $rawVar ), '$' );
303                    if ( $context->getScope()->hasVariableWithName( $varName ) ) {
304                        $var = $context->getScope()->getVariableByName( $varName );
305                        $taintOrNull = self::getTaintednessRaw( $var );
306                        $taint = $taintOrNull ? $taintOrNull->toShortString() : 'unset';
307                        $msg = "Variable {CODE} has taintedness: {DETAILS}";
308                        $params = [ "\$$varName", $taint ];
309                    } else {
310                        $msg = "Variable {CODE} doesn't exist in scope";
311                        $params = [ "\$$varName" ];
312                    }
313                    self::emitIssue(
314                        $codeBase,
315                        $context,
316                        'SecurityCheckDebugTaintedness',
317                        $msg,
318                        $params
319                    );
320                    $found = true;
321                }
322            }
323        } elseif ( strpos( $statement, '@taint-check-debug-method-first-arg' ) !== false ) {
324            // FIXME This is a hack. The annotation is INTERNAL, for use only in the backpropoffsets-blowup
325            // test. We should either find a better way to test that, or maybe add a public annotation
326            // for debugging taintedness of a method (probably unreadable on a single line).
327            $funcName = preg_replace( '/@taint-check-debug-method-first-arg ([a-z0-9:]+)\b.*/i', '$1', $statement );
328            // Let any exception bubble up here, the annotation is for internal use in testing
329            $fqsen = FullyQualifiedMethodName::fromStringInContext( $funcName, $context );
330            $method = $codeBase->getMethodByFQSEN( $fqsen );
331            /** @var FunctionTaintedness|null $fTaint */
332            $fTaint = $method->funcTaint ?? null;
333            if ( !$fTaint ) {
334                return false;
335            }
336            self::emitIssue(
337                $codeBase,
338                $context,
339                'SecurityCheckDebugTaintedness',
340                "Method {CODE} has first param with taintedness: {DETAILS}",
341                [ $funcName, $fTaint->getParamSinkTaint( 0 )->toShortString() ]
342            );
343            return true;
344        }
345        return $found;
346    }
347
348    /**
349     * Get a string representation of a taint integer
350     *
351     * The prefix ~ means all input taints except the letter given.
352     * The prefix * means the EXEC version of the taint.
353     *
354     * @param int $taint
355     * @return string
356     */
357    public static function taintToString( int $taint ): string {
358        if ( $taint === self::NO_TAINT ) {
359            return 'NONE';
360        }
361
362        // Note, order matters here.
363        static $mapping = [
364            self::UNKNOWN_TAINT => 'UNKNOWN',
365            self::PRESERVE_TAINT => 'PRESERVE',
366            self::ALL_TAINT => 'ALL',
367            self::YES_TAINT => 'YES',
368            self::YES_TAINT &
369            ( ~self::HTML_TAINT ) => '~HTML',
370            self::YES_TAINT &
371            ( ~self::SQL_TAINT ) => '~SQL',
372            self::YES_TAINT &
373            ( ~self::SHELL_TAINT ) => '~SHELL',
374            self::YES_TAINT &
375            ( ~self::SERIALIZE_TAINT ) => '~SERIALIZE',
376            self::YES_TAINT &
377            ( ~self::CUSTOM1_TAINT ) => '~CUSTOM1',
378            self::YES_TAINT &
379            ( ~self::CUSTOM2_TAINT ) => '~CUSTOM2',
380            // We skip ~ versions of flags which shouldn't be possible.
381            self::HTML_TAINT => 'HTML',
382            self::SQL_TAINT => 'SQL',
383            self::SHELL_TAINT => 'SHELL',
384            self::ESCAPED_TAINT => 'ESCAPED',
385            self::SERIALIZE_TAINT => 'SERIALIZE',
386            self::CUSTOM1_TAINT => 'CUSTOM1',
387            self::CUSTOM2_TAINT => 'CUSTOM2',
388            self::CODE_TAINT => 'CODE',
389            self::PATH_TAINT => 'PATH',
390            self::REGEX_TAINT => 'REGEX',
391            self::SQL_NUMKEY_TAINT => 'SQL_NUMKEY',
392            self::ARRAY_OK => 'ARRAY_OK',
393            self::ALL_EXEC_TAINT => '*ALL',
394            self::HTML_EXEC_TAINT => '*HTML',
395            self::SQL_EXEC_TAINT => '*SQL',
396            self::SHELL_EXEC_TAINT => '*SHELL',
397            self::ESCAPED_EXEC_TAINT => '*ESCAPED',
398            self::SERIALIZE_EXEC_TAINT => '*SERIALIZE',
399            self::CUSTOM1_EXEC_TAINT => '*CUSTOM1',
400            self::CUSTOM2_EXEC_TAINT => '*CUSTOM2',
401            self::CODE_EXEC_TAINT => '*CODE',
402            self::PATH_EXEC_TAINT => '*PATH',
403            self::REGEX_EXEC_TAINT => '*REGEX',
404            self::SQL_NUMKEY_EXEC_TAINT => '*SQL_NUMKEY',
405        ];
406
407        $types = [];
408        foreach ( $mapping as $bitmap => $val ) {
409            if ( ( $bitmap & $taint ) === $bitmap ) {
410                $types[] = $val;
411                $taint &= ~$bitmap;
412            }
413        }
414        if ( $taint !== 0 ) {
415            $types[] = "Unrecognized: $taint";
416        }
417        return implode( ', ', $types );
418    }
419
420    /**
421     * @param FullyQualifiedFunctionLikeName $fqsen
422     * @return bool
423     */
424    public function builtinFuncHasTaint( FullyQualifiedFunctionLikeName $fqsen ): bool {
425        return $this->getBuiltinFuncTaint( $fqsen ) !== null;
426    }
427
428    /**
429     * Get the taintedness of a function
430     *
431     * This allows overriding the default taint of a function
432     *
433     * If you want to provide custom taint hints for your application,
434     * override the getCustomFuncTaints()
435     *
436     * @param FullyQualifiedFunctionLikeName $fqsen The function/method in question
437     * @return FunctionTaintedness|null Null to autodetect taintedness
438     */
439    public function getBuiltinFuncTaint( FullyQualifiedFunctionLikeName $fqsen ): ?FunctionTaintedness {
440        $name = (string)$fqsen;
441
442        if ( isset( self::$builtinFuncTaintCache[$name] ) ) {
443            return self::$builtinFuncTaintCache[$name];
444        }
445
446        static $funcTaints = null;
447        if ( $funcTaints === null ) {
448            $funcTaints = $this->getCustomFuncTaints() + $this->getPHPFuncTaints();
449        }
450
451        if ( isset( $funcTaints[$name] ) ) {
452            $rawFuncTaint = $funcTaints[$name];
453            if ( $rawFuncTaint instanceof FunctionTaintedness ) {
454                $funcTaint = $rawFuncTaint;
455            } else {
456                self::assertFunctionTaintArrayWellFormed( $rawFuncTaint );
457                // Note: for backcompat, we set NO_OVERRIDE everywhere.
458                $overallFlags = ( $rawFuncTaint['overall'] & self::FUNCTION_FLAGS ) | self::NO_OVERRIDE;
459                $funcTaint = new FunctionTaintedness(
460                    new Taintedness( $rawFuncTaint['overall'] & ~$overallFlags ),
461                    $overallFlags
462                );
463                unset( $rawFuncTaint['overall'] );
464                foreach ( $rawFuncTaint as $i => $val ) {
465                    assert( ( $val & self::UNKNOWN_TAINT ) === 0, 'Cannot set UNKNOWN' );
466                    $paramFlags = ( $val & self::FUNCTION_FLAGS ) | self::NO_OVERRIDE;
467                    // TODO Split sink and preserve in the hardcoded arrays
468                    if ( $val & self::VARIADIC_PARAM ) {
469                        $pTaint = new Taintedness( $val & ~( self::VARIADIC_PARAM | $paramFlags ) );
470                        $funcTaint = $funcTaint
471                            ->withVariadicParamSinkTaint( $i, $pTaint->withOnly( self::ALL_EXEC_TAINT ), $paramFlags )
472                            ->withVariadicParamPreservedTaint(
473                                $i,
474                                $pTaint->without( self::ALL_EXEC_TAINT )->asPreservedTaintedness()
475                            );
476                    } else {
477                        $pTaint = new Taintedness( $val & ~$paramFlags );
478                        $funcTaint = $funcTaint
479                            ->withParamSinkTaint( $i, $pTaint->withOnly( self::ALL_EXEC_TAINT ), $paramFlags )
480                            ->withParamPreservedTaint(
481                                $i,
482                                $pTaint->without( self::ALL_EXEC_TAINT )->asPreservedTaintedness()
483                            );
484                    }
485                }
486            }
487            self::$builtinFuncTaintCache[$name] = $funcTaint;
488            return self::$builtinFuncTaintCache[$name];
489        }
490        return null;
491    }
492
493    /**
494     * Assert that a taintedness array is well-formed, and fail hard if it isn't.
495     *
496     * @param int[] $taint
497     */
498    private static function assertFunctionTaintArrayWellFormed( array $taint ): void {
499        if ( !isset( $taint['overall'] ) ) {
500            throw new Error( 'Overall taint must be set' );
501        }
502
503        foreach ( $taint as $i => $t ) {
504            if ( !is_int( $i ) && $i !== 'overall' ) {
505                throw new Error( "Taint indexes must be int or 'overall', got '$i'" );
506            }
507            if ( !is_int( $t ) || ( $t & ~self::ALL_TAINT_FLAGS ) ) {
508                throw new Error( "Wrong taint index $i, got: " . var_export( $t, true ) );
509            }
510            if ( $t & ~self::ALL_TAINT_FLAGS ) {
511                throw new Error( "Taint index $i has unknown flags: " . decbin( $t ) );
512            }
513        }
514    }
515
    /**
     * Get an array of function taints custom for the application
     *
     * The result is combined with getPHPFuncTaints() in getBuiltinFuncTaint(); when both define
     * the same key, the entry returned here is the one that gets used.
     *
     * @return array<string,int[]|FunctionTaintedness> Array of function taints. The keys are FQSENs. The values can be
     *   either FunctionTaintedness objects, or arrays with 'overall' string key and numeric keys for parameters.
     *   The 'overall' key is mandatory in the array form.
     *
     *   For example: [ self::YES_TAINT, 'overall' => self::NO_TAINT ]
     *   means that the taint of the return value is the same as the taint
     *   of the first arg, and all other args are ignored.
     *   [ self::HTML_EXEC_TAINT, 'overall' => self::NO_TAINT ]
     *   means that the first arg is output in an html context (e.g. like echo)
     *   [ self::YES_TAINT & ~self::HTML_TAINT, 'overall' => self::NO_TAINT ]
     *   means that the function removes html taint (escapes) e.g. htmlspecialchars
     *   [ 'overall' => self::YES_TAINT ]
     *   means that it returns a tainted value (e.g. return $_POST['foo']; )
     * @see FunctionTaintedness for more details
     */
    abstract protected function getCustomFuncTaints(): array;
534
    /**
     * Hook that can be used to force specific issues to be marked as false positives
     *
     * For example, a specific application might be able to recognize
     * that we are in a CLI context, and thus the XSS is really a false positive.
     *
     * This base implementation ignores all of its arguments and always returns false.
     *
     * @param int $combinedTaint Combined and adjusted taint of LHS+RHS
     * @param string &$msg Issue description (so plugin can modify to state why false)
     * @param Context $context
     * @param CodeBase $code_base
     * @return bool Is this a false positive?
     * @suppress PhanUnusedPublicMethodParameter No param is used
     */
    public function isFalsePositive(
        int $combinedTaint,
        string &$msg,
        Context $context,
        CodeBase $code_base
    ): bool {
        // Default: nothing is treated as a false positive.
        return false;
    }
556
557    /**
558     * Given a param description line, extract taint
559     *
560     * This is to allow putting taint information in method docblocks.
561     * If a function has a docblock comment like:
562     *  *  @param-taint $foo escapes_html
563     * This converts that line into:
564     *   ( self::YES_TAINT & ~self::SQL_TAINT )
565     * Multiple taint types are separated by commas
566     * (which are interpreted as bitwise OR ( "|" ). Future versions
567     * might support more complex bitwise operators, but for now it
568     * doesn't seem needed.
569     *
570     * The following keywords are supported where {type} can be
571     * html, sql, shell, serialize, custom1, custom2, sql_numkey,
572     * escaped.
573     *  * {type} - just set the flag. 99% you should only use 'none' or 'tainted'
574     *  * exec_{type} - sets the exec flag.
575     *  * escapes_{type} - self::YES_TAINT & ~self::{type}_TAINT.
576     *     Note: escapes_html adds the exec_escaped flag, use
577     *     escapes_htmlnoent if the value is safe to double encode.
578     *  * onlysafefor_{type}
579     *     Same as above, intended for return type declarations.
580     *     Only difference is that onlysafefor_html sets ESCAPED_TAINT instead
581     *     of ESCAPED_EXEC_TAINT
582     *  * none - self::NO_TAINT
583     *  * tainted - self::YES_TAINT
584     *  * array_ok - sets self::ARRAY_OK
585     *  * allow_override - Allow autodetected taints to override annotation
586     *
587     * @todo What about ~ operator?
588     * @note The special casing to have escapes_html always add exec_escaped
589     *   (and having htmlnoent exist) is "experimental" and may change in
590     *   future versions (Maybe all types should set exec_escaped. Maybe it
591     *   should be explicit)
592     * @note Excluding UNKNOWN here on purpose, as if we're setting it, it's not unknown
593     * @param string $line A line from the docblock
594     * @return array|null Array of [taintedness, flags], or null on no info
595     * @phan-return array{0:Taintedness,1:int}|null
596     */
597    public static function parseTaintLine( string $line ): ?array {
598        $types = '(?P<type>htmlnoent|html|sql|shell|serialize|custom1|'
599            . 'custom2|code|path|regex|sql_numkey|escaped|none|tainted)';
600        $prefixes = '(?P<prefix>escapes|onlysafefor|exec)';
601        $taintExpr = "(?P<taint>(?:{$prefixes}_)?$types|array_ok|allow_override)";
602
603        $filteredLine = preg_replace( "/((?:$taintExpr,? *)+)(?: .*)?$/", '$1', $line );
604        $taints = explode( ',', strtolower( $filteredLine ) );
605        $taints = array_map( 'trim', $taints );
606
607        $overallTaint = Taintedness::safeSingleton();
608        $overallFlags = self::NO_OVERRIDE;
609        $numberOfTaintsProcessed = 0;
610        foreach ( $taints as $taint ) {
611            $taintParts = [];
612            if ( !preg_match( "/^$taintExpr$/", $taint, $taintParts ) ) {
613                continue;
614            }
615            $numberOfTaintsProcessed++;
616            if ( $taintParts['taint'] === 'array_ok' ) {
617                $overallFlags |= self::ARRAY_OK;
618                continue;
619            }
620            if ( $taintParts['taint'] === 'allow_override' ) {
621                $overallFlags &= ~self::NO_OVERRIDE;
622                continue;
623            }
624            $taintAsInt = self::convertTaintNameToConstant( $taintParts['type'] );
625            switch ( $taintParts['prefix'] ) {
626                case '':
627                    $overallTaint = $overallTaint->with( $taintAsInt );
628                    break;
629                case 'exec':
630                    $overallTaint = $overallTaint->with( Taintedness::flagsAsYesToExecTaint( $taintAsInt ) );
631                    break;
632                case 'escapes':
633                case 'onlysafefor':
634                    $overallTaint = $overallTaint->with( self::YES_TAINT & ~$taintAsInt );
635                    if ( $taintParts['type'] === 'html' ) {
636                        if ( $taintParts['prefix'] === 'escapes' ) {
637                            $overallTaint = $overallTaint->with( self::ESCAPED_EXEC_TAINT );
638                        } else {
639                            $overallTaint = $overallTaint->with( self::ESCAPED_TAINT );
640                        }
641                    }
642                    break;
643            }
644        }
645        if ( $numberOfTaintsProcessed === 0 ) {
646            return null;
647        }
648        return [ $overallTaint, $overallFlags ];
649    }
650
    /**
     * Hook to override the sink taintedness of a method parameter depending on the current argument.
     *
     * This base implementation returns $paramSinkTaint and $paramSinkError unchanged, and does not
     * use any of the other arguments.
     *
     * @internal This method is unstable and may be removed without prior notice.
     *
     * @param Taintedness $paramSinkTaint
     * @param Taintedness $curArgTaintedness
     * @param Node $argument Note: This hook is not called on literals
     * @param int $argIndex Which argument number is this
     * @param FunctionInterface $func The function/method being called
     * @param FunctionTaintedness $funcTaint Taint of method formal parameters
     * @param CausedByLines $paramSinkError
     * @param Context $context Context object
     * @param CodeBase $code_base CodeBase object
     * @return array<Taintedness|CausedByLines> The taint and caused-by lines to use for actual parameter
     * @phan-return array{0:Taintedness,1:CausedByLines}
     * @suppress PhanUnusedPublicMethodParameter
     */
    public function modifyParamSinkTaint(
        Taintedness $paramSinkTaint,
        Taintedness $curArgTaintedness,
        Node $argument,
        int $argIndex,
        FunctionInterface $func,
        FunctionTaintedness $funcTaint,
        CausedByLines $paramSinkError,
        Context $context,
        CodeBase $code_base
    ): array {
        // no-op: the sink taint and its caused-by lines are passed through as they are
        return [ $paramSinkTaint, $paramSinkError ];
    }
683
684    /**
685     * Hook to override how taint of an argument to method call is calculated
686     *
687     * @param Taintedness $curArgTaintedness
688     * @param Node $argument Note: This hook is not called on literals
689     * @param int $argIndex Which argument number is this
690     * @param FunctionInterface $func The function/method being called
691     * @param FunctionTaintedness $funcTaint Taint of method formal parameters
692     * @param Context $context Context object
693     * @param CodeBase $code_base CodeBase object
694     * @return Taintedness The taint to use for actual parameter
695     * @suppress PhanUnusedPublicMethodParameter
696     */
697    public function modifyArgTaint(
698        Taintedness $curArgTaintedness,
699        Node $argument,
700        int $argIndex,
701        FunctionInterface $func,
702        FunctionTaintedness $funcTaint,
703        Context $context,
704        CodeBase $code_base
705    ): Taintedness {
706        // no-op
707        return $curArgTaintedness;
708    }
709
710    /**
711     * Convert a string like 'html' to self::HTML_TAINT.
712     *
713     * @note htmlnoent treated like self::HTML_TAINT.
714     * @param string $name one of:
715     *   html, sql, shell, serialize, custom1, custom2, code, path, regex, sql_numkey,
716     *   escaped, none (= self::NO_TAINT), tainted (= self::YES_TAINT)
717     * @return int One of the TAINT constants
718     */
719    public static function convertTaintNameToConstant( string $name ): int {
720        switch ( $name ) {
721            case 'html':
722            case 'htmlnoent':
723                return self::HTML_TAINT;
724            case 'sql':
725                return self::SQL_TAINT;
726            case 'shell':
727                return self::SHELL_TAINT;
728            case 'serialize':
729                return self::SERIALIZE_TAINT;
730            case 'custom1':
731                return self::CUSTOM1_TAINT;
732            case 'custom2':
733                return self::CUSTOM2_TAINT;
734            case 'code':
735                return self::CODE_TAINT;
736            case 'path':
737                return self::PATH_TAINT;
738            case 'regex':
739                return self::REGEX_TAINT;
740            case 'sql_numkey':
741                return self::SQL_NUMKEY_TAINT;
742            case 'escaped':
743                return self::ESCAPED_TAINT;
744            case 'tainted':
745                return self::YES_TAINT;
746            case 'none':
747                return self::NO_TAINT;
748            default:
749                // @codeCoverageIgnoreStart
750                throw new InvalidArgumentException( "$name not valid taint" );
751                // @codeCoverageIgnoreEnd
752        }
753    }
754
755    /**
756     * Taints for builtin php functions
757     *
758     * @return int[][] List of func taints (See getBuiltinFuncTaint())
759     * @phan-return array<string,int[]>
760     */
761    private function getPHPFuncTaints(): array {
762        $pregMatchTaint = [
763            self::REGEX_EXEC_TAINT,
764            self::YES_TAINT,
765            // TODO: Possibly unsafe pass-by-ref
766            self::NO_TAINT,
767            self::NO_TAINT,
768            self::NO_TAINT,
769            'overall' => self::NO_TAINT,
770        ];
771        $pregReplaceTaint = [
772            self::REGEX_EXEC_TAINT,
773            // TODO: This is used for strings (in preg_replace) and callbacks (in preg_replace_callback)
774            self::YES_TAINT,
775            self::YES_TAINT,
776            self::NO_TAINT,
777            self::NO_TAINT,
778            'overall' => self::NO_TAINT
779        ];
780        return [
781            '\htmlentities' => [
782                self::ESCAPES_HTML,
783                'overall' => self::ESCAPED_TAINT
784            ],
785            '\htmlspecialchars' => [
786                self::ESCAPES_HTML,
787                'overall' => self::ESCAPED_TAINT
788            ],
789            '\escapeshellarg' => [
790                ~self::SHELL_TAINT & self::YES_TAINT,
791                'overall' => self::NO_TAINT
792            ],
793            // TODO: Perhaps we should distinguish arguments escape vs command escape
794            '\escapeshellcmd' => [
795                ~self::SHELL_TAINT & self::YES_TAINT,
796                'overall' => self::NO_TAINT
797            ],
798            '\shell_exec' => [
799                self::SHELL_EXEC_TAINT,
800                'overall' => self::YES_TAINT
801            ],
802            '\passthru' => [
803                self::SHELL_EXEC_TAINT,
804                self::NO_TAINT,
805                'overall' => self::NO_TAINT
806            ],
807            '\exec' => [
808                self::SHELL_EXEC_TAINT,
809                // TODO: This is an unsafe passbyref
810                self::NO_TAINT,
811                self::NO_TAINT,
812                'overall' => self::YES_TAINT
813            ],
814            '\system' => [
815                self::SHELL_EXEC_TAINT,
816                self::NO_TAINT,
817                'overall' => self::YES_TAINT
818            ],
819            '\proc_open' => [
820                self::SHELL_EXEC_TAINT,
821                self::NO_TAINT,
822                // TODO: Unsafe passbyref
823                self::NO_TAINT,
824                self::NO_TAINT,
825                self::NO_TAINT,
826                self::NO_TAINT,
827                // TODO: Perhaps not so safe
828                'overall' => self::NO_TAINT
829            ],
830            '\popen' => [
831                self::SHELL_EXEC_TAINT,
832                self::NO_TAINT,
833                // TODO: Perhaps not so safe
834                'overall' => self::NO_TAINT
835            ],
836            // Or any time the serialized data comes from a trusted source.
837            '\serialize' => [
838                'overall' => self::YES_TAINT & ~self::SERIALIZE_TAINT,
839            ],
840            '\unserialize' => [
841                self::SERIALIZE_EXEC_TAINT,
842                'overall' => self::NO_TAINT,
843            ],
844            '\mysql_query' => [
845                self::SQL_EXEC_TAINT,
846                'overall' => self::UNKNOWN_TAINT
847            ],
848            '\mysqli_query' => [
849                self::NO_TAINT,
850                self::SQL_EXEC_TAINT,
851                'overall' => self::UNKNOWN_TAINT
852            ],
853            '\mysqli::query' => [
854                self::SQL_EXEC_TAINT,
855                'overall' => self::UNKNOWN_TAINT
856            ],
857            '\mysqli_real_query' => [
858                self::NO_TAINT,
859                self::SQL_EXEC_TAINT,
860                'overall' => self::UNKNOWN_TAINT
861            ],
862            '\mysqli::real_query' => [
863                self::SQL_EXEC_TAINT,
864                'overall' => self::UNKNOWN_TAINT
865            ],
866            '\sqlite_query' => [
867                self::NO_TAINT,
868                self::SQL_EXEC_TAINT,
869                self::NO_TAINT,
870                self::NO_TAINT,
871                'overall' => self::UNKNOWN_TAINT
872            ],
873            '\sqlite_single_query' => [
874                self::NO_TAINT,
875                self::SQL_EXEC_TAINT,
876                self::NO_TAINT,
877                self::NO_TAINT,
878                'overall' => self::UNKNOWN_TAINT
879            ],
880            // Note: addslashes, addcslashes etc. intentionally omitted because they're not
881            // enough to avoid SQLi.
882            '\mysqli_escape_string' => [
883                self::NO_TAINT,
884                self::YES_TAINT & ~self::SQL_TAINT,
885                'overall' => self::NO_TAINT
886            ],
887            '\mysqli_real_escape_string' => [
888                self::NO_TAINT,
889                self::YES_TAINT & ~self::SQL_TAINT,
890                'overall' => self::NO_TAINT
891            ],
892            '\mysqli::escape_string' => [
893                self::YES_TAINT & ~self::SQL_TAINT,
894                'overall' => self::NO_TAINT
895            ],
896            '\mysqli::real_escape_string' => [
897                self::YES_TAINT & ~self::SQL_TAINT,
898                'overall' => self::NO_TAINT
899            ],
900            '\sqlite_escape_string' => [
901                self::YES_TAINT & ~self::SQL_TAINT,
902                'overall' => self::NO_TAINT
903            ],
904            '\PDO::query' => [
905                self::SQL_EXEC_TAINT,
906                self::NO_TAINT,
907                self::NO_TAINT,
908                self::NO_TAINT,
909                'overall' => self::UNKNOWN_TAINT
910            ],
911            '\PDO::prepare' => [
912                self::SQL_EXEC_TAINT,
913                self::NO_TAINT,
914                'overall' => self::UNKNOWN_TAINT
915            ],
916            '\PDO::exec' => [
917                self::SQL_EXEC_TAINT,
918                'overall' => self::NO_TAINT
919            ],
920            '\base64_encode' => [
921                self::YES_TAINT & ~self::HTML_TAINT,
922                'overall' => self::NO_TAINT
923            ],
924            '\file_put_contents' => [
925                self::PATH_EXEC_TAINT,
926                self::NO_TAINT,
927                self::NO_TAINT,
928                self::NO_TAINT,
929                'overall' => self::NO_TAINT
930            ],
931            // TODO: What about file_get_contents() and file() ?
932            '\fopen' => [
933                self::PATH_EXEC_TAINT,
934                self::NO_TAINT,
935                self::NO_TAINT,
936                self::NO_TAINT,
937                // TODO: Perhaps not so safe
938                'overall' => self::NO_TAINT
939            ],
940            '\opendir' => [
941                self::PATH_EXEC_TAINT,
942                self::NO_TAINT,
943                // TODO: Perhaps not so safe
944                'overall' => self::NO_TAINT
945            ],
946            '\rawurlencode' => [
947                self::YES_TAINT & ~self::PATH_TAINT,
948                'overall' => self::NO_TAINT
949            ],
950            '\urlencode' => [
951                self::YES_TAINT & ~self::PATH_TAINT,
952                'overall' => self::NO_TAINT
953            ],
954            '\printf' => [
955                self::HTML_EXEC_TAINT,
956                // TODO We could check if the respective specifiers are safe
957                self::HTML_EXEC_TAINT | self::VARIADIC_PARAM,
958                'overall' => self::NO_TAINT
959            ],
960            '\preg_filter' => [
961                self::REGEX_EXEC_TAINT,
962                self::YES_TAINT,
963                self::YES_TAINT,
964                self::NO_TAINT,
965                self::NO_TAINT,
966                'overall' => self::NO_TAINT
967            ],
968            '\preg_grep' => [
969                self::REGEX_EXEC_TAINT,
970                self::YES_TAINT,
971                self::NO_TAINT,
972                'overall' => self::NO_TAINT
973            ],
974            '\preg_match_all' => $pregMatchTaint,
975            '\preg_match' => $pregMatchTaint,
976            '\preg_quote' => [
977                self::YES_TAINT & ~self::REGEX_TAINT,
978                self::NO_TAINT,
979                'overall' => self::NO_TAINT
980            ],
981            '\preg_replace' => $pregReplaceTaint,
982            '\preg_replace_callback' => $pregReplaceTaint,
983            '\preg_replace_callback_array' => [
984                self::REGEX_EXEC_TAINT,
985                self::YES_TAINT,
986                self::NO_TAINT,
987                self::NO_TAINT,
988                self::NO_TAINT,
989                'overall' => self::NO_TAINT
990            ],
991            '\preg_split' => [
992                self::REGEX_EXEC_TAINT,
993                self::YES_TAINT,
994                self::NO_TAINT,
995                self::NO_TAINT,
996                'overall' => self::NO_TAINT
997            ],
998            // We assume that hashing functions are safe, see T272492
999            '\md5' => [
1000                self::NO_TAINT,
1001                self::NO_TAINT,
1002                'overall' => self::NO_TAINT
1003            ],
1004            '\sha1' => [
1005                self::NO_TAINT,
1006                self::NO_TAINT,
1007                'overall' => self::NO_TAINT
1008            ],
1009            '\crc32' => [
1010                self::NO_TAINT,
1011                'overall' => self::NO_TAINT
1012            ],
1013        ];
1014    }
1015
1016    /**
1017     * @inheritDoc
1018     */
1019    public static function getBeforeLoopBodyAnalysisVisitorClassName(): string {
1020        return TaintednessLoopVisitor::class;
1021    }
1022}