Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
91.76% covered (success)
91.76%
546 / 595
34.78% covered (danger)
34.78%
8 / 23
CRAP
0.00% covered (danger)
0.00%
0 / 1
MWVisitor
91.76% covered (success)
91.76%
546 / 595
34.78% covered (danger)
34.78%
8 / 23
245.79
0.00% covered (danger)
0.00%
0 / 1
 analyzeCallNode
95.83% covered (success)
95.83%
23 / 24
0.00% covered (danger)
0.00%
0 / 1
11
 checkExternalLink
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
3
 doSelectWrapperSpecialHandling
100.00% covered (success)
100.00%
22 / 22
100.00% covered (success)
100.00%
1 / 1
6
 triggerHook
79.49% covered (warning)
79.49%
31 / 39
0.00% covered (danger)
0.00%
0 / 1
10.86
 hookArgsContainReference
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
4
 extractHookArgs
83.33% covered (warning)
83.33%
5 / 6
0.00% covered (danger)
0.00%
0 / 1
3.04
 getHookTypeForRegistrationMethod
71.43% covered (warning)
71.43%
5 / 7
0.00% covered (danger)
0.00%
0 / 1
4.37
 handleNormalHookRegistration
58.33% covered (warning)
58.33%
7 / 12
0.00% covered (danger)
0.00%
0 / 1
5.16
 handleParserHookRegistration
83.33% covered (warning)
83.33%
5 / 6
0.00% covered (danger)
0.00%
0 / 1
3.04
 registerHook
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
 visitReturn
100.00% covered (success)
100.00%
20 / 20
100.00% covered (success)
100.00%
1 / 1
6
 handleGetQueryInfoReturn
96.67% covered (success)
96.67%
29 / 30
0.00% covered (danger)
0.00%
0 / 1
9
 checkMakeList
97.73% covered (success)
97.73%
43 / 44
0.00% covered (danger)
0.00%
0 / 1
12
 literalListConstToName
66.67% covered (warning)
66.67%
10 / 15
0.00% covered (danger)
0.00%
0 / 1
8.81
 checkSQLOptions
96.55% covered (success)
96.55%
28 / 29
0.00% covered (danger)
0.00%
0 / 1
7
 checkJoinCond
77.78% covered (warning)
77.78%
35 / 45
0.00% covered (danger)
0.00%
0 / 1
14.85
 visitReturnOfFunctionHook
95.00% covered (success)
95.00%
19 / 20
0.00% covered (danger)
0.00%
0 / 1
12
 getCallableFromHookRegistration
90.00% covered (success)
90.00%
18 / 20
0.00% covered (danger)
0.00%
0 / 1
12.14
 getSingleCallable
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
5
 getCallbackForVar
81.82% covered (warning)
81.82%
9 / 11
0.00% covered (danger)
0.00%
0 / 1
5.15
 visitAssign
100.00% covered (success)
100.00%
27 / 27
100.00% covered (success)
100.00%
1 / 1
16
 detectHTMLForm
95.77% covered (success)
95.77%
181 / 189
0.00% covered (danger)
0.00%
0 / 1
63
 visitArray
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2
3namespace SecurityCheckPlugin;
4
5use ast\Node;
6use Phan\Analysis\PostOrderAnalysisVisitor;
7use Phan\AST\ContextNode;
8use Phan\Exception\CodeBaseException;
9use Phan\Exception\InvalidFQSENException;
10use Phan\Exception\IssueException;
11use Phan\Language\Element\FunctionInterface;
12use Phan\Language\Element\Method;
13use Phan\Language\FQSEN\FullyQualifiedClassName;
14use Phan\Language\FQSEN\FullyQualifiedFunctionLikeName;
15use Phan\Language\FQSEN\FullyQualifiedFunctionName;
16use Phan\Language\FQSEN\FullyQualifiedMethodName;
17use Phan\Language\UnionType;
18
19/**
20 * MediaWiki specific node visitor
21 *
22 * Copyright (C) 2017  Brian Wolff <bawolff@gmail.com>
23 *
24 * This program is free software; you can redistribute it and/or modify
25 * it under the terms of the GNU General Public License as published by
26 * the Free Software Foundation; either version 2 of the License, or
27 * (at your option) any later version.
28 *
29 * This program is distributed in the hope that it will be useful,
30 * but WITHOUT ANY WARRANTY; without even the implied warranty of
31 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
32 * GNU General Public License for more details.
33 *
34 * You should have received a copy of the GNU General Public License along
35 * with this program; if not, write to the Free Software Foundation, Inc.,
36 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
37 */
38class MWVisitor extends TaintednessVisitor {
39    /**
40     * @todo This is a temporary hack. Proper solution is refactoring/avoiding overrideContext
41     * @var bool|null
42     * @suppress PhanWriteOnlyProtectedProperty
43     */
44    protected $isHook;
45
/**
 * Look at an analyzed call and dispatch MediaWiki-specific handling for it:
 * parser hook registration, Hooks::register, Hooks::run(WithoutAbort),
 * Linker::makeExternalLink, and (for everything else) the IDatabase
 * select-wrapper checks.
 * @inheritDoc
 */
protected function analyzeCallNode( Node $node, iterable $funcs ): void {
    parent::analyzeCallNode( $node, $funcs );
    if ( !isset( $node->children['method'] ) ) {
        // Called by visitCall
        return;
    }

    assert( is_array( $funcs ) && count( $funcs ) === 1 );
    $calledMethod = $funcs[0];
    assert( $calledMethod instanceof Method );

    // Should this be getDefiningFQSEN() instead?
    $calledName = (string)$calledMethod->getFQSEN();
    $parserClass = MediaWikiHooksHelper::getInstance()->getMwParserClassFQSEN( $this->code_base )->__toString();

    if (
        $calledName === "$parserClass::setFunctionHook" ||
        $calledName === "$parserClass::setHook"
    ) {
        // Parser::setFunctionHook / Parser::setHook register special hooks
        $hookType = $this->getHookTypeForRegistrationMethod( $calledName );
        if ( $hookType !== null ) {
            $this->handleParserHookRegistration( $node, $hookType );
        }
    } elseif ( $calledName === '\Hooks::register' ) {
        $this->handleNormalHookRegistration( $node );
    } elseif (
        $calledName === '\Hooks::run' ||
        $calledName === '\Hooks::runWithoutAbort'
    ) {
        $this->triggerHook( $node );
    } elseif ( $calledName === '\Linker::makeExternalLink' ) {
        $this->checkExternalLink( $node );
    } else {
        $this->doSelectWrapperSpecialHandling( $node, $calledMethod );
    }
}
89
/**
 * Check a Linker::makeExternalLink call.
 *
 * Whether the link text (second argument) is escaped depends on the third
 * argument, which is treated as true when it is not passed. When it resolves
 * to a falsy value and the text is a Node, the text is checked for HTML taint.
 *
 * @param Node $node The call node
 */
private function checkExternalLink( Node $node ): void {
    $callArgs = $node->children['args']->children;
    $shouldEscape = $this->resolveValue( $callArgs[2] ?? true );
    $linkText = $callArgs[1] ?? null;
    if ( $shouldEscape || !( $linkText instanceof Node ) ) {
        // Either the text gets escaped, or it is a literal (or missing)
        return;
    }

    $this->maybeEmitIssueSimplified(
        new Taintedness( SecurityCheckPlugin::HTML_EXEC_TAINT ),
        $linkText,
        "Calling Linker::makeExternalLink with user controlled text " .
        "and third argument set to false"
    );
}
107
/**
 * Extra checks for IDatabase::select and its sibling wrappers, whose
 * argument format is too complex for the generic handling.
 *
 * Only $options (5th argument) and $join_conds (6th argument) are looked at
 * here, the remaining arguments go through the normal code path.
 * IDatabase::makeList is routed to its own dedicated check.
 *
 * @param Node $node Either an AST_METHOD_CALL or AST_STATIC_CALL
 * @param Method $method
 */
private function doSelectWrapperSpecialHandling( Node $node, Method $method ): void {
    $wrapperNames = [
        'makeList',
        'select',
        'selectField',
        'selectFieldValues',
        'selectSQLText',
        'selectRowCount',
        'selectRow',
    ];

    $calledName = $method->getName();
    if ( !in_array( $calledName, $wrapperNames, true ) ) {
        return;
    }

    // The name matches, now make sure the method really belongs to an IDatabase
    $idbFQSEN = FullyQualifiedClassName::fromFullyQualifiedString( '\\Wikimedia\\Rdbms\\IDatabase' );
    if ( !self::isSubclassOf( $method->getClassFQSEN(), $idbFQSEN, $this->code_base ) ) {
        return;
    }

    if ( $calledName === 'makeList' ) {
        $this->checkMakeList( $node );
        return;
    }

    $callArgs = $node->children['args']->children;
    if ( isset( $callArgs[4] ) ) {
        $this->checkSQLOptions( $callArgs[4] );
    }
    if ( isset( $callArgs[5] ) ) {
        $this->checkJoinCond( $callArgs[5] );
    }
}
150
/**
 * Dispatch a hook (i.e. Handle Hooks::run)
 *
 * The hook name must be a literal string and the hook arguments an array
 * literal; anything else is logged via debug() and skipped. Every subscriber
 * known to MediaWikiHooksHelper is then analyzed as if it were called with
 * the given arguments.
 *
 * @param Node $node The Hooks::run AST_STATIC_CALL
 */
private function triggerHook( Node $node ): void {
    $argList = $node->children['args']->children;
    if ( count( $argList ) === 0 ) {
        $this->debug( __METHOD__, "Too few args to Hooks::run" );
        return;
    }
    if ( !is_string( $argList[0] ) ) {
        $this->debug( __METHOD__, "Cannot determine hook name" );
        return;
    }
    '@phan-var array{0:string,1?:Node} $argList';
    $hookName = $argList[0];
    $hookArgsNode = $argList[1] ?? null;
    if (
        // A scalar literal second argument is not a Node and has no ->kind to inspect
        !( $hookArgsNode instanceof Node )
        || $hookArgsNode->kind !== \ast\AST_ARRAY
    ) {
        // @todo There are definitely cases where this
        // will prevent us from running hooks
        // e.g. EditPageGetPreviewContent
        $this->debug( __METHOD__, "Could not run hook $hookName due to complex args" );
        return;
    }
    $args = $this->extractHookArgs( $hookArgsNode );
    $hasPassByRef = self::hookArgsContainReference( $hookArgsNode );
    $analyzer = new PostOrderAnalysisVisitor( $this->code_base, $this->context, [] );
    $argumentTypes = array_fill( 0, count( $args ), UnionType::empty() );

    $subscribers = MediaWikiHooksHelper::getInstance()->getHookSubscribers( $hookName );
    foreach ( $subscribers as $subscriber ) {
        if ( $subscriber instanceof FullyQualifiedMethodName ) {
            if ( !$this->code_base->hasMethodWithFQSEN( $subscriber ) ) {
                $this->debug( __METHOD__, "Hook subscriber $subscriber not found!" );
                continue;
            }
            $func = $this->code_base->getMethodByFQSEN( $subscriber );
        } else {
            assert( $subscriber instanceof FullyQualifiedFunctionName );
            if ( !$this->code_base->hasFunctionWithFQSEN( $subscriber ) ) {
                $this->debug( __METHOD__, "Hook subscriber $subscriber not found!" );
                continue;
            }
            $func = $this->code_base->getFunctionByFQSEN( $subscriber );
        }

        // This is hacky, but try to ensure that the associated line
        // number for any issues is in the extension, and not the
        // line where the Hooks::register() is in MW core.
        // FIXME: In the case of reference parameters, this is
        // still reporting things being in MW core instead of extension.
        $oldContext = $this->overrideContext;
        $fContext = $func->getContext();
        $newContext = clone $this->context;
        $newContext = $newContext->withFile( $fContext->getFile() )
            ->withLineNumberStart( $fContext->getLineNumberStart() );
        $this->overrideContext = $newContext;
        $this->isHook = true;

        try {
            if ( $hasPassByRef ) {
                // Trigger an analysis of the function call (see e.g. ClosureReturnTypeOverridePlugin's
                // handling of call_user_func_array). Note that it's not enough to use our
                // handleMethodCall, because that doesn't handle references correctly.

                // NOTE: This is only known to be necessary with references, hence the check above
                // (for performance). There might be other edge cases, though...

                // TODO We don't care about types, so we use an empty union type. However this looks
                // very very fragile.
                // TODO 2: Someday we could write a generic-purpose MW plugin, which could (among other
                // things) understand hook. It could share some code with taint-check, and at that
                // point we'd likely want to use the correct types here (note that phan alone isn't
                // able to analyze hooks at all).
                $analyzer->analyzeCallableWithArgumentTypes( $argumentTypes, $func, $args );
            }
            $this->handleMethodCall( $func, $subscriber, $args, false, true );
        } finally {
            // Always undo the temporary override, even if analyzing the subscriber threw,
            // so that later issues are not attributed to the subscriber's file/line.
            $this->overrideContext = $oldContext;
            $this->isHook = false;
        }
    }
}
236
/**
 * Check whether any element of the given array node is passed by reference,
 * i.e. has the ARRAY_ELEM_REF flag set.
 *
 * @param Node $argArrayNode
 * @return bool
 */
private static function hookArgsContainReference( Node $argArrayNode ): bool {
    foreach ( $argArrayNode->children as $element ) {
        if ( !( $element instanceof Node ) ) {
            continue;
        }
        if ( ( $element->flags & \ast\flags\ARRAY_ELEM_REF ) !== 0 ) {
            return true;
        }
    }
    return false;
}
251
/**
 * Collect the value of every element of an array literal holding hook
 * arguments. Children that are not Nodes are ignored. Copied from
 * ClosureReturnTypeOverridePlugin::extractArrayArgs (which is private)
 * and simplified for our use case.
 *
 * @param Node $argArrayNode Must be an AST_ARRAY
 * @return Node[]
 */
private function extractHookArgs( Node $argArrayNode ): array {
    assert( $argArrayNode->kind === \ast\AST_ARRAY );
    $values = [];
    foreach ( $argArrayNode->children as $element ) {
        if ( $element instanceof Node ) {
            $values[] = $element->children['value'];
        }
    }
    return $values;
}
271
/**
 * Map a parser registration method to the internal name used for that kind of hook.
 *
 * @param string $method The method name of the registration function
 * @return string|null '!ParserFunctionHook' for setFunctionHook, '!ParserHook' for
 *  setHook, null (after a debug message) for anything else
 */
private function getHookTypeForRegistrationMethod( string $method ): ?string {
    $parserClass = MediaWikiHooksHelper::getInstance()->getMwParserClassFQSEN( $this->code_base )->__toString();

    if ( $method === "$parserClass::setFunctionHook" ) {
        return '!ParserFunctionHook';
    }
    if ( $method === "$parserClass::setHook" ) {
        return '!ParserHook';
    }

    $this->debug( __METHOD__, "$method not a hook registerer" );
    return null;
}
288
/**
 * Register a normal hook declared through Hooks::register (not via $wgHooks).
 *
 * Registration is skipped, with a debug message, when the call has fewer
 * than two arguments, the hook name is not a literal string, or no callback
 * can be determined.
 *
 * @param Node $node The node representing the AST_STATIC_CALL
 */
private function handleNormalHookRegistration( Node $node ): void {
    assert( $node->kind === \ast\AST_STATIC_CALL );
    $callArgs = $node->children['args']->children;
    if ( count( $callArgs ) < 2 ) {
        $this->debug( __METHOD__, "Could not understand Hooks::register" );
        return;
    }

    $hookName = $callArgs[0];
    if ( !is_string( $hookName ) ) {
        $this->debug( __METHOD__, "Could not register hook. Name is complex" );
        return;
    }

    $callback = $this->getCallableFromHookRegistration( $callArgs[1], $hookName );
    if ( !$callback ) {
        $this->debug( __METHOD__, "Could not register $hookName hook due to complex callback" );
        return;
    }
    $this->registerHook( $hookName, $callback );
}
313
/**
 * Handle a call to $parser->setFunctionHook() or $parser->setHook():
 * the second argument is the callback, which gets registered under the
 * given hook type. Nothing happens if the callback cannot be determined.
 *
 * @note Causes phan to error out if given non-existent class
 * @param Node $node The AST_METHOD_CALL node
 * @param string $hookType The name of the hook
 */
private function handleParserHookRegistration( Node $node, string $hookType ): void {
    $callArgs = $node->children['args']->children;
    if ( count( $callArgs ) >= 2 ) {
        $callback = $this->getCallableFromNode( $callArgs[1] );
        if ( $callback ) {
            $this->registerHook( $hookType, $callback );
        }
    }
}
331
/**
 * Record the callback as a subscriber of the given hook, and (re)analyze
 * the callback when the hooks helper reports it was not registered before.
 *
 * NOTE(review): this relies on MediaWikiHooksHelper::registerHook() returning
 * a truthy value when the pair was already known - confirm against the helper.
 *
 * @param string $hookType
 * @param FunctionInterface $callback
 */
private function registerHook( string $hookType, FunctionInterface $callback ): void {
    $hooksHelper = MediaWikiHooksHelper::getInstance();
    $alreadyRegistered = $hooksHelper->registerHook( $hookType, $callback->getFQSEN() );
    if ( $alreadyRegistered ) {
        return;
    }
    // If this is the first time seeing this, make sure we reanalyze the hook function now that
    // we know what it is, in case it's already been analyzed.
    $this->analyzeFunc( $callback );
}
346
/**
 * Inspect return statements of special functions.
 *
 * Besides the parent's handling, this checks the returned array of methods
 * named getQueryInfo*, and the return value of parser hooks (e.g. a tag
 * hook's return value is output as html).
 *
 * @param Node $node
 */
public function visitReturn( Node $node ): void {
    parent::visitReturn( $node );

    $returnedExpr = $node->children['expr'];
    if ( !( $returnedExpr instanceof Node && $this->context->isInFunctionLikeScope() ) ) {
        // Literal (or empty) return, or a return outside of any function-like
        return;
    }
    $funcFQSEN = $this->context->getFunctionLikeFQSEN();

    $looksLikeQueryInfo = strpos( (string)$funcFQSEN, '::getQueryInfo' ) !== false;
    if ( $looksLikeQueryInfo ) {
        $this->handleGetQueryInfoReturn( $returnedExpr );
    }

    $hookType = MediaWikiHooksHelper::getInstance()->isSpecialHookSubscriber( $funcFQSEN );
    switch ( $hookType ) {
        case '!ParserFunctionHook':
            $this->visitReturnOfFunctionHook( $returnedExpr, $funcFQSEN );
            break;
        case '!ParserHook':
            $this->maybeEmitIssueSimplified(
                new Taintedness( SecurityCheckPlugin::HTML_EXEC_TAINT ),
                $returnedExpr,
                "Outputting user controlled HTML from Parser tag hook {FUNCTIONLIKE}",
                [ $funcFQSEN ]
            );
            break;
    }
}
383
/**
 * Treat the array returned by a getQueryInfo() method as if its entries
 * were passed to IDatabase::select, since in MediaWiki such arrays are
 * usually fed to select later on.
 *
 * @note This will only work where the return
 *  statement is an array literal.
 * @param Node|mixed $node Node from ast tree
 */
private function handleGetQueryInfoReturn( $node ): void {
    if ( !( $node instanceof Node && $node->kind === \ast\AST_ARRAY ) ) {
        return;
    }

    // Position of each known key in the select() signature, which is
    // $table, $vars, $conds = '', $fname = __METHOD__,
    // $options = [], $join_conds = []
    $argPositions = [
        'tables' => 0,
        'fields' => 1,
        'conds' => 2,
        'options' => 4,
        'join_conds' => 5,
    ];
    $selectArgs = [ '', '', '', '' ];
    foreach ( $node->children as $element ) {
        assert( $element->kind === \ast\AST_ARRAY_ELEM );
        $key = $element->children['key'];
        // Dynamic names are skipped (T268055), and so are keys we do not know about
        if ( !( $key instanceof Node ) && isset( $argPositions[$key] ) ) {
            $selectArgs[$argPositions[$key]] = $element->children['value'];
        }
    }

    $selectFQSEN = FullyQualifiedMethodName::fromFullyQualifiedString(
        '\Wikimedia\Rdbms\IDatabase::select'
    );
    if ( !$this->code_base->hasMethodWithFQSEN( $selectFQSEN ) ) {
        // Huh. Core wasn't parsed. That's bad, but don't fail hard.
        $this->debug( __METHOD__, 'Database::select does not exist.' );
        return;
    }

    // TODO: The message about calling Database::select here is not very clear.
    $this->handleMethodCall(
        $this->code_base->getMethodByFQSEN( $selectFQSEN ),
        $selectFQSEN,
        $selectArgs,
        false
    );
    if ( isset( $selectArgs[4] ) ) {
        $this->checkSQLOptions( $selectArgs[4] );
    }
    if ( isset( $selectArgs[5] ) ) {
        $this->checkJoinCond( $selectArgs[5] );
    }
}
440
/**
 * Check a call to IDatabase::makeList
 *
 * This needs special casing because the second argument (one of the
 * IDatabase::LIST_* constants) completely changes how the first one
 * is interpreted, and hence which kind of taint is dangerous.
 *
 * @param Node $node
 */
private function checkMakeList( Node $node ): void {
    $args = $node->children['args'];
    // Figure out the list type; when omitted it is 0, i.e. IDatabase::LIST_COMMA.
    $listType = $args->children[1] ?? 0;
    if ( $listType instanceof Node ) {
        $listType = $this->getCtxN( $listType )->getEquivalentPHPValueForNode(
            $listType,
            ContextNode::RESOLVE_SCALAR_DEFAULT & ~ContextNode::RESOLVE_CONSTANTS
        );
    }

    if ( $listType instanceof Node ) {
        // Still a node after resolution, try to extract a constant name from it
        switch ( $listType->kind ) {
            case \ast\AST_CLASS_CONST:
                // Probably IDatabase::LIST_*. Note that non-class constants are resolved
                $listType = $listType->children['const'];
                break;
            case \ast\AST_CONST:
                $listType = $listType->children['name']->children['name'];
                break;
            default:
                // Something that cannot be resolved statically. Since LIST_NAMES is very rare, and LIST_COMMA is
                // default, assume its LIST_AND or LIST_OR
                $this->debug( __METHOD__, "Could not determine 2nd arg makeList()" );
                $this->maybeEmitIssueSimplified(
                    new Taintedness( SecurityCheckPlugin::SQL_NUMKEY_EXEC_TAINT ),
                    $args->children[0],
                    "IDatabase::makeList with unknown type arg is " .
                    "given an array with unescaped keynames or " .
                    "values for numeric keys (May be false positive)"
                );
                return;
        }
    }

    // Make sure not to mix strings and ints in switch cases, as that will break horribly
    if ( is_int( $listType ) ) {
        $listType = $this->literalListConstToName( $listType );
    }

    switch ( $listType ) {
        case 'LIST_COMMA':
            // String keys ignored. Everything escaped. So nothing to worry about.
            break;
        case 'LIST_AND':
        case 'LIST_SET':
        case 'LIST_OR':
            // exec_sql_numkey
            $this->maybeEmitIssueSimplified(
                new Taintedness( SecurityCheckPlugin::SQL_NUMKEY_EXEC_TAINT ),
                $args->children[0],
                "IDatabase::makeList with LIST_AND, LIST_OR or "
                . "LIST_SET must sql escape string key names and values of numeric keys"
            );
            break;
        case 'LIST_NAMES':
            // Like comma but with no escaping.
            $this->maybeEmitIssueSimplified(
                new Taintedness( SecurityCheckPlugin::SQL_EXEC_TAINT ),
                $args->children[0],
                "IDatabase::makeList with LIST_NAMES needs "
                . "to escape for SQL"
            );
            break;
        default:
            $this->debug( __METHOD__, "Unrecognized 2nd arg " . "to IDatabase::makeList: '$listType'" );
    }
}
513
/**
 * Translate the literal integer value of a LIST_* constant into the constant's name.
 * This only exists to cope with code that passes the raw number instead of the
 * named constant, and it silently depends on the values hard-coded below.
 * Unknown values are reported via debug() and treated as LIST_AND.
 *
 * @param int $value
 * @return string
 */
private function literalListConstToName( int $value ): string {
    $namesByValue = [
        0 => 'LIST_COMMA',
        1 => 'LIST_AND',
        2 => 'LIST_SET',
        3 => 'LIST_NAMES',
        4 => 'LIST_OR',
    ];
    if ( isset( $namesByValue[$value] ) ) {
        return $namesByValue[$value];
    }

    // Oh boy, what the heck are you doing? Well, DWIM
    $this->debug(
        __METHOD__,
        'Someone specified a LIST_* constant literally but it is not a valid value. Wow.'
    );
    return 'LIST_AND';
}
543
/**
 * Check the options parameter to IDatabase::select
 *
 * This only works if its specified as an array literal.
 *
 * Relevant options:
 *  GROUP BY is put directly in the query (array gets imploded)
 *  HAVING is treated like a WHERE clause
 *  ORDER BY is put directly in the query (array gets imploded)
 *  USE INDEX is directly put in string (both array and string version)
 *  IGNORE INDEX ditto
 *
 * Elements whose key is not a literal string (no key, numeric key, or a
 * dynamic key) are ignored.
 *
 * @param Node|mixed $node The node from the AST tree
 */
private function checkSQLOptions( $node ): void {
    if ( !( $node instanceof Node ) || $node->kind !== \ast\AST_ARRAY ) {
        return;
    }
    $relevant = [
        'GROUP BY' => true,
        'ORDER BY' => true,
        'HAVING' => true,
        'USE INDEX' => true,
        'IGNORE INDEX' => true,
    ];
    foreach ( $node->children as $arrayElm ) {
        assert( $arrayElm->kind === \ast\AST_ARRAY_ELEM );
        $val = $arrayElm->children['value'];
        $key = $arrayElm->children['key'];

        // A dynamic key is a Node, i.e. an object, which is not a legal array offset.
        // Only literal string keys can name one of the options above anyway.
        if ( !is_string( $key ) || !isset( $relevant[$key] ) ) {
            continue;
        }

        // HAVING given as an array behaves like a WHERE clause, hence the numkey taint
        $taintType = ( $key === 'HAVING' && $this->nodeIsArray( $val ) ) ?
            SecurityCheckPlugin::SQL_NUMKEY_EXEC_TAINT :
            SecurityCheckPlugin::SQL_EXEC_TAINT;
        $taintType = new Taintedness( $taintType );

        $this->backpropagateArgTaint( $node, $taintType );

        // Report the issue on the line of the value, then put back whatever override
        // was active before rather than unconditionally clearing it.
        $previousOverride = $this->overrideContext;
        $ctx = clone $this->context;
        $this->overrideContext = $ctx->withLineNumberStart(
            $val->lineno ?? $ctx->getLineNumberStart()
        );
        $this->maybeEmitIssueSimplified(
            $taintType,
            $val,
            "{STRING_LITERAL} clause is user controlled",
            [ $key ]
        );
        $this->overrideContext = $previousOverride;
    }
}
594
/**
 * Check a join_cond structure.
 *
 * Syntax is like
 *
 *  [ 'aliasOfTable' => [ 'JOIN TYPE', $onConditions ], ... ]
 *  join type is usually something safe like INNER JOIN, but it is not
 *  validated or escaped. $onConditions is the same form as a WHERE clause.
 *
 * Only array literals are understood; entries whose join info is not an
 * array literal are skipped.
 *
 * @param Node|mixed $node
 */
private function checkJoinCond( $node ): void {
    if ( !( $node instanceof Node ) || $node->kind !== \ast\AST_ARRAY ) {
        return;
    }

    foreach ( $node->children as $table ) {
        assert( $table->kind === \ast\AST_ARRAY_ELEM );

        $tableName = is_string( $table->children['key'] ) ?
            $table->children['key'] :
            '[UNKNOWN TABLE]';
        $joinInfo = $table->children['value'];
        if ( !( $joinInfo instanceof Node ) || $joinInfo->kind !== \ast\AST_ARRAY ) {
            continue;
        }

        if (
            count( $joinInfo->children ) === 0 ||
            $joinInfo->children[0]->children['key'] !== null
        ) {
            $this->debug( __METHOD__, "join info has named key??" );
            continue;
        }
        $joinType = $joinInfo->children[0]->children['value'];
        // join type does not get escaped.
        $this->maybeEmitIssueSimplified(
            new Taintedness( SecurityCheckPlugin::SQL_EXEC_TAINT ),
            $joinType,
            "Join type for {STRING_LITERAL} is user controlled",
            [ $tableName ]
        );
        if ( $joinType instanceof Node ) {
            $this->backpropagateArgTaint(
                $joinType,
                new Taintedness( SecurityCheckPlugin::SQL_EXEC_TAINT )
            );
        }

        // On to the join ON conditions.
        if (
            count( $joinInfo->children ) === 1 ||
            $joinInfo->children[1]->children['key'] !== null
        ) {
            $this->debug( __METHOD__, "join info has named key??" );
            continue;
        }
        $onCond = $joinInfo->children[1]->children['value'];

        // Report on the line of the ON conditions, then put back whatever override
        // was active before rather than unconditionally clearing it.
        $previousOverride = $this->overrideContext;
        $ctx = clone $this->context;
        $this->overrideContext = $ctx->withLineNumberStart(
            $onCond->lineno ?? $ctx->getLineNumberStart()
        );
        $this->maybeEmitIssueSimplified(
            new Taintedness( SecurityCheckPlugin::SQL_NUMKEY_EXEC_TAINT ),
            $onCond,
            "The ON conditions are not properly escaped for the join to `{STRING_LITERAL}`",
            [ $tableName ]
        );
        if ( $onCond instanceof Node ) {
            $this->backpropagateArgTaint(
                $onCond,
                new Taintedness( SecurityCheckPlugin::SQL_NUMKEY_EXEC_TAINT )
            );
        }
        $this->overrideContext = $previousOverride;
    }
}
669
/**
 * Check the return value of a parser function hook: if it is an array
 * literal containing 'isHTML' => true, the first element is output as
 * HTML and therefore must not be tainted.
 *
 * The `true` constant is matched case-insensitively (TRUE, True, ...),
 * like PHP itself does.
 *
 * @param Node $node The expr child of the return. NOT the return itself
 * @param FullyQualifiedFunctionLikeName $funcName
 */
private function visitReturnOfFunctionHook( Node $node, FullyQualifiedFunctionLikeName $funcName ): void {
    if ( $node->kind !== \ast\AST_ARRAY || count( $node->children ) < 2 ) {
        return;
    }
    $isHTML = false;
    foreach ( $node->children as $child ) {
        assert( $child instanceof Node && $child->kind === \ast\AST_ARRAY_ELEM );

        if ( $child->children['key'] !== 'isHTML' ) {
            continue;
        }
        $value = $child->children['value'];
        if (
            $value instanceof Node &&
            $value->kind === \ast\AST_CONST &&
            $value->children['name'] instanceof Node &&
            is_string( $value->children['name']->children['name'] ) &&
            // The AST keeps the constant name as written in the source
            strtolower( $value->children['name']->children['name'] ) === 'true'
        ) {
            $isHTML = true;
            break;
        }
    }
    if ( !$isHTML ) {
        return;
    }

    $arg = $node->children[0];
    assert( $arg instanceof Node && $arg->kind === \ast\AST_ARRAY_ELEM );
    $this->maybeEmitIssueSimplified(
        new Taintedness( SecurityCheckPlugin::HTML_EXEC_TAINT ),
        $arg->children['value'],
        "Outputting user controlled HTML from Parser function hook {FUNCTIONLIKE}",
        [ $funcName ]
    );
}
708
/**
 * Resolve the callback of a MediaWiki hook registration.
 *
 * @note This is a different format than Parser hooks use.
 *
 * Accepted callback shapes:
 *  "wfSomeFunction"
 *  "SomeClass::SomeStaticMethod"
 *  A Closure
 *  $instanceOfSomeObject  (With implied method name based on hook name)
 *  new SomeClass
 *  [ <one of the above>, $extraArgsForCallback, ...]
 *  [ [<one of the above>], $extraArgsForCallback, ...]
 *  [ $instanceOfObj, 'methodName', $optionalArgForCallback, ... ]
 *  [ [ $instanceOfObj, 'methodName' ], $optionalArgForCallback, ...]
 *
 * Oddly enough, [ 'NameOfClass', 'NameOfStaticMethod' ] does not appear
 * to be valid, despite that being a valid callable.
 *
 * @param Node|mixed $node The registered value
 * @param string $hookName Hook name; "on" + this name is the implied method for objects
 * @return FunctionInterface|null The callback, or null if it could not be determined
 */
private function getCallableFromHookRegistration( $node, string $hookName ): ?FunctionInterface {
    // Non-node values ("wfSomething", "Class::Method") and closures
    if ( !( $node instanceof Node ) || $node->kind === \ast\AST_CLOSURE ) {
        return $this->getCallableFromNode( $node );
    }

    // A variable or `new` expression registered on its own: look up on<HookName>
    $implicitHandler = $this->getSingleCallable( $node, 'on' . $hookName );
    if ( $implicitHandler ) {
        return $implicitHandler;
    }

    // Anything else must be a non-empty array literal
    if ( $node->kind !== \ast\AST_ARRAY || count( $node->children ) === 0 ) {
        return null;
    }

    $head = $node->children[0]->children['value'];
    // One of:
    // [ 'SomeClass::method', $optionalArgs, ... ]
    // [ [ <callback> ], $optionalArgs, ... ]
    // [ <callback> ]
    // Important to note, this is safe because the
    // [ 'SomeClass', 'MethodToCallStatically' ]
    // syntax isn't supported by hooks.
    $headIsTheCallback = !( $head instanceof Node )
        || $head->kind === \ast\AST_ARRAY
        || count( $node->children ) === 1;
    if ( $headIsTheCallback ) {
        return $this->getCallableFromHookRegistration( $head, $hookName );
    }

    // What is left is: [ $someObject, 'methodToCall', 'arg', ... ]
    $explicitMethod = $this->resolveValue( $node->children[1]->children['value'] );
    if ( !is_string( $explicitMethod ) ) {
        return null;
    }
    return $this->getSingleCallable( $head, $explicitMethod );
}
774
/**
 * Resolve a method on the object denoted by a variable or a `new` expression.
 *
 * @param Node $node An AST_VAR (with a literal name) or AST_NEW node; other kinds yield null
 * @param string $methodName Method to look up on the object
 * @return FunctionInterface|null The method, or null if it could not be found
 */
private function getSingleCallable( Node $node, string $methodName ): ?FunctionInterface {
    $isNamedVar = $node->kind === \ast\AST_VAR && is_string( $node->children['name'] );
    if ( $isNamedVar ) {
        return $this->getCallbackForVar( $node, $methodName );
    }

    if ( $node->kind !== \ast\AST_NEW ) {
        return null;
    }

    $ctxNode = $this->getCtxN( $node );
    try {
        // Go through the constructor to reach the class being instantiated.
        $constructor = $ctxNode->getMethod( '__construct', false, false, true );
        $instantiatedClass = $constructor->getClass( $this->code_base );
        return $instantiatedClass->getMethodByName( $this->code_base, $methodName );
    } catch ( CodeBaseException $e ) {
        // @todo Should probably emit a non-security issue
        $this->debug( __METHOD__, "Missing hook handle: " . $this->getDebugInfo( $e ) );
        return null;
    }
}
796
/**
 * Work out which callback an AST_VAR node stands for.
 *
 * The variable is first treated as an object: for the first class it may hold
 * (other than \Closure), $defaultMethod is looked up on that class. Only if no
 * such class is found is the variable resolved as a callable.
 *
 * @param Node $node The variable
 * @param string $defaultMethod If the var is an object, what method to use
 * @return FunctionInterface|null
 */
private function getCallbackForVar( Node $node, $defaultMethod = '' ): ?FunctionInterface {
    assert( $node->kind === \ast\AST_VAR );
    $ctxNode = $this->getCtxN( $node );

    // The class lookup goes first: resolving a plain class instance as a callable
    // might emit issues (about missing __invoke).
    try {
        // Wrong types are tolerated silently here, since the variable might be
        // a callable rather than a class instance.
        $candidates = $ctxNode->getClassList( true, ContextNode::CLASS_LIST_ACCEPT_ANY, null, false );
    } catch ( CodeBaseException | IssueException $_ ) {
        $candidates = [];
    }

    foreach ( $candidates as $candidate ) {
        if ( (string)$candidate->getFQSEN() === '\Closure' ) {
            // Closures are callables; those are handled after the loop.
            continue;
        }
        try {
            $method = $candidate->getMethodByName( $this->code_base, $defaultMethod );
        } catch ( CodeBaseException $_ ) {
            return null;
        }
        return $method;
    }

    return $this->getCallableFromNode( $node );
}
829
/**
 * Check for $wgHooks registration
 *
 * Recognizes `$wgHooks['SomeHook'][] = <callback>` and
 * `$GLOBALS['wgHooks']['SomeHook'][] = <callback>` and registers the callback
 * for the hook when it can be resolved.
 *
 * @param Node $node
 * @note This assumes $wgHooks is always the global
 *   even if there is no globals declaration.
 */
public function visitAssign( Node $node ): void {
    parent::visitAssign( $node );

    $var = $node->children['var'];
    if ( !$var instanceof Node ) {
        // Syntax error
        return;
    }

    $hookName = $this->getHookNameFromAppendTarget( $var );
    if ( $hookName === null ) {
        // Not a hook registration
        return;
    }

    $cb = $this->getCallableFromHookRegistration( $node->children['expr'], $hookName );
    if ( $cb ) {
        $this->registerHook( $hookName, $cb );
    } else {
        $this->debug( __METHOD__, "Could not register hook " .
            "$hookName due to complex callback"
        );
    }
}

/**
 * Extract the hook name from the left-hand side of a hook registration.
 *
 * @param Node $var Assignment target
 * @return string|null The literal hook name if the target is
 *   `$wgHooks['Name'][]` or `$GLOBALS['wgHooks']['Name'][]`, null otherwise
 */
private function getHookNameFromAppendTarget( Node $var ): ?string {
    // Outermost level must be an append: `<something>[] = ...`
    if ( $var->kind !== \ast\AST_DIM || $var->children['dim'] !== null ) {
        return null;
    }

    // Next level must be `<container>['SomeHook']` with a literal string offset
    $hookDim = $var->children['expr'];
    if ( !$hookDim instanceof Node || $hookDim->kind !== \ast\AST_DIM ) {
        return null;
    }
    $hookName = $hookDim->children['dim'];
    $container = $hookDim->children['expr'];
    if ( !is_string( $hookName ) || !$container instanceof Node ) {
        return null;
    }

    // The $wgHooks['SomeHook'][] case
    if ( $container->kind === \ast\AST_VAR ) {
        return $container->children['name'] === 'wgHooks' ? $hookName : null;
    }

    // The $GLOBALS['wgHooks']['SomeHook'][] case. The superglobal is named
    // GLOBALS (no leading underscore), and the offset has to be 'wgHooks' so
    // that other globals are not mistaken for the hook list.
    if ( $container->kind === \ast\AST_DIM ) {
        $globalsVar = $container->children['expr'];
        if (
            $globalsVar instanceof Node &&
            $globalsVar->kind === \ast\AST_VAR &&
            $globalsVar->children['name'] === 'GLOBALS' &&
            $container->children['dim'] === 'wgHooks'
        ) {
            return $hookName;
        }
    }

    return null;
}
878
/**
 * Detect HTMLForm field specifiers in an array literal and check the taint of
 * their HTML-related keys. Called from visitArray.
 *
 * The array is considered a field specifier when it has a 'type' and/or
 * 'class' key that looks valid and at least one of the keys label, label-raw,
 * raw/rawrow, help, help-raw, default or options. Depending on the key, the
 * value is checked for HTML_EXEC_TAINT (needs escaping) or ESCAPED_EXEC_TAINT
 * (would be double escaped).
 *
 * @param Node $node The array node
 */
private function detectHTMLForm( Node $node ): void {
    // Try to immediately filter out things that certainly aren't HTMLForms
    $maybeHTMLForm = false;
    foreach ( $node->children as $child ) {
        if ( $child instanceof Node && $child->kind === \ast\AST_ARRAY_ELEM ) {
            $key = $child->children['key'];
            // A non-literal key might still resolve to 'class' or 'type' later on
            if ( $key instanceof Node || $key === 'class' || $key === 'type' ) {
                $maybeHTMLForm = true;
                break;
            }
        }
    }
    if ( !$maybeHTMLForm ) {
        return;
    }

    $authReqFQSEN = FullyQualifiedClassName::fromFullyQualifiedString(
        'MediaWiki\Auth\AuthenticationRequest'
    );

    if (
        $this->code_base->hasClassWithFQSEN( $authReqFQSEN ) &&
        $this->context->isInClassScope() &&
        self::isSubclassOf( $this->context->getClassFQSEN(), $authReqFQSEN, $this->code_base )
    ) {
        // AuthenticationRequest::getFieldInfo() defines a very
        // similar array but with different rules. T202112
        return;
    }

    // This is a rather superficial check. There
    // are many ways to construct htmlform specifiers this
    // won't catch, and it may also have some false positives.

    static $validHTMLFormTypes = [
        'api',
        'text',
        'textwithbutton',
        'textarea',
        'select',
        'combobox',
        'radio',
        'multiselect',
        'limitselect',
        'check',
        'toggle',
        'int',
        'float',
        'info',
        'selectorother',
        'selectandother',
        'namespaceselect',
        'namespaceselectwithbutton',
        'tagfilter',
        'sizefilter',
        'submit',
        'hidden',
        'edittools',
        'checkmatrix',
        'cloner',
        'autocompleteselect',
        'date',
        'time',
        'datetime',
        'email',
        'password',
        'url',
        'title',
        'user',
        'usersmultiselect',
    ];

    // Resolved values of the keys of interest; null means "key not present".
    $type = null;
    $raw = null;
    $class = null;
    $rawLabel = null;
    $help = null;
    $help_raw = null;
    $label = null;
    $default = null;
    $options = null;
    $isInfo = false;
    // options key is really messed up with escaping.
    $isOptionsSafe = true;
    foreach ( $node->children as $child ) {
        if ( $child === null || $child->kind === \ast\AST_UNPACK ) {
            // If we have list( , $x ) = foo(), or an in-place unpack, chances are this is not an HTMLForm.
            return;
        }
        assert( $child->kind === \ast\AST_ARRAY_ELEM );
        if ( $child->children['key'] === null ) {
            // Implicit offset, hence most certainly not an HTMLForm.
            return;
        }
        $key = $this->resolveOffset( $child->children['key'] );
        if ( !is_string( $key ) ) {
            // Either not resolvable (so nothing we can say) or a non-string literal, skip.
            return;
        }
        switch ( $key ) {
            case 'type':
                $type = $this->resolveValue( $child->children['value'] );
                break;
            case 'class':
                $class = $this->resolveValue( $child->children['value'] );
                break;
            case 'label':
                $label = $this->resolveValue( $child->children['value'] );
                break;
            case 'options':
                $options = $this->resolveValue( $child->children['value'] );
                break;
            case 'default':
                $default = $this->resolveValue( $child->children['value'] );
                break;
            case 'label-raw':
                $rawLabel = $this->resolveValue( $child->children['value'] );
                break;
            case 'raw':
            case 'rawrow':
                $raw = $this->resolveValue( $child->children['value'] );
                break;
            case 'help':
                // TODO: remove help key case when back compat is no longer needed
                $help = $this->resolveValue( $child->children['value'] );
                break;
            case 'help-raw':
                $help_raw = $this->resolveValue( $child->children['value'] );
                break;
        }
    }

    if ( !$class && !$type ) {
        // Definitely not an HTMLForm
        // Also important to reject empty string, not just
        // null, otherwise 9e409c781015 of Wikibase causes
        // this to fatal
        return;
    }

    if (
        // Strict comparisons throughout: a falsy literal (e.g. '') still
        // means the key is present.
        $raw === null && $label === null && $rawLabel === null && $help === null
        && $help_raw === null && $default === null && $options === null
    ) {
        // e.g. [ 'class' => 'someCssClass' ] appears a lot
        // in the code base. If we don't have any of the html
        // fields, skip out early.
        return;
    }

    if ( $type !== null && !in_array( $type, $validHTMLFormTypes, true ) ) {
        // Not a valid HTMLForm field
        // (Or someone just added a new field type)
        return;
    }

    if ( $type === 'info' ) {
        $isInfo = true;
    }

    // For these types the options are checked for HTML taint further down
    if ( in_array( $type, [ 'radio', 'multiselect' ], true ) ) {
        $isOptionsSafe = false;
    }

    if ( $class !== null ) {
        if ( !is_string( $class ) ) {
            return;
        }
        try {
            $fqsen = FullyQualifiedClassName::fromStringInContext(
                $class,
                $this->context
            );
        } catch ( InvalidFQSENException $_ ) {
            // 'class' refers to something which is not a class, and this is probably not
            // an HTMLForm
            return;
        }
        if ( !$this->code_base->hasClassWithFQSEN( $fqsen ) ) {
            return;
        }
        // Same special cases as for 'type' above, keyed by class name
        // (both the global and the namespaced names are accepted).
        $fqsenString = (string)$fqsen;
        if ( $fqsenString === '\HTMLInfoField' ||
            $fqsenString === '\MediaWiki\HTMLForm\Field\HTMLInfoField'
        ) {
            $isInfo = true;
        }
        if (
            $fqsenString === '\HTMLMultiSelectField' ||
            $fqsenString === '\MediaWiki\HTMLForm\Field\HTMLMultiSelectField' ||
            $fqsenString === '\HTMLRadioField' ||
            $fqsenString === '\MediaWiki\HTMLForm\Field\HTMLRadioField'
        ) {
            $isOptionsSafe = false;
        }

        // Prefer the namespaced base class, fall back to the global one
        $fqsenBase = FullyQualifiedClassName::fromFullyQualifiedString(
            '\MediaWiki\HTMLForm\Field\HTMLFormField'
        );
        if ( !$this->code_base->hasClassWithFQSEN( $fqsenBase ) ) {
            $fqsenBase = FullyQualifiedClassName::fromFullyQualifiedString(
                '\HTMLFormField'
            );
            if ( !$this->code_base->hasClassWithFQSEN( $fqsenBase ) ) {
                $this->debug( __METHOD__, "Missing HTMLFormField base class?!" );
                return;
            }
        }

        $isAField = self::isSubclassOf( $fqsen, $fqsenBase, $this->code_base );

        if ( !$isAField ) {
            return;
        }
    }

    if ( $label !== null ) {
        // double escape check for label.
        $this->maybeEmitIssueSimplified(
            new Taintedness( SecurityCheckPlugin::ESCAPED_EXEC_TAINT ),
            $label,
            'HTMLForm label key escapes its input'
        );
    }
    if ( $rawLabel !== null ) {
        $this->maybeEmitIssueSimplified(
            new Taintedness( SecurityCheckPlugin::HTML_EXEC_TAINT ),
            $rawLabel,
            'HTMLForm label-raw needs to escape input'
        );
    }
    if ( $help !== null ) {
        $this->maybeEmitIssueSimplified(
            new Taintedness( SecurityCheckPlugin::HTML_EXEC_TAINT ),
            $help,
            'HTMLForm help needs to escape input'
        );
    }
    if ( $help_raw !== null ) {
        $this->maybeEmitIssueSimplified(
            new Taintedness( SecurityCheckPlugin::HTML_EXEC_TAINT ),
            $help_raw,
            'HTMLForm help-raw needs to escape input'
        );
    }
    // For info fields, how 'default' is checked depends on the raw flag. Nothing
    // is checked when the flag could not be resolved to a boolean or null.
    if ( $isInfo && $raw === true ) {
        $this->maybeEmitIssueSimplified(
            new Taintedness( SecurityCheckPlugin::HTML_EXEC_TAINT ),
            $default,
            'HTMLForm info field in raw mode needs to escape default key'
        );
    }
    if ( $isInfo && ( $raw === false || $raw === null ) ) {
        $this->maybeEmitIssueSimplified(
            new Taintedness( SecurityCheckPlugin::ESCAPED_EXEC_TAINT ),
            $default,
            'HTMLForm info field (non-raw) escapes default key already'
        );
    }
    if ( !$isOptionsSafe && $options instanceof Node ) {
        $htmlExecTaint = new Taintedness( SecurityCheckPlugin::HTML_EXEC_TAINT );
        $optTaint = $this->getTaintedness( $options );
        // Only the taint of the array keys of 'options' is checked here
        $this->maybeEmitIssue(
            $htmlExecTaint,
            $optTaint->getTaintedness()->asKeyForForeach(),
            'HTMLForm option label needs escaping{DETAILS}',
            [ [ 'lines' => $optTaint->getError(), 'sink' => false ] ]
        );
    }
}
1154
/**
 * Run HTMLForm specifier detection on array literals, unless the node has
 * been flagged to be skipped.
 *
 * @param Node $node
 */
public function visitArray( Node $node ): void {
    parent::visitArray( $node );

    // Performance: use isset(), not property_exists
    // @phan-suppress-next-line PhanUndeclaredProperty
    if ( isset( $node->skipHTMLFormAnalysis ) ) {
        return;
    }
    $this->detectHTMLForm( $node );
}
1168}