Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
285 / 285
100.00% covered (success)
100.00%
17 / 17
CRAP
100.00% covered (success)
100.00%
1 / 1
SyntaxChecker
100.00% covered (success)
100.00%
285 / 285
100.00% covered (success)
100.00%
17 / 17
89
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 start
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
4
 desugar
100.00% covered (success)
100.00%
62 / 62
100.00% covered (success)
100.00%
1 / 1
23
 desugarAndOr
100.00% covered (success)
100.00%
47 / 47
100.00% covered (success)
100.00%
1 / 1
3
 newNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 newNodeReplaceType
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 newNodeMapAll
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 newNodeMapExceptFirst
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
2
 newNodeBinop
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 newNodeNamedBinop
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 check
100.00% covered (success)
100.00%
81 / 81
100.00% covered (success)
100.00%
1 / 1
28
 mapUnion
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
4
 mapIntersect
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 assignVar
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
 lookupVar
100.00% covered (success)
100.00%
23 / 23
100.00% covered (success)
100.00%
1 / 1
5
 checkArgCount
100.00% covered (success)
100.00%
14 / 14
100.00% covered (success)
100.00%
1 / 1
5
 isReservedIdentifier
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
3
1<?php
2
3namespace MediaWiki\Extension\AbuseFilter\Parser;
4
5use InvalidArgumentException;
6use LogicException;
7use MediaWiki\Extension\AbuseFilter\KeywordsManager;
8use MediaWiki\Extension\AbuseFilter\Parser\Exception\InternalException;
9use MediaWiki\Extension\AbuseFilter\Parser\Exception\UserVisibleException;
10use MediaWiki\Message\Message;
11
12/**
13 * SyntaxChecker statically analyzes the code without actually running it.
14 * Currently, it only checks for
15 *
16 * - unbound variables
17 * - unused variables: note that a := 1; a := 1; a
18 *     is considered OK even though the first `a` seems unused
19 *     because the pattern "a := null; if ... then (a := ...) end; ..."
20 *     should not count first `a` as unused.
21 * - assignment to built-in identifiers
22 * - invalid function call (arity mismatch, non-valid function)
23 * - first-order information of `set_var` and `set`
24 *
25 * Because it doesn't cover all checks that the current Check Syntax does,
26 * it is currently complementary to the current Check Syntax.
27 * In the future, it could subsume the current Check Syntax, and could be
28 * extended to perform type checking or type inference.
29 */
30class SyntaxChecker {
31    /**
32     * @var AFPTreeNode|null Root of the AST to check
33     */
34    private $treeRoot;
35
36    /** @var KeywordsManager */
37    private $keywordsManager;
38
39    public const MCONSERVATIVE = 'MODE_CONSERVATIVE';
40    public const MLIBERAL = 'MODE_LIBERAL';
41    public const DUMMYPOS = 0;
42    public const CACHE_VERSION = 1;
43
44    /**
45     * @var string The mode of checking. The value should be either
46     *
47     *     - MLIBERAL: which guarantees that all user-defined variables
48     *       will be bound, but incompatible with what the evaluator currently
49     *       permits. E.g.,
50     *
51     *       if true then (a := 1) else null end; a
52     *
53     *       is rejected in this mode, even though `a` is in fact always bound.
54     *
55     *     - MCONSERVATIVE which is compatible with what the evaluator
56     *       currently permits, but could allow undefined variables to occur.
57     *       E.g.,
58     *
59     *       if false then (a := 1) else null end; a
60     *
61     *       is accepted in this mode, even though `a` is in fact always unbound.
62     */
63    private $mode;
64
65    /**
66     * @var bool Whether we want to check for unused variables
67     */
68    private $checkUnusedVars;
69
70    /**
71     * @param AFPSyntaxTree $tree
72     * @param KeywordsManager $keywordsManager
73     * @param string $mode
74     * @param bool $checkUnusedVars
75     */
76    public function __construct(
77        AFPSyntaxTree $tree,
78        KeywordsManager $keywordsManager,
79        string $mode = self::MCONSERVATIVE,
80        bool $checkUnusedVars = false
81    ) {
82        $this->treeRoot = $tree->getRoot();
83        $this->keywordsManager = $keywordsManager;
84        $this->mode = $mode;
85        $this->checkUnusedVars = $checkUnusedVars;
86    }
87
88    /**
89     * Start the static analysis
90     *
91     * @throws UserVisibleException
92     */
93    public function start(): void {
94        if ( !$this->treeRoot ) {
95            return;
96        }
97        $bound = $this->check( $this->desugar( $this->treeRoot ), [] );
98        $unused = array_keys( array_filter( $bound, static function ( $v ) {
99            return !$v;
100        } ) );
101        if ( $this->checkUnusedVars && $unused ) {
102            throw new UserVisibleException(
103                'unusedvars',
104                self::DUMMYPOS,
105                [ Message::listParam( $unused, 'comma' ) ]
106            );
107        }
108    }
109
110    /**
111     * Remove syntactic sugar so that we don't need to deal with
112     * too many cases.
113     *
114     * This could benefit the evaluator as well, but for now, this is
115     * only used for static analysis.
116     *
117     * Postcondition:
118     *     - The tree will not contain nodes of
119     *       type ASSIGNMENT, LOGIC, COMPARE, SUM_REL, MUL_REL, POW,
120     *       KEYWORD_OPERATOR, and ARRAY_INDEX
121     *     - The tree may additionally contain a node of type BINOP.
122     *     - The tree should not have set_var function application.
123     *     - Conditionals will have both branches.
124     *
125     * @param AFPTreeNode $node
126     * @return AFPTreeNode
127     * @throws InternalException
128     */
129    private function desugar( AFPTreeNode $node ): AFPTreeNode {
130        switch ( $node->type ) {
131            case AFPTreeNode::ATOM:
132                return $node;
133
134            case AFPTreeNode::FUNCTION_CALL:
135                if ( $node->children[0] === 'set_var' ) {
136                    $node->children[0] = 'set';
137                }
138                return $this->newNodeMapExceptFirst( $node );
139
140            case AFPTreeNode::ARRAY_INDEX:
141                return $this->newNodeNamedBinop( $node, '[]' );
142
143            case AFPTreeNode::POW:
144                return $this->newNodeNamedBinop( $node, '**' );
145
146            case AFPTreeNode::UNARY:
147            case AFPTreeNode::INDEX_ASSIGNMENT:
148            case AFPTreeNode::ARRAY_APPEND:
149                return $this->newNodeMapExceptFirst( $node );
150
151            case AFPTreeNode::BOOL_INVERT:
152                /*
153                 * @todo this should really be combined with UNARY,
154                 * but let's wait to change the meaning of UNARY across
155                 * the codebase together
156                 */
157                return $this->newNodeMapAll( $node );
158
159            case AFPTreeNode::KEYWORD_OPERATOR:
160            case AFPTreeNode::MUL_REL:
161            case AFPTreeNode::SUM_REL:
162            case AFPTreeNode::COMPARE:
163                return $this->newNodeBinop( $node );
164
165            case AFPTreeNode::LOGIC:
166                $result = $this->newNodeBinop( $node );
167                [ $op, $left, $right ] = $result->children;
168                if ( $op === '&' || $op === '|' ) {
169                    return $this->desugarAndOr( $op, $left, $right, $node->position );
170                } else {
171                    return $result;
172                }
173
174            case AFPTreeNode::ARRAY_DEFINITION:
175            case AFPTreeNode::SEMICOLON:
176                return $this->newNodeMapAll( $node );
177
178            case AFPTreeNode::CONDITIONAL:
179                if ( $node->children[2] === null ) {
180                    $node->children[2] = new AFPTreeNode(
181                        AFPTreeNode::ATOM,
182                        new AFPToken(
183                            AFPToken::TKEYWORD,
184                            "null",
185                            $node->position
186                        ),
187                        $node->position
188                    );
189                }
190                return $this->newNodeMapAll( $node );
191
192            case AFPTreeNode::ASSIGNMENT:
193                [ $varname, $value ] = $node->children;
194
195                return new AFPTreeNode(
196                    AFPTreeNode::FUNCTION_CALL,
197                    [
198                        "set",
199                        new AFPTreeNode(
200                            AFPTreeNode::ATOM,
201                            new AFPToken(
202                                AFPToken::TSTRING,
203                                $varname,
204                                $node->position
205                            ),
206                            $node->position
207                        ),
208                        $this->desugar( $value )
209                    ],
210                    $node->position
211                );
212
213            default:
214                // @codeCoverageIgnoreStart
215                throw new InternalException( "Unknown node type passed: {$node->type}" );
216                // @codeCoverageIgnoreEnd
217        }
218    }
219
220    /**
221     * @param string $op
222     * @param AFPTreeNode $left
223     * @param AFPTreeNode $right
224     * @param int $position
225     * @return AFPTreeNode
226     */
227    private function desugarAndOr(
228        string $op,
229        AFPTreeNode $left,
230        AFPTreeNode $right,
231        int $position
232    ): AFPTreeNode {
233        $trueNode = new AFPTreeNode(
234            AFPTreeNode::ATOM,
235            new AFPToken(
236                AFPToken::TKEYWORD,
237                "true",
238                $position
239            ),
240            $position
241        );
242        $falseNode = new AFPTreeNode(
243            AFPTreeNode::ATOM,
244            new AFPToken(
245                AFPToken::TKEYWORD,
246                "false",
247                $position
248            ),
249            $position
250        );
251        $conditionalNode = new AFPTreeNode(
252            AFPTreeNode::CONDITIONAL,
253            [
254                $right,
255                $trueNode,
256                $falseNode
257            ],
258            $position
259        );
260
261        if ( $op === '&' ) {
262            // <a> & <b> is supposed to be equivalent to
263            // if <a> then (if <b> then true else false) else false end
264            // See T237336 for why this is currently not the case.
265            return new AFPTreeNode(
266                AFPTreeNode::CONDITIONAL,
267                [
268                    $left,
269                    $conditionalNode,
270                    $falseNode
271                ],
272                $position
273            );
274        } elseif ( $op === '|' ) {
275            // <a> | <b> is supposed to be equivalent to
276            // if <a> then true else (if <b> then true else false) end
277            // See T237336 for why this is currently not the case.
278            return new AFPTreeNode(
279                AFPTreeNode::CONDITIONAL,
280                [
281                    $left,
282                    $trueNode,
283                    $conditionalNode
284                ],
285                $position
286            );
287        } else {
288            // @codeCoverageIgnoreStart
289            throw new InternalException( "Unknown operator: {$op}" );
290            // @codeCoverageIgnoreEnd
291        }
292    }
293
294    /**
295     * Construct a new node with information based on the old node but
296     * with different children
297     *
298     * @param AFPTreeNode $node
299     * @param AFPTreeNode[]|string[]|AFPToken $children
300     * @return AFPTreeNode
301     */
302    private function newNode( AFPTreeNode $node, $children ): AFPTreeNode {
303        return new AFPTreeNode( $node->type, $children, $node->position );
304    }
305
306    /**
307     * Construct a new node with information based on the old node but
308     * with different type
309     *
310     * @param AFPTreeNode $node
311     * @param string $type
312     * @return AFPTreeNode
313     */
314    private function newNodeReplaceType(
315        AFPTreeNode $node,
316        string $type
317    ): AFPTreeNode {
318        return new AFPTreeNode( $type, $node->children, $node->position );
319    }
320
321    /**
322     * Recursively desugar on all children
323     *
324     * @param AFPTreeNode $node
325     * @return AFPTreeNode
326     */
327    private function newNodeMapAll( AFPTreeNode $node ): AFPTreeNode {
328        $children = $node->children;
329        if ( !is_array( $children ) ) {
330            // @codeCoverageIgnoreStart
331            throw new LogicException(
332                "Unexpected non-array children of an AFPTreeNode of type " .
333                "{$node->type} at position {$node->position}"
334            );
335            // @codeCoverageIgnoreEnd
336        }
337        return $this->newNode( $node, array_map( [ $this, 'desugar' ], $children ) );
338    }
339
340    /**
341     * Recursively desugar on all children except the first one
342     *
343     * @param AFPTreeNode $node
344     * @return AFPTreeNode
345     */
346    private function newNodeMapExceptFirst( AFPTreeNode $node ): AFPTreeNode {
347        $items = [ $node->children[0] ];
348        $args = array_slice( $node->children, 1 );
349        foreach ( $args as $el ) {
350            $items[] = $this->desugar( $el );
351        }
352        return $this->newNode( $node, $items );
353    }
354
355    /**
356     * Convert a node with an operation into a BINOP
357     *
358     * @param AFPTreeNode $node
359     * @return AFPTreeNode
360     */
361    private function newNodeBinop( AFPTreeNode $node ): AFPTreeNode {
362        return $this->newNodeReplaceType(
363            $this->newNodeMapExceptFirst( $node ),
364            AFPTreeNode::BINOP
365        );
366    }
367
368    /**
369     * Convert a node without an operation into a BINOP with the specified operation
370     *
371     * @param AFPTreeNode $node
372     * @param string $op
373     * @return AFPTreeNode
374     */
375    private function newNodeNamedBinop(
376        AFPTreeNode $node,
377        string $op
378    ): AFPTreeNode {
379        $items = $this->newNodeMapAll( $node )->children;
380        array_unshift( $items, $op );
381        return $this->newNodeReplaceType(
382            $this->newNode( $node, $items ),
383            AFPTreeNode::BINOP
384        );
385    }
386
387    /**
388     * - Statically compute what are bound after evaluating $node,
389     *     provided that variables in $bound are already bound.
390     * - Similarly compute for each bound variable after evaluating $node
391     *     whether it is used provided that we already have $bound
392     *     that contains necessary information.
393     * - Ensure function application's validity.
394     * - Ensure that the first argument of set is a literal string.
395     * - Ensure that all assignment is not done on built-in identifier.
396     *
397     * Precondition:
398     *     - The tree $node should be desugared and normalized.
399     *
400     * Postcondition:
401     *     - $node is guaranteed to have no unbound variables
402     *       provided that variables in $bound are already bound
403     *       (for the definition of unbound variable indicated by $this->mode)
404     *     - All function applications should be valid and have correct arity.
405     *     - The set function application's first argument should be
406     *       a literal string.
407     *
408     * @param AFPTreeNode $node
409     * @param bool[] $bound Map of [ variable_name => used ]
410     * @return bool[] Map of [ variable_name => used ]
411     * @throws UserVisibleException
412     * @throws InternalException
413     */
414    private function check( AFPTreeNode $node, array $bound ): array {
415        switch ( $node->type ) {
416            // phpcs:ignore PSR2.ControlStructures.SwitchDeclaration.TerminatingComment
417            case AFPTreeNode::ATOM:
418                $tok = $node->children;
419                switch ( $tok->type ) {
420                    case AFPToken::TID:
421                        return $this->lookupVar(
422                            $tok->value,
423                            $tok->pos,
424                            $bound
425                        );
426
427                    case AFPToken::TSTRING:
428                    case AFPToken::TFLOAT:
429                    case AFPToken::TINT:
430                    case AFPToken::TKEYWORD:
431                        return $bound;
432
433                    default:
434                        // @codeCoverageIgnoreStart
435                        throw new InternalException( "Unknown token {$tok->type} provided in the ATOM node" );
436                        // @codeCoverageIgnoreEnd
437                }
438            case AFPTreeNode::ARRAY_DEFINITION:
439                // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here
440                foreach ( $node->children as $el ) {
441                    $bound = $this->check( $el, $bound );
442                }
443                return $bound;
444
445            case AFPTreeNode::FUNCTION_CALL:
446                $fname = $node->children[0];
447                $args = array_slice( $node->children, 1 );
448                if ( !array_key_exists( $fname, FilterEvaluator::FUNCTIONS ) ) {
449                    throw new UserVisibleException(
450                        'unknownfunction',
451                        $node->position,
452                        [ $fname ]
453                    );
454                }
455                $this->checkArgCount( $args, $fname, $node->position );
456
457                if ( $fname === 'set' ) {
458                    // arity is checked, so we know $args[0] and $args[1] exist
459                    $tok = $args[0]->children;
460
461                    if (
462                        !( $tok instanceof AFPToken ) ||
463                        $tok->type !== AFPToken::TSTRING
464                    ) {
465                        throw new UserVisibleException(
466                            'variablevariable',
467                            $node->position,
468                            []
469                        );
470                    }
471
472                    $bound = $this->check( $args[1], $bound );
473                    // set the variable as unused
474                    return $this->assignVar(
475                        $tok->value,
476                        $tok->pos,
477                        $bound
478                    );
479                } else {
480                    foreach ( $args as $arg ) {
481                        $bound = $this->check( $arg, $bound );
482                    }
483                    return $bound;
484                }
485
486            case AFPTreeNode::BINOP:
487                [ , $left, $right ] = $node->children;
488                return $this->check( $right, $this->check( $left, $bound ) );
489
490            case AFPTreeNode::UNARY:
491                [ , $argument ] = $node->children;
492                return $this->check( $argument, $bound );
493
494            case AFPTreeNode::BOOL_INVERT:
495                [ $argument ] = $node->children;
496                return $this->check( $argument, $bound );
497            // phpcs:ignore PSR2.ControlStructures.SwitchDeclaration.TerminatingComment
498            case AFPTreeNode::CONDITIONAL:
499                [ $condition, $exprIfTrue, $exprIfFalse ] = $node->children;
500                $bound = $this->check( $condition, $bound );
501                $boundLeft = $this->check( $exprIfTrue, $bound );
502                $boundRight = $this->check( $exprIfFalse, $bound );
503                switch ( $this->mode ) {
504                    case self::MCONSERVATIVE:
505                        return $this->mapUnion( $boundLeft, $boundRight );
506                    case self::MLIBERAL:
507                        return $this->mapIntersect( $boundLeft, $boundRight );
508                    default:
509                        // @codeCoverageIgnoreStart
510                        throw new LogicException( "Unknown mode: {$this->mode}" );
511                        // @codeCoverageIgnoreEnd
512                }
513
514            case AFPTreeNode::INDEX_ASSIGNMENT:
515                [ $varName, $offset, $value ] = $node->children;
516
517                // deal with unbound $varName
518                $bound = $this->lookupVar( $varName, $node->position, $bound );
519                $bound = $this->check( $offset, $bound );
520                $bound = $this->check( $value, $bound );
521                // deal with built-in $varName and set $varName as unused
522                return $this->assignVar( $varName, $node->position, $bound );
523
524            case AFPTreeNode::ARRAY_APPEND:
525                [ $varName, $value ] = $node->children;
526
527                // deal with unbound $varName
528                $bound = $this->lookupVar( $varName, $node->position, $bound );
529                $bound = $this->check( $value, $bound );
530                // deal with built-in $varName and set $varName as unused
531                return $this->assignVar( $varName, $node->position, $bound );
532
533            case AFPTreeNode::SEMICOLON:
534                // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here
535                foreach ( $node->children as $statement ) {
536                    $bound = $this->check( $statement, $bound );
537                }
538                return $bound;
539
540            default:
541                // @codeCoverageIgnoreStart
542                throw new LogicException( "Unknown type: {$node->type}" );
543                // @codeCoverageIgnoreEnd
544        }
545    }
546
547    /**
548     * @param array $left
549     * @param array $right
550     * @return array
551     */
552    private function mapUnion( array $left, array $right ): array {
553        foreach ( $right as $key => $val ) {
554            if ( array_key_exists( $key, $left ) ) {
555                $left[ $key ] = $left[ $key ] || $val;
556            } else {
557                $left[ $key ] = $val;
558            }
559        }
560        return $left;
561    }
562
563    /**
564     * @param array $left
565     * @param array $right
566     * @return array
567     */
568    private function mapIntersect( array $left, array $right ): array {
569        $keys = array_intersect_key( $left, $right );
570        $result = [];
571        foreach ( $keys as $key => $val ) {
572            $result[ $key ] = $left[ $key ] || $right[ $key ];
573        }
574        return $result;
575    }
576
577    /**
578     * @param string $var
579     * @param int $pos
580     * @param array $bound
581     * @return array
582     */
583    private function assignVar( string $var, int $pos, array $bound ): array {
584        $var = strtolower( $var );
585        if ( $this->isReservedIdentifier( $var ) ) {
586            throw new UserVisibleException(
587                'overridebuiltin',
588                $pos,
589                [ $var ]
590            );
591        }
592        $bound[ $var ] = false;
593        return $bound;
594    }
595
596    /**
597     * @param string $var
598     * @param int $pos
599     * @param array $bound
600     * @return array
601     */
602    private function lookupVar( string $var, int $pos, array $bound ): array {
603        $var = strtolower( $var );
604        if ( array_key_exists( $var, $bound ) ) {
605            // user-defined variable
606            $bound[ $var ] = true;
607            return $bound;
608        } elseif ( $this->keywordsManager->isVarDisabled( $var ) ) {
609            // disabled built-in variables
610            throw new UserVisibleException(
611                'disabledvar',
612                $pos,
613                [ $var ]
614            );
615        } elseif ( $this->keywordsManager->varExists( $var ) ) {
616            // non-disabled built-in variables
617            return $bound;
618        } elseif ( $this->isReservedIdentifier( $var ) ) {
619            // other built-in identifiers
620            throw new UserVisibleException(
621                'usebuiltin',
622                $pos,
623                [ $var ]
624            );
625        } else {
626            // unbound variables
627            throw new UserVisibleException(
628                'unrecognisedvar',
629                $pos,
630                [ $var ]
631            );
632        }
633    }
634
635    /**
636     * Check that a built-in function has been provided the right amount of arguments
637     *
638     * @param array $args The arguments supplied to the function
639     * @param string $func The function name
640     * @param int $position
641     * @throws UserVisibleException
642     */
643    private function checkArgCount( array $args, string $func, int $position ): void {
644        if ( !array_key_exists( $func, FilterEvaluator::FUNC_ARG_COUNT ) ) {
645            // @codeCoverageIgnoreStart
646            throw new InvalidArgumentException( "$func is not a valid function." );
647            // @codeCoverageIgnoreEnd
648        }
649        [ $min, $max ] = FilterEvaluator::FUNC_ARG_COUNT[ $func ];
650        if ( count( $args ) < $min ) {
651            throw new UserVisibleException(
652                $min === 1 ? 'noparams' : 'notenoughargs',
653                $position,
654                [ $func, $min, count( $args ) ]
655            );
656        } elseif ( count( $args ) > $max ) {
657            throw new UserVisibleException(
658                'toomanyargs',
659                $position,
660                [ $func, $max, count( $args ) ]
661            );
662        }
663    }
664
665    /**
666     * Check whether the given name is a reserved identifier, e.g. the name of a built-in variable,
667     * function, or keyword.
668     *
669     * @param string $name
670     * @return bool
671     */
672    private function isReservedIdentifier( string $name ): bool {
673        return $this->keywordsManager->varExists( $name ) ||
674            array_key_exists( $name, FilterEvaluator::FUNCTIONS ) ||
675            // We need to check for true, false, if/then/else etc. because, even if they have a different
676            // AFPToken type, they may be used inside set/set_var()
677            in_array( $name, AbuseFilterTokenizer::KEYWORDS, true );
678    }
679}