Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
272 / 272
100.00% covered (success)
100.00%
17 / 17
CRAP
100.00% covered (success)
100.00%
1 / 1
SyntaxChecker
100.00% covered (success)
100.00%
272 / 272
100.00% covered (success)
100.00%
17 / 17
80
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 start
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
4
 desugar
100.00% covered (success)
100.00%
62 / 62
100.00% covered (success)
100.00%
1 / 1
23
 desugarAndOr
100.00% covered (success)
100.00%
47 / 47
100.00% covered (success)
100.00%
1 / 1
3
 newNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 newNodeReplaceType
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 newNodeMapAll
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
2
 newNodeMapExceptFirst
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
2
 newNodeBinop
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 newNodeNamedBinop
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 check
100.00% covered (success)
100.00%
72 / 72
100.00% covered (success)
100.00%
1 / 1
19
 mapUnion
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
4
 mapIntersect
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 assignVar
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
 lookupVar
100.00% covered (success)
100.00%
23 / 23
100.00% covered (success)
100.00%
1 / 1
5
 checkArgCount
100.00% covered (success)
100.00%
14 / 14
100.00% covered (success)
100.00%
1 / 1
5
 isReservedIdentifier
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
3
1<?php
2
3namespace MediaWiki\Extension\AbuseFilter\Parser;
4
5use InvalidArgumentException;
6use LogicException;
7use MediaWiki\Extension\AbuseFilter\KeywordsManager;
8use MediaWiki\Extension\AbuseFilter\Parser\Exception\InternalException;
9use MediaWiki\Extension\AbuseFilter\Parser\Exception\UserVisibleException;
10use MediaWiki\Message\Message;
11use Wikimedia\Message\ListType;
12
13/**
14 * SyntaxChecker statically analyzes the code without actually running it.
15 * Currently, it only checks for
16 *
17 * - unbound variables
18 * - unused variables: note that a := 1; a := 1; a
19 *     is considered OK even though the first `a` seems unused
20 *     because the pattern "a := null; if ... then (a := ...) end; ..."
21 *     should not count first `a` as unused.
22 * - assignment to built-in identifiers
23 * - invalid function call (arity mismatch, non-valid function)
24 * - first-order information of `set_var` and `set`
25 *
26 * Because it doesn't cover all checks that the current Check Syntax does,
27 * it is currently complementary to the current Check Syntax.
28 * In the future, it could subsume the current Check Syntax, and could be
29 * extended to perform type checking or type inference.
30 */
31class SyntaxChecker {
32    /**
33     * @var AFPTreeNode|null Root of the AST to check
34     */
35    private $treeRoot;
36
37    public const MCONSERVATIVE = 'MODE_CONSERVATIVE';
38    public const MLIBERAL = 'MODE_LIBERAL';
39    public const DUMMYPOS = 0;
40    public const CACHE_VERSION = 1;
41
42    /**
43     * @var string The mode of checking. The value should be either
44     *
45     *     - MLIBERAL: which guarantees that all user-defined variables
46     *       will be bound, but incompatible with what the evaluator currently
47     *       permits. E.g.,
48     *
49     *       if true then (a := 1) else null end; a
50     *
51     *       is rejected in this mode, even though `a` is in fact always bound.
52     *
53     *     - MCONSERVATIVE which is compatible with what the evaluator
54     *       currently permits, but could allow undefined variables to occur.
55     *       E.g.,
56     *
57     *       if false then (a := 1) else null end; a
58     *
59     *       is accepted in this mode, even though `a` is in fact always unbound.
60     */
61    private $mode;
62
63    public function __construct(
64        AFPSyntaxTree $tree,
65        private readonly KeywordsManager $keywordsManager,
66        string $mode = self::MCONSERVATIVE,
67        private readonly bool $checkUnusedVars = false
68    ) {
69        $this->treeRoot = $tree->getRoot();
70        $this->mode = $mode;
71    }
72
73    /**
74     * Start the static analysis
75     *
76     * @throws UserVisibleException
77     */
78    public function start(): void {
79        if ( !$this->treeRoot ) {
80            return;
81        }
82        $bound = $this->check( $this->desugar( $this->treeRoot ), [] );
83        $unused = array_keys( $bound, false, true );
84        if ( $this->checkUnusedVars && $unused ) {
85            throw new UserVisibleException(
86                'unusedvars',
87                self::DUMMYPOS,
88                [ Message::listParam( $unused, ListType::COMMA ) ]
89            );
90        }
91    }
92
93    /**
94     * Remove syntactic sugar so that we don't need to deal with
95     * too many cases.
96     *
97     * This could benefit the evaluator as well, but for now, this is
98     * only used for static analysis.
99     *
100     * Postcondition:
101     *     - The tree will not contain nodes of
102     *       type ASSIGNMENT, LOGIC, COMPARE, SUM_REL, MUL_REL, POW,
103     *       KEYWORD_OPERATOR, and ARRAY_INDEX
104     *     - The tree may additionally contain a node of type BINOP.
105     *     - The tree should not have set_var function application.
106     *     - Conditionals will have both branches.
107     *
108     * @param AFPTreeNode $node
109     * @return AFPTreeNode
110     * @throws InternalException
111     */
112    private function desugar( AFPTreeNode $node ): AFPTreeNode {
113        switch ( $node->type ) {
114            case AFPTreeNode::ATOM:
115                return $node;
116
117            case AFPTreeNode::FUNCTION_CALL:
118                if ( $node->children[0] === 'set_var' ) {
119                    $node->children[0] = 'set';
120                }
121                return $this->newNodeMapExceptFirst( $node );
122
123            case AFPTreeNode::ARRAY_INDEX:
124                return $this->newNodeNamedBinop( $node, '[]' );
125
126            case AFPTreeNode::POW:
127                return $this->newNodeNamedBinop( $node, '**' );
128
129            case AFPTreeNode::UNARY:
130            case AFPTreeNode::INDEX_ASSIGNMENT:
131            case AFPTreeNode::ARRAY_APPEND:
132                return $this->newNodeMapExceptFirst( $node );
133
134            case AFPTreeNode::BOOL_INVERT:
135                /*
136                 * @todo this should really be combined with UNARY,
137                 * but let's wait to change the meaning of UNARY across
138                 * the codebase together
139                 */
140                return $this->newNodeMapAll( $node );
141
142            case AFPTreeNode::KEYWORD_OPERATOR:
143            case AFPTreeNode::MUL_REL:
144            case AFPTreeNode::SUM_REL:
145            case AFPTreeNode::COMPARE:
146                return $this->newNodeBinop( $node );
147
148            case AFPTreeNode::LOGIC:
149                $result = $this->newNodeBinop( $node );
150                [ $op, $left, $right ] = $result->children;
151                if ( $op === '&' || $op === '|' ) {
152                    return $this->desugarAndOr( $op, $left, $right, $node->position );
153                } else {
154                    return $result;
155                }
156
157            case AFPTreeNode::ARRAY_DEFINITION:
158            case AFPTreeNode::SEMICOLON:
159                return $this->newNodeMapAll( $node );
160
161            case AFPTreeNode::CONDITIONAL:
162                if ( $node->children[2] === null ) {
163                    $node->children[2] = new AFPTreeNode(
164                        AFPTreeNode::ATOM,
165                        new AFPToken(
166                            AFPToken::TKEYWORD,
167                            "null",
168                            $node->position
169                        ),
170                        $node->position
171                    );
172                }
173                return $this->newNodeMapAll( $node );
174
175            case AFPTreeNode::ASSIGNMENT:
176                [ $varname, $value ] = $node->children;
177
178                return new AFPTreeNode(
179                    AFPTreeNode::FUNCTION_CALL,
180                    [
181                        "set",
182                        new AFPTreeNode(
183                            AFPTreeNode::ATOM,
184                            new AFPToken(
185                                AFPToken::TSTRING,
186                                $varname,
187                                $node->position
188                            ),
189                            $node->position
190                        ),
191                        $this->desugar( $value )
192                    ],
193                    $node->position
194                );
195
196            default:
197                // @codeCoverageIgnoreStart
198                throw new InternalException( "Unknown node type passed: {$node->type}" );
199                // @codeCoverageIgnoreEnd
200        }
201    }
202
203    /**
204     * @param string $op
205     * @param AFPTreeNode $left
206     * @param AFPTreeNode $right
207     * @param int $position
208     * @return AFPTreeNode
209     */
210    private function desugarAndOr(
211        string $op,
212        AFPTreeNode $left,
213        AFPTreeNode $right,
214        int $position
215    ): AFPTreeNode {
216        $trueNode = new AFPTreeNode(
217            AFPTreeNode::ATOM,
218            new AFPToken(
219                AFPToken::TKEYWORD,
220                "true",
221                $position
222            ),
223            $position
224        );
225        $falseNode = new AFPTreeNode(
226            AFPTreeNode::ATOM,
227            new AFPToken(
228                AFPToken::TKEYWORD,
229                "false",
230                $position
231            ),
232            $position
233        );
234        $conditionalNode = new AFPTreeNode(
235            AFPTreeNode::CONDITIONAL,
236            [
237                $right,
238                $trueNode,
239                $falseNode
240            ],
241            $position
242        );
243
244        if ( $op === '&' ) {
245            // <a> & <b> is supposed to be equivalent to
246            // if <a> then (if <b> then true else false) else false end
247            // See T237336 for why this is currently not the case.
248            return new AFPTreeNode(
249                AFPTreeNode::CONDITIONAL,
250                [
251                    $left,
252                    $conditionalNode,
253                    $falseNode
254                ],
255                $position
256            );
257        } elseif ( $op === '|' ) {
258            // <a> | <b> is supposed to be equivalent to
259            // if <a> then true else (if <b> then true else false) end
260            // See T237336 for why this is currently not the case.
261            return new AFPTreeNode(
262                AFPTreeNode::CONDITIONAL,
263                [
264                    $left,
265                    $trueNode,
266                    $conditionalNode
267                ],
268                $position
269            );
270        } else {
271            // @codeCoverageIgnoreStart
272            throw new InternalException( "Unknown operator: {$op}" );
273            // @codeCoverageIgnoreEnd
274        }
275    }
276
277    /**
278     * Construct a new node with information based on the old node but
279     * with different children
280     *
281     * @param AFPTreeNode $node
282     * @param AFPTreeNode[]|string[]|AFPToken $children
283     * @return AFPTreeNode
284     */
285    private function newNode( AFPTreeNode $node, $children ): AFPTreeNode {
286        return new AFPTreeNode( $node->type, $children, $node->position );
287    }
288
289    /**
290     * Construct a new node with information based on the old node but
291     * with different type
292     *
293     * @param AFPTreeNode $node
294     * @param string $type
295     * @return AFPTreeNode
296     */
297    private function newNodeReplaceType(
298        AFPTreeNode $node,
299        string $type
300    ): AFPTreeNode {
301        return new AFPTreeNode( $type, $node->children, $node->position );
302    }
303
304    /**
305     * Recursively desugar on all children
306     *
307     * @param AFPTreeNode $node
308     * @return AFPTreeNode
309     */
310    private function newNodeMapAll( AFPTreeNode $node ): AFPTreeNode {
311        $children = $node->children;
312        if ( !is_array( $children ) ) {
313            // @codeCoverageIgnoreStart
314            throw new LogicException(
315                "Unexpected non-array children of an AFPTreeNode of type " .
316                "{$node->type} at position {$node->position}"
317            );
318            // @codeCoverageIgnoreEnd
319        }
320        return $this->newNode( $node, array_map( [ $this, 'desugar' ], $children ) );
321    }
322
323    /**
324     * Recursively desugar on all children except the first one
325     *
326     * @param AFPTreeNode $node
327     * @return AFPTreeNode
328     */
329    private function newNodeMapExceptFirst( AFPTreeNode $node ): AFPTreeNode {
330        $items = [ $node->children[0] ];
331        $args = array_slice( $node->children, 1 );
332        foreach ( $args as $el ) {
333            $items[] = $this->desugar( $el );
334        }
335        return $this->newNode( $node, $items );
336    }
337
338    /**
339     * Convert a node with an operation into a BINOP
340     *
341     * @param AFPTreeNode $node
342     * @return AFPTreeNode
343     */
344    private function newNodeBinop( AFPTreeNode $node ): AFPTreeNode {
345        return $this->newNodeReplaceType(
346            $this->newNodeMapExceptFirst( $node ),
347            AFPTreeNode::BINOP
348        );
349    }
350
351    /**
352     * Convert a node without an operation into a BINOP with the specified operation
353     *
354     * @param AFPTreeNode $node
355     * @param string $op
356     * @return AFPTreeNode
357     */
358    private function newNodeNamedBinop(
359        AFPTreeNode $node,
360        string $op
361    ): AFPTreeNode {
362        $items = $this->newNodeMapAll( $node )->children;
363        array_unshift( $items, $op );
364        return $this->newNodeReplaceType(
365            $this->newNode( $node, $items ),
366            AFPTreeNode::BINOP
367        );
368    }
369
370    /**
371     * - Statically compute what are bound after evaluating $node,
372     *     provided that variables in $bound are already bound.
373     * - Similarly compute for each bound variable after evaluating $node
374     *     whether it is used provided that we already have $bound
375     *     that contains necessary information.
376     * - Ensure function application's validity.
377     * - Ensure that the first argument of set is a literal string.
378     * - Ensure that all assignment is not done on built-in identifier.
379     *
380     * Precondition:
381     *     - The tree $node should be desugared and normalized.
382     *
383     * Postcondition:
384     *     - $node is guaranteed to have no unbound variables
385     *       provided that variables in $bound are already bound
386     *       (for the definition of unbound variable indicated by $this->mode)
387     *     - All function applications should be valid and have correct arity.
388     *     - The set function application's first argument should be
389     *       a literal string.
390     *
391     * @param AFPTreeNode $node
392     * @param array<string,bool> $bound Map of [ variable_name => used ]
393     * @return array<string,bool> Map of [ variable_name => used ]
394     * @throws UserVisibleException
395     * @throws InternalException
396     */
397    private function check( AFPTreeNode $node, array $bound ): array {
398        switch ( $node->type ) {
399            // phpcs:ignore PSR2.ControlStructures.SwitchDeclaration.TerminatingComment
400            case AFPTreeNode::ATOM:
401                $tok = $node->children;
402                return match ( $tok->type ) {
403                    AFPToken::TID => $this->lookupVar( $tok->value, $tok->pos, $bound ),
404                    AFPToken::TSTRING, AFPToken::TFLOAT, AFPToken::TINT, AFPToken::TKEYWORD => $bound,
405                    // @codeCoverageIgnoreStart
406                    default => throw new InternalException( "Unknown token {$tok->type} provided in the ATOM node" ),
407                    // @codeCoverageIgnoreEnd
408                };
409            case AFPTreeNode::ARRAY_DEFINITION:
410                // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here
411                foreach ( $node->children as $el ) {
412                    $bound = $this->check( $el, $bound );
413                }
414                return $bound;
415
416            case AFPTreeNode::FUNCTION_CALL:
417                $fname = $node->children[0];
418                $args = array_slice( $node->children, 1 );
419                if ( !array_key_exists( $fname, FilterEvaluator::FUNCTIONS ) ) {
420                    throw new UserVisibleException(
421                        'unknownfunction',
422                        $node->position,
423                        [ $fname ]
424                    );
425                }
426                $this->checkArgCount( $args, $fname, $node->position );
427
428                if ( $fname === 'set' ) {
429                    // arity is checked, so we know $args[0] and $args[1] exist
430                    $tok = $args[0]->children;
431
432                    if (
433                        !( $tok instanceof AFPToken ) ||
434                        $tok->type !== AFPToken::TSTRING
435                    ) {
436                        throw new UserVisibleException(
437                            'variablevariable',
438                            $node->position,
439                            []
440                        );
441                    }
442
443                    $bound = $this->check( $args[1], $bound );
444                    // set the variable as unused
445                    return $this->assignVar(
446                        $tok->value,
447                        $tok->pos,
448                        $bound
449                    );
450                } else {
451                    foreach ( $args as $arg ) {
452                        $bound = $this->check( $arg, $bound );
453                    }
454                    return $bound;
455                }
456
457            case AFPTreeNode::BINOP:
458                [ , $left, $right ] = $node->children;
459                return $this->check( $right, $this->check( $left, $bound ) );
460