Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
337 / 337
100.00% covered (success)
100.00%
26 / 26
CRAP
100.00% covered (success)
100.00%
1 / 1
AFPTreeParser
100.00% covered (success)
100.00%
337 / 337
100.00% covered (success)
100.00%
26 / 26
111
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setFilter
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 resetState
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 move
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getNextToken
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getState
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setState
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 parse
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 buildSyntaxTree
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 doLevelEntry
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
2
 doLevelSemicolon
100.00% covered (success)
100.00%
16 / 16
100.00% covered (success)
100.00%
1 / 1
7
 doLevelSet
100.00% covered (success)
100.00%
34 / 34
100.00% covered (success)
100.00%
1 / 1
13
 doLevelConditions
100.00% covered (success)
100.00%
56 / 56
100.00% covered (success)
100.00%
1 / 1
13
 doLevelBoolOps
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
3
 doLevelCompares
100.00% covered (success)
100.00%
18 / 18
100.00% covered (success)
100.00%
1 / 1
4
 doLevelSumRels
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
3
 doLevelMulRels
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
3
 doLevelPow
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
3
 doLevelBoolInvert
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
3
 doLevelKeywordOperators
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
3
 doLevelUnarys
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
4
 doLevelArrayElements
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
5
 doLevelParenthesis
100.00% covered (success)
100.00%
21 / 21
100.00% covered (success)
100.00%
1 / 1
7
 doLevelFunction
100.00% covered (success)
100.00%
38 / 38
100.00% covered (success)
100.00%
1 / 1
11
 doLevelAtom
100.00% covered (success)
100.00%
46 / 46
100.00% covered (success)
100.00%
1 / 1
16
 checkLogDeprecatedVar
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
2
1<?php
2
3/**
4 * A version of the abuse filter parser that separates parsing the filter and
5 * evaluating it into different passes, allowing the parse tree to be cached.
6 *
7 * @file
8 * @phan-file-suppress PhanPossiblyInfiniteRecursionSameParams Recursion controlled by class props
9 */
10
11namespace MediaWiki\Extension\AbuseFilter\Parser;
12
13use MediaWiki\Extension\AbuseFilter\KeywordsManager;
14use MediaWiki\Extension\AbuseFilter\Parser\Exception\UserVisibleException;
15use Psr\Log\LoggerInterface;
16use Wikimedia\Stats\IBufferingStatsdDataFactory;
17
18/**
19 * A parser that transforms the text of the filter into a parse tree.
20 */
21class AFPTreeParser {
22    /**
23     * @var array<int,array{0:AFPToken,1:int}> Contains the AFPTokens for the code being parsed
24     */
25    private $mTokens;
26    /**
27     * @var AFPToken The current token
28     */
29    private $mCur;
30    /** @var int The position of the current token */
31    private $mPos;
32
33    /**
34     * @var string|null The ID of the filter being parsed, if available. Can also be "global-$ID"
35     */
36    private $mFilter;
37
38    public const CACHE_VERSION = 2;
39
40    public function __construct(
41        private readonly LoggerInterface $logger,
42        private readonly IBufferingStatsdDataFactory $statsd,
43        private readonly KeywordsManager $keywordsManager
44    ) {
45        $this->resetState();
46    }
47
48    /**
49     * @param string $filter
50     */
51    public function setFilter( $filter ) {
52        $this->mFilter = $filter;
53    }
54
55    /**
56     * Resets the state
57     */
58    private function resetState() {
59        $this->mTokens = [];
60        $this->mPos = 0;
61        $this->mFilter = null;
62    }
63
64    /**
65     * Advances the parser to the next token in the filter code.
66     */
67    private function move() {
68        [ $this->mCur, $this->mPos ] = $this->mTokens[$this->mPos];
69    }
70
71    /**
72     * Get the next token. This is similar to move() but doesn't change class members,
73     *   allowing to look ahead without rolling back the state.
74     *
75     * @return AFPToken
76     */
77    private function getNextToken() {
78        return $this->mTokens[$this->mPos][0];
79    }
80
81    /**
82     * getState() function allows parser state to be rollbacked to several tokens
83     * back.
84     *
85     * @return AFPParserState
86     */
87    private function getState() {
88        return new AFPParserState( $this->mCur, $this->mPos );
89    }
90
91    /**
92     * setState() function allows parser state to be rollbacked to several tokens
93     * back.
94     */
95    private function setState( AFPParserState $state ) {
96        $this->mCur = $state->token;
97        $this->mPos = $state->pos;
98    }
99
100    /**
101     * Parse the supplied filter source code into a tree.
102     *
103     * @param array<int,array{0:AFPToken,1:int}> $tokens
104     * @return AFPSyntaxTree
105     * @throws UserVisibleException
106     */
107    public function parse( array $tokens ): AFPSyntaxTree {
108        $this->mTokens = $tokens;
109        $this->mPos = 0;
110
111        return $this->buildSyntaxTree();
112    }
113
114    private function buildSyntaxTree(): AFPSyntaxTree {
115        $startTime = microtime( true );
116        $root = $this->doLevelEntry();
117        $this->statsd->timing( 'abusefilter_cachingParser_buildtree', microtime( true ) - $startTime );
118        return new AFPSyntaxTree( $root );
119    }
120
121    /* Levels */
122
123    /**
124     * Handles unexpected characters after the expression.
125     * @return AFPTreeNode|null Null only if no statements
126     * @throws UserVisibleException
127     */
128    private function doLevelEntry() {
129        $result = $this->doLevelSemicolon();
130
131        if ( $this->mCur->type !== AFPToken::TNONE ) {
132            throw new UserVisibleException(
133                'unexpectedatend',
134                $this->mPos, [ $this->mCur->type ]
135            );
136        }
137
138        return $result;
139    }
140
141    /**
142     * Handles the semicolon operator.
143     *
144     * @return AFPTreeNode|null
145     */
146    private function doLevelSemicolon() {
147        $statements = [];
148
149        do {
150            $this->move();
151            $position = $this->mPos;
152
153            if (
154                $this->mCur->type === AFPToken::TNONE ||
155                ( $this->mCur->type === AFPToken::TBRACE && $this->mCur->value == ')' )
156            ) {
157                // Handle special cases which the other parser handled in doLevelAtom
158                break;
159            }
160
161            // Allow empty statements.
162            if ( $this->mCur->type === AFPToken::TSTATEMENTSEPARATOR ) {
163                continue;
164            }
165
166            $statements[] = $this->doLevelSet();
167            $position = $this->mPos;
168        } while ( $this->mCur->type === AFPToken::TSTATEMENTSEPARATOR );
169
170        // Flatten the tree if possible.
171        if ( count( $statements ) === 0 ) {
172            return null;
173        } elseif ( count( $statements ) === 1 ) {
174            return $statements[0];
175        } else {
176            return new AFPTreeNode( AFPTreeNode::SEMICOLON, $statements, $position );
177        }
178    }
179
180    /**
181     * Handles variable assignment.
182     *
183     * @return AFPTreeNode
184     * @throws UserVisibleException
185     */
186    private function doLevelSet() {
187        if ( $this->mCur->type === AFPToken::TID ) {
188            $varname = (string)$this->mCur->value;
189
190            // Speculatively parse the assignment statement assuming it can
191            // potentially be an assignment, but roll back if it isn't.
192            // @todo Use $this->getNextToken for clearer code
193            $initialState = $this->getState();
194            $this->move();
195
196            if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === ':=' ) {
197                $position = $this->mPos;
198                $this->move();
199                $value = $this->doLevelSet();
200
201                return new AFPTreeNode( AFPTreeNode::ASSIGNMENT, [ $varname, $value ], $position );
202            }
203
204            if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === '[' ) {
205                $this->move();
206
207                if ( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) {
208                    $index = 'append';
209                } else {
210                    // Parse index offset.
211                    $this->setState( $initialState );
212                    $this->move();
213                    $index = $this->doLevelSemicolon();
214                    if ( !( $this->mCur->type === AFPToken::TSQUAREBRACKET && $this->mCur->value === ']' ) ) {
215                        throw new UserVisibleException( 'expectednotfound', $this->mPos,
216                            [ ']', $this->mCur->type, $this->mCur->value ] );
217                    }
218                }
219
220                $this->move();
221                if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === ':=' ) {
222                    $position = $this->mPos;
223                    $this->move();
224                    $value = $this->doLevelSet();
225                    if ( $index === 'append' ) {
226                        return new AFPTreeNode(
227                            AFPTreeNode::ARRAY_APPEND, [ $varname, $value ], $position );
228                    } else {
229                        return new AFPTreeNode(
230                            AFPTreeNode::INDEX_ASSIGNMENT,
231                            [ $varname, $index, $value ],
232                            $position
233                        );
234                    }
235                }
236            }
237
238            // If we reached this point, we did not find an assignment.  Roll back
239            // and assume this was just a literal.
240            $this->setState( $initialState );
241        }
242
243        return $this->doLevelConditions();
244    }
245
246    /**
247     * Handles ternary operator and if-then-else-end.
248     *
249     * @return AFPTreeNode
250     * @throws UserVisibleException
251     */
252    private function doLevelConditions() {
253        if ( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'if' ) {
254            $position = $this->mPos;
255            $this->move();
256            $condition = $this->doLevelBoolOps();
257
258            if ( !( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'then' ) ) {
259                throw new UserVisibleException( 'expectednotfound',
260                    $this->mPos,
261                    [
262                        'then',
263                        $this->mCur->type,
264                        $this->mCur->value
265                    ]
266                );
267            }
268            $this->move();
269
270            $valueIfTrue = $this->doLevelConditions();
271
272            if ( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'else' ) {
273                $this->move();
274                $valueIfFalse = $this->doLevelConditions();
275            } else {
276                $valueIfFalse = null;
277            }
278
279            if ( !( $this->mCur->type === AFPToken::TKEYWORD && $this->mCur->value === 'end' ) ) {
280                throw new UserVisibleException( 'expectednotfound',
281                    $this->mPos,
282                    [
283                        'end',
284                        $this->mCur->type,
285                        $this->mCur->value
286                    ]
287                );
288            }
289            $this->move();
290
291            return new AFPTreeNode(
292                AFPTreeNode::CONDITIONAL,
293                [ $condition, $valueIfTrue, $valueIfFalse ],
294                $position
295            );
296        }
297
298        $condition = $this->doLevelBoolOps();
299        if ( $this->mCur->type === AFPToken::TOP && $this->mCur->value === '?' ) {
300            $position = $this->mPos;
301            $this->move();
302
303            $valueIfTrue = $this->doLevelConditions();
304            if ( !( $this->mCur->type === AFPToken::TOP && $this->mCur->value === ':' ) ) {
305                throw new UserVisibleException( 'expectednotfound',
306                    $this->mPos,
307                    [
308                        ':',
309                        $this->mCur->type,
310                        $this->mCur->value
311                    ]
312                );
313            }
314            $this->move();
315
316            $valueIfFalse = $this->doLevelConditions();
317            return new AFPTreeNode(
318                AFPTreeNode::CONDITIONAL,
319                [ $condition, $valueIfTrue, $valueIfFalse ],
320                $position
321            );
322        }
323
324        return $condition;
325    }
326
327    /**
328     * Handles logic operators.
329     *
330     * @return AFPTreeNode
331     */
332    private function doLevelBoolOps() {
333        $leftOperand = $this->doLevelCompares();
334        $ops = [ '&', '|', '^' ];
335        while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $ops ) ) {
336            $op = $this->mCur->value;
337            $position = $this->mPos;
338            $this->move();
339
340            $rightOperand = $this->doLevelCompares();
341
342            $leftOperand = new AFPTreeNode(
343                AFPTreeNode::LOGIC,
344                [ $op, $leftOperand, $rightOperand ],
345                $position
346            );
347        }
348        return $leftOperand;
349    }
350
351    /**
352     * Handles comparison operators.
353     *
354     * @return AFPTreeNode
355     */
356    private function doLevelCompares() {
357        $leftOperand = $this->doLevelSumRels();
358        $equalityOps = [ '==', '===', '!=', '!==', '=' ];
359        $orderOps = [ '<', '>', '<=', '>=' ];
360        // Only allow either a single operation, or a combination of a single equalityOps and a single
361        // orderOps. This resembles what PHP does, and allows `a < b == c` while rejecting `a < b < c`
362        $allowedOps = array_merge( $equalityOps, $orderOps );
363        while ( $this->mCur->type === AFPToken::TOP && in_array( $this->mCur->value, $allowedOps ) ) {
364            $op = $this->mCur->value;
365            $allowedOps = in_array( $op, $equalityOps ) ?
366                array_diff( $allowedOps, $equalityOps ) :
367                array_diff( $allowedOps, $orderOps );
368            $position = $this->mPos;
369            $this->move();
370            $rightOperand = $this->doLevelSumRels();