Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
100.00% |
272 / 272 |
|
100.00% |
17 / 17 |
CRAP | |
100.00% |
1 / 1 |
| SyntaxChecker | |
100.00% |
272 / 272 |
|
100.00% |
17 / 17 |
80 | |
100.00% |
1 / 1 |
| __construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| start | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
4 | |||
| desugar | |
100.00% |
62 / 62 |
|
100.00% |
1 / 1 |
23 | |||
| desugarAndOr | |
100.00% |
47 / 47 |
|
100.00% |
1 / 1 |
3 | |||
| newNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| newNodeReplaceType | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| newNodeMapAll | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
| newNodeMapExceptFirst | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
| newNodeBinop | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| newNodeNamedBinop | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
| check | |
100.00% |
72 / 72 |
|
100.00% |
1 / 1 |
19 | |||
| mapUnion | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
4 | |||
| mapIntersect | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
| assignVar | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
| lookupVar | |
100.00% |
23 / 23 |
|
100.00% |
1 / 1 |
5 | |||
| checkArgCount | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
5 | |||
| isReservedIdentifier | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
3 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Extension\AbuseFilter\Parser; |
| 4 | |
| 5 | use InvalidArgumentException; |
| 6 | use LogicException; |
| 7 | use MediaWiki\Extension\AbuseFilter\KeywordsManager; |
| 8 | use MediaWiki\Extension\AbuseFilter\Parser\Exception\InternalException; |
| 9 | use MediaWiki\Extension\AbuseFilter\Parser\Exception\UserVisibleException; |
| 10 | use MediaWiki\Message\Message; |
| 11 | use Wikimedia\Message\ListType; |
| 12 | |
| 13 | /** |
| 14 | * SyntaxChecker statically analyzes the code without actually running it. |
| 15 | * Currently, it only checks for |
| 16 | * |
| 17 | * - unbound variables |
| 18 | * - unused variables: note that a := 1; a := 1; a |
| 19 | * is considered OK even though the first `a` seems unused |
| 20 | * because the pattern "a := null; if ... then (a := ...) end; ..." |
| 21 | * should not count first `a` as unused. |
| 22 | * - assignment to built-in identifiers |
| 23 | * - invalid function call (arity mismatch, non-valid function) |
| 24 | * - first-order information of `set_var` and `set` |
| 25 | * |
| 26 | * Because it doesn't cover all checks that the current Check Syntax does, |
| 27 | * it is currently complementary to the current Check Syntax. |
| 28 | * In the future, it could subsume the current Check Syntax, and could be |
| 29 | * extended to perform type checking or type inference. |
| 30 | */ |
| 31 | class SyntaxChecker { |
| 32 | /** |
| 33 | * @var AFPTreeNode|null Root of the AST to check |
| 34 | */ |
| 35 | private $treeRoot; |
| 36 | |
| 37 | public const MCONSERVATIVE = 'MODE_CONSERVATIVE'; |
| 38 | public const MLIBERAL = 'MODE_LIBERAL'; |
| 39 | public const DUMMYPOS = 0; |
| 40 | public const CACHE_VERSION = 1; |
| 41 | |
| 42 | /** |
| 43 | * @var string The mode of checking. The value should be either |
| 44 | * |
| 45 | * - MLIBERAL: which guarantees that all user-defined variables |
| 46 | * will be bound, but incompatible with what the evaluator currently |
| 47 | * permits. E.g., |
| 48 | * |
| 49 | * if true then (a := 1) else null end; a |
| 50 | * |
| 51 | * is rejected in this mode, even though `a` is in fact always bound. |
| 52 | * |
| 53 | * - MCONSERVATIVE which is compatible with what the evaluator |
| 54 | * currently permits, but could allow undefined variables to occur. |
| 55 | * E.g., |
| 56 | * |
| 57 | * if false then (a := 1) else null end; a |
| 58 | * |
| 59 | * is accepted in this mode, even though `a` is in fact always unbound. |
| 60 | */ |
| 61 | private $mode; |
| 62 | |
| 63 | public function __construct( |
| 64 | AFPSyntaxTree $tree, |
| 65 | private readonly KeywordsManager $keywordsManager, |
| 66 | string $mode = self::MCONSERVATIVE, |
| 67 | private readonly bool $checkUnusedVars = false |
| 68 | ) { |
| 69 | $this->treeRoot = $tree->getRoot(); |
| 70 | $this->mode = $mode; |
| 71 | } |
| 72 | |
| 73 | /** |
| 74 | * Start the static analysis |
| 75 | * |
| 76 | * @throws UserVisibleException |
| 77 | */ |
| 78 | public function start(): void { |
| 79 | if ( !$this->treeRoot ) { |
| 80 | return; |
| 81 | } |
| 82 | $bound = $this->check( $this->desugar( $this->treeRoot ), [] ); |
| 83 | $unused = array_keys( $bound, false, true ); |
| 84 | if ( $this->checkUnusedVars && $unused ) { |
| 85 | throw new UserVisibleException( |
| 86 | 'unusedvars', |
| 87 | self::DUMMYPOS, |
| 88 | [ Message::listParam( $unused, ListType::COMMA ) ] |
| 89 | ); |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | /** |
| 94 | * Remove syntactic sugar so that we don't need to deal with |
| 95 | * too many cases. |
| 96 | * |
| 97 | * This could benefit the evaluator as well, but for now, this is |
| 98 | * only used for static analysis. |
| 99 | * |
| 100 | * Postcondition: |
| 101 | * - The tree will not contain nodes of |
| 102 | * type ASSIGNMENT, LOGIC, COMPARE, SUM_REL, MUL_REL, POW, |
| 103 | * KEYWORD_OPERATOR, and ARRAY_INDEX |
| 104 | * - The tree may additionally contain a node of type BINOP. |
| 105 | * - The tree should not have set_var function application. |
| 106 | * - Conditionals will have both branches. |
| 107 | * |
| 108 | * @param AFPTreeNode $node |
| 109 | * @return AFPTreeNode |
| 110 | * @throws InternalException |
| 111 | */ |
| 112 | private function desugar( AFPTreeNode $node ): AFPTreeNode { |
| 113 | switch ( $node->type ) { |
| 114 | case AFPTreeNode::ATOM: |
| 115 | return $node; |
| 116 | |
| 117 | case AFPTreeNode::FUNCTION_CALL: |
| 118 | if ( $node->children[0] === 'set_var' ) { |
| 119 | $node->children[0] = 'set'; |
| 120 | } |
| 121 | return $this->newNodeMapExceptFirst( $node ); |
| 122 | |
| 123 | case AFPTreeNode::ARRAY_INDEX: |
| 124 | return $this->newNodeNamedBinop( $node, '[]' ); |
| 125 | |
| 126 | case AFPTreeNode::POW: |
| 127 | return $this->newNodeNamedBinop( $node, '**' ); |
| 128 | |
| 129 | case AFPTreeNode::UNARY: |
| 130 | case AFPTreeNode::INDEX_ASSIGNMENT: |
| 131 | case AFPTreeNode::ARRAY_APPEND: |
| 132 | return $this->newNodeMapExceptFirst( $node ); |
| 133 | |
| 134 | case AFPTreeNode::BOOL_INVERT: |
| 135 | /* |
| 136 | * @todo this should really be combined with UNARY, |
| 137 | * but let's wait to change the meaning of UNARY across |
| 138 | * the codebase together |
| 139 | */ |
| 140 | return $this->newNodeMapAll( $node ); |
| 141 | |
| 142 | case AFPTreeNode::KEYWORD_OPERATOR: |
| 143 | case AFPTreeNode::MUL_REL: |
| 144 | case AFPTreeNode::SUM_REL: |
| 145 | case AFPTreeNode::COMPARE: |
| 146 | return $this->newNodeBinop( $node ); |
| 147 | |
| 148 | case AFPTreeNode::LOGIC: |
| 149 | $result = $this->newNodeBinop( $node ); |
| 150 | [ $op, $left, $right ] = $result->children; |
| 151 | if ( $op === '&' || $op === '|' ) { |
| 152 | return $this->desugarAndOr( $op, $left, $right, $node->position ); |
| 153 | } else { |
| 154 | return $result; |
| 155 | } |
| 156 | |
| 157 | case AFPTreeNode::ARRAY_DEFINITION: |
| 158 | case AFPTreeNode::SEMICOLON: |
| 159 | return $this->newNodeMapAll( $node ); |
| 160 | |
| 161 | case AFPTreeNode::CONDITIONAL: |
| 162 | if ( $node->children[2] === null ) { |
| 163 | $node->children[2] = new AFPTreeNode( |
| 164 | AFPTreeNode::ATOM, |
| 165 | new AFPToken( |
| 166 | AFPToken::TKEYWORD, |
| 167 | "null", |
| 168 | $node->position |
| 169 | ), |
| 170 | $node->position |
| 171 | ); |
| 172 | } |
| 173 | return $this->newNodeMapAll( $node ); |
| 174 | |
| 175 | case AFPTreeNode::ASSIGNMENT: |
| 176 | [ $varname, $value ] = $node->children; |
| 177 | |
| 178 | return new AFPTreeNode( |
| 179 | AFPTreeNode::FUNCTION_CALL, |
| 180 | [ |
| 181 | "set", |
| 182 | new AFPTreeNode( |
| 183 | AFPTreeNode::ATOM, |
| 184 | new AFPToken( |
| 185 | AFPToken::TSTRING, |
| 186 | $varname, |
| 187 | $node->position |
| 188 | ), |
| 189 | $node->position |
| 190 | ), |
| 191 | $this->desugar( $value ) |
| 192 | ], |
| 193 | $node->position |
| 194 | ); |
| 195 | |
| 196 | default: |
| 197 | // @codeCoverageIgnoreStart |
| 198 | throw new InternalException( "Unknown node type passed: {$node->type}" ); |
| 199 | // @codeCoverageIgnoreEnd |
| 200 | } |
| 201 | } |
| 202 | |
| 203 | /** |
| 204 | * @param string $op |
| 205 | * @param AFPTreeNode $left |
| 206 | * @param AFPTreeNode $right |
| 207 | * @param int $position |
| 208 | * @return AFPTreeNode |
| 209 | */ |
| 210 | private function desugarAndOr( |
| 211 | string $op, |
| 212 | AFPTreeNode $left, |
| 213 | AFPTreeNode $right, |
| 214 | int $position |
| 215 | ): AFPTreeNode { |
| 216 | $trueNode = new AFPTreeNode( |
| 217 | AFPTreeNode::ATOM, |
| 218 | new AFPToken( |
| 219 | AFPToken::TKEYWORD, |
| 220 | "true", |
| 221 | $position |
| 222 | ), |
| 223 | $position |
| 224 | ); |
| 225 | $falseNode = new AFPTreeNode( |
| 226 | AFPTreeNode::ATOM, |
| 227 | new AFPToken( |
| 228 | AFPToken::TKEYWORD, |
| 229 | "false", |
| 230 | $position |
| 231 | ), |
| 232 | $position |
| 233 | ); |
| 234 | $conditionalNode = new AFPTreeNode( |
| 235 | AFPTreeNode::CONDITIONAL, |
| 236 | [ |
| 237 | $right, |
| 238 | $trueNode, |
| 239 | $falseNode |
| 240 | ], |
| 241 | $position |
| 242 | ); |
| 243 | |
| 244 | if ( $op === '&' ) { |
| 245 | // <a> & <b> is supposed to be equivalent to |
| 246 | // if <a> then (if <b> then true else false) else false end |
| 247 | // See T237336 for why this is currently not the case. |
| 248 | return new AFPTreeNode( |
| 249 | AFPTreeNode::CONDITIONAL, |
| 250 | [ |
| 251 | $left, |
| 252 | $conditionalNode, |
| 253 | $falseNode |
| 254 | ], |
| 255 | $position |
| 256 | ); |
| 257 | } elseif ( $op === '|' ) { |
| 258 | // <a> | <b> is supposed to be equivalent to |
| 259 | // if <a> then true else (if <b> then true else false) end |
| 260 | // See T237336 for why this is currently not the case. |
| 261 | return new AFPTreeNode( |
| 262 | AFPTreeNode::CONDITIONAL, |
| 263 | [ |
| 264 | $left, |
| 265 | $trueNode, |
| 266 | $conditionalNode |
| 267 | ], |
| 268 | $position |
| 269 | ); |
| 270 | } else { |
| 271 | // @codeCoverageIgnoreStart |
| 272 | throw new InternalException( "Unknown operator: {$op}" ); |
| 273 | // @codeCoverageIgnoreEnd |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | /** |
| 278 | * Construct a new node with information based on the old node but |
| 279 | * with different children |
| 280 | * |
| 281 | * @param AFPTreeNode $node |
| 282 | * @param AFPTreeNode[]|string[]|AFPToken $children |
| 283 | * @return AFPTreeNode |
| 284 | */ |
| 285 | private function newNode( AFPTreeNode $node, $children ): AFPTreeNode { |
| 286 | return new AFPTreeNode( $node->type, $children, $node->position ); |
| 287 | } |
| 288 | |
| 289 | /** |
| 290 | * Construct a new node with information based on the old node but |
| 291 | * with different type |
| 292 | * |
| 293 | * @param AFPTreeNode $node |
| 294 | * @param string $type |
| 295 | * @return AFPTreeNode |
| 296 | */ |
| 297 | private function newNodeReplaceType( |
| 298 | AFPTreeNode $node, |
| 299 | string $type |
| 300 | ): AFPTreeNode { |
| 301 | return new AFPTreeNode( $type, $node->children, $node->position ); |
| 302 | } |
| 303 | |
| 304 | /** |
| 305 | * Recursively desugar on all children |
| 306 | * |
| 307 | * @param AFPTreeNode $node |
| 308 | * @return AFPTreeNode |
| 309 | */ |
| 310 | private function newNodeMapAll( AFPTreeNode $node ): AFPTreeNode { |
| 311 | $children = $node->children; |
| 312 | if ( !is_array( $children ) ) { |
| 313 | // @codeCoverageIgnoreStart |
| 314 | throw new LogicException( |
| 315 | "Unexpected non-array children of an AFPTreeNode of type " . |
| 316 | "{$node->type} at position {$node->position}" |
| 317 | ); |
| 318 | // @codeCoverageIgnoreEnd |
| 319 | } |
| 320 | return $this->newNode( $node, array_map( [ $this, 'desugar' ], $children ) ); |
| 321 | } |
| 322 | |
| 323 | /** |
| 324 | * Recursively desugar on all children except the first one |
| 325 | * |
| 326 | * @param AFPTreeNode $node |
| 327 | * @return AFPTreeNode |
| 328 | */ |
| 329 | private function newNodeMapExceptFirst( AFPTreeNode $node ): AFPTreeNode { |
| 330 | $items = [ $node->children[0] ]; |
| 331 | $args = array_slice( $node->children, 1 ); |
| 332 | foreach ( $args as $el ) { |
| 333 | $items[] = $this->desugar( $el ); |
| 334 | } |
| 335 | return $this->newNode( $node, $items ); |
| 336 | } |
| 337 | |
| 338 | /** |
| 339 | * Convert a node with an operation into a BINOP |
| 340 | * |
| 341 | * @param AFPTreeNode $node |
| 342 | * @return AFPTreeNode |
| 343 | */ |
| 344 | private function newNodeBinop( AFPTreeNode $node ): AFPTreeNode { |
| 345 | return $this->newNodeReplaceType( |
| 346 | $this->newNodeMapExceptFirst( $node ), |
| 347 | AFPTreeNode::BINOP |
| 348 | ); |
| 349 | } |
| 350 | |
| 351 | /** |
| 352 | * Convert a node without an operation into a BINOP with the specified operation |
| 353 | * |
| 354 | * @param AFPTreeNode $node |
| 355 | * @param string $op |
| 356 | * @return AFPTreeNode |
| 357 | */ |
| 358 | private function newNodeNamedBinop( |
| 359 | AFPTreeNode $node, |
| 360 | string $op |
| 361 | ): AFPTreeNode { |
| 362 | $items = $this->newNodeMapAll( $node )->children; |
| 363 | array_unshift( $items, $op ); |
| 364 | return $this->newNodeReplaceType( |
| 365 | $this->newNode( $node, $items ), |
| 366 | AFPTreeNode::BINOP |
| 367 | ); |
| 368 | } |
| 369 | |
| 370 | /** |
| 371 | * - Statically compute what are bound after evaluating $node, |
| 372 | * provided that variables in $bound are already bound. |
| 373 | * - Similarly compute for each bound variable after evaluating $node |
| 374 | * whether it is used provided that we already have $bound |
| 375 | * that contains necessary information. |
| 376 | * - Ensure function application's validity. |
| 377 | * - Ensure that the first argument of set is a literal string. |
| 378 | * - Ensure that all assignment is not done on built-in identifier. |
| 379 | * |
| 380 | * Precondition: |
| 381 | * - The tree $node should be desugared and normalized. |
| 382 | * |
| 383 | * Postcondition: |
| 384 | * - $node is guaranteed to have no unbound variables |
| 385 | * provided that variables in $bound are already bound |
| 386 | * (for the definition of unbound variable indicated by $this->mode) |
| 387 | * - All function applications should be valid and have correct arity. |
| 388 | * - The set function application's first argument should be |
| 389 | * a literal string. |
| 390 | * |
| 391 | * @param AFPTreeNode $node |
| 392 | * @param array<string,bool> $bound Map of [ variable_name => used ] |
| 393 | * @return array<string,bool> Map of [ variable_name => used ] |
| 394 | * @throws UserVisibleException |
| 395 | * @throws InternalException |
| 396 | */ |
| 397 | private function check( AFPTreeNode $node, array $bound ): array { |
| 398 | switch ( $node->type ) { |
| 399 | // phpcs:ignore PSR2.ControlStructures.SwitchDeclaration.TerminatingComment |
| 400 | case AFPTreeNode::ATOM: |
| 401 | $tok = $node->children; |
| 402 | return match ( $tok->type ) { |
| 403 | AFPToken::TID => $this->lookupVar( $tok->value, $tok->pos, $bound ), |
| 404 | AFPToken::TSTRING, AFPToken::TFLOAT, AFPToken::TINT, AFPToken::TKEYWORD => $bound, |
| 405 | // @codeCoverageIgnoreStart |
| 406 | default => throw new InternalException( "Unknown token {$tok->type} provided in the ATOM node" ), |
| 407 | // @codeCoverageIgnoreEnd |
| 408 | }; |
| 409 | case AFPTreeNode::ARRAY_DEFINITION: |
| 410 | // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here |
| 411 | foreach ( $node->children as $el ) { |
| 412 | $bound = $this->check( $el, $bound ); |
| 413 | } |
| 414 | return $bound; |
| 415 | |
| 416 | case AFPTreeNode::FUNCTION_CALL: |
| 417 | $fname = $node->children[0]; |
| 418 | $args = array_slice( $node->children, 1 ); |
| 419 | if ( !array_key_exists( $fname, FilterEvaluator::FUNCTIONS ) ) { |
| 420 | throw new UserVisibleException( |
| 421 | 'unknownfunction', |
| 422 | $node->position, |
| 423 | [ $fname ] |
| 424 | ); |
| 425 | } |
| 426 | $this->checkArgCount( $args, $fname, $node->position ); |
| 427 | |
| 428 | if ( $fname === 'set' ) { |
| 429 | // arity is checked, so we know $args[0] and $args[1] exist |
| 430 | $tok = $args[0]->children; |
| 431 | |
| 432 | if ( |
| 433 | !( $tok instanceof AFPToken ) || |
| 434 | $tok->type !== AFPToken::TSTRING |
| 435 | ) { |
| 436 | throw new UserVisibleException( |
| 437 | 'variablevariable', |
| 438 | $node->position, |
| 439 | [] |
| 440 | ); |
| 441 | } |
| 442 | |
| 443 | $bound = $this->check( $args[1], $bound ); |
| 444 | // set the variable as unused |
| 445 | return $this->assignVar( |
| 446 | $tok->value, |
| 447 | $tok->pos, |
| 448 | $bound |
| 449 | ); |
| 450 | } else { |
| 451 | foreach ( $args as $arg ) { |
| 452 | $bound = $this->check( $arg, $bound ); |
| 453 | } |
| 454 | return $bound; |
| 455 | } |
| 456 | |
| 457 | case AFPTreeNode::BINOP: |
| 458 | [ , $left, $right ] = $node->children; |
| 459 | return $this->check( $right, $this->check( $left, $bound ) ); |
| 460 | |