Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
94.50% |
498 / 527 |
|
89.23% |
58 / 65 |
CRAP | |
0.00% |
0 / 1 |
| FilterEvaluator | |
94.50% |
498 / 527 |
|
89.23% |
58 / 65 |
184.34 | |
0.00% |
0 / 1 |
| __construct | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
| toggleConditionLimit | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| raiseCondCount | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
3.33 | |||
| setVariables | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getCacheVersion | |
23.08% |
3 / 13 |
|
0.00% |
0 / 1 |
3.82 | |||
| resetState | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
| checkSyntaxThrow | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
| checkSyntax | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
2.01 | |||
| checkConditions | |
62.50% |
15 / 24 |
|
0.00% |
0 / 1 |
6.32 | |||
| parse | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
| evaluateExpression | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getTree | |
100.00% |
26 / 26 |
|
100.00% |
1 / 1 |
1 | |||
| evalTree | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
| getUsedVars | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| evalNode | |
99.36% |
155 / 156 |
|
0.00% |
0 / 1 |
55 | |||
| callFunc | |
100.00% |
19 / 19 |
|
100.00% |
1 / 1 |
8 | |||
| callKeyword | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
4 | |||
| varExists | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
| getVarValue | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
4 | |||
| setUserVariable | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| funcLc | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| funcUc | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| funcLen | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| funcSpecialRatio | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
| funcCount | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
5 | |||
| funcRCount | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
3 | |||
| funcGetMatches | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
2 | |||
| funcIPInRange | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
| funcIPInRanges | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
| funcCCNorm | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| funcSanitize | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| funcContainsAny | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| funcContainsAll | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| funcCCNormContainsAny | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| funcCCNormContainsAll | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| contains | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
7 | |||
| funcEqualsToAny | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| equalsToAny | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
| ccnorm | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| rmspecials | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| rmdoubles | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| rmwhitespace | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| funcRMSpecials | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| funcRMWhitespace | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| funcRMDoubles | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| funcNorm | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
| funcSubstr | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
| funcStrPos | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
5 | |||
| funcStrReplace | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| funcStrReplaceRegexp | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
2 | |||
| funcStrRegexEscape | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| funcSetVar | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| containmentKeyword | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
| keywordIn | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| keywordContains | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| keywordLike | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| keywordRegex | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
3 | |||
| keywordRegexInsensitive | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| castString | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| castInt | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| castFloat | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| castBool | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| maybeDiscardNode | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
| mungeRegexp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| checkRegexMatchesEmpty | |
25.00% |
2 / 8 |
|
0.00% |
0 / 1 |
6.80 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Extension\AbuseFilter\Parser; |
| 4 | |
| 5 | use Exception; |
| 6 | use InvalidArgumentException; |
| 7 | use MediaWiki\Extension\AbuseFilter\KeywordsManager; |
| 8 | use MediaWiki\Extension\AbuseFilter\Parser\Exception\ConditionLimitException; |
| 9 | use MediaWiki\Extension\AbuseFilter\Parser\Exception\ExceptionBase; |
| 10 | use MediaWiki\Extension\AbuseFilter\Parser\Exception\InternalException; |
| 11 | use MediaWiki\Extension\AbuseFilter\Parser\Exception\UserVisibleException; |
| 12 | use MediaWiki\Extension\AbuseFilter\Parser\Exception\UserVisibleWarning; |
| 13 | use MediaWiki\Extension\AbuseFilter\Variables\VariableHolder; |
| 14 | use MediaWiki\Extension\AbuseFilter\Variables\VariablesManager; |
| 15 | use MediaWiki\Language\Language; |
| 16 | use MediaWiki\Parser\Sanitizer; |
| 17 | use Psr\Log\LoggerInterface; |
| 18 | use Wikimedia\Equivset\Equivset; |
| 19 | use Wikimedia\IPUtils; |
| 20 | use Wikimedia\ObjectCache\BagOStuff; |
| 21 | use Wikimedia\Stats\IBufferingStatsdDataFactory; |
| 22 | |
| 23 | /** |
| 24 | * This class evaluates an AST generated by the filter parser. |
| 25 | * |
| 26 | * @todo Override checkSyntax and make it only try to build the AST. That would mean faster results, |
| 27 | * and no need to mess with DUNDEFINED and the like. However, we must first try to reduce the |
| 28 | * amount of runtime-only exceptions, and try to detect them in the AFPTreeParser instead. |
| 29 | * Otherwise, people may be able to save a broken filter without the syntax check reporting that. |
| 30 | */ |
| 31 | class FilterEvaluator { |
| 32 | private const CACHE_VERSION = 1; |
| 33 | |
| 34 | public const FUNCTIONS = [ |
| 35 | 'lcase' => 'funcLc', |
| 36 | 'ucase' => 'funcUc', |
| 37 | 'length' => 'funcLen', |
| 38 | 'string' => 'castString', |
| 39 | 'int' => 'castInt', |
| 40 | 'float' => 'castFloat', |
| 41 | 'bool' => 'castBool', |
| 42 | 'norm' => 'funcNorm', |
| 43 | 'ccnorm' => 'funcCCNorm', |
| 44 | 'ccnorm_contains_any' => 'funcCCNormContainsAny', |
| 45 | 'ccnorm_contains_all' => 'funcCCNormContainsAll', |
| 46 | 'specialratio' => 'funcSpecialRatio', |
| 47 | 'rmspecials' => 'funcRMSpecials', |
| 48 | 'rmdoubles' => 'funcRMDoubles', |
| 49 | 'rmwhitespace' => 'funcRMWhitespace', |
| 50 | 'count' => 'funcCount', |
| 51 | 'rcount' => 'funcRCount', |
| 52 | 'get_matches' => 'funcGetMatches', |
| 53 | 'ip_in_range' => 'funcIPInRange', |
| 54 | 'ip_in_ranges' => 'funcIPInRanges', |
| 55 | 'contains_any' => 'funcContainsAny', |
| 56 | 'contains_all' => 'funcContainsAll', |
| 57 | 'equals_to_any' => 'funcEqualsToAny', |
| 58 | 'substr' => 'funcSubstr', |
| 59 | 'strlen' => 'funcLen', |
| 60 | 'strpos' => 'funcStrPos', |
| 61 | 'str_replace' => 'funcStrReplace', |
| 62 | 'str_replace_regexp' => 'funcStrReplaceRegexp', |
| 63 | 'rescape' => 'funcStrRegexEscape', |
| 64 | 'set' => 'funcSetVar', |
| 65 | 'set_var' => 'funcSetVar', |
| 66 | 'sanitize' => 'funcSanitize', |
| 67 | ]; |
| 68 | |
| 69 | /** |
| 70 | * The minimum and maximum amount of arguments required by each function. |
| 71 | * @var int[][] |
| 72 | */ |
| 73 | public const FUNC_ARG_COUNT = [ |
| 74 | 'lcase' => [ 1, 1 ], |
| 75 | 'ucase' => [ 1, 1 ], |
| 76 | 'length' => [ 1, 1 ], |
| 77 | 'string' => [ 1, 1 ], |
| 78 | 'int' => [ 1, 1 ], |
| 79 | 'float' => [ 1, 1 ], |
| 80 | 'bool' => [ 1, 1 ], |
| 81 | 'norm' => [ 1, 1 ], |
| 82 | 'ccnorm' => [ 1, 1 ], |
| 83 | 'ccnorm_contains_any' => [ 2, INF ], |
| 84 | 'ccnorm_contains_all' => [ 2, INF ], |
| 85 | 'specialratio' => [ 1, 1 ], |
| 86 | 'rmspecials' => [ 1, 1 ], |
| 87 | 'rmdoubles' => [ 1, 1 ], |
| 88 | 'rmwhitespace' => [ 1, 1 ], |
| 89 | 'count' => [ 1, 2 ], |
| 90 | 'rcount' => [ 1, 2 ], |
| 91 | 'get_matches' => [ 2, 2 ], |
| 92 | 'ip_in_range' => [ 2, 2 ], |
| 93 | 'ip_in_ranges' => [ 2, INF ], |
| 94 | 'contains_any' => [ 2, INF ], |
| 95 | 'contains_all' => [ 2, INF ], |
| 96 | 'equals_to_any' => [ 2, INF ], |
| 97 | 'substr' => [ 2, 3 ], |
| 98 | 'strlen' => [ 1, 1 ], |
| 99 | 'strpos' => [ 2, 3 ], |
| 100 | 'str_replace' => [ 3, 3 ], |
| 101 | 'str_replace_regexp' => [ 3, 3 ], |
| 102 | 'rescape' => [ 1, 1 ], |
| 103 | 'set' => [ 2, 2 ], |
| 104 | 'set_var' => [ 2, 2 ], |
| 105 | 'sanitize' => [ 1, 1 ], |
| 106 | ]; |
| 107 | |
| 108 | // Functions that affect parser state, and shouldn't be cached. |
| 109 | private const ACTIVE_FUNCTIONS = [ |
| 110 | 'funcSetVar', |
| 111 | ]; |
| 112 | |
| 113 | public const KEYWORDS = [ |
| 114 | 'in' => 'keywordIn', |
| 115 | 'like' => 'keywordLike', |
| 116 | 'matches' => 'keywordLike', |
| 117 | 'contains' => 'keywordContains', |
| 118 | 'rlike' => 'keywordRegex', |
| 119 | 'irlike' => 'keywordRegexInsensitive', |
| 120 | 'regex' => 'keywordRegex', |
| 121 | ]; |
| 122 | |
| 123 | /** |
| 124 | * @var bool Are we allowed to use short-circuit evaluation? |
| 125 | */ |
| 126 | private $mAllowShort; |
| 127 | |
| 128 | /** |
| 129 | * @var VariableHolder |
| 130 | */ |
| 131 | private $mVariables; |
| 132 | /** |
| 133 | * @var int The current amount of conditions being consumed |
| 134 | */ |
| 135 | private $mCondCount; |
| 136 | /** |
| 137 | * @var bool Whether the condition limit is enabled. |
| 138 | */ |
| 139 | private $condLimitEnabled = true; |
| 140 | /** |
| 141 | * @var string|null The ID of the filter being parsed, if available. Can also be "global-$ID" |
| 142 | */ |
| 143 | private $mFilter; |
| 144 | /** |
| 145 | * @var bool Whether we can allow retrieving _builtin_ variables not included in $this->mVariables |
| 146 | */ |
| 147 | private $allowMissingVariables = false; |
| 148 | |
| 149 | /** |
| 150 | * @var bool Whether the AST was retrieved from cache |
| 151 | */ |
| 152 | private $fromCache = false; |
| 153 | |
| 154 | /** @var UserVisibleWarning[] */ |
| 155 | private $warnings = []; |
| 156 | |
| 157 | /** |
| 158 | * @var array Cached results of functions |
| 159 | */ |
| 160 | private $funcCache = []; |
| 161 | |
| 162 | /** |
| 163 | * @var array AFPToken::TID values found during node evaluation |
| 164 | */ |
| 165 | private $usedVars = []; |
| 166 | |
/**
 * Build an evaluator in a freshly reset state.
 *
 * @param Language $contLang Content language, used for language-dependent function
 * @param BagOStuff $cache Used to cache the AST and the tokens
 * @param LoggerInterface $logger Used for debugging
 * @param KeywordsManager $keywordsManager
 * @param VariablesManager $varManager
 * @param IBufferingStatsdDataFactory $statsd
 * @param Equivset $equivset
 * @param int $conditionsLimit Threshold checked by raiseCondCount() while the limit is enabled
 * @param VariableHolder|null $vars Variables to evaluate against; when omitted, the empty
 *   holder created by resetState() is kept
 */
public function __construct(
	private readonly Language $contLang,
	private readonly BagOStuff $cache,
	private readonly LoggerInterface $logger,
	private readonly KeywordsManager $keywordsManager,
	private readonly VariablesManager $varManager,
	private readonly IBufferingStatsdDataFactory $statsd,
	private readonly Equivset $equivset,
	private readonly int $conditionsLimit,
	?VariableHolder $vars = null
) {
	// resetState() installs an empty VariableHolder; only replace it when the caller gave one.
	$this->resetState();
	if ( $vars !== null ) {
		$this->mVariables = $vars;
	}
}
| 196 | |
/**
 * Switch enforcement of the condition limit on or off.
 * Intended for batch scripts and similar callers.
 *
 * @param bool $enable True to enforce the limit, false to stop enforcing it
 */
public function toggleConditionLimit( $enable ) {
	// Stored as given; raiseCondCount() reads this flag on every increment.
	$this->condLimitEnabled = $enable;
}
| 205 | |
/**
 * Count one more consumed condition and enforce the limit, if enabled.
 *
 * The counter is incremented even when the limit is disabled.
 *
 * @throws ConditionLimitException When the limit is enabled and the counter exceeds it
 */
private function raiseCondCount() {
	++$this->mCondCount;

	if ( !$this->condLimitEnabled ) {
		return;
	}

	if ( $this->mCondCount > $this->conditionsLimit ) {
		throw new ConditionLimitException();
	}
}
| 215 | |
/**
 * Replace the variable holder used by this evaluator.
 *
 * @param VariableHolder $vars
 */
public function setVariables( VariableHolder $vars ) {
	$this->mVariables = $vars;
}
| 219 | |
/**
 * Compute the version string used in AST cache keys.
 *
 * The value is a SHA-256 hash over the CACHE_VERSION constants of this class, the tree
 * parser, the tokenizer and the syntax checker, together with the names of all functions
 * and keywords. Changing any of those yields a different version and thus different cache
 * keys. The hash is computed once and memoised in a static local.
 *
 * @return string
 */
private static function getCacheVersion() {
	static $version = null;

	if ( $version === null ) {
		$components = [
			self::CACHE_VERSION,
			AFPTreeParser::CACHE_VERSION,
			AbuseFilterTokenizer::CACHE_VERSION,
			SyntaxChecker::CACHE_VERSION,
			array_keys( self::FUNCTIONS ),
			array_keys( self::KEYWORDS ),
		];
		$version = hash( 'sha256', serialize( $components ) );
	}

	return $version;
}
| 244 | |
/**
 * Put the evaluator back into its initial state: empty variables, zero consumed
 * conditions, short-circuit evaluation allowed, no filter ID, and no recorded
 * warnings or used variables.
 */
private function resetState() {
	$this->mFilter = null;
	$this->mAllowShort = true;
	$this->mCondCount = 0;
	$this->mVariables = new VariableHolder();
	$this->usedVars = [];
	$this->warnings = [];
}
| 256 | |
/**
 * Validate the syntax of $filter by building and evaluating its tree, letting any
 * resulting exception propagate to the caller.
 *
 * During the check, missing variables are allowed and short-circuit evaluation is
 * turned off. Afterwards the short-circuit setting is restored to its previous value
 * and missing variables are disallowed again, whether or not the check succeeded.
 *
 * @param string $filter
 * @return true When successful
 * @throws UserVisibleException
 */
public function checkSyntaxThrow( string $filter ): bool {
	$previousAllowShort = $this->mAllowShort;
	$this->allowMissingVariables = true;
	$this->mAllowShort = false;

	try {
		$tree = $this->getTree( $filter );
		$this->evalTree( $tree );
	} finally {
		$this->allowMissingVariables = false;
		$this->mAllowShort = $previousAllowShort;
	}

	return true;
}
| 276 | |
/**
 * Non-throwing variant of checkSyntaxThrow().
 *
 * A UserVisibleException raised by the check is captured and reported through the
 * returned status; other exception types are not caught here.
 *
 * @param string $filter
 * @return ParserStatus Holds the captured exception (or null), the current warnings and
 *   the number of conditions consumed by this check
 */
public function checkSyntax( string $filter ): ParserStatus {
	$condsBefore = $this->mCondCount;
	$error = null;

	try {
		$this->checkSyntaxThrow( $filter );
	} catch ( UserVisibleException $caught ) {
		$error = $caught;
	}

	$condsUsed = $this->mCondCount - $condsBefore;

	return new ParserStatus( $error, $this->warnings, $condsUsed );
}
| 296 | |
/**
 * Main entry point: evaluate the given conditions and report whether they match.
 *
 * Any ExceptionBase thrown while parsing or evaluating is treated as "no match",
 * attached to the returned status and logged as a warning. The elapsed time is sent to
 * statsd in either case.
 *
 * @param string $conds
 * @param string|null $filter The ID of the filter being parsed
 * @return RuleCheckerStatus
 */
public function checkConditions( string $conds, $filter = null ): RuleCheckerStatus {
	$this->mFilter = $filter;
	$condsBefore = $this->mCondCount;
	$error = null;
	$begin = microtime( true );

	try {
		$matched = $this->parse( $conds );
	} catch ( ExceptionBase $error ) {
		$matched = false;
	}

	$this->statsd->timing( 'abusefilter_cachingParser_full', microtime( true ) - $begin );

	$status = new RuleCheckerStatus(
		$matched,
		$this->fromCache,
		$error,
		$this->warnings,
		$this->mCondCount - $condsBefore
	);

	if ( $error === null ) {
		return $status;
	}

	// User-visible exceptions expose a dedicated message for logs; others use the plain one.
	$logMessage = $error instanceof UserVisibleException
		? $error->getMessageForLogs()
		: $error->getMessage();

	$this->logger->warning(
		"AbuseFilter parser error: {parser_error}",
		[ 'parser_error' => $logMessage, 'broken_filter' => $filter ?: 'none' ]
	);

	return $status;
}
| 339 | |
/**
 * Evaluate $code and reduce the result to a boolean.
 * A result of type DUNDEFINED counts as false.
 *
 * @param string $code
 * @return bool
 */
public function parse( $code ) {
	$result = $this->evalTree( $this->getTree( $code ) );

	if ( $result->getType() === AFPData::DUNDEFINED ) {
		return false;
	}

	return $result->toBool();
}
| 348 | |
/**
 * Evaluate $filter and return the result converted to a native PHP value.
 *
 * @param string $filter
 * @return mixed
 */
public function evaluateExpression( $filter ) {
	$tree = $this->getTree( $filter );
	$result = $this->evalTree( $tree );

	return $result->toNative();
}
| 356 | |
/**
 * Get the syntax tree for $code, from the cache when possible.
 *
 * On a cache miss the code is tokenized, parsed and run through the syntax checker
 * before the tree is returned for caching. As a side effect, $this->fromCache records
 * whether the tree had to be built (false) or not (true).
 *
 * @param string $code
 * @return AFPSyntaxTree
 */
private function getTree( $code ): AFPSyntaxTree {
	// Assume a cache hit; the callback below only runs on a miss and clears the flag.
	$this->fromCache = true;

	$cacheKey = $this->cache->makeGlobalKey(
		'abusefilter-tree',
		__CLASS__,
		self::getCacheVersion(),
		hash( 'sha256', $code )
	);

	$buildTree = function () use ( $code ) {
		$this->fromCache = false;

		$tokens = ( new AbuseFilterTokenizer( $this->cache ) )->getTokens( $code );

		$treeParser = new AFPTreeParser( $this->logger, $this->statsd, $this->keywordsManager );
		$treeParser->setFilter( $this->mFilter );
		$syntaxTree = $treeParser->parse( $tokens );

		$syntaxChecker = new SyntaxChecker(
			$syntaxTree,
			$this->keywordsManager,
			SyntaxChecker::MCONSERVATIVE,
			false
		);
		$syntaxChecker->start();

		return $syntaxTree;
	};

	return $this->cache->getWithSetCallback( $cacheKey, BagOStuff::TTL_DAY, $buildTree );
}
| 389 | |
/**
 * Evaluate a whole syntax tree starting from its root node.
 *
 * A tree without a root evaluates to DNULL; in that case no timing is reported.
 *
 * @param AFPSyntaxTree $tree
 * @return AFPData
 */
private function evalTree( AFPSyntaxTree $tree ): AFPData {
	$begin = microtime( true );
	$rootNode = $tree->getRoot();

	if ( $rootNode ) {
		$result = $this->evalNode( $rootNode );
		$this->statsd->timing( 'abusefilter_cachingParser_eval', microtime( true ) - $begin );
		return $result;
	}

	return new AFPData( AFPData::DNULL );
}
| 402 | |
/**
 * Parse a filter and return the variables used.
 * All variables are AFPToken::TID and are found during the node stepthrough in evaluation
 * and saved to self::usedVars to be returned to the caller in this function.
 *
 * The accumulator is cleared before the filter is evaluated, so the result only reflects
 * $filter, even if this instance was used for other evaluations earlier.
 *
 * @param string $filter
 * @return string[] Unique variable names; keys are those kept by array_unique() and may
 *   therefore be non-consecutive
 */
public function getUsedVars( string $filter ): array {
	// $this->usedVars is otherwise only emptied by resetState(); without this, names
	// recorded by a previous evaluation on the same instance would leak into the result.
	$this->usedVars = [];

	// The status returned by the syntax check is deliberately ignored: only the names
	// recorded during evaluation are of interest here.
	$this->checkSyntax( $filter );

	return array_unique( $this->usedVars );
}
| 415 | |
| 416 | /** |
| 417 | * Evaluate the value of the specified AST node. |
| 418 | * |
| 419 | * @param AFPTreeNode $node The node to evaluate. |
| 420 | * @return AFPData|AFPTreeNode|string |
| 421 | * @throws ExceptionBase |
| 422 | * @throws UserVisibleException |
| 423 | */ |
| 424 | private function evalNode( AFPTreeNode $node ) { |
| 425 | switch ( $node->type ) { |
| 426 | case AFPTreeNode::ATOM: |
| 427 | $tok = $node->children; |
| 428 | switch ( $tok->type ) { |
| 429 | case AFPToken::TID: |
| 430 | return $this->getVarValue( strtolower( $tok->value ) ); |
| 431 | case AFPToken::TSTRING: |
| 432 | return new AFPData( AFPData::DSTRING, $tok->value ); |
| 433 | case AFPToken::TFLOAT: |
| 434 | return new AFPData( AFPData::DFLOAT, $tok->value ); |
| 435 | case AFPToken::TINT: |
| 436 | return new AFPData( AFPData::DINT, $tok->value ); |
| 437 | /** @noinspection PhpMissingBreakStatementInspection */ |
| 438 | case AFPToken::TKEYWORD: |
| 439 | switch ( $tok->value ) { |
| 440 | case "true": |
| 441 | return new AFPData( AFPData::DBOOL, true ); |
| 442 | case "false": |
| 443 | return new AFPData( AFPData::DBOOL, false ); |
| 444 | case "null": |
| 445 | return new AFPData( AFPData::DNULL ); |
| 446 | } |
| 447 | // Fallthrough intended |
| 448 | default: |
| 449 | // @codeCoverageIgnoreStart |
| 450 | throw new InternalException( "Unknown token provided in the ATOM node" ); |
| 451 | // @codeCoverageIgnoreEnd |
| 452 | } |
| 453 | // Unreachable line |
| 454 | case AFPTreeNode::ARRAY_DEFINITION: |
| 455 | $items = []; |
| 456 | // Foreach is usually faster than array_map |
| 457 | // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here |
| 458 | foreach ( $node->children as $el ) { |
| 459 | $items[] = $this->evalNode( $el ); |
| 460 | } |
| 461 | return new AFPData( AFPData::DARRAY, $items ); |
| 462 | |
| 463 | case AFPTreeNode::FUNCTION_CALL: |
| 464 | $functionName = $node->children[0]; |
| 465 | $args = array_slice( $node->children, 1 ); |
| 466 | |
| 467 | $dataArgs = []; |
| 468 | // Foreach is usually faster than array_map |
| 469 | foreach ( $args as $arg ) { |
| 470 | $dataArgs[] = $this->evalNode( $arg ); |
| 471 | } |
| 472 | |
| 473 | return $this->callFunc( $functionName, $dataArgs, $node->position ); |
| 474 | case AFPTreeNode::ARRAY_INDEX: |
| 475 | [ $array, $offset ] = $node->children; |
| 476 | |
| 477 | $array = $this->evalNode( $array ); |
| 478 | // Note: we MUST evaluate the offset to ensure it is valid, regardless |
| 479 | // of $array! |
| 480 | $offset = $this->evalNode( $offset ); |
| 481 | // @todo If $array has no elements we could already throw an outofbounds. We don't |
| 482 | // know what the index is, though. |
| 483 | if ( $offset->getType() === AFPData::DUNDEFINED ) { |
| 484 | return new AFPData( AFPData::DUNDEFINED ); |
| 485 | } |
| 486 | $offset = $offset->toInt(); |
| 487 | |
| 488 | if ( $array->getType() === AFPData::DUNDEFINED ) { |
| 489 | return new AFPData( AFPData::DUNDEFINED ); |
| 490 | } |
| 491 | |
| 492 | if ( $array->getType() !== AFPData::DARRAY ) { |
| 493 | throw new UserVisibleException( 'notarray', $node->position, [] ); |
| 494 | } |
| 495 | |
| 496 | $array = $array->toArray(); |
| 497 | if ( count( $array ) <= $offset ) { |
| 498 | throw new UserVisibleException( 'outofbounds', $node->position, |
| 499 | [ $offset, count( $array ) ] ); |
| 500 | } elseif ( $offset < 0 ) { |
| 501 | throw new UserVisibleException( 'negativeindex', $node->position, [ $offset ] ); |
| 502 | } |
| 503 | |
| 504 | return $array[$offset]; |
| 505 | |
| 506 | case AFPTreeNode::UNARY: |
| 507 | [ $operation, $argument ] = $node->children; |
| 508 | $argument = $this->evalNode( $argument ); |
| 509 | if ( $operation === '-' ) { |
| 510 | return $argument->unaryMinus(); |
| 511 | } |
| 512 | return $argument; |
| 513 | |
| 514 | case AFPTreeNode::KEYWORD_OPERATOR: |
| 515 | [ $keyword, $leftOperand, $rightOperand ] = $node->children; |
| 516 | $leftOperand = $this->evalNode( $leftOperand ); |
| 517 | $rightOperand = $this->evalNode( $rightOperand ); |
| 518 | |
| 519 | return $this->callKeyword( $keyword, $leftOperand, $rightOperand, $node->position ); |
| 520 | case AFPTreeNode::BOOL_INVERT: |
| 521 | [ $argument ] = $node->children; |
| 522 | $argument = $this->evalNode( $argument ); |
| 523 | return $argument->boolInvert(); |
| 524 | |
| 525 | case AFPTreeNode::POW: |
| 526 | [ $base, $exponent ] = $node->children; |
| 527 | $base = $this->evalNode( $base ); |
| 528 | $exponent = $this->evalNode( $exponent ); |
| 529 | return $base->pow( $exponent ); |
| 530 | |
| 531 | case AFPTreeNode::MUL_REL: |
| 532 | [ $op, $leftOperand, $rightOperand ] = $node->children; |
| 533 | $leftOperand = $this->evalNode( $leftOperand ); |
| 534 | $rightOperand = $this->evalNode( $rightOperand ); |
| 535 | return $leftOperand->mulRel( $rightOperand, $op, $node->position ); |
| 536 | |
| 537 | case AFPTreeNode::SUM_REL: |
| 538 | [ $op, $leftOperand, $rightOperand ] = $node->children; |
| 539 | $leftOperand = $this->evalNode( $leftOperand ); |
| 540 | $rightOperand = $this->evalNode( $rightOperand ); |
| 541 | return match ( $op ) { |
| 542 | '+' => $leftOperand->sum( $rightOperand ), |
| 543 | '-' => $leftOperand->sub( $rightOperand ), |
| 544 | // @codeCoverageIgnoreStart |
| 545 | default => throw new InternalException( "Unknown sum-related operator: {$op}" ), |
| 546 | // @codeCoverageIgnoreEnd |
| 547 | }; |
| 548 | // Unreachable line |
| 549 | case AFPTreeNode::COMPARE: |
| 550 | [ $op, $leftOperand, $rightOperand ] = $node->children; |
| 551 | $leftOperand = $this->evalNode( $leftOperand ); |
| 552 | $rightOperand = $this->evalNode( $rightOperand ); |
| 553 | $this->raiseCondCount(); |
| 554 | return $leftOperand->compareOp( $rightOperand, $op ); |
| 555 | |
| 556 | case AFPTreeNode::LOGIC: |
| 557 | [ $op, $leftOperand, $rightOperand ] = $node->children; |
| 558 | $leftOperand = $this->evalNode( $leftOperand ); |
| 559 | $value = $leftOperand->getType() === AFPData::DUNDEFINED ? false : $leftOperand->toBool(); |
| 560 | // Short-circuit. |
| 561 | if ( ( !$value && $op === '&' ) || ( $value && $op === '|' ) ) { |
| 562 | if ( $rightOperand instanceof AFPTreeNode ) { |
| 563 | $this->maybeDiscardNode( $rightOperand ); |
| 564 | } |
| 565 | return $leftOperand; |
| 566 | } |
| 567 | $rightOperand = $this->evalNode( $rightOperand ); |
| 568 | return $leftOperand->boolOp( $rightOperand, $op ); |
| 569 | |
| 570 | case AFPTreeNode::CONDITIONAL: |
| 571 | [ $condition, $valueIfTrue, $valueIfFalse ] = $node->children; |
| 572 | $condition = $this->evalNode( $condition ); |
| 573 | $isTrue = $condition->getType() === AFPData::DUNDEFINED ? false : $condition->toBool(); |
| 574 | if ( $isTrue ) { |
| 575 | if ( $valueIfFalse !== null ) { |
| 576 | $this->maybeDiscardNode( $valueIfFalse ); |
| 577 | } |
| 578 | return $this->evalNode( $valueIfTrue ); |
| 579 | } else { |
| 580 | $this->maybeDiscardNode( $valueIfTrue ); |
| 581 | return $valueIfFalse !== null |
| 582 | ? $this->evalNode( $valueIfFalse ) |
| 583 | // We assume null as default if the else is missing |
| 584 | : new AFPData( AFPData::DNULL ); |
| 585 | } |
| 586 | |