Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
94.46% |
494 / 523 |
|
89.06% |
57 / 64 |
CRAP | |
0.00% |
0 / 1 |
FilterEvaluator | |
94.46% |
494 / 523 |
|
89.06% |
57 / 64 |
186.59 | |
0.00% |
0 / 1 |
__construct | |
90.91% |
10 / 11 |
|
0.00% |
0 / 1 |
2.00 | |||
toggleConditionLimit | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
raiseCondCount | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
3.33 | |||
setVariables | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getCacheVersion | |
23.08% |
3 / 13 |
|
0.00% |
0 / 1 |
3.82 | |||
resetState | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
checkSyntaxThrow | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
checkSyntax | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
2.01 | |||
checkConditions | |
62.50% |
15 / 24 |
|
0.00% |
0 / 1 |
6.32 | |||
parse | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
evaluateExpression | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getTree | |
100.00% |
25 / 25 |
|
100.00% |
1 / 1 |
1 | |||
evalTree | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
2 | |||
evalNode | |
99.36% |
155 / 156 |
|
0.00% |
0 / 1 |
58 | |||
callFunc | |
100.00% |
19 / 19 |
|
100.00% |
1 / 1 |
8 | |||
callKeyword | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
4 | |||
varExists | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
getVarValue | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
4 | |||
setUserVariable | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
funcLc | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
funcUc | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
funcLen | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
funcSpecialRatio | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
funcCount | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
5 | |||
funcRCount | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
3 | |||
funcGetMatches | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
2 | |||
funcIPInRange | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
funcIPInRanges | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
funcCCNorm | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
funcSanitize | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
funcContainsAny | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
funcContainsAll | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
funcCCNormContainsAny | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
funcCCNormContainsAll | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
contains | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
7 | |||
funcEqualsToAny | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
equalsToAny | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
ccnorm | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
rmspecials | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
rmdoubles | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
rmwhitespace | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
funcRMSpecials | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
funcRMWhitespace | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
funcRMDoubles | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
funcNorm | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
funcSubstr | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
funcStrPos | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
5 | |||
funcStrReplace | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
funcStrReplaceRegexp | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
2 | |||
funcStrRegexEscape | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
funcSetVar | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
containmentKeyword | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
keywordIn | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
keywordContains | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
keywordLike | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
keywordRegex | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
3 | |||
keywordRegexInsensitive | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
castString | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
castInt | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
castFloat | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
castBool | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
maybeDiscardNode | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
mungeRegexp | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
checkRegexMatchesEmpty | |
25.00% |
2 / 8 |
|
0.00% |
0 / 1 |
6.80 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\AbuseFilter\Parser; |
4 | |
5 | use BagOStuff; |
6 | use Exception; |
7 | use IBufferingStatsdDataFactory; |
8 | use InvalidArgumentException; |
9 | use Language; |
10 | use MediaWiki\Extension\AbuseFilter\KeywordsManager; |
11 | use MediaWiki\Extension\AbuseFilter\Parser\Exception\ConditionLimitException; |
12 | use MediaWiki\Extension\AbuseFilter\Parser\Exception\ExceptionBase; |
13 | use MediaWiki\Extension\AbuseFilter\Parser\Exception\InternalException; |
14 | use MediaWiki\Extension\AbuseFilter\Parser\Exception\UserVisibleException; |
15 | use MediaWiki\Extension\AbuseFilter\Parser\Exception\UserVisibleWarning; |
16 | use MediaWiki\Extension\AbuseFilter\Variables\VariableHolder; |
17 | use MediaWiki\Extension\AbuseFilter\Variables\VariablesManager; |
18 | use MediaWiki\Parser\Sanitizer; |
19 | use Psr\Log\LoggerInterface; |
20 | use Wikimedia\Equivset\Equivset; |
21 | use Wikimedia\IPUtils; |
22 | |
23 | /** |
24 | * This class evaluates an AST generated by the filter parser. |
25 | * |
26 | * @todo Override checkSyntax and make it only try to build the AST. That would mean faster results, |
27 | * and no need to mess with DUNDEFINED and the like. However, we must first try to reduce the |
28 | * amount of runtime-only exceptions, and try to detect them in the AFPTreeParser instead. |
29 | * Otherwise, people may be able to save a broken filter without the syntax check reporting that. |
30 | */ |
31 | class FilterEvaluator { |
32 | private const CACHE_VERSION = 1; |
33 | |
/**
 * Maps each function name, as written in filter code, to the name of the
 * method on this class that implements it. callFunc() looks the handler up
 * here and invokes it dynamically. Several names share one handler
 * ('length'/'strlen' and 'set'/'set_var').
 */
34 | public const FUNCTIONS = [ |
35 | 'lcase' => 'funcLc', |
36 | 'ucase' => 'funcUc', |
37 | 'length' => 'funcLen', |
38 | 'string' => 'castString', |
39 | 'int' => 'castInt', |
40 | 'float' => 'castFloat', |
41 | 'bool' => 'castBool', |
42 | 'norm' => 'funcNorm', |
43 | 'ccnorm' => 'funcCCNorm', |
44 | 'ccnorm_contains_any' => 'funcCCNormContainsAny', |
45 | 'ccnorm_contains_all' => 'funcCCNormContainsAll', |
46 | 'specialratio' => 'funcSpecialRatio', |
47 | 'rmspecials' => 'funcRMSpecials', |
48 | 'rmdoubles' => 'funcRMDoubles', |
49 | 'rmwhitespace' => 'funcRMWhitespace', |
50 | 'count' => 'funcCount', |
51 | 'rcount' => 'funcRCount', |
52 | 'get_matches' => 'funcGetMatches', |
53 | 'ip_in_range' => 'funcIPInRange', |
54 | 'ip_in_ranges' => 'funcIPInRanges', |
55 | 'contains_any' => 'funcContainsAny', |
56 | 'contains_all' => 'funcContainsAll', |
57 | 'equals_to_any' => 'funcEqualsToAny', |
58 | 'substr' => 'funcSubstr', |
59 | 'strlen' => 'funcLen', |
60 | 'strpos' => 'funcStrPos', |
61 | 'str_replace' => 'funcStrReplace', |
62 | 'str_replace_regexp' => 'funcStrReplaceRegexp', |
63 | 'rescape' => 'funcStrRegexEscape', |
64 | 'set' => 'funcSetVar', |
65 | 'set_var' => 'funcSetVar', |
66 | 'sanitize' => 'funcSanitize', |
67 | ]; |
68 | |
69 | /** |
70 | * The minimum and maximum amount of arguments required by each function. |
71 | * @var int[][] |
72 | */ |
73 | public const FUNC_ARG_COUNT = [ |
74 | 'lcase' => [ 1, 1 ], |
75 | 'ucase' => [ 1, 1 ], |
76 | 'length' => [ 1, 1 ], |
77 | 'string' => [ 1, 1 ], |
78 | 'int' => [ 1, 1 ], |
79 | 'float' => [ 1, 1 ], |
80 | 'bool' => [ 1, 1 ], |
81 | 'norm' => [ 1, 1 ], |
82 | 'ccnorm' => [ 1, 1 ], |
83 | 'ccnorm_contains_any' => [ 2, INF ], |
84 | 'ccnorm_contains_all' => [ 2, INF ], |
85 | 'specialratio' => [ 1, 1 ], |
86 | 'rmspecials' => [ 1, 1 ], |
87 | 'rmdoubles' => [ 1, 1 ], |
88 | 'rmwhitespace' => [ 1, 1 ], |
89 | 'count' => [ 1, 2 ], |
90 | 'rcount' => [ 1, 2 ], |
91 | 'get_matches' => [ 2, 2 ], |
92 | 'ip_in_range' => [ 2, 2 ], |
93 | 'ip_in_ranges' => [ 2, INF ], |
94 | 'contains_any' => [ 2, INF ], |
95 | 'contains_all' => [ 2, INF ], |
96 | 'equals_to_any' => [ 2, INF ], |
97 | 'substr' => [ 2, 3 ], |
98 | 'strlen' => [ 1, 1 ], |
99 | 'strpos' => [ 2, 3 ], |
100 | 'str_replace' => [ 3, 3 ], |
101 | 'str_replace_regexp' => [ 3, 3 ], |
102 | 'rescape' => [ 1, 1 ], |
103 | 'set' => [ 2, 2 ], |
104 | 'set_var' => [ 2, 2 ], |
105 | 'sanitize' => [ 1, 1 ], |
106 | ]; |
107 | |
108 | // Functions that affect parser state, and shouldn't be cached. |
109 | private const ACTIVE_FUNCTIONS = [ |
110 | 'funcSetVar', |
111 | ]; |
112 | |
/**
 * Maps each keyword operator, as written in filter code, to the name of the
 * method on this class that implements it. callKeyword() looks the handler
 * up here and invokes it dynamically. 'like'/'matches' and 'rlike'/'regex'
 * are aliases sharing one handler each.
 */
113 | public const KEYWORDS = [ |
114 | 'in' => 'keywordIn', |
115 | 'like' => 'keywordLike', |
116 | 'matches' => 'keywordLike', |
117 | 'contains' => 'keywordContains', |
118 | 'rlike' => 'keywordRegex', |
119 | 'irlike' => 'keywordRegexInsensitive', |
120 | 'regex' => 'keywordRegex', |
121 | ]; |
122 | |
123 | /** |
124 | * @var bool Are we allowed to use short-circuit evaluation? |
125 | */ |
126 | private $mAllowShort; |
127 | |
128 | /** |
129 | * @var VariableHolder |
130 | */ |
131 | private $mVariables; |
132 | /** |
133 | * @var int The current amount of conditions being consumed |
134 | */ |
135 | private $mCondCount; |
136 | /** |
137 | * @var bool Whether the condition limit is enabled. |
138 | */ |
139 | private $condLimitEnabled = true; |
140 | /** |
141 | * @var string|null The ID of the filter being parsed, if available. Can also be "global-$ID" |
142 | */ |
143 | private $mFilter; |
144 | /** |
145 | * @var bool Whether we can allow retrieving _builtin_ variables not included in $this->mVariables |
146 | */ |
147 | private $allowMissingVariables = false; |
148 | |
149 | /** |
150 | * @var BagOStuff Used to cache the AST and the tokens |
151 | */ |
152 | private $cache; |
153 | /** |
154 | * @var bool Whether the AST was retrieved from cache |
155 | */ |
156 | private $fromCache = false; |
157 | /** |
158 | * @var LoggerInterface Used for debugging |
159 | */ |
160 | private $logger; |
161 | /** |
162 | * @var Language Content language, used for language-dependent functions |
163 | */ |
164 | private $contLang; |
165 | /** |
166 | * @var IBufferingStatsdDataFactory |
167 | */ |
168 | private $statsd; |
169 | |
170 | /** @var KeywordsManager */ |
171 | private $keywordsManager; |
172 | |
173 | /** @var VariablesManager */ |
174 | private $varManager; |
175 | |
176 | /** @var int */ |
177 | private $conditionsLimit; |
178 | |
179 | /** @var UserVisibleWarning[] */ |
180 | private $warnings = []; |
181 | |
182 | /** |
183 | * @var array Cached results of functions |
184 | */ |
185 | private $funcCache = []; |
186 | |
187 | /** |
188 | * @var Equivset |
189 | */ |
190 | private $equivset; |
191 | |
/**
 * Create a new evaluator instance.
 *
 * @param Language $contLang Content language, used for language-dependent functions
 * @param BagOStuff $cache Used to cache the AST and the tokens
 * @param LoggerInterface $logger Used for debugging
 * @param KeywordsManager $keywordsManager
 * @param VariablesManager $varManager
 * @param IBufferingStatsdDataFactory $statsdDataFactory
 * @param Equivset $equivset
 * @param int $conditionsLimit Threshold that the consumed-condition counter is checked
 *  against while the condition limit is enabled
 * @param VariableHolder|null $vars Variables to evaluate against. When omitted, the
 *  empty holder installed by resetState() is kept.
 */
public function __construct(
	Language $contLang,
	BagOStuff $cache,
	LoggerInterface $logger,
	KeywordsManager $keywordsManager,
	VariablesManager $varManager,
	IBufferingStatsdDataFactory $statsdDataFactory,
	Equivset $equivset,
	int $conditionsLimit,
	// Explicitly nullable: relying on "= null" to make the type nullable is
	// deprecated as of PHP 8.4.
	?VariableHolder $vars = null
) {
	$this->contLang = $contLang;
	$this->cache = $cache;
	$this->logger = $logger;
	$this->statsd = $statsdDataFactory;
	$this->keywordsManager = $keywordsManager;
	$this->varManager = $varManager;
	$this->equivset = $equivset;
	$this->conditionsLimit = $conditionsLimit;

	// resetState() installs a fresh, empty VariableHolder, so it must run
	// before a caller-supplied holder is put in place.
	$this->resetState();
	if ( $vars !== null ) {
		$this->mVariables = $vars;
	}
}
229 | |
/**
 * Switch enforcement of the condition limit on or off. Meant for batch
 * scripts and similar callers.
 *
 * @param bool $enable True to enforce the limit, false to stop enforcing it
 */
public function toggleConditionLimit( $enable ) {
	// Only the flag changes; the running condition counter is left untouched.
	$this->condLimitEnabled = $enable;
}
238 | |
/**
 * Count one more consumed condition and, if the limit is being enforced,
 * fail once the counter exceeds it.
 *
 * @throws ConditionLimitException
 */
private function raiseCondCount() {
	// The counter is bumped even when the limit is disabled.
	++$this->mCondCount;

	if ( !$this->condLimitEnabled ) {
		return;
	}

	if ( $this->mCondCount > $this->conditionsLimit ) {
		throw new ConditionLimitException();
	}
}
248 | |
/**
 * Replace the holder that variables are read from and written to.
 *
 * @param VariableHolder $vars
 */
public function setVariables( VariableHolder $vars ) {
	$this->mVariables = $vars;
}
255 | |
/**
 * Build the version string that is mixed into AST cache keys, so that cached
 * trees are invalidated when the parser changes. The string is a SHA-256
 * digest over the CACHE_VERSION constants of this class, the tree parser,
 * the tokenizer and the syntax checker, plus the names of all functions and
 * keywords. Adding or removing a function or keyword therefore changes it.
 *
 * The digest is memoized in a function-static variable.
 *
 * @return string
 */
private static function getCacheVersion() {
	static $version = null;

	if ( $version === null ) {
		$components = [
			self::CACHE_VERSION,
			AFPTreeParser::CACHE_VERSION,
			AbuseFilterTokenizer::CACHE_VERSION,
			SyntaxChecker::CACHE_VERSION,
			array_keys( self::FUNCTIONS ),
			array_keys( self::KEYWORDS ),
		];
		$version = hash( 'sha256', serialize( $components ) );
	}

	return $version;
}
280 | |
/**
 * Put the evaluator back into its initial state: no filter ID, no warnings,
 * a zeroed condition counter, short-circuiting allowed and an empty
 * variable holder.
 */
private function resetState() {
	$this->mFilter = null;
	$this->warnings = [];
	$this->mCondCount = 0;
	$this->mAllowShort = true;
	$this->mVariables = new VariableHolder();
}
291 | |
/**
 * Check the syntax of $filter by building and evaluating its tree, letting
 * any exception propagate to the caller.
 *
 * While the check runs, missing variables are tolerated and short-circuit
 * evaluation is not allowed. Both flags are restored afterwards, even when
 * an exception is thrown.
 *
 * @param string $filter
 * @return true When successful
 * @throws UserVisibleException
 */
public function checkSyntaxThrow( string $filter ): bool {
	$savedAllowShort = $this->mAllowShort;
	$this->allowMissingVariables = true;

	try {
		$this->mAllowShort = false;
		$tree = $this->getTree( $filter );
		$this->evalTree( $tree );
	} finally {
		// allowMissingVariables is always reset to false, not to its prior value.
		$this->allowMissingVariables = false;
		$this->mAllowShort = $savedAllowShort;
	}

	return true;
}
311 | |
/**
 * Non-throwing variant of checkSyntaxThrow(): a UserVisibleException is
 * captured and reported through the returned status instead. Other
 * exception types are not caught here.
 *
 * @param string $filter
 * @return ParserStatus Carries the captured exception (or null), the current
 *  warnings, and the number of conditions consumed by this check
 */
public function checkSyntax( string $filter ): ParserStatus {
	$condsBefore = $this->mCondCount;
	$error = null;

	try {
		$this->checkSyntaxThrow( $filter );
	} catch ( UserVisibleException $caught ) {
		$error = $caught;
	}

	$condsUsed = $this->mCondCount - $condsBefore;

	return new ParserStatus( $error, $this->warnings, $condsUsed );
}
331 | |
/**
 * Main entry point: evaluate the given conditions and report whether they
 * match. Any ExceptionBase raised while parsing or evaluating is turned into
 * a non-matching result, attached to the returned status, and logged as a
 * warning.
 *
 * @param string $conds
 * @param string|null $filter The ID of the filter being parsed
 * @return RuleCheckerStatus
 */
public function checkConditions( string $conds, $filter = null ): RuleCheckerStatus {
	$this->mFilter = $filter;
	$condsBefore = $this->mCondCount;
	$error = null;
	$began = microtime( true );

	try {
		$matched = $this->parse( $conds );
	} catch ( ExceptionBase $error ) {
		$matched = false;
	}

	$this->statsd->timing( 'abusefilter_cachingParser_full', microtime( true ) - $began );

	$status = new RuleCheckerStatus(
		$matched,
		$this->fromCache,
		$error,
		$this->warnings,
		$this->mCondCount - $condsBefore
	);

	if ( $error === null ) {
		return $status;
	}

	// User-visible exceptions provide a dedicated message for logs.
	$logMessage = $error instanceof UserVisibleException
		? $error->getMessageForLogs()
		: $error->getMessage();

	$this->logger->warning(
		"AbuseFilter parser error: {parser_error}",
		[ 'parser_error' => $logMessage, 'broken_filter' => $filter ?: 'none' ]
	);

	return $status;
}
374 | |
/**
 * Evaluate filter code and reduce the outcome to a boolean.
 *
 * @param string $code
 * @return bool False when the result is of type DUNDEFINED, otherwise the
 *  result converted to bool
 */
public function parse( $code ) {
	$value = $this->evalTree( $this->getTree( $code ) );

	if ( $value->getType() === AFPData::DUNDEFINED ) {
		return false;
	}

	return $value->toBool();
}
383 | |
/**
 * Evaluate filter code and hand back the result converted with toNative().
 *
 * @param string $filter
 * @return mixed
 */
public function evaluateExpression( $filter ) {
	$tree = $this->getTree( $filter );
	$value = $this->evalTree( $tree );

	return $value->toNative();
}
391 | |
/**
 * Fetch the syntax tree for $code through the cache, building it when the
 * cache invokes the generator callback.
 *
 * The cache key is made of this class name, the parser cache version and a
 * SHA-256 digest of the code; entries are stored with a TTL of one day.
 * A freshly built tree is tokenized, parsed, and run through SyntaxChecker
 * before being returned.
 *
 * @param string $code
 * @return AFPSyntaxTree
 */
private function getTree( $code ): AFPSyntaxTree {
	$cacheKey = $this->cache->makeGlobalKey(
		__CLASS__,
		self::getCacheVersion(),
		hash( 'sha256', $code )
	);

	$buildTree = function () use ( $code ) {
		// Reaching this point means the tree is being generated, not reused.
		$this->fromCache = false;

		$tokens = ( new AbuseFilterTokenizer( $this->cache ) )->getTokens( $code );

		$treeParser = new AFPTreeParser( $this->logger, $this->statsd, $this->keywordsManager );
		$treeParser->setFilter( $this->mFilter );
		$tree = $treeParser->parse( $tokens );

		$syntaxChecker = new SyntaxChecker(
			$tree,
			$this->keywordsManager,
			SyntaxChecker::MCONSERVATIVE,
			false
		);
		$syntaxChecker->start();

		return $tree;
	};

	// Start from "cached"; the callback above clears the flag if it runs
	// (presumably only on a cache miss - confirm against BagOStuff).
	$this->fromCache = true;

	return $this->cache->getWithSetCallback( $cacheKey, BagOStuff::TTL_DAY, $buildTree );
}
423 | |
/**
 * Evaluate a whole syntax tree starting from its root and record how long
 * the evaluation took. A tree without a root yields a DNULL value, and no
 * timing is recorded in that case.
 *
 * @param AFPSyntaxTree $tree
 * @return AFPData
 */
private function evalTree( AFPSyntaxTree $tree ): AFPData {
	$began = microtime( true );
	$rootNode = $tree->getRoot();

	if ( $rootNode ) {
		$value = $this->evalNode( $rootNode );
		$this->statsd->timing( 'abusefilter_cachingParser_eval', microtime( true ) - $began );
		return $value;
	}

	return new AFPData( AFPData::DNULL );
}
440 | |
441 | /** |
442 | * Evaluate the value of the specified AST node. |
 *
 * Dispatches on $node->type and recursively evaluates child nodes through
 * this same method. The layout of $node->children depends on the node type:
 * for ATOM it is a single token, for the other types it is a list whose
 * leading element may be an operator, keyword, function or variable name.
 * Comparisons count towards the condition limit here; function and keyword
 * calls do so inside callFunc() and callKeyword().
443 | * |
444 | * @param AFPTreeNode $node The node to evaluate. |
445 | * @return AFPData|AFPTreeNode|string |
446 | * @throws ExceptionBase |
447 | * @throws UserVisibleException |
448 | */ |
449 | private function evalNode( AFPTreeNode $node ) { |
450 | switch ( $node->type ) { |
451 | case AFPTreeNode::ATOM: |
	// For an ATOM, children is the token itself rather than a list of nodes.
452 | $tok = $node->children; |
453 | switch ( $tok->type ) { |
454 | case AFPToken::TID: |
455 | return $this->getVarValue( strtolower( $tok->value ) ); |
456 | case AFPToken::TSTRING: |
457 | return new AFPData( AFPData::DSTRING, $tok->value ); |
458 | case AFPToken::TFLOAT: |
459 | return new AFPData( AFPData::DFLOAT, $tok->value ); |
460 | case AFPToken::TINT: |
461 | return new AFPData( AFPData::DINT, $tok->value ); |
462 | /** @noinspection PhpMissingBreakStatementInspection */ |
463 | case AFPToken::TKEYWORD: |
	// Only the literals true, false and null are valid here; any other
	// keyword falls through to the InternalException below.
464 | switch ( $tok->value ) { |
465 | case "true": |
466 | return new AFPData( AFPData::DBOOL, true ); |
467 | case "false": |
468 | return new AFPData( AFPData::DBOOL, false ); |
469 | case "null": |
470 | return new AFPData( AFPData::DNULL ); |
471 | } |
472 | // Fallthrough intended |
473 | default: |
474 | // @codeCoverageIgnoreStart |
475 | throw new InternalException( "Unknown token provided in the ATOM node" ); |
476 | // @codeCoverageIgnoreEnd |
477 | } |
478 | // Unreachable line |
479 | case AFPTreeNode::ARRAY_DEFINITION: |
480 | $items = []; |
481 | // Foreach is usually faster than array_map |
482 | // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here |
483 | foreach ( $node->children as $el ) { |
484 | $items[] = $this->evalNode( $el ); |
485 | } |
486 | return new AFPData( AFPData::DARRAY, $items ); |
487 | |
488 | case AFPTreeNode::FUNCTION_CALL: |
	// children[0] is the function name; the remaining children are the
	// argument nodes, all of which are evaluated before the call is made.
489 | $functionName = $node->children[0]; |
490 | $args = array_slice( $node->children, 1 ); |
491 | |
492 | $dataArgs = []; |
493 | // Foreach is usually faster than array_map |
494 | foreach ( $args as $arg ) { |
495 | $dataArgs[] = $this->evalNode( $arg ); |
496 | } |
497 | |
498 | return $this->callFunc( $functionName, $dataArgs, $node->position ); |
499 | case AFPTreeNode::ARRAY_INDEX: |
500 | [ $array, $offset ] = $node->children; |
501 | |
502 | $array = $this->evalNode( $array ); |
503 | // Note: we MUST evaluate the offset to ensure it is valid, regardless |
504 | // of $array! |
505 | $offset = $this->evalNode( $offset ); |
506 | // @todo If $array has no elements we could already throw an outofbounds. We don't |
507 | // know what the index is, though. |
	// An undefined offset or an undefined array makes the whole lookup
	// undefined; the offset is checked first.
508 | if ( $offset->getType() === AFPData::DUNDEFINED ) { |
509 | return new AFPData( AFPData::DUNDEFINED ); |
510 | } |
511 | $offset = $offset->toInt(); |
512 | |
513 | if ( $array->getType() === AFPData::DUNDEFINED ) { |
514 | return new AFPData( AFPData::DUNDEFINED ); |
515 | } |
516 | |
517 | if ( $array->getType() !== AFPData::DARRAY ) { |
518 | throw new UserVisibleException( 'notarray', $node->position, [] ); |
519 | } |
520 | |
521 | $array = $array->toArray(); |
	// The upper bound is tested first; a negative offset never satisfies
	// it and so reaches the 'negativeindex' branch.
522 | if ( count( $array ) <= $offset ) { |
523 | throw new UserVisibleException( 'outofbounds', $node->position, |
524 | [ $offset, count( $array ) ] ); |
525 | } elseif ( $offset < 0 ) { |
526 | throw new UserVisibleException( 'negativeindex', $node->position, [ $offset ] ); |
527 | } |
528 | |
529 | return $array[$offset]; |
530 | |
531 | case AFPTreeNode::UNARY: |
	// Any operator other than '-' returns the evaluated operand unchanged.
532 | [ $operation, $argument ] = $node->children; |
533 | $argument = $this->evalNode( $argument ); |
534 | if ( $operation === '-' ) { |
535 | return $argument->unaryMinus(); |
536 | } |
537 | return $argument; |
538 | |
539 | case AFPTreeNode::KEYWORD_OPERATOR: |
	// Both operands are evaluated before the keyword handler is invoked.
540 | [ $keyword, $leftOperand, $rightOperand ] = $node->children; |
541 | $leftOperand = $this->evalNode( $leftOperand ); |
542 | $rightOperand = $this->evalNode( $rightOperand ); |
543 | |
544 | return $this->callKeyword( $keyword, $leftOperand, $rightOperand, $node->position ); |
545 | case AFPTreeNode::BOOL_INVERT: |
546 | [ $argument ] = $node->children; |
547 | $argument = $this->evalNode( $argument ); |
548 | return $argument->boolInvert(); |
549 | |
550 | case AFPTreeNode::POW: |
551 | [ $base, $exponent ] = $node->children; |
552 | $base = $this->evalNode( $base ); |
553 | $exponent = $this->evalNode( $exponent ); |
554 | return $base->pow( $exponent ); |
555 | |
556 | case AFPTreeNode::MUL_REL: |
557 | [ $op, $leftOperand, $rightOperand ] = $node->children; |
558 | $leftOperand = $this->evalNode( $leftOperand ); |
559 | $rightOperand = $this->evalNode( $rightOperand ); |
560 | return $leftOperand->mulRel( $rightOperand, $op, $node->position ); |
561 | |
562 | case AFPTreeNode::SUM_REL: |
563 | [ $op, $leftOperand, $rightOperand ] = $node->children; |
564 | $leftOperand = $this->evalNode( $leftOperand ); |
565 | $rightOperand = $this->evalNode( $rightOperand ); |
566 | switch ( $op ) { |
567 | case '+': |
568 | return $leftOperand->sum( $rightOperand ); |
569 | case '-': |
570 | return $leftOperand->sub( $rightOperand ); |
571 | default: |
572 | // @codeCoverageIgnoreStart |
573 | throw new InternalException( "Unknown sum-related operator: {$op}" ); |
574 | // @codeCoverageIgnoreEnd |
575 | } |
576 | // Unreachable line |
577 | case AFPTreeNode::COMPARE: |
578 | [ $op, $leftOperand, $rightOperand ] = $node->children; |
579 | $leftOperand = $this->evalNode( $leftOperand ); |
580 | $rightOperand = $this->evalNode( $rightOperand ); |
	// Every comparison counts as one condition, after both operands have
	// been evaluated.
581 | $this->raiseCondCount(); |
582 | return $leftOperand->compareOp( $rightOperand, $op ); |
583 | |
584 | case AFPTreeNode::LOGIC: |
585 | [ $op, $leftOperand, $rightOperand ] = $node->children; |
586 | $leftOperand = $this->evalNode( $leftOperand ); |
	// An undefined left operand is treated as false for the purpose of
	// deciding whether to short-circuit.
587 | $value = $leftOperand->getType() === AFPData::DUNDEFINED ? false : $leftOperand->toBool(); |
588 | // Short-circuit. |
	// On short-circuit the right operand is handed to maybeDiscardNode()
	// instead of being evaluated here, and the left operand itself (not a
	// boolean cast of it) is returned.
589 | if ( ( !$value && $op === '&' ) || ( $value && $op === '|' ) ) { |
590 | if ( $rightOperand instanceof AFPTreeNode ) { |
591 | $this->maybeDiscardNode( $rightOperand ); |
592 | } |
593 | return $leftOperand; |
594 | } |
595 | $rightOperand = $this->evalNode( $rightOperand ); |
596 | return $leftOperand->boolOp( $rightOperand, $op ); |
597 | |
598 | case AFPTreeNode::CONDITIONAL: |
599 | [ $condition, $valueIfTrue, $valueIfFalse ] = $node->children; |
600 | $condition = $this->evalNode( $condition ); |
	// An undefined condition selects the "false" branch. The branch not
	// taken goes to maybeDiscardNode() before the taken one is evaluated.
601 | $isTrue = $condition->getType() === AFPData::DUNDEFINED ? false : $condition->toBool(); |
602 | if ( $isTrue ) { |
603 | if ( $valueIfFalse !== null ) { |
604 | $this->maybeDiscardNode( $valueIfFalse ); |
605 | } |
606 | return $this->evalNode( $valueIfTrue ); |
607 | } else { |
608 | $this->maybeDiscardNode( $valueIfTrue ); |
609 | return $valueIfFalse !== null |
610 | ? $this->evalNode( $valueIfFalse ) |
611 | // We assume null as default if the else is missing |
612 | : new AFPData( AFPData::DNULL ); |
613 | } |
614 | |
615 | case AFPTreeNode::ASSIGNMENT: |
	// The result of an assignment is the assigned value.
616 | [ $varName, $value ] = $node->children; |
617 | $value = $this->evalNode( $value ); |
618 | $this->setUserVariable( $varName, $value ); |
619 | return $value; |
620 | |
621 | case AFPTreeNode::INDEX_ASSIGNMENT: |
622 | [ $varName, $offset, $value ] = $node->children; |
623 | |
624 | $array = $this->getVarValue( $varName ); |
625 | |
	// The type check on the target happens before the offset expression
	// is evaluated.
626 | if ( $array->getType() !== AFPData::DARRAY && $array->getType() !== AFPData::DUNDEFINED ) { |
627 | throw new UserVisibleException( 'notarray', $node->position, [] ); |
628 | } |
629 | |
630 | $offset = $this->evalNode( $offset ); |
631 | // @todo If $array has no elements we could already throw an outofbounds. We don't |
632 | // know what the index is, though. |
633 | |
	// The value expression is evaluated in each of the three non-throwing
	// paths below. With a defined array and an undefined offset, the
	// variable is overwritten with DUNDEFINED.
634 | if ( $array->getType() !== AFPData::DUNDEFINED ) { |
635 | // If it's a DUNDEFINED, leave it as is |
636 | if ( $offset->getType() !== AFPData::DUNDEFINED ) { |
637 | $offset = $offset->toInt(); |
638 | $array = $array->toArray(); |
639 | if ( count( $array ) <= $offset ) { |
640 | throw new UserVisibleException( 'outofbounds', $node->position, |
641 | [ $offset, count( $array ) ] ); |
642 | } elseif ( $offset < 0 ) { |
643 | throw new UserVisibleException( 'negativeindex', $node->position, [ $offset ] ); |
644 | } |
645 | |
646 | $value = $this->evalNode( $value ); |
647 | $array[$offset] = $value; |
648 | $array = new AFPData( AFPData::DARRAY, $array ); |
649 | } else { |
650 | $value = $this->evalNode( $value ); |
651 | $array = new AFPData( AFPData::DUNDEFINED ); |
652 | } |
653 | $this->setUserVariable( $varName, $array ); |
654 | } else { |
655 | $value = $this->evalNode( $value ); |
656 | } |
657 | |
658 | return $value; |
659 | |
660 | case AFPTreeNode::ARRAY_APPEND: |
661 | [ $varName, $value ] = $node->children; |
662 | |
663 | $array = $this->getVarValue( $varName ); |
	// Unlike INDEX_ASSIGNMENT, the value is evaluated before the target's
	// type is checked.
664 | $value = $this->evalNode( $value ); |
665 | if ( $array->getType() !== AFPData::DUNDEFINED ) { |
666 | // If it's a DUNDEFINED, leave it as is |
667 | if ( $array->getType() !== AFPData::DARRAY ) { |
668 | throw new UserVisibleException( 'notarray', $node->position, [] ); |
669 | } |
670 | |
671 | $array = $array->toArray(); |
672 | $array[] = $value; |
673 | $this->setUserVariable( $varName, new AFPData( AFPData::DARRAY, $array ) ); |
674 | } |
675 | return $value; |
676 | |
677 | case AFPTreeNode::SEMICOLON: |
	// Statements run in order; the value of the last one is the result.
678 | $lastValue = null; |
679 | // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach children is array here |
680 | foreach ( $node->children as $statement ) { |
681 | $lastValue = $this->evalNode( $statement ); |
682 | } |
683 | |
684 | // @phan-suppress-next-next-line PhanTypeMismatchReturnNullable Can never be null because |
685 | // empty statements are discarded in AFPTreeParser |
686 | return $lastValue; |
687 | default: |
688 | // @codeCoverageIgnoreStart |
689 | throw new InternalException( "Unknown node type passed: {$node->type}" ); |
690 | // @codeCoverageIgnoreEnd |
691 | } |
692 | } |
693 | |
/**
 * Invoke a built-in function, reusing a previously computed result when the
 * same handler was already called with the same arguments.
 *
 * Results are memoized in $this->funcCache under a hash of the handler name
 * and the serialized arguments. Handlers listed in ACTIVE_FUNCTIONS are never
 * served from that cache. A cache hit does not count towards the condition
 * limit; an actual call does. The cache is emptied once it holds more than
 * 1000 entries.
 *
 * @param string $fname The name of the function as found in the filter code
 * @param AFPData[] $args Arguments for the function
 * @param int $position
 * @return AFPData The return value of the function
 * @throws InvalidArgumentException if given an invalid func
 */
private function callFunc( $fname, array $args, int $position ): AFPData {
	if ( !array_key_exists( $fname, self::FUNCTIONS ) ) {
		// @codeCoverageIgnoreStart
		throw new InvalidArgumentException( "$fname is not a valid function." );
		// @codeCoverageIgnoreEnd
	}

	$handler = self::FUNCTIONS[$fname];
	$cacheKey = md5( $handler . serialize( $args ) );
	$isActive = in_array( $handler, self::ACTIVE_FUNCTIONS );

	if ( !$isActive && isset( $this->funcCache[$cacheKey] ) ) {
		$result = $this->funcCache[$cacheKey];
	} else {
		$this->raiseCondCount();

		// Handlers do not special-case undefined arguments, yet they still have to
		// run because they may validate their input (T234339). Feed them DNULL in
		// place of DUNDEFINED and discard whatever they return.
		// @todo This is subpar.
		$sawUndefined = false;
		foreach ( $args as $idx => $arg ) {
			if ( !$arg->hasUndefined() ) {
				continue;
			}
			$args[$idx] = $arg->cloneAsUndefinedReplacedWithNull();
			$sawUndefined = true;
		}

		// @phan-suppress-next-line PhanParamTooMany Not every function needs the position
		$returned = $this->$handler( $args, $position );
		$result = $sawUndefined ? new AFPData( AFPData::DUNDEFINED ) : $returned;

		$this->funcCache[$cacheKey] = $result;
	}

	if ( count( $this->funcCache ) > 1000 ) {
		// @codeCoverageIgnoreStart
		$this->funcCache = [];
		// @codeCoverageIgnoreEnd
	}

	return $result;
}
749 | |
/**
 * Invoke the handler of a built-in keyword operator. $kname is assumed to be
 * a valid keyword name; it is not validated here.
 *
 * Every call counts towards the condition limit. If either operand contains
 * an undefined value, the handler is still run (with DNULL substituted for
 * the undefined parts) but its return value is dropped and DUNDEFINED is
 * returned instead.
 *
 * @param string $kname
 * @param AFPData $lhs
 * @param AFPData $rhs
 * @param int $position
 * @return AFPData
 */
private function callKeyword( $kname, AFPData $lhs, AFPData $rhs, int $position ): AFPData {
	$handler = self::KEYWORDS[$kname];
	$this->raiseCondCount();

	$lhsUndefined = $lhs->hasUndefined();
	if ( $lhsUndefined ) {
		$lhs = $lhs->cloneAsUndefinedReplacedWithNull();
	}

	$rhsUndefined = $rhs->hasUndefined();
	if ( $rhsUndefined ) {
		$rhs = $rhs->cloneAsUndefinedReplacedWithNull();
	}

	// The handler must run even with bogus operands, for the same reason as in
	// self::callFunc (T234339).
	// @todo Likewise, this is subpar.
	// @phan-suppress-next-line PhanParamTooMany Not every function needs the position
	$returned = $this->$handler( $lhs, $rhs, $position );

	if ( $lhsUndefined || $rhsUndefined ) {
		return new AFPData( AFPData::DUNDEFINED );
	}

	return $returned;
}
785 | |
/**
 * Tell whether a variable is known: either the keywords manager reports it
 * as in use, or it is set in the current variable holder. Disabled
 * variables are not included.
 *
 * @param string $varname
 * @return bool
 */
private function varExists( $varname ) {
	if ( $this->keywordsManager->isVarInUse( $varname ) ) {
		return true;
	}

	return (bool)$this->mVariables->varIsSet( $varname );
}
797 | |
/**
 * Fetch the value of a variable from the variables manager.
 *
 * The name is lowercased and, if it is a key of the deprecated-variables map,
 * replaced with the mapped name before the lookup.
 *
 * @param string $var Variable name, case-insensitive
 * @return AFPData
 * @throws UserVisibleException
 */
private function getVarValue( $var ) {
	$name = strtolower( $var );

	$renamed = $this->keywordsManager->getDeprecatedVariables();
	if ( array_key_exists( $name, $renamed ) ) {
		$name = $renamed[ $name ];
	}

	// Unbound variables are already reported by the syntax check, so at runtime
	// they are not treated as an error and may simply yield DUNDEFINED.
	$lax = !$this->varExists( $name ) || $this->allowMissingVariables;

	if ( $lax ) {
		$flags = VariablesManager::GET_LAX;
	} else {
		// Here the variable is a built-in, non-disabled one (set or unset), or a
		// custom variable that has been set.
		// TODO: This should be GET_STRICT, but that's going to be very hard (see T230256)
		$flags = VariablesManager::GET_BC;
	}

	return $this->varManager->getVar( $this->mVariables, $name, $flags );
}
822 | |
/**
 * Store a user-defined variable in the current variable holder.
 *
 * @param string $name Name of the variable to set
 * @param mixed $value Value to associate with it
 * @throws UserVisibleException
 */
private function setUserVariable( $name, $value ) {
	$holder = $this->mVariables;
	$holder->setVar( $name, $value );
}
831 | |
832 | // Built-in functions |
833 | |
/**
 * Built-in function: lowercase the first argument using the content language.
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData A DSTRING
 */
private function funcLc( $args ) {
	$lowered = $this->contLang->lc( $args[0]->toString() );
	return new AFPData( AFPData::DSTRING, $lowered );
}
843 | |
/**
 * Built-in function: uppercase the first argument using the content language.
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData A DSTRING
 */
private function funcUc( $args ) {
	$raised = $this->contLang->uc( $args[0]->toString() );
	return new AFPData( AFPData::DSTRING, $raised );
}
853 | |
/**
 * Built-in function: length of the first argument.
 *
 * Arrays report their element count; anything else is converted to a string
 * and measured in UTF-8 characters.
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData A DINT
 */
private function funcLen( $args ) {
	$subject = $args[0];

	// Arrays must be counted, not stringified.
	$length = $subject->type === AFPData::DARRAY
		? count( $subject->data )
		: mb_strlen( $subject->toString(), 'utf-8' );

	return new AFPData( AFPData::DINT, $length );
}
868 | |
/**
 * Built-in function: share of characters in the first argument that are
 * removed by rmspecials(), as a value between 0 and 1.
 *
 * An empty string yields 0 without any further computation.
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData A DFLOAT
 */
private function funcSpecialRatio( $args ) {
	$text = $args[0]->toString();

	if ( strlen( $text ) === 0 ) {
		// Avoid dividing by zero below.
		return new AFPData( AFPData::DFLOAT, 0 );
	}

	$stripped = $this->rmspecials( $text );
	$keptShare = mb_strlen( $stripped ) / mb_strlen( $text );

	return new AFPData( AFPData::DFLOAT, 1. - $keptShare );
}
886 | |
/**
 * Built-in function: count things.
 *
 * - One array argument: number of elements.
 * - One non-array argument: number of comma-separated pieces of its string form.
 * - Two arguments (needle, haystack): number of occurrences of needle in
 *   haystack; an empty needle counts as 0.
 *
 * @param AFPData[] $args
 * @return AFPData A DINT
 */
private function funcCount( $args ) {
	if ( count( $args ) === 1 ) {
		$only = $args[0];
		if ( $only->type === AFPData::DARRAY ) {
			return new AFPData( AFPData::DINT, count( $only->data ) );
		}
		$pieces = explode( ',', $only->toString() );
		return new AFPData( AFPData::DINT, count( $pieces ) );
	}

	$needle = $args[0]->toString();
	$haystack = $args[1]->toString();

	// T62203: Keep empty parameters from causing PHP warnings
	$occurrences = $needle === '' ? 0 : substr_count( $haystack, $needle );

	return new AFPData( AFPData::DINT, $occurrences );
}
912 | |
/**
 * Built-in function: regex flavour of count.
 *
 * With a single argument, returns the number of comma-separated pieces of its
 * string form. With two arguments (pattern, haystack), returns the number of
 * matches of the pattern in the haystack.
 *
 * @param AFPData[] $args
 * @param int $position Used for the exception and for the empty-match warning
 * @return AFPData A DINT
 * @throws UserVisibleException 'regexfailure' when preg_match_all() fails
 */
private function funcRCount( $args, int $position ) {
	if ( count( $args ) === 1 ) {
		$pieces = explode( ',', $args[0]->toString() );
		return new AFPData( AFPData::DINT, count( $pieces ) );
	}

	$rawPattern = $args[0]->toString();
	$haystack = $args[1]->toString();
	$pattern = $this->mungeRegexp( $rawPattern );

	$this->checkRegexMatchesEmpty( $args[0], $pattern, $position );
	// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
	$matchCount = @preg_match_all( $pattern, $haystack );

	if ( $matchCount === false ) {
		throw new UserVisibleException( 'regexfailure', $position, [ $pattern ] );
	}

	return new AFPData( AFPData::DINT, $matchCount );
}
943 | |
/**
 * Built-in function: match a regex (first argument) against a haystack (second
 * argument) and return the matches as a fixed-size array: index 0 for the
 * whole match, then one slot per capturing group. Slots without a match hold
 * false.
 *
 * @param AFPData[] $args
 * @param int $position Used for the exception and for the empty-match warning
 * @return AFPData An array of matches.
 * @throws UserVisibleException 'regexfailure' when preg_match() fails
 */
private function funcGetMatches( $args, int $position ) {
	$rawPattern = $args[0]->toString();
	$haystack = $args[1]->toString();

	// Estimate the number of capturing groups so that the result always has the
	// same dimension, which is much easier to manage for filter authors.
	// ToDo: Find a better way to do this.
	// Step 1: drop escaped opening parentheses.
	$withoutEscaped = preg_replace( '/(\\\\\\\\)*\\\\\(/', '', $rawPattern );
	// Step 2: drop the openers of non-capturing constructs ("(?", which covers
	// atomics, lookaheads and so on, even if not all of them are supported) and
	// of backtracking verbs such as (*FAIL) ("(*").
	$capturingOnly = str_replace( [ '(?', '(*' ], '', $withoutEscaped );
	// Step 3: every remaining "(" is one group; +1 for the whole-match slot.
	$slots = substr_count( $capturingOnly, '(' ) + 1;
	$placeholders = array_fill( 0, $slots, false );

	$pattern = $this->mungeRegexp( $rawPattern );

	$this->checkRegexMatchesEmpty( $args[0], $pattern, $position );
	// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
	$status = @preg_match( $pattern, $haystack, $matches );

	if ( $status === false ) {
		throw new UserVisibleException( 'regexfailure', $position, [ $pattern ] );
	}

	// Array union: positions filled by preg_match() win, the remaining ones are
	// taken from the placeholders, giving the fixed-dimension result.
	return AFPData::newFromPHPVar( $matches + $placeholders );
}
990 | |
/**
 * Built-in function: check whether an IP (first argument) lies within a range
 * (second argument). The range may also be a single IP address.
 *
 * @param AFPData[] $args
 * @param int $position Used for the exception
 * @return AFPData A DBOOL
 * @throws UserVisibleException 'invalidiprange' when the second argument is
 *  neither a valid range nor an IP address
 */
private function funcIPInRange( $args, int $position ) {
	$ip = $args[0]->toString();
	$range = $args[1]->toString();

	$usable = IPUtils::isValidRange( $range ) || IPUtils::isIPAddress( $range );
	if ( !$usable ) {
		throw new UserVisibleException( 'invalidiprange', $position, [ $range ] );
	}

	return new AFPData( AFPData::DBOOL, IPUtils::isInRange( $ip, $range ) );
}
1013 | |
/**
 * Built-in function: check whether an IP (first argument) lies within any of
 * the ranges given as the remaining arguments. Each range may also be a single
 * IP address. All ranges are validated before the membership test is made.
 *
 * @param AFPData[] $args
 * @param int $position Used for the exception
 * @return AFPData A DBOOL
 * @throws UserVisibleException 'invalidiprange' for the first argument that is
 *  neither a valid range nor an IP address
 */
private function funcIPInRanges( $args, int $position ) {
	$ip = array_shift( $args )->toString();

	$ranges = [];
	foreach ( $args as $rangeArg ) {
		$candidate = $rangeArg->toString();

		if ( IPUtils::isValidRange( $candidate ) || IPUtils::isIPAddress( $candidate ) ) {
			$ranges[] = $candidate;
			continue;
		}

		throw new UserVisibleException( 'invalidiprange', $position, [ $candidate ] );
	}

	return new AFPData( AFPData::DBOOL, IPUtils::isInRanges( $ip, $ranges ) );
}
1040 | |
/**
 * Built-in function: decode HTML entities in the first argument, then pass the
 * result through ccnorm().
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData A DSTRING
 */
private function funcCCNorm( $args ) {
	$decoded = html_entity_decode( $args[0]->toString(), ENT_QUOTES, 'UTF-8' );

	return new AFPData( AFPData::DSTRING, $this->ccnorm( $decoded ) );
}
1053 | |
/**
 * Built-in function: decode HTML entities in the first argument, then decode
 * character references via Sanitizer::decodeCharReferences().
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData A DSTRING
 */
private function funcSanitize( $args ) {
	$decoded = html_entity_decode( $args[0]->toString(), ENT_QUOTES, 'UTF-8' );

	return new AFPData( AFPData::DSTRING, Sanitizer::decodeCharReferences( $decoded ) );
}
1066 | |
/**
 * Built-in function: does the first argument contain at least one of the
 * remaining arguments as a substring? No normalization is applied.
 *
 * @param AFPData[] $args Haystack followed by the needles
 * @return AFPData A DBOOL
 */
private function funcContainsAny( $args ) {
	$haystack = array_shift( $args );
	$found = $this->contains( $haystack, $args, true );

	return new AFPData( AFPData::DBOOL, $found );
}
1076 | |
/**
 * Built-in function: does the first argument contain every one of the
 * remaining arguments as a substring? No normalization is applied.
 *
 * @param AFPData[] $args Haystack followed by the needles
 * @return AFPData A DBOOL
 */
private function funcContainsAll( $args ) {
	$haystack = array_shift( $args );
	$found = $this->contains( $haystack, $args, false, false );

	return new AFPData( AFPData::DBOOL, $found );
}
1086 | |
/**
 * Built-in function: like funcContainsAny(), but both the haystack and the
 * needles go through ccnorm() first (OR mode).
 *
 * @param AFPData[] $args Haystack followed by the needles
 * @return AFPData A DBOOL
 */
private function funcCCNormContainsAny( $args ) {
	$haystack = array_shift( $args );
	$found = $this->contains( $haystack, $args, true, true );

	return new AFPData( AFPData::DBOOL, $found );
}
1098 | |
/**
 * Built-in function: like funcContainsAll(), but both the haystack and the
 * needles go through ccnorm() first (AND mode).
 *
 * @param AFPData[] $args Haystack followed by the needles
 * @return AFPData A DBOOL
 */
private function funcCCNormContainsAll( $args ) {
	$haystack = array_shift( $args );
	$found = $this->contains( $haystack, $args, false, true );

	return new AFPData( AFPData::DBOOL, $found );
}
1110 | |
/**
 * Substring search of several needles in one haystack.
 *
 * $is_any selects the combination logic: true for OR (one hit is enough),
 * false for AND (every needle must be present). With $normalize = true both
 * the haystack and each needle are passed through ccnorm() before comparing.
 *
 * An empty haystack always yields false. Needles that are empty (after
 * optional normalization) are skipped and do not influence the outcome.
 *
 * @param AFPData $string The haystack
 * @param AFPData[] $values The needles
 * @param bool $is_any
 * @param bool $normalize
 *
 * @return bool
 */
private function contains( $string, $values, $is_any = true, $normalize = false ) {
	$haystack = $string->toString();
	if ( $haystack === '' ) {
		return false;
	}
	if ( $normalize ) {
		$haystack = $this->ccnorm( $haystack );
	}

	foreach ( $values as $value ) {
		$needle = $normalize
			? $this->ccnorm( $value->toString() )
			: $value->toString();

		// T62203: Keep empty parameters from causing PHP warnings
		if ( $needle === '' ) {
			continue;
		}

		$found = strpos( $haystack, $needle ) !== false;
		// OR mode stops at the first hit, AND mode stops at the first miss; in
		// both situations the current outcome is the final answer.
		if ( $found === $is_any ) {
			return $found;
		}
	}

	// Loop exhausted: OR mode found nothing (false), AND mode found everything (true).
	return !$is_any;
}
1160 | |
/**
 * Built-in function: is the first argument equal to any of the remaining ones?
 *
 * @param AFPData[] $args Value to look for, followed by the candidates
 * @return AFPData A DBOOL
 */
private function funcEqualsToAny( $args ) {
	$subject = array_shift( $args );
	$matched = self::equalsToAny( $subject, $args );

	return new AFPData( AFPData::DBOOL, $matched );
}
1170 | |
/**
 * Tell whether $string equals at least one element of $values, comparing with
 * AFPData::equals() and its second argument set to true.
 *
 * @param AFPData $string Value to look for
 * @param AFPData[] $values Candidates, checked in order
 *
 * @return bool False when $values is empty or nothing matches
 */
private static function equalsToAny( $string, $values ) {
	$matched = false;
	foreach ( $values as $candidate ) {
		if ( $string->equals( $candidate, true ) ) {
			$matched = true;
			break;
		}
	}

	return $matched;
}
1188 | |
/**
 * Normalize a string by delegating to the equivset's normalize() method.
 *
 * @param string $s
 * @return string
 */
private function ccnorm( $s ): string {
	$normalized = $this->equivset->normalize( $s );
	return $normalized;
}
1196 | |
/**
 * Remove every character that is not a Unicode letter, a Unicode number or
 * whitespace.
 *
 * @param string $s
 * @return string|null Whatever preg_replace() returns for a string subject
 */
private function rmspecials( $s ) {
	$stripped = preg_replace( '/[^\p{L}\p{N}\s]/u', '', $s );
	return $stripped;
}
1204 | |
/**
 * Collapse every run of the same character repeated consecutively into a
 * single occurrence of that character.
 *
 * @param string $s
 * @return string|null Whatever preg_replace() returns for a string subject
 */
private function rmdoubles( $s ) {
	$collapsed = preg_replace( '/(.)\1+/us', '\1', $s );
	return $collapsed;
}
1212 | |
/**
 * Remove all whitespace from a string.
 *
 * @param string $s
 * @return string|null Whatever preg_replace() returns for a string subject
 */
private function rmwhitespace( $s ) {
	$compact = preg_replace( '/\s+/u', '', $s );
	return $compact;
}
1220 | |
/**
 * Built-in function: apply rmspecials() to the first argument.
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData A DSTRING
 */
private function funcRMSpecials( $args ) {
	$stripped = $this->rmspecials( $args[0]->toString() );
	return new AFPData( AFPData::DSTRING, $stripped );
}
1230 | |
/**
 * Built-in function: apply rmwhitespace() to the first argument.
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData A DSTRING
 */
private function funcRMWhitespace( $args ) {
	$compact = $this->rmwhitespace( $args[0]->toString() );
	return new AFPData( AFPData::DSTRING, $compact );
}
1240 | |
/**
 * Built-in function: apply rmdoubles() to the first argument.
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData A DSTRING
 */
private function funcRMDoubles( $args ) {
	$collapsed = $this->rmdoubles( $args[0]->toString() );
	return new AFPData( AFPData::DSTRING, $collapsed );
}
1250 | |
/**
 * Built-in function: full normalization of the first argument. The steps are
 * applied in this order: ccnorm(), rmdoubles(), rmspecials(), rmwhitespace().
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData A DSTRING
 */
private function funcNorm( $args ) {
	$text = $args[0]->toString();

	// Innermost call runs first, so this reads right-to-left.
	$normalized = $this->rmwhitespace(
		$this->rmspecials(
			$this->rmdoubles(
				$this->ccnorm( $text )
			)
		)
	);

	return new AFPData( AFPData::DSTRING, $normalized );
}
1265 | |
/**
 * Built-in function: multibyte substring of the first argument, starting at
 * the offset given by the second argument. The optional third argument is the
 * length; when it is absent, null is passed to mb_substr().
 *
 * @param AFPData[] $args
 * @return AFPData A DSTRING
 */
private function funcSubstr( $args ) {
	$text = $args[0]->toString();
	$start = $args[1]->toInt();

	$span = null;
	if ( isset( $args[2] ) ) {
		$span = $args[2]->toInt();
	}

	return new AFPData( AFPData::DSTRING, mb_substr( $text, $start, $span ) );
}
1279 | |
/**
 * Built-in function: multibyte position of a needle (second argument) in a
 * haystack (first argument), optionally starting from an offset (third
 * argument, default 0). Negative offsets count from the end of the haystack.
 *
 * Returns -1 instead of a position when the needle is empty, when the offset
 * lies outside the haystack in either direction, or when the needle is not found.
 *
 * @param AFPData[] $args
 * @return AFPData A DINT
 */
private function funcStrPos( $args ) {
	$haystack = $args[0]->toString();
	$needle = $args[1]->toString();
	$offset = isset( $args[2] ) ? $args[2]->toInt() : 0;

	// T62203: Keep empty parameters from causing PHP warnings
	if ( $needle === '' ) {
		return new AFPData( AFPData::DINT, -1 );
	}
	// Special handling for when the offset is not contained in $haystack. PHP can emit a warning
	// or throw an error depending on the version (T285978). TODO Should we also throw?
	// The magnitude is checked so that negative offsets pointing before the start
	// of the haystack are caught as well as positive ones pointing past its end.
	if ( abs( $offset ) > mb_strlen( $haystack ) ) {
		return new AFPData( AFPData::DINT, -1 );
	}
	$result = mb_strpos( $haystack, $needle, $offset );

	if ( $result === false ) {
		$result = -1;
	}

	return new AFPData( AFPData::DINT, $result );
}
1306 | |
/**
 * Built-in function: plain (non-regex) string replacement.
 *
 * @param AFPData[] $args Subject, search string, replacement - in this order
 * @return AFPData A DSTRING
 */
private function funcStrReplace( $args ) {
	$subject = $args[0]->toString();
	$search = $args[1]->toString();
	$replacement = $args[2]->toString();

	$replaced = str_replace( $search, $replacement, $subject );

	return new AFPData( AFPData::DSTRING, $replaced );
}
1318 | |
/**
 * Built-in function: regex-based string replacement.
 *
 * The search pattern is given in the AF syntax and is converted with
 * mungeRegexp() exactly once; that converted pattern is used both for the
 * empty-match warning check and for the replacement itself.
 *
 * @param AFPData[] $args Subject, search pattern, replacement - in this order
 * @param int $position Used for the exception and for the empty-match warning
 * @return AFPData A DSTRING
 * @throws UserVisibleException 'regexfailure' when preg_replace() fails
 */
private function funcStrReplaceRegexp( $args, int $position ) {
	$subject = $args[0]->toString();
	$search = $args[1]->toString();
	$replace = $args[2]->toString();

	$pattern = $this->mungeRegexp( $search );

	// checkRegexMatchesEmpty() expects an already munged pattern; handing it the
	// raw one would test a string without proper delimiters.
	$this->checkRegexMatchesEmpty( $args[1], $pattern, $position );
	// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
	$result = @preg_replace( $pattern, $replace, $subject );

	if ( $result === null ) {
		throw new UserVisibleException(
			'regexfailure',
			$position,
			[ $search ]
		);
	}

	return new AFPData( AFPData::DSTRING, $result );
}
1347 | |
/**
 * Built-in function: escape regex special characters in the first argument.
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData A DSTRING
 */
private function funcStrRegexEscape( $args ) {
	// No delimiter is passed to preg_quote(): rlike takes care of the delimiter
	// symbol itself.
	$escaped = preg_quote( $args[0]->toString() );

	return new AFPData( AFPData::DSTRING, $escaped );
}
1359 | |
/**
 * Built-in function: assign the second argument to the user variable named by
 * the string form of the first argument.
 *
 * @param array $args Variable name, then the value to store
 * @return mixed The second argument, returned unchanged
 */
private function funcSetVar( $args ) {
	$assigned = $args[1];
	$this->setUserVariable( $args[0]->toString(), $assigned );

	return $assigned;
}
1372 | |
/**
 * Checks if $a contains $b, comparing their string forms.
 *
 * The answer is false whenever either string is empty.
 *
 * @param AFPData $a The haystack
 * @param AFPData $b The needle
 * @return AFPData A DBOOL
 */
private function containmentKeyword( AFPData $a, AFPData $b ) {
	$haystack = $a->toString();
	$needle = $b->toString();

	$found = $haystack !== ''
		&& $needle !== ''
		&& strpos( $haystack, $needle ) !== false;

	return new AFPData( AFPData::DBOOL, $found );
}
1390 | |
/**
 * Keyword handler for "in": true when $a is contained in $b. Implemented by
 * swapping the operands of containmentKeyword().
 *
 * @param AFPData $a The needle
 * @param AFPData $b The haystack
 * @return AFPData A DBOOL
 */
private function keywordIn( AFPData $a, AFPData $b ) {
	$contained = $this->containmentKeyword( $b, $a );
	return $contained;
}
1399 | |
/**
 * Keyword handler for "contains": true when $a contains $b.
 *
 * @param AFPData $a The haystack
 * @param AFPData $b The needle
 * @return AFPData A DBOOL
 */
private function keywordContains( AFPData $a, AFPData $b ) {
	$contained = $this->containmentKeyword( $a, $b );
	return $contained;
}
1408 | |
/**
 * Keyword handler for "like": match a string against a wildcard pattern.
 *
 * The pattern is regex-quoted, its wildcards are translated through
 * AFPData::WILDCARD_MAP, and the result is anchored at both ends. A failing
 * preg_match() is reported as "no match".
 *
 * @param AFPData $str The string to test
 * @param AFPData $pattern The wildcard pattern
 * @return AFPData A DBOOL
 */
private function keywordLike( AFPData $str, AFPData $pattern ) {
	$subject = $str->toString();

	$quoted = preg_quote( $pattern->toString(), '#' );
	$regex = '#^' . strtr( $quoted, AFPData::WILDCARD_MAP ) . '$#u';

	// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
	$matched = @preg_match( $regex, $subject );

	return new AFPData( AFPData::DBOOL, (bool)$matched );
}
1422 | |
/**
 * Keyword handler for regex matching: test a string against a pattern written
 * in the AF syntax.
 *
 * @param AFPData $str The string to test
 * @param AFPData $regex The pattern, in AF syntax
 * @param int $pos Used for the exception and for the empty-match warning
 * @param bool $insensitive Whether to append the "i" modifier to the pattern
 * @return AFPData A DBOOL
 * @throws Exception In practice a UserVisibleException with 'regexfailure'
 *  when preg_match() fails
 */
private function keywordRegex( AFPData $str, AFPData $regex, $pos, $insensitive = false ) {
	$subject = $str->toString();

	$pattern = $this->mungeRegexp( $regex->toString() ) . ( $insensitive ? 'i' : '' );

	$this->checkRegexMatchesEmpty( $regex, $pattern, $pos );
	// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
	$matched = @preg_match( $pattern, $subject );

	if ( $matched !== false ) {
		return new AFPData( AFPData::DBOOL, (bool)$matched );
	}

	throw new UserVisibleException(
		'regexfailure',
		// Coverage bug
		// @codeCoverageIgnoreStart
		$pos,
		// @codeCoverageIgnoreEnd
		[ $pattern ]
	);
}
1457 | |
/**
 * Keyword handler for case-insensitive regex matching; a thin wrapper around
 * keywordRegex() with $insensitive set to true.
 *
 * @param AFPData $str The string to test
 * @param AFPData $regex The pattern, in AF syntax
 * @param int $pos Passed through to keywordRegex()
 * @return AFPData A DBOOL
 */
private function keywordRegexInsensitive( AFPData $str, AFPData $regex, $pos ) {
	$outcome = $this->keywordRegex( $str, $regex, $pos, true );
	return $outcome;
}
1467 | |
/**
 * Built-in function: cast the first argument to DSTRING.
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData
 */
private function castString( $args ) {
	$source = $args[0];
	return AFPData::castTypes( $source, AFPData::DSTRING );
}
1475 | |
/**
 * Built-in function: cast the first argument to DINT.
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData
 */
private function castInt( $args ) {
	$source = $args[0];
	return AFPData::castTypes( $source, AFPData::DINT );
}
1483 | |
/**
 * Built-in function: cast the first argument to DFLOAT.
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData
 */
private function castFloat( $args ) {
	$source = $args[0];
	return AFPData::castTypes( $source, AFPData::DFLOAT );
}
1491 | |
/**
 * Built-in function: cast the first argument to DBOOL.
 *
 * @param AFPData[] $args Only $args[0] is read
 * @return AFPData
 */
private function castBool( $args ) {
	$source = $args[0];
	return AFPData::castTypes( $source, AFPData::DBOOL );
}
1499 | |
/**
 * Handle a node whose value is not needed, typically because short-circuit
 * evaluation would discard it. When short-circuiting is allowed the node is
 * simply ignored; otherwise it is fully evaluated and the result thrown away.
 *
 * @param AFPTreeNode $node
 */
private function maybeDiscardNode( AFPTreeNode $node ) {
	if ( $this->mAllowShort ) {
		return;
	}

	$this->evalNode( $node );
}
1512 | |
/**
 * Given a regexp in the AF syntax, make it PCRE-compliant: escape unescaped
 * slashes, then add "/" delimiters and the "u" modifier.
 *
 * A slash counts as already escaped when it is preceded by an odd number of
 * backslashes. The full run of backslash pairs in front of a slash is kept
 * verbatim; only a single escaping backslash is added where it is missing.
 *
 * @param string $rawRegexp
 * @return string
 */
private function mungeRegexp( string $rawRegexp ): string {
	// The run of escaped backslashes is captured as a whole with an inner
	// non-capturing group. Capturing the repeated group directly would retain
	// only its last iteration, dropping earlier backslash pairs from the output.
	$needle = preg_replace( '!((?:\\\\\\\\)*)(\\\\)?/!', '$1\/', $rawRegexp );
	return "/$needle/u";
}
1524 | |
/**
 * Record a 'match-empty-regex' warning if the given pattern matches the empty
 * string. Nothing is recorded when the regex operand is of type DUNDEFINED.
 * @note This method can generate a PHP notice if the regex is invalid
 *
 * @param AFPData $regex Only its type is inspected. TODO Can we avoid passing this in?
 * @param string $pattern Already munged
 * @param int $position Position stored in the warning
 */
private function checkRegexMatchesEmpty( AFPData $regex, string $pattern, int $position ): void {
	if ( $regex->getType() === AFPData::DUNDEFINED ) {
		// We can't tell, and toString() would return the empty string (T273809)
		return;
	}

	// @phan-suppress-next-next-line PhanParamSuspiciousOrder
	// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
	$matchesEmpty = @preg_match( $pattern, '' ) === 1;
	if ( !$matchesEmpty ) {
		return;
	}

	$this->warnings[] = new UserVisibleWarning( 'match-empty-regex', $position, [] );
}
1548 | } |