Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
96.59% |
85 / 88 |
|
81.25% |
13 / 16 |
CRAP | |
0.00% |
0 / 1 |
ParsedQuery | |
96.59% |
85 / 88 |
|
81.25% |
13 / 16 |
38 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
getRoot | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getQuery | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getQueryWithoutNsHeader | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 | |||
getRawQuery | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
hasCleanup | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getParseWarnings | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getNamespaceHeader | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getRequiredNamespaces | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getActualNamespaces | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
10 | |||
getCrossSearchStrategy | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
2 | |||
isQueryOfClass | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
loadQueryClass | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
preloadQueryClasses | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
getFeaturesUsed | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
3 | |||
toArray | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
7 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Parser\AST; |
4 | |
5 | use CirrusSearch\CrossSearchStrategy; |
6 | use CirrusSearch\Parser\AST\Visitor\KeywordNodeVisitor; |
7 | use CirrusSearch\Parser\ParsedQueryClassifiersRepository; |
8 | use Wikimedia\Assert\Assert; |
9 | |
/**
 * Parsed query.
 *
 * Immutable-by-convention holder for the result of parsing a search query:
 * the root node of the AST, the cleaned-up and raw query strings, the
 * cleanups and warnings recorded while parsing and the namespace
 * information attached to the query.
 * Derived information (cross search strategy, query classes, used
 * features) is computed lazily and cached on the instance.
 */
class ParsedQuery {

	/**
	 * markup to indicate that the query was cleaned up
	 * detecting a double quote used as a gershayim
	 * see T66350
	 */
	public const CLEANUP_GERSHAYIM_QUIRKS = 'gershayim_quirks';

	/**
	 * markup to indicate that the query had some question marks
	 * stripped
	 * @see \CirrusSearch\Util::stripQuestionMarks
	 */
	public const CLEANUP_QMARK_STRIPPING = 'stripped_qmark';

	/**
	 * markup to indicate that we removed a '~' at the beginning of the query
	 */
	public const TILDE_HEADER = 'tilde_header';

	/**
	 * @var ParsedNode root node of the parsed query
	 */
	private $root;

	/**
	 * @var string cleaned up query string
	 */
	private $query;

	/**
	 * @var string original query as received by the search engine
	 */
	private $rawQuery;

	/**
	 * @var bool[] indexed by cleanup type
	 */
	private $queryCleanups;

	/**
	 * @var ParseWarning[]
	 */
	private $parseWarnings;

	/**
	 * @var NamespaceHeaderNode|null namespace specified at the beginning of the query
	 */
	private $namespaceHeader;

	/**
	 * @var array|string (array of int or 'all') list of required namespaces
	 * for the query to be able to return results.
	 * This list of namespaces must always be added no matter what is requested
	 * before.
	 * Main use-case is the prefix keyword that must supersede any other settings.
	 */
	private $requiredNamespaces;

	/**
	 * @var CrossSearchStrategy|null (lazy loaded)
	 */
	private $crossSearchStrategy;

	/**
	 * @var ParsedQueryClassifiersRepository
	 */
	private $classifierRepository;

	/**
	 * @var bool[] indexed by query class name
	 */
	private $queryClassCache = [];

	/**
	 * @var string[]|null list of used features in the query (lazy loaded)
	 * @see \CirrusSearch\Query\KeywordFeature::getFeatureName()
	 */
	private $featuresUsed;

	/**
	 * @param ParsedNode $root
	 * @param string $query cleaned up query string
	 * @param string $rawQuery original query as received by the search engine
	 * @param bool[] $queryCleanups indexed by cleanup type (non-empty when $query !== $rawQuery)
	 * @param NamespaceHeaderNode|null $namespaceHeader namespace found as a "header" of the query,
	 *  null if none was specified
	 * @param array|string $requiredNamespaces array of namespaces, or the string 'all';
	 *  any other value is rejected by the parameter assertion
	 * @param ParseWarning[] $parseWarnings list of warnings detected during parsing
	 * @param ParsedQueryClassifiersRepository $repository
	 */
	public function __construct(
		ParsedNode $root,
		$query,
		$rawQuery,
		$queryCleanups,
		?NamespaceHeaderNode $namespaceHeader,
		$requiredNamespaces,
		array $parseWarnings,
		ParsedQueryClassifiersRepository $repository
	) {
		$this->root = $root;
		$this->query = $query;
		$this->rawQuery = $rawQuery;
		$this->queryCleanups = $queryCleanups;
		$this->parseWarnings = $parseWarnings;
		$this->namespaceHeader = $namespaceHeader;
		Assert::parameter( is_array( $requiredNamespaces ) || $requiredNamespaces === 'all',
			'$requiredNamespaces', 'must be an array or "all"' );
		$this->requiredNamespaces = $requiredNamespaces;
		$this->classifierRepository = $repository;
	}

	/**
	 * @return ParsedNode
	 */
	public function getRoot() {
		return $this->root;
	}

	/**
	 * The query being parsed
	 * Some cleanups may have been made to the raw query
	 * NOTE: the query may include the namespace header
	 * @return string
	 */
	public function getQuery() {
		return $this->query;
	}

	/**
	 * The query string without the namespace header
	 * @return string
	 */
	public function getQueryWithoutNsHeader() {
		// TODO: remove once the AST is fully used by all building components.
		if ( $this->namespaceHeader !== null ) {
			// Drop everything up to the end offset reported by the header node.
			return substr( $this->query, $this->namespaceHeader->getEndOffset() );
		}
		return $this->query;
	}

	/**
	 * The raw query as received by the search engine
	 * @return string
	 */
	public function getRawQuery() {
		return $this->rawQuery;
	}

	/**
	 * Check if the query was cleaned up with this type
	 * @see ParsedQuery::CLEANUP_QMARK_STRIPPING
	 * @see ParsedQuery::CLEANUP_GERSHAYIM_QUIRKS
	 * @param string $cleanup
	 * @return bool true only when the cleanup is recorded with a truthy flag
	 */
	public function hasCleanup( $cleanup ) {
		// The cleanups are stored as bool flags: read the flag itself rather
		// than testing key presence so that an entry explicitly set to false
		// is not reported as an applied cleanup.
		return (bool)( $this->queryCleanups[$cleanup] ?? false );
	}

	/**
	 * List of warnings detected at parse time
	 * @return ParseWarning[]
	 */
	public function getParseWarnings() {
		return $this->parseWarnings;
	}

	/**
	 * Get the node of the namespace header identified in the prefix of the query
	 * if specified.
	 * It can be null in all other cases
	 * @return NamespaceHeaderNode|null
	 */
	public function getNamespaceHeader() {
		return $this->namespaceHeader;
	}

	/**
	 * @return array|string array of additional namespaces or 'all' if all namespaces required
	 */
	public function getRequiredNamespaces() {
		return $this->requiredNamespaces;
	}

	/**
	 * Determine the actual namespaces required for this query to run
	 * assuming that $namespaces is the list of namespaces initially requested
	 * usually set <code>\SearchEngine::setNamespaces()</code>.
	 *
	 * @param int[]|null $namespaces initial namespaces
	 * @param int[]|null $additionalRequiredNamespaces additional namespaces required (by ContextualFilters),
	 *  an empty array makes the method return "all namespaces"
	 * @return int[] the list of namespaces that have to be queried,
	 *  empty array means all namespaces
	 * @see \SearchEngine::setNamespaces()
	 * @see self::getRequiredNamespaces()
	 * @see self::getNamespaceHeader()
	 * @see \CirrusSearch\Query\Builder\ContextualFilter::requiredNamespaces()
	 */
	public function getActualNamespaces( ?array $namespaces = null, ?array $additionalRequiredNamespaces = null ) {
		if ( $this->requiredNamespaces === 'all' ) {
			// e.g. prefix:all:foo (all namespaces must be queried no matter what is requested before)
			return [];
		}

		if ( $additionalRequiredNamespaces === [] ) {
			// An explicitly empty list is treated as "all namespaces" (null means nothing additional).
			return [];
		}

		if ( $this->namespaceHeader !== null && $this->namespaceHeader->getNamespace() === 'all' ) {
			// e.g. all:foo
			return [];
		}

		if ( $this->namespaceHeader === null && !$namespaces ) {
			// Everything was selected using SearchEngine::setNamespaces() but nothing more specific
			// was requested using a prefixed ns
			return [];
		}

		// now everything else will be an explicit list of namespaces
		Assert::postcondition( $this->namespaceHeader === null || is_int( $this->namespaceHeader->getNamespace() ),
			'$this->namespaceHeader must be null or an integer' );

		// The namespace header, when present, replaces the initially requested namespaces.
		$ns = $this->namespaceHeader === null ? $namespaces : [ $this->namespaceHeader->getNamespace() ];
		Assert::postcondition( is_array( $ns ) && $ns !== [],
			'at this point we must have a list of specific namespaces' );

		// Merge, de-duplicate and re-index so that a plain list is returned.
		return array_values( array_unique(
			// @phan-suppress-next-line PhanCoalescingNeverNull
			array_merge( $ns, $this->requiredNamespaces, $additionalRequiredNamespaces ?? [] ),
			SORT_REGULAR
		) );
	}

	/**
	 * Get the cross search strategy supported by this query.
	 *
	 * The strategy is computed on first call by visiting the AST and
	 * intersecting the strategies reported for the visited keyword nodes,
	 * starting from CrossSearchStrategy::allWikisStrategy(); the result is
	 * then cached.
	 *
	 * @return CrossSearchStrategy
	 */
	public function getCrossSearchStrategy() {
		if ( $this->crossSearchStrategy === null ) {
			$visitor = new class() extends KeywordNodeVisitor {
				/** @var CrossSearchStrategy running intersection of the visited keywords' strategies */
				public $strategy;

				public function __construct( array $excludeOccurs = [], array $keywordClasses = [] ) {
					parent::__construct( $excludeOccurs, $keywordClasses );
					$this->strategy = CrossSearchStrategy::allWikisStrategy();
				}

				/**
				 * @param KeywordFeatureNode $node
				 */
				public function doVisitKeyword( KeywordFeatureNode $node ) {
					$this->strategy = $this->strategy
						->intersect( $node->getKeyword()->getCrossSearchStrategy( $node ) );
				}
			};
			$this->root->accept( $visitor );
			$this->crossSearchStrategy = $visitor->strategy;
		}
		return $this->crossSearchStrategy;
	}

	/**
	 * Check whether this query belongs to the given query class.
	 * The answer is served from the cache when available, otherwise the
	 * matching classifier is run.
	 *
	 * @param string $class
	 * @return bool
	 * @throws \CirrusSearch\Parser\ParsedQueryClassifierException if the class is unknown
	 */
	public function isQueryOfClass( $class ) {
		return $this->queryClassCache[$class] ?? $this->loadQueryClass( $class );
	}

	/**
	 * Run the classifier registered for $class and cache the outcome for
	 * every class that classifier declares.
	 *
	 * @param string $class
	 * @return bool
	 * @throws \CirrusSearch\Parser\ParsedQueryClassifierException
	 */
	private function loadQueryClass( $class ) {
		$classifier = $this->classifierRepository->getClassifier( $class );
		$newClasses = $classifier->classify( $this );
		// Record a result for all the classes handled by this classifier,
		// not only the matching ones, so that it is not run again for them.
		foreach ( $classifier->classes() as $k ) {
			$this->queryClassCache[$k] = in_array( $k, $newClasses, true );
		}
		return $this->queryClassCache[$class];
	}

	/**
	 * Preload all known query classes and classify this
	 * query.
	 * @throws \CirrusSearch\Parser\ParsedQueryClassifierException
	 */
	public function preloadQueryClasses() {
		foreach ( $this->classifierRepository->getKnownClassifiers() as $class ) {
			$this->isQueryOfClass( $class );
		}
	}

	/**
	 * Get the list of keyword features used by this query.
	 * The list is computed on first call and cached; 'namespace_header' is
	 * appended when the query has a namespace header.
	 *
	 * @see \CirrusSearch\Query\KeywordFeature::getFeatureName()
	 * @return string[]
	 */
	public function getFeaturesUsed() {
		if ( $this->featuresUsed === null ) {
			$visitor = new class() extends KeywordNodeVisitor {
				/** @var bool[] indexed by feature name, used as a set to de-duplicate names */
				public $features = [];

				/**
				 * @param KeywordFeatureNode $node
				 */
				public function doVisitKeyword( KeywordFeatureNode $node ) {
					$name = $node->getKeyword()
						->getFeatureName( $node->getKey(), $node->getDelimiter() );
					$this->features[$name] = true;
				}
			};
			$this->root->accept( $visitor );
			$this->featuresUsed = array_keys( $visitor->features );
			if ( $this->namespaceHeader !== null ) {
				$this->featuresUsed[] = 'namespace_header';
			}
		}
		return $this->featuresUsed;
	}

	/**
	 * Export this parsed query as an array.
	 * 'query', 'rawQuery' and 'root' are always present, the other entries
	 * are only added when they are not empty.
	 * NOTE: this preloads all the known query classes.
	 *
	 * @return array
	 * @throws \CirrusSearch\Parser\ParsedQueryClassifierException
	 */
	public function toArray() {
		$ar = [
			'query' => $this->query,
			'rawQuery' => $this->rawQuery
		];

		if ( $this->namespaceHeader !== null ) {
			// Array union: keys already set above are not overwritten.
			$ar += $this->namespaceHeader->toArray();
		}
		if ( $this->requiredNamespaces !== [] ) {
			$ar['requiredNamespaces'] = $this->requiredNamespaces;
		}
		if ( $this->queryCleanups !== [] ) {
			$ar['queryCleanups'] = $this->queryCleanups;
		}
		$this->preloadQueryClasses();
		// Only export the names of the classes this query belongs to.
		$classes = array_keys( array_filter( $this->queryClassCache ) );
		if ( $classes !== [] ) {
			$ar['queryClassCache'] = $classes;
		}
		if ( $this->parseWarnings !== [] ) {
			$ar['warnings'] = array_map( static function ( ParseWarning $w ) {
				return $w->toArray();
			}, $this->parseWarnings );
		}
		$featuresUsed = $this->getFeaturesUsed();
		if ( $featuresUsed !== [] ) {
			$ar['featuresUsed'] = $featuresUsed;
		}
		$ar['root'] = $this->getRoot()->toArray();

		return $ar;
	}
}