Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
94.00% |
47 / 50 |
|
78.57% |
11 / 14 |
CRAP | |
0.00% |
0 / 1 |
BasicQueryClassifier | |
94.00% |
47 / 50 |
|
78.57% |
11 / 14 |
34.25 | |
0.00% |
0 / 1 |
classify | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
14 | |||
visitWordsQueryNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitPhraseQueryNode | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
4 | |||
visitPhrasePrefixNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitFuzzyNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitPrefixNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitWildcardNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitEmptyQueryNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitKeywordFeatureNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitParsedBooleanNode | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 | |||
visitBooleanClause | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
visitNegatedNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
classes | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
visitNamespaceHeader | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Parser; |
4 | |
5 | use CirrusSearch\Parser\AST\BooleanClause; |
6 | use CirrusSearch\Parser\AST\EmptyQueryNode; |
7 | use CirrusSearch\Parser\AST\FuzzyNode; |
8 | use CirrusSearch\Parser\AST\KeywordFeatureNode; |
9 | use CirrusSearch\Parser\AST\NamespaceHeaderNode; |
10 | use CirrusSearch\Parser\AST\NegatedNode; |
11 | use CirrusSearch\Parser\AST\ParsedBooleanNode; |
12 | use CirrusSearch\Parser\AST\ParsedQuery; |
13 | use CirrusSearch\Parser\AST\PhrasePrefixNode; |
14 | use CirrusSearch\Parser\AST\PhraseQueryNode; |
15 | use CirrusSearch\Parser\AST\PrefixNode; |
16 | use CirrusSearch\Parser\AST\Visitor\Visitor; |
17 | use CirrusSearch\Parser\AST\WildcardNode; |
18 | use CirrusSearch\Parser\AST\WordsQueryNode; |
19 | |
/**
 * Basic classifier to identify queries like:
 * - simple words: foo bar
 * - simple phrase: "foo bar"
 * - simple words + simple phrase: foo "bar baz"
 * - complex: any query that uses a keyword, or any non-trivial feature
 * - bogus queries: queries where a bogus pattern has been identified at
 *   parse time
 * - more like only: queries that only use the more_like feature
 *
 * The classifier walks the parsed query itself (it implements Visitor) and
 * keeps the traversal state in its own properties; classify() resets that
 * state at the beginning of every call.
 */
class BasicQueryClassifier implements ParsedQueryClassifier, Visitor {

	/**
	 * The simplest query ever: only words
	 */
	public const SIMPLE_BAG_OF_WORDS = 'simple_bag_of_words';

	/**
	 * Only quoted words
	 */
	public const SIMPLE_PHRASE = 'simple_phrase_query';

	/**
	 * A simple bag of words query with some quoted words
	 */
	public const BAG_OF_WORDS_WITH_PHRASE = 'bag_of_words_with_phrase_query';

	/**
	 * Expert: a query that uses some special syntax such as:
	 * - wildcards/fuzzy/word prefix
	 * - explicit boolean expression
	 * - complex phrase (phrase prefix, non default slop)
	 */
	public const COMPLEX_QUERY = 'complex_query';

	/**
	 * Query that was fixed/corrected
	 */
	public const BOGUS_QUERY = 'bogus_query';

	/**
	 * Query that is only a morelike
	 */
	public const MORE_LIKE_ONLY = 'more_like_only';

	// The traversal state below is given default values so that the public
	// visit*() methods can be called safely even before classify() has run
	// (reading an uninitialized typed property throws an Error).

	/** Set once a WordsQueryNode has been visited */
	private bool $hasWords = false;

	/** Set once a balanced, non-stemmed phrase with a slop of -1 has been visited */
	private bool $hasSimplePhrase = false;

	/** Set once any node or clause marking the query as complex has been visited */
	private bool $hasComplex = false;

	/** Number of boolean clauses currently being traversed */
	private int $depth = 0;

	/** Highest value reached by $depth during the traversal */
	private int $maxDepth = 0;

	/**
	 * Classify a parsed query.
	 *
	 * BOGUS_QUERY and MORE_LIKE_ONLY are added independently of the other
	 * classes; at most one of COMPLEX_QUERY, SIMPLE_BAG_OF_WORDS,
	 * SIMPLE_PHRASE and BAG_OF_WORDS_WITH_PHRASE is then appended. The
	 * returned list may be empty when no class applies.
	 *
	 * @param ParsedQuery $query
	 * @return string[] the classes (see the class constants) the query belongs to
	 */
	public function classify( ParsedQuery $query ) {
		// Start from a clean state: the same instance may classify several queries.
		$this->hasWords = false;
		$this->hasSimplePhrase = false;
		$this->hasComplex = false;
		$this->depth = 0;
		$this->maxDepth = 0;

		$classes = [];
		if ( $query->getParseWarnings() !== [] ) {
			$classes[] = self::BOGUS_QUERY;
		}

		// Walk the tree; the visit*() methods below update the state flags.
		$query->getRoot()->accept( $this );

		// Strict comparison: only the exact 'more_like' string must match.
		// @phan-suppress-next-line PhanSuspiciousValueComparison
		if ( $this->maxDepth === 0 && in_array( 'more_like', $query->getFeaturesUsed(), true ) ) {
			$classes[] = self::MORE_LIKE_ONLY;
		}
		if ( $this->hasComplex ) {
			$classes[] = self::COMPLEX_QUERY;
		} elseif ( $this->maxDepth === 0 && $this->hasWords && !$this->hasSimplePhrase ) {
			$classes[] = self::SIMPLE_BAG_OF_WORDS;
		} elseif ( $this->maxDepth === 0 && !$this->hasWords && $this->hasSimplePhrase ) {
			$classes[] = self::SIMPLE_PHRASE;
		} elseif ( $this->maxDepth === 1 && $this->hasWords && $this->hasSimplePhrase ) {
			$classes[] = self::BAG_OF_WORDS_WITH_PHRASE;
		}

		return $classes;
	}

	/**
	 * Record that the query contains plain words.
	 *
	 * @param WordsQueryNode $node
	 */
	public function visitWordsQueryNode( WordsQueryNode $node ) {
		$this->hasWords = true;
	}

	/**
	 * A stemmed phrase or a phrase whose slop is not -1 makes the query
	 * complex; otherwise the phrase counts as a simple phrase unless it is
	 * unbalanced, in which case no flag is changed.
	 *
	 * @param PhraseQueryNode $node
	 */
	public function visitPhraseQueryNode( PhraseQueryNode $node ) {
		if ( $node->isStem() || $node->getSlop() !== -1 ) {
			$this->hasComplex = true;
		} elseif ( !$node->isUnbalanced() ) {
			$this->hasSimplePhrase = true;
		}
	}

	/**
	 * A phrase prefix always makes the query complex.
	 *
	 * @param PhrasePrefixNode $node
	 */
	public function visitPhrasePrefixNode( PhrasePrefixNode $node ) {
		$this->hasComplex = true;
	}

	/**
	 * A fuzzy word always makes the query complex.
	 *
	 * @param FuzzyNode $node
	 */
	public function visitFuzzyNode( FuzzyNode $node ) {
		$this->hasComplex = true;
	}

	/**
	 * A word prefix always makes the query complex.
	 *
	 * @param PrefixNode $node
	 */
	public function visitPrefixNode( PrefixNode $node ) {
		$this->hasComplex = true;
	}

	/**
	 * A wildcard always makes the query complex.
	 *
	 * @param WildcardNode $node
	 */
	public function visitWildcardNode( WildcardNode $node ) {
		$this->hasComplex = true;
	}

	/**
	 * An empty query does not affect the classification.
	 *
	 * @param EmptyQueryNode $node
	 */
	public function visitEmptyQueryNode( EmptyQueryNode $node ) {
	}

	/**
	 * A keyword always makes the query complex.
	 *
	 * @param KeywordFeatureNode $node
	 */
	public function visitKeywordFeatureNode( KeywordFeatureNode $node ) {
		$this->hasComplex = true;
	}

	/**
	 * Visit every clause of the boolean node, unless the query is already
	 * known to be complex.
	 *
	 * @param ParsedBooleanNode $node
	 */
	public function visitParsedBooleanNode( ParsedBooleanNode $node ) {
		if ( $this->hasComplex ) {
			// we can quickly skip, this query cannot belong to this class
			return;
		}
		foreach ( $node->getClauses() as $clause ) {
			$clause->accept( $this );
		}
	}

	/**
	 * Track the clause nesting depth while visiting the clause's node.
	 * An explicit clause or a MUST_NOT clause makes the query complex.
	 *
	 * @param BooleanClause $clause
	 */
	public function visitBooleanClause( BooleanClause $clause ) {
		$this->depth++;
		$this->maxDepth = max( $this->depth, $this->maxDepth );
		$this->hasComplex = $this->hasComplex || $clause->isExplicit() || $clause->getOccur() === BooleanClause::MUST_NOT;
		$clause->getNode()->accept( $this );
		// Leaving the clause: restore the depth for the sibling clauses.
		$this->depth--;
	}

	/**
	 * A negated node always makes the query complex.
	 *
	 * @param NegatedNode $node
	 */
	public function visitNegatedNode( NegatedNode $node ) {
		$this->hasComplex = true;
	}

	/**
	 * @return string[] all the classes classify() may return
	 */
	public function classes() {
		return [
			self::SIMPLE_BAG_OF_WORDS,
			self::SIMPLE_PHRASE,
			self::BAG_OF_WORDS_WITH_PHRASE,
			self::COMPLEX_QUERY,
			self::BOGUS_QUERY,
			self::MORE_LIKE_ONLY,
		];
	}

	/**
	 * A namespace header does not affect the classification.
	 *
	 * @param NamespaceHeaderNode $node
	 */
	public function visitNamespaceHeader( NamespaceHeaderNode $node ) {
	}
}