Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
93.62% |
44 / 47 |
|
78.57% |
11 / 14 |
CRAP | |
0.00% |
0 / 1 |
BasicQueryClassifier | |
93.62% |
44 / 47 |
|
78.57% |
11 / 14 |
32.27 | |
0.00% |
0 / 1 |
classify | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
12 | |||
visitWordsQueryNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitPhraseQueryNode | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
4 | |||
visitPhrasePrefixNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitFuzzyNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitPrefixNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitWildcardNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitEmptyQueryNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitKeywordFeatureNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitParsedBooleanNode | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 | |||
visitBooleanClause | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
visitNegatedNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
classes | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
1 | |||
visitNamespaceHeader | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Parser; |
4 | |
5 | use CirrusSearch\Parser\AST\BooleanClause; |
6 | use CirrusSearch\Parser\AST\EmptyQueryNode; |
7 | use CirrusSearch\Parser\AST\FuzzyNode; |
8 | use CirrusSearch\Parser\AST\KeywordFeatureNode; |
9 | use CirrusSearch\Parser\AST\NamespaceHeaderNode; |
10 | use CirrusSearch\Parser\AST\NegatedNode; |
11 | use CirrusSearch\Parser\AST\ParsedBooleanNode; |
12 | use CirrusSearch\Parser\AST\ParsedQuery; |
13 | use CirrusSearch\Parser\AST\PhrasePrefixNode; |
14 | use CirrusSearch\Parser\AST\PhraseQueryNode; |
15 | use CirrusSearch\Parser\AST\PrefixNode; |
16 | use CirrusSearch\Parser\AST\Visitor\Visitor; |
17 | use CirrusSearch\Parser\AST\WildcardNode; |
18 | use CirrusSearch\Parser\AST\WordsQueryNode; |
19 | |
/**
 * Basic classifier that walks a parsed query and tags it with one or more of:
 * - simple words: foo bar
 * - simple phrase: "foo bar"
 * - simple words + simple phrase: foo "bar baz"
 * - complex: any query that uses a keyword, or any non trivial feature
 * - bogus: a query for which parse warnings were recorded at parse time
 *
 * The instance acts as its own AST visitor; the flags it collects are reset
 * at the start of every classify() call.
 */
class BasicQueryClassifier implements ParsedQueryClassifier, Visitor {

	/**
	 * The simplest query ever: only words
	 */
	public const SIMPLE_BAG_OF_WORDS = 'simple_bag_of_words';

	/**
	 * Only quoted words
	 */
	public const SIMPLE_PHRASE = 'simple_phrase_query';

	/**
	 * A simple bag of words query with some quoted words
	 */
	public const BAG_OF_WORDS_WITH_PHRASE = 'bag_of_words_with_phrase_query';

	/**
	 * Expert: a query that uses some special syntax such as:
	 * - wildcards/fuzzy/word prefix
	 * - explicit boolean expression
	 * - complex phrase (phrase prefix, non default slop)
	 */
	public const COMPLEX_QUERY = 'complex_query';

	/**
	 * Query that was fixed/corrected
	 */
	public const BOGUS_QUERY = 'bogus_query';

	/**
	 * @var bool true once a WordsQueryNode has been visited
	 */
	private $hasWords;

	/**
	 * @var bool true once a balanced, non-stemmed phrase with slop -1 has been visited
	 */
	private $hasSimplePhrase;

	/**
	 * @var bool true once any node or clause treated as "complex" has been visited
	 */
	private $hasComplex;

	/**
	 * @var int number of boolean clauses currently being traversed
	 */
	private $depth;

	/**
	 * @var int highest value reached by $depth during the current traversal
	 */
	private $maxDepth;

	/**
	 * Classify a parsed query.
	 *
	 * BOGUS_QUERY is added whenever the query carries parse warnings, and is
	 * independent of the other classes. At most one of the remaining classes
	 * is appended after it; a query matching none of them only gets
	 * BOGUS_QUERY (or nothing at all).
	 *
	 * @param ParsedQuery $query
	 * @return string[]
	 */
	public function classify( ParsedQuery $query ) {
		// Start from a clean slate: the visitor state lives on the instance.
		$this->depth = 0;
		$this->maxDepth = 0;
		$this->hasComplex = false;
		$this->hasSimplePhrase = false;
		$this->hasWords = false;

		$classes = $query->getParseWarnings() !== [] ? [ self::BOGUS_QUERY ] : [];

		$query->getRoot()->accept( $this );

		if ( $this->hasComplex ) {
			// Complex wins over every structural class.
			$classes[] = self::COMPLEX_QUERY;
			return $classes;
		}

		$words = $this->hasWords;
		$phrase = $this->hasSimplePhrase;

		// @phan-suppress-next-line PhanSuspiciousValueComparison
		if ( $this->maxDepth === 0 ) {
			// No boolean clause was traversed: the root is a single node.
			if ( $words && !$phrase ) {
				$classes[] = self::SIMPLE_BAG_OF_WORDS;
			} elseif ( $phrase && !$words ) {
				$classes[] = self::SIMPLE_PHRASE;
			}
		} elseif ( $this->maxDepth === 1 && $words && $phrase ) {
			// A single level of clauses mixing words and simple phrases.
			$classes[] = self::BAG_OF_WORDS_WITH_PHRASE;
		}

		return $classes;
	}

	/**
	 * List every class this classifier may emit.
	 *
	 * @return string[]
	 */
	public function classes() {
		return [
			self::SIMPLE_BAG_OF_WORDS,
			self::SIMPLE_PHRASE,
			self::BAG_OF_WORDS_WITH_PHRASE,
			self::COMPLEX_QUERY,
			self::BOGUS_QUERY,
		];
	}

	/**
	 * Record that the query contains plain words.
	 *
	 * @param WordsQueryNode $node
	 */
	public function visitWordsQueryNode( WordsQueryNode $node ) {
		$this->hasWords = true;
	}

	/**
	 * A stemmed phrase or one whose slop is not -1 is complex; otherwise the
	 * phrase counts as simple unless it is unbalanced, in which case no flag
	 * is set.
	 *
	 * @param PhraseQueryNode $node
	 */
	public function visitPhraseQueryNode( PhraseQueryNode $node ) {
		if ( $node->isStem() || $node->getSlop() !== -1 ) {
			$this->hasComplex = true;
			return;
		}

		if ( $node->isUnbalanced() ) {
			return;
		}

		$this->hasSimplePhrase = true;
	}

	/**
	 * Phrase prefixes always make the query complex.
	 *
	 * @param PhrasePrefixNode $node
	 */
	public function visitPhrasePrefixNode( PhrasePrefixNode $node ) {
		$this->hasComplex = true;
	}

	/**
	 * Fuzzy words always make the query complex.
	 *
	 * @param FuzzyNode $node
	 */
	public function visitFuzzyNode( FuzzyNode $node ) {
		$this->hasComplex = true;
	}

	/**
	 * Word prefixes always make the query complex.
	 *
	 * @param PrefixNode $node
	 */
	public function visitPrefixNode( PrefixNode $node ) {
		$this->hasComplex = true;
	}

	/**
	 * Wildcards always make the query complex.
	 *
	 * @param WildcardNode $node
	 */
	public function visitWildcardNode( WildcardNode $node ) {
		$this->hasComplex = true;
	}

	/**
	 * An empty query contributes nothing to the classification.
	 *
	 * @param EmptyQueryNode $node
	 */
	public function visitEmptyQueryNode( EmptyQueryNode $node ) {
	}

	/**
	 * Keywords always make the query complex.
	 *
	 * @param KeywordFeatureNode $node
	 */
	public function visitKeywordFeatureNode( KeywordFeatureNode $node ) {
		$this->hasComplex = true;
	}

	/**
	 * Visit every clause of a boolean node, unless the query is already
	 * known to be complex (nothing found further down could change that).
	 *
	 * @param ParsedBooleanNode $node
	 */
	public function visitParsedBooleanNode( ParsedBooleanNode $node ) {
		if ( !$this->hasComplex ) {
			foreach ( $node->getClauses() as $clause ) {
				$clause->accept( $this );
			}
		}
	}

	/**
	 * Track the clause nesting depth around the visit of the clause's node,
	 * and flag the query as complex when the clause is explicit or is a
	 * MUST_NOT clause.
	 *
	 * @param BooleanClause $clause
	 */
	public function visitBooleanClause( BooleanClause $clause ) {
		$this->depth++;
		if ( $this->depth > $this->maxDepth ) {
			$this->maxDepth = $this->depth;
		}

		// The clause is only inspected while the query is not yet complex.
		if (
			!$this->hasComplex &&
			( $clause->isExplicit() || $clause->getOccur() === BooleanClause::MUST_NOT )
		) {
			$this->hasComplex = true;
		}

		$clause->getNode()->accept( $this );
		$this->depth--;
	}

	/**
	 * Negations always make the query complex; the negated child is not visited.
	 *
	 * @param NegatedNode $node
	 */
	public function visitNegatedNode( NegatedNode $node ) {
		$this->hasComplex = true;
	}

	/**
	 * A namespace header contributes nothing to the classification.
	 *
	 * @param NamespaceHeaderNode $node
	 */
	public function visitNamespaceHeader( NamespaceHeaderNode $node ) {
	}
}