Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
94.00% covered (success)
94.00%
47 / 50
78.57% covered (warning)
78.57%
11 / 14
CRAP
0.00% covered (danger)
0.00%
0 / 1
BasicQueryClassifier
94.00% covered (success)
94.00%
47 / 50
78.57% covered (warning)
78.57%
11 / 14
34.25
0.00% covered (danger)
0.00%
0 / 1
 classify
100.00% covered (success)
100.00%
20 / 20
100.00% covered (success)
100.00%
1 / 1
14
 visitWordsQueryNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitPhraseQueryNode
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
4
 visitPhrasePrefixNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitFuzzyNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitPrefixNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitWildcardNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitEmptyQueryNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitKeywordFeatureNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitParsedBooleanNode
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
 visitBooleanClause
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 visitNegatedNode
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 classes
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
1
 visitNamespaceHeader
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace CirrusSearch\Parser;
4
5use CirrusSearch\Parser\AST\BooleanClause;
6use CirrusSearch\Parser\AST\EmptyQueryNode;
7use CirrusSearch\Parser\AST\FuzzyNode;
8use CirrusSearch\Parser\AST\KeywordFeatureNode;
9use CirrusSearch\Parser\AST\NamespaceHeaderNode;
10use CirrusSearch\Parser\AST\NegatedNode;
11use CirrusSearch\Parser\AST\ParsedBooleanNode;
12use CirrusSearch\Parser\AST\ParsedQuery;
13use CirrusSearch\Parser\AST\PhrasePrefixNode;
14use CirrusSearch\Parser\AST\PhraseQueryNode;
15use CirrusSearch\Parser\AST\PrefixNode;
16use CirrusSearch\Parser\AST\Visitor\Visitor;
17use CirrusSearch\Parser\AST\WildcardNode;
18use CirrusSearch\Parser\AST\WordsQueryNode;
19
20/**
21 * Basic classifier to identify queries like:
22 * - simple words: foo bar
23 * - simple phrase: "foo bar"
24 * - simple words + simple phrase: foo "bar baz"
25 * - complex: any query that uses a keyword or any non-trivial feature
26 * - bogus queries: queries where a bogus pattern has been identified at
27 *      parse time (i.e. the parsed query carries parse warnings)
28 */
29class BasicQueryClassifier implements ParsedQueryClassifier, Visitor {
30
31    /**
32     * The simplest query: only words, no simple phrase and no clause nesting
33     */
34    public const SIMPLE_BAG_OF_WORDS = 'simple_bag_of_words';
35
36    /**
37     * Only a simple quoted phrase, without any bare words
38     */
39    public const SIMPLE_PHRASE = 'simple_phrase_query';
40
41    /**
42     * A simple bag of words query that also contains a simple quoted phrase
43     */
44    public const BAG_OF_WORDS_WITH_PHRASE = 'bag_of_words_with_phrase_query';
45
46    /**
47     * Expert: a query that uses some special syntax such as:
48     * - wildcards/fuzzy/word prefix
49     * - explicit boolean expression or negated clause
50     * - complex phrase (phrase prefix, stemmed phrase, non default slop)
51     */
52    public const COMPLEX_QUERY = 'complex_query';
53
54    /**
55     * Query that was fixed/corrected, i.e. its parse warnings are not empty
56     */
57    public const BOGUS_QUERY = 'bogus_query';
58
59    /**
60     * Query using the more_like feature with no boolean clause nesting
61     */
62    public const MORE_LIKE_ONLY = 'more_like_only';
63
64    private bool $hasWords;
65
66    private bool $hasSimplePhrase;
67
68    private bool $hasComplex;
69
70    private int $depth;
71
72    /**
73     * @var int Deepest boolean clause nesting reached since classify() last reset it
74     */
75    private int $maxDepth;
76
77    /**
78     * @param ParsedQuery $query the parsed query to classify; visitor state is reset on each call
79     * @return string[] the class names (see the constants above) that apply to $query
80     */
81    public function classify( ParsedQuery $query ) {
82        $this->hasWords = false;
83        $this->hasSimplePhrase = false;
84        $this->hasComplex = false;
85        $this->depth = 0;
86        $this->maxDepth = 0;
87
88        $classes = [];
89        if ( $query->getParseWarnings() !== [] ) {
90            $classes[] = self::BOGUS_QUERY;
91        }
92
93        $query->getRoot()->accept( $this );
94
95        // @phan-suppress-next-line PhanSuspiciousValueComparison
96        if ( $this->maxDepth === 0 && in_array( 'more_like', $query->getFeaturesUsed() ) ) {
97            $classes[] = self::MORE_LIKE_ONLY;
98        }
99        if ( $this->hasComplex ) {
100            $classes[] = self::COMPLEX_QUERY;
101        } elseif ( $this->maxDepth === 0 && $this->hasWords && !$this->hasSimplePhrase ) {
102            $classes[] = self::SIMPLE_BAG_OF_WORDS;
103        } elseif ( $this->maxDepth === 0 && !$this->hasWords && $this->hasSimplePhrase ) {
104            $classes[] = self::SIMPLE_PHRASE;
105        } elseif ( $this->maxDepth === 1 && $this->hasWords && $this->hasSimplePhrase ) {
106            $classes[] = self::BAG_OF_WORDS_WITH_PHRASE;
107        }
108
109        return $classes;
110    }
111
112    /**
113     * @param WordsQueryNode $node bare words: flags the query as containing words
114     */
115    public function visitWordsQueryNode( WordsQueryNode $node ) {
116        $this->hasWords = true;
117    }
118
119    /**
120     * @param PhraseQueryNode $node complex if stemmed or slop is not -1, else simple unless unbalanced
121     */
122    public function visitPhraseQueryNode( PhraseQueryNode $node ) {
123        if ( $node->isStem() || $node->getSlop() !== -1 ) {
124            $this->hasComplex = true;
125        } elseif ( !$node->isUnbalanced() ) {
126            $this->hasSimplePhrase = true;
127        }
128    }
129
130    /**
131     * @param PhrasePrefixNode $node always flags the query as complex
132     */
133    public function visitPhrasePrefixNode( PhrasePrefixNode $node ) {
134        $this->hasComplex = true;
135    }
136
137    /**
138     * @param FuzzyNode $node always flags the query as complex
139     */
140    public function visitFuzzyNode( FuzzyNode $node ) {
141        $this->hasComplex = true;
142    }
143
144    /**
145     * @param PrefixNode $node always flags the query as complex
146     */
147    public function visitPrefixNode( PrefixNode $node ) {
148        $this->hasComplex = true;
149    }
150
151    /**
152     * @param WildcardNode $node always flags the query as complex
153     */
154    public function visitWildcardNode( WildcardNode $node ) {
155        $this->hasComplex = true;
156    }
157
158    /**
159     * @param EmptyQueryNode $node ignored: leaves the classification state untouched
160     */
161    public function visitEmptyQueryNode( EmptyQueryNode $node ) {
162    }
163
164    /**
165     * @param KeywordFeatureNode $node always flags the query as complex
166     */
167    public function visitKeywordFeatureNode( KeywordFeatureNode $node ) {
168        $this->hasComplex = true;
169    }
170
171    /**
172     * @param ParsedBooleanNode $node its clauses are visited unless the query is already complex
173     */
174    public function visitParsedBooleanNode( ParsedBooleanNode $node ) {
175        if ( $this->hasComplex ) {
176            // skip quickly: the query is already known to be complex, visiting more cannot change that
177            return;
178        }
179        foreach ( $node->getClauses() as $clause ) {
180            $clause->accept( $this );
181        }
182    }
183
184    /**
185     * @param BooleanClause $clause tracks nesting depth; explicit or MUST_NOT clauses are complex
186     */
187    public function visitBooleanClause( BooleanClause $clause ) {
188        $this->depth++;
189        $this->maxDepth = max( $this->depth, $this->maxDepth );
190        $this->hasComplex = $this->hasComplex || $clause->isExplicit() || $clause->getOccur() === BooleanClause::MUST_NOT;
191        $clause->getNode()->accept( $this );
192        $this->depth--;
193    }
194
195    /**
196     * @param NegatedNode $node always flags the query as complex
197     */
198    public function visitNegatedNode( NegatedNode $node ) {
199        $this->hasComplex = true;
200    }
201
202    /**
203     * @return string[] every class name that classify() may produce
204     */
205    public function classes() {
206        return [
207            self::SIMPLE_BAG_OF_WORDS,
208            self::SIMPLE_PHRASE,
209            self::BAG_OF_WORDS_WITH_PHRASE,
210            self::COMPLEX_QUERY,
211            self::BOGUS_QUERY,
212            self::MORE_LIKE_ONLY,
213        ];
214    }
215
216    /**
217     * @param NamespaceHeaderNode $node ignored: leaves the classification state untouched
218     */
219    public function visitNamespaceHeader( NamespaceHeaderNode $node ) {
220    }
221}