Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
93.62% covered (success)
93.62%
44 / 47
78.57% covered (warning)
78.57%
11 / 14
CRAP
0.00% covered (danger)
0.00%
0 / 1
BasicQueryClassifier
93.62% covered (success)
93.62%
44 / 47
78.57% covered (warning)
78.57%
11 / 14
32.27
0.00% covered (danger)
0.00%
0 / 1
 classify
100.00% covered (success)
100.00%
18 / 18
100.00% covered (success)
100.00%
1 / 1
12
 visitWordsQueryNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitPhraseQueryNode
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
4
 visitPhrasePrefixNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitFuzzyNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitPrefixNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitWildcardNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitEmptyQueryNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitKeywordFeatureNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitParsedBooleanNode
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
 visitBooleanClause
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 visitNegatedNode
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 classes
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
1
 visitNamespaceHeader
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace CirrusSearch\Parser;
4
5use CirrusSearch\Parser\AST\BooleanClause;
6use CirrusSearch\Parser\AST\EmptyQueryNode;
7use CirrusSearch\Parser\AST\FuzzyNode;
8use CirrusSearch\Parser\AST\KeywordFeatureNode;
9use CirrusSearch\Parser\AST\NamespaceHeaderNode;
10use CirrusSearch\Parser\AST\NegatedNode;
11use CirrusSearch\Parser\AST\ParsedBooleanNode;
12use CirrusSearch\Parser\AST\ParsedQuery;
13use CirrusSearch\Parser\AST\PhrasePrefixNode;
14use CirrusSearch\Parser\AST\PhraseQueryNode;
15use CirrusSearch\Parser\AST\PrefixNode;
16use CirrusSearch\Parser\AST\Visitor\Visitor;
17use CirrusSearch\Parser\AST\WildcardNode;
18use CirrusSearch\Parser\AST\WordsQueryNode;
19
/**
 * Basic classifier that sorts parsed queries into coarse categories:
 * - simple words: foo bar
 * - simple phrase: "foo bar"
 * - simple words + simple phrase: foo "bar baz"
 * - complex: any query that uses a keyword, or any non-trivial feature
 * - bogus queries: queries for which the parser reported warnings
 *
 * The classifier doubles as the AST visitor: classify() walks the tree with
 * $this and the visit*() callbacks record what was seen in private flags.
 * Those flags are reset at the start of every classify() call.
 */
class BasicQueryClassifier implements ParsedQueryClassifier, Visitor {

    /**
     * Words only, nothing else.
     */
    public const SIMPLE_BAG_OF_WORDS = 'simple_bag_of_words';

    /**
     * Quoted words only.
     */
    public const SIMPLE_PHRASE = 'simple_phrase_query';

    /**
     * Plain words mixed with some quoted words.
     */
    public const BAG_OF_WORDS_WITH_PHRASE = 'bag_of_words_with_phrase_query';

    /**
     * Expert: a query that relies on special syntax such as:
     * - wildcards/fuzzy/word prefix
     * - explicit boolean expression
     * - complex phrase (phrase prefix, non default slop)
     */
    public const COMPLEX_QUERY = 'complex_query';

    /**
     * Query for which the parser emitted at least one warning.
     */
    public const BOGUS_QUERY = 'bogus_query';

    /**
     * @var bool true once a WordsQueryNode has been visited
     */
    private $hasWords;

    /**
     * @var bool true once a balanced, non-stemmed, default-slop phrase has been visited
     */
    private $hasSimplePhrase;

    /**
     * @var bool true once any construct that makes the query "complex" has been visited
     */
    private $hasComplex;

    /**
     * @var int number of boolean clauses currently being traversed
     */
    private $depth;

    /**
     * @var int highest value reached by $depth during the current traversal
     */
    private $maxDepth;

    /**
     * Classify a parsed query.
     *
     * BOGUS_QUERY (when present) always comes first and may be combined with
     * at most one of the other classes.
     *
     * @param ParsedQuery $query
     * @return string[]
     */
    public function classify( ParsedQuery $query ) {
        // Forget whatever a previous run left behind.
        $this->depth = 0;
        $this->maxDepth = 0;
        $this->hasWords = false;
        $this->hasSimplePhrase = false;
        $this->hasComplex = false;

        $classes = $query->getParseWarnings() !== [] ? [ self::BOGUS_QUERY ] : [];

        // Walk the tree; the visit*() callbacks below fill in the flags.
        $query->getRoot()->accept( $this );

        $shape = $this->hasComplex ? self::COMPLEX_QUERY : $this->simpleQueryClass();
        if ( $shape !== null ) {
            $classes[] = $shape;
        }

        return $classes;
    }

    /**
     * Map the flags gathered during traversal to one of the "simple" classes.
     * Must only be consulted when the query was not flagged as complex.
     *
     * @return string|null the matching class, or null if none applies
     */
    private function simpleQueryClass() {
        if ( $this->hasWords && $this->hasSimplePhrase ) {
            // Words and a phrase together are only accepted one clause deep.
            return $this->maxDepth === 1 ? self::BAG_OF_WORDS_WITH_PHRASE : null;
        }
        if ( $this->maxDepth !== 0 ) {
            // A lone kind of node is only "simple" when no boolean clause was traversed.
            return null;
        }
        if ( $this->hasWords ) {
            return self::SIMPLE_BAG_OF_WORDS;
        }
        if ( $this->hasSimplePhrase ) {
            return self::SIMPLE_PHRASE;
        }
        return null;
    }

    /**
     * Records that plain words are present.
     *
     * @param WordsQueryNode $node
     */
    public function visitWordsQueryNode( WordsQueryNode $node ) {
        $this->hasWords = true;
    }

    /**
     * A stemmed phrase or one with a slop other than -1 is complex; any other
     * phrase counts as a simple phrase unless it is unbalanced, in which case
     * nothing is recorded.
     *
     * @param PhraseQueryNode $node
     */
    public function visitPhraseQueryNode( PhraseQueryNode $node ) {
        $isPlain = !$node->isStem() && $node->getSlop() === -1;
        if ( !$isPlain ) {
            $this->hasComplex = true;
            return;
        }
        if ( !$node->isUnbalanced() ) {
            $this->hasSimplePhrase = true;
        }
    }

    /**
     * Phrase prefix queries are always complex.
     *
     * @param PhrasePrefixNode $node
     */
    public function visitPhrasePrefixNode( PhrasePrefixNode $node ) {
        $this->hasComplex = true;
    }

    /**
     * Fuzzy words are always complex.
     *
     * @param FuzzyNode $node
     */
    public function visitFuzzyNode( FuzzyNode $node ) {
        $this->hasComplex = true;
    }

    /**
     * Word prefixes are always complex.
     *
     * @param PrefixNode $node
     */
    public function visitPrefixNode( PrefixNode $node ) {
        $this->hasComplex = true;
    }

    /**
     * Wildcards are always complex.
     *
     * @param WildcardNode $node
     */
    public function visitWildcardNode( WildcardNode $node ) {
        $this->hasComplex = true;
    }

    /**
     * An empty query contributes nothing to the classification.
     *
     * @param EmptyQueryNode $node
     */
    public function visitEmptyQueryNode( EmptyQueryNode $node ) {
    }

    /**
     * Keywords are always complex.
     *
     * @param KeywordFeatureNode $node
     */
    public function visitKeywordFeatureNode( KeywordFeatureNode $node ) {
        $this->hasComplex = true;
    }

    /**
     * Descends into every clause, unless the query is already known to be
     * complex when this node is reached (nothing more can be learnt then).
     *
     * @param ParsedBooleanNode $node
     */
    public function visitParsedBooleanNode( ParsedBooleanNode $node ) {
        if ( !$this->hasComplex ) {
            foreach ( $node->getClauses() as $clause ) {
                $clause->accept( $this );
            }
        }
    }

    /**
     * Tracks the clause nesting depth around the visit of the wrapped node and
     * flags explicit or MUST_NOT clauses as complex.
     *
     * @param BooleanClause $clause
     */
    public function visitBooleanClause( BooleanClause $clause ) {
        $this->depth += 1;
        if ( $this->depth > $this->maxDepth ) {
            $this->maxDepth = $this->depth;
        }

        if ( !$this->hasComplex ) {
            $this->hasComplex = $clause->isExplicit()
                || $clause->getOccur() === BooleanClause::MUST_NOT;
        }

        $clause->getNode()->accept( $this );
        $this->depth -= 1;
    }

    /**
     * Negation is always complex; the negated child is not visited.
     *
     * @param NegatedNode $node
     */
    public function visitNegatedNode( NegatedNode $node ) {
        $this->hasComplex = true;
    }

    /**
     * Every class this classifier may emit.
     *
     * @return string[]
     */
    public function classes() {
        return [
            self::SIMPLE_BAG_OF_WORDS,
            self::SIMPLE_PHRASE,
            self::BAG_OF_WORDS_WITH_PHRASE,
            self::COMPLEX_QUERY,
            self::BOGUS_QUERY,
        ];
    }

    /**
     * A namespace header contributes nothing to the classification.
     *
     * @param NamespaceHeaderNode $node
     */
    public function visitNamespaceHeader( NamespaceHeaderNode $node ) {
    }
}