Code Coverage |
||||||||||
Classes and Traits |
Functions and Methods |
Lines |
||||||||
Total | |
0.00% |
0 / 1 |
|
78.57% |
11 / 14 |
CRAP | |
92.73% |
51 / 55 |
BasicQueryClassifier | |
0.00% |
0 / 1 |
|
78.57% |
11 / 14 |
32.39 | |
92.73% |
51 / 55 |
classify | |
100.00% |
1 / 1 |
12 | |
100.00% |
18 / 18 |
|||
visitWordsQueryNode | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
visitPhraseQueryNode | |
100.00% |
1 / 1 |
4 | |
100.00% |
5 / 5 |
|||
visitPhrasePrefixNode | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
visitFuzzyNode | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
visitPrefixNode | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
visitWildcardNode | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
visitEmptyQueryNode | |
100.00% |
1 / 1 |
1 | |
100.00% |
1 / 1 |
|||
visitKeywordFeatureNode | |
100.00% |
1 / 1 |
1 | |
100.00% |
2 / 2 |
|||
visitParsedBooleanNode | |
0.00% |
0 / 1 |
3.07 | |
80.00% |
4 / 5 |
|||
visitBooleanClause | |
100.00% |
1 / 1 |
3 | |
100.00% |
6 / 6 |
|||
visitNegatedNode | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 2 |
|||
classes | |
100.00% |
1 / 1 |
1 | |
100.00% |
5 / 5 |
|||
visitNamespaceHeader | |
0.00% |
0 / 1 |
2 | |
0.00% |
0 / 1 |
<?php | |
namespace CirrusSearch\Parser; | |
use CirrusSearch\Parser\AST\BooleanClause; | |
use CirrusSearch\Parser\AST\EmptyQueryNode; | |
use CirrusSearch\Parser\AST\FuzzyNode; | |
use CirrusSearch\Parser\AST\KeywordFeatureNode; | |
use CirrusSearch\Parser\AST\NamespaceHeaderNode; | |
use CirrusSearch\Parser\AST\NegatedNode; | |
use CirrusSearch\Parser\AST\ParsedBooleanNode; | |
use CirrusSearch\Parser\AST\ParsedQuery; | |
use CirrusSearch\Parser\AST\PhrasePrefixNode; | |
use CirrusSearch\Parser\AST\PhraseQueryNode; | |
use CirrusSearch\Parser\AST\PrefixNode; | |
use CirrusSearch\Parser\AST\Visitor\Visitor; | |
use CirrusSearch\Parser\AST\WildcardNode; | |
use CirrusSearch\Parser\AST\WordsQueryNode; | |
/** | |
* Basic classifier to identify queries like: | |
* - simple words: foo bar | |
* - simple phrase: "foo bar" | |
* - simple words + simple phrase; foo "bar baz" | |
* - complex: any queries that use a keyword, or any non trivial features | |
* - bogus queries: queries where a bogus pattern have been identified at | |
* parse time | |
*/ | |
class BasicQueryClassifier implements ParsedQueryClassifier, Visitor { | |
/** | |
* The simplest query ever: only words | |
*/ | |
public const SIMPLE_BAG_OF_WORDS = 'simple_bag_of_words'; | |
/** | |
* Only quoted words | |
*/ | |
public const SIMPLE_PHRASE = 'simple_phrase_query'; | |
/** | |
* A simple bag of words query with some quoted words | |
*/ | |
public const BAG_OF_WORDS_WITH_PHRASE = 'bag_of_words_with_phrase_query'; | |
/** | |
* Expert: a query that uses some special syntax such as: | |
* - wildcards/fuzzy/word prefix | |
* - explicit boolean expression | |
* - complex phrase (phrase prefix, non default slop) | |
*/ | |
public const COMPLEX_QUERY = 'complex_query'; | |
/** | |
* Query that was fixed/corrected | |
*/ | |
public const BOGUS_QUERY = 'bogus_query'; | |
/** | |
* @var bool | |
*/ | |
private $hasWords; | |
/** | |
* @var bool | |
*/ | |
private $hasSimplePhrase; | |
/** | |
* @var bool | |
*/ | |
private $hasComplex; | |
/** | |
* @var int | |
*/ | |
private $depth; | |
/** | |
* @var int | |
*/ | |
private $maxDepth; | |
/** | |
* @param ParsedQuery $query | |
* @return string[] | |
*/ | |
public function classify( ParsedQuery $query ) { | |
$this->hasWords = false; | |
$this->hasSimplePhrase = false; | |
$this->hasComplex = false; | |
$this->depth = 0; | |
$this->maxDepth = 0; | |
$classes = []; | |
if ( $query->getParseWarnings() !== [] ) { | |
$classes[] = self::BOGUS_QUERY; | |
} | |
$query->getRoot()->accept( $this ); | |
if ( $this->hasComplex ) { | |
$classes[] = self::COMPLEX_QUERY; | |
// @phan-suppress-next-line PhanSuspiciousValueComparison | |
} elseif ( $this->maxDepth === 0 && $this->hasWords && !$this->hasSimplePhrase ) { | |
$classes[] = self::SIMPLE_BAG_OF_WORDS; | |
} elseif ( $this->maxDepth === 0 && !$this->hasWords && $this->hasSimplePhrase ) { | |
$classes[] = self::SIMPLE_PHRASE; | |
} elseif ( $this->maxDepth === 1 && $this->hasWords && $this->hasSimplePhrase ) { | |
$classes[] = self::BAG_OF_WORDS_WITH_PHRASE; | |
} | |
return $classes; | |
} | |
/** | |
* @param WordsQueryNode $node | |
*/ | |
public function visitWordsQueryNode( WordsQueryNode $node ) { | |
$this->hasWords = true; | |
} | |
/** | |
* @param PhraseQueryNode $node | |
*/ | |
public function visitPhraseQueryNode( PhraseQueryNode $node ) { | |
if ( $node->isStem() || $node->getSlop() !== -1 ) { | |
$this->hasComplex = true; | |
} elseif ( !$node->isUnbalanced() ) { | |
$this->hasSimplePhrase = true; | |
} | |
} | |
/** | |
* @param PhrasePrefixNode $node | |
*/ | |
public function visitPhrasePrefixNode( PhrasePrefixNode $node ) { | |
$this->hasComplex = true; | |
} | |
/** | |
* @param FuzzyNode $node | |
*/ | |
public function visitFuzzyNode( FuzzyNode $node ) { | |
$this->hasComplex = true; | |
} | |
/** | |
* @param PrefixNode $node | |
*/ | |
public function visitPrefixNode( PrefixNode $node ) { | |
$this->hasComplex = true; | |
} | |
/** | |
* @param WildcardNode $node | |
*/ | |
public function visitWildcardNode( WildcardNode $node ) { | |
$this->hasComplex = true; | |
} | |
/** | |
* @param EmptyQueryNode $node | |
*/ | |
public function visitEmptyQueryNode( EmptyQueryNode $node ) { | |
} | |
/** | |
* @param KeywordFeatureNode $node | |
*/ | |
public function visitKeywordFeatureNode( KeywordFeatureNode $node ) { | |
$this->hasComplex = true; | |
} | |
/** | |
* @param ParsedBooleanNode $node | |
*/ | |
public function visitParsedBooleanNode( ParsedBooleanNode $node ) { | |
if ( $this->hasComplex ) { | |
// we can quickly skip, this query cannot belong to this class | |
return; | |
} | |
foreach ( $node->getClauses() as $clause ) { | |
$clause->accept( $this ); | |
} | |
} | |
/** | |
* @param BooleanClause $clause | |
*/ | |
public function visitBooleanClause( BooleanClause $clause ) { | |
$this->depth++; | |
$this->maxDepth = max( $this->depth, $this->maxDepth ); | |
$this->hasComplex = $this->hasComplex || $clause->isExplicit() || $clause->getOccur() === BooleanClause::MUST_NOT; | |
$clause->getNode()->accept( $this ); | |
$this->depth--; | |
} | |
/** | |
* @param NegatedNode $node | |
*/ | |
public function visitNegatedNode( NegatedNode $node ) { | |
$this->hasComplex = true; | |
} | |
/** | |
* @return string[] | |
*/ | |
public function classes() { | |
return [ | |
self::SIMPLE_BAG_OF_WORDS, | |
self::SIMPLE_PHRASE, | |
self::BAG_OF_WORDS_WITH_PHRASE, | |
self::COMPLEX_QUERY, | |
self::BOGUS_QUERY, | |
]; | |
} | |
/** | |
* @param NamespaceHeaderNode $node | |
*/ | |
public function visitNamespaceHeader( NamespaceHeaderNode $node ) { | |
} | |
} |