Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
95.00% |
38 / 40 |
|
80.00% |
8 / 10 |
CRAP | |
0.00% |
0 / 1 |
| InLabelScoringVisitor | |
95.00% |
38 / 40 |
|
80.00% |
8 / 10 |
13 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| buildScoringQuery | |
100.00% |
28 / 28 |
|
100.00% |
1 / 1 |
4 | |||
| visitWordsQueryNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| visitPhraseQueryNode | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| visitPhrasePrefixNode | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| visitFuzzyNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| visitPrefixNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| visitWildcardNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| visitEmptyQueryNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| visitKeywordFeatureNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| 1 | <?php declare( strict_types=1 ); |
| 2 | |
| 3 | namespace Wikibase\Search\Elastic\Query; |
| 4 | |
| 5 | use CirrusSearch\Parser\AST\BooleanClause; |
| 6 | use CirrusSearch\Parser\AST\EmptyQueryNode; |
| 7 | use CirrusSearch\Parser\AST\FuzzyNode; |
| 8 | use CirrusSearch\Parser\AST\KeywordFeatureNode; |
| 9 | use CirrusSearch\Parser\AST\PhrasePrefixNode; |
| 10 | use CirrusSearch\Parser\AST\PhraseQueryNode; |
| 11 | use CirrusSearch\Parser\AST\PrefixNode; |
| 12 | use CirrusSearch\Parser\AST\Visitor\LeafVisitor; |
| 13 | use CirrusSearch\Parser\AST\WildcardNode; |
| 14 | use CirrusSearch\Parser\AST\WordsQueryNode; |
| 15 | use Elastica\Query\DisMax; |
| 16 | use Wikibase\Search\Elastic\EntitySearchUtils; |
| 17 | use Wikibase\Search\Elastic\Fields\LabelsField; |
| 18 | |
/**
 * Leaf visitor that collects the textual parts of a parsed search query and
 * turns them into a DisMax scoring query over the per-language label fields.
 *
 * NOTE(review): BooleanClause::MUST_NOT is handed to the parent constructor;
 * presumably that makes LeafVisitor skip negated leaves - confirm against
 * LeafVisitor.
 *
 * @license GPL-2.0-or-later
 */
class InLabelScoringVisitor extends LeafVisitor {

	/** @var array Text fragments gathered from visited nodes; joined with spaces when scoring */
	private array $nonNegatedWords = [];
	/** @var bool Becomes true once a phrase or phrase-prefix node has been visited */
	private bool $containsPhrase = false;
	/** @var array Stemming settings, read as [ $languageCode ][ 'query' ] */
	private array $stemmingSettings;

	/**
	 * @param array $stemmingSettings Settings indexed by language code; a non-empty
	 *  'query' entry for a language enables scoring on its stemmed label field.
	 */
	public function __construct( array $stemmingSettings ) {
		parent::__construct( [ BooleanClause::MUST_NOT ] );
		$this->stemmingSettings = $stemmingSettings;
	}

	/**
	 * Build the scoring query from the text collected so far.
	 *
	 * For every language code, constant-score queries are added in this order:
	 * near_match and near_match_folded (only when no phrase was visited), plain,
	 * and finally the stemmed field when stemming is enabled for that language.
	 *
	 * @param array $languageCodes Language codes to score label fields for
	 * @param array $profile Scoring profile; read keys are 'tie-breaker' and, per
	 *  language, "<code>-exact", "<code>-folded", "<code>-tokenized" and the
	 *  optional "<code>-stemmed" (falls back to "<code>-tokenized").
	 * @return DisMax
	 */
	public function buildScoringQuery( array $languageCodes, array $profile ): DisMax {
		$queryText = implode( ' ', $this->nonNegatedWords );
		// TODO: Should this be a DisMax or a BoolQuery?
		$scoringQuery = new DisMax();
		$scoringQuery->setTieBreaker( $profile['tie-breaker'] );

		foreach ( $languageCodes as $languageCode ) {
			// Common prefix of every per-language labels sub-field.
			$labelField = LabelsField::NAME . ".$languageCode";

			if ( !$this->containsPhrase ) {
				// TODO: Using near match on more complex boolean queries is up for debate
				$this->addLabelScoreQuery(
					$scoringQuery,
					"$labelField.near_match",
					$profile["$languageCode-exact"],
					$queryText
				);
				$this->addLabelScoreQuery(
					$scoringQuery,
					"$labelField.near_match_folded",
					$profile["$languageCode-folded"],
					$queryText
				);
			}

			$this->addLabelScoreQuery(
				$scoringQuery,
				"$labelField.plain",
				$profile["$languageCode-tokenized"],
				$queryText
			);

			// TODO: Should we also add a 'labels_all' field using $profile['any']?
			// Which type(s)? '.plain' / '.near_match' / '.near_match_folded'?

			if ( empty( $this->stemmingSettings[$languageCode]['query'] ) ) {
				// No query-time stemming configured for this language.
				continue;
			}

			$this->addLabelScoreQuery(
				$scoringQuery,
				$labelField,
				$profile["$languageCode-stemmed"] ?? $profile["$languageCode-tokenized"],
				$queryText
			);
		}

		return $scoringQuery;
	}

	/**
	 * Append one constant-score query for a single field to the given DisMax.
	 *
	 * @param DisMax $target Query being assembled
	 * @param string $field Full name of the field to match on
	 * @param mixed $weight Score value taken from the profile, passed through unchanged
	 * @param string $text Text to match
	 */
	private function addLabelScoreQuery( DisMax $target, string $field, $weight, string $text ): void {
		$target->addQuery( EntitySearchUtils::makeConstScoreQuery( $field, $weight, $text ) );
	}

	/** @inheritDoc */
	public function visitWordsQueryNode( WordsQueryNode $node ) {
		$this->nonNegatedWords[] = $node->getWords();
	}

	/** @inheritDoc */
	public function visitPhraseQueryNode( PhraseQueryNode $node ) {
		// TODO: Ok to blend phrases with the other query words, or should they be scored specifically with a MatchPhrase?
		$this->nonNegatedWords[] = $node->getPhrase();
		// A phrase disables the near_match queries in buildScoringQuery().
		$this->containsPhrase = true;
	}

	/** @inheritDoc */
	public function visitPhrasePrefixNode( PhrasePrefixNode $node ) {
		$this->nonNegatedWords[] = $node->getPhrase();
		// Handled like a plain phrase: near_match queries are skipped.
		$this->containsPhrase = true;
	}

	/** @inheritDoc */
	public function visitFuzzyNode( FuzzyNode $node ) {
		$this->nonNegatedWords[] = $node->getWord();
	}

	/** @inheritDoc */
	public function visitPrefixNode( PrefixNode $node ) {
		$this->nonNegatedWords[] = $node->getPrefix();
	}

	/** @inheritDoc */
	public function visitWildcardNode( WildcardNode $node ) {
		$this->nonNegatedWords[] = $node->getWildcardQuery();
	}

	/** @inheritDoc */
	public function visitEmptyQueryNode( EmptyQueryNode $node ) {
		// Intentionally empty: contributes no text to the scoring query.
	}

	/** @inheritDoc */
	public function visitKeywordFeatureNode( KeywordFeatureNode $node ) {
		// Intentionally empty: contributes no text to the scoring query.
	}
}