Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
95.00% |
38 / 40 |
|
80.00% |
8 / 10 |
CRAP | |
0.00% |
0 / 1 |
InLabelScoringVisitor | |
95.00% |
38 / 40 |
|
80.00% |
8 / 10 |
13 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
buildScoringQuery | |
100.00% |
28 / 28 |
|
100.00% |
1 / 1 |
4 | |||
visitWordsQueryNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitPhraseQueryNode | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
visitPhrasePrefixNode | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
visitFuzzyNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitPrefixNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitWildcardNode | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
visitEmptyQueryNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
visitKeywordFeatureNode | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php declare( strict_types=1 ); |
2 | |
3 | namespace Wikibase\Search\Elastic\Query; |
4 | |
5 | use CirrusSearch\Parser\AST\BooleanClause; |
6 | use CirrusSearch\Parser\AST\EmptyQueryNode; |
7 | use CirrusSearch\Parser\AST\FuzzyNode; |
8 | use CirrusSearch\Parser\AST\KeywordFeatureNode; |
9 | use CirrusSearch\Parser\AST\PhrasePrefixNode; |
10 | use CirrusSearch\Parser\AST\PhraseQueryNode; |
11 | use CirrusSearch\Parser\AST\PrefixNode; |
12 | use CirrusSearch\Parser\AST\Visitor\LeafVisitor; |
13 | use CirrusSearch\Parser\AST\WildcardNode; |
14 | use CirrusSearch\Parser\AST\WordsQueryNode; |
15 | use Elastica\Query\DisMax; |
16 | use Wikibase\Search\Elastic\EntitySearchUtils; |
17 | use Wikibase\Search\Elastic\Fields\LabelsField; |
18 | |
/**
 * Leaf visitor that gathers the text carried by the leaf nodes it visits and
 * turns it into a DisMax scoring query over per-language label fields.
 *
 * The parent visitor is constructed with [ BooleanClause::MUST_NOT ]; presumably
 * this makes it skip negated clauses, hence "non negated" words -- confirm
 * against LeafVisitor.
 *
 * @license GPL-2.0-or-later
 */
class InLabelScoringVisitor extends LeafVisitor {

	/** @var array text fragments collected from visited leaf nodes, in visiting order */
	private array $nonNegatedWords = [];
	/** @var bool true once a phrase or phrase-prefix node has been visited */
	private bool $containsPhrase = false;
	/** @var array settings indexed by language code; only the 'query' entry is read here */
	private array $stemmingSettings;

	/**
	 * @param array $stemmingSettings settings indexed by language code
	 */
	public function __construct( array $stemmingSettings ) {
		parent::__construct( [ BooleanClause::MUST_NOT ] );
		$this->stemmingSettings = $stemmingSettings;
	}

	/**
	 * Build the scoring query from the text collected so far.
	 *
	 * For each language code a clause on the ".plain" sub-field is always added.
	 * The ".near_match" and ".near_match_folded" clauses are only added while no
	 * phrase node has been visited, and a clause on the bare language field is
	 * added when the stemming settings hold a non-empty 'query' entry for that
	 * language.
	 *
	 * @param array $languageCodes language codes whose label fields are queried
	 * @param array $profile reads 'tie-breaker' plus "<code>-exact", "<code>-folded",
	 *  "<code>-tokenized" and, optionally, "<code>-stemmed" (falls back to
	 *  "<code>-tokenized"); the values are handed to
	 *  EntitySearchUtils::makeConstScoreQuery
	 * @return DisMax
	 */
	public function buildScoringQuery( array $languageCodes, array $profile ): DisMax {
		$fieldPrefix = LabelsField::NAME;
		$queryText = implode( ' ', $this->nonNegatedWords );

		// TODO: Should this be a DisMax or a BoolQuery?
		$scoringQuery = new DisMax();
		$scoringQuery->setTieBreaker( $profile['tie-breaker'] );

		// Every clause has the same shape; only the field and the profile value differ.
		$addClause = static function ( string $field, $profileValue ) use ( $scoringQuery, $queryText ): void {
			$scoringQuery->addQuery(
				EntitySearchUtils::makeConstScoreQuery( $field, $profileValue, $queryText )
			);
		};

		foreach ( $languageCodes as $code ) {
			$languageField = $fieldPrefix . '.' . $code;

			if ( $this->containsPhrase === false ) {
				// TODO: Using near match on more complex boolean queries is up for debate
				$addClause( $languageField . '.near_match', $profile[$code . '-exact'] );
				$addClause( $languageField . '.near_match_folded', $profile[$code . '-folded'] );
			}

			$addClause( $languageField . '.plain', $profile[$code . '-tokenized'] );

			// TODO: Should we also add a 'labels_all' field using $profile['any']?
			// Which type(s)? '.plain' / '.near_match' / '.near_match_folded'?

			if ( empty( $this->stemmingSettings[$code]['query'] ) ) {
				// No stemmed querying configured for this language.
				continue;
			}

			$addClause(
				$languageField,
				$profile[$code . '-stemmed'] ?? $profile[$code . '-tokenized']
			);
		}

		return $scoringQuery;
	}

	/**
	 * Append one text fragment to the collected query text.
	 *
	 * @param mixed $fragment value obtained from a leaf node getter
	 */
	private function collectText( $fragment ): void {
		$this->nonNegatedWords[] = $fragment;
	}

	/** @inheritDoc */
	public function visitWordsQueryNode( WordsQueryNode $node ) {
		$this->collectText( $node->getWords() );
	}

	/** @inheritDoc */
	public function visitPhraseQueryNode( PhraseQueryNode $node ) {
		// TODO: Ok to blend phrases with the other query words, or should they be scored specifically with a MatchPhrase?
		$this->containsPhrase = true;
		$this->collectText( $node->getPhrase() );
	}

	/** @inheritDoc */
	public function visitPhrasePrefixNode( PhrasePrefixNode $node ) {
		$this->containsPhrase = true;
		$this->collectText( $node->getPhrase() );
	}

	/** @inheritDoc */
	public function visitFuzzyNode( FuzzyNode $node ) {
		$this->collectText( $node->getWord() );
	}

	/** @inheritDoc */
	public function visitPrefixNode( PrefixNode $node ) {
		$this->collectText( $node->getPrefix() );
	}

	/** @inheritDoc */
	public function visitWildcardNode( WildcardNode $node ) {
		$this->collectText( $node->getWildcardQuery() );
	}

	/** @inheritDoc */
	public function visitEmptyQueryNode( EmptyQueryNode $node ) {
		// Intentionally empty: nothing is collected from this node type.
	}

	/** @inheritDoc */
	public function visitKeywordFeatureNode( KeywordFeatureNode $node ) {
		// Intentionally empty: nothing is collected from this node type.
	}
}