Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
95.00% covered (success)
95.00%
38 / 40
80.00% covered (warning)
80.00%
8 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
InLabelScoringVisitor
95.00% covered (success)
95.00%
38 / 40
80.00% covered (warning)
80.00%
8 / 10
13
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 buildScoringQuery
100.00% covered (success)
100.00%
28 / 28
100.00% covered (success)
100.00%
1 / 1
4
 visitWordsQueryNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitPhraseQueryNode
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 visitPhrasePrefixNode
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 visitFuzzyNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitPrefixNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitWildcardNode
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 visitEmptyQueryNode
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 visitKeywordFeatureNode
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
1<?php declare( strict_types=1 );
2
3namespace Wikibase\Search\Elastic\Query;
4
5use CirrusSearch\Parser\AST\BooleanClause;
6use CirrusSearch\Parser\AST\EmptyQueryNode;
7use CirrusSearch\Parser\AST\FuzzyNode;
8use CirrusSearch\Parser\AST\KeywordFeatureNode;
9use CirrusSearch\Parser\AST\PhrasePrefixNode;
10use CirrusSearch\Parser\AST\PhraseQueryNode;
11use CirrusSearch\Parser\AST\PrefixNode;
12use CirrusSearch\Parser\AST\Visitor\LeafVisitor;
13use CirrusSearch\Parser\AST\WildcardNode;
14use CirrusSearch\Parser\AST\WordsQueryNode;
15use Elastica\Query\DisMax;
16use Wikibase\Search\Elastic\EntitySearchUtils;
17use Wikibase\Search\Elastic\Fields\LabelsField;
18
/**
 * Leaf visitor that collects the text carried by the leaves of a parsed search
 * query and builds a DisMax query scoring that text against per-language label fields.
 *
 * The parent visitor is constructed with BooleanClause::MUST_NOT, presumably so that
 * negated clauses are skipped and never contribute text (confirm against LeafVisitor).
 *
 * @license GPL-2.0-or-later
 */
class InLabelScoringVisitor extends LeafVisitor {

    /** @var array Text fragments gathered from the visited nodes, in visiting order */
    private array $nonNegatedWords = [];

    /** @var bool True once a phrase or phrase-prefix node has been visited */
    private bool $containsPhrase = false;

    /** @var array Stemming settings; only the [languageCode]['query'] entry is read here */
    private array $stemmingSettings;

    /**
     * @param array $stemmingSettings Settings indexed by language code
     */
    public function __construct( array $stemmingSettings ) {
        parent::__construct( [ BooleanClause::MUST_NOT ] );
        $this->stemmingSettings = $stemmingSettings;
    }

    /**
     * Build the scoring query from the text collected so far.
     *
     * For every language code the following clauses are added, in this order:
     * the near_match and near_match_folded sub-fields (skipped as soon as a phrase
     * has been visited), the plain sub-field, and - when the stemming settings have
     * a non-empty 'query' entry for that language - the bare language field.
     *
     * @param array $languageCodes Language codes to score against
     * @param array $profile Read for 'tie-breaker' and, per language code, the
     *  "<code>-exact", "<code>-folded" and "<code>-tokenized" entries; "<code>-stemmed"
     *  is optional and falls back to "<code>-tokenized"
     * @return DisMax
     */
    public function buildScoringQuery( array $languageCodes, array $profile ): DisMax {
        $queryText = implode( ' ', $this->nonNegatedWords );
        $labelsField = LabelsField::NAME;
        // Sub-field suffix => profile key suffix, listed in the order the clauses are added.
        $nearMatchVariants = [
            'near_match' => 'exact',
            'near_match_folded' => 'folded',
        ];

        // TODO: Should this be a DisMax or a BoolQuery?
        $scoringQuery = new DisMax();
        $scoringQuery->setTieBreaker( $profile['tie-breaker'] );

        foreach ( $languageCodes as $code ) {
            $languageField = "$labelsField.$code";

            if ( $this->containsPhrase === false ) {
                // TODO: Using near match on more complex boolean queries is up for debate
                foreach ( $nearMatchVariants as $subField => $weightKey ) {
                    $this->addConstScoreClause(
                        $scoringQuery,
                        "$languageField.$subField",
                        $profile["$code-$weightKey"],
                        $queryText
                    );
                }
            }

            $this->addConstScoreClause(
                $scoringQuery,
                "$languageField.plain",
                $profile["$code-tokenized"],
                $queryText
            );

            $stemQueries = !empty( $this->stemmingSettings[$code]['query'] );
            if ( $stemQueries ) {
                $this->addConstScoreClause(
                    $scoringQuery,
                    $languageField,
                    $profile["$code-stemmed"] ?? $profile["$code-tokenized"],
                    $queryText
                );
            }

            // TODO: Should we also add a 'labels_all' field using $profile['any']?
            //       Which type(s)? '.plain' / '.near_match' / '.near_match_folded'?
        }

        return $scoringQuery;
    }

    /**
     * Add one clause built by EntitySearchUtils::makeConstScoreQuery() to the DisMax query.
     *
     * @param DisMax $dismax Query receiving the new clause
     * @param string $field Full name of the field to match on
     * @param mixed $weight Profile value, handed to makeConstScoreQuery() unchanged
     * @param string $text Text to match
     */
    private function addConstScoreClause( DisMax $dismax, string $field, $weight, string $text ): void {
        $dismax->addQuery( EntitySearchUtils::makeConstScoreQuery( $field, $weight, $text ) );
    }

    /**
     * Append one fragment to the text that buildScoringQuery() later joins with spaces.
     *
     * @param mixed $fragment Value returned by the node; presumably a string, left untyped
     *  so it is stored exactly as received
     */
    private function collectText( $fragment ): void {
        $this->nonNegatedWords[] = $fragment;
    }

    /** @inheritDoc */
    public function visitWordsQueryNode( WordsQueryNode $node ) {
        $this->collectText( $node->getWords() );
    }

    /** @inheritDoc */
    public function visitPhraseQueryNode( PhraseQueryNode $node ) {
        // TODO: Ok to blend phrases with the other query words, or should they be scored specifically with a MatchPhrase?
        $this->containsPhrase = true;
        $this->collectText( $node->getPhrase() );
    }

    /** @inheritDoc */
    public function visitPhrasePrefixNode( PhrasePrefixNode $node ) {
        $this->containsPhrase = true;
        $this->collectText( $node->getPhrase() );
    }

    /** @inheritDoc */
    public function visitFuzzyNode( FuzzyNode $node ) {
        $this->collectText( $node->getWord() );
    }

    /** @inheritDoc */
    public function visitPrefixNode( PrefixNode $node ) {
        $this->collectText( $node->getPrefix() );
    }

    /** @inheritDoc */
    public function visitWildcardNode( WildcardNode $node ) {
        $this->collectText( $node->getWildcardQuery() );
    }

    /** @inheritDoc */
    public function visitEmptyQueryNode( EmptyQueryNode $node ) {
        // Intentionally empty: this node contributes no text to the scoring query.
    }

    /** @inheritDoc */
    public function visitKeywordFeatureNode( KeywordFeatureNode $node ) {
        // Intentionally empty: this node contributes no text to the scoring query.
    }
}