Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
98.39% covered (success)
98.39%
61 / 62
83.33% covered (warning)
83.33%
5 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
LexemeFullTextQueryBuilder
98.39% covered (success)
98.39%
61 / 62
83.33% covered (warning)
83.33%
5 / 6
11
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 newFromGlobals
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 build
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 buildDegraded
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 buildEntitySearchQuery
100.00% covered (success)
100.00%
37 / 37
100.00% covered (success)
100.00%
1 / 1
2
 buildSimpleAllFilter
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
3
1<?php
2
3namespace Wikibase\Lexeme\Search\Elastic;
4
5use CirrusSearch\Query\FullTextQueryBuilder;
6use CirrusSearch\Search\SearchContext;
7use Elastica\Query\BoolQuery;
8use Elastica\Query\DisMax;
9use Elastica\Query\MatchQuery;
10use Elastica\Query\Term;
11use MediaWiki\Language\Language;
12use Wikibase\DataModel\Entity\EntityIdParser;
13use Wikibase\Lib\Store\FallbackLabelDescriptionLookupFactory;
14use Wikibase\Repo\WikibaseRepo;
15use Wikibase\Search\Elastic\EntitySearchUtils;
16
/**
 * Builder for Lexeme fulltext queries.
 *
 * For plain searches (no special keyword used and results still possible) it
 * replaces the main query of the search context with a lexeme-specific query
 * that matches on the lexeme ID, on form IDs, and on lemma/form text. In every
 * case it switches the results type to LexemeFulltextResult.
 */
class LexemeFullTextQueryBuilder implements FullTextQueryBuilder {
    /**
     * Default profile name for lexemes
     */
    public const LEXEME_DEFAULT_PROFILE = 'lexeme_fulltext';
    /**
     * Lexeme fulltext search context name
     */
    public const CONTEXT_LEXEME_FULLTEXT = 'wikibase_lexeme_fulltext';
    /**
     * Syntax marker registered on the search context when the lexeme query is built
     */
    public const LEXEME_FULL_TEXT_MARKER = 'lexeme_full_text';

    /**
     * @var array Scoring profile. Keys read by this class: 'exact', 'folded',
     *  'form-discount', 'any' and, optionally, 'tie-breaker'.
     */
    private $settings;
    /**
     * @var EntityIdParser
     */
    private $entityIdParser;
    /**
     * @var Language User's display language (a Language object, not a language code)
     */
    private $userLanguage;
    /**
     * @var FallbackLabelDescriptionLookupFactory
     */
    private $lookupFactory;

    /**
     * @param array $settings Settings from EntitySearchProfiles.php
     * @param FallbackLabelDescriptionLookupFactory $lookupFactory
     * @param EntityIdParser $entityIdParser
     * @param Language $userLanguage User's display language
     */
    public function __construct(
        array $settings,
        FallbackLabelDescriptionLookupFactory $lookupFactory,
        EntityIdParser $entityIdParser,
        Language $userLanguage
    ) {
        $this->settings = $settings;
        $this->entityIdParser = $entityIdParser;
        $this->userLanguage = $userLanguage;
        $this->lookupFactory = $lookupFactory;
    }

    /**
     * Create fulltext builder from global environment.
     *
     * The collaborators are fetched from the WikibaseRepo static accessors.
     *
     * @param array $settings Configuration from config file
     * @return static
     */
    public static function newFromGlobals( array $settings ) {
        // Late static binding is kept on purpose so that subclasses get an
        // instance of their own class.
        return new static(
            $settings,
            WikibaseRepo::getFallbackLabelDescriptionLookupFactory(),
            WikibaseRepo::getEntityIdParser(),
            WikibaseRepo::getUserLanguage()
        );
    }

    /**
     * Set up the lexeme fulltext search for the provided term.
     *
     * The main query is only replaced when results are still possible and no
     * special keyword was used; the results type is replaced unconditionally.
     *
     * @param SearchContext $searchContext
     * @param string $term term to search
     */
    public function build( SearchContext $searchContext, $term ) {
        if ( $searchContext->areResultsPossible() && !$searchContext->isSpecialKeywordUsed() ) {
            // We use entity search query if we did not find any advanced syntax
            // and the base builder did not reject the query
            $this->buildEntitySearchQuery( $searchContext, $term );
        }
        // if we did find advanced query, we keep the old setup but change the result type
        // FIXME: make it dispatch by content model
        $searchContext->setResultsType(
            new LexemeFulltextResult(
                $this->entityIdParser,
                $this->userLanguage,
                $this->lookupFactory
            )
        );
    }

    /**
     * No degraded query is provided for lexemes.
     *
     * @param SearchContext $searchContext
     * @return bool Always false
     */
    public function buildDegraded( SearchContext $searchContext ) {
        // Not doing anything for now
        return false;
    }

    /**
     * Build a fulltext query for Wikibase entity.
     *
     * Sets the profile context, records the lexeme fulltext syntax marker and
     * installs the resulting query as the main query of the search context.
     *
     * @param SearchContext $searchContext
     * @param string $term Search term
     */
    protected function buildEntitySearchQuery( SearchContext $searchContext, $term ) {
        $searchContext->setProfileContext( self::CONTEXT_LEXEME_FULLTEXT );
        $searchContext->addSyntaxUsed( self::LEXEME_FULL_TEXT_MARKER, 10 );
        /*
         * Overall query structure is as follows:
         * - Bool with:
         *   Filter of namespace = N
         *     (NOTE: not added by this method; presumably applied elsewhere
         *     in the search pipeline - confirm)
         *   OR (Should with 1 minimum) of:
         *     title.keyword = QUERY
         *     lexeme_forms.id = QUERY
         *     fulltext match query
         *
         * Fulltext match query is:
         *   Filter of:
         *      at least one of: all, all.plain matching
         *   OR (should with 0 minimum) of:
         *     DISMAX query of: {lemma|form}.near_match
         *     OR (should with 0 minimum) of:
         *        all
         *        all.plain
         */

        $profile = $this->settings;
        // $fields is collecting all the fields for dismax query to be used in
        // scoring match. Form representations reuse the lemma weights scaled
        // by the 'form-discount' factor.
        $fields = [
            [ "lemma.near_match", $profile['exact'] ],
            [ "lemma.near_match_folded", $profile['folded'] ],
            [
                "lexeme_forms.representation.near_match",
                $profile['exact'] * $profile['form-discount'],
            ],
            [
                "lexeme_forms.representation.near_match_folded",
                $profile['folded'] * $profile['form-discount'],
            ],
        ];

        // Normalize the term once; both ID clauses below use the same value.
        $normalizedId = EntitySearchUtils::normalizeId( $term, $this->entityIdParser );

        $titleMatch = new Term( [
            'title.keyword' => $normalizedId,
        ] );
        // lexeme_forms.id is a lowercase_keyword so use Match to apply the analyzer
        $formIdMatch = new MatchQuery( 'lexeme_forms.id', $normalizedId );

        // Main query filter
        $filterQuery = $this->buildSimpleAllFilter( $term );

        // Near match ones, they use constant score
        $nearMatchQuery = new DisMax();
        $nearMatchQuery->setTieBreaker( $profile['tie-breaker'] ?? 0 );
        foreach ( $fields as [ $fieldName, $weight ] ) {
            $nearMatchQuery->addQuery(
                EntitySearchUtils::makeConstScoreQuery( $fieldName, $weight, $term )
            );
        }

        // Tokenized ones
        $tokenizedQuery = $this->buildSimpleAllFilter( $term, 'OR', $profile['any'] );

        // Main labels/desc query
        $fullTextQuery = new BoolQuery();
        $fullTextQuery->addFilter( $filterQuery );
        $fullTextQuery->addShould( $nearMatchQuery );
        $fullTextQuery->addShould( $tokenizedQuery );

        // Main query
        $query = new BoolQuery();

        // Match either labels or exact match to title
        $query->addShould( $titleMatch );
        $query->addShould( $formIdMatch );
        $query->addShould( $fullTextQuery );
        $query->setMinimumShouldMatch( 1 );

        $searchContext->setMainQuery( $query );
    }

    /**
     * Builds a bool query with one match clause per field on all and all.plain.
     *
     * With the default 'AND' operator every term must match within a field;
     * callers may pass 'OR' instead.
     *
     * @param string $query
     * @param string $operator Operator applied to each match clause
     * @param float|null $boost Boost set on each match clause; null leaves the
     *  boost unset. An explicit 0 is honoured rather than treated as "unset".
     * @return BoolQuery
     */
    private function buildSimpleAllFilter( $query, $operator = 'AND', $boost = null ) {
        $filter = new BoolQuery();
        // FIXME: We can't use solely the stem field here
        // - Depending on languages it may lack stopwords,
        // A dedicated field used for filtering would be nice
        foreach ( [ 'all', 'all.plain' ] as $field ) {
            $m = new MatchQuery();
            $m->setFieldQuery( $field, $query );
            $m->setFieldOperator( $field, $operator );
            // Compare against null instead of relying on truthiness so that a
            // configured boost of 0 is not silently dropped.
            if ( $boost !== null ) {
                $m->setFieldBoost( $field, $boost );
            }
            $filter->addShould( $m );
        }
        return $filter;
    }

}