Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
76.14% covered (warning)
76.14%
67 / 88
50.00% covered (danger)
50.00%
2 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
LexemeSearchEntity
76.14% covered (warning)
76.14%
67 / 88
50.00% covered (danger)
50.00%
2 / 4
11.36
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 getElasticSearchQuery
69.70% covered (warning)
69.70%
46 / 66
0.00% covered (danger)
0.00%
0 / 1
5.70
 makeResultType
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 getRankedSearchResults
91.67% covered (success)
91.67%
11 / 12
0.00% covered (danger)
0.00%
0 / 1
3.01
1<?php
2namespace Wikibase\Lexeme\Search\Elastic;
3
4use CirrusSearch\CirrusDebugOptions;
5use CirrusSearch\Search\ResultsType;
6use CirrusSearch\Search\SearchContext;
7use Elastica\Query\AbstractQuery;
8use Elastica\Query\BoolQuery;
9use Elastica\Query\DisMax;
10use Elastica\Query\MatchNone;
11use Elastica\Query\MatchQuery;
12use Elastica\Query\Term;
13use Language;
14use MediaWiki\Request\WebRequest;
15use Wikibase\DataModel\Entity\EntityIdParser;
16use Wikibase\Lexeme\MediaWiki\Content\LexemeContent;
17use Wikibase\Lib\Store\FallbackLabelDescriptionLookupFactory;
18use Wikibase\Repo\Api\EntitySearchException;
19use Wikibase\Repo\Api\EntitySearchHelper;
20use Wikibase\Search\Elastic\EntitySearchElastic;
21use Wikibase\Search\Elastic\EntitySearchUtils;
22use Wikibase\Search\Elastic\WikibasePrefixSearcher;
23
24/**
25 * Implementation of ElasticSearch prefix/completion search for Lexemes
26 *
27 * @license GPL-2.0-or-later
28 * @author Stas Malyshev
29 */
class LexemeSearchEntity implements EntitySearchHelper {
    /**
     * Default profile context name used for Lexeme prefix search.
     */
    public const CONTEXT_LEXEME_PREFIX = 'lexeme_prefix';

    /**
     * Parser used to normalize the search text into an entity ID for title matching
     * and handed on to the results type.
     * @var EntityIdParser
     */
    protected $idParser;
    /**
     * Web request context.
     * Used for implementing debug features such as cirrusDumpQuery.
     * @var WebRequest
     */
    private $request;
    /**
     * Language of the user, passed on to the results type.
     * @var Language
     */
    protected $userLanguage;
    /**
     * Label/description lookup factory, passed on to the results type.
     * @var FallbackLabelDescriptionLookupFactory
     */
    protected $lookupFactory;

    /**
     * Debug options handed to the searcher: the ones given to the constructor,
     * or else the ones derived from the web request.
     * @var CirrusDebugOptions|null
     */
    private $debugOptions;

    /**
     * @param EntityIdParser $idParser
     * @param WebRequest $request
     * @param Language $userLanguage
     * @param FallbackLabelDescriptionLookupFactory $lookupFactory
     * @param CirrusDebugOptions|null $options Explicit debug options; when null they are
     *  derived from $request.
     */
    public function __construct(
        EntityIdParser $idParser,
        WebRequest $request,
        Language $userLanguage,
        FallbackLabelDescriptionLookupFactory $lookupFactory,
        ?CirrusDebugOptions $options = null
    ) {
        $this->idParser = $idParser;
        $this->request = $request;
        $this->userLanguage = $userLanguage;
        $this->lookupFactory = $lookupFactory;
        $this->debugOptions = $options ?? CirrusDebugOptions::fromRequest( $this->request );
    }

    /**
     * Produce ES query that matches the arguments.
     *
     * For any entity type other than 'lexeme' the context is flagged as having no
     * possible results, a warning is recorded on it, and a MatchNone query is returned.
     *
     * Otherwise the query matches documents of the Lexeme content model whose labels
     * match the text as a prefix (scored by a DisMax over lemma and form fields), or
     * whose title equals the text normalized as an entity ID.
     *
     * @param string $text
     * @param string $entityType
     * @param SearchContext $context
     *
     * @return AbstractQuery
     */
    protected function getElasticSearchQuery(
        $text,
        $entityType,
        SearchContext $context
    ) {
        $context->setOriginalSearchTerm( $text );
        if ( $entityType !== 'lexeme' ) {
            $context->setResultsPossible( false );
            $context->addWarning( 'wikibase-search-bad-entity-type', $entityType );
            return new MatchNone();
        }
        // The "exact" variant only loses leading whitespace, so trailing whitespace typed
        // by the user is preserved; everything else uses the text trimmed on both ends.
        $textExact = ltrim( $text );
        $text = trim( $text );

        $labelsFilter = new MatchQuery( 'labels_all.prefix', $text );

        $profile = $context->getConfig()
            ->getProfileService()
            ->loadProfile( EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER,
                self::CONTEXT_LEXEME_PREFIX );

        $dismax = new DisMax();
        $dismax->setTieBreaker( $profile['tie-breaker'] ?? 0 );

        $exactBoost = $profile['exact'];
        $foldedBoost = $profile['folded'];
        $prefixBoost = $profile['prefix'];
        $formDiscount = $profile['form-discount'];

        // Each clause is [ field, boost, text to match ], in the order it is added to the DisMax.
        $clauses = [
            [ 'lemma.near_match', $exactBoost, $text ],
            [ 'lemma.near_match_folded', $foldedBoost, $text ],
            [ 'lexeme_forms.representation.near_match', $exactBoost * $formDiscount, $text ],
            [ 'lexeme_forms.representation.near_match_folded', $foldedBoost * $formDiscount, $text ],
        ];

        if ( $textExact !== $text ) {
            // The input had trailing whitespace: prefix matches on the trimmed text are
            // still accepted, but at a discount. 'space-discount' is only read here.
            $spaceDiscount = $profile['space-discount'];
            $clauses[] = [ 'lemma.prefix', $prefixBoost * $spaceDiscount, $text ];
            $clauses[] = [
                'lexeme_forms.representation.prefix',
                $prefixBoost * $spaceDiscount * $formDiscount,
                $text,
            ];
        }

        // Full-boost prefix match on the text as stated, without trailing space trimming.
        // When there was no trailing whitespace, $textExact is identical to $text.
        $clauses[] = [ 'lemma.prefix', $prefixBoost, $textExact ];
        $clauses[] = [
            'lexeme_forms.representation.prefix',
            $prefixBoost * $formDiscount,
            $textExact,
        ];

        foreach ( $clauses as [ $field, $boost, $term ] ) {
            $dismax->addQuery( EntitySearchUtils::makeConstScoreQuery( $field, $boost, $term ) );
        }

        $labelsQuery = new BoolQuery();
        $labelsQuery->addFilter( $labelsFilter );
        $labelsQuery->addShould( $dismax );
        $titleMatch = new Term( [
            'title.keyword' => EntitySearchUtils::normalizeId( $text, $this->idParser ),
        ] );

        $query = new BoolQuery();
        // Match either labels or exact match to title
        $query->addShould( $labelsQuery );
        $query->addShould( $titleMatch );
        $query->setMinimumShouldMatch( 1 );

        // Restrict results to pages of the Lexeme content model
        $query->addFilter( new Term( [ 'content_model' => LexemeContent::CONTENT_MODEL_ID ] ) );

        return $query;
    }

    /**
     * Get results type object for this search.
     *
     * Built from the ID parser, user language and lookup factory given to the constructor.
     *
     * @return ResultsType
     */
    protected function makeResultType() {
        return new LexemeTermResult(
            $this->idParser,
            $this->userLanguage,
            $this->lookupFactory
        );
    }

    /**
     * Run the Lexeme prefix search and return its results.
     *
     * $languageCode and $strictLanguage are accepted for interface compatibility but are
     * not read by this method. When $profileContext is null, CONTEXT_LEXEME_PREFIX is used.
     *
     * @inheritDoc
     * @throws EntitySearchException when the search status is not OK
     */
    public function getRankedSearchResults(
        $text,
        $languageCode,
        $entityType,
        $limit,
        $strictLanguage,
        ?string $profileContext = null
    ) {
        $profileContext ??= self::CONTEXT_LEXEME_PREFIX;
        $searcher = new WikibasePrefixSearcher( 0, $limit, $this->debugOptions );
        $searcher->getSearchContext()->setProfileContext( $profileContext );
        $query = $this->getElasticSearchQuery( $text, $entityType, $searcher->getSearchContext() );

        $searcher->setResultsType( $this->makeResultType() );

        $status = $searcher->performSearch( $query );
        if ( !$status->isOK() ) {
            throw new EntitySearchException( $status );
        }

        $result = $status->getValue();
        if ( $searcher->isReturnRaw() ) {
            // Raw/debug output requested: let the searcher post-process it for this request.
            $result = $searcher->processRawReturn( $result, $this->request );
        }

        return $result;
    }

}