Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
96.49% covered (success)
96.49%
110 / 114
60.00% covered (warning)
60.00%
3 / 5
CRAP
0.00% covered (danger)
0.00%
0 / 1
EntitySearchElastic
96.49% covered (success)
96.49%
110 / 114
60.00% covered (warning)
60.00%
3 / 5
23
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
3
 expandGenericProfile
100.00% covered (success)
100.00%
20 / 20
100.00% covered (success)
100.00%
1 / 1
3
 loadProfile
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
2
 getElasticSearchQuery
95.08% covered (success)
95.08%
58 / 61
0.00% covered (danger)
0.00%
0 / 1
12
 getRankedSearchResults
94.74% covered (success)
94.74%
18 / 19
0.00% covered (danger)
0.00%
0 / 1
3.00
1<?php
2
3namespace Wikibase\Search\Elastic;
4
5use CirrusSearch\CirrusDebugOptions;
6use CirrusSearch\Search\SearchContext;
7use Elastica\Query\AbstractQuery;
8use Elastica\Query\BoolQuery;
9use Elastica\Query\DisMax;
10use Elastica\Query\MatchQuery;
11use Elastica\Query\Term;
12use Language;
13use MediaWiki\Request\FauxRequest;
14use MediaWiki\Request\WebRequest;
15use Wikibase\DataModel\Entity\EntityIdParser;
16use Wikibase\Lib\LanguageFallbackChainFactory;
17use Wikibase\Repo\Api\EntitySearchException;
18use Wikibase\Repo\Api\EntitySearchHelper;
19
20/**
21 * Entity search implementation using ElasticSearch.
22 * Requires CirrusSearch extension and $wgEntitySearchUseCirrus to be on.
23 *
24 * @license GPL-2.0-or-later
25 * @author Stas Malyshev
26 */
27class EntitySearchElastic implements EntitySearchHelper {
28    /**
29     * Default rescore profile
30     */
31    public const DEFAULT_RESCORE_PROFILE = 'wikibase_prefix';
32
33    /**
34     * Name of the context for profile name resolution
35     */
36    public const CONTEXT_WIKIBASE_PREFIX = 'wikibase_prefix_search';
37
38    /**
39     * Name of the context for profile name resolution
40     */
41    public const CONTEXT_WIKIBASE_FULLTEXT = 'wikibase_fulltext_search';
42
43    /**
44     * Name of the profile type used to build the elastic query
45     */
46    public const WIKIBASE_PREFIX_QUERY_BUILDER = 'wikibase_prefix_querybuilder';
47
48    /**
49     * Default query builder profile for prefix searches
50     */
51    public const DEFAULT_QUERY_BUILDER_PROFILE = 'default';
52
53    /**
54     * Default query builder profile for fulltext searches
55     *
56     */
57    public const DEFAULT_FULL_TEXT_QUERY_BUILDER_PROFILE = 'wikibase';
58
59    /**
60     * Replacement syntax for statement boosting
61     * @see \CirrusSearch\Profile\SearchProfileRepositoryTransformer
62     * and repo/config/ElasticSearchRescoreFunctions.php
63     */
64    public const STMT_BOOST_PROFILE_REPL = 'functions.*[type=term_boost].params[statement_keywords=_statementBoost_].statement_keywords';
65
66    /**
67     * @var LanguageFallbackChainFactory
68     */
69    private $languageChainFactory;
70
71    /**
72     * @var EntityIdParser
73     */
74    private $idParser;
75
76    /**
77     * @var string[]
78     */
79    private $contentModelMap;
80
81    /**
82     * Web request context.
83     * Used for implementing debug features such as cirrusDumpQuery.
84     * @var WebRequest
85     */
86    private $request;
87
88    /**
89     * List of fallback codes for search language
90     * @var string[]
91     */
92    private $searchLanguageCodes = [];
93
94    /**
95     * @var Language User language for display.
96     */
97    private $userLang;
98
99    /**
100     * @var CirrusDebugOptions
101     */
102    private $debugOptions;
103
104    /**
105     * @param LanguageFallbackChainFactory $languageChainFactory
106     * @param EntityIdParser $idParser
107     * @param Language $userLang
108     * @param array $contentModelMap Maps entity type => content model name
109     * @param WebRequest|null $request Web request context
110     * @param CirrusDebugOptions|null $options
111     */
112    public function __construct(
113        LanguageFallbackChainFactory $languageChainFactory,
114        EntityIdParser $idParser,
115        Language $userLang,
116        array $contentModelMap,
117        WebRequest $request = null,
118        CirrusDebugOptions $options = null
119    ) {
120        $this->languageChainFactory = $languageChainFactory;
121        $this->idParser = $idParser;
122        $this->userLang = $userLang;
123        $this->contentModelMap = $contentModelMap;
124        $this->request = $request ?: new FauxRequest();
125        $this->debugOptions = $options ?: CirrusDebugOptions::fromRequest( $this->request );
126    }
127
128    private function expandGenericProfile( $languageCode, array $profile ) {
129        $res = [
130            'language-chain' => $this->languageChainFactory
131                ->newFromLanguageCode( $languageCode )
132                ->getFetchLanguageCodes(),
133            'any' => $profile['any'],
134            'tie-breaker' => $profile['tie-breaker'],
135            'space-discount' => $profile['space-discount'] ?? null,
136            "{$languageCode}-exact" => $profile['lang-exact'],
137            "{$languageCode}-folded" => $profile['lang-folded'],
138            "{$languageCode}-prefix" => $profile['lang-prefix'],
139        ];
140
141        $discount = $profile['fallback-discount'];
142        foreach ( $res['language-chain'] as $fallback ) {
143            if ( $fallback === $languageCode ) {
144                continue;
145            }
146            $res["{$fallback}-exact"] = $profile['fallback-exact'] * $discount;
147            $res["{$fallback}-folded"] = $profile['fallback-folded'] * $discount;
148            $res["{$fallback}-prefix"] = $profile['fallback-prefix'] * $discount;
149            $discount *= $profile['fallback-discount'];
150        }
151
152        return $res;
153    }
154
155    private function loadProfile( SearchContext $context, $languageCode ) {
156        $profile = $context->getConfig()
157            ->getProfileService()
158            ->loadProfile( self::WIKIBASE_PREFIX_QUERY_BUILDER, $context->getProfileContext(), null,
159                $context->getProfileContextParams() );
160
161        // Set some bc defaults for properties that didn't always exist.
162        $profile['tie-breaker'] ??= 0;
163
164        // There are two flavors of profiles: fully specified, and generic
165        // fallback. When language-chain is provided we assume a fully
166        // specified profile. Otherwise we expand the language agnostic
167        // profile into a language specific profile.
168        if ( !isset( $profile['language-chain'] ) ) {
169            $profile = $this->expandGenericProfile( $languageCode, $profile );
170        }
171
172        return $profile;
173    }
174
175    /**
176     * Produce ES query that matches the arguments.
177     *
178     * @param string $text
179     * @param string $languageCode
180     * @param string $entityType
181     * @param bool $strictLanguage
182     * @param SearchContext $context
183     *
184     * @return AbstractQuery
185     */
186    protected function getElasticSearchQuery(
187        $text,
188        $languageCode,
189        $entityType,
190        $strictLanguage,
191        SearchContext $context
192    ) {
193        $query = new BoolQuery();
194
195        $context->setOriginalSearchTerm( $text );
196        // Drop only leading spaces for exact matches, and all spaces for the rest
197        $textExact = ltrim( $text );
198        $text = trim( $text );
199        if ( empty( $this->contentModelMap[$entityType] ) ) {
200            $context->setResultsPossible( false );
201            $context->addWarning( 'wikibasecirrus-search-bad-entity-type', $entityType );
202            return $query;
203        }
204
205        $labelsFilter = new MatchQuery( 'labels_all.prefix', $text );
206
207        $profile = $this->loadProfile( $context, $languageCode );
208        $this->searchLanguageCodes = $profile['language-chain'];
209        if ( $languageCode !== $this->searchLanguageCodes[0] ) {
210            // Log a warning? Are there valid reasons for the primary language
211            // in the profile to not match the profile request?
212            $languageCode = $this->searchLanguageCodes[0];
213        }
214
215        $fields = [
216            [ "labels.{$languageCode}.near_match", $profile["{$languageCode}-exact"] ],
217            [ "labels.{$languageCode}.near_match_folded", $profile["{$languageCode}-folded"] ],
218        ];
219        // Fields to which query applies exactly as stated, without trailing space trimming
220        $fieldsExact = [];
221        $weight = $profile["{$languageCode}-prefix"];
222        if ( $textExact !== $text && isset( $profile['space-discount'] ) ) {
223            $fields[] =
224                [
225                    "labels.{$languageCode}.prefix",
226                    $weight * $profile['space-discount'],
227                ];
228            $fieldsExact[] = [ "labels.{$languageCode}.prefix", $weight ];
229        } else {
230            $fields[] = [ "labels.{$languageCode}.prefix", $weight ];
231        }
232
233        if ( !$strictLanguage ) {
234            $fields[] = [ "labels_all.near_match_folded", $profile['any'] ];
235            foreach ( $this->searchLanguageCodes as $fallbackCode ) {
236                if ( $fallbackCode === $languageCode ) {
237                    continue;
238                }
239                $fields[] = [
240                    "labels.{$fallbackCode}.near_match",
241                    $profile["{$fallbackCode}-exact"] ];
242                $fields[] = [
243                    "labels.{$fallbackCode}.near_match_folded",
244                    $profile["{$fallbackCode}-folded"] ];
245
246                $weight = $profile["{$fallbackCode}-prefix"];
247                if ( $textExact !== $text && isset( $profile['space-discount'] ) ) {
248                    $fields[] = [
249                        "labels.{$fallbackCode}.prefix",
250                        $weight * $profile['space-discount']
251                    ];
252                    $fieldsExact[] = [ "labels.{$fallbackCode}.prefix", $weight ];
253                } else {
254                    $fields[] = [ "labels.{$fallbackCode}.prefix", $weight ];
255                }
256            }
257        }
258
259        $dismax = new DisMax();
260        $dismax->setTieBreaker( $profile['tie-breaker'] );
261        foreach ( $fields as $field ) {
262            $dismax->addQuery( EntitySearchUtils::makeConstScoreQuery( $field[0], $field[1], $text ) );
263        }
264
265        foreach ( $fieldsExact as $field ) {
266            $dismax->addQuery( EntitySearchUtils::makeConstScoreQuery( $field[0], $field[1], $textExact ) );
267        }
268
269        $labelsQuery = new BoolQuery();
270        $labelsQuery->addFilter( $labelsFilter );
271        $labelsQuery->addShould( $dismax );
272        $titleMatch = new Term( [ 'title.keyword' => EntitySearchUtils::normalizeId( $text, $this->idParser ) ] );
273
274        // Match either labels or exact match to title
275        $query->addShould( $labelsQuery );
276        $query->addShould( $titleMatch );
277        $query->setMinimumShouldMatch( 1 );
278
279        // Filter to fetch only given entity type
280        $query->addFilter( new Term( [ 'content_model' => $this->contentModelMap[$entityType] ] ) );
281
282        return $query;
283    }
284
285    /**
286     * @inheritDoc
287     */
288    public function getRankedSearchResults(
289        $text,
290        $languageCode,
291        $entityType,
292        $limit,
293        $strictLanguage,
294        string $profileContext = null
295    ) {
296        $profileContext ??= self::CONTEXT_WIKIBASE_PREFIX;
297        $searcher = new WikibasePrefixSearcher( 0, $limit, $this->debugOptions );
298        $searcher->getSearchContext()->setProfileContext(
299            $profileContext,
300            [ 'language' => $languageCode ] );
301        $query = $this->getElasticSearchQuery( $text, $languageCode, $entityType, $strictLanguage,
302                $searcher->getSearchContext() );
303
304        $searcher->setResultsType( new ElasticTermResult(
305            $this->idParser,
306            $this->searchLanguageCodes,
307            $this->languageChainFactory->newFromLanguage( $this->userLang )
308        ) );
309
310        $result = $searcher->performSearch( $query );
311
312        if ( $result->isOK() ) {
313            $result = $result->getValue();
314        } else {
315            throw new EntitySearchException( $result );
316        }
317
318        if ( $searcher->isReturnRaw() ) {
319            $result = $searcher->processRawReturn( $result, $this->request );
320        }
321
322        return $result;
323    }
324
325}