Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
76.14% |
67 / 88 |
|
50.00% |
2 / 4 |
CRAP | |
0.00% |
0 / 1 |
LexemeSearchEntity | |
76.14% |
67 / 88 |
|
50.00% |
2 / 4 |
11.36 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getElasticSearchQuery | |
69.70% |
46 / 66 |
|
0.00% |
0 / 1 |
5.70 | |||
makeResultType | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getRankedSearchResults | |
91.67% |
11 / 12 |
|
0.00% |
0 / 1 |
3.01 |
1 | <?php |
2 | namespace Wikibase\Lexeme\Search\Elastic; |
3 | |
4 | use CirrusSearch\CirrusDebugOptions; |
5 | use CirrusSearch\Search\ResultsType; |
6 | use CirrusSearch\Search\SearchContext; |
7 | use Elastica\Query\AbstractQuery; |
8 | use Elastica\Query\BoolQuery; |
9 | use Elastica\Query\DisMax; |
10 | use Elastica\Query\MatchNone; |
11 | use Elastica\Query\MatchQuery; |
12 | use Elastica\Query\Term; |
13 | use Language; |
14 | use MediaWiki\Request\WebRequest; |
15 | use Wikibase\DataModel\Entity\EntityIdParser; |
16 | use Wikibase\Lexeme\MediaWiki\Content\LexemeContent; |
17 | use Wikibase\Lib\Store\FallbackLabelDescriptionLookupFactory; |
18 | use Wikibase\Repo\Api\EntitySearchException; |
19 | use Wikibase\Repo\Api\EntitySearchHelper; |
20 | use Wikibase\Search\Elastic\EntitySearchElastic; |
21 | use Wikibase\Search\Elastic\EntitySearchUtils; |
22 | use Wikibase\Search\Elastic\WikibasePrefixSearcher; |
23 | |
24 | /** |
25 | * Implementation of ElasticSearch prefix/completion search for Lexemes |
26 | * |
27 | * @license GPL-2.0-or-later |
28 | * @author Stas Malyshev |
29 | */ |
class LexemeSearchEntity implements EntitySearchHelper {
	/**
	 * Profile context name. Used to select the prefix query builder profile
	 * and as the default profile context of the search.
	 */
	public const CONTEXT_LEXEME_PREFIX = 'lexeme_prefix';

	/**
	 * Parser used to normalize entity IDs found in the search text and
	 * handed over to the results type.
	 * @var EntityIdParser
	 */
	protected $idParser;
	/**
	 * Web request context.
	 * Used for implementing debug features such as cirrusDumpQuery.
	 * @var WebRequest
	 */
	private $request;
	/**
	 * Language of the user, handed over to the results type.
	 * @var Language
	 */
	protected $userLanguage;
	/**
	 * Lookup factory handed over to the results type.
	 * @var FallbackLabelDescriptionLookupFactory
	 */
	protected $lookupFactory;

	/**
	 * Debug options passed to the searcher.
	 * @var CirrusDebugOptions|null
	 */
	private $debugOptions;

	/**
	 * @param EntityIdParser $idParser
	 * @param WebRequest $request
	 * @param Language $userLanguage
	 * @param FallbackLabelDescriptionLookupFactory $lookupFactory
	 * @param CirrusDebugOptions|null $options Debug options to use. When omitted,
	 *  they are derived from $request via CirrusDebugOptions::fromRequest().
	 */
	public function __construct(
		EntityIdParser $idParser,
		WebRequest $request,
		Language $userLanguage,
		FallbackLabelDescriptionLookupFactory $lookupFactory,
		// Explicitly nullable: relying on "= null" to imply nullability is
		// deprecated since PHP 8.4.
		?CirrusDebugOptions $options = null
	) {
		$this->idParser = $idParser;
		$this->request = $request;
		$this->userLanguage = $userLanguage;
		$this->lookupFactory = $lookupFactory;
		$this->debugOptions = $options ?? CirrusDebugOptions::fromRequest( $this->request );
	}

	/**
	 * Produce ES query that matches the arguments.
	 *
	 * For any entity type other than 'lexeme' the context is marked as having
	 * no possible results, a warning is added and a MatchNone query is returned.
	 *
	 * Otherwise the query matches documents with the Lexeme content model for
	 * which either the (normalized) text equals the title, or the text is a
	 * prefix match on 'labels_all.prefix'. Label matches are scored by a DisMax
	 * over lemma and form representation fields, weighted by the loaded profile.
	 *
	 * @param string $text
	 * @param string $entityType
	 * @param SearchContext $context
	 *
	 * @return AbstractQuery
	 */
	protected function getElasticSearchQuery(
		$text,
		$entityType,
		SearchContext $context
	) {
		$context->setOriginalSearchTerm( $text );
		if ( $entityType !== 'lexeme' ) {
			$context->setResultsPossible( false );
			$context->addWarning( 'wikibase-search-bad-entity-type', $entityType );
			return new MatchNone();
		}
		// $textExact keeps trailing whitespace (only the leading one is dropped),
		// $text is stripped on both ends and is used for everything else.
		$textExact = ltrim( $text );
		$text = trim( $text );

		$labelsFilter = new MatchQuery( 'labels_all.prefix', $text );

		$profile = $context->getConfig()
			->getProfileService()
			->loadProfile(
				EntitySearchElastic::WIKIBASE_PREFIX_QUERY_BUILDER,
				self::CONTEXT_LEXEME_PREFIX
			);

		$dismax = new DisMax();
		$dismax->setTieBreaker( $profile['tie-breaker'] ?? 0 );

		// Each entry is a [ field name, score ] pair. Matches on form
		// representations are scaled by the 'form-discount' factor.
		$fields = [
			[ "lemma.near_match", $profile['exact'] ],
			[ "lemma.near_match_folded", $profile['folded'] ],
			[
				"lexeme_forms.representation.near_match",
				$profile['exact'] * $profile['form-discount'],
			],
			[
				"lexeme_forms.representation.near_match_folded",
				$profile['folded'] * $profile['form-discount'],
			],
		];
		// Fields to which query applies exactly as stated, without trailing space trimming
		$fieldsExact = [];
		if ( $textExact !== $text ) {
			// The input ended with whitespace: prefix matches on the trimmed text
			// are additionally scaled by 'space-discount', while prefix matches on
			// the untrimmed text get the regular prefix score.
			$fields[] =
				[
					"lemma.prefix",
					$profile['prefix'] * $profile['space-discount'],
				];
			$fields[] =
				[
					"lexeme_forms.representation.prefix",
					$profile['prefix'] * $profile['space-discount'] * $profile['form-discount'],
				];
			$fieldsExact[] = [ "lemma.prefix", $profile['prefix'] ];
			$fieldsExact[] =
				[
					"lexeme_forms.representation.prefix",
					$profile['prefix'] * $profile['form-discount'],
				];
		} else {
			$fields[] = [ "lemma.prefix", $profile['prefix'] ];
			$fields[] =
				[
					"lexeme_forms.representation.prefix",
					$profile['prefix'] * $profile['form-discount'],
				];
		}

		foreach ( $fields as $field ) {
			$dismax->addQuery( EntitySearchUtils::makeConstScoreQuery( $field[0], $field[1], $text ) );
		}

		foreach ( $fieldsExact as $field ) {
			$dismax->addQuery( EntitySearchUtils::makeConstScoreQuery( $field[0], $field[1], $textExact ) );
		}

		// The filter decides whether a document matches by label at all,
		// the DisMax only contributes to the score.
		$labelsQuery = new BoolQuery();
		$labelsQuery->addFilter( $labelsFilter );
		$labelsQuery->addShould( $dismax );
		$titleMatch = new Term( [
			'title.keyword' => EntitySearchUtils::normalizeId( $text, $this->idParser ),
		] );

		$query = new BoolQuery();
		// Match either labels or exact match to title
		$query->addShould( $labelsQuery );
		$query->addShould( $titleMatch );
		$query->setMinimumShouldMatch( 1 );

		// Filter to fetch only given entity type
		$query->addFilter( new Term( [ 'content_model' => LexemeContent::CONTENT_MODEL_ID ] ) );

		return $query;
	}

	/**
	 * Get results type object for this search.
	 * @return ResultsType
	 */
	protected function makeResultType() {
		return new LexemeTermResult(
			$this->idParser,
			$this->userLanguage,
			$this->lookupFactory
		);
	}

	/**
	 * @inheritDoc
	 *
	 * Note: $languageCode and $strictLanguage are not used by this implementation.
	 * When no $profileContext is given, self::CONTEXT_LEXEME_PREFIX is used.
	 *
	 * @throws EntitySearchException when the search does not finish with an OK status
	 */
	public function getRankedSearchResults(
		$text,
		$languageCode,
		$entityType,
		$limit,
		$strictLanguage,
		?string $profileContext = null
	) {
		$profileContext ??= self::CONTEXT_LEXEME_PREFIX;
		$searcher = new WikibasePrefixSearcher( 0, $limit, $this->debugOptions );
		$searcher->getSearchContext()->setProfileContext( $profileContext );
		$query = $this->getElasticSearchQuery( $text, $entityType, $searcher->getSearchContext() );

		$searcher->setResultsType( $this->makeResultType() );

		$status = $searcher->performSearch( $query );
		if ( !$status->isOK() ) {
			throw new EntitySearchException( $status );
		}
		$result = $status->getValue();

		// Raw return mode of the searcher (debug feature): let the searcher
		// process the result together with the web request.
		if ( $searcher->isReturnRaw() ) {
			$result = $searcher->processRawReturn( $result, $this->request );
		}

		return $result;
	}

}