Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
95.96% |
95 / 99 |
|
83.33% |
10 / 12 |
CRAP | |
0.00% |
0 / 1 |
CompSuggestQueryBuilder | |
95.96% |
95 / 99 |
|
83.33% |
10 / 12 |
40 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
areResultsPossible | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 | |||
build | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
buildSuggestQueries | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
buildSuggestQuery | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
5 | |||
handleVariants | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
5 | |||
buildVariantProfile | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
postProcess | |
89.66% |
26 / 29 |
|
0.00% |
0 / 1 |
12.16 | |||
decodeId | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getMergedProfiles | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
computeHardLimit | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
getLimit | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Query; |
4 | |
5 | use CirrusSearch\BuildDocument\Completion\SuggestBuilder; |
6 | use CirrusSearch\Search\CompletionResultsCollector; |
7 | use CirrusSearch\Search\SearchContext; |
8 | use CirrusSearch\SearchConfig; |
9 | use Elastica\ResultSet; |
10 | use Elastica\Suggest; |
11 | use Elastica\Suggest\Completion; |
12 | use SearchSuggestion; |
13 | use Wikimedia\Assert\Assert; |
14 | |
/**
 * Suggest (Completion) query builder.
 *
 * Unlike classic query builders it will:
 * - handle limit differently as offsets are not accepted during suggest queries
 * - store a mutable state in mergedProfiles: it is (re)initialised by build()
 *   and read back by postProcess()
 */
class CompSuggestQueryBuilder {
	use QueryBuilderTraits;

	/** Base extra discount applied to suggestions issued for language variants */
	public const VARIANT_EXTRA_DISCOUNT = 0.0001;

	/** @var SearchContext (final) */
	private $searchContext;

	/** @var array (final) the 'fst' entry of the profile, keyed by suggestion name */
	private $profile;

	/** @var int (final) number of results that could be displayed, never negative */
	private $limit;

	/** @var int (final) see self::computeHardLimit */
	private $hardLimit;

	/** @var int (final) */
	private $offset;

	/** @var array (mutable) state built after calling self::build */
	private $mergedProfiles;

	/**
	 * @param SearchContext $context
	 * @param array $profile settings as defined in profiles/SuggestProfiles.config.php,
	 *  only its 'fst' entry is used and it must not be empty
	 * @param int $limit the number of results to display
	 * @param int $offset
	 */
	public function __construct( SearchContext $context, array $profile, $limit, $offset = 0 ) {
		$this->searchContext = $context;
		// Fall back to an empty list when 'fst' is missing so that the assertion
		// below reports the problem instead of count() failing on null.
		$this->profile = $profile['fst'] ?? [];
		Assert::parameter( count( $this->profile ) > 0, '$profile', 'Profile must not be empty' );
		$this->hardLimit = self::computeHardLimit( $limit, $offset, $context->getConfig() );
		// The hard limit caps $limit + $offset, so at most hardLimit - offset
		// results remain displayable.
		$remaining = $this->hardLimit - $offset;
		if ( $limit > $remaining ) {
			$limit = $remaining;
		}
		$this->limit = $limit > 0 ? $limit : 0;
		$this->offset = $offset;
	}

	/**
	 * Check the builder settings to determine if results are possible.
	 * If this method returns false the query does not have to be sent to elastic.
	 *
	 * @return bool true if results are possible false otherwise
	 */
	public function areResultsPossible() {
		$namespaces = $this->searchContext->getNamespaces();
		// A null namespace list is not treated as a restriction; otherwise
		// NS_MAIN must be part of the requested namespaces.
		if ( $namespaces !== null && !in_array( NS_MAIN, $namespaces ) ) {
			return false;
		}
		// If the offset requested is greater than the hard limit
		// allowed we will always return an empty set so let's do it
		// asap.
		return $this->limit > 0;
	}

	/**
	 * Build the suggest query.
	 *
	 * Resets mergedProfiles to the base profiles, then adds one entry per
	 * variant query that was actually generated.
	 *
	 * @param string $term
	 * @param string[]|null $variants
	 * @return Suggest
	 */
	public function build( $term, $variants = null ) {
		// NOTE(review): checkTitleSearchRequestLength is not defined in this class,
		// presumably provided by QueryBuilderTraits.
		$this->checkTitleSearchRequestLength( $term, $this->searchContext );
		// Keep the untrimmed term: it is used to dedup variants
		$origTerm = $term;
		if ( mb_strlen( $term ) > SuggestBuilder::MAX_INPUT_LENGTH ) {
			// Trim the query otherwise we won't find results
			$term = mb_substr( $term, 0, SuggestBuilder::MAX_INPUT_LENGTH );
		}

		$queryLen = mb_strlen( trim( $term ) ); // Avoid cheating with spaces

		$this->mergedProfiles = $this->profile;
		$suggest = $this->buildSuggestQueries( $this->profile, $term, $queryLen );

		// Handle variants, update the set of profiles and suggest queries
		if ( $variants ) {
			$this->handleVariants( $suggest, $variants, $queryLen, $origTerm );
		}
		return $suggest;
	}

	/**
	 * Builds a set of suggest queries by reading the list of profiles.
	 * Profiles rejected by self::buildSuggestQuery are skipped.
	 *
	 * @param array $profiles
	 * @param string $query
	 * @param int $queryLen the length to use when checking min/max_query_len
	 * @return Suggest a set of suggest queries ready for elastic
	 */
	private function buildSuggestQueries( array $profiles, $query, $queryLen ) {
		$suggest = new Suggest();
		foreach ( $profiles as $name => $config ) {
			$sugg = $this->buildSuggestQuery( $name, $config, $query, $queryLen );
			if ( $sugg !== null ) {
				$suggest->addSuggestion( $sugg );
			}
		}
		return $suggest;
	}

	/**
	 * Builds a suggest query from a profile.
	 *
	 * @param string $name name of the suggestion
	 * @param array $config Profile, reads 'min_query_len', 'field' and
	 *  'fetch_limit_factor' plus the optional 'max_query_len' and 'fuzzy'
	 * @param string $query
	 * @param int $queryLen the length to use when checking min/max_query_len
	 * @return Completion|null suggest query ready for elastic or null when
	 *  $queryLen is outside of the range accepted by the profile
	 */
	private function buildSuggestQuery( $name, array $config, $query, $queryLen ) {
		// Do not remove spaces at the end, the user might tell us he finished writing a word
		$query = ltrim( $query );
		if ( $config['min_query_len'] > $queryLen ) {
			return null;
		}
		if ( isset( $config['max_query_len'] ) && $queryLen > $config['max_query_len'] ) {
			return null;
		}
		$field = $config['field'];
		$sug = new Completion( $name, $field );
		$sug->setPrefix( $query );
		// fetch_limit_factor may be fractional (e.g. 1.5): the size sent to elastic
		// has to be an integer, round up so that we never fetch less than requested.
		$sug->setSize( (int)ceil( $this->hardLimit * $config['fetch_limit_factor'] ) );
		if ( isset( $config['fuzzy'] ) ) {
			$sug->setFuzzy( $config['fuzzy'] );
		}
		return $sug;
	}

	/**
	 * Add suggest queries for the variants and register the matching profiles in
	 * mergedProfiles. Variant profiles are flagged with the 'fallback' key and use
	 * a discount factor = originalDiscount * 0.0001/N where N is the 1-based
	 * position of the variant once duplicates are removed.
	 *
	 * @param Suggest $suggests updated in place
	 * @param array $variants
	 * @param int $queryLen the original query length
	 * @param string $term original term (used to dedup)
	 */
	private function handleVariants( Suggest $suggests, array $variants, $queryLen, $term ) {
		$variantIndex = 0;
		// Terms already queried: the original one and the variants seen so far
		$done = [ $term ];
		foreach ( $variants as $variant ) {
			if ( in_array( $variant, $done, true ) ) {
				continue;
			}
			$done[] = $variant;
			$variantIndex++;
			foreach ( $this->profile as $name => $profile ) {
				$variantProfName = $name . '-variant-' . $variantIndex;
				$profile = $this->buildVariantProfile(
					$profile, self::VARIANT_EXTRA_DISCOUNT / $variantIndex
				);
				$suggest = $this->buildSuggestQuery(
					$variantProfName, $profile, $variant, $queryLen
				);
				if ( $suggest !== null ) {
					$suggests->addSuggestion( $suggest );
					// Needed by postProcess to find the discount of this suggestion
					$this->mergedProfiles[$variantProfName] = $profile;
				}
			}
		}
	}

	/**
	 * Returns a copy of $profile flagged as a fallback query and whose discount
	 * is multiplied by $extraDiscount.
	 *
	 * @param array $profile profile to copy, must have a 'discount' entry
	 * @param float $extraDiscount extra discount factor to rank variant suggestion lower.
	 * @return array
	 */
	protected function buildVariantProfile( array $profile, $extraDiscount = self::VARIANT_EXTRA_DISCOUNT ) {
		// mark the profile as a fallback query
		$profile['fallback'] = true;
		$profile['discount'] *= $extraDiscount;
		return $profile;
	}

	/**
	 * Post process the response from elastic and feed the collector.
	 *
	 * Every option of every suggest query is turned into a SearchSuggestion whose
	 * score is the elastic score multiplied by the discount of the profile that
	 * issued the query (see mergedProfiles, self::build must have been called).
	 *
	 * @param CompletionResultsCollector $collector
	 * @param ResultSet $results
	 * @param string $indexName
	 * @return int total hits: the number of options returned by elastic, whether
	 *  they were collected or not
	 * @throws \Elastica\Exception\RuntimeException when an option has no _id
	 */
	public function postProcess( CompletionResultsCollector $collector, ResultSet $results, $indexName ) {
		$suggestResp = $results->getSuggests();
		if ( $suggestResp === [] ) {
			// Edge case where the index contains 0 documents and does not even return the 'suggest' field
			return 0;
		}
		$config = $this->searchContext->getConfig();
		$hitsTotal = 0;
		foreach ( $suggestResp as $name => $sug ) {
			$discount = $this->mergedProfiles[$name]['discount'];
			foreach ( $sug as $suggested ) {
				$hitsTotal += count( $suggested['options'] );
				foreach ( $suggested['options'] as $suggest ) {
					$page = $suggest['text'];
					if ( !isset( $suggest['_id'] ) ) {
						// likely a shard failure during the fetch phase
						// https://github.com/elastic/elasticsearch/issues/32467
						throw new \Elastica\Exception\RuntimeException( "Invalid response returned from " .
							"the backend (probable shard failure during the fetch phase)" );
					}
					$targetTitle = $page;
					$targetTitleNS = NS_MAIN;
					if ( isset( $suggest['_source']['target_title'] ) ) {
						$targetTitle = $suggest['_source']['target_title']['title'];
						$targetTitleNS = $suggest['_source']['target_title']['namespace'];
					}
					[ $docId, $type ] = $this->decodeId( $suggest['_id'] );
					$score = $discount * $suggest['_score'];
					$pageId = $config->makePageId( $docId );
					$suggestion = new SearchSuggestion( $score, null, null, $pageId );
					if ( $collector->collect( $suggestion, $name, $indexName ) ) {
						if ( $type === SuggestBuilder::TITLE_SUGGESTION && $targetTitleNS === NS_MAIN ) {
							// For title suggestions we always use the target_title
							// This is because we may encounter default_sort or subphrases that are not
							// valid titles... And we prefer to display the title over close redirects
							// for CrossNS redirect we prefer the returned suggestion
							$suggestion->setText( $targetTitle );
						} else {
							$suggestion->setText( $page );
						}
					} elseif ( $collector->isFull() && $collector->getMinScore() > $score ) {
						// Results are returned in order by elastic skip the rest if no more
						// results from this suggest can be collected
						break;
					}
				}
			}
		}
		return $hitsTotal;
	}

	/**
	 * Split a compacted id: the last character is the type, what precedes
	 * it is the numeric id.
	 *
	 * @param string $id compacted id (id + $type)
	 * @return array 2 elt array [ $id, $type ]
	 */
	private function decodeId( $id ) {
		return [ intval( substr( $id, 0, -1 ) ), substr( $id, -1 ) ];
	}

	/**
	 * (public for tests)
	 * @return array the base profiles plus the variant profiles added by the
	 *  last call to self::build
	 */
	public function getMergedProfiles() {
		return $this->mergedProfiles;
	}

	/**
	 * Get the hard limit
	 * The completion api does not support offset so we have to add a hack
	 * here to work around this limitation.
	 * To avoid ridiculously large queries we also set a hard limit, read from
	 * CirrusSearchCompletionSuggesterHardLimit (50 when it is not set).
	 * Note that this limit will be changed by fetch_limit_factor set to 2 or 1.5
	 * depending on the profile.
	 * @param int $limit limit requested
	 * @param int $offset offset requested
	 * @param SearchConfig $config
	 * @return int the number of results to fetch from elastic
	 */
	public static function computeHardLimit( $limit, $offset, SearchConfig $config ) {
		// Offsets are emulated: everything up to $offset + $limit has to be fetched
		$limit += $offset;
		$hardLimit = $config->get( 'CirrusSearchCompletionSuggesterHardLimit' ) ?? 50;
		if ( $limit > $hardLimit ) {
			return $hardLimit;
		}
		return $limit;
	}

	/**
	 * Number of results we could display
	 * @return int
	 */
	public function getLimit() {
		return $this->limit;
	}
}