Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
95.74% covered (success)
95.74%
90 / 94
83.33% covered (warning)
83.33%
10 / 12
CRAP
0.00% covered (danger)
0.00%
0 / 1
CompSuggestQueryBuilder
95.74% covered (success)
95.74%
90 / 94
83.33% covered (warning)
83.33%
10 / 12
40
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
3
 areResultsPossible
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
 build
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
3
 buildSuggestQueries
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
3
 buildSuggestQuery
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
5
 handleVariants
100.00% covered (success)
100.00%
14 / 14
100.00% covered (success)
100.00%
1 / 1
5
 buildVariantProfile
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 postProcess
89.29% covered (warning)
89.29%
25 / 28
0.00% covered (danger)
0.00%
0 / 1
12.18
 decodeId
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getMergedProfiles
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 computeHardLimit
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
2
 getLimit
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace CirrusSearch\Query;
4
5use CirrusSearch\BuildDocument\Completion\SuggestBuilder;
6use CirrusSearch\Search\CompletionResultsCollector;
7use CirrusSearch\Search\SearchContext;
8use CirrusSearch\SearchConfig;
9use Elastica\ResultSet;
10use Elastica\Suggest;
11use Elastica\Suggest\Completion;
12use SearchSuggestion;
13use Wikimedia\Assert\Assert;
14
/**
 * Suggest (Completion) query builder.
 *
 * Unlike classic query builders it will:
 * - handle limit differently as offsets are not accepted during suggest queries
 * - store a mutable state in mergedProfiles
 */
class CompSuggestQueryBuilder {
    use QueryBuilderTraits;

    /**
     * Base extra discount applied to variant profiles; it is divided by the
     * 1-based rank of the variant in self::handleVariants.
     */
    public const VARIANT_EXTRA_DISCOUNT = 0.0001;

    /** @var SearchContext (final) */
    private $searchContext;

    /** @var array (final) the 'fst' section of the suggest profile, keyed by suggestion name */
    private $profile;

    /** @var int (final) number of results that can be displayed, never negative */
    private $limit;

    /** @var int (final) requested limit + offset, capped by the configured hard limit */
    private $hardLimit;

    /** @var int (final) */
    private $offset;

    /** @var array (mutable) state built after calling self::build, empty until then */
    private $mergedProfiles = [];

    /**
     * @param SearchContext $context
     * @param array $profile settings as defined in profiles/SuggestProfiles.config.php,
     *  only the 'fst' key is read and it must not be empty
     * @param int $limit the number of results to display
     * @param int $offset
     */
    public function __construct( SearchContext $context, array $profile, $limit, $offset = 0 ) {
        $this->searchContext = $context;
        $this->profile = $profile['fst'];
        Assert::parameter( count( $this->profile ) > 0, '$profile', 'Profile must not be empty' );
        $this->hardLimit = self::computeHardLimit( $limit, $offset, $context->getConfig() );
        // Only what remains between the offset and the hard limit can be displayed
        if ( $limit > $this->hardLimit - $offset ) {
            $limit = $this->hardLimit - $offset;
        }
        // An offset at or beyond the hard limit leaves nothing to display: clamp to 0
        $this->limit = $limit > 0 ? $limit : 0;
        $this->offset = $offset;
    }

    /**
     * Check the builder settings to determine if results are possible.
     * If this method returns false the query does not need to be sent to elastic.
     *
     * @return bool true if results are possible false otherwise
     */
    public function areResultsPossible() {
        $namespaces = $this->searchContext->getNamespaces();
        // A null namespace list is not treated as a restriction; otherwise NS_MAIN must be requested
        if ( $namespaces !== null && !in_array( NS_MAIN, $namespaces ) ) {
            return false;
        }
        // If the offset requested is greater than the hard limit
        // allowed we will always return an empty set so let's do it
        // asap.
        return $this->limit > 0;
    }

    /**
     * Build the suggest query.
     *
     * Resets the merged profiles to the base profiles, then adds one profile per
     * (base profile, distinct variant) pair that produced a suggest query.
     *
     * @param string $term
     * @param string[]|null $variants
     * @return Suggest
     */
    public function build( $term, $variants = null ) {
        $this->checkTitleSearchRequestLength( $term, $this->searchContext );
        // Keep the untrimmed term: variants are deduplicated against it
        $origTerm = $term;
        if ( mb_strlen( $term ) > SuggestBuilder::MAX_INPUT_LENGTH ) {
            // Trim the query otherwise we won't find results
            $term = mb_substr( $term, 0, SuggestBuilder::MAX_INPUT_LENGTH );
        }

        $queryLen = mb_strlen( trim( $term ) ); // Avoid cheating with spaces

        $this->mergedProfiles = $this->profile;
        $suggest = $this->buildSuggestQueries( $this->profile, $term, $queryLen );

        // Handle variants, update the set of profiles and suggest queries
        if ( !empty( $variants ) ) {
            $this->handleVariants( $suggest, $variants, $queryLen, $origTerm );
        }
        return $suggest;
    }

    /**
     * Builds a set of suggest queries by reading the list of profiles.
     * Profiles whose query length constraints are not met are skipped.
     *
     * @param array $profiles
     * @param string $query
     * @param int $queryLen the length to use when checking min/max_query_len
     * @return Suggest a set of suggest queries ready for elastic
     */
    private function buildSuggestQueries( array $profiles, $query, $queryLen ) {
        $suggest = new Suggest();
        foreach ( $profiles as $name => $config ) {
            $sugg = $this->buildSuggestQuery( $name, $config, $query, $queryLen );
            if ( $sugg !== null ) {
                $suggest->addSuggestion( $sugg );
            }
        }
        return $suggest;
    }

    /**
     * Builds a suggest query from a profile.
     *
     * @param string $name name of the suggestion
     * @param array $config Profile, reads 'min_query_len', 'field', 'fetch_limit_factor'
     *  and the optional 'max_query_len' and 'fuzzy' keys
     * @param string $query
     * @param int $queryLen the length to use when checking min/max_query_len
     * @return Completion|null suggest query ready for elastic or null when
     *  $queryLen is outside of the bounds accepted by the profile
     */
    private function buildSuggestQuery( $name, array $config, $query, $queryLen ) {
        if ( $config['min_query_len'] > $queryLen ) {
            return null;
        }
        if ( isset( $config['max_query_len'] ) && $queryLen > $config['max_query_len'] ) {
            return null;
        }
        $sug = new Completion( $name, $config['field'] );
        // Do not remove spaces at the end, the user might tell us they finished writing a word
        $sug->setPrefix( ltrim( $query ) );
        // fetch_limit_factor may be fractional (e.g. 1.5): the size must be an
        // integer, so drop the fractional part explicitly.
        $sug->setSize( (int)( $this->hardLimit * $config['fetch_limit_factor'] ) );
        if ( isset( $config['fuzzy'] ) ) {
            $sug->setFuzzy( $config['fuzzy'] );
        }
        return $sug;
    }

    /**
     * Add suggest queries for the variants and register the matching profiles
     * in mergedProfiles under the name '<profile>-variant-<variantIndex>'.
     *
     * Variant profiles are flagged with the 'fallback' key and use a discount
     * factor = originalDiscount * 0.0001 / variantIndex, where variantIndex is
     * the 1-based rank of the variant once duplicates are removed.
     *
     * @param Suggest $suggests updated in place
     * @param array $variants
     * @param int $queryLen the original query length
     * @param string $term original term (used to dedup)
     */
    private function handleVariants( Suggest $suggests, array $variants, $queryLen, $term ) {
        $variantIndex = 0;
        // Variants identical to the original term or to a previous variant are ignored
        $done = [ $term ];
        foreach ( $variants as $variant ) {
            if ( in_array( $variant, $done, true ) ) {
                continue;
            }
            $done[] = $variant;
            $variantIndex++;
            foreach ( $this->profile as $name => $profile ) {
                $variantProfName = $name . '-variant-' . $variantIndex;
                $profile = $this->buildVariantProfile(
                    $profile, self::VARIANT_EXTRA_DISCOUNT / $variantIndex
                );
                $suggest = $this->buildSuggestQuery(
                    $variantProfName, $profile, $variant, $queryLen
                );
                if ( $suggest !== null ) {
                    $suggests->addSuggestion( $suggest );
                    $this->mergedProfiles[$variantProfName] = $profile;
                }
            }
        }
    }

    /**
     * Creates a copy of $profile flagged as a fallback and with its discount
     * multiplied by $extraDiscount.
     *
     * The copy is only returned: naming it and registering it is left to the caller.
     *
     * @param array $profile profile to copy, must have a 'discount' key
     * @param float $extraDiscount extra discount factor to rank variant suggestion lower.
     * @return array
     */
    protected function buildVariantProfile(
        array $profile, $extraDiscount = self::VARIANT_EXTRA_DISCOUNT
    ) {
        // mark the profile as a fallback query
        $profile['fallback'] = true;
        $profile['discount'] *= $extraDiscount;
        return $profile;
    }

    /**
     * Post process the response from elastic and feed the collector.
     *
     * Walks all the top level multi-queries, scores every option with the discount
     * of the profile that produced it and offers it to the collector. The text of
     * collected suggestions is set here.
     *
     * self::build must have been called first so that the profile of every
     * returned suggestion is known.
     *
     * @param CompletionResultsCollector $collector
     * @param ResultSet $results
     * @param string $indexName
     * @return int total hits, i.e. the number of options returned by elastic
     *  including the ones that were not collected
     * @throws \Elastica\Exception\RuntimeException when an option has no _id
     */
    public function postProcess( CompletionResultsCollector $collector, ResultSet $results, $indexName ) {
        $suggestResp = $results->getSuggests();
        if ( $suggestResp === [] ) {
            // Edge case where the index contains 0 documents and does not even return the 'suggest' field
            return 0;
        }
        $config = $this->searchContext->getConfig();
        $hitsTotal = 0;
        foreach ( $suggestResp as $name => $sug ) {
            $discount = $this->mergedProfiles[$name]['discount'];
            foreach ( $sug as $suggested ) {
                $hitsTotal += count( $suggested['options'] );
                foreach ( $suggested['options'] as $suggest ) {
                    $page = $suggest['text'];
                    if ( !isset( $suggest['_id'] ) ) {
                        // likely a shard failure during the fetch phase
                        // https://github.com/elastic/elasticsearch/issues/32467
                        throw new \Elastica\Exception\RuntimeException( "Invalid response returned from " .
                            "the backend (probable shard failure during the fetch phase)" );
                    }
                    $targetTitle = $page;
                    $targetTitleNS = NS_MAIN;
                    if ( isset( $suggest['_source']['target_title'] ) ) {
                        $targetTitle = $suggest['_source']['target_title']['title'];
                        $targetTitleNS = $suggest['_source']['target_title']['namespace'];
                    }
                    [ $docId, $type ] = $this->decodeId( $suggest['_id'] );
                    $score = $discount * $suggest['_score'];
                    $pageId = $config->makePageId( $docId );
                    $suggestion = new SearchSuggestion( $score, null, null, $pageId );
                    if ( $collector->collect( $suggestion, $name, $indexName ) ) {
                        if ( $type === SuggestBuilder::TITLE_SUGGESTION && $targetTitleNS === NS_MAIN ) {
                            // For title suggestions we always use the target_title
                            // This is because we may encounter default_sort or subphrases that are not
                            // valid titles... And we prefer to display the title over close redirects
                            // for CrossNS redirect we prefer the returned suggestion
                            $suggestion->setText( $targetTitle );
                        } else {
                            $suggestion->setText( $page );
                        }
                    } elseif ( $collector->isFull() && $collector->getMinScore() > $score ) {
                        // Results are returned in order by elastic skip the rest if no more
                        // results from this suggest can be collected.
                        // Only the current list of options is abandoned.
                        break;
                    }
                }
            }
        }
        return $hitsTotal;
    }

    /**
     * Split a compacted id: the last character is the suggestion type, what
     * precedes it is the numeric doc id.
     *
     * @param string $id compacted id (id + $type)
     * @return array 2 elt array [ $id, $type ]
     */
    private function decodeId( $id ) {
        return [ intval( substr( $id, 0, -1 ) ), substr( $id, -1 ) ];
    }

    /**
     * (public for tests)
     * @return array the base profiles plus the variant profiles added by the
     *  last call to self::build, empty if self::build was never called
     */
    public function getMergedProfiles() {
        return $this->mergedProfiles;
    }

    /**
     * Get the hard limit
     * The completion api does not support offset we have to add a hack
     * here to work around this limitation.
     * To avoid ridiculously large queries we set also a hard limit.
     * Note that this limit will be changed by fetch_limit_factor set to 2 or 1.5
     * depending on the profile.
     *
     * @param int $limit limit requested
     * @param int $offset offset requested
     * @param SearchConfig $config read for CirrusSearchCompletionSuggesterHardLimit,
     *  50 is used when it is null
     * @return int the number of results to fetch from elastic
     */
    public static function computeHardLimit( $limit, $offset, SearchConfig $config ) {
        // Results before the offset have to be fetched as well
        $limit += $offset;
        $hardLimit = $config->get( 'CirrusSearchCompletionSuggesterHardLimit' ) ?? 50;
        if ( $limit > $hardLimit ) {
            return $hardLimit;
        }
        return $limit;
    }

    /**
     * Number of results we could display
     * @return int
     */
    public function getLimit() {
        return $this->limit;
    }
}