Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
94.55% covered (success)
94.55%
104 / 110
76.92% covered (warning)
76.92%
10 / 13
CRAP
0.00% covered (danger)
0.00%
0 / 1
CompSuggestQueryBuilder
94.55% covered (success)
94.55%
104 / 110
76.92% covered (warning)
76.92%
10 / 13
45.33
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
3
 areResultsPossible
75.00% covered (warning)
75.00%
3 / 4
0.00% covered (danger)
0.00%
0 / 1
3.14
 build
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
3
 buildSuggestQueries
100.00% covered (success)
100.00%
7 / 7
100.00% covered (success)
100.00%
1 / 1
3
 resolveFuzzy
88.89% covered (warning)
88.89%
8 / 9
0.00% covered (danger)
0.00%
0 / 1
4.02
 buildSuggestQuery
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
5
 handleVariants
100.00% covered (success)
100.00%
19 / 19
100.00% covered (success)
100.00%
1 / 1
6
 buildVariantProfile
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 postProcess
86.21% covered (warning)
86.21%
25 / 29
0.00% covered (danger)
0.00%
0 / 1
12.38
 decodeId
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getMergedProfiles
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 computeHardLimit
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
2
 getLimit
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace CirrusSearch\Query;
4
5use CirrusSearch\BuildDocument\Completion\SuggestBuilder;
6use CirrusSearch\Search\CompletionResultsCollector;
7use CirrusSearch\Search\SearchContext;
8use CirrusSearch\SearchConfig;
9use CirrusSearch\SecondTry\SecondTryRunner;
10use Elastica\ResultSet;
11use Elastica\Suggest;
12use Elastica\Suggest\Completion;
13use SearchSuggestion;
14use Wikimedia\Assert\Assert;
15
16/**
17 * Suggest (Completion) query builder.
18 * Unlike classic query builders it will :
19 * - handle limit differently as offsets are not accepted during suggest queries
20 * - store a mutable state in mergedProfiles
21 */
class CompSuggestQueryBuilder {
    use QueryBuilderTraits;

    /** Base discount multiplier applied to suggestions built from second-try candidates. */
    public const VARIANT_EXTRA_DISCOUNT = 0.0001;

    /** @var SearchContext (final) */
    private $searchContext;

    /** @var array (final) the 'fst' section of the suggest profile, keyed by suggestion name */
    private $profile;

    /** @var int (final) number of results that can be displayed, never negative */
    private $limit;

    /** @var int (final) see self::computeHardLimit */
    private $hardLimit;

    /** @var int (final) */
    private $offset;

    /** @var array (mutable) state built after calling self::build */
    private $mergedProfiles;

    /** @var SecondTryRunner (final) provides the weight of each second-try strategy */
    private SecondTryRunner $secondTryRunner;

    /**
     * @param SearchContext $context
     * @param array $profile settings as defined in profiles/SuggestProfiles.config.php,
     *  only the 'fst' key is read here
     * @param SecondTryRunner $secondTryRunner
     * @param int $limit the number of results to display
     * @param int $offset
     */
    public function __construct( SearchContext $context, array $profile, SecondTryRunner $secondTryRunner, $limit, $offset = 0 ) {
        $this->searchContext = $context;
        // Default to an empty list so that a profile without an 'fst' section is
        // rejected by the assertion below instead of failing inside count().
        $this->profile = $profile['fst'] ?? [];
        Assert::parameter( count( $this->profile ) > 0, '$profile', 'Profile must not be empty' );
        $this->secondTryRunner = $secondTryRunner;
        $this->hardLimit = self::computeHardLimit( $limit, $offset, $context->getConfig() );
        // Never display more than what remains between the offset and the hard limit.
        if ( $limit > $this->hardLimit - $offset ) {
            $limit = $this->hardLimit - $offset;
        }
        // An offset beyond the hard limit makes the remainder negative: clamp to 0.
        $this->limit = $limit > 0 ? $limit : 0;
        $this->offset = $offset;
    }

    /**
     * Check the builder settings to determine if results are possible.
     * If this method returns false the query does not need to be sent to elastic.
     *
     * @return bool true if results are possible false otherwise
     */
    public function areResultsPossible() {
        $namespaces = $this->searchContext->getNamespaces();
        // A null namespace list is not treated as a restriction; an explicit
        // list must contain NS_MAIN for results to be possible.
        if ( $namespaces !== null && !in_array( NS_MAIN, $namespaces ) ) {
            return false;
        }
        // If the offset requested is greater than the hard limit
        // allowed we will always return an empty set so let's do it
        // asap.
        return $this->limit > 0;
    }

    /**
     * Build the suggest query.
     *
     * Resets self::$mergedProfiles to the base profiles, then adds one entry per
     * second-try suggestion that is actually sent.
     *
     * @param string $term
     * @param array<string, string[]> $secondTryCandidates candidates keyed by strategy name
     * @return Suggest
     */
    public function build( string $term, array $secondTryCandidates = [] ): Suggest {
        $this->checkTitleSearchRequestLength( $term, $this->searchContext );
        // Keep the untouched term: it is used to dedup second-try candidates.
        $origTerm = $term;
        if ( mb_strlen( $term ) > SuggestBuilder::MAX_INPUT_LENGTH ) {
            // Trim the query otherwise we won't find results
            $term = mb_substr( $term, 0, SuggestBuilder::MAX_INPUT_LENGTH );
        }

        $queryLen = mb_strlen( trim( $term ) ); // Avoid cheating with spaces

        $this->mergedProfiles = $this->profile;
        $suggest = $this->buildSuggestQueries( $this->profile, $term, $queryLen );

        // Handle variants, update the set of profiles and suggest queries
        if ( $secondTryCandidates ) {
            $this->handleVariants( $suggest, $secondTryCandidates, $queryLen, $origTerm );
        }
        return $suggest;
    }

    /**
     * Builds a set of suggest queries by reading the list of profiles.
     * Profiles whose min/max_query_len exclude $queryLen are skipped.
     *
     * @param array $profiles
     * @param string $query
     * @param int $queryLen the length to use when checking min/max_query_len
     * @return Suggest a set of suggest queries ready for elastic
     */
    private function buildSuggestQueries( array $profiles, $query, $queryLen ) {
        $suggest = new Suggest();
        foreach ( $profiles as $name => $config ) {
            $sugg = $this->buildSuggestQuery( $name, $config, $query, $queryLen );
            if ( $sugg === null ) {
                continue;
            }
            $suggest->addSuggestion( $sugg );
        }
        return $suggest;
    }

    /**
     * Resolves AUTO fuzziness into a constant value:
     * 0 below 3 codepoints, 1 below 6 codepoints, 2 otherwise.
     * Any other fuzziness setting is returned untouched.
     *
     * @param array $fuzzy FST Fuzziness configuration
     * @param int $queryLen The number of codepoints in the query
     * @return array Resolved FST Fuzziness configuration
     */
    private function resolveFuzzy( array $fuzzy, $queryLen ): array {
        // TODO: We could support `AUTO:2,8` syntax as well, but didnt seem necessary
        if ( ( $fuzzy['fuzziness'] ?? null ) === 'AUTO' ) {
            $low = 3;
            $high = 6;
            if ( $queryLen < $low ) {
                $fuzzy['fuzziness'] = 0;
            } elseif ( $queryLen < $high ) {
                $fuzzy['fuzziness'] = 1;
            } else {
                $fuzzy['fuzziness'] = 2;
            }
        }
        return $fuzzy;
    }

    /**
     * Builds a suggest query from a profile.
     *
     * @param string $name name of the suggestion
     * @param array $config Profile
     * @param string $query
     * @param int $queryLen the length to use when checking min/max_query_len
     * @return Completion|null suggest query ready for elastic, or null when
     *  $queryLen is outside of the min/max_query_len bounds of the profile
     */
    private function buildSuggestQuery( $name, array $config, $query, $queryLen ) {
        // Do not remove spaces at the end, the user might tell us he finished writing a word
        $query = ltrim( $query );
        if ( $config['min_query_len'] > $queryLen ) {
            return null;
        }
        if ( isset( $config['max_query_len'] ) && $queryLen > $config['max_query_len'] ) {
            return null;
        }
        $field = $config['field'];
        $sug = new Completion( $name, $field );
        $sug->setPrefix( $query );
        // fetch_limit_factor may be fractional (e.g. 1.5) while the size is an
        // integer: truncate explicitly rather than relying on implicit
        // float to int coercion.
        $sug->setSize( (int)( $this->hardLimit * $config['fetch_limit_factor'] ) );
        if ( isset( $config['fuzzy'] ) ) {
            $sug->setFuzzy( $this->resolveFuzzy( $config['fuzzy'], $queryLen ) );
        }
        return $sug;
    }

    /**
     * Add suggest queries for the second-try candidates.
     *
     * For every distinct candidate (different from $term and from the candidates
     * already seen) and every base profile, a suggest query named
     * "<profile>-second-try-<strategy>-<variantIndex>" is added to $suggests and
     * the matching profile is recorded in self::$mergedProfiles. These profiles are
     * flagged with the 'fallback' key and their discount is
     * originalDiscount * VARIANT_EXTRA_DISCOUNT * weight( strategy ) / variantIndex,
     * where variantIndex starts at 1 and is shared across all the strategies.
     *
     * @param Suggest $suggests updated in place
     * @param array<string, string[]> $secondTryCandidates candidates as returned by {@link SecondTryRunner::candidate}
     * @param int $queryLen the original query length
     * @param string $term original term (used to dedup)
     */
    private function handleVariants( Suggest $suggests, array $secondTryCandidates, int $queryLen, string $term ): void {
        $done = [ $term ];
        $variantIndex = 0;
        foreach ( $secondTryCandidates as $strategy => $candidates ) {
            foreach ( $candidates as $candidate ) {
                if ( in_array( $candidate, $done, true ) ) {
                    continue;
                }
                $done[] = $candidate;
                $variantIndex++;
                foreach ( $this->profile as $name => $profile ) {
                    $variantProfName = $name . '-second-try-' . $strategy . '-' . $variantIndex;
                    $profile = $this->buildVariantProfile(
                        $profile, ( self::VARIANT_EXTRA_DISCOUNT * $this->secondTryRunner->weight( $strategy ) ) / $variantIndex
                    );
                    $suggest = $this->buildSuggestQuery(
                        $variantProfName, $profile, $candidate, $queryLen
                    );
                    // Only track profiles for which a query is actually sent
                    if ( $suggest !== null ) {
                        $suggests->addSuggestion( $suggest );
                        $this->mergedProfiles[$variantProfName] = $profile;
                    }
                }

            }
        }
    }

    /**
     * Returns a copy of $profile flagged as a fallback query and whose
     * discount is multiplied by $extraDiscount.
     *
     * @param array $profile profile to copy, must define the 'discount' key
     * @param float $extraDiscount extra discount factor to rank variant suggestion lower.
     * @return array
     */
    protected function buildVariantProfile( array $profile, $extraDiscount = 0.0001 ) {
        // mark the profile as a fallback query
        $profile['fallback'] = true;
        $profile['discount'] *= $extraDiscount;
        return $profile;
    }

    /**
     * Post process the response from elastic and feed the collector.
     *
     * Merge top level multi-queries: every returned option is turned into a
     * SearchSuggestion scored with the discount of its profile and offered to
     * the collector. Requires self::build to have been called first since the
     * discounts are read from self::$mergedProfiles.
     *
     * @param CompletionResultsCollector $collector
     * @param ResultSet $results
     * @param string $indexName
     * @return int total hits, i.e. the number of options returned by elastic
     *  (including the ones that were not collected)
     * @throws \Elastica\Exception\RuntimeException when an option has no _id
     */
    public function postProcess( CompletionResultsCollector $collector, ResultSet $results, $indexName ) {
        $suggestResp = $results->getSuggests();
        if ( $suggestResp === [] ) {
            // Edge case where the index contains 0 documents and does not even return the 'suggest' field
            return 0;
        }
        $hitsTotal = 0;
        foreach ( $suggestResp as $name => $sug ) {
            $discount = $this->mergedProfiles[$name]['discount'];
            foreach ( $sug as $suggested ) {
                $hitsTotal += count( $suggested['options'] );
                foreach ( $suggested['options'] as $suggest ) {
                    $page = $suggest['text'];
                    if ( !isset( $suggest['_id'] ) ) {
                        // likely a shard failure during the fetch phase
                        // https://github.com/elastic/elasticsearch/issues/32467
                        throw new \Elastica\Exception\RuntimeException( "Invalid response returned from " .
                            "the backend (probable shard failure during the fetch phase)" );
                    }
                    // Without target_title in the source, fall back to the suggested text in NS_MAIN
                    $targetTitle = $page;
                    $targetTitleNS = NS_MAIN;
                    if ( isset( $suggest['_source']['target_title'] ) ) {
                        $targetTitle = $suggest['_source']['target_title']['title'];
                        $targetTitleNS = $suggest['_source']['target_title']['namespace'];
                    }
                    [ $docId, $type ] = $this->decodeId( $suggest['_id'] );
                    $score = $discount * $suggest['_score'];
                    $pageId = $this->searchContext->getConfig()->makePageId( $docId );
                    $suggestion = new SearchSuggestion( $score, null, null, $pageId );
                    if ( $collector->collect( $suggestion, $name, $indexName ) ) {
                        if ( $type === SuggestBuilder::TITLE_SUGGESTION && $targetTitleNS === NS_MAIN ) {
                            // For title suggestions we always use the target_title
                            // This is because we may encounter default_sort or subphrases that are not
                            // valid titles... And we prefer to display the title over close redirects
                            // for CrossNS redirect we prefer the returned suggestion
                            $suggestion->setText( $targetTitle );

                        } else {
                            $suggestion->setText( $page );
                        }
                    } else {
                        // Results are returned in order by elastic skip the rest if no more
                        // results from this suggest can be collected
                        if ( $collector->isFull() && $collector->getMinScore() > $score ) {
                            break;
                        }
                    }
                }
            }
        }
        return $hitsTotal;
    }

    /**
     * Split a compacted id: the last character is the type, what precedes
     * it is the numeric id.
     *
     * @param string $id compacted id (id + $type)
     * @return array 2 elt array [ $id, $type ]
     */
    private function decodeId( $id ) {
        return [ intval( substr( $id, 0, -1 ) ), substr( $id, -1 ) ];
    }

    /**
     * (public for tests)
     * @return array the base profiles plus the second-try profiles added by the
     *  last call to self::build
     */
    public function getMergedProfiles() {
        return $this->mergedProfiles;
    }

    /**
     * Get the hard limit
     * The completion api does not support offset so we have to add a hack
     * here to work around this limitation: limit + offset results are fetched.
     * To avoid ridiculously large queries we also set a hard limit, read from
     * CirrusSearchCompletionSuggesterHardLimit (50 when not set).
     * Note that this limit will be changed by fetch_limit_factor set to 2 or 1.5
     * depending on the profile.
     * @param int $limit limit requested
     * @param int $offset offset requested
     * @param SearchConfig $config
     * @return int the number of results to fetch from elastic
     */
    public static function computeHardLimit( $limit, $offset, SearchConfig $config ) {
        $limit += $offset;
        $hardLimit = $config->get( 'CirrusSearchCompletionSuggesterHardLimit' ) ?? 50;
        if ( $limit > $hardLimit ) {
            return $hardLimit;
        }
        return $limit;
    }

    /**
     * Number of results we could display
     * @return int
     */
    public function getLimit() {
        return $this->limit;
    }
}