Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 155
0.00% covered (danger)
0.00%
0 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
CompletionSuggester
0.00% covered (danger)
0.00%
0 / 155
0.00% covered (danger)
0.00%
0 / 10
1640
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 39
0.00% covered (danger)
0.00%
0 / 1
90
 suggest
0.00% covered (danger)
0.00%
0 / 31
0.00% covered (danger)
0.00%
0 / 1
56
 processMSearchResponse
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 collectCompSuggestResults
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
6
 collectPrefixSearchResults
0.00% covered (danger)
0.00%
0 / 22
0.00% covered (danger)
0.00%
0 / 1
72
 getSuggestSearchRequest
0.00% covered (danger)
0.00%
0 / 11
0.00% covered (danger)
0.00%
0 / 1
6
 getPrefixSearchRequest
0.00% covered (danger)
0.00%
0 / 23
0.00% covered (danger)
0.00%
0 / 1
56
 newLog
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 getCompletionIndex
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getResultsTransformer
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3namespace CirrusSearch;
4
5use CirrusSearch\Profile\SearchProfileService;
6use CirrusSearch\Query\CompSuggestQueryBuilder;
7use CirrusSearch\Query\PrefixSearchQueryBuilder;
8use CirrusSearch\Search\CompletionResultsCollector;
9use CirrusSearch\Search\FancyTitleResultsType;
10use CirrusSearch\Search\MSearchRequests;
11use CirrusSearch\Search\SearchContext;
12use CirrusSearch\Search\SearchRequestBuilder;
13use CirrusSearch\SecondTry\SecondTryRunner;
14use CirrusSearch\SecondTry\SecondTryRunnerFactory;
15use CirrusSearch\SecondTry\SecondTrySearchFactory;
16use Closure;
17use Elastica\Index;
18use Elastica\Multi\Search as MultiSearch;
19use Elastica\Query;
20use Elastica\ResultSet;
21use Elastica\Search;
22use MediaWiki\MediaWikiServices;
23use MediaWiki\Status\Status;
24use MediaWiki\User\User;
25use SearchSuggestionSet;
26use Wikimedia\Assert\Assert;
27
28/**
29 * Performs search as you type queries using Completion Suggester.
30 *
31 * @license GPL-2.0-or-later
32 */
33
/**
 * Completion Suggester Searcher
 *
 * NOTES:
 * The CompletionSuggester is built on top of the ElasticSearch Completion
 * Suggester.
 * (https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-completion.html).
 *
 * This class is used at query time, see
 * CirrusSearch\BuildDocument\SuggestBuilder for index time logic.
 *
 * Document model: Cirrus documents are indexed with 2 suggestions:
 *
 * 1. The title suggestion (and close redirects).
 * This helps to avoid displaying redirects with typos (e.g. Albert Enstein,
 * Unietd States) where we make the assumption that if the redirect is close
 * enough it's likely a typo and it's preferable to display the canonical title.
 * This decision is made at index-time in SuggestBuilder::extractTitleAndSimilarRedirects.
 *
 * 2. The redirect suggestions
 * Because the same canonical title can be returned twice we support fetch_limit_factor
 * in suggest profiles to fetch more than what the user asked for.
 *
 * Additionally, if the requested namespaces include namespaces other than NS_MAIN,
 * a prefix search query is sent to the main index. Results are appended to the
 * suggest results. Appending is far from ideal but in the current state scores
 * between the suggest index and prefix search are not comparable.
 * TODO: investigate computing the comp suggest score on main indices to properly merge
 * results.
 */
class CompletionSuggester extends ElasticsearchIntermediary {
    /**
     * @const string multisearch key to identify the comp suggest request
     */
    private const MSEARCH_KEY_SUGGEST = "suggest";

    /**
     * @const string multisearch key to identify the prefix search request
     */
    private const MSEARCH_KEY_PREFIX = "prefix";

    /**
     * Search type (used for logs & timeout configs)
     */
    private const SEARCH_TYPE = 'comp_suggest';

    /**
     * @var int maximum number of result (final)
     */
    private $limit;

    /**
     * @var int offset (final)
     */
    private $offset;

    /**
     * @var string index base name to use (final)
     */
    private $indexBaseName;

    /**
     * @var Index (final)
     */
    private $completionIndex;

    /**
     * Search environment configuration (final)
     * @var SearchConfig
     */
    private $config;

    /**
     * @var SearchContext (final)
     */
    private $searchContext;

    /**
     * @var CompSuggestQueryBuilder (final)
     */
    private $compSuggestBuilder;

    /**
     * @var PrefixSearchQueryBuilder (final)
     */
    private $prefixSearchQueryBuilder;

    /**
     * @var SearchRequestBuilder|null the builder to build the search for prefix search queries.
     *  Only assigned by getPrefixSearchRequest() once a prefix search request is actually built.
     */
    private $prefixSearchRequestBuilder;

    private SecondTryRunner $secondTryRunner;

    /**
     * @param Connection $conn
     * @param int $limit Limit the results to this many
     * @param int $offset
     * @param SearchConfig|null $config Configuration settings
     * @param int[]|null $namespaces Array of namespace numbers to search or null to search all namespaces.
     * @param User|null $user user for which this search is being performed.  Attached to slow request logs.
     * @param string|bool $index Base name for index to search from, defaults to $wgCirrusSearchIndexBaseName
     * @param string|null $profileName force the profile to use otherwise SearchProfileService defaults will be used
     * @param CirrusDebugOptions|null $debugOptions
     * @param SecondTryRunnerFactory|null $secondTryRunnerFactory the SecondTryRunner factory
     */
    public function __construct(
        Connection $conn,
        $limit,
        $offset = 0,
        ?SearchConfig $config = null,
        ?array $namespaces = null,
        ?User $user = null,
        $index = false,
        $profileName = null,
        ?CirrusDebugOptions $debugOptions = null,
        ?SecondTryRunnerFactory $secondTryRunnerFactory = null
    ) {
        if ( $config === null ) {
            // @todo connection has an embedded config ... reuse that? somehow should
            // at least ensure they are the same.
            $config = MediaWikiServices::getInstance()
                ->getConfigFactory()
                ->makeConfig( 'CirrusSearch' );
        }
        parent::__construct( $conn, $user, $config->get( 'CirrusSearchSlowSearch' ) );
        if ( $secondTryRunnerFactory === null ) {
            $secondTryRunnerFactory = new SecondTryRunnerFactory(
                new SecondTrySearchFactory(
                    MediaWikiServices::getInstance()
                        ->getLanguageConverterFactory(),
                ),
                $config
            );
        }
        $this->secondTryRunner = $secondTryRunnerFactory->create( SearchProfileService::CONTEXT_COMPLETION );

        $this->limit = $limit;
        $this->offset = $offset;
        $this->indexBaseName = $index ?: $config->get( SearchConfig::INDEX_BASE_NAME );
        $altIndexId = $config->get( 'CirrusSearchCompletionSuggesterUseAltIndexId' );
        // Check if the alternate index id is actually setup
        $altIndex = null;
        if ( $altIndexId !== null && AlternativeIndices::isValidAltIndexId( $altIndexId ) ) {
            $altIndex = AlternativeIndices::build( $config )->getAlternativeIndexById( AlternativeIndices::COMPLETION, (int)$altIndexId );
            // An alternative index that is configured but not flagged for use is ignored.
            if ( $altIndex !== null && !$altIndex->isUse() ) {
                $altIndex = null;
            }
        }
        if ( $altIndex !== null ) {
            // The alternative index brings its own config, which replaces the one passed in.
            $this->completionIndex = $altIndex->getIndex( $this->connection );
            $this->config = $altIndex->getConfig();
        } else {
            $this->completionIndex = $this->connection->getIndex( $this->indexBaseName, Connection::TITLE_SUGGEST_INDEX_SUFFIX );
            $this->config = $config;
        }
        $this->searchContext = new SearchContext( $this->config, $namespaces, $debugOptions );

        $profileDefinition = $this->config->getProfileService()
            ->loadProfile( SearchProfileService::COMPLETION, SearchProfileService::CONTEXT_COMPLETION, $profileName );
        $this->compSuggestBuilder = new CompSuggestQueryBuilder(
            $this->searchContext,
            $profileDefinition,
            $this->secondTryRunner,
            $limit,
            $offset
        );

        $this->prefixSearchQueryBuilder = new PrefixSearchQueryBuilder( $this->secondTryRunner );
    }

    /**
     * Produce a set of completion suggestions for text using _suggest
     * See https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-suggesters-completion.html
     *
     * The completion suggester request and (when applicable) the prefix search
     * request are sent together in a single multi search.
     *
     * WARNING: experimental API
     *
     * @param string $text Search term
     * @return Status a good status holding an empty SearchSuggestionSet when no request
     *  can be built, the dumped query/results when the matching debug option is set,
     *  otherwise the status returned by the pool counter work.
     */
    public function suggest( $text ) {
        $secondTryCandidates = $this->secondTryRunner->candidates( $text );
        $suggestSearch = $this->getSuggestSearchRequest( $text, $secondTryCandidates );
        $mSearchRequests = new MSearchRequests();

        if ( $suggestSearch !== null ) {
            $mSearchRequests->addRequest( self::MSEARCH_KEY_SUGGEST, $suggestSearch );
        }

        $prefixSearch = $this->getPrefixSearchRequest( $text, $secondTryCandidates );
        if ( $prefixSearch !== null ) {
            $mSearchRequests->addRequest( self::MSEARCH_KEY_PREFIX, $prefixSearch );
        }

        // Neither request could be built: nothing to send to the backend.
        if ( !$mSearchRequests->getRequests() ) {
            return Status::newGood( SearchSuggestionSet::emptySuggestionSet() );
        }
        $description = "{queryType} search for '{query}'";

        if ( $this->searchContext->getDebugOptions()->isCirrusDumpQuery() ) {
            return $mSearchRequests->dumpQuery( $description );
        }

        $multiSearch = new MultiSearch( $this->connection->getClient() );
        $multiSearch->addSearches( $mSearchRequests->getRequests() );

        $this->connection->setTimeout( $this->getClientTimeout( self::SEARCH_TYPE ) );

        $status = Util::doPoolCounterWork( 'CirrusSearch-Completion', $this->user,
                function () use ( $multiSearch, $text, $description ) {
                    $log = $this->newLog( $description, self::SEARCH_TYPE, [
                        'query' => $text,
                        'offset' => $this->offset,
                    ] );

                    $resultsTransformer = $this->getResultsTransformer( $log );

                    return $this->runMSearch( $multiSearch, $log, $this->connection,
                        $resultsTransformer );
                } );

        // When dumping results no transformer was installed (see getResultsTransformer()),
        // so the status value still exposes the raw result sets.
        if ( $status->isOK() && $this->searchContext->getDebugOptions()->isCirrusDumpResult() ) {
            $resultSets = $status->getValue()->getResultSets();
            $responses = $mSearchRequests->toMSearchResponses( $resultSets );

            return $responses->dumpResults( $description );
        }

        return $status;
    }

    /**
     * Merge the responses of the multi search into a single suggestion set.
     *
     * Completion suggester results are collected first, prefix search results
     * are appended afterwards.
     *
     * @param ResultSet[] $results result sets keyed by the MSEARCH_KEY_* constants
     * @param CompletionRequestLog $log
     * @return SearchSuggestionSet
     */
    private function processMSearchResponse( array $results, CompletionRequestLog $log ) {
        $collector = new CompletionResultsCollector(
            $this->limit, $this->offset, $this->config->get( 'CirrusSearchCompletionBannedPageIds' ) );
        $totalHits = $this->collectCompSuggestResults( $collector, $results, $log );
        $totalHits += $this->collectPrefixSearchResults( $collector, $results, $log );
        $log->setTotalHits( $totalHits );
        return $collector->logAndGetSet( $log );
    }

    /**
     * Feed the collector with the completion suggester response, if there is one.
     *
     * @param CompletionResultsCollector $collector
     * @param ResultSet[] $results result sets keyed by the MSEARCH_KEY_* constants
     * @param CompletionRequestLog $log
     * @return int value returned by CompSuggestQueryBuilder::postProcess(), or 0 when
     *  no completion suggester request was part of the multi search
     */
    private function collectCompSuggestResults( CompletionResultsCollector $collector, array $results, CompletionRequestLog $log ) {
        if ( !isset( $results[self::MSEARCH_KEY_SUGGEST] ) ) {
            return 0;
        }
        $indexName = $this->completionIndex->getName();
        $log->addIndex( $indexName );
        $suggestResults = $results[self::MSEARCH_KEY_SUGGEST];
        $log->setSuggestTookMs( intval( $suggestResults->getResponse()->getQueryTime() * 1000 ) );
        return $this->compSuggestBuilder->postProcess(
            $collector,
            $suggestResults,
            $indexName
        );
    }

    /**
     * Append the prefix search results, if any, to the collector.
     *
     * @param CompletionResultsCollector $collector
     * @param ResultSet[] $results result sets keyed by the MSEARCH_KEY_* constants
     * @param CompletionRequestLog $log
     * @return int total hits reported by the prefix search, or 0 when no prefix
     *  search request was part of the multi search
     * @throws \Exception
     */
    private function collectPrefixSearchResults( CompletionResultsCollector $collector, array $results, CompletionRequestLog $log ) {
        if ( !isset( $results[self::MSEARCH_KEY_PREFIX] ) ) {
            return 0;
        }
        $indexName = $this->prefixSearchRequestBuilder->getIndex()->getName();
        $prefixResults = $results[self::MSEARCH_KEY_PREFIX];
        $totalHits = $prefixResults->getTotalHits();
        $log->addIndex( $indexName );
        $log->setPrefixTookMs( intval( $prefixResults->getResponse()->getQueryTime() * 1000 ) );
        // We only append as we can't really compare scores without more complex code/evaluation
        if ( $collector->isFull() ) {
            return $totalHits;
        }
        /** @var FancyTitleResultsType $rType */
        $rType = $this->prefixSearchRequestBuilder->getSearchContext()->getResultsType();
        // the code below highly depends on the array format built by
        // FancyTitleResultsType::transformOneElasticResult assert that this type
        // is properly set so that we fail during unit tests if someone changes it
        // inadvertently.
        Assert::precondition( $rType instanceof FancyTitleResultsType, '$rType must be a FancyTitleResultsType' );
        $hits = $prefixResults->getResults();
        // Start just below the lowest score already collected so that prefix results
        // sort after the suggester results. Scores can go negative, it's not a problem:
        // we only use scores for sorting, they'll be forgotten in client response.
        $minScore = $collector->getMinScore();
        $score = $minScore !== null ? $minScore - 1 : count( $hits );

        $namespaces = $this->prefixSearchRequestBuilder->getSearchContext()->getNamespaces();
        foreach ( $hits as $res ) {
            $pageId = $this->config->makePageId( $res->getId() );
            $title = FancyTitleResultsType::chooseBestTitleOrRedirect( $rType->transformOneElasticResult( $res, $namespaces ) );
            if ( $title === false ) {
                continue;
            }
            $suggestion = new \SearchSuggestion( $score--, $title->getPrefixedText(), $title, $pageId );
            // A rejected suggestion only ends the loop when the collector is full.
            if ( !$collector->collect( $suggestion, 'prefix', $indexName ) && $collector->isFull() ) {
                break;
            }
        }
        return $totalHits;
    }

    /**
     * Build the search request sent to the completion suggester index.
     *
     * @param string $text Search term
     * @param array<string, string[]> $secondTryCandidates second try search candidates
     * @return Search|null null when the suggest builder reports that no results are possible
     */
    private function getSuggestSearchRequest( string $text, array $secondTryCandidates ): ?Search {
        if ( !$this->compSuggestBuilder->areResultsPossible() ) {
            return null;
        }

        $suggest = $this->compSuggestBuilder->build( $text, $secondTryCandidates );
        // Only the suggest section matters: match no document and request no hits.
        $query = new Query( new Query\MatchNone() );
        $query->setSize( 0 );
        $query->setSuggest( $suggest );
        $query->setSource( [ 'target_title' ] );
        $search = new Search( $this->connection->getClient() );
        $search->addIndex( $this->completionIndex );
        $search->setQuery( $query );
        return $search;
    }

    /**
     * Build the prefix search request used for namespaces other than NS_MAIN.
     *
     * As a side effect $this->prefixSearchRequestBuilder is assigned when a
     * request is returned.
     *
     * @param string $term Search term
     * @param array<string, string[]> $secondTryCandidates second try candidates
     * @return Search|null null when no prefix search has to be run
     */
    private function getPrefixSearchRequest( $term, array $secondTryCandidates ): ?Search {
        $namespaces = $this->searchContext->getNamespaces();
        if ( $namespaces === null ) {
            return null;
        }

        foreach ( $namespaces as $k => $v ) {
            // Deliberately non-strict comparison: namespace ids can be numeric strings
            // (e.g. "0"). A strict === against the integer NS_MAIN would never match
            // them and NS_MAIN would wrongly be kept in the prefix search.
            if ( $v == NS_MAIN ) {
                unset( $namespaces[$k] );
            }
        }

        // Only NS_MAIN was requested: there is nothing left for the prefix search.
        if ( $namespaces === [] ) {
            return null;
        }
        $limit = CompSuggestQueryBuilder::computeHardLimit( $this->limit, $this->offset, $this->config );
        // The requested offset is past the hard limit: skip the prefix search.
        if ( $this->offset > $limit ) {
            return null;
        }
        $prefixSearchContext = new SearchContext( $this->config, $namespaces );
        $prefixSearchContext->setResultsType( new FancyTitleResultsType( 'prefix' ) );
        $this->prefixSearchQueryBuilder->build( $prefixSearchContext, $term, $secondTryCandidates );
        if ( !$prefixSearchContext->areResultsPossible() ) {
            // $prefixSearchContext might contain warnings, but these are lost.
            return null;
        }
        $this->prefixSearchRequestBuilder = new SearchRequestBuilder( $prefixSearchContext, $this->connection, $this->indexBaseName );
        $this->prefixSearchRequestBuilder->setTimeout( $this->getTimeout( self::SEARCH_TYPE ) );
        return $this->prefixSearchRequestBuilder->setLimit( $limit )
            // collect all results up to $limit, $this->offset is the offset the client wants
            // not the offset in prefix search results.
            ->setOffset( 0 )
            ->build();
    }

    /**
     * Create the request log for a completion search, tagged with the namespaces
     * of the current search context.
     *
     * @param string $description
     * @param string $queryType
     * @param array $extra
     * @return CompletionRequestLog
     */
    protected function newLog( $description, $queryType, array $extra = [] ) {
        return new CompletionRequestLog(
            $description,
            $queryType,
            $extra,
            $this->searchContext->getNamespaces()
        );
    }

    /**
     * @return Index the index queried by the completion suggester request
     */
    public function getCompletionIndex() {
        return $this->completionIndex;
    }

    /**
     * Build the closure turning the raw multi search response into a suggestion set.
     *
     * @param CompletionRequestLog $log
     * @return Closure|null null when results are dumped for debugging, so that the
     *  raw result sets are left untouched
     */
    private function getResultsTransformer( CompletionRequestLog $log ): ?Closure {
        $resultsTransformer = null;
        if ( !$this->searchContext->getDebugOptions()->isCirrusDumpResult() ) {
            $resultsTransformer = function ( \Elastica\Multi\ResultSet $results ) use ( $log ) {
                return $this->processMSearchResponse( $results->getResultSets(), $log );
            };
        }

        return $resultsTransformer;
    }

}