Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 136
0.00% covered (danger)
0.00%
0 / 10
CRAP
0.00% covered (danger)
0.00%
0 / 1
CompletionSuggester
0.00% covered (danger)
0.00%
0 / 136
0.00% covered (danger)
0.00%
0 / 10
1190
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 21
0.00% covered (danger)
0.00%
0 / 1
12
 suggest
0.00% covered (danger)
0.00%
0 / 30
0.00% covered (danger)
0.00%
0 / 1
56
 processMSearchResponse
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 collectCompSuggestResults
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
6
 collectPrefixSearchResults
0.00% covered (danger)
0.00%
0 / 22
0.00% covered (danger)
0.00%
0 / 1
72
 getSuggestSearchRequest
0.00% covered (danger)
0.00%
0 / 11
0.00% covered (danger)
0.00%
0 / 1
6
 getPrefixSearchRequest
0.00% covered (danger)
0.00%
0 / 23
0.00% covered (danger)
0.00%
0 / 1
56
 newLog
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 getCompletionIndex
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 getResultsTransformer
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2
3namespace CirrusSearch;
4
5use CirrusSearch\Profile\SearchProfileService;
6use CirrusSearch\Query\CompSuggestQueryBuilder;
7use CirrusSearch\Query\PrefixSearchQueryBuilder;
8use CirrusSearch\Search\CompletionResultsCollector;
9use CirrusSearch\Search\FancyTitleResultsType;
10use CirrusSearch\Search\MSearchRequests;
11use CirrusSearch\Search\SearchContext;
12use CirrusSearch\Search\SearchRequestBuilder;
13use Closure;
14use Elastica\Index;
15use Elastica\Multi\Search as MultiSearch;
16use Elastica\Query;
17use Elastica\ResultSet;
18use Elastica\Search;
19use MediaWiki\MediaWikiServices;
20use MediaWiki\Status\Status;
21use MediaWiki\User\User;
22use SearchSuggestionSet;
23use Wikimedia\Assert\Assert;
24
25/**
26 * Performs search as you type queries using Completion Suggester.
27 *
28 * This program is free software; you can redistribute it and/or modify
29 * it under the terms of the GNU General Public License as published by
30 * the Free Software Foundation; either version 2 of the License, or
31 * (at your option) any later version.
32 *
33 * This program is distributed in the hope that it will be useful,
34 * but WITHOUT ANY WARRANTY; without even the implied warranty of
35 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36 * GNU General Public License for more details.
37 *
38 * You should have received a copy of the GNU General Public License along
39 * with this program; if not, write to the Free Software Foundation, Inc.,
40 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
41 * http://www.gnu.org/copyleft/gpl.html
42 */
43
44/**
45 * Completion Suggester Searcher
46 *
47 * NOTES:
48 * The CompletionSuggester is built on top of the ElasticSearch Completion
49 * Suggester.
50 * (https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-completion.html).
51 *
52 * This class is used at query time, see
53 * CirrusSearch\BuildDocument\SuggestBuilder for index time logic.
54 *
55 * Document model: Cirrus documents are indexed with 2 suggestions:
56 *
57 * 1. The title suggestion (and close redirects).
58 * This helps to avoid displaying redirects with typos (e.g. Albert Enstein,
59 * Unietd States) where we make the assumption that if the redirect is close
60 * enough it's likely a typo and it's preferable to display the canonical title.
61 * This decision is made at index-time in SuggestBuilder::extractTitleAndSimilarRedirects.
62 *
63 * 2. The redirect suggestions
64 * Because the same canonical title can be returned twice we support fetch_limit_factor
65 * in suggest profiles to fetch more than what the user asked for.
66 *
67 * Additionally, if the requested namespaces include anything other than NS_MAIN, a prefix search query
68 * is sent to the main index. Results are appended to the suggest results. Appending
69 * is far from ideal but in the current state scores between the suggest index and prefix
70 * search are not comparable.
71 * TODO: investigate computing the comp suggest score on main indices to properly merge
72 * results.
73 */
74class CompletionSuggester extends ElasticsearchIntermediary {
75    /**
76     * @const string multisearch key to identify the comp suggest request
77     */
78    private const MSEARCH_KEY_SUGGEST = "suggest";
79
80    /**
81     * @const string multisearch key to identify the prefix search request
82     */
83    private const MSEARCH_KEY_PREFIX = "prefix";
84
85    /**
86     * Search type (passed to newLog() and used to look up the timeout configs)
87     */
88    private const SEARCH_TYPE = 'comp_suggest';
89
90    /**
91     * @var int maximum number of results (final)
92     */
93    private $limit;
94
95    /**
96     * @var int offset of the first result wanted by the client (final)
97     */
98    private $offset;
99
100    /**
101     * @var string index base name to use (final)
102     */
103    private $indexBaseName;
104
105    /**
106     * @var Index the title suggest index resolved from the index base name (final)
107     */
108    private $completionIndex;
109
110    /**
111     * Search environment configuration (final)
112     * @var SearchConfig
113     */
114    private $config;
115
116    /**
117     * @var SearchContext context built from the requested namespaces and debug options (final)
118     */
119    private $searchContext;
120
121    /**
122     * @var CompSuggestQueryBuilder (final)
123     */
124    private $compSuggestBuilder;
125
126    /**
127     * @var PrefixSearchQueryBuilder (final)
128     */
129    private $prefixSearchQueryBuilder;
130
131    /**
132     * @var SearchRequestBuilder builder of the prefix search request, only assigned by getPrefixSearchRequest()
133     */
134    private $prefixSearchRequestBuilder;
135
136    /**
137     * @param Connection $conn Connection passed to the parent intermediary, also used to resolve the completion index
138     * @param int $limit Limit the results to this many
139     * @param int $offset Offset of the first result wanted by the client
140     * @param SearchConfig|null $config Configuration settings, the 'CirrusSearch' config is loaded when null
141     * @param int[]|null $namespaces Array of namespace numbers to search or null to search all namespaces.
142     * @param User|null $user user for which this search is being performed.  Attached to slow request logs.
143     * @param string|bool $index Base name for index to search from, defaults to $wgCirrusSearchIndexBaseName
144     * @param string|null $profileName force the profile to use otherwise SearchProfileService defaults will be used
145     * @param CirrusDebugOptions|null $debugOptions Debug options handed to the SearchContext
146     */
147    public function __construct( Connection $conn, $limit, $offset = 0, ?SearchConfig $config = null, ?array $namespaces = null,
148        ?User $user = null, $index = false, $profileName = null,
149                                 ?CirrusDebugOptions $debugOptions = null ) {
150        if ( $config === null ) {
151            // @todo connection has an embedded config ... reuse that? somehow should
152            // at least ensure they are the same.
153            $config = MediaWikiServices::getInstance()
154                ->getConfigFactory()
155                ->makeConfig( 'CirrusSearch' );
156        }
157
158        parent::__construct( $conn, $user, $config->get( 'CirrusSearchSlowSearch' ) );
159        $this->config = $config;
160        $this->limit = $limit;
161        $this->offset = $offset;
162        $this->indexBaseName = $index ?: $config->get( SearchConfig::INDEX_BASE_NAME ); // any falsy $index falls back to the configured base name
163        $this->completionIndex = $this->connection->getIndex( $this->indexBaseName,
164            Connection::TITLE_SUGGEST_INDEX_SUFFIX );
165        $this->searchContext = new SearchContext( $this->config, $namespaces, $debugOptions );
166
167        $profileDefinition = $this->config->getProfileService()
168            ->loadProfile( SearchProfileService::COMPLETION, SearchProfileService::CONTEXT_DEFAULT, $profileName );
169        $this->compSuggestBuilder = new CompSuggestQueryBuilder(
170            $this->searchContext,
171            $profileDefinition,
172            $limit,
173            $offset
174        );
175        $this->prefixSearchQueryBuilder = new PrefixSearchQueryBuilder(); // the matching request builder is only created in getPrefixSearchRequest()
176    }
177
178    /**
179     * Produce a set of completion suggestions for text using _suggest
180     * See https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-suggesters-completion.html
181     *
182     * WARNING: experimental API
183     *
184     * @param string $text Search term
185     * @param string[]|null $variants Search term variants
186     *  Usually issued via LanguageConverter::autoConvertToAllVariants( $text ) for the content language.
187     * @return Status holds an empty SearchSuggestionSet when no request could be built, a query/results dump when debug options ask for one, otherwise the pool counter work status
188     */
189    public function suggest( $text, $variants = null ) {
190        $suggestSearch = $this->getSuggestSearchRequest( $text, $variants );
191        $mSearchRequests = new MSearchRequests();
192
193        if ( $suggestSearch !== null ) {
194            $mSearchRequests->addRequest( self::MSEARCH_KEY_SUGGEST, $suggestSearch );
195        }
196
197        $prefixSearch = $this->getPrefixSearchRequest( $text, $variants );
198        if ( $prefixSearch !== null ) {
199            $mSearchRequests->addRequest( self::MSEARCH_KEY_PREFIX, $prefixSearch );
200        }
201
202        if ( !$mSearchRequests->getRequests() ) { // neither the suggest nor the prefix request could be built
203            return Status::newGood( SearchSuggestionSet::emptySuggestionSet() );
204        }
205        $description = "{queryType} search for '{query}'";
206
207        if ( $this->searchContext->getDebugOptions()->isCirrusDumpQuery() ) { // debug: return the queries instead of running them
208            return $mSearchRequests->dumpQuery( $description );
209        }
210
211        $multiSearch = new MultiSearch( $this->connection->getClient() );
212        $multiSearch->addSearches( $mSearchRequests->getRequests() );
213
214        $this->connection->setTimeout( $this->getClientTimeout( self::SEARCH_TYPE ) );
215
216        $status = Util::doPoolCounterWork( 'CirrusSearch-Completion', $this->user,
217                function () use ( $multiSearch, $text, $description ) {
218                    $log = $this->newLog( $description, self::SEARCH_TYPE, [
219                        'query' => $text,
220                        'offset' => $this->offset,
221                    ] );
222
223                    $resultsTransformer = $this->getResultsTransformer( $log ); // null when a results dump was requested
224
225                    return $this->runMSearch( $multiSearch, $log, $this->connection,
226                        $resultsTransformer );
227                } );
228
229        if ( $status->isOk() && $this->searchContext->getDebugOptions()->isCirrusDumpResult() ) { // debug: dump the raw responses
230            $resultSets = $status->getValue()->getResultSets();
231            $responses = $mSearchRequests->toMSearchResponses( $resultSets );
232
233            return $responses->dumpResults( $description );
234        }
235
236        return $status;
237    }
238
239    /**
240     * @param ResultSet[] $results result sets indexed by multisearch key (MSEARCH_KEY_SUGGEST / MSEARCH_KEY_PREFIX)
241     * @param CompletionRequestLog $log request log, receives the summed total hits
242     * @return SearchSuggestionSet set returned by the collector's logAndGetSet()
243     */
244    private function processMSearchResponse( array $results, CompletionRequestLog $log ) {
245        $collector = new CompletionResultsCollector(
246            $this->limit, $this->offset, $this->config->get( 'CirrusSearchCompletionBannedPageIds' ) );
247        $totalHits = $this->collectCompSuggestResults( $collector, $results, $log ); // suggest results are collected first
248        $totalHits += $this->collectPrefixSearchResults( $collector, $results, $log ); // prefix results go through the same collector afterwards
249        $log->setTotalHits( $totalHits );
250        return $collector->logAndGetSet( $log );
251    }
252
253    /**
254     * @param CompletionResultsCollector $collector collector handed to CompSuggestQueryBuilder::postProcess()
255     * @param ResultSet[] $results result sets indexed by multisearch key
256     * @param CompletionRequestLog $log request log, receives the index name and the suggest query time
257     * @return int 0 when $results has no suggest entry, otherwise the value returned by postProcess()
258     */
259    private function collectCompSuggestResults( CompletionResultsCollector $collector, array $results, CompletionRequestLog $log ) {
260        if ( !isset( $results[self::MSEARCH_KEY_SUGGEST] ) ) { // no suggest request was part of this multisearch
261            return 0;
262        }
263        $log->addIndex( $this->completionIndex->getName() );
264        $suggestResults = $results[self::MSEARCH_KEY_SUGGEST];
265        $log->setSuggestTookMs( intval( $suggestResults->getResponse()->getQueryTime() * 1000 ) ); // presumably getQueryTime() is in seconds - TODO confirm
266        return $this->compSuggestBuilder->postProcess(
267            $collector,
268            $suggestResults,
269            $this->completionIndex->getName()
270        );
271    }
272
273    /**
274     * @param CompletionResultsCollector $collector collector the prefix suggestions are offered to
275     * @param ResultSet[] $results result sets indexed by multisearch key
276     * @param CompletionRequestLog $log request log, receives the index name and the prefix query time
277     * @return int 0 when $results has no prefix entry, otherwise the total hits of the prefix result set
278     * @throws \Exception
279     */
280    private function collectPrefixSearchResults( CompletionResultsCollector $collector, array $results, CompletionRequestLog $log ) {
281        if ( !isset( $results[self::MSEARCH_KEY_PREFIX] ) ) { // no prefix request was part of this multisearch
282            return 0;
283        }
284        $indexName = $this->prefixSearchRequestBuilder->getIndex()->getName(); // builder was stored by getPrefixSearchRequest()
285        $prefixResults = $results[self::MSEARCH_KEY_PREFIX];
286        $totalHits = $prefixResults->getTotalHits();
287        $log->addIndex( $indexName );
288        $log->setPrefixTookMs( intval( $prefixResults->getResponse()->getQueryTime() * 1000 ) );
289        // We only append as we can't really compare scores without more complex code/evaluation
290        if ( $collector->isFull() ) {
291            return $totalHits;
292        }
293        /** @var FancyTitleResultsType $rType */
294        $rType = $this->prefixSearchRequestBuilder->getSearchContext()->getResultsType();
295        // the code below highly depends on the array format built by
296        // FancyTitleResultsType::transformOneElasticResult assert that this type
297        // is properly set so that we fail during unit tests if someone changes it
298        // inadvertently.
299        Assert::precondition( $rType instanceof FancyTitleResultsType, '$rType must be a FancyTitleResultsType' );
300        // scores can go negative, it's not a problem we only use scores for sorting
301        // they'll be forgotten in client response
302        $score = $collector->getMinScore() !== null ? $collector->getMinScore() - 1 : count( $prefixResults->getResults() ); // one below the collector's min score, or the result count when there is none
303
304        $namespaces = $this->prefixSearchRequestBuilder->getSearchContext()->getNamespaces();
305        foreach ( $prefixResults->getResults() as $res ) {
306            $pageId = $this->config->makePageId( $res->getId() );
307            $title = FancyTitleResultsType::chooseBestTitleOrRedirect( $rType->transformOneElasticResult( $res, $namespaces ) );
308            if ( $title === false ) { // no usable title for this hit, skip it
309                continue;
310            }
311            $suggestion = new \SearchSuggestion( $score--, $title->getPrefixedText(), $title, $pageId ); // each following suggestion gets a score one lower
312            if ( !$collector->collect( $suggestion, 'prefix', $indexName ) && $collector->isFull() ) { // stop only when the rejection is due to a full collector
313                break;
314            }
315        }
316        return $totalHits;
317    }
318
319    /**
320     * @param string $text Search term
321     * @param string[]|null $variants Search term variants
322     *  Usually issued via LanguageConverter::autoConvertToAllVariants( $text ) for the content language.
323     * @return Search|null null when the comp suggest builder reports that no results are possible
324     */
325    private function getSuggestSearchRequest( $text, $variants ) {
326        if ( !$this->compSuggestBuilder->areResultsPossible() ) {
327            return null;
328        }
329
330        $suggest = $this->compSuggestBuilder->build( $text, $variants );
331        $query = new Query( new Query\MatchNone() ); // match-none query with size 0: only the suggest section set below is attached
332        $query->setSize( 0 );
333        $query->setSuggest( $suggest );
334        $query->setSource( [ 'target_title' ] );
335        $search = new Search( $this->connection->getClient() );
336        $search->addIndex( $this->completionIndex );
337        $search->setQuery( $query );
338        return $search;
339    }
340
341    /**
342     * Build the prefix search request for the requested namespaces other than NS_MAIN.
343     * @param string $term Search term
344     * @param string[]|null $variants Search term variants (usually LanguageConverter::autoConvertToAllVariants( $term ))
345     * @return Search|null null when no prefix search is needed or no results are possible
346     */
347    private function getPrefixSearchRequest( $term, $variants ) {
348        $namespaces = $this->searchContext->getNamespaces();
349        if ( $namespaces === null ) {
350            return null;
351        }
352
353        foreach ( $namespaces as $k => $v ) {
354            // namespace ids can be numeric strings, normalise to int so the strict comparison still matches
355            if ( (int)$v === NS_MAIN ) {
356                unset( $namespaces[$k] );
357            }
358        }
359
360        if ( $namespaces === [] ) { // only NS_MAIN was requested, the completion suggester covers it
361            return null;
362        }
363        $limit = CompSuggestQueryBuilder::computeHardLimit( $this->limit, $this->offset, $this->config );
364        if ( $this->offset > $limit ) {
365            return null;
366        }
367        $prefixSearchContext = new SearchContext( $this->config, $namespaces );
368        $prefixSearchContext->setResultsType( new FancyTitleResultsType( 'prefix' ) );
369        $this->prefixSearchQueryBuilder->build( $prefixSearchContext, $term, $variants );
370        if ( !$prefixSearchContext->areResultsPossible() ) {
371            // $prefixSearchContext might contain warnings, but these are lost.
372            return null;
373        }
374        $this->prefixSearchRequestBuilder = new SearchRequestBuilder( $prefixSearchContext, $this->connection, $this->indexBaseName ); // kept on the instance, collectPrefixSearchResults() reads it back
375        $this->prefixSearchRequestBuilder->setTimeout( $this->getTimeout( self::SEARCH_TYPE ) );
376        return $this->prefixSearchRequestBuilder->setLimit( $limit )
377            // collect all results up to $limit, $this->offset is the offset the client wants
378            // not the offset in prefix search results.
379            ->setOffset( 0 )
380            ->build();
381    }
382
383    /**
384     * @param string $description log description, passed through to CompletionRequestLog
385     * @param string $queryType query type, passed through to CompletionRequestLog
386     * @param array $extra extra values passed through to CompletionRequestLog
387     * @return CompletionRequestLog new log that also receives the namespaces of the search context
388     */
389    protected function newLog( $description, $queryType, array $extra = [] ) {
390        return new CompletionRequestLog(
391            $description,
392            $queryType,
393            $extra,
394            $this->searchContext->getNamespaces()
395        );
396    }
397
398    /**
399     * @return Index the title suggest index resolved in the constructor
400     */
401    public function getCompletionIndex() {
402        return $this->completionIndex;
403    }
404
405    /**
406     * @param CompletionRequestLog $log request log captured by the returned closure
407     * @return Closure|null closure wrapping processMSearchResponse(), or null when debug options ask for a results dump
408     */
409    private function getResultsTransformer( CompletionRequestLog $log ): ?Closure {
410        $resultsTransformer = null;
411        if ( !$this->searchContext->getDebugOptions()->isCirrusDumpResult() ) { // presumably left null so suggest() can dump the untransformed result sets
412            $resultsTransformer = function ( \Elastica\Multi\ResultSet $results ) use ( $log ) {
413                return $this->processMSearchResponse( $results->getResultSets(), $log );
414            };
415        }
416
417        return $resultsTransformer;
418    }
419
420}