Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 136 |
|
0.00% |
0 / 10 |
CRAP | |
0.00% |
0 / 1 |
CompletionSuggester | |
0.00% |
0 / 136 |
|
0.00% |
0 / 10 |
1190 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
12 | |||
suggest | |
0.00% |
0 / 30 |
|
0.00% |
0 / 1 |
56 | |||
processMSearchResponse | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
collectCompSuggestResults | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
collectPrefixSearchResults | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
72 | |||
getSuggestSearchRequest | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
6 | |||
getPrefixSearchRequest | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
56 | |||
newLog | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
getCompletionIndex | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getResultsTransformer | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | |
3 | namespace CirrusSearch; |
4 | |
5 | use CirrusSearch\Profile\SearchProfileService; |
6 | use CirrusSearch\Query\CompSuggestQueryBuilder; |
7 | use CirrusSearch\Query\PrefixSearchQueryBuilder; |
8 | use CirrusSearch\Search\CompletionResultsCollector; |
9 | use CirrusSearch\Search\FancyTitleResultsType; |
10 | use CirrusSearch\Search\MSearchRequests; |
11 | use CirrusSearch\Search\SearchContext; |
12 | use CirrusSearch\Search\SearchRequestBuilder; |
13 | use Closure; |
14 | use Elastica\Index; |
15 | use Elastica\Multi\Search as MultiSearch; |
16 | use Elastica\Query; |
17 | use Elastica\ResultSet; |
18 | use Elastica\Search; |
19 | use MediaWiki\MediaWikiServices; |
20 | use MediaWiki\Status\Status; |
21 | use MediaWiki\User\User; |
22 | use SearchSuggestionSet; |
23 | use Wikimedia\Assert\Assert; |
24 | |
25 | /** |
26 | * Performs search as you type queries using Completion Suggester. |
27 | * |
28 | * This program is free software; you can redistribute it and/or modify |
29 | * it under the terms of the GNU General Public License as published by |
30 | * the Free Software Foundation; either version 2 of the License, or |
31 | * (at your option) any later version. |
32 | * |
33 | * This program is distributed in the hope that it will be useful, |
34 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
35 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
36 | * GNU General Public License for more details. |
37 | * |
38 | * You should have received a copy of the GNU General Public License along |
39 | * with this program; if not, write to the Free Software Foundation, Inc., |
40 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
41 | * http://www.gnu.org/copyleft/gpl.html |
42 | */ |
43 | |
44 | /** |
45 | * Completion Suggester Searcher |
46 | * |
47 | * NOTES: |
48 | * The CompletionSuggester is built on top of the ElasticSearch Completion |
49 | * Suggester. |
50 | * (https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-completion.html). |
51 | * |
52 | * This class is used at query time, see |
53 | * CirrusSearch\BuildDocument\SuggestBuilder for index time logic. |
54 | * |
55 | * Document model: Cirrus documents are indexed with 2 suggestions: |
56 | * |
57 | * 1. The title suggestion (and close redirects). |
58 | * This helps to avoid displaying redirects with typos (e.g. Albert Enstein, |
59 | * Unietd States) where we make the assumption that if the redirect is close |
60 | * enough it's likely a typo and it's preferable to display the canonical title. |
61 | * This decision is made at index-time in SuggestBuilder::extractTitleAndSimilarRedirects. |
62 | * |
63 | * 2. The redirect suggestions |
64 | * Because the same canonical title can be returned twice we support fetch_limit_factor |
65 | * in suggest profiles to fetch more than what the user asked. |
66 | * |
67 | * Additionally, if the requested namespaces include namespaces other than NS_MAIN, a prefix search query |
68 | * is sent to the main index. Results are appended to the suggest results. Appending |
69 | * is far from ideal but in the current state scores between the suggest index and prefix |
70 | * search are not comparable. |
71 | * TODO: investigate computing the comp suggest score on main indices to properly merge |
72 | * results. |
73 | */ |
class CompletionSuggester extends ElasticsearchIntermediary {
	/**
	 * @const string multisearch key to identify the comp suggest request
	 */
	private const MSEARCH_KEY_SUGGEST = "suggest";

	/**
	 * @const string multisearch key to identify the prefix search request
	 */
	private const MSEARCH_KEY_PREFIX = "prefix";

	/**
	 * Search type (used for logs & timeout configs)
	 */
	private const SEARCH_TYPE = 'comp_suggest';

	/**
	 * @var int maximum number of result (final)
	 */
	private $limit;

	/**
	 * @var int offset (final)
	 */
	private $offset;

	/**
	 * @var string index base name to use (final)
	 */
	private $indexBaseName;

	/**
	 * @var Index (final)
	 */
	private $completionIndex;

	/**
	 * Search environment configuration (final)
	 * @var SearchConfig
	 */
	private $config;

	/**
	 * @var SearchContext (final)
	 */
	private $searchContext;

	/**
	 * @var CompSuggestQueryBuilder (final)
	 */
	private $compSuggestBuilder;

	/**
	 * @var PrefixSearchQueryBuilder (final)
	 */
	private $prefixSearchQueryBuilder;

	/**
	 * Only assigned by getPrefixSearchRequest() when a prefix search request is
	 * actually built; collectPrefixSearchResults() reads it back to find the
	 * index name and the results type.
	 *
	 * @var SearchRequestBuilder|null the builder to build the search for prefix search queries
	 */
	private $prefixSearchRequestBuilder;

	/**
	 * @param Connection $conn
	 * @param int $limit Limit the results to this many
	 * @param int $offset
	 * @param SearchConfig|null $config Configuration settings
	 * @param int[]|null $namespaces Array of namespace numbers to search or null to search all namespaces.
	 * @param User|null $user user for which this search is being performed. Attached to slow request logs.
	 * @param string|bool $index Base name for index to search from, defaults to $wgCirrusSearchIndexBaseName
	 * @param string|null $profileName force the profile to use otherwise SearchProfileService defaults will be used
	 * @param CirrusDebugOptions|null $debugOptions
	 */
	public function __construct( Connection $conn, $limit, $offset = 0, ?SearchConfig $config = null, ?array $namespaces = null,
		?User $user = null, $index = false, $profileName = null,
		?CirrusDebugOptions $debugOptions = null ) {
		if ( $config === null ) {
			// @todo connection has an embedded config ... reuse that? somehow should
			// at least ensure they are the same.
			$config = MediaWikiServices::getInstance()
				->getConfigFactory()
				->makeConfig( 'CirrusSearch' );
		}

		parent::__construct( $conn, $user, $config->get( 'CirrusSearchSlowSearch' ) );
		$this->config = $config;
		$this->limit = $limit;
		$this->offset = $offset;
		// Any falsy $index (false, '', null) falls back to the configured base name.
		$this->indexBaseName = $index ?: $config->get( SearchConfig::INDEX_BASE_NAME );
		$this->completionIndex = $this->connection->getIndex( $this->indexBaseName,
			Connection::TITLE_SUGGEST_INDEX_SUFFIX );
		$this->searchContext = new SearchContext( $this->config, $namespaces, $debugOptions );

		$profileDefinition = $this->config->getProfileService()
			->loadProfile( SearchProfileService::COMPLETION, SearchProfileService::CONTEXT_DEFAULT, $profileName );
		$this->compSuggestBuilder = new CompSuggestQueryBuilder(
			$this->searchContext,
			$profileDefinition,
			$limit,
			$offset
		);
		$this->prefixSearchQueryBuilder = new PrefixSearchQueryBuilder();
	}

	/**
	 * Produce a set of completion suggestions for text using _suggest
	 * See https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-suggesters-completion.html
	 *
	 * Up to two requests (completion suggest and prefix search) are bundled into a
	 * single multi-search. If neither request can be built, a good Status holding
	 * an empty suggestion set is returned without contacting the backend.
	 *
	 * WARNING: experimental API
	 *
	 * @param string $text Search term
	 * @param string[]|null $variants Search term variants
	 * Usually issued via LanguageConverter::autoConvertToAllVariants( $text ) for the content language.
	 * @return Status
	 */
	public function suggest( $text, $variants = null ) {
		$suggestSearch = $this->getSuggestSearchRequest( $text, $variants );
		$mSearchRequests = new MSearchRequests();

		if ( $suggestSearch !== null ) {
			$mSearchRequests->addRequest( self::MSEARCH_KEY_SUGGEST, $suggestSearch );
		}

		$prefixSearch = $this->getPrefixSearchRequest( $text, $variants );
		if ( $prefixSearch !== null ) {
			$mSearchRequests->addRequest( self::MSEARCH_KEY_PREFIX, $prefixSearch );
		}

		if ( !$mSearchRequests->getRequests() ) {
			// Nothing to send: neither the suggester nor the prefix search can yield results.
			return Status::newGood( SearchSuggestionSet::emptySuggestionSet() );
		}
		$description = "{queryType} search for '{query}'";

		if ( $this->searchContext->getDebugOptions()->isCirrusDumpQuery() ) {
			// Debug mode: return the requests that would have been sent instead of running them.
			return $mSearchRequests->dumpQuery( $description );
		}

		$multiSearch = new MultiSearch( $this->connection->getClient() );
		$multiSearch->addSearches( $mSearchRequests->getRequests() );

		$this->connection->setTimeout( $this->getClientTimeout( self::SEARCH_TYPE ) );

		$status = Util::doPoolCounterWork( 'CirrusSearch-Completion', $this->user,
			function () use ( $multiSearch, $text, $description ) {
				$log = $this->newLog( $description, self::SEARCH_TYPE, [
					'query' => $text,
					'offset' => $this->offset,
				] );

				$resultsTransformer = $this->getResultsTransformer( $log );

				return $this->runMSearch( $multiSearch, $log, $this->connection,
					$resultsTransformer );
			} );

		if ( $status->isOk() && $this->searchContext->getDebugOptions()->isCirrusDumpResult() ) {
			// Debug mode: no results transformer was installed (see getResultsTransformer()),
			// so the status value is still the multi-search result set.
			$resultSets = $status->getValue()->getResultSets();
			$responses = $mSearchRequests->toMSearchResponses( $resultSets );

			return $responses->dumpResults( $description );
		}

		return $status;
	}

	/**
	 * Merge the responses of the multi-search into a single suggestion set.
	 *
	 * Completion suggest results are collected first, then prefix search results;
	 * the sum of both hit counts is recorded on the log.
	 *
	 * @param ResultSet[] $results result sets keyed by the MSEARCH_KEY_* constants
	 * @param CompletionRequestLog $log
	 * @return SearchSuggestionSet
	 */
	private function processMSearchResponse( array $results, CompletionRequestLog $log ) {
		$collector = new CompletionResultsCollector(
			$this->limit, $this->offset, $this->config->get( 'CirrusSearchCompletionBannedPageIds' ) );
		$totalHits = $this->collectCompSuggestResults( $collector, $results, $log );
		$totalHits += $this->collectPrefixSearchResults( $collector, $results, $log );
		$log->setTotalHits( $totalHits );
		return $collector->logAndGetSet( $log );
	}

	/**
	 * Feed the completion suggester response (if any) into the collector.
	 *
	 * @param CompletionResultsCollector $collector
	 * @param ResultSet[] $results result sets keyed by the MSEARCH_KEY_* constants
	 * @param CompletionRequestLog $log
	 * @return int value returned by CompSuggestQueryBuilder::postProcess(), or 0 when
	 *  no suggest request was part of the multi-search
	 */
	private function collectCompSuggestResults( CompletionResultsCollector $collector, array $results, CompletionRequestLog $log ) {
		if ( !isset( $results[self::MSEARCH_KEY_SUGGEST] ) ) {
			return 0;
		}
		$log->addIndex( $this->completionIndex->getName() );
		$suggestResults = $results[self::MSEARCH_KEY_SUGGEST];
		// The query time is multiplied by 1000 and truncated to store an integer "took ms" value.
		$log->setSuggestTookMs( intval( $suggestResults->getResponse()->getQueryTime() * 1000 ) );
		return $this->compSuggestBuilder->postProcess(
			$collector,
			$suggestResults,
			$this->completionIndex->getName()
		);
	}

	/**
	 * Append the prefix search response (if any) to the collector.
	 *
	 * Prefix results are given scores strictly below the lowest score already
	 * collected so that they always sort after the completion suggester results.
	 *
	 * @param CompletionResultsCollector $collector
	 * @param ResultSet[] $results result sets keyed by the MSEARCH_KEY_* constants
	 * @param CompletionRequestLog $log
	 * @return int total hits reported by the prefix search, or 0 when no prefix
	 *  request was part of the multi-search
	 * @throws \Exception
	 */
	private function collectPrefixSearchResults( CompletionResultsCollector $collector, array $results, CompletionRequestLog $log ) {
		if ( !isset( $results[self::MSEARCH_KEY_PREFIX] ) ) {
			return 0;
		}
		$indexName = $this->prefixSearchRequestBuilder->getIndex()->getName();
		$prefixResults = $results[self::MSEARCH_KEY_PREFIX];
		$totalHits = $prefixResults->getTotalHits();
		$log->addIndex( $indexName );
		$log->setPrefixTookMs( intval( $prefixResults->getResponse()->getQueryTime() * 1000 ) );
		// We only append as we can't really compare scores without more complex code/evaluation
		if ( $collector->isFull() ) {
			return $totalHits;
		}
		/** @var FancyTitleResultsType $rType */
		$rType = $this->prefixSearchRequestBuilder->getSearchContext()->getResultsType();
		// the code below highly depends on the array format built by
		// FancyTitleResultsType::transformOneElasticResult assert that this type
		// is properly set so that we fail during unit tests if someone changes it
		// inadvertently.
		Assert::precondition( $rType instanceof FancyTitleResultsType, '$rType must be a FancyTitleResultsType' );
		// scores can go negative, it's not a problem we only use scores for sorting
		// they'll be forgotten in client response
		$score = $collector->getMinScore() !== null ? $collector->getMinScore() - 1 : count( $prefixResults->getResults() );

		$namespaces = $this->prefixSearchRequestBuilder->getSearchContext()->getNamespaces();
		foreach ( $prefixResults->getResults() as $res ) {
			$pageId = $this->config->makePageId( $res->getId() );
			$title = FancyTitleResultsType::chooseBestTitleOrRedirect( $rType->transformOneElasticResult( $res, $namespaces ) );
			if ( $title === false ) {
				continue;
			}
			// Post-decrement keeps the prefix results in the order returned by the backend.
			$suggestion = new \SearchSuggestion( $score--, $title->getPrefixedText(), $title, $pageId );
			// Stop only when the suggestion was rejected because the collector is full;
			// other rejections just skip to the next result.
			if ( !$collector->collect( $suggestion, 'prefix', $indexName ) && $collector->isFull() ) {
				break;
			}
		}
		return $totalHits;
	}

	/**
	 * Build the completion suggester request sent to the title suggest index.
	 *
	 * @param string $text Search term
	 * @param string[]|null $variants Search term variants
	 * Usually issued via LanguageConverter::autoConvertToAllVariants( $text ) for the content language.
	 * @return Search|null null when the suggest builder reports that no results are possible
	 */
	private function getSuggestSearchRequest( $text, $variants ) {
		if ( !$this->compSuggestBuilder->areResultsPossible() ) {
			return null;
		}

		$suggest = $this->compSuggestBuilder->build( $text, $variants );
		// Only the suggest section matters: the query itself matches nothing and asks for no hits.
		$query = new Query( new Query\MatchNone() );
		$query->setSize( 0 );
		$query->setSuggest( $suggest );
		$query->setSource( [ 'target_title' ] );
		$search = new Search( $this->connection->getClient() );
		$search->addIndex( $this->completionIndex );
		$search->setQuery( $query );
		return $search;
	}

	/**
	 * Build the prefix search request covering the requested namespaces other than NS_MAIN.
	 *
	 * As a side effect, $this->prefixSearchRequestBuilder is set when a request is built.
	 *
	 * @param string $term Search term
	 * @param string[]|null $variants Search term variants
	 * Usually issued via LanguageConverter::autoConvertToAllVariants( $text ) for the content language.
	 * @return Search|null null when no prefix search is needed or possible: no namespace
	 *  restriction, only NS_MAIN requested, offset beyond the hard limit, or the query
	 *  builder reports that no results are possible
	 */
	private function getPrefixSearchRequest( $term, $variants ) {
		$namespaces = $this->searchContext->getNamespaces();
		if ( $namespaces === null ) {
			return null;
		}

		foreach ( $namespaces as $k => $v ) {
			// Deliberately non-strict comparison: namespace ids can be numeric strings
			// (e.g. "0"). A strict === against the integer NS_MAIN would not match them,
			// leaving the main namespace in the prefix search.
			if ( $v == NS_MAIN ) {
				unset( $namespaces[$k] );
			}
		}

		if ( $namespaces === [] ) {
			return null;
		}
		$limit = CompSuggestQueryBuilder::computeHardLimit( $this->limit, $this->offset, $this->config );
		if ( $this->offset > $limit ) {
			return null;
		}
		$prefixSearchContext = new SearchContext( $this->config, $namespaces );
		$prefixSearchContext->setResultsType( new FancyTitleResultsType( 'prefix' ) );
		$this->prefixSearchQueryBuilder->build( $prefixSearchContext, $term, $variants );
		if ( !$prefixSearchContext->areResultsPossible() ) {
			// $prefixSearchContext might contain warnings, but these are lost.
			return null;
		}
		$this->prefixSearchRequestBuilder = new SearchRequestBuilder( $prefixSearchContext, $this->connection, $this->indexBaseName );
		$this->prefixSearchRequestBuilder->setTimeout( $this->getTimeout( self::SEARCH_TYPE ) );
		return $this->prefixSearchRequestBuilder->setLimit( $limit )
			// collect all results up to $limit, $this->offset is the offset the client wants
			// not the offset in prefix search results.
			->setOffset( 0 )
			->build();
	}

	/**
	 * Create the request log for a completion search, tagged with the namespaces
	 * of the current search context.
	 *
	 * @param string $description
	 * @param string $queryType
	 * @param array $extra
	 * @return CompletionRequestLog
	 */
	protected function newLog( $description, $queryType, array $extra = [] ) {
		return new CompletionRequestLog(
			$description,
			$queryType,
			$extra,
			$this->searchContext->getNamespaces()
		);
	}

	/**
	 * @return Index the title suggest index queried by the completion suggester
	 */
	public function getCompletionIndex() {
		return $this->completionIndex;
	}

	/**
	 * Build the callback that turns the raw multi-search response into a
	 * SearchSuggestionSet.
	 *
	 * @param CompletionRequestLog $log
	 * @return Closure|null null when the "dump result" debug option is enabled, so that
	 *  the raw result sets are left untouched for suggest() to dump
	 */
	private function getResultsTransformer( CompletionRequestLog $log ): ?Closure {
		$resultsTransformer = null;
		if ( !$this->searchContext->getDebugOptions()->isCirrusDumpResult() ) {
			$resultsTransformer = function ( \Elastica\Multi\ResultSet $results ) use ( $log ) {
				return $this->processMSearchResponse( $results->getResultSets(), $log );
			};
		}

		return $resultsTransformer;
	}

}