Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 155 |
|
0.00% |
0 / 10 |
CRAP | |
0.00% |
0 / 1 |
| CompletionSuggester | |
0.00% |
0 / 155 |
|
0.00% |
0 / 10 |
1640 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 39 |
|
0.00% |
0 / 1 |
90 | |||
| suggest | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
56 | |||
| processMSearchResponse | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| collectCompSuggestResults | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
| collectPrefixSearchResults | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
72 | |||
| getSuggestSearchRequest | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
6 | |||
| getPrefixSearchRequest | |
0.00% |
0 / 23 |
|
0.00% |
0 / 1 |
56 | |||
| newLog | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
| getCompletionIndex | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| getResultsTransformer | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch; |
| 4 | |
| 5 | use CirrusSearch\Profile\SearchProfileService; |
| 6 | use CirrusSearch\Query\CompSuggestQueryBuilder; |
| 7 | use CirrusSearch\Query\PrefixSearchQueryBuilder; |
| 8 | use CirrusSearch\Search\CompletionResultsCollector; |
| 9 | use CirrusSearch\Search\FancyTitleResultsType; |
| 10 | use CirrusSearch\Search\MSearchRequests; |
| 11 | use CirrusSearch\Search\SearchContext; |
| 12 | use CirrusSearch\Search\SearchRequestBuilder; |
| 13 | use CirrusSearch\SecondTry\SecondTryRunner; |
| 14 | use CirrusSearch\SecondTry\SecondTryRunnerFactory; |
| 15 | use CirrusSearch\SecondTry\SecondTrySearchFactory; |
| 16 | use Closure; |
| 17 | use Elastica\Index; |
| 18 | use Elastica\Multi\Search as MultiSearch; |
| 19 | use Elastica\Query; |
| 20 | use Elastica\ResultSet; |
| 21 | use Elastica\Search; |
| 22 | use MediaWiki\MediaWikiServices; |
| 23 | use MediaWiki\Status\Status; |
| 24 | use MediaWiki\User\User; |
| 25 | use SearchSuggestionSet; |
| 26 | use Wikimedia\Assert\Assert; |
| 27 | |
| 28 | /** |
| 29 | * Performs search as you type queries using Completion Suggester. |
| 30 | * |
| 31 | * @license GPL-2.0-or-later |
| 32 | */ |
| 33 | |
/**
 * Completion Suggester Searcher
 *
 * NOTES:
 * The CompletionSuggester is built on top of the ElasticSearch Completion
 * Suggester.
 * (https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-completion.html).
 *
 * This class is used at query time, see
 * CirrusSearch\BuildDocument\SuggestBuilder for index time logic.
 *
 * Document model: Cirrus documents are indexed with 2 suggestions:
 *
 * 1. The title suggestion (and close redirects).
 * This helps to avoid displaying redirects with typos (e.g. Albert Enstein,
 * Unietd States) where we make the assumption that if the redirect is close
 * enough it's likely a typo and it's preferable to display the canonical title.
 * This decision is made at index-time in SuggestBuilder::extractTitleAndSimilarRedirects.
 *
 * 2. The redirect suggestions
 * Because the same canonical title can be returned twice we support fetch_limit_factor
 * in suggest profiles to fetch more than what the user asked.
 *
 * Additionally, if the requested namespaces include anything other than NS_MAIN, a prefix
 * search query is sent to the main index. Results are appended to the suggest results.
 * Appending is far from ideal but in the current state scores between the suggest index
 * and prefix search are not comparable.
 * TODO: investigate computing the comp suggest score on main indices to properly merge
 * results.
 */
class CompletionSuggester extends ElasticsearchIntermediary {
	/**
	 * @const string multisearch key to identify the comp suggest request
	 */
	private const MSEARCH_KEY_SUGGEST = "suggest";

	/**
	 * @const string multisearch key to identify the prefix search request
	 */
	private const MSEARCH_KEY_PREFIX = "prefix";

	/**
	 * Search type (used for logs & timeout configs)
	 */
	private const SEARCH_TYPE = 'comp_suggest';

	/**
	 * @var int maximum number of results (final)
	 */
	private $limit;

	/**
	 * @var int offset (final)
	 */
	private $offset;

	/**
	 * @var string index base name to use (final)
	 */
	private $indexBaseName;

	/**
	 * @var Index (final)
	 */
	private $completionIndex;

	/**
	 * Search environment configuration (final).
	 * This is the alternative index's config when an alternative completion
	 * index is in use, otherwise the config given to (or loaded by) the constructor.
	 * @var SearchConfig
	 */
	private $config;

	/**
	 * @var SearchContext (final)
	 */
	private $searchContext;

	/**
	 * @var CompSuggestQueryBuilder (final)
	 */
	private $compSuggestBuilder;

	/**
	 * @var PrefixSearchQueryBuilder (final)
	 */
	private $prefixSearchQueryBuilder;

	/**
	 * The builder used to build the search for prefix search queries.
	 * Only assigned by getPrefixSearchRequest() when a prefix search request is
	 * actually built; it is read back by collectPrefixSearchResults().
	 * @var SearchRequestBuilder|null
	 */
	private $prefixSearchRequestBuilder;

	private SecondTryRunner $secondTryRunner;

	/**
	 * @param Connection $conn
	 * @param int $limit Limit the results to this many
	 * @param int $offset
	 * @param SearchConfig|null $config Configuration settings, when null the
	 *  'CirrusSearch' config is loaded from MediaWikiServices
	 * @param int[]|null $namespaces Array of namespace numbers to search or null to search all namespaces.
	 * @param User|null $user user for which this search is being performed. Attached to slow request logs.
	 * @param string|bool $index Base name for index to search from, defaults to $wgCirrusSearchIndexBaseName
	 * @param string|null $profileName force the profile to use otherwise SearchProfileService defaults will be used
	 * @param CirrusDebugOptions|null $debugOptions
	 * @param SecondTryRunnerFactory|null $secondTryRunnerFactory the SecondTryRunner factory,
	 *  when null a default factory is built from MediaWikiServices and $config
	 */
	public function __construct(
		Connection $conn,
		$limit,
		$offset = 0,
		?SearchConfig $config = null,
		?array $namespaces = null,
		?User $user = null,
		$index = false,
		$profileName = null,
		?CirrusDebugOptions $debugOptions = null,
		?SecondTryRunnerFactory $secondTryRunnerFactory = null
	) {
		if ( $config === null ) {
			// @todo connection has an embedded config ... reuse that? somehow should
			// at least ensure they are the same.
			$config = MediaWikiServices::getInstance()
				->getConfigFactory()
				->makeConfig( 'CirrusSearch' );
		}
		parent::__construct( $conn, $user, $config->get( 'CirrusSearchSlowSearch' ) );
		if ( $secondTryRunnerFactory === null ) {
			$secondTryRunnerFactory = new SecondTryRunnerFactory(
				new SecondTrySearchFactory(
					MediaWikiServices::getInstance()
						->getLanguageConverterFactory(),
				),
				$config
			);
		}
		$this->secondTryRunner = $secondTryRunnerFactory->create( SearchProfileService::CONTEXT_COMPLETION );

		$this->limit = $limit;
		$this->offset = $offset;
		$this->indexBaseName = $index ?: $config->get( SearchConfig::INDEX_BASE_NAME );
		$altIndexId = $config->get( 'CirrusSearchCompletionSuggesterUseAltIndexId' );
		// Check if the alternate index id is actually setup
		$altIndex = null;
		if ( $altIndexId !== null && AlternativeIndices::isValidAltIndexId( $altIndexId ) ) {
			$altIndex = AlternativeIndices::build( $config )
				->getAlternativeIndexById( AlternativeIndices::COMPLETION, (int)$altIndexId );
			// An alternative index that exists but is not flagged for use is ignored.
			if ( $altIndex !== null && !$altIndex->isUse() ) {
				$altIndex = null;
			}
		}
		if ( $altIndex !== null ) {
			// The alternative index brings its own config, which replaces $config
			// for everything built below.
			$this->completionIndex = $altIndex->getIndex( $this->connection );
			$this->config = $altIndex->getConfig();
		} else {
			$this->completionIndex = $this->connection->getIndex(
				$this->indexBaseName,
				Connection::TITLE_SUGGEST_INDEX_SUFFIX
			);
			$this->config = $config;
		}
		$this->searchContext = new SearchContext( $this->config, $namespaces, $debugOptions );

		$profileDefinition = $this->config->getProfileService()
			->loadProfile( SearchProfileService::COMPLETION, SearchProfileService::CONTEXT_COMPLETION, $profileName );
		$this->compSuggestBuilder = new CompSuggestQueryBuilder(
			$this->searchContext,
			$profileDefinition,
			$this->secondTryRunner,
			$limit,
			$offset
		);

		$this->prefixSearchQueryBuilder = new PrefixSearchQueryBuilder( $this->secondTryRunner );
	}

	/**
	 * Produce a set of completion suggestions for text using _suggest
	 * See https://www.elastic.co/guide/en/elasticsearch/reference/1.6/search-suggesters-completion.html
	 *
	 * The completion suggest request and the prefix search request (when they
	 * apply) are sent together as a single multi-search.
	 *
	 * WARNING: experimental API
	 *
	 * @param string $text Search term
	 * @return Status
	 */
	public function suggest( $text ) {
		$secondTryCandidates = $this->secondTryRunner->candidates( $text );
		$suggestSearch = $this->getSuggestSearchRequest( $text, $secondTryCandidates );
		$mSearchRequests = new MSearchRequests();

		if ( $suggestSearch !== null ) {
			$mSearchRequests->addRequest( self::MSEARCH_KEY_SUGGEST, $suggestSearch );
		}

		$prefixSearch = $this->getPrefixSearchRequest( $text, $secondTryCandidates );
		if ( $prefixSearch !== null ) {
			$mSearchRequests->addRequest( self::MSEARCH_KEY_PREFIX, $prefixSearch );
		}

		// Neither request could be built: nothing to send.
		if ( !$mSearchRequests->getRequests() ) {
			return Status::newGood( SearchSuggestionSet::emptySuggestionSet() );
		}
		$description = "{queryType} search for '{query}'";

		if ( $this->searchContext->getDebugOptions()->isCirrusDumpQuery() ) {
			return $mSearchRequests->dumpQuery( $description );
		}

		$multiSearch = new MultiSearch( $this->connection->getClient() );
		$multiSearch->addSearches( $mSearchRequests->getRequests() );

		$this->connection->setTimeout( $this->getClientTimeout( self::SEARCH_TYPE ) );

		$status = Util::doPoolCounterWork( 'CirrusSearch-Completion', $this->user,
			function () use ( $multiSearch, $text, $description ) {
				$log = $this->newLog( $description, self::SEARCH_TYPE, [
					'query' => $text,
					'offset' => $this->offset,
				] );

				$resultsTransformer = $this->getResultsTransformer( $log );

				return $this->runMSearch( $multiSearch, $log, $this->connection,
					$resultsTransformer );
			} );

		// In dump-result mode no transformer is installed (see getResultsTransformer()),
		// so the status value is read here as the multi-search result itself.
		if ( $status->isOK() && $this->searchContext->getDebugOptions()->isCirrusDumpResult() ) {
			$resultSets = $status->getValue()->getResultSets();
			$responses = $mSearchRequests->toMSearchResponses( $resultSets );

			return $responses->dumpResults( $description );
		}

		return $status;
	}

	/**
	 * Merge the completion suggest and prefix search responses into one suggestion set
	 * and record the combined total hits on the request log.
	 *
	 * @param ResultSet[] $results result sets keyed by multisearch key
	 * @param CompletionRequestLog $log
	 * @return SearchSuggestionSet
	 */
	private function processMSearchResponse( array $results, CompletionRequestLog $log ) {
		$collector = new CompletionResultsCollector(
			$this->limit, $this->offset, $this->config->get( 'CirrusSearchCompletionBannedPageIds' ) );
		// Order matters: prefix search results are appended after the suggest results.
		$totalHits = $this->collectCompSuggestResults( $collector, $results, $log );
		$totalHits += $this->collectPrefixSearchResults( $collector, $results, $log );
		$log->setTotalHits( $totalHits );
		return $collector->logAndGetSet( $log );
	}

	/**
	 * Feed the completion suggest response (if any) into the collector.
	 *
	 * @param CompletionResultsCollector $collector
	 * @param ResultSet[] $results result sets keyed by multisearch key
	 * @param CompletionRequestLog $log
	 * @return int the value returned by CompSuggestQueryBuilder::postProcess(),
	 *  or 0 when no suggest request was part of the multi-search
	 */
	private function collectCompSuggestResults(
		CompletionResultsCollector $collector,
		array $results,
		CompletionRequestLog $log
	) {
		if ( !isset( $results[self::MSEARCH_KEY_SUGGEST] ) ) {
			return 0;
		}
		$log->addIndex( $this->completionIndex->getName() );
		$suggestResults = $results[self::MSEARCH_KEY_SUGGEST];
		$log->setSuggestTookMs( intval( $suggestResults->getResponse()->getQueryTime() * 1000 ) );
		return $this->compSuggestBuilder->postProcess(
			$collector,
			$suggestResults,
			$this->completionIndex->getName()
		);
	}

	/**
	 * Append the prefix search response (if any) to the collector.
	 *
	 * @param CompletionResultsCollector $collector
	 * @param ResultSet[] $results result sets keyed by multisearch key
	 * @param CompletionRequestLog $log
	 * @return int total hits reported by the prefix search, or 0 when no prefix
	 *  search request was part of the multi-search
	 * @throws \Exception
	 */
	private function collectPrefixSearchResults(
		CompletionResultsCollector $collector,
		array $results,
		CompletionRequestLog $log
	) {
		if ( !isset( $results[self::MSEARCH_KEY_PREFIX] ) ) {
			return 0;
		}
		$indexName = $this->prefixSearchRequestBuilder->getIndex()->getName();
		$prefixResults = $results[self::MSEARCH_KEY_PREFIX];
		$totalHits = $prefixResults->getTotalHits();
		$log->addIndex( $indexName );
		$log->setPrefixTookMs( intval( $prefixResults->getResponse()->getQueryTime() * 1000 ) );
		// We only append as we can't really compare scores without more complex code/evaluation
		if ( $collector->isFull() ) {
			return $totalHits;
		}
		/** @var FancyTitleResultsType $rType */
		$rType = $this->prefixSearchRequestBuilder->getSearchContext()->getResultsType();
		// the code below highly depends on the array format built by
		// FancyTitleResultsType::transformOneElasticResult assert that this type
		// is properly set so that we fail during unit tests if someone changes it
		// inadvertently.
		Assert::precondition( $rType instanceof FancyTitleResultsType, '$rType must be a FancyTitleResultsType' );
		// scores can go negative, it's not a problem we only use scores for sorting
		// they'll be forgotten in client response
		$score = $collector->getMinScore() !== null
			? $collector->getMinScore() - 1
			: count( $prefixResults->getResults() );

		$namespaces = $this->prefixSearchRequestBuilder->getSearchContext()->getNamespaces();
		foreach ( $prefixResults->getResults() as $res ) {
			$pageId = $this->config->makePageId( $res->getId() );
			$title = FancyTitleResultsType::chooseBestTitleOrRedirect(
				$rType->transformOneElasticResult( $res, $namespaces )
			);
			if ( $title === false ) {
				continue;
			}
			// Each appended suggestion gets a strictly lower score than the previous one.
			$suggestion = new \SearchSuggestion( $score--, $title->getPrefixedText(), $title, $pageId );
			// Stop only when the suggestion was not collected because the collector is full.
			if ( !$collector->collect( $suggestion, 'prefix', $indexName ) && $collector->isFull() ) {
				break;
			}
		}
		return $totalHits;
	}

	/**
	 * Build the search request sent to the completion index.
	 *
	 * @param string $text Search term
	 * @param array<string, string[]> $secondTryCandidates second try search candidates
	 * @return Search|null null when the comp suggest builder reports that no results are possible
	 */
	private function getSuggestSearchRequest( string $text, array $secondTryCandidates ): ?Search {
		if ( !$this->compSuggestBuilder->areResultsPossible() ) {
			return null;
		}

		$suggest = $this->compSuggestBuilder->build( $text, $secondTryCandidates );
		// Only the suggest section is of interest: match no documents and request no hits.
		$query = new Query( new Query\MatchNone() );
		$query->setSize( 0 );
		$query->setSuggest( $suggest );
		$query->setSource( [ 'target_title' ] );
		$search = new Search( $this->connection->getClient() );
		$search->addIndex( $this->completionIndex );
		$search->setQuery( $query );
		return $search;
	}

	/**
	 * Build the prefix search request covering the requested namespaces other than NS_MAIN.
	 *
	 * Side effect: when a request is built, the SearchRequestBuilder used is stored
	 * in $this->prefixSearchRequestBuilder for collectPrefixSearchResults().
	 *
	 * @param string $term Search term
	 * @param array<string, string[]> $secondTryCandidates second try candidates
	 * @return Search|null null when no namespaces were given, when only NS_MAIN was
	 *  requested, when the offset exceeds the hard limit, or when the prefix search
	 *  context reports that no results are possible
	 */
	private function getPrefixSearchRequest( $term, array $secondTryCandidates ): ?Search {
		$namespaces = $this->searchContext->getNamespaces();
		if ( $namespaces === null ) {
			return null;
		}

		foreach ( $namespaces as $k => $v ) {
			// Deliberately non-strict comparison: namespace ids can be strings (e.g. "0"),
			// which a strict comparison would fail to recognise as NS_MAIN.
			if ( $v == NS_MAIN ) {
				unset( $namespaces[$k] );
			}
		}

		if ( $namespaces === [] ) {
			return null;
		}
		$limit = CompSuggestQueryBuilder::computeHardLimit( $this->limit, $this->offset, $this->config );
		if ( $this->offset > $limit ) {
			return null;
		}
		$prefixSearchContext = new SearchContext( $this->config, $namespaces );
		$prefixSearchContext->setResultsType( new FancyTitleResultsType( 'prefix' ) );
		$this->prefixSearchQueryBuilder->build( $prefixSearchContext, $term, $secondTryCandidates );
		if ( !$prefixSearchContext->areResultsPossible() ) {
			// $prefixSearchContext might contain warnings, but these are lost.
			return null;
		}
		$this->prefixSearchRequestBuilder = new SearchRequestBuilder(
			$prefixSearchContext,
			$this->connection,
			$this->indexBaseName
		);
		$this->prefixSearchRequestBuilder->setTimeout( $this->getTimeout( self::SEARCH_TYPE ) );
		return $this->prefixSearchRequestBuilder->setLimit( $limit )
			// collect all results up to $limit, $this->offset is the offset the client wants
			// not the offset in prefix search results.
			->setOffset( 0 )
			->build();
	}

	/**
	 * Create the request log for a completion request, tagged with the namespaces
	 * of the search context.
	 *
	 * @param string $description
	 * @param string $queryType
	 * @param array $extra
	 * @return CompletionRequestLog
	 */
	protected function newLog( $description, $queryType, array $extra = [] ) {
		return new CompletionRequestLog(
			$description,
			$queryType,
			$extra,
			$this->searchContext->getNamespaces()
		);
	}

	/**
	 * @return Index the completion index selected by the constructor
	 */
	public function getCompletionIndex() {
		return $this->completionIndex;
	}

	/**
	 * Build the callback that turns the raw multi-search response into a suggestion set.
	 *
	 * @param CompletionRequestLog $log
	 * @return Closure|null null in dump-result debug mode, so that the
	 *  multi-search result is left untransformed
	 */
	private function getResultsTransformer( CompletionRequestLog $log ): ?Closure {
		$resultsTransformer = null;
		if ( !$this->searchContext->getDebugOptions()->isCirrusDumpResult() ) {
			$resultsTransformer = function ( \Elastica\Multi\ResultSet $results ) use ( $log ) {
				return $this->processMSearchResponse( $results->getResultSets(), $log );
			};
		}

		return $resultsTransformer;
	}

}