Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
94.55% |
104 / 110 |
|
76.92% |
10 / 13 |
CRAP | |
0.00% |
0 / 1 |
| CompSuggestQueryBuilder | |
94.55% |
104 / 110 |
|
76.92% |
10 / 13 |
45.33 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
| areResultsPossible | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
3.14 | |||
| build | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
3 | |||
| buildSuggestQueries | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 | |||
| resolveFuzzy | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
4.02 | |||
| buildSuggestQuery | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
5 | |||
| handleVariants | |
100.00% |
19 / 19 |
|
100.00% |
1 / 1 |
6 | |||
| buildVariantProfile | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| postProcess | |
86.21% |
25 / 29 |
|
0.00% |
0 / 1 |
12.38 | |||
| decodeId | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getMergedProfiles | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| computeHardLimit | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
| getLimit | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch\Query; |
| 4 | |
| 5 | use CirrusSearch\BuildDocument\Completion\SuggestBuilder; |
| 6 | use CirrusSearch\Search\CompletionResultsCollector; |
| 7 | use CirrusSearch\Search\SearchContext; |
| 8 | use CirrusSearch\SearchConfig; |
| 9 | use CirrusSearch\SecondTry\SecondTryRunner; |
| 10 | use Elastica\ResultSet; |
| 11 | use Elastica\Suggest; |
| 12 | use Elastica\Suggest\Completion; |
| 13 | use SearchSuggestion; |
| 14 | use Wikimedia\Assert\Assert; |
| 15 | |
/**
 * Suggest (Completion) query builder.
 *
 * Unlike classic query builders it:
 * - handles the limit differently, as offsets are not accepted by suggest queries
 * - stores a mutable state in mergedProfiles (written by self::build, read by self::postProcess)
 */
class CompSuggestQueryBuilder {
	use QueryBuilderTraits;

	/** Base extra discount applied to the profiles built for second-try (variant) queries */
	public const VARIANT_EXTRA_DISCOUNT = 0.0001;

	/** @var SearchContext (final) */
	private $searchContext;

	/** @var array[] (final) the 'fst' section of the suggest profile, keyed by suggestion name */
	private $profile;

	/** @var int (final) number of results that can be displayed, never negative */
	private $limit;

	/** @var int (final) base size used for the suggest queries, see self::computeHardLimit */
	private $hardLimit;

	/** @var int (final) */
	private $offset;

	/** @var array[] (mutable) state rebuilt on every call to self::build */
	private $mergedProfiles = [];

	/** @var SecondTryRunner (final) gives the weight of each second-try strategy */
	private SecondTryRunner $secondTryRunner;

	/**
	 * @param SearchContext $context
	 * @param array $profile settings as defined in profiles/SuggestProfiles.config.php,
	 *  only its 'fst' key is read here and it must not be empty
	 * @param SecondTryRunner $secondTryRunner
	 * @param int $limit the number of results to display
	 * @param int $offset
	 */
	public function __construct( SearchContext $context, array $profile, SecondTryRunner $secondTryRunner, $limit, $offset = 0 ) {
		$this->searchContext = $context;
		// A missing 'fst' section is handled like an empty one so that the assertion
		// below reports it, instead of failing on an undefined array key.
		$this->profile = $profile['fst'] ?? [];
		Assert::parameter( count( $this->profile ) > 0, '$profile', 'Profile must not be empty' );
		$this->secondTryRunner = $secondTryRunner;
		$this->hardLimit = self::computeHardLimit( $limit, $offset, $context->getConfig() );
		// Never display more than what remains between the offset and the hard limit
		if ( $limit > $this->hardLimit - $offset ) {
			$limit = $this->hardLimit - $offset;
		}
		// The value above is negative when the offset is past the hard limit
		$this->limit = $limit > 0 ? $limit : 0;
		$this->offset = $offset;
	}

	/**
	 * Check the builder settings to determine if results are possible.
	 * If this method returns false the query does not need to be sent to elastic.
	 *
	 * @return bool true if results are possible false otherwise
	 */
	public function areResultsPossible() {
		$namespaces = $this->searchContext->getNamespaces();
		if ( $namespaces !== null && !in_array( NS_MAIN, $namespaces ) ) {
			return false;
		}
		// If the offset requested is greater than the hard limit
		// allowed we will always return an empty set so let's do it
		// asap.
		return $this->limit > 0;
	}

	/**
	 * Build the suggest query.
	 *
	 * Resets mergedProfiles to the configured profiles; entries for the second-try
	 * candidates are then added to it by self::handleVariants.
	 *
	 * @param string $term
	 * @param array<string, string[]> $secondTryCandidates
	 * @return Suggest
	 */
	public function build( string $term, array $secondTryCandidates = [] ): Suggest {
		$this->checkTitleSearchRequestLength( $term, $this->searchContext );
		// Keep the untrimmed term, it is used to dedup the second-try candidates
		$origTerm = $term;
		if ( mb_strlen( $term ) > SuggestBuilder::MAX_INPUT_LENGTH ) {
			// Trim the query otherwise we won't find results
			$term = mb_substr( $term, 0, SuggestBuilder::MAX_INPUT_LENGTH );
		}

		$queryLen = mb_strlen( trim( $term ) ); // Avoid cheating with spaces

		$this->mergedProfiles = $this->profile;
		$suggest = $this->buildSuggestQueries( $this->profile, $term, $queryLen );

		// Handle variants, update the set of profiles and suggest queries
		if ( $secondTryCandidates ) {
			$this->handleVariants( $suggest, $secondTryCandidates, $queryLen, $origTerm );
		}
		return $suggest;
	}

	/**
	 * Builds a set of suggest queries by reading the list of profiles.
	 * Profiles for which self::buildSuggestQuery returns null are skipped.
	 *
	 * @param array $profiles
	 * @param string $query
	 * @param int $queryLen the length to use when checking min/max_query_len
	 * @return Suggest a set of suggest queries ready for elastic
	 */
	private function buildSuggestQueries( array $profiles, $query, $queryLen ): Suggest {
		$suggest = new Suggest();
		foreach ( $profiles as $name => $config ) {
			$sugg = $this->buildSuggestQuery( $name, $config, $query, $queryLen );
			if ( $sugg !== null ) {
				$suggest->addSuggestion( $sugg );
			}
		}
		return $suggest;
	}

	/**
	 * Resolves AUTO fuzziness into a constant value:
	 * 0 below 3 codepoints, 1 below 6 codepoints, 2 otherwise.
	 * Any other configuration is returned untouched.
	 *
	 * @param array $fuzzy FST Fuzziness configuration
	 * @param int $queryLen The number of codepoints in the query
	 * @return array Resolved FST Fuzziness configuration
	 */
	private function resolveFuzzy( array $fuzzy, $queryLen ): array {
		// TODO: We could support `AUTO:2,8` syntax as well, but didnt seem necessary
		if ( ( $fuzzy['fuzziness'] ?? null ) === 'AUTO' ) {
			$low = 3;
			$high = 6;
			if ( $queryLen < $low ) {
				$fuzzy['fuzziness'] = 0;
			} elseif ( $queryLen < $high ) {
				$fuzzy['fuzziness'] = 1;
			} else {
				$fuzzy['fuzziness'] = 2;
			}
		}
		return $fuzzy;
	}

	/**
	 * Builds a suggest query from a profile.
	 *
	 * @param string $name name of the suggestion
	 * @param array $config Profile; 'min_query_len', 'field' and 'fetch_limit_factor' are
	 *  read unconditionally, 'max_query_len' and 'fuzzy' are optional
	 * @param string $query
	 * @param int $queryLen the length to use when checking min/max_query_len
	 * @return Completion|null suggest query ready for elastic or null when $queryLen is
	 *  outside of the bounds accepted by the profile
	 */
	private function buildSuggestQuery( $name, array $config, $query, $queryLen ): ?Completion {
		// Do not remove spaces at the end, the user might tell us he finished writing a word
		$query = ltrim( $query );
		if ( $config['min_query_len'] > $queryLen ) {
			return null;
		}
		if ( isset( $config['max_query_len'] ) && $queryLen > $config['max_query_len'] ) {
			return null;
		}
		$sug = new Completion( $name, $config['field'] );
		$sug->setPrefix( $query );
		// fetch_limit_factor may be fractional (e.g. 1.5): truncate explicitly rather than
		// relying on an implicit float to int conversion.
		$sug->setSize( (int)( $this->hardLimit * $config['fetch_limit_factor'] ) );
		if ( isset( $config['fuzzy'] ) ) {
			$sug->setFuzzy( $this->resolveFuzzy( $config['fuzzy'], $queryLen ) );
		}
		return $sug;
	}

	/**
	 * Add the suggest queries of the second-try candidates to $suggests and register
	 * the profiles used to build them in mergedProfiles.
	 *
	 * Every distinct candidate that differs from $term is combined with every configured
	 * profile. The resulting profile is named "<profile>-second-try-<strategy>-<variantIndex>",
	 * is flagged with the 'fallback' key and has its discount multiplied by
	 * VARIANT_EXTRA_DISCOUNT * weight(strategy) / variantIndex, where variantIndex starts
	 * at 1 and increases with every distinct candidate across all strategies.
	 *
	 * @param Suggest $suggests suggest queries to update
	 * @param array<string, string[]> $secondTryCandidates candidates as returned by {@link SecondTryRunner::candidate}
	 * @param int $queryLen the original query length
	 * @param string $term original term (used to dedup)
	 */
	private function handleVariants( Suggest $suggests, array $secondTryCandidates, int $queryLen, string $term ): void {
		$done = [ $term ];
		$variantIndex = 0;
		foreach ( $secondTryCandidates as $strategy => $candidates ) {
			foreach ( $candidates as $candidate ) {
				if ( in_array( $candidate, $done, true ) ) {
					continue;
				}
				$done[] = $candidate;
				$variantIndex++;
				// Same for all the profiles of this candidate
				$extraDiscount = ( self::VARIANT_EXTRA_DISCOUNT * $this->secondTryRunner->weight( $strategy ) )
					/ $variantIndex;
				foreach ( $this->profile as $name => $profile ) {
					$variantProfName = $name . '-second-try-' . $strategy . '-' . $variantIndex;
					$variantProfile = $this->buildVariantProfile( $profile, $extraDiscount );
					$suggest = $this->buildSuggestQuery(
						$variantProfName, $variantProfile, $candidate, $queryLen
					);
					// Only keep the profiles that led to an actual suggest query
					if ( $suggest !== null ) {
						$suggests->addSuggestion( $suggest );
						$this->mergedProfiles[$variantProfName] = $variantProfile;
					}
				}
			}
		}
	}

	/**
	 * Creates a copy of $profile flagged with the 'fallback' key and whose
	 * 'discount' is multiplied by $extraDiscount.
	 *
	 * @param array $profile profile to copy, must have a 'discount' key
	 * @param float $extraDiscount extra discount factor to rank variant suggestion lower.
	 * @return array the variant profile
	 */
	protected function buildVariantProfile( array $profile, $extraDiscount = self::VARIANT_EXTRA_DISCOUNT ) {
		// mark the profile as a fallback query
		$profile['fallback'] = true;
		$profile['discount'] *= $extraDiscount;
		return $profile;
	}

	/**
	 * Post process the response from elastic and feed the collector.
	 *
	 * Walks all the suggest queries of the response, turns every option into a
	 * SearchSuggestion scored with the discount of the profile it was built from
	 * and offers it to $collector.
	 *
	 * @param CompletionResultsCollector $collector
	 * @param ResultSet $results
	 * @param string $indexName
	 * @return int total hits
	 * @throws \Elastica\Exception\RuntimeException when an option has no '_id'
	 */
	public function postProcess( CompletionResultsCollector $collector, ResultSet $results, $indexName ) {
		$suggestResp = $results->getSuggests();
		if ( $suggestResp === [] ) {
			// Edge case where the index contains 0 documents and does not even return the 'suggest' field
			return 0;
		}
		$config = $this->searchContext->getConfig();
		$hitsTotal = 0;
		foreach ( $suggestResp as $name => $sug ) {
			$discount = $this->mergedProfiles[$name]['discount'];
			foreach ( $sug as $suggested ) {
				$hitsTotal += count( $suggested['options'] );
				foreach ( $suggested['options'] as $suggest ) {
					$page = $suggest['text'];
					if ( !isset( $suggest['_id'] ) ) {
						// likely a shard failure during the fetch phase
						// https://github.com/elastic/elasticsearch/issues/32467
						throw new \Elastica\Exception\RuntimeException( "Invalid response returned from " .
							"the backend (probable shard failure during the fetch phase)" );
					}
					$targetTitle = $page;
					$targetTitleNS = NS_MAIN;
					if ( isset( $suggest['_source']['target_title'] ) ) {
						$targetTitle = $suggest['_source']['target_title']['title'];
						$targetTitleNS = $suggest['_source']['target_title']['namespace'];
					}
					[ $docId, $type ] = $this->decodeId( $suggest['_id'] );
					$score = $discount * $suggest['_score'];
					$pageId = $config->makePageId( $docId );
					$suggestion = new SearchSuggestion( $score, null, null, $pageId );
					if ( $collector->collect( $suggestion, $name, $indexName ) ) {
						if ( $type === SuggestBuilder::TITLE_SUGGESTION && $targetTitleNS === NS_MAIN ) {
							// For title suggestions we always use the target_title
							// This is because we may encounter default_sort or subphrases that are not
							// valid titles... And we prefer to display the title over close redirects
							// for CrossNS redirect we prefer the returned suggestion
							$suggestion->setText( $targetTitle );
						} else {
							$suggestion->setText( $page );
						}
					} elseif ( $collector->isFull() && $collector->getMinScore() > $score ) {
						// Results are returned in order by elastic skip the rest if no more
						// results from this suggest can be collected
						break;
					}
				}
			}
		}
		return $hitsTotal;
	}

	/**
	 * Split a compacted id: the last character is the type, what precedes it is
	 * the numeric id.
	 *
	 * @param string $id compacted id (id + $type)
	 * @return array 2 elt array [ $id, $type ]
	 */
	private function decodeId( $id ): array {
		return [ intval( substr( $id, 0, -1 ) ), substr( $id, -1 ) ];
	}

	/**
	 * (public for tests)
	 * @return array the profiles of the last call to self::build, empty if it was never called
	 */
	public function getMergedProfiles() {
		return $this->mergedProfiles;
	}

	/**
	 * Get the hard limit
	 * The completion api does not support offsets so we have to add a hack
	 * here to work around this limitation.
	 * To avoid ridiculously large queries we also set a hard limit, read from
	 * CirrusSearchCompletionSuggesterHardLimit (50 when not set).
	 * Note that this limit will be changed by fetch_limit_factor set to 2 or 1.5
	 * depending on the profile.
	 * @param int $limit limit requested
	 * @param int $offset offset requested
	 * @param SearchConfig $config
	 * @return int the number of results to fetch from elastic
	 */
	public static function computeHardLimit( $limit, $offset, SearchConfig $config ) {
		// No offset support: everything up to offset + limit has to be fetched
		$limit += $offset;
		$hardLimit = $config->get( 'CirrusSearchCompletionSuggesterHardLimit' ) ?? 50;
		if ( $limit > $hardLimit ) {
			return $hardLimit;
		}
		return $limit;
	}

	/**
	 * Number of results we could display
	 * @return int
	 */
	public function getLimit() {
		return $this->limit;
	}
}