Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
96.95% |
127 / 131 |
|
75.00% |
9 / 12 |
CRAP | |
0.00% |
0 / 1 |
| PhraseSuggestFallbackMethod | |
96.95% |
127 / 131 |
|
75.00% |
9 / 12 |
36 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
| build | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
5.03 | |||
| successApproximation | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
4 | |||
| rewrite | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
| haveSuggestion | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| fixDYMSuggestion | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
| escapeHighlightedSuggestion | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
| totalHitsThresholdMet | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
| findSuggestion | |
81.82% |
9 / 11 |
|
0.00% |
0 / 1 |
5.15 | |||
| getSuggestQueries | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
| buildSuggestConfig | |
98.36% |
60 / 61 |
|
0.00% |
0 / 1 |
7 | |||
| getProfile | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch\Fallbacks; |
| 4 | |
| 5 | use CirrusSearch\InterwikiResolver; |
| 6 | use CirrusSearch\OtherIndexesUpdater; |
| 7 | use CirrusSearch\Parser\AST\Visitor\QueryFixer; |
| 8 | use CirrusSearch\Profile\SearchProfileException; |
| 9 | use CirrusSearch\Profile\SearchProfileService; |
| 10 | use CirrusSearch\Search\CirrusSearchResultSet; |
| 11 | use CirrusSearch\Search\SearchQuery; |
| 12 | use CirrusSearch\Searcher; |
| 13 | use Wikimedia\Assert\Assert; |
| 14 | use Wikimedia\HtmlArmor\HtmlArmor; |
| 15 | |
/**
 * Fallback method based on the elastic phrase suggester.
 *
 * getSuggestQueries() provides the "suggest" section describing the phrase
 * suggester; the answer is later read back from the elastic response attached
 * to the initial result set (see findSuggestion()) and turned into a
 * "did you mean" suggestion by rewrite().
 */
class PhraseSuggestFallbackMethod implements FallbackMethod, ElasticSearchSuggestFallbackMethod {
	use FallbackMethodTrait;

	/**
	 * @var SearchQuery the query this fallback method was built for
	 */
	private $query;

	/**
	 * @var QueryFixer built from the parsed query; provides the part of the query
	 *  sent to the suggester and applies the suggestion back onto the query
	 */
	private $queryFixer;

	/**
	 * @var string name of the phrase suggester profile
	 */
	private $profileName;

	/**
	 * @var array|null profile settings (lazy loaded, see getProfile())
	 */
	private $profile;

	/**
	 * Instances are created through build(), which only calls this constructor
	 * for queries accepted by isSupported().
	 *
	 * @param SearchQuery $query
	 * @param string $profileName name of the phrase suggester profile to use
	 */
	private function __construct( SearchQuery $query, $profileName ) {
		Assert::precondition( self::isSupported( $query ), "Unsupported query" );
		$this->query = $query;
		$this->queryFixer = QueryFixer::build( $query->getParsedQuery() );
		$this->profileName = $profileName;
	}

	/**
	 * Whether the phrase suggester may run for this query: DYM suggestions are
	 * requested, the feature is enabled in the config and the first page of
	 * results is being fetched.
	 *
	 * @param SearchQuery $query
	 * @return bool
	 */
	private static function isSupported( SearchQuery $query ): bool {
		return $query->isWithDYMSuggestion()
			&& $query->getSearchConfig()->get( 'CirrusSearchEnablePhraseSuggest' )
			// TODO: Should this be tested at an upper level?
			&& $query->getOffset() === 0;
	}

	/**
	 * @param SearchQuery $query
	 * @param array $params must contain the key 'profile' (name of the phrase suggester profile)
	 * @param InterwikiResolver|null $interwikiResolver not used by this method
	 * @return FallbackMethod|null null when the query is not supported by this method
	 * @throws SearchProfileException when $params['profile'] is missing (or null)
	 */
	public static function build( SearchQuery $query, array $params, ?InterwikiResolver $interwikiResolver = null ) {
		if ( !self::isSupported( $query ) ) {
			return null;
		}
		if ( !isset( $params['profile'] ) ) {
			throw new SearchProfileException( "Missing mandatory parameter 'profile'" );
		}
		return new self( $query, $params['profile'] );
	}

	/**
	 * Returns 0.0 when there is nothing to suggest, when a result is already a
	 * fully highlighted match or when the initial search returned more hits
	 * than the profile threshold; 0.5 otherwise.
	 *
	 * @param FallbackRunnerContext $context
	 * @return float
	 */
	public function successApproximation( FallbackRunnerContext $context ) {
		$firstPassResults = $context->getInitialResultSet();

		// The checks are kept in this order: the profile is only loaded (by
		// totalHitsThresholdMet) when the two previous checks did not settle the answer.
		if ( !$this->haveSuggestion( $firstPassResults ) ) {
			return 0.0;
		}
		if ( $this->resultContainsFullyHighlightedMatch( $firstPassResults->getElasticaResultSet() ) ) {
			return 0.0;
		}
		if ( $this->totalHitsThresholdMet( $firstPassResults->getTotalHits() ) ) {
			return 0.0;
		}

		return 0.5;
	}

	/**
	 * Apply the suggestion found in the initial result set, unless a previous
	 * fallback method already rewrote the query or suggested something.
	 *
	 * A suggestion must be available in the initial result set (see
	 * haveSuggestion()), otherwise the precondition in fixDYMSuggestion() fails.
	 *
	 * @param FallbackRunnerContext $context
	 * @return FallbackStatus
	 */
	public function rewrite( FallbackRunnerContext $context ): FallbackStatus {
		$firstPassResults = $context->getInitialResultSet();
		$previousSet = $context->getPreviousResultSet();
		if ( $previousSet->getQueryAfterRewrite() !== null ) {
			// a method rewrote the query before us.
			return FallbackStatus::noSuggestion();
		}
		if ( $previousSet->getSuggestionQuery() !== null ) {
			// a method suggested something before us
			return FallbackStatus::noSuggestion();
		}

		[ $suggestion, $highlight ] = $this->fixDYMSuggestion( $firstPassResults );

		return $this->maybeSearchAndRewrite( $context, $this->query, $suggestion, $highlight );
	}

	/**
	 * @param CirrusSearchResultSet $resultSet
	 * @return bool true when the suggester returned at least one option
	 */
	public function haveSuggestion( CirrusSearchResultSet $resultSet ) {
		return $this->findSuggestion( $resultSet ) !== null;
	}

	/**
	 * Run the suggestion through the query fixer.
	 *
	 * @param CirrusSearchResultSet $fromResultSet result set holding the suggester response
	 * @return array two elements: the fixed plain text suggestion followed by the fixed
	 *  highlighted (html escaped) suggestion
	 */
	private function fixDYMSuggestion( CirrusSearchResultSet $fromResultSet ): array {
		$suggestion = $this->findSuggestion( $fromResultSet );
		Assert::precondition( $suggestion !== null, "fixDYMSuggestion called with no suggestions available" );

		$fixedText = $this->queryFixer->fix( $suggestion['text'] );
		$fixedHighlight = $this->queryFixer->fix(
			$this->escapeHighlightedSuggestion( $suggestion['highlighted'] )
		);

		return [ $fixedText, $fixedHighlight ];
	}

	/**
	 * Escape a highlighted suggestion coming back from Elasticsearch.
	 *
	 * @param string $suggestion suggestion from elasticsearch
	 * @return HtmlArmor $suggestion with html escaped _except_ highlighting pre and post tags
	 */
	private function escapeHighlightedSuggestion( string $suggestion ): HtmlArmor {
		// Escape everything first, then swap the highlight markers for the
		// html snippets used to render suggestions.
		$markersToHtml = [
			Searcher::HIGHLIGHT_PRE_MARKER => Searcher::SUGGESTION_HIGHLIGHT_PRE,
			Searcher::HIGHLIGHT_POST_MARKER => Searcher::SUGGESTION_HIGHLIGHT_POST,
		];
		return new HtmlArmor( strtr( htmlspecialchars( $suggestion ), $markersToHtml ) );
	}

	/**
	 * @param int $totalHits
	 * @return bool true when the profile defines a non-negative 'total_hits_threshold'
	 *  and $totalHits is strictly greater than it
	 */
	private function totalHitsThresholdMet( $totalHits ) {
		// A missing threshold is treated like a negative one: the check is disabled.
		$threshold = $this->getProfile()['total_hits_threshold'] ?? -1;
		return $threshold >= 0 && $totalHits > $threshold;
	}

	/**
	 * @param CirrusSearchResultSet $resultSet
	 * @return array|null Suggestion options, see "options" part in
	 * https://www.elastic.co/guide/en/elasticsearch/reference/6.4/search-suggesters.html
	 */
	private function findSuggestion( CirrusSearchResultSet $resultSet ) {
		// TODO some kind of weighting?
		$response = $resultSet->getElasticResponse();
		if ( $response === null ) {
			return null;
		}
		$data = $response->getData();
		// Elasticsearch will send back the suggest element but no sub suggestion elements if the wiki is empty.
		// So we should check to see if they exist even though in normal operation they always will.
		$options = $data['suggest']['suggest'][0]['options'] ?? null;
		if ( !is_array( $options ) || $options === [] ) {
			return null;
		}
		// Keep the first option in iteration order.
		return reset( $options );
	}

	/**
	 * @return array|null the 'suggest' section for the fixable part of the query,
	 *  null when the query fixer reports no fixable part
	 */
	public function getSuggestQueries() {
		$term = $this->queryFixer->getFixablePart();
		if ( $term === null ) {
			return null;
		}
		return [
			'suggest' => [
				'text' => $term,
				'suggest' => $this->buildSuggestConfig(),
			]
		];
	}

	/**
	 * Build suggest config for 'suggest' field.
	 *
	 * @return array[] array of Elastica configuration
	 */
	private function buildSuggestConfig() {
		$config = $this->query->getSearchConfig();
		$suggestSettings = $this->getProfile();
		$field = $suggestSettings['field'] ?? 'suggest';

		// Settings shared by every direct generator.
		$generatorSettings = [
			'suggest_mode' => $suggestSettings['mode'],
			'max_term_freq' => $suggestSettings['max_term_freq'],
			'min_doc_freq' => $suggestSettings['min_doc_freq'],
			'prefix_length' => $suggestSettings['prefix_length'],
		];

		$settings = [
			'phrase' => [
				'field' => $field,
				'size' => 1,
				'max_errors' => $suggestSettings['max_errors'],
				'confidence' => $suggestSettings['confidence'],
				'real_word_error_likelihood' => $suggestSettings['real_word_error_likelihood'],
				'direct_generator' => [
					[
						'field' => $field,
						// While not documented, PhraseSuggestionContext.DirectCandidateGenerator
						// in opensearch defaults size to 5.
						'size' => $suggestSettings['num_candidates'] ?? 5,
					] + $generatorSettings,
				],
				'highlight' => [
					'pre_tag' => Searcher::HIGHLIGHT_PRE_MARKER,
					'post_tag' => Searcher::HIGHLIGHT_POST_MARKER,
				],
			],
		];

		// Add a second generator with the reverse field
		// Only do this for local queries, we don't know if it's activated
		// on other wikis.
		if ( $config->getElement( 'CirrusSearchPhraseSuggestReverseField', 'use' )
			&& ( !$this->query->getCrossSearchStrategy()->isExtraIndicesSearchSupported()
				|| !OtherIndexesUpdater::getExtraIndexesForNamespaces(
					$config,
					$this->query->getNamespaces()
				)
			)
		) {
			$settings['phrase']['direct_generator'][] = [ 'field' => $field . '.reverse' ]
				+ $generatorSettings
				+ [
					'pre_filter' => 'token_reverse',
					'post_filter' => 'token_reverse'
				];
		}

		if ( !empty( $suggestSettings['collate'] ) ) {
			$collateFields = [ 'title.plain', 'redirect.title.plain' ];
			if ( $config->get( 'CirrusSearchPhraseSuggestUseText' ) ) {
				$collateFields[] = 'text.plain';
			}
			$settings['phrase']['collate'] = [
				'query' => [
					'inline' => [
						'multi_match' => [
							'query' => '{{suggestion}}',
							'operator' => 'or',
							'minimum_should_match' => $suggestSettings['collate_minimum_should_match'],
							'type' => 'cross_fields',
							'fields' => $collateFields
						],
					],
				],
			];
		}

		if ( isset( $suggestSettings['smoothing_model'] ) ) {
			$settings['phrase']['smoothing'] = $suggestSettings['smoothing_model'];
		}

		return $settings;
	}

	/**
	 * Load (once) the phrase suggester profile named by $this->profileName.
	 *
	 * @return array
	 */
	private function getProfile() {
		if ( $this->profile === null ) {
			$profileService = $this->query->getSearchConfig()->getProfileService();
			$this->profile = $profileService->loadProfileByName(
				SearchProfileService::PHRASE_SUGGESTER,
				$this->profileName
			);
		}
		return $this->profile;
	}
}