Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
96.92% |
126 / 130 |
|
75.00% |
9 / 12 |
CRAP | |
0.00% |
0 / 1 |
PhraseSuggestFallbackMethod | |
96.92% |
126 / 130 |
|
75.00% |
9 / 12 |
36 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
3 | |||
build | |
88.89% |
8 / 9 |
|
0.00% |
0 / 1 |
5.03 | |||
successApproximation | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
4 | |||
rewrite | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
haveSuggestion | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
fixDYMSuggestion | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
escapeHighlightedSuggestion | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
totalHitsThresholdMet | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
findSuggestion | |
81.82% |
9 / 11 |
|
0.00% |
0 / 1 |
5.15 | |||
getSuggestQueries | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
buildSuggestConfig | |
98.33% |
59 / 60 |
|
0.00% |
0 / 1 |
7 | |||
getProfile | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Fallbacks; |
4 | |
5 | use CirrusSearch\InterwikiResolver; |
6 | use CirrusSearch\OtherIndexesUpdater; |
7 | use CirrusSearch\Parser\AST\Visitor\QueryFixer; |
8 | use CirrusSearch\Profile\SearchProfileException; |
9 | use CirrusSearch\Profile\SearchProfileService; |
10 | use CirrusSearch\Search\CirrusSearchResultSet; |
11 | use CirrusSearch\Search\SearchQuery; |
12 | use CirrusSearch\Searcher; |
13 | use HtmlArmor; |
14 | use Wikimedia\Assert\Assert; |
15 | |
/**
 * Fallback method based on the elastic phrase suggester.
 *
 * The suggest request built by getSuggestQueries() is read back from the
 * initial result set; when a suggestion is found, rewrite() hands it to
 * maybeSearchAndRewrite().
 */
class PhraseSuggestFallbackMethod implements FallbackMethod, ElasticSearchSuggestFallbackMethod {
	use FallbackMethodTrait;

	/**
	 * @var SearchQuery the query this fallback method was built for
	 */
	private $query;

	/**
	 * @var QueryFixer built from the parsed query; provides the part of the
	 *  query sent to the suggester and re-applies suggestions to the query
	 */
	private $queryFixer;

	/**
	 * @var string name of the phrase suggester profile
	 */
	private $profileName;

	/**
	 * @var array|null settings (lazy loaded)
	 */
	private $profile;

	/**
	 * Private: instances are obtained through build(), which performs the same
	 * eligibility checks but returns null instead of failing.
	 *
	 * @param SearchQuery $query
	 * @param string $profileName name of the phrase suggester profile to use
	 *  (build() refuses to create an instance without one)
	 */
	private function __construct( SearchQuery $query, $profileName ) {
		// Same conditions (and same strict offset comparison) as in build().
		Assert::precondition( $query->isWithDYMSuggestion() &&
			$query->getSearchConfig()->get( 'CirrusSearchEnablePhraseSuggest' ) &&
			$query->getOffset() === 0, "Unsupported query" );
		$this->query = $query;
		$this->queryFixer = QueryFixer::build( $query->getParsedQuery() );
		$this->profileName = $profileName;
	}

	/**
	 * Create the fallback method when the query is eligible for it.
	 *
	 * Returns null when the query does not request a "did you mean" suggestion,
	 * when the phrase suggester is disabled by configuration, or when the query
	 * has a non-zero offset.
	 *
	 * @param SearchQuery $query
	 * @param array $params must contain the key 'profile'
	 * @param InterwikiResolver|null $interwikiResolver not used by this method
	 * @return FallbackMethod|null
	 * @throws SearchProfileException when $params['profile'] is not set
	 */
	public static function build( SearchQuery $query, array $params, ?InterwikiResolver $interwikiResolver = null ) {
		if ( !$query->isWithDYMSuggestion() ) {
			return null;
		}
		if ( !$query->getSearchConfig()->get( 'CirrusSearchEnablePhraseSuggest' ) ) {
			return null;
		}
		// TODO: Should this be tested at an upper level?
		if ( $query->getOffset() !== 0 ) {
			return null;
		}
		if ( !isset( $params['profile'] ) ) {
			throw new SearchProfileException( "Missing mandatory parameter 'profile'" );
		}
		return new self( $query, $params['profile'] );
	}

	/**
	 * Returns 0.0 when there is no suggestion in the initial result set, when
	 * the initial results contain a fully highlighted match, or when the
	 * total hits threshold of the profile is exceeded; 0.5 otherwise.
	 *
	 * @param FallbackRunnerContext $context
	 * @return float
	 */
	public function successApproximation( FallbackRunnerContext $context ) {
		$firstPassResults = $context->getInitialResultSet();
		if ( !$this->haveSuggestion( $firstPassResults ) ) {
			return 0.0;
		}

		if ( $this->resultContainsFullyHighlightedMatch( $firstPassResults->getElasticaResultSet() ) ) {
			return 0.0;
		}

		if ( $this->totalHitsThresholdMet( $firstPassResults->getTotalHits() ) ) {
			return 0.0;
		}

		return 0.5;
	}

	/**
	 * Does nothing (FallbackStatus::noSuggestion()) when a previous method
	 * already rewrote the query or already provided a suggestion. Otherwise the
	 * suggestion found in the initial result set is passed to
	 * maybeSearchAndRewrite().
	 *
	 * NOTE: a suggestion must be available in the initial result set,
	 * fixDYMSuggestion() asserts it.
	 *
	 * @param FallbackRunnerContext $context
	 * @return FallbackStatus
	 */
	public function rewrite( FallbackRunnerContext $context ): FallbackStatus {
		$firstPassResults = $context->getInitialResultSet();
		$previousSet = $context->getPreviousResultSet();
		if ( $previousSet->getQueryAfterRewrite() !== null ) {
			// a method rewrote the query before us.
			return FallbackStatus::noSuggestion();
		}
		if ( $previousSet->getSuggestionQuery() !== null ) {
			// a method suggested something before us
			return FallbackStatus::noSuggestion();
		}

		[ $suggestion, $highlight ] = $this->fixDYMSuggestion( $firstPassResults );

		return $this->maybeSearchAndRewrite( $context, $this->query,
			$suggestion, $highlight );
	}

	/**
	 * @param CirrusSearchResultSet $resultSet
	 * @return bool true when the response behind $resultSet carries at least
	 *  one phrase suggester option
	 */
	public function haveSuggestion( CirrusSearchResultSet $resultSet ) {
		return $this->findSuggestion( $resultSet ) !== null;
	}

	/**
	 * Apply the suggestion found in $fromResultSet to the query.
	 *
	 * @param CirrusSearchResultSet $fromResultSet must contain a suggestion
	 * @return array two elements: the result of QueryFixer::fix() on the plain
	 *  suggestion text, then on the escaped highlighted suggestion
	 */
	private function fixDYMSuggestion( CirrusSearchResultSet $fromResultSet ): array {
		$suggestion = $this->findSuggestion( $fromResultSet );
		Assert::precondition( $suggestion !== null, "fixDYMSuggestion called with no suggestions available" );
		return [
			$this->queryFixer->fix( $suggestion['text'] ),
			$this->queryFixer->fix( $this->escapeHighlightedSuggestion( $suggestion['highlighted'] ) )
		];
	}

	/**
	 * Escape a highlighted suggestion coming back from Elasticsearch.
	 *
	 * The text is html-escaped first, then the highlight markers requested in
	 * buildSuggestConfig() are replaced with the suggestion highlight tags.
	 *
	 * @param string $suggestion suggestion from elasticsearch
	 * @return HtmlArmor $suggestion with html escaped _except_ highlighting pre and post tags
	 */
	private function escapeHighlightedSuggestion( string $suggestion ): HtmlArmor {
		return new HtmlArmor( strtr( htmlspecialchars( $suggestion ), [
			Searcher::HIGHLIGHT_PRE_MARKER => Searcher::SUGGESTION_HIGHLIGHT_PRE,
			Searcher::HIGHLIGHT_POST_MARKER => Searcher::SUGGESTION_HIGHLIGHT_POST,
		] ) );
	}

	/**
	 * Check the 'total_hits_threshold' setting of the profile. The check is
	 * disabled when the setting is missing or negative.
	 *
	 * @param int $totalHits
	 * @return bool true when the threshold is enabled and $totalHits is
	 *  strictly greater than it
	 */
	private function totalHitsThresholdMet( $totalHits ) {
		$threshold = $this->getProfile()['total_hits_threshold'] ?? -1;
		return $threshold >= 0 && $totalHits > $threshold;
	}

	/**
	 * Extract the first phrase suggester option from the response.
	 *
	 * @param CirrusSearchResultSet $resultSet
	 * @return array|null Suggestion options, see "options" part in
	 *  https://www.elastic.co/guide/en/elasticsearch/reference/6.4/search-suggesters.html
	 *  null when the result set has no response or the response has no option
	 */
	private function findSuggestion( CirrusSearchResultSet $resultSet ) {
		// TODO some kind of weighting?
		$response = $resultSet->getElasticResponse();
		if ( $response === null ) {
			return null;
		}
		$data = $response->getData();
		// Elasticsearch will send back the suggest element but no sub suggestion elements if the wiki is empty.
		// So we should check to see if they exist even though in normal operation they always will.
		// The same precaution is applied to the 'options' element.
		$options = $data['suggest']['suggest'][0]['options'] ?? null;
		if ( !is_array( $options ) || $options === [] ) {
			return null;
		}
		// Only the first option is used.
		return reset( $options );
	}

	/**
	 * @return array|null the suggest section to send with the search request,
	 *  null when the QueryFixer reports no fixable part
	 */
	public function getSuggestQueries() {
		$term = $this->queryFixer->getFixablePart();
		if ( $term !== null ) {
			return [
				'suggest' => [
					'text' => $term,
					'suggest' => $this->buildSuggestConfig(),
				]
			];
		}
		return null;
	}

	/**
	 * Build suggest config for 'suggest' field.
	 *
	 * @return array[] array of Elastica configuration
	 */
	private function buildSuggestConfig() {
		$field = 'suggest';
		$config = $this->query->getSearchConfig();
		$suggestSettings = $this->getProfile();
		// Settings shared by every direct generator. The 'field' key is prepended
		// and the filters appended below so that the key order of the generated
		// arrays is left unchanged.
		$generatorSettings = [
			'suggest_mode' => $suggestSettings['mode'],
			'max_term_freq' => $suggestSettings['max_term_freq'],
			'min_doc_freq' => $suggestSettings['min_doc_freq'],
			'prefix_length' => $suggestSettings['prefix_length'],
		];
		$settings = [
			'phrase' => [
				'field' => $field,
				'size' => 1,
				'max_errors' => $suggestSettings['max_errors'],
				'confidence' => $suggestSettings['confidence'],
				'real_word_error_likelihood' => $suggestSettings['real_word_error_likelihood'],
				'direct_generator' => [
					[ 'field' => $field ] + $generatorSettings,
				],
				'highlight' => [
					'pre_tag' => Searcher::HIGHLIGHT_PRE_MARKER,
					'post_tag' => Searcher::HIGHLIGHT_POST_MARKER,
				],
			],
		];
		// Add a second generator with the reverse field
		// Only do this for local queries, we don't know if it's activated
		// on other wikis.
		if ( $config->getElement( 'CirrusSearchPhraseSuggestReverseField', 'use' )
			&& ( !$this->query->getCrossSearchStrategy()->isExtraIndicesSearchSupported()
				|| !OtherIndexesUpdater::getExtraIndexesForNamespaces(
					$config,
					$this->query->getNamespaces()
				)
			)
		) {
			$settings['phrase']['direct_generator'][] = [ 'field' => $field . '.reverse' ]
				+ $generatorSettings
				+ [
					'pre_filter' => 'token_reverse',
					'post_filter' => 'token_reverse'
				];
		}
		if ( !empty( $suggestSettings['collate'] ) ) {
			// Fields the collate query is run against; text.plain is optional.
			$collateFields = [ 'title.plain', 'redirect.title.plain' ];
			if ( $config->get( 'CirrusSearchPhraseSuggestUseText' ) ) {
				$collateFields[] = 'text.plain';
			}
			$settings['phrase']['collate'] = [
				'query' => [
					'inline' => [
						'multi_match' => [
							'query' => '{{suggestion}}',
							'operator' => 'or',
							'minimum_should_match' => $suggestSettings['collate_minimum_should_match'],
							'type' => 'cross_fields',
							'fields' => $collateFields
						],
					],
				],
			];
		}
		if ( isset( $suggestSettings['smoothing_model'] ) ) {
			$settings['phrase']['smoothing'] = $suggestSettings['smoothing_model'];
		}

		return $settings;
	}

	/**
	 * Load the phrase suggester profile on first use and keep it for later calls.
	 *
	 * @return array
	 */
	private function getProfile() {
		if ( $this->profile === null ) {
			$this->profile = $this->query->getSearchConfig()->getProfileService()
				->loadProfileByName( SearchProfileService::PHRASE_SUGGESTER,
					$this->profileName );
		}
		return $this->profile;
	}
}