Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 121 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
FullTextSimpleMatchQueryBuilder | |
0.00% |
0 / 121 |
|
0.00% |
0 / 9 |
1260 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
buildSearchTextQuery | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 | |||
buildHighlightQuery | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
buildPhraseRescoreQuery | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
getMultiTermRewriteMethod | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
buildExpQuery | |
0.00% |
0 / 45 |
|
0.00% |
0 / 1 |
156 | |||
attachFilter | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
30 | |||
buildSimpleAllFilter | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
20 | |||
buildTitleFilter | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Query; |
4 | |
5 | use CirrusSearch\Search\SearchContext; |
6 | use CirrusSearch\SearchConfig; |
7 | use Elastica\Query\AbstractQuery; |
8 | use Elastica\Query\MatchNone; |
9 | |
/**
 * Simple Match query builder, currently based on
 * FullTextQueryStringQueryBuilder to reuse its parsing logic.
 * It only supports queries that do not use the lucene QueryString syntax
 * and falls back to FullTextQueryStringQueryBuilder in such cases.
 * It generates only simple match/multi_match queries. It supports merging
 * multiple clauses into a dismax query with 'in_dismax'.
 */
class FullTextSimpleMatchQueryBuilder extends FullTextQueryStringQueryBuilder {
    /**
     * @var bool true if the main query used the experimental query
     */
    private $usedExpQuery = false;

    /**
     * @var float[]|array[] mixed array of field settings used for the main query
     */
    private $fields;

    /**
     * @var float[]|array[] mixed array of field settings used for the phrase rescore query
     */
    private $phraseFields;

    /**
     * @var float default weight to use for stems
     */
    private $defaultStemWeight;

    /**
     * @var string default multimatch query type
     */
    private $defaultQueryType;

    /**
     * @var string default multimatch min should match
     */
    private $defaultMinShouldMatch;

    /**
     * @var array[] dismax query settings
     */
    private $dismaxSettings;

    /**
     * @var array filter settings
     */
    private $filter;

    /**
     * @param SearchConfig $config
     * @param array $feature
     * @param array $settings profile settings. The keys 'fields',
     *  'phrase_rescore_fields', 'default_stem_weight', 'default_query_type'
     *  and 'default_min_should_match' are read unconditionally; 'filter'
     *  defaults to [ 'type' => 'default' ] and 'dismax_settings' to [].
     */
    public function __construct( SearchConfig $config, array $feature, array $settings ) {
        parent::__construct( $config, $feature );
        $this->fields = $settings['fields'];
        $this->filter = $settings['filter'] ?? [ 'type' => 'default' ];
        $this->phraseFields = $settings['phrase_rescore_fields'];
        $this->defaultStemWeight = $settings['default_stem_weight'];
        $this->defaultQueryType = $settings['default_query_type'];
        $this->defaultMinShouldMatch = $settings['default_min_should_match'];
        $this->dismaxSettings = $settings['dismax_settings'] ?? [];
    }

    /**
     * Build the primary query used for full text search.
     * If query_string syntax is not used the experimental query is built.
     * We fall back to the parent implementation otherwise.
     *
     * @param SearchContext $context
     * @param string[] $fields
     * @param AbstractQuery $nearMatchQuery
     * @param string $queryString
     * @return \Elastica\Query\AbstractQuery
     */
    protected function buildSearchTextQuery(
        SearchContext $context,
        array $fields,
        AbstractQuery $nearMatchQuery,
        $queryString
    ) {
        if ( $context->isSyntaxUsed( 'query_string' ) ) {
            return parent::buildSearchTextQuery( $context, $fields,
                $nearMatchQuery, $queryString );
        }
        $context->addSyntaxUsed( 'full_text_simple_match', 5 );
        // Remembered so that the highlight and phrase rescore builders
        // can adapt to the experimental query.
        $this->usedExpQuery = true;
        $queryForMostFields = $this->buildExpQuery( $queryString );
        if ( $nearMatchQuery instanceof MatchNone ) {
            return $queryForMostFields;
        }

        // Build one query for the full text fields and one for the near match fields so that
        // the near match can run unescaped.
        $bool = new \Elastica\Query\BoolQuery();
        $bool->setMinimumShouldMatch( 1 );
        $bool->addShould( $queryForMostFields );
        $bool->addShould( $nearMatchQuery );

        return $bool;
    }

    /**
     * Builds the highlight query
     * @param SearchContext $context
     * @param string[] $fields
     * @param string $queryText
     * @param int $slop
     * @return \Elastica\Query\AbstractQuery
     */
    protected function buildHighlightQuery( SearchContext $context, array $fields, $queryText, $slop ) {
        $query = parent::buildHighlightQuery( $context, $fields, $queryText, $slop );
        if ( $this->usedExpQuery && $query instanceof \Elastica\Query\QueryString ) {
            // the exp query accepts more docs (stopwords in query are not required)
            $query->setDefaultOperator( 'OR' );
        }
        return $query;
    }

    /**
     * Builds the phrase rescore query
     * @param SearchContext $context
     * @param string[] $fields only used when delegating to the parent; the
     *  experimental query uses the configured phrase rescore fields instead
     * @param string $queryText
     * @param int $slop
     * @return \Elastica\Query\AbstractQuery
     */
    protected function buildPhraseRescoreQuery( SearchContext $context, array $fields, $queryText, $slop ) {
        if ( !$this->usedExpQuery ) {
            return parent::buildPhraseRescoreQuery( $context, $fields, $queryText, $slop );
        }

        // Use a dedicated local so the $fields parameter is not clobbered.
        $boostedFields = [];
        foreach ( $this->phraseFields as $f => $b ) {
            $boostedFields[] = "$f^$b";
        }

        $phrase = new \Elastica\Query\MultiMatch();
        $phrase->setParam( 'type', 'phrase' );
        $phrase->setParam( 'slop', $slop );
        $phrase->setFields( $boostedFields );
        $phrase->setQuery( $queryText );
        return $this->maybeWrapWithTokenCountRouter( $queryText, $phrase );
    }

    /**
     * @inheritDoc
     */
    protected function getMultiTermRewriteMethod() {
        // Use blended freq as a rewrite method. The
        // top_terms_boost_1024 method used by the parent is not well
        // suited for a weighted sum and for some reasons uses the
        // queryNorms which depends on the number of terms found by the
        // wildcard. Using this one we'll use the similarity configured
        // for this field instead of a constant score and in the case
        // of BM25 queryNorm is ignored (removed in lucene 7)
        return 'top_terms_blended_freqs_1024';
    }

    /**
     * Generate an elasticsearch query by reading profile settings.
     *
     * One multi_match clause is built per configured field. A field setting
     * is either a plain boost or an array that may override 'boost',
     * 'query_type', 'min_should_match', 'is_plain' and 'in_dismax'. Clauses
     * sharing the same 'in_dismax' name are grouped into one dismax query;
     * the others are added directly as should clauses. Fields with a zero
     * boost are skipped.
     *
     * @param string $queryString the query text
     * @return \Elastica\Query\AbstractQuery
     */
    private function buildExpQuery( $queryString ) {
        $query = new \Elastica\Query\BoolQuery();
        $query->setMinimumShouldMatch( 0 );
        $this->attachFilter( $this->filter, $queryString, $query );
        $dismaxQueries = [];

        foreach ( $this->fields as $f => $settings ) {
            $boost = 1;
            $queryType = $this->defaultQueryType;
            $minShouldMatch = $this->defaultMinShouldMatch;
            $stemWeight = $this->defaultStemWeight;
            $fields = [ "$f.plain^1", "$f^$stemWeight" ];
            $inDismax = null;

            if ( is_array( $settings ) ) {
                $boost = $settings['boost'] ?? $boost;
                $queryType = $settings['query_type'] ?? $queryType;
                $minShouldMatch = $settings['min_should_match'] ?? $minShouldMatch;
                if ( !empty( $settings['is_plain'] ) ) {
                    // Plain fields are queried as-is, without the .plain/stem pair
                    $fields = [ $f ];
                }
                $inDismax = $settings['in_dismax'] ?? null;
            } else {
                $boost = $settings;
            }

            // Boosts are documented as floats: skip 0, 0.0 and '0' alike,
            // a clause with a zero boost cannot contribute to the score.
            if ( is_numeric( $boost ) && (float)$boost === 0.0 ) {
                continue;
            }

            $mmatch = new \Elastica\Query\MultiMatch();
            $mmatch->setQuery( $queryString );
            $mmatch->setParam( 'boost', $boost );
            $mmatch->setMinimumShouldMatch( $minShouldMatch );
            $mmatch->setType( $queryType );
            $mmatch->setFields( $fields );

            if ( $inDismax ) {
                $dismaxQueries[$inDismax][] = $mmatch;
            } else {
                $query->addShould( $mmatch );
            }
        }

        foreach ( $dismaxQueries as $name => $queries ) {
            $dismax = new \Elastica\Query\DisMax();
            $dismaxConf = $this->dismaxSettings[$name] ?? [];
            if ( isset( $dismaxConf['tie_breaker'] ) ) {
                $dismax->setTieBreaker( $dismaxConf['tie_breaker'] );
            }
            if ( isset( $dismaxConf['boost'] ) ) {
                $dismax->setBoost( $dismaxConf['boost'] );
            }
            foreach ( $queries as $q ) {
                $dismax->addQuery( $q );
            }
            $query->addShould( $dismax );
        }
        return $query;
    }

    /**
     * Attach the query filter to $boolQuery
     *
     * @param array $filterDef filter definition, 'type' must be either
     *  'default' or 'constrain_title'
     * @param string $query query text
     * @param \Elastica\Query\BoolQuery $boolQuery the query to attach the filter to
     * @throws \RuntimeException if 'type' is missing or unknown
     */
    private function attachFilter( array $filterDef, $query, \Elastica\Query\BoolQuery $boolQuery ) {
        if ( !isset( $filterDef['type'] ) ) {
            throw new \RuntimeException( "Cannot configure the filter clause, 'type' must be defined." );
        }
        $type = $filterDef['type'];

        switch ( $type ) {
            case 'default':
                $filter = $this->buildSimpleAllFilter( $filterDef, $query );
                break;
            case 'constrain_title':
                $filter = $this->buildTitleFilter( $filterDef, $query );
                break;
            default:
                throw new \RuntimeException( "Cannot build the filter clause: unknown filter type $type" );
        }

        $boolQuery->addFilter( $filter );
    }

    /**
     * Builds a simple filter on all and all.plain when all terms must match
     *
     * The per-field 'minimum_should_match' is read from
     * $options['settings'][<field>] and defaults to '100%', in which case
     * the AND operator is used instead of minimum_should_match.
     *
     * @param array[] $options array containing filter options
     * @param string $query
     * @return \Elastica\Query\AbstractQuery
     */
    private function buildSimpleAllFilter( $options, $query ) {
        $filter = new \Elastica\Query\BoolQuery();
        $filter->setMinimumShouldMatch( 1 );
        // FIXME: We can't use solely the stem field here
        // - Depending on languages it may lack stopwords,
        // A dedicated field used for filtering would be nice
        foreach ( [ 'all', 'all.plain' ] as $field ) {
            $minShouldMatch = $options['settings'][$field]['minimum_should_match'] ?? '100%';

            $m = new \Elastica\Query\MatchQuery();
            $m->setFieldQuery( $field, $query );
            if ( $minShouldMatch === '100%' ) {
                $m->setFieldOperator( $field, 'AND' );
            } else {
                $m->setFieldMinimumShouldMatch( $field, $minShouldMatch );
            }
            $filter->addShould( $m );
        }
        return $filter;
    }

    /**
     * Builds a simple filter based on buildSimpleAllFilter + a constraint
     * on title/redirect :
     * (all:query OR all.plain:query) AND (title:query OR redirect:query)
     * where the filter on title/redirect can be controlled by setting
     * minimum_should_match to relax the constraint on title.
     * (defaults to '3<80%')
     *
     * The title constraint is read from
     * $options['settings']['minimum_should_match'] and applied to both the
     * title and redirect.title fields.
     *
     * @param array[] $options array containing filter options
     * @param string $query the user query
     * @return \Elastica\Query\AbstractQuery
     */
    private function buildTitleFilter( $options, $query ) {
        $minShouldMatch = $options['settings']['minimum_should_match'] ?? '3<80%';

        $titleFilter = new \Elastica\Query\BoolQuery();
        $titleFilter->setMinimumShouldMatch( 1 );
        foreach ( [ 'title', 'redirect.title' ] as $field ) {
            $m = new \Elastica\Query\MatchQuery();
            $m->setFieldQuery( $field, $query );
            $m->setFieldMinimumShouldMatch( $field, $minShouldMatch );
            $titleFilter->addShould( $m );
        }

        $filter = new \Elastica\Query\BoolQuery();
        $filter->addMust( $this->buildSimpleAllFilter( $options, $query ) );
        $filter->addMust( $titleFilter );
        return $filter;
    }
}