Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 124 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
FullTextSimpleMatchQueryBuilder | |
0.00% |
0 / 124 |
|
0.00% |
0 / 9 |
1260 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
buildSearchTextQuery | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
12 | |||
buildHighlightQuery | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
buildPhraseRescoreQuery | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
getMultiTermRewriteMethod | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
buildExpQuery | |
0.00% |
0 / 45 |
|
0.00% |
0 / 1 |
156 | |||
attachFilter | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
30 | |||
buildSimpleAllFilter | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
20 | |||
buildTitleFilter | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Query; |
4 | |
5 | use CirrusSearch\Search\SearchContext; |
6 | use CirrusSearch\SearchConfig; |
7 | |
/**
 * Simple Match query builder, currently based on
 * FullTextQueryStringQueryBuilder to reuse its parsing logic.
 * It only supports queries that do not use the lucene QueryString syntax
 * and falls back to FullTextQueryStringQueryBuilder in such cases.
 * It generates only simple match/multi_match queries. It supports merging
 * multiple clauses into a dismax query with 'in_dismax'.
 */
class FullTextSimpleMatchQueryBuilder extends FullTextQueryStringQueryBuilder {
	/**
	 * @var bool true if the main query was built with the experimental
	 *  (simple match) query instead of the parent implementation
	 */
	private $usedExpQuery = false;

	/**
	 * @var float[]|array[] mixed array of field settings used for the main query
	 */
	private $fields;

	/**
	 * @var float[]|array[] mixed array of field settings used for the phrase rescore query
	 */
	private $phraseFields;

	/**
	 * @var float default weight to use for stems
	 */
	private $defaultStemWeight;

	/**
	 * @var string default multimatch query type
	 */
	private $defaultQueryType;

	/**
	 * @var string default multimatch min should match
	 */
	private $defaultMinShouldMatch;

	/**
	 * @var array[] dismax query settings
	 */
	private $dismaxSettings;

	/**
	 * @var array filter settings
	 */
	private $filter;

	/**
	 * @param SearchConfig $config
	 * @param array $feature forwarded as-is to the parent constructor
	 * @param array $settings profile settings. The keys 'fields',
	 *  'phrase_rescore_fields', 'default_stem_weight', 'default_query_type'
	 *  and 'default_min_should_match' are read unconditionally; 'filter'
	 *  defaults to [ 'type' => 'default' ] and 'dismax_settings' to [].
	 */
	public function __construct( SearchConfig $config, array $feature, array $settings ) {
		parent::__construct( $config, $feature );
		$this->fields = $settings['fields'];
		$this->filter = $settings['filter'] ?? [ 'type' => 'default' ];
		$this->phraseFields = $settings['phrase_rescore_fields'];
		$this->defaultStemWeight = $settings['default_stem_weight'];
		$this->defaultQueryType = $settings['default_query_type'];
		$this->defaultMinShouldMatch = $settings['default_min_should_match'];
		$this->dismaxSettings = $settings['dismax_settings'] ?? [];
	}

	/**
	 * Build the primary query used for full text search.
	 * If query_string syntax is not used the experimental query is built.
	 * We fall back to the parent implementation otherwise.
	 *
	 * @param SearchContext $context
	 * @param string[] $fields
	 * @param string[] $nearMatchFields
	 * @param string $queryString
	 * @param string $nearMatchQuery
	 * @return \Elastica\Query\AbstractQuery
	 * @throws \RuntimeException if the configured filter is invalid
	 */
	protected function buildSearchTextQuery(
		SearchContext $context,
		array $fields,
		array $nearMatchFields,
		$queryString,
		$nearMatchQuery
	) {
		if ( $context->isSyntaxUsed( 'query_string' ) ) {
			return parent::buildSearchTextQuery( $context, $fields,
				$nearMatchFields, $queryString, $nearMatchQuery );
		}
		$context->addSyntaxUsed( 'full_text_simple_match', 5 );
		// Remembered so that the highlight and phrase rescore builders
		// know which kind of main query they have to match.
		$this->usedExpQuery = true;
		$queryForMostFields = $this->buildExpQuery( $queryString );
		if ( !$nearMatchQuery ) {
			return $queryForMostFields;
		}

		// Build one query for the full text fields and one for the near match fields so that
		// the near match can run unescaped.
		$nearMatch = new \Elastica\Query\MultiMatch();
		$nearMatch->setFields( $nearMatchFields );
		$nearMatch->setQuery( $nearMatchQuery );

		$bool = new \Elastica\Query\BoolQuery();
		$bool->setMinimumShouldMatch( 1 );
		$bool->addShould( $queryForMostFields );
		$bool->addShould( $nearMatch );

		return $bool;
	}

	/**
	 * Builds the highlight query
	 * @param SearchContext $context
	 * @param string[] $fields
	 * @param string $queryText
	 * @param int $slop
	 * @return \Elastica\Query\AbstractQuery
	 */
	protected function buildHighlightQuery( SearchContext $context, array $fields, $queryText, $slop ) {
		$query = parent::buildHighlightQuery( $context, $fields, $queryText, $slop );
		if ( $this->usedExpQuery && $query instanceof \Elastica\Query\QueryString ) {
			// the exp query accepts more docs (stopwords in query are not required)
			$query->setDefaultOperator( 'OR' );
		}
		return $query;
	}

	/**
	 * Builds the phrase rescore query.
	 *
	 * When the experimental query was used for the main query, a phrase
	 * multi_match over the configured phrase rescore fields is built;
	 * otherwise the parent implementation is used.
	 *
	 * @param SearchContext $context
	 * @param string[] $fields only used when delegating to the parent
	 * @param string $queryText
	 * @param int $slop
	 * @return \Elastica\Query\AbstractQuery
	 */
	protected function buildPhraseRescoreQuery( SearchContext $context, array $fields, $queryText, $slop ) {
		if ( !$this->usedExpQuery ) {
			return parent::buildPhraseRescoreQuery( $context, $fields, $queryText, $slop );
		}

		// Kept in a dedicated local so the $fields parameter is not clobbered.
		$boostedFields = [];
		foreach ( $this->phraseFields as $f => $b ) {
			$boostedFields[] = "$f^$b";
		}

		$phrase = new \Elastica\Query\MultiMatch();
		$phrase->setParam( 'type', 'phrase' );
		$phrase->setParam( 'slop', $slop );
		$phrase->setFields( $boostedFields );
		$phrase->setQuery( $queryText );
		return $this->maybeWrapWithTokenCountRouter( $queryText, $phrase );
	}

	/**
	 * @inheritDoc
	 */
	protected function getMultiTermRewriteMethod() {
		// Use blended freq as a rewrite method. The
		// top_terms_boost_1024 method used by the parent is not well
		// suited for a weighted sum and for some reasons uses the
		// queryNorms which depends on the number of terms found by the
		// wildcard. Using this one we'll use the similarity configured
		// for this field instead of a constant score and in the case
		// of BM25 queryNorm is ignored (removed in lucene 7)
		return 'top_terms_blended_freqs_1024';
	}

	/**
	 * Generate an elasticsearch query by reading profile settings.
	 *
	 * One multi_match clause is generated per configured field. Clauses
	 * flagged with 'in_dismax' are grouped by dismax name and wrapped in a
	 * dis_max query, all the others are attached directly as should clauses.
	 *
	 * @param string $queryString the query text
	 * @return \Elastica\Query\AbstractQuery
	 * @throws \RuntimeException if the configured filter is invalid
	 */
	private function buildExpQuery( $queryString ) {
		$query = new \Elastica\Query\BoolQuery();
		$query->setMinimumShouldMatch( 0 );
		$this->attachFilter( $this->filter, $queryString, $query );
		$dismaxQueries = [];

		foreach ( $this->fields as $f => $settings ) {
			$queryType = $this->defaultQueryType;
			$minShouldMatch = $this->defaultMinShouldMatch;
			$stemWeight = $this->defaultStemWeight;
			$fields = [ "$f.plain^1", "$f^$stemWeight" ];
			$inDismax = null;

			if ( is_array( $settings ) ) {
				$boost = $settings['boost'] ?? 1;
				$queryType = $settings['query_type'] ?? $queryType;
				$minShouldMatch = $settings['min_should_match'] ?? $minShouldMatch;
				if ( !empty( $settings['is_plain'] ) ) {
					// Plain fields are queried as-is, without the .plain/stem pair
					$fields = [ $f ];
				}
				$inDismax = $settings['in_dismax'] ?? null;
			} else {
				// Scalar settings are a shorthand for the boost
				$boost = $settings;
			}

			// A zero boost disables the field. Both the int and the float
			// zero are checked since boosts are documented as floats.
			if ( $boost === 0 || $boost === 0.0 ) {
				continue;
			}

			$mmatch = new \Elastica\Query\MultiMatch();
			// Params are set in this order to keep the serialized
			// query stable: query, boost, minimum_should_match, type, fields.
			$mmatch->setQuery( $queryString );
			$mmatch->setParam( 'boost', $boost );
			$mmatch->setMinimumShouldMatch( $minShouldMatch );
			$mmatch->setType( $queryType );
			$mmatch->setFields( $fields );

			if ( $inDismax ) {
				$dismaxQueries[$inDismax][] = $mmatch;
			} else {
				$query->addShould( $mmatch );
			}
		}

		foreach ( $dismaxQueries as $name => $queries ) {
			$dismax = new \Elastica\Query\DisMax();
			$dismaxConf = $this->dismaxSettings[$name] ?? [];
			if ( isset( $dismaxConf['tie_breaker'] ) ) {
				$dismax->setTieBreaker( $dismaxConf['tie_breaker'] );
			}
			if ( isset( $dismaxConf['boost'] ) ) {
				$dismax->setBoost( $dismaxConf['boost'] );
			}
			foreach ( $queries as $q ) {
				$dismax->addQuery( $q );
			}
			$query->addShould( $dismax );
		}
		return $query;
	}

	/**
	 * Attach the query filter to $boolQuery
	 *
	 * @param array $filterDef filter definition, its 'type' must be
	 *  either 'default' or 'constrain_title'
	 * @param string $query query text
	 * @param \Elastica\Query\BoolQuery $boolQuery the query to attach the filter to
	 * @throws \RuntimeException if 'type' is missing or unknown
	 */
	private function attachFilter( array $filterDef, $query, \Elastica\Query\BoolQuery $boolQuery ) {
		if ( !isset( $filterDef['type'] ) ) {
			throw new \RuntimeException( "Cannot configure the filter clause, 'type' must be defined." );
		}
		$type = $filterDef['type'];

		switch ( $type ) {
			case 'default':
				$filter = $this->buildSimpleAllFilter( $filterDef, $query );
				break;
			case 'constrain_title':
				$filter = $this->buildTitleFilter( $filterDef, $query );
				break;
			default:
				throw new \RuntimeException( "Cannot build the filter clause: unknown filter type $type" );
		}

		$boolQuery->addFilter( $filter );
	}

	/**
	 * Builds a simple filter on all and all.plain when all terms must match
	 *
	 * The per-field constraint can be relaxed with
	 * $options['settings'][<field>]['minimum_should_match'], it defaults
	 * to '100%' in which case the AND operator is used instead.
	 *
	 * @param array[] $options array containing filter options
	 * @param string $query
	 * @return \Elastica\Query\AbstractQuery
	 */
	private function buildSimpleAllFilter( $options, $query ) {
		$filter = new \Elastica\Query\BoolQuery();
		$filter->setMinimumShouldMatch( 1 );
		// FIXME: We can't use solely the stem field here
		// - Depending on languages it may lack stopwords,
		// A dedicated field used for filtering would be nice
		foreach ( [ 'all', 'all.plain' ] as $field ) {
			$minShouldMatch = $options['settings'][$field]['minimum_should_match'] ?? '100%';
			$m = new \Elastica\Query\MatchQuery();
			$m->setFieldQuery( $field, $query );
			if ( $minShouldMatch === '100%' ) {
				$m->setFieldOperator( $field, 'AND' );
			} else {
				$m->setFieldMinimumShouldMatch( $field, $minShouldMatch );
			}
			$filter->addShould( $m );
		}
		return $filter;
	}

	/**
	 * Builds a simple filter based on buildSimpleAllFilter + a constraint
	 * on title/redirect :
	 * (all:query OR all.plain:query) AND (title:query OR redirect:query)
	 * where the filter on title/redirect can be controlled by setting
	 * minimum_should_match to relax the constraint on title.
	 * (defaults to '3<80%')
	 *
	 * @param array[] $options array containing filter options
	 * @param string $query the user query
	 * @return \Elastica\Query\AbstractQuery
	 */
	private function buildTitleFilter( $options, $query ) {
		$minShouldMatch = $options['settings']['minimum_should_match'] ?? '3<80%';

		$titleFilter = new \Elastica\Query\BoolQuery();
		$titleFilter->setMinimumShouldMatch( 1 );
		foreach ( [ 'title', 'redirect.title' ] as $field ) {
			$m = new \Elastica\Query\MatchQuery();
			$m->setFieldQuery( $field, $query );
			$m->setFieldMinimumShouldMatch( $field, $minShouldMatch );
			$titleFilter->addShould( $m );
		}

		$filter = new \Elastica\Query\BoolQuery();
		$filter->addMust( $this->buildSimpleAllFilter( $options, $query ) );
		$filter->addMust( $titleFilter );
		return $filter;
	}
}