Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 120 |
|
0.00% |
0 / 9 |
CRAP | |
0.00% |
0 / 1 |
| FullTextSimpleMatchQueryBuilder | |
0.00% |
0 / 120 |
|
0.00% |
0 / 9 |
1122 | |
0.00% |
0 / 1 |
| __construct | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
| buildSearchTextQuery | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
12 | |||
| buildHighlightQuery | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
| buildPhraseRescoreQuery | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
| getMultiTermRewriteMethod | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| buildExpQuery | |
0.00% |
0 / 48 |
|
0.00% |
0 / 1 |
156 | |||
| attachFilter | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
30 | |||
| buildSimpleAllFilter | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
12 | |||
| buildTitleFilter | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch\Query; |
| 4 | |
| 5 | use CirrusSearch\Search\SearchContext; |
| 6 | use CirrusSearch\SearchConfig; |
| 7 | use Elastica\Query\AbstractQuery; |
| 8 | use Elastica\Query\MatchNone; |
| 9 | |
/**
 * Simple Match query builder, currently based on
 * FullTextQueryStringQueryBuilder to reuse its parsing logic.
 * It only supports queries that do not use the lucene QueryString syntax
 * and falls back to FullTextQueryStringQueryBuilder in such cases.
 * It generates only simple match/multi_match queries. It supports merging
 * multiple clauses into a dismax query with 'in_dismax'.
 */
class FullTextSimpleMatchQueryBuilder extends FullTextQueryStringQueryBuilder {
	/**
	 * @var bool true if the main query was built with the experimental query
	 */
	private $usedExpQuery = false;

	/**
	 * @var float[]|array[] mixed array of field settings used for the main query
	 */
	private $fields;

	/**
	 * @var float[]|array[] mixed array of field settings used for the phrase rescore query
	 */
	private $phraseFields;

	/**
	 * @var float default weight to use for stems
	 */
	private $defaultStemWeight;

	/**
	 * @var string default multimatch query type
	 */
	private $defaultQueryType;

	/**
	 * @var string default multimatch min should match
	 */
	private $defaultMinShouldMatch;

	/**
	 * @var array[] dismax query settings
	 */
	private $dismaxSettings;

	/**
	 * @var array filter settings
	 */
	private $filter;

	/**
	 * @param SearchConfig $config
	 * @param array $feature forwarded untouched to the parent constructor
	 * @param array $settings profile settings. The keys 'fields',
	 *  'phrase_rescore_fields', 'default_stem_weight', 'default_query_type' and
	 *  'default_min_should_match' are read unconditionally; 'filter' defaults to
	 *  [ 'type' => 'default' ] and 'dismax_settings' defaults to [].
	 */
	public function __construct( SearchConfig $config, array $feature, array $settings ) {
		parent::__construct( $config, $feature );
		$this->fields = $settings['fields'];
		$this->filter = $settings['filter'] ?? [ 'type' => 'default' ];
		$this->phraseFields = $settings['phrase_rescore_fields'];
		$this->defaultStemWeight = $settings['default_stem_weight'];
		$this->defaultQueryType = $settings['default_query_type'];
		$this->defaultMinShouldMatch = $settings['default_min_should_match'];
		$this->dismaxSettings = $settings['dismax_settings'] ?? [];
	}

	/**
	 * Build the primary query used for full text search.
	 * If query_string syntax is not used the experimental query is built.
	 * We fall back to the parent implementation otherwise.
	 *
	 * @param SearchContext $context
	 * @param string[] $fields
	 * @param AbstractQuery $nearMatchQuery
	 * @param string $queryString
	 * @return \Elastica\Query\AbstractQuery
	 */
	protected function buildSearchTextQuery(
		SearchContext $context,
		array $fields,
		AbstractQuery $nearMatchQuery,
		$queryString
	) {
		if ( $context->isSyntaxUsed( 'query_string' ) ) {
			return parent::buildSearchTextQuery( $context, $fields,
				$nearMatchQuery, $queryString );
		}
		$context->addSyntaxUsed( 'full_text_simple_match', 5 );
		// Remembered so that the highlight and phrase rescore builders
		// know which flavour of main query they have to accompany.
		$this->usedExpQuery = true;
		$queryForMostFields = $this->buildExpQuery( $queryString );
		if ( $nearMatchQuery instanceof MatchNone ) {
			// Nothing to combine with: return the experimental query as is.
			return $queryForMostFields;
		}

		// Build one query for the full text fields and one for the near match fields so that
		// the near match can run unescaped.
		$bool = new \Elastica\Query\BoolQuery();
		$bool->setMinimumShouldMatch( 1 );
		$bool->addShould( $queryForMostFields );
		$bool->addShould( $nearMatchQuery );

		return $bool;
	}

	/**
	 * Builds the highlight query
	 * @param SearchContext $context
	 * @param string[] $fields
	 * @param string $queryText
	 * @param int $slop
	 * @return \Elastica\Query\AbstractQuery
	 */
	protected function buildHighlightQuery( SearchContext $context, array $fields, $queryText, $slop ) {
		$query = parent::buildHighlightQuery( $context, $fields, $queryText, $slop );
		if ( $this->usedExpQuery && $query instanceof \Elastica\Query\QueryString ) {
			// the exp query accepts more docs (stopwords in query are not required)
			$query->setDefaultOperator( 'OR' );
		}
		return $query;
	}

	/**
	 * Builds the phrase rescore query
	 *
	 * When the experimental query was used, a multi_match of type 'phrase' is
	 * built over the configured phrase rescore fields (the $fields argument is
	 * ignored in that case); otherwise the parent implementation is used.
	 *
	 * @param SearchContext $context
	 * @param string[] $fields
	 * @param string $queryText
	 * @param int $slop
	 * @return \Elastica\Query\AbstractQuery
	 */
	protected function buildPhraseRescoreQuery( SearchContext $context, array $fields, $queryText, $slop ) {
		if ( !$this->usedExpQuery ) {
			return parent::buildPhraseRescoreQuery( $context, $fields, $queryText, $slop );
		}

		// Use a dedicated local instead of clobbering the $fields parameter.
		$phraseFields = [];
		foreach ( $this->phraseFields as $f => $b ) {
			$phraseFields[] = "$f^$b";
		}

		$phrase = new \Elastica\Query\MultiMatch();
		$phrase->setParam( 'type', 'phrase' );
		$phrase->setParam( 'slop', $slop );
		$phrase->setFields( $phraseFields );
		$phrase->setQuery( $queryText );
		return $this->maybeWrapWithTokenCountRouter( $queryText, $phrase );
	}

	/**
	 * @inheritDoc
	 */
	protected function getMultiTermRewriteMethod() {
		// Use blended freq as a rewrite method. The
		// top_terms_boost_1024 method used by the parent is not well
		// suited for a weighted sum and for some reasons uses the
		// queryNorms which depends on the number of terms found by the
		// wildcard. Using this one we'll use the similarity configured
		// for this field instead of a constant score and in the case
		// of BM25 queryNorm is ignored (removed in lucene 7)
		return 'top_terms_blended_freqs_1024';
	}

	/**
	 * Generate an elasticsearch query by reading profile settings
	 *
	 * One multi_match clause is generated per configured field. A field
	 * setting is either a plain boost value or an array that may define
	 * 'boost', 'stem_boost', 'plain_boost', 'query_type', 'min_should_match',
	 * 'is_plain' and 'in_dismax'. Clauses sharing the same 'in_dismax' name
	 * are grouped into a single dismax query; the others are added directly
	 * as should clauses. Fields whose boost is zero are skipped.
	 *
	 * @param string $queryString the query text
	 * @return \Elastica\Query\AbstractQuery
	 */
	private function buildExpQuery( $queryString ) {
		$query = new \Elastica\Query\BoolQuery();
		$query->setMinimumShouldMatch( 0 );
		$this->attachFilter( $this->filter, $queryString, $query );
		$dismaxQueries = [];

		foreach ( $this->fields as $f => $settings ) {
			$queryType = $this->defaultQueryType;
			$minShouldMatch = $this->defaultMinShouldMatch;
			$stemWeight = $this->defaultStemWeight;
			$boost = 1;
			$plainBoost = 1;
			$isPlain = false;
			$inDismax = null;

			if ( is_array( $settings ) ) {
				$stemWeight = $settings['stem_boost'] ?? $stemWeight;
				$plainBoost = $settings['plain_boost'] ?? $plainBoost;
				$boost = $settings['boost'] ?? $boost;
				$queryType = $settings['query_type'] ?? $queryType;
				$minShouldMatch = $settings['min_should_match'] ?? $minShouldMatch;
				$isPlain = !empty( $settings['is_plain'] );
				$inDismax = $settings['in_dismax'] ?? null;
			} else {
				// Scalar setting: the value is the boost of the field.
				$boost = $settings;
			}

			// A zero boost disables the field. Boosts are documented as floats,
			// so accept both the int and the float spelling of zero, and bail out
			// before allocating a query object that would be thrown away.
			if ( $boost === 0 || $boost === 0.0 ) {
				continue;
			}

			// A plain field is queried as is; otherwise both the plain and
			// the stemmed variants are queried, each with its own weight.
			$fields = $isPlain
				? [ $f ]
				: [ "$f.plain^$plainBoost", "$f^$stemWeight" ];

			$mmatch = new \Elastica\Query\MultiMatch();
			$mmatch->setQuery( $queryString );
			$mmatch->setParam( 'boost', $boost );
			$mmatch->setMinimumShouldMatch( $minShouldMatch );
			$mmatch->setType( $queryType );
			$mmatch->setFields( $fields );

			if ( $inDismax ) {
				$dismaxQueries[$inDismax][] = $mmatch;
			} else {
				$query->addShould( $mmatch );
			}
		}

		foreach ( $dismaxQueries as $name => $queries ) {
			$dismax = new \Elastica\Query\DisMax();
			$dismaxConf = $this->dismaxSettings[$name] ?? [];
			if ( isset( $dismaxConf['tie_breaker'] ) ) {
				$dismax->setTieBreaker( $dismaxConf['tie_breaker'] );
			}
			if ( isset( $dismaxConf['boost'] ) ) {
				$dismax->setBoost( $dismaxConf['boost'] );
			}
			foreach ( $queries as $q ) {
				$dismax->addQuery( $q );
			}
			$query->addShould( $dismax );
		}
		return $query;
	}

	/**
	 * Attach the query filter to $boolQuery
	 *
	 * @param array $filterDef filter definition; its 'type' key must be
	 *  either 'default' or 'constrain_title'
	 * @param string $query query text
	 * @param \Elastica\Query\BoolQuery $boolQuery the query to attach the filter to
	 * @throws \RuntimeException if 'type' is missing or unknown
	 */
	private function attachFilter( array $filterDef, $query, \Elastica\Query\BoolQuery $boolQuery ) {
		if ( !isset( $filterDef['type'] ) ) {
			throw new \RuntimeException( "Cannot configure the filter clause, 'type' must be defined." );
		}
		$type = $filterDef['type'];

		switch ( $type ) {
			case 'default':
				$filter = $this->buildSimpleAllFilter( $filterDef, $query );
				break;
			case 'constrain_title':
				$filter = $this->buildTitleFilter( $filterDef, $query );
				break;
			default:
				throw new \RuntimeException( "Cannot build the filter clause: unknown filter type $type" );
		}

		$boolQuery->addFilter( $filter );
	}

	/**
	 * Builds a simple filter on all and all.plain when all terms must match
	 *
	 * The per-field constraint can be relaxed with
	 * $options['settings'][<field>]['minimum_should_match'] (defaults to
	 * '100%', which is expressed with the AND operator).
	 *
	 * @param array[] $options array containing filter options
	 * @param string $query
	 * @return \Elastica\Query\AbstractQuery
	 */
	private function buildSimpleAllFilter( $options, $query ) {
		$filter = new \Elastica\Query\BoolQuery();
		$filter->setMinimumShouldMatch( 1 );
		// FIXME: We can't use solely the stem field here
		// - Depending on languages it may lack stopwords,
		// A dedicated field used for filtering would be nice
		foreach ( [ 'all', 'all.plain' ] as $field ) {
			$m = new \Elastica\Query\MatchQuery();
			$m->setFieldQuery( $field, $query );
			$minShouldMatch = $options['settings'][$field]['minimum_should_match'] ?? '100%';
			if ( $minShouldMatch === '100%' ) {
				$m->setFieldOperator( $field, 'AND' );
			} else {
				$m->setFieldMinimumShouldMatch( $field, $minShouldMatch );
			}
			$filter->addShould( $m );
		}
		return $filter;
	}

	/**
	 * Builds a simple filter based on buildSimpleAllFilter + a constraint
	 * on title/redirect :
	 * (all:query OR all.plain:query) AND (title:query OR redirect:query)
	 * where the filter on title/redirect can be controlled by setting
	 * minimum_should_match to relax the constraint on title.
	 * (defaults to '3<80%')
	 *
	 * @param array[] $options array containing filter options
	 * @param string $query the user query
	 * @return \Elastica\Query\AbstractQuery
	 */
	private function buildTitleFilter( $options, $query ) {
		$filter = new \Elastica\Query\BoolQuery();
		$filter->addMust( $this->buildSimpleAllFilter( $options, $query ) );
		$minShouldMatch = $options['settings']['minimum_should_match'] ?? '3<80%';
		$titleFilter = new \Elastica\Query\BoolQuery();
		$titleFilter->setMinimumShouldMatch( 1 );

		foreach ( [ 'title', 'redirect.title' ] as $field ) {
			$m = new \Elastica\Query\MatchQuery();
			$m->setFieldQuery( $field, $query );
			$m->setFieldMinimumShouldMatch( $field, $minShouldMatch );
			$titleFilter->addShould( $m );
		}
		$filter->addMust( $titleFilter );
		return $filter;
	}
}