Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
75.00% |
90 / 120 |
|
44.44% |
4 / 9 |
CRAP | |
0.00% |
0 / 1 |
| RescoreBuilder | |
75.00% |
90 / 120 |
|
44.44% |
4 / 9 |
81.52 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| build | |
86.67% |
13 / 15 |
|
0.00% |
0 / 1 |
4.04 | |||
| buildRescoreQuery | |
75.00% |
6 / 8 |
|
0.00% |
0 / 1 |
5.39 | |||
| buildLtrQuery | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
6 | |||
| windowSize | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
| prepareQueryParams | |
45.45% |
5 / 11 |
|
0.00% |
0 / 1 |
9.06 | |||
| getSupportedProfile | |
72.73% |
24 / 33 |
|
0.00% |
0 / 1 |
10.64 | |||
| isProfileNamespaceSupported | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
10 | |||
| isProfileSyntaxSupported | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
8 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace CirrusSearch\Search\Rescore; |
| 4 | |
| 5 | use CirrusSearch\CirrusSearchHookRunner; |
| 6 | use CirrusSearch\Elastica\LtrQuery; |
| 7 | use CirrusSearch\Profile\SearchProfileService; |
| 8 | use CirrusSearch\Search\SearchContext; |
| 9 | use Elastica\Query\AbstractQuery; |
| 10 | |
| 11 | /** |
| 12 | * Set of rescore builders |
| 13 | * |
| 14 | * @license GPL-2.0-or-later |
| 15 | */ |
| 16 | |
/**
 * Builds rescore queries by reading a rescore profile.
 *
 * The profile is resolved once, in the constructor: the requested profile is
 * loaded and, when it does not support the namespaces or the syntax of the
 * current search request, its 'fallback_profile' chain is followed until a
 * supported profile is found. build() then turns every entry of the resolved
 * profile's 'rescore' list into a rescore definition.
 */
class RescoreBuilder {

	/** Maximum number of rescore profile fallbacks */
	private const FALLBACK_LIMIT = 4;

	/**
	 * Rescore params that may be copied from a rescore definition into the
	 * 'query' block of the generated rescore. Anything else is ignored.
	 */
	private const RESCORE_MAIN_PARAMS = [
		'query_weight',
		'rescore_query_weight',
		'score_mode',
	];

	private const FUNCTION_SCORE_TYPE = "function_score";
	private const LTR_TYPE = "ltr";
	private const PHRASE = "phrase";

	/**
	 * @var SearchContext
	 */
	private $context;

	/**
	 * @var array the resolved (supported) rescore profile
	 */
	private $profile;

	/**
	 * @var CirrusSearchHookRunner
	 */
	private $cirrusSearchHookRunner;

	/**
	 * @param SearchContext $context
	 * @param CirrusSearchHookRunner $cirrusSearchHookRunner
	 * @param string|null $profile name of the rescore profile, defaults to
	 *  the profile configured on the search context
	 * @throws InvalidRescoreProfileException
	 */
	public function __construct( SearchContext $context, CirrusSearchHookRunner $cirrusSearchHookRunner, $profile = null ) {
		$this->context = $context;
		// Must run after $this->context is set: profile resolution reads it.
		$this->profile = $this->getSupportedProfile( $profile ?? $context->getRescoreProfile() );
		$this->cirrusSearchHookRunner = $cirrusSearchHookRunner;
	}

	/**
	 * Build one rescore definition per usable entry of the profile.
	 *
	 * Entries whose window size is not positive, or for which no rescore
	 * query could be built, are skipped.
	 *
	 * @return array[] List of rescore queries
	 */
	public function build() {
		$rescores = [];
		foreach ( $this->profile['rescore'] as $rescoreDef ) {
			$windowSize = $this->windowSize( $rescoreDef );
			if ( $windowSize <= 0 ) {
				continue;
			}

			$rescoreQuery = $this->buildRescoreQuery( $rescoreDef );
			if ( $rescoreQuery === null ) {
				continue;
			}

			$query = $this->prepareQueryParams( $rescoreDef );
			$query['rescore_query'] = $rescoreQuery;
			$rescores[] = [
				'window_size' => $windowSize,
				'query' => $query,
			];
		}
		return $rescores;
	}

	/**
	 * Builds the 'query' attribute by reading the rescore type.
	 *
	 * @param array $rescoreDef
	 * @return AbstractQuery|null the rescore query
	 * @throws InvalidRescoreProfileException on an unknown rescore type
	 */
	private function buildRescoreQuery( array $rescoreDef ) {
		switch ( $rescoreDef['type'] ) {
			case self::FUNCTION_SCORE_TYPE:
				$funcChain = new FunctionScoreChain( $this->context, $rescoreDef['function_chain'],
					$rescoreDef['function_chain_overrides'] ?? [], $this->cirrusSearchHookRunner );
				return $funcChain->buildRescoreQuery();
			case self::LTR_TYPE:
				return $this->buildLtrQuery( $rescoreDef['model'] );
			case self::PHRASE:
				return $this->context->getPhraseRescoreQuery();
			default:
				throw new InvalidRescoreProfileException(
					"Unsupported rescore query type: " . $rescoreDef['type'] );
		}
	}

	/**
	 * @param string $model Name of the sltr model to use
	 * @return AbstractQuery
	 */
	private function buildLtrQuery( $model ) {
		// This is a bit fragile, and makes the bold assumption
		// only a single level of rescore will be used. This is
		// strictly for debugging/testing before shipping a model
		// live so shouldn't be a big deal.
		$override = $this->context->getDebugOptions()->getCirrusMLRModel();
		if ( $override ) {
			$model = $override;
		}

		$bool = new \Elastica\Query\BoolQuery();
		// the ltr query can return negative scores, which mucks with elasticsearch
		// sorting as that will put these results below documents set to 0. Fix
		// that up by adding a large constant boost.
		$constant = new \Elastica\Query\ConstantScore( new \Elastica\Query\MatchAll() );
		$constant->setBoost( 100000 );
		$bool->addShould( $constant );
		$bool->addShould( new LtrQuery( $model, [
			// TODO: These params probably shouldn't be hard coded
			'query_string' => $this->context->getCleanedSearchTerm(),
		] ) );

		return $bool;
	}

	/**
	 * @param array $rescore
	 * @return int the window size defined in the profile
	 * or the value from config if window_size_override is set.
	 */
	private function windowSize( array $rescore ) {
		if ( isset( $rescore['window_size_override'] ) ) {
			$windowSize = $this->context->getConfig()->get( $rescore['window_size_override'] );
			if ( $windowSize !== null ) {
				return $windowSize;
			}
		}
		return $rescore['window'];
	}

	/**
	 * Assemble query params in the rescore block.
	 * Only self::RESCORE_MAIN_PARAMS are allowed.
	 *
	 * A param is emitted only when the definition sets it. When the
	 * definition also carries "<param>_override", that names a config key
	 * whose value replaces the profile value unless the config value is null.
	 *
	 * @param array $settings
	 * @return array
	 */
	private function prepareQueryParams( array $settings ): array {
		$def = [];
		foreach ( self::RESCORE_MAIN_PARAMS as $param ) {
			if ( !isset( $settings[$param] ) ) {
				continue;
			}
			$value = $settings[$param];
			$overrideKey = $param . '_override';
			if ( isset( $settings[$overrideKey] ) ) {
				$value = $this->context->getConfig()->get( $settings[$overrideKey] ) ?? $value;
			}
			$def[$param] = $value;
		}
		return $def;
	}

	/**
	 * Inspect requested namespaces and syntax and return the supported
	 * profile, following 'fallback_profile' links when needed.
	 *
	 * @param string $profileName
	 * @return array the supported rescore profile.
	 * @throws InvalidRescoreProfileException when a profile is not an array,
	 *  an unsupported profile has no fallback, the fallback chain contains a
	 *  cycle, or more than self::FALLBACK_LIMIT profiles had to be visited
	 */
	private function getSupportedProfile( $profileName ): array {
		$profile = $this->loadProfile( $profileName, false );

		// Names of every profile visited so far, in visit order.
		$seen = [];
		while ( true ) {
			$seen[$profileName] = true;
			if ( count( $seen ) > self::FALLBACK_LIMIT ) {
				throw new InvalidRescoreProfileException(
					"Fell back more than " . self::FALLBACK_LIMIT . " times"
				);
			}

			// Namespace check first: the syntax check only runs when it passes.
			if ( $this->isProfileNamespaceSupported( $profile )
				&& $this->isProfileSyntaxSupported( $profile )
			) {
				return $profile;
			}

			if ( !isset( $profile['fallback_profile'] ) ) {
				throw new InvalidRescoreProfileException(
					"Invalid rescore profile: fallback_profile is mandatory "
					. "if supported_namespaces is not 'all' or "
					. "unsupported_syntax is not null."
				);
			}
			$profileName = $profile['fallback_profile'];
			if ( isset( $seen[$profileName] ) ) {
				$chain = implode( '->', array_keys( $seen ) ) . "->$profileName";
				throw new InvalidRescoreProfileException( "Cycle in rescore fallbacks: $chain" );
			}

			$profile = $this->loadProfile( $profileName, true );
		}
	}

	/**
	 * Load a rescore profile by name and make sure it is an array.
	 *
	 * @param string $profileName
	 * @param bool $isFallback whether the profile is reached through a
	 *  'fallback_profile' link; only affects the error message
	 * @return array
	 * @throws InvalidRescoreProfileException when the loaded profile is not an array
	 */
	private function loadProfile( $profileName, bool $isFallback ): array {
		$profile = $this->context->getConfig()
			->getProfileService()
			->loadProfileByName( SearchProfileService::RESCORE, $profileName );
		if ( !is_array( $profile ) ) {
			$kind = $isFallback ? 'fallback' : 'rescore';
			throw new InvalidRescoreProfileException(
				"Invalid $kind profile, must be array: $profileName" );
		}
		return $profile;
	}

	/**
	 * Check if a given profile supports the namespaces used by the current
	 * search request.
	 *
	 * @param array $profile Profile to check
	 * @return bool True if the profile supports current namespaces
	 * @throws InvalidRescoreProfileException when supported_namespaces is
	 *  missing or is neither 'all', 'content' nor an array
	 */
	private function isProfileNamespaceSupported( array $profile ): bool {
		// A missing key is handled like any other invalid value: it reaches
		// the exception below instead of raising an undefined-key warning.
		$supported = $profile['supported_namespaces'] ?? null;

		if ( $supported === 'all' ) {
			return true;
		}

		if ( is_array( $supported ) ) {
			$profileNs = $supported;
		} elseif ( $supported === 'content' ) {
			$config = $this->context->getConfig();
			$profileNs = $config->get( 'ContentNamespaces' ) ?? [];
			// Default search namespaces are also considered content
			foreach ( $config->get( 'NamespacesToBeSearchedDefault' ) ?? [] as $ns => $isDefault ) {
				if ( $isDefault ) {
					$profileNs[] = $ns;
				}
			}
		} else {
			throw new InvalidRescoreProfileException( "Invalid rescore profile: supported_namespaces " .
				"should be 'all', 'content' or an array of namespaces" );
		}

		$queryNs = $this->context->getNamespaces();

		if ( !$queryNs ) {
			// According to comments in Searcher if namespaces is
			// not set we run the query on all namespaces, which a
			// profile restricted to some namespaces cannot cover.
			// @todo: verify comments.
			return false;
		}

		foreach ( $queryNs as $ns ) {
			// Loose comparison on purpose: namespace ids written as numeric
			// strings in configuration must still match integer ids.
			if ( !in_array( $ns, $profileNs ) ) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Check if the given profile supports the syntax used by the
	 * current search request.
	 *
	 * When 'supported_syntax' is a non-empty list at least one of its
	 * entries must be used by the request; when 'unsupported_syntax' is set
	 * none of its entries may be used.
	 *
	 * @param array $profile
	 * @return bool
	 */
	private function isProfileSyntaxSupported( array $profile ): bool {
		$required = $profile['supported_syntax'] ?? [];
		if ( $required !== [] ) {
			$anyUsed = false;
			foreach ( $required as $supported ) {
				if ( $this->context->isSyntaxUsed( $supported ) ) {
					$anyUsed = true;
					break;
				}
			}
			if ( !$anyUsed ) {
				return false;
			}
		}

		foreach ( $profile['unsupported_syntax'] ?? [] as $reject ) {
			if ( $this->context->isSyntaxUsed( $reject ) ) {
				return false;
			}
		}

		return true;
	}
}