Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
75.61% |
93 / 123 |
|
44.44% |
4 / 9 |
CRAP | |
0.00% |
0 / 1 |
RescoreBuilder | |
75.61% |
93 / 123 |
|
44.44% |
4 / 9 |
79.05 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
build | |
86.67% |
13 / 15 |
|
0.00% |
0 / 1 |
4.04 | |||
buildRescoreQuery | |
81.82% |
9 / 11 |
|
0.00% |
0 / 1 |
5.15 | |||
buildLtrQuery | |
0.00% |
0 / 11 |
|
0.00% |
0 / 1 |
6 | |||
windowSize | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
prepareQueryParams | |
45.45% |
5 / 11 |
|
0.00% |
0 / 1 |
9.06 | |||
getSupportedProfile | |
72.73% |
24 / 33 |
|
0.00% |
0 / 1 |
10.64 | |||
isProfileNamespaceSupported | |
100.00% |
21 / 21 |
|
100.00% |
1 / 1 |
10 | |||
isProfileSyntaxSupported | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
8 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Search\Rescore; |
4 | |
5 | use CirrusSearch\CirrusSearchHookRunner; |
6 | use CirrusSearch\Elastica\LtrQuery; |
7 | use CirrusSearch\Profile\SearchProfileService; |
8 | use CirrusSearch\Search\SearchContext; |
9 | use Elastica\Query\AbstractQuery; |
10 | |
11 | /** |
12 | * Set of rescore builders |
13 | * |
14 | * This program is free software; you can redistribute it and/or modify |
15 | * it under the terms of the GNU General Public License as published by |
16 | * the Free Software Foundation; either version 2 of the License, or |
17 | * (at your option) any later version. |
18 | * |
19 | * This program is distributed in the hope that it will be useful, |
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
22 | * GNU General Public License for more details. |
23 | * |
24 | * You should have received a copy of the GNU General Public License along |
25 | * with this program; if not, write to the Free Software Foundation, Inc., |
26 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
27 | * http://www.gnu.org/copyleft/gpl.html |
28 | */ |
29 | |
/**
 * Builds rescore queries by reading a rescore profile.
 *
 * The profile is resolved once, at construction time: if the requested
 * profile does not support the namespaces or the syntax used by the current
 * search request, its 'fallback_profile' chain is followed until a supported
 * profile is found.
 */
class RescoreBuilder {

	/**
	 * Maximum number of distinct profiles (the requested one included) that
	 * may be visited while resolving fallbacks before giving up.
	 */
	private const FALLBACK_LIMIT = 4;

	/**
	 * List of rescore params that are copied from the profile into the
	 * 'query' section of a rescore block.
	 */
	private const RESCORE_MAIN_PARAMS = [
		'query_weight',
		'rescore_query_weight',
		'score_mode',
	];

	private const FUNCTION_SCORE_TYPE = "function_score";
	private const LTR_TYPE = "ltr";
	private const PHRASE = "phrase";

	/**
	 * @var SearchContext
	 */
	private $context;

	/**
	 * @var array the resolved (supported) rescore profile
	 */
	private $profile;

	/**
	 * @var CirrusSearchHookRunner
	 */
	private $cirrusSearchHookRunner;

	/**
	 * @param SearchContext $context
	 * @param CirrusSearchHookRunner $cirrusSearchHookRunner
	 * @param string|null $profile name of the rescore profile, when null the
	 *  profile name is read from the context
	 * @throws InvalidRescoreProfileException
	 */
	public function __construct( SearchContext $context, CirrusSearchHookRunner $cirrusSearchHookRunner, $profile = null ) {
		$this->context = $context;
		$this->profile = $this->getSupportedProfile( $profile ?? $context->getRescoreProfile() );
		$this->cirrusSearchHookRunner = $cirrusSearchHookRunner;
	}

	/**
	 * Build one rescore block per usable entry of the profile's 'rescore' list.
	 *
	 * Entries with a window size lower than or equal to zero and entries for
	 * which no rescore query could be built are skipped.
	 *
	 * @return array[] List of rescore queries
	 * @throws InvalidRescoreProfileException
	 */
	public function build() {
		$rescores = [];
		// A profile without a 'rescore' list simply produces no rescore block.
		foreach ( $this->profile['rescore'] ?? [] as $rescoreDef ) {
			$windowSize = $this->windowSize( $rescoreDef );
			if ( $windowSize <= 0 ) {
				continue;
			}

			$rescoreQuery = $this->buildRescoreQuery( $rescoreDef );
			if ( $rescoreQuery === null ) {
				continue;
			}

			// Only assemble the params once we know the block will be emitted.
			$query = $this->prepareQueryParams( $rescoreDef );
			$query['rescore_query'] = $rescoreQuery;

			$rescores[] = [
				'window_size' => $windowSize,
				'query' => $query,
			];
		}
		return $rescores;
	}

	/**
	 * builds the 'query' attribute by reading type
	 *
	 * @param array $rescoreDef
	 * @return AbstractQuery|null the rescore query
	 * @throws InvalidRescoreProfileException if the type is not supported
	 */
	private function buildRescoreQuery( array $rescoreDef ) {
		switch ( $rescoreDef['type'] ) {
			case self::FUNCTION_SCORE_TYPE:
				$funcChain = new FunctionScoreChain(
					$this->context,
					$rescoreDef['function_chain'],
					$rescoreDef['function_chain_overrides'] ?? [],
					$this->cirrusSearchHookRunner
				);
				return $funcChain->buildRescoreQuery();
			case self::LTR_TYPE:
				return $this->buildLtrQuery( $rescoreDef['model'] );
			case self::PHRASE:
				return $this->context->getPhraseRescoreQuery();
			default:
				throw new InvalidRescoreProfileException(
					"Unsupported rescore query type: " . $rescoreDef['type'] );
		}
	}

	/**
	 * @param string $model Name of the sltr model to use
	 * @return AbstractQuery
	 */
	private function buildLtrQuery( $model ) {
		// This is a bit fragile, and makes the bold assumption
		// only a single level of rescore will be used. This is
		// strictly for debugging/testing before shipping a model
		// live so shouldn't be a big deal.
		$override = $this->context->getDebugOptions()->getCirrusMLRModel();
		if ( $override ) {
			$model = $override;
		}

		$bool = new \Elastica\Query\BoolQuery();
		// the ltr query can return negative scores, which mucks with elasticsearch
		// sorting as that will put these results below documents set to 0. Fix
		// that up by adding a large constant boost.
		$constant = new \Elastica\Query\ConstantScore( new \Elastica\Query\MatchAll() );
		$constant->setBoost( 100000 );
		$bool->addShould( $constant );
		$bool->addShould( new LtrQuery( $model, [
			// TODO: These params probably shouldn't be hard coded
			'query_string' => $this->context->getCleanedSearchTerm(),
		] ) );

		return $bool;
	}

	/**
	 * @param array $rescore
	 * @return int the window size defined in the profile
	 * or the value from config if window_size_override is set.
	 */
	private function windowSize( array $rescore ) {
		if ( isset( $rescore['window_size_override'] ) ) {
			$windowSize = $this->context->getConfig()->get( $rescore['window_size_override'] );
			// A null config value means "no override": use the profile value.
			if ( $windowSize !== null ) {
				return $windowSize;
			}
		}
		return $rescore['window'];
	}

	/**
	 * Assemble query params in the rescore block
	 * Only self::RESCORE_MAIN_PARAMS are allowed.
	 *
	 * A param is only emitted when the profile defines it; its value may then
	 * be replaced by the config value named by "<param>_override" when that
	 * config value is not null.
	 *
	 * @param array $settings
	 * @return array
	 */
	private function prepareQueryParams( array $settings ): array {
		$def = [];
		foreach ( self::RESCORE_MAIN_PARAMS as $param ) {
			if ( !isset( $settings[$param] ) ) {
				continue;
			}
			$value = $settings[$param];
			$overrideKey = $param . '_override';
			if ( isset( $settings[$overrideKey] ) ) {
				$value = $this->context->getConfig()->get( $settings[$overrideKey] ) ?? $value;
			}
			$def[$param] = $value;
		}
		return $def;
	}

	/**
	 * Inspect requested namespaces and return the supported profile
	 *
	 * Starting from $profileName, follows 'fallback_profile' until a profile
	 * supporting both the namespaces and the syntax of the current request
	 * is found.
	 *
	 * @param string $profileName
	 * @return array the supported rescore profile.
	 * @throws InvalidRescoreProfileException if a profile is not an array,
	 *  if a fallback is needed but not declared, if the fallback chain loops
	 *  or if more than self::FALLBACK_LIMIT profiles had to be visited
	 */
	private function getSupportedProfile( $profileName ): array {
		$profile = $this->loadProfile( $profileName );

		$seen = [];
		while ( true ) {
			$seen[$profileName] = true;
			// $seen also holds the initially requested profile, so this
			// triggers once FALLBACK_LIMIT + 1 profiles have been visited.
			if ( count( $seen ) > self::FALLBACK_LIMIT ) {
				throw new InvalidRescoreProfileException(
					"Fell back more than " . self::FALLBACK_LIMIT . " times"
				);
			}

			if ( $this->isProfileNamespaceSupported( $profile )
				&& $this->isProfileSyntaxSupported( $profile )
			) {
				return $profile;
			}

			if ( !isset( $profile['fallback_profile'] ) ) {
				throw new InvalidRescoreProfileException(
					"Invalid rescore profile: fallback_profile is mandatory "
					. "if supported_namespaces is not 'all' or "
					. "unsupported_syntax is not null."
				);
			}
			$profileName = $profile['fallback_profile'];
			if ( isset( $seen[$profileName] ) ) {
				$chain = implode( '->', array_keys( $seen ) ) . "->$profileName";
				throw new InvalidRescoreProfileException( "Cycle in rescore fallbacks: $chain" );
			}

			$profile = $this->loadProfile( $profileName );
		}
	}

	/**
	 * Load a rescore profile by name from the profile service and make sure
	 * it is an array.
	 *
	 * @param string $profileName
	 * @return array
	 * @throws InvalidRescoreProfileException if the loaded profile is not an array
	 */
	private function loadProfile( $profileName ): array {
		$profile = $this->context->getConfig()
			->getProfileService()
			->loadProfileByName( SearchProfileService::RESCORE, $profileName );
		if ( !is_array( $profile ) ) {
			throw new InvalidRescoreProfileException(
				"Invalid fallback profile, must be array: $profileName" );
		}
		return $profile;
	}

	/**
	 * Check if a given profile supports the namespaces used by the current
	 * search request.
	 *
	 * @param array $profile Profile to check
	 * @return bool True if the profile supports current namespaces
	 * @throws InvalidRescoreProfileException if supported_namespaces is
	 *  missing or is neither 'all', 'content' nor an array
	 */
	private function isProfileNamespaceSupported( array $profile ): bool {
		// Strict comparisons on purpose: a loose switch would accept values
		// such as true as a synonym of 'all'. A missing key is handled
		// like any other invalid value.
		$supported = $profile['supported_namespaces'] ?? null;
		if ( $supported === 'all' ) {
			return true;
		}

		if ( is_array( $supported ) ) {
			$profileNs = $supported;
		} elseif ( $supported === 'content' ) {
			$profileNs = $this->context->getConfig()->get( 'ContentNamespaces' );
			// Default search namespaces are also considered content
			$defaultSearch = $this->context->getConfig()->get( 'NamespacesToBeSearchedDefault' );
			foreach ( $defaultSearch as $ns => $isDefault ) {
				if ( $isDefault ) {
					$profileNs[] = $ns;
				}
			}
		} else {
			throw new InvalidRescoreProfileException( "Invalid rescore profile: supported_namespaces " .
				"should be 'all', 'content' or an array of namespaces" );
		}

		$queryNs = $this->context->getNamespaces();

		if ( !$queryNs ) {
			// According to comments in Searcher if namespaces is
			// not set we run the query on all namespaces
			// @todo: verify comments.
			return false;
		}

		foreach ( $queryNs as $ns ) {
			// NOTE(review): loose comparison kept from the original code;
			// confirm namespace ids are always ints before making it strict.
			if ( !in_array( $ns, $profileNs ) ) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Check if the given profile supports the syntax used by the
	 * current search request.
	 *
	 * A non-empty 'supported_syntax' list requires at least one of its
	 * entries to be used by the request; a non-empty 'unsupported_syntax'
	 * list rejects the profile as soon as one of its entries is used.
	 *
	 * @param array $profile
	 * @return bool
	 */
	private function isProfileSyntaxSupported( array $profile ): bool {
		$required = $profile['supported_syntax'] ?? [];
		if ( $required !== [] ) {
			$matched = false;
			foreach ( $required as $syntax ) {
				if ( $this->context->isSyntaxUsed( $syntax ) ) {
					$matched = true;
					break;
				}
			}
			if ( !$matched ) {
				return false;
			}
		}

		$rejected = $profile['unsupported_syntax'] ?? [];
		if ( $rejected !== [] ) {
			foreach ( $rejected as $syntax ) {
				if ( $this->context->isSyntaxUsed( $syntax ) ) {
					return false;
				}
			}
		}

		return true;
	}
}