Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
75.61% covered (warning)
75.61%
93 / 123
44.44% covered (danger)
44.44%
4 / 9
CRAP
0.00% covered (danger)
0.00%
0 / 1
RescoreBuilder
75.61% covered (warning)
75.61%
93 / 123
44.44% covered (danger)
44.44%
4 / 9
79.05
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 build
86.67% covered (warning)
86.67%
13 / 15
0.00% covered (danger)
0.00%
0 / 1
4.04
 buildRescoreQuery
81.82% covered (warning)
81.82%
9 / 11
0.00% covered (danger)
0.00%
0 / 1
5.15
 buildLtrQuery
0.00% covered (danger)
0.00%
0 / 11
0.00% covered (danger)
0.00%
0 / 1
6
 windowSize
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 prepareQueryParams
45.45% covered (danger)
45.45%
5 / 11
0.00% covered (danger)
0.00%
0 / 1
9.06
 getSupportedProfile
72.73% covered (warning)
72.73%
24 / 33
0.00% covered (danger)
0.00%
0 / 1
10.64
 isProfileNamespaceSupported
100.00% covered (success)
100.00%
21 / 21
100.00% covered (success)
100.00%
1 / 1
10
 isProfileSyntaxSupported
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
8
1<?php
2
3namespace CirrusSearch\Search\Rescore;
4
5use CirrusSearch\CirrusSearchHookRunner;
6use CirrusSearch\Elastica\LtrQuery;
7use CirrusSearch\Profile\SearchProfileService;
8use CirrusSearch\Search\SearchContext;
9use Elastica\Query\AbstractQuery;
10
11/**
12 * Set of rescore builders
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License along
25 * with this program; if not, write to the Free Software Foundation, Inc.,
26 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
27 * http://www.gnu.org/copyleft/gpl.html
28 */
29
/**
 * Builds rescore queries by reading a rescore profile.
 *
 * The profile named at construction time is resolved first: when it does not
 * support the namespaces or the syntax of the current search request its
 * `fallback_profile` is tried instead, and so on. build() then turns every
 * entry of the resolved profile's `rescore` list into a rescore definition.
 */
class RescoreBuilder {

    /** Maximum number of rescore profile fallbacks */
    private const FALLBACK_LIMIT = 4;

    /**
     * List of allowed rescore params. Only these keys are copied from a
     * rescore definition into the generated 'query' block.
     */
    private const RESCORE_MAIN_PARAMS = [
        'query_weight',
        'rescore_query_weight',
        'score_mode',
    ];

    private const FUNCTION_SCORE_TYPE = "function_score";
    private const LTR_TYPE = "ltr";
    private const PHRASE = "phrase";

    /**
     * @var SearchContext
     */
    private $context;

    /**
     * @var array the resolved rescore profile (see getSupportedProfile())
     */
    private $profile;

    /**
     * @var CirrusSearchHookRunner
     */
    private $cirrusSearchHookRunner;

    /**
     * @param SearchContext $context
     * @param CirrusSearchHookRunner $cirrusSearchHookRunner
     * @param string|null $profile Name of the rescore profile to use; when null
     *  the name returned by $context->getRescoreProfile() is used.
     * @throws InvalidRescoreProfileException
     */
    public function __construct( SearchContext $context, CirrusSearchHookRunner $cirrusSearchHookRunner, $profile = null ) {
        // The context must be set first: profile resolution reads it.
        $this->context = $context;
        $this->cirrusSearchHookRunner = $cirrusSearchHookRunner;
        $this->profile = $this->getSupportedProfile( $profile ?? $context->getRescoreProfile() );
    }

    /**
     * Build one rescore definition per usable entry of the profile.
     *
     * Entries whose effective window size is not positive, or for which no
     * rescore query could be built, are skipped.
     *
     * @return array[] List of rescore queries
     * @throws InvalidRescoreProfileException
     */
    public function build() {
        $rescores = [];
        foreach ( $this->profile['rescore'] as $rescoreDef ) {
            $windowSize = $this->windowSize( $rescoreDef );
            if ( $windowSize <= 0 ) {
                continue;
            }

            // Build the query before preparing the params so that no work
            // is done for entries that end up being skipped.
            $rescoreQuery = $this->buildRescoreQuery( $rescoreDef );
            if ( $rescoreQuery === null ) {
                continue;
            }

            $query = $this->prepareQueryParams( $rescoreDef );
            $query['rescore_query'] = $rescoreQuery;

            $rescores[] = [
                'window_size' => $windowSize,
                'query' => $query,
            ];
        }
        return $rescores;
    }

    /**
     * builds the 'query' attribute by reading type
     *
     * @param array $rescoreDef
     * @return AbstractQuery|null the rescore query
     * @throws InvalidRescoreProfileException if the type is missing or unknown
     */
    private function buildRescoreQuery( array $rescoreDef ) {
        // A missing 'type' falls through to the default branch instead of
        // raising an undefined array key warning first.
        $type = $rescoreDef['type'] ?? null;
        switch ( $type ) {
            case self::FUNCTION_SCORE_TYPE:
                $funcChain = new FunctionScoreChain(
                    $this->context,
                    $rescoreDef['function_chain'],
                    $rescoreDef['function_chain_overrides'] ?? [],
                    $this->cirrusSearchHookRunner
                );
                return $funcChain->buildRescoreQuery();
            case self::LTR_TYPE:
                return $this->buildLtrQuery( $rescoreDef['model'] );
            case self::PHRASE:
                return $this->context->getPhraseRescoreQuery();
            default:
                throw new InvalidRescoreProfileException(
                    "Unsupported rescore query type: " . $type );
        }
    }

    /**
     * @param string $model Name of the sltr model to use
     * @return AbstractQuery
     */
    private function buildLtrQuery( $model ) {
        // This is a bit fragile, and makes the bold assumption
        // only a single level of rescore will be used. This is
        // strictly for debugging/testing before shipping a model
        // live so shouldn't be a big deal.
        $override = $this->context->getDebugOptions()->getCirrusMLRModel();
        if ( $override ) {
            $model = $override;
        }

        $bool = new \Elastica\Query\BoolQuery();
        // the ltr query can return negative scores, which mucks with elasticsearch
        // sorting as that will put these results below documents set to 0. Fix
        // that up by adding a large constant boost.
        $constant = new \Elastica\Query\ConstantScore( new \Elastica\Query\MatchAll() );
        $constant->setBoost( 100000 );
        $bool->addShould( $constant );
        $bool->addShould( new LtrQuery( $model, [
            // TODO: These params probably shouldn't be hard coded
            'query_string' => $this->context->getCleanedSearchTerm(),
        ] ) );

        return $bool;
    }

    /**
     * @param array $rescore
     * @return int the window size defined in the profile
     * or the value from config if window_size_override is set.
     */
    private function windowSize( array $rescore ) {
        if ( isset( $rescore['window_size_override'] ) ) {
            $windowSize = $this->context->getConfig()->get( $rescore['window_size_override'] );
            // A null config value means "no override": use the profile value.
            if ( $windowSize !== null ) {
                return $windowSize;
            }
        }
        return $rescore['window'];
    }

    /**
     * Assemble query params in the rescore block
     * Only self::RESCORE_MAIN_PARAMS are allowed.
     *
     * For each allowed param present in $settings, a sibling key named
     * "<param>_override" may name a config setting whose non-null value
     * replaces the one from the profile.
     *
     * @param array $settings
     * @return array
     */
    private function prepareQueryParams( array $settings ) {
        $def = [];
        foreach ( self::RESCORE_MAIN_PARAMS as $param ) {
            if ( !isset( $settings[$param] ) ) {
                continue;
            }
            $value = $settings[$param];
            $overrideKey = $param . '_override';
            if ( isset( $settings[$overrideKey] ) ) {
                $oValue = $this->context->getConfig()->get( $settings[$overrideKey] );
                if ( $oValue !== null ) {
                    $value = $oValue;
                }
            }
            $def[$param] = $value;
        }
        return $def;
    }

    /**
     * Load a rescore profile by name and make sure it is an array.
     *
     * @param string $profileName
     * @return array
     * @throws InvalidRescoreProfileException if the loaded profile is not an array
     */
    private function loadRescoreProfile( $profileName ) {
        $profile = $this->context->getConfig()
            ->getProfileService()
            ->loadProfileByName( SearchProfileService::RESCORE, $profileName );
        if ( !is_array( $profile ) ) {
            throw new InvalidRescoreProfileException(
                "Invalid fallback profile, must be array: $profileName" );
        }
        return $profile;
    }

    /**
     * Inspect requested namespaces and return the supported profile
     *
     * Starting from $profileName, follows the `fallback_profile` chain until
     * a profile supporting both the namespaces and the syntax of the current
     * request is found.
     *
     * @param string $profileName
     * @return array the supported rescore profile.
     * @throws InvalidRescoreProfileException if a profile is not an array, if
     *  an unsupported profile has no fallback, if the fallbacks form a cycle
     *  or if the chain exceeds self::FALLBACK_LIMIT.
     */
    private function getSupportedProfile( $profileName ) {
        $profile = $this->loadRescoreProfile( $profileName );

        // Names of the profiles visited so far, in visiting order.
        $seen = [];
        while ( true ) {
            $seen[$profileName] = true;
            if ( count( $seen ) > self::FALLBACK_LIMIT ) {
                throw new InvalidRescoreProfileException(
                    "Fell back more than " . self::FALLBACK_LIMIT . " times"
                );
            }

            if ( $this->isProfileNamespaceSupported( $profile )
                && $this->isProfileSyntaxSupported( $profile )
            ) {
                return $profile;
            }

            if ( !isset( $profile['fallback_profile'] ) ) {
                throw new InvalidRescoreProfileException(
                    "Invalid rescore profile: fallback_profile is mandatory "
                    . "if supported_namespaces is not 'all' or "
                    . "unsupported_syntax is not null."
                );
            }
            $profileName = $profile['fallback_profile'];
            if ( isset( $seen[$profileName] ) ) {
                $chain = implode( '->', array_keys( $seen ) ) . "->$profileName";
                throw new InvalidRescoreProfileException( "Cycle in rescore fallbacks: $chain" );
            }

            $profile = $this->loadRescoreProfile( $profileName );
        }
    }

    /**
     * Check if a given profile supports the namespaces used by the current
     * search request.
     *
     * @param array $profile Profile to check
     * @return bool True if the profile supports current namespaces
     * @throws InvalidRescoreProfileException if supported_namespaces is
     *  missing or is neither 'all', 'content' nor an array
     */
    private function isProfileNamespaceSupported( array $profile ) {
        // A missing key ends up in the default branch below rather than
        // raising an undefined array key warning first.
        $supported = $profile['supported_namespaces'] ?? null;
        if ( is_array( $supported ) ) {
            $profileNs = $supported;
        } else {
            switch ( $supported ) {
                case 'all':
                    return true;
                case 'content':
                    $profileNs = $this->context->getConfig()->get( 'ContentNamespaces' );
                    // Default search namespaces are also considered content
                    $defaultSearch = $this->context->getConfig()->get( 'NamespacesToBeSearchedDefault' );
                    foreach ( $defaultSearch as $ns => $isDefault ) {
                        if ( $isDefault ) {
                            $profileNs[] = $ns;
                        }
                    }
                    break;
                default:
                    throw new InvalidRescoreProfileException( "Invalid rescore profile: supported_namespaces " .
                        "should be 'all', 'content' or an array of namespaces" );
            }
        }

        $queryNs = $this->context->getNamespaces();

        if ( !$queryNs ) {
            // According to comments in Searcher if namespaces is
            // not set we run the query on all namespaces
            // @todo: verify comments.
            return false;
        }

        foreach ( $queryNs as $ns ) {
            // NOTE(review): loose comparison kept on purpose; namespace ids
            // may reach us as numeric strings - confirm before making strict.
            if ( !in_array( $ns, $profileNs ) ) {
                return false;
            }
        }

        return true;
    }

    /**
     * Check if the given profile supports the syntax used by the
     * current search request.
     *
     * A non-empty `supported_syntax` list requires at least one of its
     * entries to be used by the request; a non-empty `unsupported_syntax`
     * list rejects the profile as soon as one of its entries is used.
     *
     * @param array $profile
     * @return bool
     */
    private function isProfileSyntaxSupported( array $profile ) {
        $required = $profile['supported_syntax'] ?? [];
        if ( $required !== [] ) {
            $supportedSyntax = false;
            foreach ( $required as $supported ) {
                if ( $this->context->isSyntaxUsed( $supported ) ) {
                    $supportedSyntax = true;
                    break;
                }
            }
            if ( !$supportedSyntax ) {
                return false;
            }
        }

        $rejected = $profile['unsupported_syntax'] ?? [];
        if ( $rejected !== [] ) {
            foreach ( $rejected as $reject ) {
                if ( $this->context->isSyntaxUsed( $reject ) ) {
                    return false;
                }
            }
        }

        return true;
    }
}