Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
13.64% covered (danger)
13.64%
18 / 132
23.08% covered (danger)
23.08%
3 / 13
CRAP
0.00% covered (danger)
0.00%
0 / 1
SearchRequestBuilder
13.64% covered (danger)
13.64%
18 / 132
23.08% covered (danger)
23.08%
3 / 13
1291.09
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 build
0.00% covered (danger)
0.00%
0 / 101
0.00% covered (danger)
0.00%
0 / 1
870
 getOffset
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setOffset
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 getLimit
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setLimit
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 getTimeout
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setTimeout
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 getIndex
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
4
 setIndex
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getSort
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setSort
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 getSearchContext
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace CirrusSearch\Search;
4
5use CirrusSearch\Connection;
6use CirrusSearch\Util;
7use Elastica\Index;
8use Elastica\Query;
9use MediaWiki\Logger\LoggerFactory;
10
/**
 * Build the search request body
 *
 * Holds the per-request settings (offset, limit, timeout, sort and an
 * optionally forced index) and combines them with the state held by a
 * SearchContext to produce the \Elastica\Search returned by build().
 */
class SearchRequestBuilder {
    /** @var SearchContext */
    private $searchContext;

    /** @var Connection */
    private $connection;

    /** @var string */
    private $indexBaseName;

    /** @var int */
    private $offset = 0;

    /** @var int */
    private $limit = 20;

    /** @var string|null search timeout, string with time and unit, e.g. 20s for 20 seconds */
    private $timeout;

    /**
     * @var Index|null force the index when set, otherwise the index is resolved
     *  with the help of {@link Connection::pickIndexSuffixForNamespaces}
     */
    private $index;

    /** @var string set the sort option, controls the use of rescore functions or elastic sort */
    private $sort = 'relevance';

    /**
     * @param SearchContext $searchContext
     * @param Connection $connection
     * @param string $indexBaseName
     */
    public function __construct( SearchContext $searchContext, Connection $connection, $indexBaseName ) {
        $this->searchContext = $searchContext;
        $this->connection = $connection;
        $this->indexBaseName = $indexBaseName;
    }

    /**
     * Build the search request
     *
     * Note that with the 'random' sort and no random seed the configured
     * offset is reset to 0 and a warning is added to the search context.
     *
     * @return \Elastica\Search
     */
    public function build() {
        $resultsType = $this->searchContext->getResultsType();

        $query = new Query();
        $query->setTrackTotalHits( $this->searchContext->getTrackTotalHits() );
        $query->setSource( $resultsType->getSourceFiltering() );
        $query->setParam( "fields", $resultsType->getFields() );

        $extraIndexes = $this->searchContext->getExtraIndices();

        // The filter is registered before the main query is fetched from the
        // context below; keep this ordering.
        if (
            $extraIndexes &&
            $this->searchContext->getConfig()->getElement( 'CirrusSearchDeduplicateInQuery' ) !== false
        ) {
            $this->searchContext->addNotFilter( new \Elastica\Query\Term(
                [ 'local_sites_with_dupe' => $this->indexBaseName ]
            ) );
        }

        $mainQuery = $this->searchContext->getQuery();
        $query->setQuery( $mainQuery );

        foreach ( $this->searchContext->getAggregations() as $agg ) {
            $query->addAggregation( $agg );
        }

        $highlight = $this->searchContext->getHighlight( $resultsType, $mainQuery );
        if ( $highlight ) {
            $query->setHighlight( $highlight );
        }

        $suggestQueries = $this->searchContext->getFallbackRunner()->getElasticSuggesters();
        if ( $suggestQueries ) {
            $query->setParam( 'suggest', [
                // TODO: remove special case on 1-elt array, added to not change the test fixtures
                // We should switch to explicit naming
                'suggest' => count( $suggestQueries ) === 1 ? reset( $suggestQueries ) : $suggestQueries
            ] );
            $query->addParam( 'stats', 'suggest' );
        }

        foreach ( $this->searchContext->getSyntaxUsed() as $syntax ) {
            $query->addParam( 'stats', $syntax );
        }

        // May reset $this->offset (random sort), so it has to run before the
        // offset is applied below.
        $this->applySort( $query, $mainQuery );

        if ( $this->offset ) {
            $query->setFrom( $this->offset );
        }
        if ( $this->limit ) {
            $query->setSize( $this->limit );
        }

        // Setup the search
        $queryOptions = [];
        if ( $this->timeout ) {
            $queryOptions[\Elastica\Search::OPTION_TIMEOUT] = $this->timeout;
        }
        // @todo when switching to multi-search this has to be provided at the top level
        if ( $this->searchContext->getConfig()->get( 'CirrusSearchMoreAccurateScoringMode' ) ) {
            $queryOptions[\Elastica\Search::OPTION_SEARCH_TYPE] =
                \Elastica\Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH;
        }

        $search = $this->getIndex()->createSearch( $query, $queryOptions );
        $crossClusterName = $this->connection->getConfig()->getClusterAssignment()->getCrossClusterName();
        foreach ( $extraIndexes as $i ) {
            $search->addIndex( $this->connection->getIndex( $i->getSearchIndex( $crossClusterName ) ) );
        }

        $this->searchContext->getDebugOptions()->applyDebugOptions( $query );
        return $search;
    }

    /**
     * Apply the configured sort option to the query: either rescores, an
     * elastic sort clause or a random scoring wrapper around the main query.
     *
     * @param Query $query the request body being built, modified in place
     * @param mixed $mainQuery the main query as returned by SearchContext::getQuery()
     */
    private function applySort( Query $query, $mainQuery ): void {
        // See also CirrusSearch::getValidSorts()
        switch ( $this->sort ) {
            case 'just_match':
                // Use just matching scores, without any rescoring, and default sort.
                break;
            case 'relevance':
                // The default: add some rescores to improve relevance
                $rescores = $this->searchContext->getRescore();
                if ( $rescores !== [] ) {
                    $query->setParam( 'rescore', $rescores );
                }
                break;
            case 'create_timestamp_asc':
                $query->setSort( [ 'create_timestamp' => 'asc' ] );
                break;
            case 'create_timestamp_desc':
                $query->setSort( [ 'create_timestamp' => 'desc' ] );
                break;
            case 'last_edit_asc':
                $query->setSort( [ 'timestamp' => 'asc' ] );
                break;
            case 'last_edit_desc':
                $query->setSort( [ 'timestamp' => 'desc' ] );
                break;
            case 'incoming_links_asc':
                $query->setSort( [ 'incoming_links' => [
                    'order' => 'asc',
                    'missing' => '_first',
                ] ] );
                break;
            case 'incoming_links_desc':
                $query->setSort( [ 'incoming_links' => [
                    'order' => 'desc',
                    'missing' => '_last',
                ] ] );
                break;
            case 'none':
                // Return documents in index order
                $query->setSort( [ '_doc' ] );
                break;
            case 'random':
                $randomSeed = $this->searchContext->getSearchQuery()->getRandomSeed();
                if ( $randomSeed === null && $this->offset !== 0 ) {
                    $this->searchContext->addWarning( 'cirrussearch-offset-not-allowed-with-random-sort' );
                    $this->offset = 0;
                }
                // Can't use an empty array, it would JSONify to [] instead of {}.
                $scoreParams = ( $randomSeed === null )
                    ? (object)[]
                    : [ 'seed' => $randomSeed, 'field' => '_seq_no' ];
                $query->setQuery( $this->randomScoreQuery( $mainQuery, $scoreParams ) );
                break;
            case 'user_random':
                // Randomly ordered, but consistent for a single user
                $query->setQuery( $this->randomScoreQuery( $mainQuery, [
                    'seed' => Util::generateIdentToken(),
                    'field' => '_seq_no',
                ] ) );
                break;
            default:
                // Same as just_match. No user warning since an invalid sort
                // getting this far is a bug in the calling code, which should
                // be validating its input.
                LoggerFactory::getInstance( 'CirrusSearch' )->warning(
                    "Invalid sort type: {sort}",
                    [ 'sort' => $this->sort ]
                );
        }
    }

    /**
     * Wrap the main query so that matching documents get a random score.
     *
     * Instead of setting a sort field the whole query is wrapped in a bool
     * filter and a must clause is added for the random score. This could
     * alternatively be a rescore over a limited document set, but in basic
     * testing the filter was more performant than an 8k rescore window even
     * with 50M total hits.
     *
     * @param mixed $mainQuery the main query, used as a filter clause
     * @param array|\stdClass $scoreParams parameters of the random_score function
     * @return mixed the result of the fluent bool query chain, suitable for Query::setQuery()
     */
    private function randomScoreQuery( $mainQuery, $scoreParams ) {
        $randomScore = ( new Query\FunctionScore() )
            ->setQuery( new Query\MatchAll() )
            ->addFunction( 'random_score', $scoreParams );

        return ( new Query\BoolQuery() )
            ->addFilter( $mainQuery )
            ->addMust( $randomScore );
    }

    /**
     * @return int
     */
    public function getOffset() {
        return $this->offset;
    }

    /**
     * @param int $offset
     * @return $this
     */
    public function setOffset( $offset ) {
        $this->offset = $offset;

        return $this;
    }

    /**
     * @return int
     */
    public function getLimit() {
        return $this->limit;
    }

    /**
     * @param int $limit
     * @return $this
     */
    public function setLimit( $limit ) {
        $this->limit = $limit;

        return $this;
    }

    /**
     * @return string|null null until a timeout has been set
     */
    public function getTimeout() {
        return $this->timeout;
    }

    /**
     * @param string $timeout string with time and unit, e.g. 20s for 20 seconds
     * @return $this
     */
    public function setTimeout( $timeout ) {
        $this->timeout = $timeout;

        return $this;
    }

    /**
     * @return \Elastica\Index An elastica type suitable for searching against
     *  the configured wiki over the host wiki's default connection.
     */
    public function getIndex(): \Elastica\Index {
        // An explicitly forced index always wins.
        if ( $this->index ) {
            return $this->index;
        }

        $indexBaseName = $this->indexBaseName;
        $config = $this->searchContext->getConfig();
        $hostConfig = $config->getHostWikiConfig();
        $indexSuffix = $this->connection->pickIndexSuffixForNamespaces(
            $this->searchContext->getNamespaces() );
        if ( $hostConfig->get( 'CirrusSearchCrossClusterSearch' ) ) {
            $local = $hostConfig->getClusterAssignment()->getCrossClusterName();
            $current = $config->getClusterAssignment()->getCrossClusterName();
            // Prefix the index with the cross cluster name only when the
            // target differs from the host wiki's one.
            if ( $local !== $current ) {
                $indexBaseName = $current . ':' . $indexBaseName;
            }
        }
        return $this->connection->getIndex( $indexBaseName, $indexSuffix );
    }

    /**
     * @param ?Index $index index to force, or null to go back to the resolution
     *  done by getIndex()
     * @return $this
     */
    public function setIndex( ?Index $index ): self {
        $this->index = $index;
        return $this;
    }

    /**
     * @return string
     */
    public function getSort() {
        return $this->sort;
    }

    /**
     * @param string $sort
     * @return $this
     */
    public function setSort( $sort ) {
        $this->sort = $sort;

        return $this;
    }

    /**
     * @return SearchContext
     */
    public function getSearchContext() {
        return $this->searchContext;
    }
}