Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
12.95% covered (danger)
12.95%
18 / 139
23.08% covered (danger)
23.08%
3 / 13
CRAP
0.00% covered (danger)
0.00%
0 / 1
SearchRequestBuilder
12.95% covered (danger)
12.95%
18 / 139
23.08% covered (danger)
23.08%
3 / 13
1567.83
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 build
0.00% covered (danger)
0.00%
0 / 108
0.00% covered (danger)
0.00%
0 / 1
1122
 getOffset
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setOffset
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 getLimit
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setLimit
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 getTimeout
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setTimeout
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 getIndex
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
4
 setIndex
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getSort
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setSort
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 getSearchContext
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace CirrusSearch\Search;
4
5use CirrusSearch\Connection;
6use CirrusSearch\Util;
7use Elastica\Index;
8use Elastica\Query;
9use MediaWiki\Logger\LoggerFactory;
10
/**
 * Build the search request body.
 *
 * Collects the paging (offset/limit), timeout, sort and optional forced index
 * and combines them with the query parts held by the SearchContext into an
 * \Elastica\Search, see {@link SearchRequestBuilder::build()}.
 */
class SearchRequestBuilder {
    /** @var SearchContext */
    private $searchContext;

    /** @var Connection */
    private $connection;

    /** @var string */
    private $indexBaseName;

    /** @var int */
    private $offset = 0;

    /** @var int */
    private $limit = 20;

    /**
     * @var string|null search timeout, string with time and unit, e.g. 20s for 20 seconds.
     *  Null until setTimeout() is called, in which case no timeout option is sent.
     */
    private $timeout;

    /**
     * @var Index|null force the index when set; when null getIndex() picks one
     *  using {@link Connection::pickIndexSuffixForNamespaces}
     */
    private $index;

    /** @var string set the sort option, controls the use of rescore functions or elastic sort */
    private $sort = 'relevance';

    /**
     * @param SearchContext $searchContext
     * @param Connection $connection
     * @param string $indexBaseName
     */
    public function __construct( SearchContext $searchContext, Connection $connection, $indexBaseName ) {
        $this->searchContext = $searchContext;
        $this->connection = $connection;
        $this->indexBaseName = $indexBaseName;
    }

    /**
     * Build the search request
     *
     * Note that with the 'random' sort and no random seed the offset of this
     * builder is reset to 0 and a warning is added to the search context.
     *
     * @return \Elastica\Search
     */
    public function build() {
        $resultsType = $this->searchContext->getResultsType();

        $query = new Query();
        // Track at least offset + limit + 1 hits if precise total_hits is not requested.
        // This is useful to know if more results are available on the next page.
        $query->setTrackTotalHits( $this->searchContext->getTrackTotalHits() ? true : $this->offset + $this->limit + 1 );
        $query->setSource( $resultsType->getSourceFiltering() );
        $query->setParam( "fields", $resultsType->getFields() );

        $extraIndexes = $this->searchContext->getExtraIndices();

        // Registered before getQuery() is called below.
        // NOTE(review): presumably getQuery() folds the context filters into the
        // returned query, which would make this ordering mandatory - confirm.
        if ( $extraIndexes && $this->searchContext->getConfig()->getElement( 'CirrusSearchDeduplicateInQuery' ) !== false ) {
            $this->searchContext->addNotFilter( new \Elastica\Query\Term(
                [ 'local_sites_with_dupe' => $this->indexBaseName ]
            ) );
        }

        $mainQuery = $this->searchContext->getQuery();
        $query->setQuery( $mainQuery );

        foreach ( $this->searchContext->getAggregations() as $agg ) {
            $query->addAggregation( $agg );
        }

        $highlight = $this->searchContext->getHighlight( $resultsType, $mainQuery );
        if ( $highlight ) {
            $query->setHighlight( $highlight );
        }

        $suggestQueries = $this->searchContext->getFallbackRunner()->getElasticSuggesters();
        if ( $suggestQueries ) {
            $query->setParam( 'suggest', [
                // TODO: remove special case on 1-elt array, added to not change the test fixtures
                // We should switch to explicit naming
                'suggest' => count( $suggestQueries ) === 1 ? reset( $suggestQueries ) : $suggestQueries
            ] );
            $query->addParam( 'stats', 'suggest' );
        }

        foreach ( $this->searchContext->getSyntaxUsed() as $syntax ) {
            $query->addParam( 'stats', $syntax );
        }

        // Must run before the offset is applied: the random sort may reset it.
        $this->applySort( $query, $mainQuery );

        if ( $this->offset ) {
            $query->setFrom( $this->offset );
        }
        // NOTE(review): a limit of 0 skips setSize() entirely, so no explicit size
        // is sent - confirm this is the intended meaning of limit 0.
        if ( $this->limit ) {
            $query->setSize( $this->limit );
        }

        // Setup the search
        $queryOptions = $this->buildQueryOptions();

        $search = $this->getIndex()->createSearch( $query, $queryOptions );
        $crossClusterName = $this->connection->getConfig()->getClusterAssignment()->getCrossClusterName();
        foreach ( $extraIndexes as $i ) {
            $search->addIndex( $this->connection->getIndex( $i->getSearchIndex( $crossClusterName ) ) );
        }

        // $query is the same object that was handed to createSearch() above.
        $this->searchContext->getDebugOptions()->applyDebugOptions( $query );
        return $search;
    }

    /**
     * Apply the configured sort option to the query.
     *
     * Depending on the sort this sets rescores, sets an elastic sort or
     * replaces the query with a random scoring wrapper around $mainQuery.
     * Unknown sorts are logged and otherwise behave like 'just_match'.
     *
     * @param Query $query the request body being built, modified in place
     * @param mixed $mainQuery the query obtained from SearchContext::getQuery()
     */
    private function applySort( Query $query, $mainQuery ): void {
        // See also CirrusSearch::getValidSorts()
        switch ( $this->sort ) {
            case 'just_match':
                // Use just matching scores, without any rescoring, and default sort.
                break;
            case 'relevance':
                // Add some rescores to improve relevance
                $rescores = $this->searchContext->getRescore();
                if ( $rescores !== [] ) {
                    $query->setParam( 'rescore', $rescores );
                }
                break;  // The default
            case 'create_timestamp_asc':
                $query->setSort( [ 'create_timestamp' => 'asc' ] );
                break;
            case 'create_timestamp_desc':
                $query->setSort( [ 'create_timestamp' => 'desc' ] );
                break;
            case 'last_edit_asc':
                $query->setSort( [ 'timestamp' => 'asc' ] );
                break;
            case 'last_edit_desc':
                $query->setSort( [ 'timestamp' => 'desc' ] );
                break;
            case 'incoming_links_asc':
                $query->setSort( [ 'incoming_links' => [
                    'order' => 'asc',
                    'missing' => '_first',
                ] ] );
                break;
            case 'incoming_links_desc':
                $query->setSort( [ 'incoming_links' => [
                    'order' => 'desc',
                    'missing' => '_last',
                ] ] );
                break;
            case 'none':
                // Return documents in index order
                $query->setSort( [ '_doc' ] );
                break;
            case 'random':
                $randomSeed = $this->searchContext->getSearchQuery()->getRandomSeed();
                if ( $randomSeed === null && $this->offset !== 0 ) {
                    $this->searchContext->addWarning( 'cirrussearch-offset-not-allowed-with-random-sort' );
                    $this->offset = 0;
                }
                // Can't use an empty array, it would JSONify to [] instead of {}.
                $scoreParams = ( $randomSeed === null ) ? (object)[] : [ 'seed' => $randomSeed, 'field' => '_seq_no' ];
                $query->setQuery( $this->buildRandomScoreQuery( $mainQuery, $scoreParams ) );
                break;
            case 'user_random':
                // Randomly ordered, but consistent for a single user
                $query->setQuery( $this->buildRandomScoreQuery( $mainQuery, [
                    'seed' => Util::generateIdentToken(),
                    'field' => '_seq_no',
                ] ) );
                break;

            case 'title_natural_asc':
            case 'title_natural_desc':
                if ( $this->searchContext->getConfig()->getElement( 'CirrusSearchNaturalTitleSort', 'use' ) ) {
                    // The third '_' separated part of the sort name is the order (asc/desc).
                    $query->setSort( [
                        'title.natural_sort' => explode( '_', $this->sort, 3 )[2],
                    ] );
                    break;
                }
                // Intentional fall-through to default error case.

            default:
                // Same as just_match. No user warning since an invalid sort
                // getting this far is a bug in the calling code which should
                // be validating its input.
                LoggerFactory::getInstance( 'CirrusSearch' )->warning(
                    "Invalid sort type: {sort}",
                    [ 'sort' => $this->sort ]
                );
        }
    }

    /**
     * Wrap the main query so that matching documents get a random score.
     *
     * Instead of setting a sort field wrap the whole query in a bool filter
     * and add a must clause for the random score. This could alternatively be
     * a rescore over a limited document set, but in basic testing the filter
     * was more performant than an 8k rescore window even with 50M total hits.
     *
     * @param mixed $mainQuery the query obtained from SearchContext::getQuery(),
     *  used as the filter clause
     * @param array|\stdClass $scoreParams parameters of the random_score function
     * @return Query\BoolQuery
     */
    private function buildRandomScoreQuery( $mainQuery, $scoreParams ): Query\BoolQuery {
        $randomScore = ( new Query\FunctionScore() )
            ->setQuery( new Query\MatchAll() )
            ->addFunction( 'random_score', $scoreParams );

        return ( new Query\BoolQuery() )
            ->addFilter( $mainQuery )
            ->addMust( $randomScore );
    }

    /**
     * Build the options passed along with the query when creating the search.
     *
     * @return array options keyed by \Elastica\Search::OPTION_* constants
     */
    private function buildQueryOptions(): array {
        $queryOptions = [];
        if ( $this->timeout ) {
            $queryOptions[\Elastica\Search::OPTION_TIMEOUT] = $this->timeout;
        }
        // @todo when switching to multi-search this has to be provided at the top level
        if ( $this->searchContext->getConfig()->get( 'CirrusSearchMoreAccurateScoringMode' ) ) {
            $queryOptions[\Elastica\Search::OPTION_SEARCH_TYPE] = \Elastica\Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH;
        }

        return $queryOptions;
    }

    /**
     * @return int
     */
    public function getOffset() {
        return $this->offset;
    }

    /**
     * @param int $offset
     * @return self
     */
    public function setOffset( $offset ) {
        $this->offset = $offset;

        return $this;
    }

    /**
     * @return int
     */
    public function getLimit() {
        return $this->limit;
    }

    /**
     * @param int $limit
     * @return self
     */
    public function setLimit( $limit ) {
        $this->limit = $limit;

        return $this;
    }

    /**
     * @return string|null the timeout, or null when none was set
     */
    public function getTimeout() {
        return $this->timeout;
    }

    /**
     * @param string $timeout string with time and unit, e.g. 20s for 20 seconds
     * @return self
     */
    public function setTimeout( $timeout ) {
        $this->timeout = $timeout;

        return $this;
    }

    /**
     * Get the index to search: the forced one when set via setIndex(),
     * otherwise the index picked for the namespaces of the search context.
     *
     * @return \Elastica\Index An elastica index suitable for searching against
     *  the configured wiki over the host wiki's default connection.
     */
    public function getIndex(): \Elastica\Index {
        if ( $this->index ) {
            return $this->index;
        }

        $indexBaseName = $this->indexBaseName;
        $config = $this->searchContext->getConfig();
        $hostConfig = $config->getHostWikiConfig();
        $indexSuffix = $this->connection->pickIndexSuffixForNamespaces(
            $this->searchContext->getNamespaces() );
        if ( $hostConfig->get( 'CirrusSearchCrossClusterSearch' ) ) {
            $local = $hostConfig->getClusterAssignment()->getCrossClusterName();
            $current = $config->getClusterAssignment()->getCrossClusterName();
            // Prefix with the cluster name only when the target wiki is
            // assigned to a different cluster than the host wiki.
            if ( $local !== $current ) {
                $indexBaseName = $current . ':' . $indexBaseName;
            }
        }

        return $this->connection->getIndex( $indexBaseName, $indexSuffix );
    }

    /**
     * @param ?Index $index index to force, or null to let getIndex() pick one
     * @return $this
     */
    public function setIndex( ?Index $index ): self {
        $this->index = $index;
        return $this;
    }

    /**
     * @return string
     */
    public function getSort() {
        return $this->sort;
    }

    /**
     * @param string $sort
     * @return self
     */
    public function setSort( $sort ) {
        $this->sort = $sort;

        return $this;
    }

    /**
     * @return SearchContext
     */
    public function getSearchContext() {
        return $this->searchContext;
    }
}