Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
21.66% covered (danger)
21.66%
34 / 157
28.57% covered (danger)
28.57%
4 / 14
CRAP
0.00% covered (danger)
0.00%
0 / 1
SearchRequestBuilder
21.66% covered (danger)
21.66%
34 / 157
28.57% covered (danger)
28.57%
4 / 14
1563.97
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 build
0.00% covered (danger)
0.00%
0 / 110
0.00% covered (danger)
0.00%
0 / 1
1122
 getOffset
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setOffset
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 getLimit
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setLimit
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 getTimeout
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setTimeout
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 getIndex
100.00% covered (success)
100.00%
16 / 16
100.00% covered (success)
100.00%
1 / 1
5
 inferIndexFromConcreteNamespaceMap
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
7
 setIndex
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 getSort
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 setSort
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 getSearchContext
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace CirrusSearch\Search;
4
5use CirrusSearch\Connection;
6use CirrusSearch\SearchConfig;
7use CirrusSearch\Util;
8use Elastica\Index;
9use Elastica\Query;
10use MediaWiki\Logger\LoggerFactory;
11
/**
 * Build the search request body.
 *
 * Paging, timeout, sort and index settings are collected through the fluent
 * setters; build() combines them with the state held by the SearchContext
 * into an \Elastica\Search.
 */
class SearchRequestBuilder {
    /** @var SearchContext */
    private $searchContext;

    /** @var Connection */
    private $connection;

    /** @var string */
    private $indexBaseName;

    /** @var int */
    private $offset = 0;

    /** @var int */
    private $limit = 20;

    /**
     * @var string|null search timeout, string with time and unit, e.g. 20s for 20 seconds.
     *  Null until setTimeout() is called, in which case no timeout option is sent.
     */
    private $timeout;

    /**
     * @var Index|null force the index when set; when null, getIndex() resolves one from the
     *  concrete namespace map or {@link Connection::pickIndexSuffixForNamespaces}
     */
    private $index;

    /** @var string set the sort option, controls the use of rescore functions or elastic sort */
    private $sort = 'relevance';

    /**
     * @param SearchContext $searchContext
     * @param Connection $connection
     * @param string $indexBaseName
     */
    public function __construct( SearchContext $searchContext, Connection $connection, $indexBaseName ) {
        $this->searchContext = $searchContext;
        $this->connection = $connection;
        $this->indexBaseName = $indexBaseName;
    }

    /**
     * Build the search request.
     *
     * Note that building is not free of side effects: a dedup filter may be
     * added to the search context, and the 'random' sort may add a warning to
     * the search context and reset the offset of this builder to 0.
     *
     * @return \Elastica\Search
     */
    public function build() {
        $resultsType = $this->searchContext->getResultsType();

        $query = new Query();
        // Track at least offset + limit + 1 hits if precise total_hits is not requested.
        // This is useful to know if more results are available on the next page.
        // This is computed before applySort(), so it uses the offset as configured
        // by the caller even when the 'random' sort later resets it.
        $query->setTrackTotalHits( $this->searchContext->getTrackTotalHits() ? true : $this->offset + $this->limit + 1 );
        $query->setSource( $resultsType->getSourceFiltering() );
        $query->setParam( "fields", $resultsType->getFields() );

        $extraIndexes = $this->searchContext->getExtraIndices();

        // The filter must be added before getQuery() is called below.
        if ( $extraIndexes && $this->searchContext->getConfig()->getElement( 'CirrusSearchDeduplicateInQuery' ) !== false ) {
            $this->searchContext->addNotFilter( new \Elastica\Query\Term(
                [ 'local_sites_with_dupe' => $this->indexBaseName ]
            ) );
        }

        $mainQuery = $this->searchContext->getQuery();
        $query->setQuery( $mainQuery );

        foreach ( $this->searchContext->getAggregations() as $agg ) {
            $query->addAggregation( $agg );
        }

        $highlight = $this->searchContext->getHighlight( $resultsType, $mainQuery );
        if ( $highlight ) {
            $query->setHighlight( $highlight );
        }

        $suggestQueries = $this->searchContext->getFallbackRunner()->getElasticSuggesters();
        if ( $suggestQueries ) {
            $query->setParam( 'suggest', [
                // TODO: remove special case on 1-elt array, added to not change the test fixtures
                // We should switch to explicit naming
                'suggest' => count( $suggestQueries ) === 1 ? reset( $suggestQueries ) : $suggestQueries
            ] );
            $query->addParam( 'stats', 'suggest' );
        }

        foreach ( $this->searchContext->getSyntaxUsed() as $syntax ) {
            $query->addParam( 'stats', $syntax );
        }

        // May reset $this->offset, so it has to run before the paging below.
        $this->applySort( $query, $mainQuery );

        // Falsy values (0, null) are simply not sent with the request.
        if ( $this->offset ) {
            $query->setFrom( $this->offset );
        }
        if ( $this->limit ) {
            $query->setSize( $this->limit );
        }

        // Setup the search
        $queryOptions = [];
        if ( $this->timeout ) {
            $queryOptions[\Elastica\Search::OPTION_TIMEOUT] = $this->timeout;
        }
        // @todo when switching to multi-search this has to be provided at the top level
        if ( $this->searchContext->getConfig()->get( 'CirrusSearchMoreAccurateScoringMode' ) ) {
            $queryOptions[\Elastica\Search::OPTION_SEARCH_TYPE] = \Elastica\Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH;
        }

        $search = $this->getIndex()->createSearch( $query, $queryOptions );
        $crossClusterName = $this->connection->getConfig()->getClusterAssignment()->getCrossClusterName();
        foreach ( $extraIndexes as $i ) {
            $search->addIndex( $this->connection->getIndex( $i->getSearchIndex( $crossClusterName ) ) );
        }

        $this->searchContext->getDebugOptions()->applyDebugOptions( $query );
        return $search;
    }

    /**
     * Apply the configured sort option to the query.
     *
     * Depending on the option this attaches rescores, sets an elastic sort, or
     * replaces the query with a random-score wrapper around $mainQuery.
     * Unknown options, and the natural title sorts when
     * CirrusSearchNaturalTitleSort is not enabled, are logged and otherwise
     * behave like 'just_match'.
     *
     * The 'random' option without a seed resets the offset of this builder to 0
     * and adds a warning to the search context when an offset was requested.
     *
     * See also CirrusSearch::getValidSorts()
     *
     * @param Query $query request body being assembled, modified in place
     * @param mixed $mainQuery the query obtained from SearchContext::getQuery()
     */
    private function applySort( Query $query, $mainQuery ): void {
        switch ( $this->sort ) {
            case 'just_match':
                // Use just matching scores, without any rescoring, and default sort.
                break;
            case 'relevance':
                // Add some rescores to improve relevance
                $rescores = $this->searchContext->getRescore();
                if ( $rescores !== [] ) {
                    $query->setParam( 'rescore', $rescores );
                }
                break;  // The default
            case 'create_timestamp_asc':
                $query->setSort( [ 'create_timestamp' => 'asc' ] );
                break;
            case 'create_timestamp_desc':
                $query->setSort( [ 'create_timestamp' => 'desc' ] );
                break;
            case 'last_edit_asc':
                $query->setSort( [ 'timestamp' => 'asc' ] );
                break;
            case 'last_edit_desc':
                $query->setSort( [ 'timestamp' => 'desc' ] );
                break;
            case 'incoming_links_asc':
                $query->setSort( [ 'incoming_links' => [
                    'order' => 'asc',
                    'missing' => '_first',
                ] ] );
                break;
            case 'incoming_links_desc':
                $query->setSort( [ 'incoming_links' => [
                    'order' => 'desc',
                    'missing' => '_last',
                ] ] );
                break;
            case 'none':
                // Return documents in index order
                $query->setSort( [ '_doc' ] );
                break;
            case 'random':
                $randomSeed = $this->searchContext->getSearchQuery()->getRandomSeed();
                if ( $randomSeed === null && $this->offset !== 0 ) {
                    $this->searchContext->addWarning( 'cirrussearch-offset-not-allowed-with-random-sort' );
                    $this->offset = 0;
                }
                // Can't use an empty array, it would JSONify to [] instead of {}.
                $scoreParams = ( $randomSeed === null ) ? (object)[] : [ 'seed' => $randomSeed, 'field' => '_seq_no' ];
                $query->setQuery( $this->buildRandomScoreQuery( $mainQuery, $scoreParams ) );
                break;
            case 'user_random':
                // Randomly ordered, but consistent for a single user
                $query->setQuery( $this->buildRandomScoreQuery( $mainQuery, [
                    'seed' => Util::generateIdentToken(),
                    'field' => '_seq_no',
                ] ) );
                break;

            case 'title_natural_asc':
            case 'title_natural_desc':
                if ( $this->searchContext->getConfig()->getElement( 'CirrusSearchNaturalTitleSort', 'use' ) ) {
                    // Third underscore-separated segment of the option: 'asc' or 'desc'.
                    $dir = explode( '_', $this->sort, 3 )[2];
                    $query->setSort( [
                        'namespace_text' => $dir,
                        'title.natural_sort' => $dir,
                    ] );
                    break;
                }
                // Intentional fall-through to default error case.

            default:
                // Same as just_match. No user warning since an invalid sort
                // getting this far is a bug in the calling code which should
                // be validating its input.
                LoggerFactory::getInstance( 'CirrusSearch' )->warning(
                    "Invalid sort type: {sort}",
                    [ 'sort' => $this->sort ]
                );
        }
    }

    /**
     * Wrap the main query so that matching documents get a random score.
     *
     * Instead of setting a sort field wrap the whole query in a
     * bool filter and add a must clause for the random score. This
     * could alternatively be a rescore over a limited document
     * set, but in basic testing the filter was more performant
     * than an 8k rescore window even with 50M total hits.
     *
     * @param mixed $mainQuery the query obtained from SearchContext::getQuery()
     * @param array|\stdClass $scoreParams parameters of the random_score function
     * @return Query\BoolQuery
     */
    private function buildRandomScoreQuery( $mainQuery, $scoreParams ) {
        return ( new Query\BoolQuery() )
            ->addFilter( $mainQuery )
            ->addMust( ( new Query\FunctionScore() )
                ->setQuery( new Query\MatchAll() )
                ->addFunction( 'random_score', $scoreParams ) );
    }

    /**
     * @return int
     */
    public function getOffset() {
        return $this->offset;
    }

    /**
     * @param int $offset
     * @return self
     */
    public function setOffset( $offset ) {
        $this->offset = $offset;

        return $this;
    }

    /**
     * @return int
     */
    public function getLimit() {
        return $this->limit;
    }

    /**
     * @param int $limit
     * @return self
     */
    public function setLimit( $limit ) {
        $this->limit = $limit;

        return $this;
    }

    /**
     * @return string|null the timeout (time and unit, e.g. 20s), or null
     *  when setTimeout() has not been called
     */
    public function getTimeout() {
        return $this->timeout;
    }

    /**
     * @param string $timeout string with time and unit, e.g. 20s for 20 seconds
     * @return self
     */
    public function setTimeout( $timeout ) {
        $this->timeout = $timeout;

        return $this;
    }

    /**
     * @return \Elastica\Index An elastica type suitable for searching against
     *  the configured wiki over the host wiki's default connection.
     */
    public function getIndex(): \Elastica\Index {
        // An index forced through setIndex() always wins.
        if ( $this->index ) {
            return $this->index;
        }

        $config = $this->searchContext->getConfig();
        $hostConfig = $config->getHostWikiConfig();

        $indexName = $this->inferIndexFromConcreteNamespaceMap( $config );
        if ( $indexName === null ) {
            $indexSuffix = $this->connection->pickIndexSuffixForNamespaces(
                $this->searchContext->getNamespaces() );
            $indexName = $this->connection->getIndexName( $this->indexBaseName, $indexSuffix );
        }

        if ( $hostConfig->get( 'CirrusSearchCrossClusterSearch' ) ) {
            $local = $hostConfig->getClusterAssignment()->getCrossClusterName();
            $current = $config->getClusterAssignment()->getCrossClusterName();
            // Prefix the index with the cluster name only when the target
            // config is assigned to a different cluster than the host wiki.
            if ( $local !== $current ) {
                $indexName = $current . ':' . $indexName;
            }
        }
        return $this->connection->getIndex( $indexName );
    }

    /**
     * Attempt to infer the index from the concrete namespace map.
     * This is used mainly during crossproject searches where the concrete namespace map
     * is provided by the config dump API.
     * Since we might want to query namespaces that are unknown to the host wiki, we
     * can't use the connection to pick the index suffix.
     * Instead, we use the concrete namespace map to infer the index suffix.
     * Returns null if no namespaces are requested, if the concrete namespace map is not
     * available, if a requested namespace is missing from it, or if multiple index types
     * might be required. In all these cases we rely on the host wiki connection to pick
     * up the right index.
     *
     * @param SearchConfig $config
     * @return string|null
     */
    private function inferIndexFromConcreteNamespaceMap( SearchConfig $config ): ?string {
        $namespaces = $this->searchContext->getNamespaces();
        if ( !$namespaces || !$config->has( 'CirrusSearchConcreteNamespaceMap' ) ) {
            return null;
        }

        // Attempt to skip Connection::pickIndexSuffixForNamespaces() and use the
        // concrete namespace map.
        // Reason is that the connection is built against the host wiki config but
        // the concrete namespace map is likely obtained for the target wiki.
        $concreteNamespaceMap = $config->get( 'CirrusSearchConcreteNamespaceMap' );
        $indices = [];
        foreach ( $namespaces as $ns ) {
            if ( !isset( $concreteNamespaceMap[$ns] ) ) {
                // Something's odd here: a requested namespace is missing from
                // the map, so we can't rely on it to pick the index.
                return null;
            }
            // Keyed by index name so that duplicates collapse.
            $indices[$concreteNamespaceMap[$ns]] = $concreteNamespaceMap[$ns];
        }

        // The concrete namespace map contains the full index name $basename_$suffix.
        // If there's only one index requested we target this one, otherwise we
        // return null and let the caller pick the index.
        if ( count( $indices ) !== 1 ) {
            return null;
        }
        $concreteIndex = reset( $indices );
        return $this->connection->getIndexName( $concreteIndex );
    }

    /**
     * @param ?Index $index index to force, or null to let getIndex() resolve it
     * @return $this
     */
    public function setIndex( ?Index $index ): self {
        $this->index = $index;
        return $this;
    }

    /**
     * @return string
     */
    public function getSort() {
        return $this->sort;
    }

    /**
     * @param string $sort one of the sort options handled by build()
     * @return self
     */
    public function setSort( $sort ) {
        $this->sort = $sort;

        return $this;
    }

    /**
     * @return SearchContext
     */
    public function getSearchContext() {
        return $this->searchContext;
    }
}