Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
13.64% |
18 / 132 |
|
23.08% |
3 / 13 |
CRAP | |
0.00% |
0 / 1 |
SearchRequestBuilder | |
13.64% |
18 / 132 |
|
23.08% |
3 / 13 |
1291.09 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
build | |
0.00% |
0 / 101 |
|
0.00% |
0 / 1 |
870 | |||
getOffset | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setOffset | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getLimit | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setLimit | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getTimeout | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setTimeout | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getIndex | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
4 | |||
setIndex | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getSort | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setSort | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getSearchContext | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Search; |
4 | |
5 | use CirrusSearch\Connection; |
6 | use CirrusSearch\Util; |
7 | use Elastica\Index; |
8 | use Elastica\Query; |
9 | use MediaWiki\Logger\LoggerFactory; |
10 | |
/**
 * Build the search request body.
 *
 * Collects everything held by a SearchContext (main query, aggregations,
 * highlighting, suggesters, rescores) together with the paging, timeout,
 * sort and index settings configured on this builder, and turns them into
 * an \Elastica\Search ready to be sent.
 */
class SearchRequestBuilder {
	/** @var SearchContext */
	private $searchContext;

	/** @var Connection */
	private $connection;

	/** @var string */
	private $indexBaseName;

	/** @var int */
	private $offset = 0;

	/** @var int */
	private $limit = 20;

	/**
	 * @var string|null search timeout, string with time and unit, e.g. 20s for 20 seconds.
	 *  Null until setTimeout() is called, in which case no timeout option is sent.
	 */
	private $timeout;

	/**
	 * @var Index|null when set this index is searched as-is; otherwise getIndex()
	 *  derives one using {@link Connection::pickIndexSuffixForNamespaces}
	 */
	private $index;

	/** @var string set the sort option, controls the use of rescore functions or elastic sort */
	private $sort = 'relevance';

	/**
	 * @param SearchContext $searchContext source of the query, filters and options
	 * @param Connection $connection connection used to resolve indices
	 * @param string $indexBaseName base name of the index to search
	 */
	public function __construct( SearchContext $searchContext, Connection $connection, $indexBaseName ) {
		$this->searchContext = $searchContext;
		$this->connection = $connection;
		$this->indexBaseName = $indexBaseName;
	}

	/**
	 * Build the search request
	 *
	 * Note that with the 'random' sort and no explicit seed a non-zero offset
	 * is reset to 0 on this builder (and a warning is added to the context).
	 *
	 * @return \Elastica\Search
	 */
	public function build() {
		$resultsType = $this->searchContext->getResultsType();

		$query = new Query();
		$query->setTrackTotalHits( $this->searchContext->getTrackTotalHits() );
		$query->setSource( $resultsType->getSourceFiltering() );
		$query->setParam( "fields", $resultsType->getFields() );

		$extraIndexes = $this->searchContext->getExtraIndices();

		// The not-filter must be registered before getQuery() is called below
		// so that it ends up in the main query.
		$dedupeInQuery = $this->searchContext->getConfig()
			->getElement( 'CirrusSearchDeduplicateInQuery' ) !== false;
		if ( $extraIndexes && $dedupeInQuery ) {
			$this->searchContext->addNotFilter( new \Elastica\Query\Term(
				[ 'local_sites_with_dupe' => $this->indexBaseName ]
			) );
		}

		$mainQuery = $this->searchContext->getQuery();
		$query->setQuery( $mainQuery );

		foreach ( $this->searchContext->getAggregations() as $agg ) {
			$query->addAggregation( $agg );
		}

		$highlight = $this->searchContext->getHighlight( $resultsType, $mainQuery );
		if ( $highlight ) {
			$query->setHighlight( $highlight );
		}

		$suggestQueries = $this->searchContext->getFallbackRunner()->getElasticSuggesters();
		if ( $suggestQueries ) {
			$query->setParam( 'suggest', [
				// TODO: remove special case on 1-elt array, added to not change the test fixtures
				// We should switch to explicit naming
				'suggest' => count( $suggestQueries ) === 1 ? reset( $suggestQueries ) : $suggestQueries
			] );
			$query->addParam( 'stats', 'suggest' );
		}

		foreach ( $this->searchContext->getSyntaxUsed() as $syntax ) {
			$query->addParam( 'stats', $syntax );
		}

		// Must run before the offset is read below: the 'random' sort may reset it.
		$this->applySort( $query, $mainQuery );

		if ( $this->offset ) {
			$query->setFrom( $this->offset );
		}
		// NOTE(review): a limit of 0 leaves the size unset rather than requesting
		// zero hits; presumably the backend default then applies - confirm intended.
		if ( $this->limit ) {
			$query->setSize( $this->limit );
		}

		// Setup the search
		$queryOptions = [];
		if ( $this->timeout ) {
			$queryOptions[\Elastica\Search::OPTION_TIMEOUT] = $this->timeout;
		}
		// @todo when switching to multi-search this has to be provided at the top level
		if ( $this->searchContext->getConfig()->get( 'CirrusSearchMoreAccurateScoringMode' ) ) {
			$queryOptions[\Elastica\Search::OPTION_SEARCH_TYPE] =
				\Elastica\Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH;
		}

		$search = $this->getIndex()->createSearch( $query, $queryOptions );
		$crossClusterName = $this->connection->getConfig()->getClusterAssignment()->getCrossClusterName();
		foreach ( $extraIndexes as $i ) {
			$search->addIndex( $this->connection->getIndex( $i->getSearchIndex( $crossClusterName ) ) );
		}

		$this->searchContext->getDebugOptions()->applyDebugOptions( $query );
		return $search;
	}

	/**
	 * Apply the configured sort option to the query.
	 *
	 * Depending on the option this adds rescores, sets an elastic sort, or
	 * replaces the query with a randomly scored wrapper around $mainQuery.
	 * See also CirrusSearch::getValidSorts()
	 *
	 * @param Query $query the request body being built, modified in place
	 * @param \Elastica\Query\AbstractQuery $mainQuery the main query already set on $query
	 */
	private function applySort( Query $query, $mainQuery ): void {
		switch ( $this->sort ) {
			case 'just_match':
				// Use just matching scores, without any rescoring, and default sort.
				break;
			case 'relevance':
				// Add some rescores to improve relevance
				$rescores = $this->searchContext->getRescore();
				if ( $rescores !== [] ) {
					$query->setParam( 'rescore', $rescores );
				}
				break; // The default
			case 'create_timestamp_asc':
				$query->setSort( [ 'create_timestamp' => 'asc' ] );
				break;
			case 'create_timestamp_desc':
				$query->setSort( [ 'create_timestamp' => 'desc' ] );
				break;
			case 'last_edit_asc':
				$query->setSort( [ 'timestamp' => 'asc' ] );
				break;
			case 'last_edit_desc':
				$query->setSort( [ 'timestamp' => 'desc' ] );
				break;
			case 'incoming_links_asc':
				$query->setSort( [ 'incoming_links' => [
					'order' => 'asc',
					'missing' => '_first',
				] ] );
				break;
			case 'incoming_links_desc':
				$query->setSort( [ 'incoming_links' => [
					'order' => 'desc',
					'missing' => '_last',
				] ] );
				break;
			case 'none':
				// Return documents in index order
				$query->setSort( [ '_doc' ] );
				break;
			case 'random':
				$randomSeed = $this->searchContext->getSearchQuery()->getRandomSeed();
				if ( $randomSeed === null && $this->offset !== 0 ) {
					// Without a seed the offset is dropped and the user is warned.
					$this->searchContext->addWarning( 'cirrussearch-offset-not-allowed-with-random-sort' );
					$this->offset = 0;
				}
				// Can't use an empty array, it would JSONify to [] instead of {}.
				$scoreParams = ( $randomSeed === null )
					? (object)[]
					: [ 'seed' => $randomSeed, 'field' => '_seq_no' ];
				$query->setQuery( $this->buildRandomScoreQuery( $mainQuery, $scoreParams ) );
				break;
			case 'user_random':
				// Randomly ordered, but consistent for a single user
				$query->setQuery( $this->buildRandomScoreQuery( $mainQuery, [
					'seed' => Util::generateIdentToken(),
					'field' => '_seq_no',
				] ) );
				break;
			default:
				// Same as just_match. No user warning since an invalid sort
				// getting this far is a bug in the calling code, which should
				// be validating its input.
				LoggerFactory::getInstance( 'CirrusSearch' )->warning(
					"Invalid sort type: {sort}",
					[ 'sort' => $this->sort ]
				);
		}
	}

	/**
	 * Wrap the main query so that matching documents get a random score.
	 *
	 * Instead of setting a sort field the whole query is wrapped in a bool
	 * filter with a must clause for the random score. This could
	 * alternatively be a rescore over a limited document set, but in basic
	 * testing the filter was more performant than an 8k rescore window even
	 * with 50M total hits.
	 *
	 * @param \Elastica\Query\AbstractQuery $mainQuery query used as a (non-scoring) filter
	 * @param array|\stdClass $scoreParams parameters of the random_score function
	 * @return Query\BoolQuery
	 */
	private function buildRandomScoreQuery( $mainQuery, $scoreParams ) {
		$randomScore = ( new Query\FunctionScore() )
			->setQuery( new Query\MatchAll() )
			->addFunction( 'random_score', $scoreParams );

		return ( new Query\BoolQuery() )
			->addFilter( $mainQuery )
			->addMust( $randomScore );
	}

	/**
	 * @return int
	 */
	public function getOffset() {
		return $this->offset;
	}

	/**
	 * @param int $offset
	 * @return SearchRequestBuilder
	 */
	public function setOffset( $offset ) {
		$this->offset = $offset;

		return $this;
	}

	/**
	 * @return int
	 */
	public function getLimit() {
		return $this->limit;
	}

	/**
	 * @param int $limit
	 * @return SearchRequestBuilder
	 */
	public function setLimit( $limit ) {
		$this->limit = $limit;

		return $this;
	}

	/**
	 * @return string|null null when no timeout has been set
	 */
	public function getTimeout() {
		return $this->timeout;
	}

	/**
	 * @param string $timeout time and unit, e.g. 20s for 20 seconds
	 * @return SearchRequestBuilder
	 */
	public function setTimeout( $timeout ) {
		$this->timeout = $timeout;

		return $this;
	}

	/**
	 * @return \Elastica\Index The index forced with setIndex() if any, otherwise
	 *  an index picked from the namespaces of the search context. When the host
	 *  wiki enables CirrusSearchCrossClusterSearch and the target cluster differs
	 *  from the host one, the index base name is prefixed with "<cluster>:".
	 */
	public function getIndex(): \Elastica\Index {
		if ( $this->index ) {
			return $this->index;
		}

		$indexBaseName = $this->indexBaseName;
		$config = $this->searchContext->getConfig();
		$hostConfig = $config->getHostWikiConfig();
		$indexSuffix = $this->connection->pickIndexSuffixForNamespaces(
			$this->searchContext->getNamespaces() );
		if ( $hostConfig->get( 'CirrusSearchCrossClusterSearch' ) ) {
			$local = $hostConfig->getClusterAssignment()->getCrossClusterName();
			$current = $config->getClusterAssignment()->getCrossClusterName();
			if ( $local !== $current ) {
				$indexBaseName = $current . ':' . $indexBaseName;
			}
		}

		return $this->connection->getIndex( $indexBaseName, $indexSuffix );
	}

	/**
	 * @param ?Index $index index to force, or null to go back to the automatic choice
	 * @return $this
	 */
	public function setIndex( ?Index $index ): self {
		$this->index = $index;
		return $this;
	}

	/**
	 * @return string
	 */
	public function getSort() {
		return $this->sort;
	}

	/**
	 * @param string $sort
	 * @return SearchRequestBuilder
	 */
	public function setSort( $sort ) {
		$this->sort = $sort;

		return $this;
	}

	/**
	 * @return SearchContext
	 */
	public function getSearchContext() {
		return $this->searchContext;
	}
}