Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
12.95% |
18 / 139 |
|
23.08% |
3 / 13 |
CRAP | |
0.00% |
0 / 1 |
SearchRequestBuilder | |
12.95% |
18 / 139 |
|
23.08% |
3 / 13 |
1567.83 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
build | |
0.00% |
0 / 108 |
|
0.00% |
0 / 1 |
1122 | |||
getOffset | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setOffset | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getLimit | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setLimit | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getTimeout | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setTimeout | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getIndex | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
4 | |||
setIndex | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getSort | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setSort | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getSearchContext | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace CirrusSearch\Search; |
4 | |
5 | use CirrusSearch\Connection; |
6 | use CirrusSearch\Util; |
7 | use Elastica\Index; |
8 | use Elastica\Query; |
9 | use MediaWiki\Logger\LoggerFactory; |
10 | |
/**
 * Build the search request body.
 *
 * Assembles an \Elastica\Search from the state held by a SearchContext
 * (query, aggregations, highlight, suggesters, rescores) together with the
 * pagination, timeout, sort and index options configured on this builder.
 */
class SearchRequestBuilder {
	/** @var SearchContext */
	private $searchContext;

	/** @var Connection */
	private $connection;

	/** @var string */
	private $indexBaseName;

	/** @var int */
	private $offset = 0;

	/** @var int */
	private $limit = 20;

	/**
	 * @var string|null search timeout, string with time and unit, e.g. 20s for 20 seconds.
	 *  Null until setTimeout() is called, in which case no timeout option is sent.
	 */
	private $timeout;

	/**
	 * @var Index|null when set, getIndex() returns this index as-is; otherwise the index
	 *  is resolved from the index base name and the suffix picked by
	 *  {@link Connection::pickIndexSuffixForNamespaces}
	 */
	private $index;

	/** @var string set the sort option, controls the use of rescore functions or elastic sort */
	private $sort = 'relevance';

	/**
	 * @param SearchContext $searchContext
	 * @param Connection $connection
	 * @param string $indexBaseName
	 */
	public function __construct( SearchContext $searchContext, Connection $connection, $indexBaseName ) {
		$this->searchContext = $searchContext;
		$this->connection = $connection;
		$this->indexBaseName = $indexBaseName;
	}

	/**
	 * Build the search request.
	 *
	 * Note that this may alter the builder and its search context: a not-filter is
	 * added to the context when extra indices are deduplicated in-query, and the
	 * offset is reset to 0 (with a warning) for an unseeded random sort.
	 *
	 * @return \Elastica\Search
	 */
	public function build() {
		$resultsType = $this->searchContext->getResultsType();

		$query = new Query();
		// Track at least offset + limit + 1 hits if precise total_hits is not requested.
		// This is useful to know if more results are available on the next page.
		$query->setTrackTotalHits( $this->searchContext->getTrackTotalHits() ? true : $this->offset + $this->limit + 1 );
		$query->setSource( $resultsType->getSourceFiltering() );
		$query->setParam( "fields", $resultsType->getFields() );

		$extraIndexes = $this->searchContext->getExtraIndices();

		// The filter must be registered before the main query is fetched from the context below.
		if ( $extraIndexes && $this->searchContext->getConfig()->getElement( 'CirrusSearchDeduplicateInQuery' ) !== false ) {
			$this->searchContext->addNotFilter( new \Elastica\Query\Term(
				[ 'local_sites_with_dupe' => $this->indexBaseName ]
			) );
		}

		$mainQuery = $this->searchContext->getQuery();
		$query->setQuery( $mainQuery );

		foreach ( $this->searchContext->getAggregations() as $agg ) {
			$query->addAggregation( $agg );
		}

		$highlight = $this->searchContext->getHighlight( $resultsType, $mainQuery );
		if ( $highlight ) {
			$query->setHighlight( $highlight );
		}

		$suggestQueries = $this->searchContext->getFallbackRunner()->getElasticSuggesters();
		if ( $suggestQueries ) {
			$query->setParam( 'suggest', [
				// TODO: remove special case on 1-elt array, added to not change the test fixtures
				// We should switch to explicit naming
				'suggest' => count( $suggestQueries ) === 1 ? reset( $suggestQueries ) : $suggestQueries
			] );
			$query->addParam( 'stats', 'suggest' );
		}

		foreach ( $this->searchContext->getSyntaxUsed() as $syntax ) {
			$query->addParam( 'stats', $syntax );
		}

		// May reset $this->offset (random sort), so it has to run before the paging below.
		$this->applySort( $query, $mainQuery );

		if ( $this->offset ) {
			$query->setFrom( $this->offset );
		}
		if ( $this->limit ) {
			$query->setSize( $this->limit );
		}

		// Setup the search
		$queryOptions = [];
		if ( $this->timeout ) {
			$queryOptions[\Elastica\Search::OPTION_TIMEOUT] = $this->timeout;
		}
		// @todo when switching to multi-search this has to be provided at the top level
		if ( $this->searchContext->getConfig()->get( 'CirrusSearchMoreAccurateScoringMode' ) ) {
			$queryOptions[\Elastica\Search::OPTION_SEARCH_TYPE] = \Elastica\Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH;
		}

		$search = $this->getIndex()->createSearch( $query, $queryOptions );
		$crossClusterName = $this->connection->getConfig()->getClusterAssignment()->getCrossClusterName();
		foreach ( $extraIndexes as $i ) {
			$search->addIndex( $this->connection->getIndex( $i->getSearchIndex( $crossClusterName ) ) );
		}

		$this->searchContext->getDebugOptions()->applyDebugOptions( $query );
		return $search;
	}

	/**
	 * Apply the configured sort option to the query.
	 *
	 * Depending on the option this adds rescores, sets an elastic sort, or replaces
	 * the query with a randomly scored wrapper around the main query. An unknown sort
	 * (or a natural title sort that is not enabled) is logged and behaves as just_match.
	 *
	 * See also CirrusSearch::getValidSorts()
	 *
	 * @param Query $query the request body being built, modified in place
	 * @param mixed $mainQuery the main query as returned by SearchContext::getQuery()
	 */
	private function applySort( Query $query, $mainQuery ) {
		switch ( $this->sort ) {
			case 'just_match':
				// Use just matching scores, without any rescoring, and default sort.
				break;
			case 'relevance':
				// The default: add some rescores to improve relevance
				$rescores = $this->searchContext->getRescore();
				if ( $rescores !== [] ) {
					$query->setParam( 'rescore', $rescores );
				}
				break;
			case 'create_timestamp_asc':
				$query->setSort( [ 'create_timestamp' => 'asc' ] );
				break;
			case 'create_timestamp_desc':
				$query->setSort( [ 'create_timestamp' => 'desc' ] );
				break;
			case 'last_edit_asc':
				$query->setSort( [ 'timestamp' => 'asc' ] );
				break;
			case 'last_edit_desc':
				$query->setSort( [ 'timestamp' => 'desc' ] );
				break;
			case 'incoming_links_asc':
				$query->setSort( [ 'incoming_links' => [
					'order' => 'asc',
					'missing' => '_first',
				] ] );
				break;
			case 'incoming_links_desc':
				$query->setSort( [ 'incoming_links' => [
					'order' => 'desc',
					'missing' => '_last',
				] ] );
				break;
			case 'none':
				// Return documents in index order
				$query->setSort( [ '_doc' ] );
				break;
			case 'random':
				$randomSeed = $this->searchContext->getSearchQuery()->getRandomSeed();
				if ( $randomSeed === null && $this->offset !== 0 ) {
					// Without a seed the ordering differs between requests, so paging is refused.
					$this->searchContext->addWarning( 'cirrussearch-offset-not-allowed-with-random-sort' );
					$this->offset = 0;
				}
				// Can't use an empty array, it would JSONify to [] instead of {}.
				$scoreParams = ( $randomSeed === null ) ? (object)[] : [ 'seed' => $randomSeed, 'field' => '_seq_no' ];
				$query->setQuery( $this->buildRandomScoreQuery( $mainQuery, $scoreParams ) );
				break;
			case 'user_random':
				// Randomly ordered, but consistent for a single user
				$query->setQuery( $this->buildRandomScoreQuery( $mainQuery, [
					'seed' => Util::generateIdentToken(),
					'field' => '_seq_no',
				] ) );
				break;

			case 'title_natural_asc':
			case 'title_natural_desc':
				if ( $this->searchContext->getConfig()->getElement( 'CirrusSearchNaturalTitleSort', 'use' ) ) {
					// The third "_"-separated segment of the sort name is the direction (asc/desc).
					$query->setSort( [
						'title.natural_sort' => explode( '_', $this->sort, 3 )[2],
					] );
					break;
				}
				// Intentional fall-through to default error case.

			default:
				// Same as just_match. No user warning since an invalid sort
				// getting this far is a bug in the calling code which should
				// be validating its input.
				LoggerFactory::getInstance( 'CirrusSearch' )->warning(
					"Invalid sort type: {sort}",
					[ 'sort' => $this->sort ]
				);
		}
	}

	/**
	 * Wrap the main query so that matching documents get a random score.
	 *
	 * Instead of setting a sort field the whole query is wrapped in a bool
	 * filter and a must clause is added for the random score. This could
	 * alternatively be a rescore over a limited document set, but in basic
	 * testing the filter was more performant than an 8k rescore window even
	 * with 50M total hits.
	 *
	 * @param mixed $mainQuery the main query as returned by SearchContext::getQuery()
	 * @param array|\stdClass $scoreParams parameters of the random_score function
	 * @return Query\BoolQuery
	 */
	private function buildRandomScoreQuery( $mainQuery, $scoreParams ) {
		$randomScore = ( new Query\FunctionScore() )
			->setQuery( new Query\MatchAll() )
			->addFunction( 'random_score', $scoreParams );

		return ( new Query\BoolQuery() )
			->addFilter( $mainQuery )
			->addMust( $randomScore );
	}

	/**
	 * @return int
	 */
	public function getOffset() {
		return $this->offset;
	}

	/**
	 * @param int $offset
	 * @return $this
	 */
	public function setOffset( $offset ) {
		$this->offset = $offset;

		return $this;
	}

	/**
	 * @return int
	 */
	public function getLimit() {
		return $this->limit;
	}

	/**
	 * @param int $limit
	 * @return $this
	 */
	public function setLimit( $limit ) {
		$this->limit = $limit;

		return $this;
	}

	/**
	 * @return string|null null when no timeout has been set
	 */
	public function getTimeout() {
		return $this->timeout;
	}

	/**
	 * @param string $timeout string with time and unit, e.g. 20s for 20 seconds
	 * @return $this
	 */
	public function setTimeout( $timeout ) {
		$this->timeout = $timeout;

		return $this;
	}

	/**
	 * @return \Elastica\Index An elastica type suitable for searching against
	 *  the configured wiki over the host wiki's default connection.
	 */
	public function getIndex(): \Elastica\Index {
		// An explicitly forced index always wins.
		if ( $this->index ) {
			return $this->index;
		}

		$indexBaseName = $this->indexBaseName;
		$config = $this->searchContext->getConfig();
		$hostConfig = $config->getHostWikiConfig();
		$indexSuffix = $this->connection->pickIndexSuffixForNamespaces(
			$this->searchContext->getNamespaces() );
		if ( $hostConfig->get( 'CirrusSearchCrossClusterSearch' ) ) {
			$local = $hostConfig->getClusterAssignment()->getCrossClusterName();
			$current = $config->getClusterAssignment()->getCrossClusterName();
			// Prefix the index with the cluster name only when it differs from the host wiki's.
			if ( $local !== $current ) {
				$indexBaseName = $current . ':' . $indexBaseName;
			}
		}
		return $this->connection->getIndex( $indexBaseName, $indexSuffix );
	}

	/**
	 * @param ?Index $index index to force, or null to let getIndex() resolve it
	 * @return $this
	 */
	public function setIndex( ?Index $index ): self {
		$this->index = $index;
		return $this;
	}

	/**
	 * @return string
	 */
	public function getSort() {
		return $this->sort;
	}

	/**
	 * @param string $sort
	 * @return $this
	 */
	public function setSort( $sort ) {
		$this->sort = $sort;

		return $this;
	}

	/**
	 * @return SearchContext
	 */
	public function getSearchContext() {
		return $this->searchContext;
	}
}