Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
51.45% |
230 / 447 |
|
36.36% |
12 / 33 |
CRAP | |
0.00% |
0 / 1 |
Searcher | |
51.45% |
230 / 447 |
|
36.36% |
12 / 33 |
1600.85 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
5 | |||
search | |
89.47% |
17 / 19 |
|
0.00% |
0 / 1 |
3.01 | |||
setResultsType | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isReturnRaw | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setSort | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
limitSearchToLocalWiki | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
nearMatchTitleSearch | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
countContentWords | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
prefixSearch | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
buildFullTextSearch | |
94.74% |
18 / 19 |
|
0.00% |
0 / 1 |
5.00 | |||
searchTextInternal | |
61.22% |
30 / 49 |
|
0.00% |
0 / 1 |
25.43 | |||
get | |
0.00% |
0 / 39 |
|
0.00% |
0 / 1 |
72 | |||
findNamespace | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
6 | |||
buildSearch | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
searchOne | |
38.46% |
5 / 13 |
|
0.00% |
0 / 1 |
10.83 | |||
searchMulti | |
38.53% |
42 / 109 |
|
0.00% |
0 / 1 |
134.41 | |||
updateNamespacesFromQuery | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
20 | |||
getSearchContext | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getPoolCounterType | |
76.92% |
10 / 13 |
|
0.00% |
0 / 1 |
5.31 | |||
isAutomatedRequest | |
40.00% |
4 / 10 |
|
0.00% |
0 / 1 |
4.94 | |||
getOverriddenConnection | |
60.00% |
3 / 5 |
|
0.00% |
0 / 1 |
3.58 | |||
getQueryCacheStatsKey | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
newLog | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
processRawReturn | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
searchArchive | |
100.00% |
28 / 28 |
|
100.00% |
1 / 1 |
1 | |||
areSearchesTheSame | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
buildInterleaveSearcher | |
50.00% |
5 / 10 |
|
0.00% |
0 / 1 |
6.00 | |||
emptyResultSet | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
applyDebugOptionsToQuery | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
makeSearcher | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
setOffsetLimit | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getOffsetLimit | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
buildFullTextBuilder | |
90.48% |
19 / 21 |
|
0.00% |
0 / 1 |
4.01 |
1 | <?php |
2 | |
3 | namespace CirrusSearch; |
4 | |
5 | use CirrusSearch\Fallbacks\FallbackRunner; |
6 | use CirrusSearch\Fallbacks\SearcherFactory; |
7 | use CirrusSearch\Maintenance\NullPrinter; |
8 | use CirrusSearch\MetaStore\MetaStoreIndex; |
9 | use CirrusSearch\Parser\BasicQueryClassifier; |
10 | use CirrusSearch\Parser\FullTextKeywordRegistry; |
11 | use CirrusSearch\Parser\NamespacePrefixParser; |
12 | use CirrusSearch\Profile\SearchProfileService; |
13 | use CirrusSearch\Query\CountContentWordsBuilder; |
14 | use CirrusSearch\Query\FullTextQueryBuilder; |
15 | use CirrusSearch\Query\KeywordFeature; |
16 | use CirrusSearch\Query\NearMatchQueryBuilder; |
17 | use CirrusSearch\Query\PrefixSearchQueryBuilder; |
18 | use CirrusSearch\Search\BaseCirrusSearchResultSet; |
19 | use CirrusSearch\Search\FullTextResultsType; |
20 | use CirrusSearch\Search\MSearchRequests; |
21 | use CirrusSearch\Search\MSearchResponses; |
22 | use CirrusSearch\Search\ResultsType; |
23 | use CirrusSearch\Search\SearchContext; |
24 | use CirrusSearch\Search\SearchQuery; |
25 | use CirrusSearch\Search\SearchRequestBuilder; |
26 | use CirrusSearch\Search\TeamDraftInterleaver; |
27 | use CirrusSearch\Search\TitleHelper; |
28 | use CirrusSearch\Search\TitleResultsType; |
29 | use Elastica\Exception\ResponseException; |
30 | use Elastica\Exception\RuntimeException; |
31 | use Elastica\Multi\Search as MultiSearch; |
32 | use Elastica\Query; |
33 | use Elastica\Query\BoolQuery; |
34 | use Elastica\Query\MultiMatch; |
35 | use Elastica\Search; |
36 | use MediaWiki\Logger\LoggerFactory; |
37 | use MediaWiki\MediaWikiServices; |
38 | use MediaWiki\Request\WebRequest; |
39 | use MediaWiki\Status\Status; |
40 | use MediaWiki\Title\Title; |
41 | use MediaWiki\User\User; |
42 | use MediaWiki\WikiMap\WikiMap; |
43 | use RequestContext; |
44 | use Wikimedia\Assert\Assert; |
45 | use Wikimedia\ObjectFactory\ObjectFactory; |
46 | |
47 | /** |
48 | * Performs searches using Elasticsearch. Note that each instance of this class |
49 | * is single use only. |
50 | * |
51 | * This program is free software; you can redistribute it and/or modify |
52 | * it under the terms of the GNU General Public License as published by |
53 | * the Free Software Foundation; either version 2 of the License, or |
54 | * (at your option) any later version. |
55 | * |
56 | * This program is distributed in the hope that it will be useful, |
57 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
58 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
59 | * GNU General Public License for more details. |
60 | * |
61 | * You should have received a copy of the GNU General Public License along |
62 | * with this program; if not, write to the Free Software Foundation, Inc., |
63 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
64 | * http://www.gnu.org/copyleft/gpl.html |
65 | */ |
66 | class Searcher extends ElasticsearchIntermediary implements SearcherFactory { |
67 | public const SUGGESTION_HIGHLIGHT_PRE = '<em>'; |
68 | public const SUGGESTION_HIGHLIGHT_POST = '</em>'; |
69 | public const HIGHLIGHT_PRE_MARKER = ''; // \uE000. Can't be a unicode literal until php7 |
70 | public const HIGHLIGHT_PRE = '<span class="searchmatch">'; |
71 | public const HIGHLIGHT_POST_MARKER = ''; // \uE001 |
72 | public const HIGHLIGHT_POST = '</span>'; |
73 | |
74 | /** |
75 | * Maximum offset + limit depth allowed. As in the deepest possible result |
76 | * to return. Too deep will cause very slow queries. 10,000 feels plenty |
77 | * deep. This should be <= index.max_result_window in elasticsearch. |
78 | */ |
79 | private const MAX_OFFSET_LIMIT = 10000; |
80 | |
81 | /** |
82 | * Identifies the main search in MSearchRequests/MSearchResponses |
83 | */ |
84 | public const MAINSEARCH_MSEARCH_KEY = '__main__'; |
85 | |
86 | /** |
87 | * Identifies the "tested" search request in MSearchRequests/MSearchResponses |
88 | */ |
89 | private const INTERLEAVED_MSEARCH_KEY = '__interleaved__'; |
90 | |
91 | /** |
92 | * @var int search offset |
93 | */ |
94 | protected $offset; |
95 | |
96 | /** |
97 | * @var int maximum number of results |
98 | */ |
99 | protected $limit; |
100 | |
101 | /** |
102 | * @var string sort type |
103 | */ |
104 | private $sort = 'relevance'; |
105 | |
106 | /** |
107 | * @var string index base name to use |
108 | */ |
109 | protected $indexBaseName; |
110 | |
111 | /** |
112 | * Search environment configuration |
113 | * @var SearchConfig |
114 | */ |
115 | protected $config; |
116 | |
117 | /** |
118 | * @var SearchContext |
119 | */ |
120 | protected $searchContext; |
121 | |
122 | /** |
123 | * Indexing type we'll be using. |
124 | * @var string|\Elastica\Index |
125 | */ |
126 | private $index; |
127 | |
128 | /** |
129 | * @var NamespacePrefixParser|null |
130 | */ |
131 | private $namespacePrefixParser; |
132 | /** |
133 | * @var InterwikiResolver |
134 | */ |
135 | protected $interwikiResolver; |
136 | |
137 | /** @var TitleHelper */ |
138 | protected $titleHelper; |
139 | /** |
140 | * @var CirrusSearchHookRunner |
141 | */ |
142 | protected $cirrusSearchHookRunner; |
143 | |
/**
 * Set up a searcher bound to a connection and a search configuration.
 *
 * Collaborators passed as null are resolved here: the interwiki resolver is
 * fetched from the MediaWiki service container, the title helper is built for
 * the current wiki id, and the hook runner wraps the global hook container.
 * The namespace prefix parser is stored exactly as given and may stay null.
 *
 * Nullable typed parameters use explicit `?Type` declarations: making a typed
 * parameter nullable only through a `null` default is deprecated in PHP 8.4.
 *
 * @param Connection $conn
 * @param int $offset Offset the results by this much
 * @param int $limit Limit the results to this many
 * @param SearchConfig $config Configuration settings
 * @param int[]|null $namespaces Array of namespace numbers to search or null to search all namespaces.
 * @param User|null $user user for which this search is being performed. Attached to slow request logs.
 * @param string|bool $index Base name for index to search from, defaults to $wgCirrusSearchIndexBaseName
 * @param CirrusDebugOptions|null $options the debugging options to use or null to use defaults
 * @param NamespacePrefixParser|null $namespacePrefixParser
 * @param InterwikiResolver|null $interwikiResolver
 * @param TitleHelper|null $titleHelper
 * @param CirrusSearchHookRunner|null $cirrusSearchHookRunner
 * @see CirrusDebugOptions::defaultOptions()
 */
public function __construct(
	Connection $conn,
	$offset,
	$limit,
	SearchConfig $config,
	?array $namespaces = null,
	?User $user = null,
	$index = false,
	?CirrusDebugOptions $options = null,
	?NamespacePrefixParser $namespacePrefixParser = null,
	?InterwikiResolver $interwikiResolver = null,
	?TitleHelper $titleHelper = null,
	?CirrusSearchHookRunner $cirrusSearchHookRunner = null
) {
	parent::__construct(
		$conn,
		$user,
		$config->get( 'CirrusSearchSlowSearch' ),
		$config->get( 'CirrusSearchExtraBackendLatency' )
	);
	$this->config = $config;
	$this->setOffsetLimit( $offset, $limit );
	// A falsy $index (the default) selects the configured index base name.
	$this->indexBaseName = $index ?: $config->get( SearchConfig::INDEX_BASE_NAME );
	// TODO: Make these params mandatory once WBCS stops extending this class
	$this->namespacePrefixParser = $namespacePrefixParser;
	$this->interwikiResolver = $interwikiResolver ?: MediaWikiServices::getInstance()->getService( InterwikiResolver::SERVICE );
	$this->titleHelper = $titleHelper ?: new TitleHelper( WikiMap::getCurrentWikiId(), $this->interwikiResolver );
	$this->cirrusSearchHookRunner = $cirrusSearchHookRunner ?: new CirrusSearchHookRunner(
		MediaWikiServices::getInstance()->getHookContainer() );
	$this->searchContext = new SearchContext( $this->config, $namespaces, $options, null, null, $this->cirrusSearchHookRunner );
}
189 | |
/**
 * Unified search public entry-point.
 *
 * Replaces this searcher's context, offset/limit, config and sort with the
 * values carried by $query, then runs the full text search. When the query
 * asks for an AST dump, the parsed query is returned instead and nothing
 * else is touched.
 *
 * NOTE: only fulltext search supported for now.
 * @param SearchQuery $query
 * @return Status
 * @throws \RuntimeException when the query's entry point is not SearchQuery::SEARCH_TEXT
 */
public function search( SearchQuery $query ) {
	if ( $query->getDebugOptions()->isCirrusDumpQueryAST() ) {
		$ast = $query->getParsedQuery()->toArray();
		return Status::newGood( [ 'ast' => $ast ] );
	}

	// TODO: properly pass the profile context name and its params once we have a dispatch service.
	$fallbackRunner = FallbackRunner::create( $query, $this->interwikiResolver );
	$this->searchContext = SearchContext::fromSearchQuery(
		$query,
		$fallbackRunner,
		$this->cirrusSearchHookRunner
	);
	$this->setOffsetLimit( $query->getOffset(), $query->getLimit() );
	$this->config = $query->getSearchConfig();
	$this->sort = $query->getSort();

	// The state above is updated before the entry point is validated.
	if ( $query->getSearchEngineEntryPoint() !== SearchQuery::SEARCH_TEXT ) {
		throw new \RuntimeException( 'Only ' . SearchQuery::SEARCH_TEXT . ' is supported for now' );
	}

	$fetchPhaseBuilder = $this->searchContext->getFetchPhaseBuilder();
	$isComplexQuery = $query->getParsedQuery()->isQueryOfClass( BasicQueryClassifier::COMPLEX_QUERY );
	$extraFields = $query->getExtraFieldsToExtract();
	$deduplicateInMemory =
		$this->searchContext->getConfig()->getElement( 'CirrusSearchDeduplicateInMemory' ) === true;

	$resultsType = new FullTextResultsType(
		$fetchPhaseBuilder,
		$isComplexQuery,
		$this->titleHelper,
		$extraFields,
		$deduplicateInMemory
	);
	$this->searchContext->setResultsType( $resultsType );

	$term = $query->getParsedQuery()->getQueryWithoutNsHeader();
	return $this->searchTextInternal( $term );
}
223 | |
/**
 * Set the results type on the current search context.
 *
 * @param ResultsType $resultsType results type to return
 */
public function setResultsType( $resultsType ) {
	$context = $this->searchContext;
	$context->setResultsType( $resultsType );
}
230 | |
/**
 * Tell whether this searcher is being used to return debugging info.
 *
 * The answer is read from the debug options of the current search context.
 *
 * @return bool true if the search will return raw output
 */
public function isReturnRaw() {
	$debugOptions = $this->searchContext->getDebugOptions();
	return $debugOptions->isReturnRaw();
}
238 | |
/**
 * Choose the sort applied to the search.
 *
 * The value is stored as given without validation; it must be one of
 * 'relevance', 'title_asc' or 'title_desc'.
 *
 * @param string $sort sort type
 */
public function setSort( $sort ) {
	$this->sort = $sort;
}
246 | |
/**
 * Decide whether results should be limited to the local wiki.
 * The default, if this is never called, is false.
 *
 * @param bool $limitSearchToLocalWiki should the results be limited?
 */
public function limitSearchToLocalWiki( $limitSearchToLocalWiki ) {
	$context = $this->searchContext;
	$context->setLimitSearchToLocalWiki( $limitSearchToLocalWiki );
}
254 | |
/**
 * Run a "near match" title search, which is pretty much a prefix match
 * without the prefixes.
 *
 * @param string $term text by which to search
 * @return Status status containing results defined by resultsType on success
 */
public function nearMatchTitleSearch( $term ) {
	$queryBuilder = new NearMatchQueryBuilder();
	$queryBuilder->build( $this->searchContext, $term );

	return $this->searchOne();
}
264 | |
/**
 * Sum the number of words in the content index.
 *
 * @return Status status containing a single integer
 */
public function countContentWords() {
	$queryBuilder = new CountContentWordsBuilder();
	$queryBuilder->build( $this->searchContext );

	// Overrides whatever limit the searcher was configured with.
	$this->limit = 1;

	return $this->searchOne();
}
274 | |
/**
 * Run a prefix search.
 *
 * @param string $term text by which to search
 * @param string[] $variants variants to search for
 * @return Status status containing results defined by resultsType on success
 */
public function prefixSearch( $term, $variants = [] ) {
	$queryBuilder = new PrefixSearchQueryBuilder();
	$queryBuilder->build( $this->searchContext, $term, $variants );

	return $this->searchOne();
}
285 | |
/**
 * Build the full text search for articles matching the provided term.
 *
 * All state is applied to $this->searchContext. The query builder is
 * returned so the caller can build a degraded query later if necessary.
 * If the parsed query carries one of the known "bad syntax" warnings, the
 * degraded query is built right away.
 *
 * @param string $term term to search
 * @return FullTextQueryBuilder
 */
protected function buildFullTextSearch( $term ) {
	// Cirrussearch treats the unicode 'ideographic whitespace' character like
	// normal whitespace, but trim() does not strip it, so convert it to a
	// standard space before trimming.
	$term = trim( str_replace( "\xE3\x80\x80", " ", $term ) );
	if ( $term === '' ) {
		// No searching for nothing! That takes forever!
		$this->searchContext->setResultsPossible( false );
	}

	$profileService = $this->config->getProfileService();
	$builderSettings = $profileService->loadProfileByName(
		SearchProfileService::FT_QUERY_BUILDER,
		$this->searchContext->getFulltextQueryBuilderProfile()
	);
	$keywordRegistry = new FullTextKeywordRegistry( $this->config );
	$features = $keywordRegistry->getKeywords();

	$qb = self::buildFullTextBuilder( $builderSettings, $this->config, $features );
	$qb->build( $this->searchContext, $term );

	$searchQuery = $this->searchContext->getSearchQuery();
	if ( $searchQuery === null ) {
		return $qb;
	}

	$degradeOnParseWarnings = [
		// && test, test AND && test
		'cirrussearch-parse-error-unexpected-token',
		// test AND
		'cirrussearch-parse-error-unexpected-end'
	];
	// Quick hack to avoid sending bad queries to the backend: the first
	// matching warning is enough to switch to the degraded query.
	foreach ( $searchQuery->getParsedQuery()->getParseWarnings() as $warning ) {
		if ( in_array( $warning->getMessage(), $degradeOnParseWarnings ) ) {
			$qb->buildDegraded( $this->searchContext );
			break;
		}
	}

	return $qb;
}
330 | |
/**
 * Run a full text search for $term and wrap the outcome in a Status.
 *
 * Steps, in order:
 *  - prepare an optional interleave searcher (before any query building),
 *  - build the main search and, when it differs, the interleaved search,
 *  - let the fallback runner attach its own requests,
 *  - honour the query and result dump debug options,
 *  - on a parse error, retry once with a degraded query,
 *  - interleave the two result sets when an interleave search was sent,
 *  - run the fallback runner when a namespace prefix parser is available.
 *
 * When no namespace prefix parser is available, the fallback runner cannot be
 * run, and the (possibly interleaved) result set is returned as-is rather than
 * being discarded.
 *
 * @param string $term
 * @return Status
 */
private function searchTextInternal( $term ) {
	// Searcher needs to be cloned before any actual query building is done.
	$interleaveSearcher = $this->buildInterleaveSearcher();

	$qb = $this->buildFullTextSearch( $term );
	$mainSearch = $this->buildSearch();
	$searches = MSearchRequests::build( self::MAINSEARCH_MSEARCH_KEY, $mainSearch );
	$description = "{$this->searchContext->getSearchType()} search for '{$this->searchContext->getOriginalSearchTerm()}'";

	if ( !$this->searchContext->areResultsPossible() ) {
		if ( $this->searchContext->getDebugOptions()->isCirrusDumpQuery() ) {
			// return the empty array to suggest that no query will be run
			return Status::newGood( [] );
		}
		$status = $this->emptyResultSet();
		if ( $this->searchContext->getDebugOptions()->isCirrusDumpResult() ) {
			return Status::newGood(
				( new MSearchResponses( [ $status->getValue() ], [] ) )->dumpResults( $description )
			);
		}
		return $status;
	}

	if ( $interleaveSearcher !== null ) {
		$interleaveSearcher->buildFullTextSearch( $term );
		$interleaveSearch = $interleaveSearcher->buildSearch();
		if ( $this->areSearchesTheSame( $mainSearch, $interleaveSearch ) ) {
			// Nothing to compare against: fall back to a plain search.
			$interleaveSearcher = null;
		} else {
			$searches->addRequest( self::INTERLEAVED_MSEARCH_KEY, $interleaveSearch );
		}
	}

	$fallbackRunner = $this->searchContext->getFallbackRunner();
	$fallbackRunner->attachSearchRequests( $searches, $this->connection->getClient() );

	if ( $this->searchContext->getDebugOptions()->isCirrusDumpQuery() ) {
		return $searches->dumpQuery( $description );
	}

	$responses = $this->searchMulti( $searches );
	if ( $responses->hasFailure() ) {
		$status = $responses->getFailure();
		if ( ElasticaErrorHandler::isParseError( $status ) ) {
			// Rebuild the search context because we need a fresh fetchPhaseBuilder
			$this->searchContext = $this->searchContext->withConfig( $this->config );
			if ( $qb->buildDegraded( $this->searchContext ) ) {
				// If that doesn't work we're out of luck but it should.
				// There is no guarantee it'll work properly with the syntax
				// we've built above but it'll do _something_ and we'll
				// still work on fixing all the parse errors that come in.
				$status = $this->searchOne();
			}
		}
		return $status;
	}

	if ( $this->searchContext->getDebugOptions()->isCirrusDumpResult() ) {
		return $responses->dumpResults( $description );
	}

	$rType = $this->getSearchContext()->getResultsType();
	$mainSet = $responses->transformAsResultSet( $rType, self::MAINSEARCH_MSEARCH_KEY );
	if ( $interleaveSearcher !== null ) {
		$interleaver = new TeamDraftInterleaver( $this->searchContext->getOriginalSearchTerm() );
		$testedSet = $responses->transformAsResultSet( $rType, self::INTERLEAVED_MSEARCH_KEY );
		$response = $interleaver->interleave( $mainSet, $testedSet, $this->limit );
	} else {
		$response = $mainSet;
	}

	// Default to the result set we already have so it is not lost when the
	// fallback runner cannot be run.
	$status = Status::newGood( $response );
	if ( $this->namespacePrefixParser !== null ) {
		$status = Status::newGood( $fallbackRunner->run( $this, $response, $responses,
			$this->namespacePrefixParser, $this->cirrusSearchHookRunner ) );
		$this->appendMetrics( $fallbackRunner );
	}

	foreach ( $this->searchContext->getWarnings() as $warning ) {
		$status->warning( ...$warning );
	}
	return $status;
}
418 | |
/**
 * Fetch the pages matching the given document ids.
 *
 * The returned status contains _all_ pages found: more than one page can
 * come back for a single id if the page is present in multiple indexes.
 *
 * @param string[] $docIds array of document ids
 * @param string[]|bool $sourceFiltering source filtering to apply
 * @param bool $usePoolCounter false to disable the pool counter
 * @return Status containing pages found, containing an empty array if not found,
 *  or an error if there was an error
 */
public function get( array $docIds, $sourceFiltering, $usePoolCounter = true ) {
	$connection = $this->getOverriddenConnection();
	$namespaces = $this->searchContext->getNamespaces();
	$indexSuffix = $connection->pickIndexSuffixForNamespaces( $namespaces );

	// Worst case: every id is duplicated in every available index, so the
	// result size is sized for (number of indices) x (number of ids).
	$suffixCount = count( $connection->getAllIndexSuffixesForNamespaces( $namespaces ) );
	$size = $suffixCount * count( $docIds );

	$work = function () use ( $docIds, $sourceFiltering, $indexSuffix, $size, $connection ) {
		try {
			$this->startNewLog( 'get of {indexSuffix}.{docIds}', 'get', [
				'indexSuffix' => $indexSuffix,
				'docIds' => $docIds,
			] );
			// Shard timeout not supported on get requests so we just use the client side timeout
			$connection->setTimeout( $this->getClientTimeout( 'get' ) );

			// A search query is used instead of _get/_mget: those are
			// theoretically well suited for this kind of job but they are not
			// supported on aliases with multiple indices (content/general).
			$index = $connection->getIndex( $this->indexBaseName, $indexSuffix );
			$query = new Query( new \Elastica\Query\Ids( $docIds ) );
			if ( is_array( $sourceFiltering ) ) {
				// The title is a required field in the ApiTrait
				if ( !in_array( "title", $sourceFiltering ) ) {
					$sourceFiltering[] = "title";
				}
				$query->setParam( '_source', $sourceFiltering );
			}
			$query->addParam( 'stats', 'get' );
			// Limits provided to the searcher are ignored, otherwise we
			// could return fewer results than the ids requested.
			$query->setFrom( 0 );
			$query->setSize( $size );

			$resultSet = $index->search( $query, [ 'search_type' => 'query_then_fetch' ] );
			if ( $resultSet->getResponse()->isOK() ) {
				return $this->success( $resultSet->getResults(), $connection );
			}

			$request = $connection->getClient()->getLastRequest();
			if ( $request == null ) {
				// I can't imagine how this would happen, but the type signature allows
				// for a null last request so we provide a minimal workaround.
				throw new RuntimeException(
					"Response reports failure, but no last request available" );
			}
			throw new ResponseException( $request, $resultSet->getResponse() );
		} catch ( \Elastica\Exception\NotFoundException $e ) {
			// NotFoundException just means the field didn't exist.
			// It is up to the caller to decide if that is an error.
			return $this->success( [], $connection );
		} catch ( \Elastica\Exception\ExceptionInterface $e ) {
			return $this->failure( $e, $connection );
		}
	};

	if ( !$usePoolCounter ) {
		return $work();
	}
	return Util::doPoolCounterWork( $this->getPoolCounterType(), $this->user, $work );
}
494 | |
/**
 * Look up a namespace by name in the metastore namespace store.
 *
 * The lookup runs under the 'CirrusSearch-NamespaceLookup' pool counter.
 * The connection is resolved before the work closure runs, as get() and
 * searchMulti() do, so the error handler always has a defined connection
 * to report against.
 *
 * @param string $name
 * @return Status
 */
private function findNamespace( $name ) {
	$connection = $this->getOverriddenConnection();

	return Util::doPoolCounterWork(
		'CirrusSearch-NamespaceLookup',
		$this->user,
		function () use ( $name, $connection ) {
			try {
				$this->startNewLog( 'lookup namespace for {namespaceName}', 'namespace', [
					'namespaceName' => $name,
					'query' => $name,
				] );
				$connection->setTimeout( $this->getClientTimeout( 'namespace' ) );

				// A bit awkward, but accepted as this is the backup
				// implementation of namespace lookup. Deployments should
				// prefer to install php-intl and use utr30.
				$store = ( new MetaStoreIndex( $connection, new NullPrinter(), $this->config ) )
					->namespaceStore();
				$resultSet = $store->find( $name, [
					'timeout' => $this->getTimeout( 'namespace' ),
				] );
				return $this->success( $resultSet->getResults(), $connection );
			} catch ( \Elastica\Exception\ExceptionInterface $e ) {
				return $this->failure( $e, $connection );
			}
		} );
}
526 | |
/**
 * Build the Elastica search request from the current search context, using
 * this searcher's limit, offset, index, sort and the timeout configured for
 * the context's search type.
 *
 * @return \Elastica\Search
 */
protected function buildSearch() {
	$connection = $this->getOverriddenConnection();
	$timeout = $this->getTimeout( $this->searchContext->getSearchType() );

	$builder = new SearchRequestBuilder( $this->searchContext, $connection, $this->indexBaseName );

	return $builder
		->setLimit( $this->limit )
		->setOffset( $this->offset )
		->setIndex( $this->index )
		->setSort( $this->sort )
		->setTimeout( $timeout )
		->build();
}
540 | |
/**
 * Perform a single-query search.
 *
 * The query dump debug option is honoured first, then the "no results
 * possible" shortcut; only after that is the backend queried.
 *
 * @return Status
 */
protected function searchOne() {
	$search = $this->buildSearch();
	$description = $this->searchContext->getSearchType()
		. " search for '"
		. $this->searchContext->getOriginalSearchTerm()
		. "'";
	$msearch = MSearchRequests::build( self::MAINSEARCH_MSEARCH_KEY, $search );

	$dumpQuery = $this->searchContext->getDebugOptions()->isCirrusDumpQuery();
	if ( $dumpQuery ) {
		return $msearch->dumpQuery( $description );
	}

	$resultsPossible = $this->searchContext->areResultsPossible();
	if ( !$resultsPossible ) {
		return $this->emptyResultSet();
	}

	$mresults = $this->searchMulti( $msearch );
	if ( $mresults->hasFailure() ) {
		return $mresults->getFailure();
	}

	$returnRaw = $this->searchContext->getDebugOptions()->isReturnRaw();
	if ( $returnRaw ) {
		return $mresults->dumpResults( $description );
	}

	$resultsType = $this->searchContext->getResultsType();
	return $mresults->transformAndGetSingle( $resultsType, self::MAINSEARCH_MSEARCH_KEY );
}
567 | |
/**
 * Powers full-text-like searches including prefix search.
 *
 * Sends all requests of $msearches as a single multi search, under the pool
 * counter, and optionally through the WAN object cache when the search
 * context has a positive cache TTL and the debug options allow caching.
 * Results flagged as timed out are never written to the cache. A timeout
 * in the final responses is logged and added as a warning to the context.
 *
 * Must not be called when the query dump debug option is set.
 *
 * @param MSearchRequests $msearches
 * @return MSearchResponses search responses
 */
protected function searchMulti( MSearchRequests $msearches ) {
	$searches = $msearches->getRequests();
	$contextResultsType = $this->searchContext->getResultsType();
	$cirrusDebugOptions = $this->searchContext->getDebugOptions();
	Assert::precondition( !$cirrusDebugOptions->isCirrusDumpQuery(), 'Must not reach this method when dumping the query' );

	// TODO: should this be moved upper in the stack?
	if ( $this->limit <= 0 ) {
		return $msearches->failure( Status::newFatal( 'cirrussearch-offset-too-large',
			self::MAX_OFFSET_LIMIT, $this->offset ) );
	}

	$connection = $this->getOverriddenConnection();
	$log = new MultiSearchRequestLog(
		$connection->getClient(),
		"{queryType} search for '{query}'",
		$this->searchContext->getSearchType(),
		[
			'query' => $this->searchContext->getOriginalSearchTerm(),
			'limit' => $this->limit ?: null,
			// Used syntax
			'syntax' => $this->searchContext->getSyntaxUsed(),
		],
		$this->searchContext->getNamespaces() ?: []
	);

	// Similar to indexing support only the bulk code path, rather than
	// single and bulk. The extra overhead should be minimal, and the
	// reduced complexity is welcomed.
	$search = new MultiSearch( $connection->getClient() );
	$search->addSearches( $searches );

	$connection->setTimeout( $this->getClientTimeout( $this->searchContext->getSearchType() ) );

	if ( $this->config->get( 'CirrusSearchMoreAccurateScoringMode' ) ) {
		$search->setSearchType( \Elastica\Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH );
	}

	// Perform the search
	$work = function () use ( $search, $log, $connection ) {
		return Util::doPoolCounterWork(
			$this->getPoolCounterType(),
			$this->user,
			function () use ( $search, $log, $connection ) {
				// @todo only reports the first error, also turns
				// a partial (single search) error into a complete
				// failure across the board. Should be addressed
				// at some point.
				return $this->runMSearch( $search, $log, $connection );
			},
			$this->searchContext->isSyntaxUsed( 'regex' ) ?
				'cirrussearch-regex-too-busy-error' : null
		);
	};

	// Wrap with caching if needed, but don't cache debugging queries
	$skipCache = $cirrusDebugOptions->mustNeverBeCached();
	if ( $this->searchContext->getCacheTtl() > 0 && !$skipCache ) {
		$work = function () use ( $work, $searches, $log, $contextResultsType ) {
			$services = MediaWikiServices::getInstance();
			$requestStats = $services->getStatsdDataFactory();
			$cache = $services->getMainWANObjectCache();

			// The cache key covers every request (path, options, query) plus
			// the class of the results type, which is the same for all of them.
			$resultsTypeName = $contextResultsType !== null ? get_class( $contextResultsType ) : "NONE";
			$keyParts = [];
			foreach ( $searches as $request ) {
				$keyParts[] = $request->getPath() .
					serialize( $request->getOptions() ) .
					serialize( $request->getQuery()->toArray() ) .
					$resultsTypeName;
			}
			$cacheKey = $cache->makeKey( 'cirrussearch', 'search', 'v2', md5(
				implode( '|', $keyParts )
			) );
			$cacheResult = $cache->get( $cacheKey );
			$statsKey = $this->getQueryCacheStatsKey();
			if ( $cacheResult ) {
				[ $logVariables, $multiResultSet ] = $cacheResult;
				$requestStats->increment( "$statsKey.hit" );
				$log->setCachedResult( $logVariables );
				$this->successViaCache( $log );

				if ( $multiResultSet->isOK() ) {
					/**
					 * @var $cachedMResultSet \Elastica\Multi\ResultSet
					 */
					$cachedMResultSet = $multiResultSet->getValue();
					if ( count( $cachedMResultSet->getResultSets() ) !== count( $searches ) ) {
						// Unusable cache entry: log it and fall through to
						// run the search against the backend.
						LoggerFactory::getInstance( 'CirrusSearch' )
							->warning( 'Ignoring a cached Multi/ResultSet wanted {nb_queries} response(s) but received {nb_responses}',
								[
									'nb_queries' => count( $searches ),
									'nb_responses' => count( $cachedMResultSet->getResultSets() )
								] );
						$requestStats->increment( "$statsKey.incoherent" );
					} else {
						return $multiResultSet;
					}
				} else {
					LoggerFactory::getInstance( 'CirrusSearch' )
						->warning( 'Cached a Status value that is not OK' );
					$requestStats->increment( "$statsKey.nok" );
				}
			} else {
				$requestStats->increment( "$statsKey.miss" );
			}

			$multiResultSet = $work();

			if ( $multiResultSet->isOK() ) {
				// Only complete results are cached: one timed out response
				// is enough to skip the cache write.
				$isPartialResult = false;
				foreach ( $multiResultSet->getValue()->getResultSets() as $resultSet ) {
					$responseData = $resultSet->getResponse()->getData();
					if ( !empty( $responseData['timed_out'] ) ) {
						$isPartialResult = true;
						break;
					}
				}
				if ( !$isPartialResult ) {
					$requestStats->increment( "$statsKey.set" );
					$cache->set(
						$cacheKey,
						[ $log->getLogVariables(), $multiResultSet ],
						$this->searchContext->getCacheTtl()
					);
				}
			}

			return $multiResultSet;
		};
	}

	$status = $work();

	// @todo Does this need anything special for multi-search changes?
	if ( !$status->isOK() ) {
		return $msearches->failure( $status );
	}

	$response = $status->getValue();
	/**
	 * @var $response \Elastica\Multi\ResultSet
	 */
	if ( count( $response->getResultSets() ) !== count( $msearches->getRequests() ) ) {
		// Temp hack to investigate T231023 (use php serialize just in case it has some invalid
		// UTF8 sequences that would prevent this message from being sent to logstash
		LoggerFactory::getInstance( 'CirrusSearch' )
			->warning( "Incoherent response received (#searches != #responses) for {query}: {response}",
				[ 'query' => $this->searchContext->getOriginalSearchTerm(), 'response' => serialize( $response->getResponse() ) ] );
		return $msearches->failure( Status::newFatal( 'cirrussearch-backend-error' ) );
	}
	$mresponses = $msearches->toMSearchResponses( $response->getResultSets() );
	if ( $mresponses->hasTimeout() ) {
		LoggerFactory::getInstance( 'CirrusSearch' )->warning(
			$log->getDescription() . " timed out and only returned partial results!",
			$log->getLogVariables()
		);
		$this->searchContext->addWarning( $this->searchContext->isSyntaxUsed( 'regex' )
			? 'cirrussearch-regex-timed-out'
			: 'cirrussearch-timed-out'
		);
	}
	return $mresponses;
}
736 | |
/**
 * Strip a leading "namespace:" prefix from the query string and restrict
 * the search to that namespace.
 *
 * The text before the first colon is resolved through findNamespace(),
 * which reaches out to Elasticsearch to perform a normalized lookup
 * against the namespaces; it should be fast but for the network hop.
 * When the lookup succeeds and yields at least one match, the prefix and
 * its colon are removed from $query and the search context is limited to
 * the namespace id of the first match. In every other case both $query
 * and the search context are left untouched.
 *
 * @param string &$query Query string, rewritten in place when a namespace is found
 */
public function updateNamespacesFromQuery( &$query ) {
	$colonPos = strpos( $query, ':' );
	if ( $colonPos !== false ) {
		$lookup = $this->findNamespace( substr( $query, 0, $colonPos ) );
		// A failed lookup has already been logged, only success needs handling.
		if ( $lookup->isOK() ) {
			$matches = $lookup->getValue();
			if ( $matches ) {
				$firstMatch = $matches[0];
				$query = substr( $query, $colonPos + 1 );
				$this->searchContext->setNamespaces( [ $firstMatch->namespace_id ] );
			}
		}
	}
}
763 | |
/**
 * Accessor for the search context this searcher operates on.
 *
 * @return SearchContext
 */
public function getSearchContext() {
	return $this->searchContext;
}
770 | |
/**
 * Choose the pool counter that should guard the current request.
 *
 * Not all possible requests go through Searcher, so the default pool
 * picked here is not globally definitive.
 *
 * @return string Pool counter name
 */
private function getPoolCounterType(): string {
	// Overrides based on query syntax, checked in this order with the first
	// match winning. The goal is to separate expensive or high-volume
	// traffic into dedicated pools with specific limits. Prefix is only
	// high volume when completion is disabled.
	$syntaxPools = [
		'regex' => 'CirrusSearch-Regex',
		'prefix' => 'CirrusSearch-Prefix',
		'more_like' => 'CirrusSearch-MoreLike',
	];
	// Default pool counter for all search requests.
	$selected = 'CirrusSearch-Search';
	foreach ( $syntaxPools as $syntax => $poolName ) {
		if ( $this->searchContext->isSyntaxUsed( $syntax ) ) {
			$selected = $poolName;
			break;
		}
	}

	// Regex queries never move to the automation bucket: they can be very
	// expensive and usually use a small pool. If both the automation and
	// regex pools filled with regexes it would be significantly more load
	// than expected.
	if ( $selected === 'CirrusSearch-Regex' ) {
		return $selected;
	}

	// Put external automated requests into their own bucket. The main idea
	// is to allow automated access, but prevent that automation from
	// capping out the pools used by interactive queries. It's not clear
	// when the automation bucket should not override other bucketing
	// decisions, so for now it overrides everything except Regex.
	return $this->isAutomatedRequest() ? 'CirrusSearch-Automated' : $selected;
}
804 | |
/**
 * Decide whether the main request looks like external automation.
 *
 * The decision is delegated to Util::looksLikeAutomation() using the
 * request IP and headers. When no IP can be obtained the request is
 * never flagged as automated.
 *
 * @return bool
 */
private function isAutomatedRequest(): bool {
	$request = RequestContext::getMain()->getRequest();
	try {
		$clientIp = $request->getIP();
	} catch ( \MWException $e ) {
		// No IP, typically this means a CLI invocation. We are attempting
		// to segregate external automation; internal automation has its
		// own ability to control configuration and shouldn't be flagged.
		if ( MW_ENTRY_POINT !== 'cli' ) {
			// When can we get here? Is this ever run?
			LoggerFactory::getInstance( 'CirrusSearch' )->info(
				'No IP available during automated request check' );
		}
		return false;
	}

	return Util::looksLikeAutomation( $this->config, $clientIp, $request->getAllHeaders() );
}
824 | |
/**
 * Resolve the connection to use for the current query.
 *
 * Some queries, like more like this, are quite expensive and can cause
 * latency spikes. The 'CirrusSearchClusterOverrides' setting maps syntax
 * features to clusters; the first feature used by the current search
 * context selects the cluster. Without a match the searcher's own
 * connection is returned.
 *
 * @return Connection
 */
private function getOverriddenConnection() {
	$clusterByFeature = $this->config->get( 'CirrusSearchClusterOverrides' );
	foreach ( $clusterByFeature as $syntax => $clusterName ) {
		if ( !$this->searchContext->isSyntaxUsed( $syntax ) ) {
			continue;
		}
		return Connection::getPool( $this->config, $clusterName );
	}

	return $this->connection;
}
840 | |
/**
 * Build the stats key under which hit/miss rates of the application side
 * query cache are reported; the key is suffixed with the search type of
 * the current search context.
 *
 * @return string
 */
protected function getQueryCacheStatsKey() {
	return 'CirrusSearch.query_cache.' . $this->searchContext->getSearchType();
}
849 | |
/**
 * Create a request log bound to the client of the (possibly overridden)
 * connection used for this search.
 *
 * @param string $description
 * @param string $queryType
 * @param string[] $extra
 * @return SearchRequestLog
 */
protected function newLog( $description, $queryType, array $extra = [] ) {
	$client = $this->getOverriddenConnection()->getClient();

	return new SearchRequestLog( $client, $description, $queryType, $extra );
}
864 | |
/**
 * If we're supposed to create a raw result, create and return it, or
 * output it and finish. The work is delegated to
 * Util::processSearchRawReturn() together with the debug options of the
 * current search context.
 *
 * @param mixed $result Search result data
 * @param WebRequest $request Request context
 * @return string The new raw result.
 */
public function processRawReturn( $result, WebRequest $request ) {
	$debugOptions = $this->searchContext->getDebugOptions();

	return Util::processSearchRawReturn( $result, $request, $debugOptions );
}
876 | |
/**
 * Search titles in the archive index.
 *
 * The term matches either through a weighted multi-field title match or
 * through a fuzzy match on title.plain; at least one of the two must
 * match. Results are sorted with 'just_match' and rescoring uses the
 * 'empty' profile.
 *
 * @param string $term
 * @return Status<Title[]>
 */
public function searchArchive( $term ) {
	$this->searchContext->setOriginalSearchTerm( $term );
	$term = $this->searchContext->escaper()->fixupWholeQueryString( $term );
	$this->setResultsType( new TitleResultsType() );

	// This does not support cross-cluster search, but there is also no use case
	// for cross-wiki archive search.
	$this->index = $this->getOverriddenConnection()->getArchiveIndex( $this->indexBaseName );

	// Fuzzy match against the plain title, every word required.
	$fuzzyTitle = new \Elastica\Query\MatchQuery();
	$fuzzyTitle->setFieldQuery( 'title.plain', $term );
	$fuzzyTitle->setFieldFuzziness( 'title.plain', 'AUTO' );
	$fuzzyTitle->setFieldOperator( 'title.plain', 'AND' );

	// Weighted match across the title sub-fields, every word required.
	$titleMatch = new MultiMatch();
	$titleMatch->setType( 'best_fields' );
	$titleMatch->setTieBreaker( 0 );
	$titleMatch->setQuery( $term );
	$titleMatch->setFields( [
		'title.near_match^100',
		'title.near_match_asciifolding^75',
		'title.plain^50',
		'title^25'
	] );
	$titleMatch->setOperator( 'AND' );

	// Either clause is sufficient for a hit.
	$archiveQuery = new BoolQuery();
	$archiveQuery->addShould( $titleMatch );
	$archiveQuery->addShould( $fuzzyTitle );
	$archiveQuery->setMinimumShouldMatch( 1 );

	$this->sort = 'just_match';

	$this->searchContext->setMainQuery( $archiveQuery );
	$this->searchContext->addSyntaxUsed( 'archive' );
	$this->searchContext->setRescoreProfile( 'empty' );

	return $this->searchOne();
}
923 | |
/**
 * Tests if two search objects are equivalent.
 *
 * Two searches are equivalent when they are the same object, or when
 * their path, options and query arrays match once integer 'now' values
 * have been normalized.
 *
 * @param Search $a
 * @param Search $b
 * @return bool
 */
private function areSearchesTheSame( Search $a, Search $b ) {
	if ( $a === $b ) {
		// Same object.
		return true;
	}

	// Path and options are not included in toArray(), compare them first.
	$samePath = $a->getPath() === $b->getPath();
	if ( !$samePath || $a->getOptions() != $b->getOptions() ) {
		return false;
	}

	// Returns a copy of the query array in which every integer 'now'
	// value is replaced by a constant, since it contains a timestamp
	// that may vary.
	$normalize = static function ( $queryArray ) {
		array_walk_recursive(
			$queryArray,
			static function ( &$value, $key ) {
				if ( $key === 'now' && is_int( $value ) ) {
					$value = 12345678;
				}
			}
		);
		return $queryArray;
	};

	// Simplest form, requires both arrays to have exact same ordering,
	// types, keys, etc. We could try much harder to remove edge cases,
	// but they probably don't matter too much. The main thing we are
	// looking for is whether the configuration used for interleaved search
	// had no effect on query building. If we get it wrong in some rare
	// cases it should have minimal effects on the interleaved search test.
	return $normalize( $a->getQuery()->toArray() ) === $normalize( $b->getQuery()->toArray() );
}
965 | |
/**
 * Build the alternate searcher used for interleaved search.
 *
 * @return self|null A clone of this searcher running with the
 *  'CirrusSearchInterleaveConfig' overrides, or null when interleaving
 *  does not apply.
 */
private function buildInterleaveSearcher() {
	// Only interleave on the first page and when the user has not specified
	// custom magic query options (override rescore profile, etc).
	if ( !( $this->offset > 0 ) && !$this->searchContext->isDirty() ) {
		// Is interleaving configured?
		$interleaveSettings = $this->config->get( 'CirrusSearchInterleaveConfig' );
		if ( $interleaveSettings !== null ) {
			$altConfig = new HashSearchConfig( $interleaveSettings, [ HashSearchConfig::FLAG_INHERIT ] );
			$alternate = clone $this;
			$alternate->config = $altConfig;
			$alternate->searchContext = $alternate->searchContext->withConfig( $altConfig );

			return $alternate;
		}
	}

	return null;
}
987 | |
/**
 * Build a good Status wrapping an empty result and carrying every
 * warning recorded on the search context.
 *
 * @return Status
 */
private function emptyResultSet() {
	$empty = $this->searchContext->getResultsType()->createEmptyResult();

	// TODO: Keywords are very specific to full-text search, while
	// ResultsType and this method are much more general.
	// While awkward, this maintains BC until we decide what to do.
	$status = Status::newGood(
		$empty instanceof BaseCirrusSearchResultSet
			? BaseCirrusSearchResultSet::emptyResultSet( $this->searchContext->isSpecialKeywordUsed() )
			: $empty
	);

	foreach ( $this->searchContext->getWarnings() as $warningArgs ) {
		$status->warning( ...$warningArgs );
	}

	return $status;
}
1007 | |
/**
 * Apply the debug options of the current search context to an elastica
 * query.
 *
 * @param Query $query
 * @return Query
 */
public function applyDebugOptionsToQuery( Query $query ) {
	$debugOptions = $this->searchContext->getDebugOptions();

	return $debugOptions->applyDebugOptions( $query );
}
1016 | |
/**
 * Create a new Searcher for the given query.
 *
 * Offset, limit, search config, namespaces and debug options come from
 * $query; the connection, user and helper services are shared with this
 * instance.
 *
 * @param SearchQuery $query
 * @return Searcher
 */
public function makeSearcher( SearchQuery $query ) {
	return new self(
		$this->connection,
		$query->getOffset(),
		$query->getLimit(),
		$query->getSearchConfig(),
		$query->getNamespaces(),
		$this->user,
		false,
		$query->getDebugOptions(),
		$this->namespacePrefixParser,
		$this->interwikiResolver,
		$this->titleHelper,
		$this->cirrusSearchHookRunner
	);
}
1027 | |
/**
 * Store the offset and a limit capped so that offset plus limit does not
 * go past self::MAX_OFFSET_LIMIT.
 *
 * @param int $offset
 * @param int $limit
 */
private function setOffsetLimit( $offset, $limit ) {
	$this->offset = $offset;
	// Shrink the limit to whatever room is left after the offset.
	$this->limit = ( $offset + $limit > self::MAX_OFFSET_LIMIT )
		? self::MAX_OFFSET_LIMIT - $offset
		: $limit;
}
1040 | |
/**
 * Expose the current offset and limit. Visible for testing: the call is
 * guarded by a precondition on the MW_PHPUNIT_TEST constant.
 *
 * @return int[] 2 elements array: offset, then limit
 */
public function getOffsetLimit() {
	$runningUnderPhpUnit = defined( 'MW_PHPUNIT_TEST' );
	Assert::precondition( $runningUnderPhpUnit,
		'getOffsetLimit must only be called for testing purposes' );

	return [ $this->offset, $this->limit ];
}
1050 | |
/**
 * Build a FullTextQueryBuilder defined in the $builderSettings:
 * format is:
 * [
 *     'builder_factory' => callback
 *     'settings' => ...
 * ]
 * where callback must be function that accepts the settings array and returns a FullTextQueryBuilder
 *
 * Legacy version:
 * [
 *     'builder_class' => ClassName
 *     'settings' => ...
 * ]
 * where ClassName must declare a constructor with these arguments:
 * SearchConfig $config, KeywordFeature[] $features, $settings
 *
 * When both keys are present 'builder_class' takes precedence.
 *
 * Visible for testing only
 * @param array $builderSettings
 * @param SearchConfig $config
 * @param KeywordFeature[] $features
 * @return FullTextQueryBuilder
 * @throws \ReflectionException
 * @throws \InvalidArgumentException When neither 'builder_class' nor a
 *  non-empty 'builder_factory' is provided
 * @throws RuntimeException When the created object is not a FullTextQueryBuilder
 */
final public static function buildFullTextBuilder(
	array $builderSettings,
	SearchConfig $config,
	array $features
): FullTextQueryBuilder {
	if ( isset( $builderSettings['builder_class'] ) ) {
		$objectFactorySpecs = [
			'class' => $builderSettings['builder_class'],
			'args' => [
				$config,
				$features,
				$builderSettings['settings']
			]
		];
	} elseif ( !empty( $builderSettings['builder_factory'] ) ) {
		// empty() rather than a bare array access: a missing key must lead
		// to the InvalidArgumentException below instead of an undefined
		// array key notice/warning.
		$objectFactorySpecs = [
			'factory' => $builderSettings['builder_factory'],
			'args' => [
				$builderSettings['settings']
			]
		];
	} else {
		throw new \InvalidArgumentException( 'Missing builder_class or builder_factory in the builderSettings' );
	}

	/** @var FullTextQueryBuilder $qb */
	// @phan-suppress-next-line PhanTypeInvalidCallableArraySize
	$qb = ObjectFactory::getObjectFromSpec( $objectFactorySpecs );
	if ( !( $qb instanceof FullTextQueryBuilder ) ) {
		throw new RuntimeException( 'Bad builder class configured.' );
	}

	return $qb;
}
1109 | } |