Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
51.46% |
229 / 445 |
|
36.36% |
12 / 33 |
CRAP | |
0.00% |
0 / 1 |
Searcher | |
51.46% |
229 / 445 |
|
36.36% |
12 / 33 |
1573.29 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
15 / 15 |
|
100.00% |
1 / 1 |
5 | |||
search | |
89.47% |
17 / 19 |
|
0.00% |
0 / 1 |
3.01 | |||
setResultsType | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
isReturnRaw | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setSort | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
limitSearchToLocalWiki | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
nearMatchTitleSearch | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
countContentWords | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
prefixSearch | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
buildFullTextSearch | |
84.21% |
16 / 19 |
|
0.00% |
0 / 1 |
5.10 | |||
searchTextInternal | |
61.22% |
30 / 49 |
|
0.00% |
0 / 1 |
25.43 | |||
get | |
0.00% |
0 / 34 |
|
0.00% |
0 / 1 |
42 | |||
findNamespace | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
6 | |||
buildSearch | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
searchOne | |
38.46% |
5 / 13 |
|
0.00% |
0 / 1 |
10.83 | |||
searchMulti | |
38.89% |
42 / 108 |
|
0.00% |
0 / 1 |
132.46 | |||
updateNamespacesFromQuery | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
20 | |||
getSearchContext | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getPoolCounterType | |
78.57% |
11 / 14 |
|
0.00% |
0 / 1 |
5.25 | |||
isAutomatedRequest | |
40.00% |
4 / 10 |
|
0.00% |
0 / 1 |
4.94 | |||
getOverriddenConnection | |
60.00% |
3 / 5 |
|
0.00% |
0 / 1 |
3.58 | |||
recordQueryCacheMetrics | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
newLog | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
processRawReturn | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
searchArchive | |
100.00% |
28 / 28 |
|
100.00% |
1 / 1 |
1 | |||
areSearchesTheSame | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
buildInterleaveSearcher | |
50.00% |
5 / 10 |
|
0.00% |
0 / 1 |
6.00 | |||
emptyResultSet | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
3 | |||
applyDebugOptionsToQuery | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
makeSearcher | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
setOffsetLimit | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getOffsetLimit | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
buildFullTextBuilder | |
90.48% |
19 / 21 |
|
0.00% |
0 / 1 |
4.01 |
1 | <?php |
2 | |
3 | namespace CirrusSearch; |
4 | |
5 | use CirrusSearch\Fallbacks\FallbackRunner; |
6 | use CirrusSearch\Fallbacks\SearcherFactory; |
7 | use CirrusSearch\Maintenance\NullPrinter; |
8 | use CirrusSearch\MetaStore\MetaStoreIndex; |
9 | use CirrusSearch\Parser\BasicQueryClassifier; |
10 | use CirrusSearch\Parser\FullTextKeywordRegistry; |
11 | use CirrusSearch\Parser\NamespacePrefixParser; |
12 | use CirrusSearch\Profile\SearchProfileService; |
13 | use CirrusSearch\Query\CountContentWordsBuilder; |
14 | use CirrusSearch\Query\FullTextQueryBuilder; |
15 | use CirrusSearch\Query\KeywordFeature; |
16 | use CirrusSearch\Query\NearMatchQueryBuilder; |
17 | use CirrusSearch\Query\PrefixSearchQueryBuilder; |
18 | use CirrusSearch\Search\BaseCirrusSearchResultSet; |
19 | use CirrusSearch\Search\FullTextResultsType; |
20 | use CirrusSearch\Search\MSearchRequests; |
21 | use CirrusSearch\Search\MSearchResponses; |
22 | use CirrusSearch\Search\ResultsType; |
23 | use CirrusSearch\Search\SearchContext; |
24 | use CirrusSearch\Search\SearchQuery; |
25 | use CirrusSearch\Search\SearchRequestBuilder; |
26 | use CirrusSearch\Search\TeamDraftInterleaver; |
27 | use CirrusSearch\Search\TitleHelper; |
28 | use CirrusSearch\Search\TitleResultsType; |
29 | use Elastica\Exception\RuntimeException; |
30 | use Elastica\Multi\Search as MultiSearch; |
31 | use Elastica\Query; |
32 | use Elastica\Query\BoolQuery; |
33 | use Elastica\Query\MultiMatch; |
34 | use Elastica\Search; |
35 | use MediaWiki\Context\RequestContext; |
36 | use MediaWiki\Exception\MWException; |
37 | use MediaWiki\Logger\LoggerFactory; |
38 | use MediaWiki\MediaWikiServices; |
39 | use MediaWiki\Request\WebRequest; |
40 | use MediaWiki\Status\Status; |
41 | use MediaWiki\Title\Title; |
42 | use MediaWiki\User\User; |
43 | use MediaWiki\WikiMap\WikiMap; |
44 | use Wikimedia\Assert\Assert; |
45 | use Wikimedia\ObjectFactory\ObjectFactory; |
46 | use Wikimedia\Stats\StatsFactory; |
47 | |
48 | /** |
49 | * Performs searches using Elasticsearch. Note that each instance of this class |
50 | * is single use only. |
51 | * |
52 | * This program is free software; you can redistribute it and/or modify |
53 | * it under the terms of the GNU General Public License as published by |
54 | * the Free Software Foundation; either version 2 of the License, or |
55 | * (at your option) any later version. |
56 | * |
57 | * This program is distributed in the hope that it will be useful, |
58 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
59 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
60 | * GNU General Public License for more details. |
61 | * |
62 | * You should have received a copy of the GNU General Public License along |
63 | * with this program; if not, write to the Free Software Foundation, Inc., |
64 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
65 | * http://www.gnu.org/copyleft/gpl.html |
66 | */ |
67 | class Searcher extends ElasticsearchIntermediary implements SearcherFactory { |
68 | public const SUGGESTION_HIGHLIGHT_PRE = '<em>'; |
69 | public const SUGGESTION_HIGHLIGHT_POST = '</em>'; |
70 | public const HIGHLIGHT_PRE_MARKER = ''; // \uE000. Can't be a unicode literal until php7 |
71 | public const HIGHLIGHT_PRE = '<span class="searchmatch">'; |
72 | public const HIGHLIGHT_POST_MARKER = ''; // \uE001 |
73 | public const HIGHLIGHT_POST = '</span>'; |
74 | |
75 | /** |
76 | * Maximum offset + limit depth allowed. As in the deepest possible result |
77 | * to return. Too deep will cause very slow queries. 10,000 feels plenty |
78 | * deep. This should be <= index.max_result_window in elasticsearch. |
79 | */ |
80 | private const MAX_OFFSET_LIMIT = 10000; |
81 | |
82 | /** |
83 | * Identifies the main search in MSearchRequests/MSearchResponses |
84 | */ |
85 | public const MAINSEARCH_MSEARCH_KEY = '__main__'; |
86 | |
87 | /** |
88 | * Identifies the "tested" search request in MSearchRequests/MSearchResponses |
89 | */ |
90 | private const INTERLEAVED_MSEARCH_KEY = '__interleaved__'; |
91 | |
92 | /** |
93 | * @var int search offset |
94 | */ |
95 | protected $offset; |
96 | |
97 | /** |
98 | * @var int maximum number of results |
99 | */ |
100 | protected $limit; |
101 | |
102 | /** |
103 | * @var string sort type |
104 | */ |
105 | private $sort = 'relevance'; |
106 | |
107 | /** |
108 | * @var string index base name to use |
109 | */ |
110 | protected $indexBaseName; |
111 | |
112 | /** |
113 | * Search environment configuration |
114 | * @var SearchConfig |
115 | */ |
116 | protected $config; |
117 | |
118 | /** |
119 | * @var SearchContext |
120 | */ |
121 | protected $searchContext; |
122 | |
123 | /** |
124 | * Indexing type we'll be using. |
125 | * @var string|\Elastica\Index |
126 | */ |
127 | private $index; |
128 | |
129 | /** |
130 | * @var NamespacePrefixParser|null |
131 | */ |
132 | private $namespacePrefixParser; |
133 | /** |
134 | * @var InterwikiResolver |
135 | */ |
136 | protected $interwikiResolver; |
137 | |
138 | /** @var TitleHelper */ |
139 | protected $titleHelper; |
140 | /** |
141 | * @var CirrusSearchHookRunner |
142 | */ |
143 | protected $cirrusSearchHookRunner; |
144 | |
/**
 * Set up a searcher for one search against the given connection.
 *
 * Collaborators that are not supplied are created here: the interwiki
 * resolver comes from the MediaWiki service container, the title helper is
 * built for the current wiki, and the hook runner wraps the global hook
 * container. A fresh SearchContext is always created from the config,
 * namespaces, debug options and hook runner.
 *
 * @param Connection $conn
 * @param int $offset Offset the results by this much
 * @param int $limit Limit the results to this many
 * @param SearchConfig $config Configuration settings
 * @param int[]|null $namespaces Array of namespace numbers to search or null to search all namespaces.
 * @param User|null $user user for which this search is being performed. Attached to slow request logs.
 * @param string|bool $index Base name for index to search from; any falsy value
 *  falls back to the SearchConfig::INDEX_BASE_NAME setting
 * @param CirrusDebugOptions|null $options the debugging options to use or null to use defaults
 * @param NamespacePrefixParser|null $namespacePrefixParser stored as given (may stay null)
 * @param InterwikiResolver|null $interwikiResolver
 * @param TitleHelper|null $titleHelper
 * @param CirrusSearchHookRunner|null $cirrusSearchHookRunner
 * @see CirrusDebugOptions::defaultOptions()
 */
public function __construct(
	Connection $conn,
	$offset,
	$limit,
	SearchConfig $config,
	?array $namespaces = null,
	?User $user = null,
	$index = false,
	?CirrusDebugOptions $options = null,
	?NamespacePrefixParser $namespacePrefixParser = null,
	?InterwikiResolver $interwikiResolver = null,
	?TitleHelper $titleHelper = null,
	?CirrusSearchHookRunner $cirrusSearchHookRunner = null
) {
	$slowSearch = $config->get( 'CirrusSearchSlowSearch' );
	$extraLatency = $config->get( 'CirrusSearchExtraBackendLatency' );
	parent::__construct( $conn, $user, $slowSearch, $extraLatency );

	$this->config = $config;
	$this->setOffsetLimit( $offset, $limit );

	if ( $index ) {
		$this->indexBaseName = $index;
	} else {
		$this->indexBaseName = $config->get( SearchConfig::INDEX_BASE_NAME );
	}

	// TODO: Make these params mandatory once WBCS stops extending this class
	$this->namespacePrefixParser = $namespacePrefixParser;
	$this->interwikiResolver = $interwikiResolver
		?? MediaWikiServices::getInstance()->getService( InterwikiResolver::SERVICE );
	// The title helper default depends on the resolver chosen just above.
	$this->titleHelper = $titleHelper
		?? new TitleHelper( WikiMap::getCurrentWikiId(), $this->interwikiResolver );
	$this->cirrusSearchHookRunner = $cirrusSearchHookRunner
		?? new CirrusSearchHookRunner( MediaWikiServices::getInstance()->getHookContainer() );

	$this->searchContext = new SearchContext(
		$this->config,
		$namespaces,
		$options,
		null,
		null,
		$this->cirrusSearchHookRunner
	);
}
190 | |
/**
 * Unified search public entry-point.
 *
 * Replaces this searcher's context, offset/limit, config and sort with the
 * values carried by $query before dispatching.
 *
 * NOTE: only fulltext search supported for now.
 *
 * @param SearchQuery $query
 * @return Status the parsed query AST (under the 'ast' key) when the debug
 *  options request an AST dump, otherwise the result of the full text search
 * @throws \RuntimeException when the query entry point is not SearchQuery::SEARCH_TEXT
 */
public function search( SearchQuery $query ) {
	if ( $query->getDebugOptions()->isCirrusDumpQueryAST() ) {
		$ast = $query->getParsedQuery()->toArray();
		return Status::newGood( [ 'ast' => $ast ] );
	}

	// TODO: properly pass the profile context name and its params once we have a dispatch service.
	$fallbackRunner = FallbackRunner::create( $query, $this->interwikiResolver );
	$this->searchContext = SearchContext::fromSearchQuery(
		$query,
		$fallbackRunner,
		$this->cirrusSearchHookRunner
	);
	$this->setOffsetLimit( $query->getOffset(), $query->getLimit() );
	$this->config = $query->getSearchConfig();
	$this->sort = $query->getSort();

	if ( $query->getSearchEngineEntryPoint() !== SearchQuery::SEARCH_TEXT ) {
		throw new \RuntimeException( 'Only ' . SearchQuery::SEARCH_TEXT . ' is supported for now' );
	}

	$resultsType = new FullTextResultsType(
		$this->searchContext->getFetchPhaseBuilder(),
		$query->getParsedQuery()->isQueryOfClass( BasicQueryClassifier::COMPLEX_QUERY ),
		$this->titleHelper,
		$query->getExtraFieldsToExtract(),
		$this->searchContext->getConfig()->getElement( 'CirrusSearchDeduplicateInMemory' ) === true
	);
	$this->searchContext->setResultsType( $resultsType );

	return $this->searchTextInternal( $query->getParsedQuery()->getQueryWithoutNsHeader() );
}
224 | |
/**
 * Forward the desired results type to the current search context.
 *
 * @param ResultsType $resultsType results type to return
 */
public function setResultsType( $resultsType ) {
	$context = $this->searchContext;
	$context->setResultsType( $resultsType );
}
231 | |
/**
 * Is this searcher used to return debugging info?
 *
 * Delegates to the debug options held by the search context.
 *
 * @return bool true if the search will return raw output
 */
public function isReturnRaw() {
	$debugOptions = $this->searchContext->getDebugOptions();
	return $debugOptions->isReturnRaw();
}
239 | |
/**
 * Choose how results are sorted. Must be 'relevance', 'title_asc', 'title_desc'.
 *
 * The value is stored as-is; no validation happens here.
 *
 * @param string $sort sort type
 */
public function setSort( $sort ) {
	$this->sort = $sort;
}
247 | |
/**
 * Should this search limit results to the local wiki? If not called the default is false.
 *
 * The flag is recorded on the search context.
 *
 * @param bool $limitSearchToLocalWiki should the results be limited?
 */
public function limitSearchToLocalWiki( $limitSearchToLocalWiki ) {
	$context = $this->searchContext;
	$context->setLimitSearchToLocalWiki( $limitSearchToLocalWiki );
}
255 | |
/**
 * Perform a "near match" title search which is pretty much a prefix match without the prefixes.
 *
 * The query is built into the search context by NearMatchQueryBuilder and
 * then run as a single search.
 *
 * @param string $term text by which to search
 * @return Status status containing results defined by resultsType on success
 */
public function nearMatchTitleSearch( $term ) {
	$builder = new NearMatchQueryBuilder();
	$builder->build( $this->searchContext, $term );

	return $this->searchOne();
}
265 | |
/**
 * Perform a sum over the number of words in the content index.
 *
 * The query is built by CountContentWordsBuilder; the limit is then forced
 * to 1 before the single search runs.
 *
 * @return Status status containing a single integer
 */
public function countContentWords() {
	$builder = new CountContentWordsBuilder();
	$builder->build( $this->searchContext );
	$this->limit = 1;

	return $this->searchOne();
}
275 | |
/**
 * Perform a prefix search.
 *
 * The query is built into the search context by PrefixSearchQueryBuilder and
 * then run as a single search.
 *
 * @param string $term text by which to search
 * @param string[] $variants variants to search for
 * @return Status status containing results defined by resultsType on success
 */
public function prefixSearch( $term, $variants = [] ) {
	$builder = new PrefixSearchQueryBuilder();
	$builder->build( $this->searchContext, $term, $variants );

	return $this->searchOne();
}
286 | |
/**
 * Build full text search for articles with provided term. All the
 * state is applied to $this->searchContext. The returned query
 * builder can be used to build a degraded query if necessary.
 *
 * If the search context carries a SearchQuery whose parse warnings include
 * an "unexpected token" or "unexpected end" error, the degraded query is
 * built right away.
 *
 * @param string $term term to search
 * @return FullTextQueryBuilder
 */
protected function buildFullTextSearch( $term ) {
	// Convert the unicode character 'ideographic whitespace' (U+3000) into
	// standard whitespace. Cirrussearch treats them both as normal
	// whitespace, but the former isn't appropriately trimmed.
	$withPlainSpaces = str_replace( "\xE3\x80\x80", " ", $term );
	$term = trim( $withPlainSpaces );

	// No searching for nothing! That takes forever!
	if ( $term === '' ) {
		$this->searchContext->setResultsPossible( false );
	}

	$profileService = $this->config->getProfileService();
	$builderSettings = $profileService->loadProfileByName(
		SearchProfileService::FT_QUERY_BUILDER,
		$this->searchContext->getFulltextQueryBuilderProfile()
	);
	$keywordRegistry = new FullTextKeywordRegistry( $this->config );
	$features = $keywordRegistry->getKeywords();
	$qb = self::buildFullTextBuilder( $builderSettings, $this->config, $features );

	$qb->build( $this->searchContext, $term );

	$searchQuery = $this->searchContext->getSearchQuery();
	if ( $searchQuery === null ) {
		return $qb;
	}

	$degradeOnParseWarnings = [
		// && test, test AND && test
		'cirrussearch-parse-error-unexpected-token',
		// test AND
		'cirrussearch-parse-error-unexpected-end'
	];
	// Quick hack to avoid sending bad queries to the backend
	foreach ( $searchQuery->getParsedQuery()->getParseWarnings() as $warning ) {
		if ( in_array( $warning->getMessage(), $degradeOnParseWarnings ) ) {
			$qb->buildDegraded( $this->searchContext );
			// One matching warning is enough, the query is degraded only once.
			break;
		}
	}

	return $qb;
}
331 | |
/**
 * Run the full text search for $term and wrap the outcome in a Status.
 *
 * Steps, in order: build the main search (plus an interleaved one when an
 * interleave searcher is available and yields a different request), attach
 * the fallback runner's requests, send everything as one multi search, then
 * retry once with a degraded query if the backend reported a parse error.
 * The various debug "dump" options short-circuit this flow.
 *
 * @param string $term
 * @return Status
 */
private function searchTextInternal( $term ) {
	// Searcher needs to be cloned before any actual query building is done.
	$interleaveSearcher = $this->buildInterleaveSearcher();

	$qb = $this->buildFullTextSearch( $term );
	$mainSearch = $this->buildSearch();
	$searches = MSearchRequests::build( self::MAINSEARCH_MSEARCH_KEY, $mainSearch );
	$description = $this->searchContext->getSearchType() . " search for '" .
		$this->searchContext->getOriginalSearchTerm() . "'";

	if ( !$this->searchContext->areResultsPossible() ) {
		if ( $this->searchContext->getDebugOptions()->isCirrusDumpQuery() ) {
			// return the empty array to suggest that no query will be run
			return Status::newGood( [] );
		}
		$emptyStatus = $this->emptyResultSet();
		if ( !$this->searchContext->getDebugOptions()->isCirrusDumpResult() ) {
			return $emptyStatus;
		}
		$emptyResponses = new MSearchResponses( [ $emptyStatus->getValue() ], [] );
		return Status::newGood( $emptyResponses->dumpResults( $description ) );
	}

	if ( $interleaveSearcher !== null ) {
		$interleaveSearcher->buildFullTextSearch( $term );
		$interleaveSearch = $interleaveSearcher->buildSearch();
		if ( $this->areSearchesTheSame( $mainSearch, $interleaveSearch ) ) {
			// Nothing to compare against: fall back to a plain search.
			$interleaveSearcher = null;
		} else {
			$searches->addRequest( self::INTERLEAVED_MSEARCH_KEY, $interleaveSearch );
		}
	}

	$fallbackRunner = $this->searchContext->getFallbackRunner();
	$fallbackRunner->attachSearchRequests( $searches, $this->connection->getClient() );

	if ( $this->searchContext->getDebugOptions()->isCirrusDumpQuery() ) {
		return $searches->dumpQuery( $description );
	}

	$responses = $this->searchMulti( $searches );
	if ( $responses->hasFailure() ) {
		$failure = $responses->getFailure();
		if ( !ElasticaErrorHandler::isParseError( $failure ) ) {
			return $failure;
		}
		// Rebuild the search context because we need a fresh fetchPhaseBuilder
		$this->searchContext = $this->searchContext->withConfig( $this->config );
		if ( !$qb->buildDegraded( $this->searchContext ) ) {
			return $failure;
		}
		// If that doesn't work we're out of luck but it should.
		// There no guarantee it'll work properly with the syntax
		// we've built above but it'll do _something_ and we'll
		// still work on fixing all the parse errors that come in.
		return $this->searchOne();
	}

	if ( $this->searchContext->getDebugOptions()->isCirrusDumpResult() ) {
		return $responses->dumpResults( $description );
	}

	$rType = $this->getSearchContext()->getResultsType();
	$mainSet = $responses->transformAsResultSet( $rType, self::MAINSEARCH_MSEARCH_KEY );
	if ( $interleaveSearcher === null ) {
		$response = $mainSet;
	} else {
		$interleaver = new TeamDraftInterleaver( $this->searchContext->getOriginalSearchTerm() );
		$testedSet = $responses->transformAsResultSet( $rType, self::INTERLEAVED_MSEARCH_KEY );
		$response = $interleaver->interleave( $mainSet, $testedSet, $this->limit );
	}

	if ( $this->namespacePrefixParser === null ) {
		// NOTE(review): without a namespace prefix parser the returned Status
		// carries no value, so $response is dropped here. Confirm with the
		// callers that construct a Searcher without a parser that this is intended.
		$status = Status::newGood();
	} else {
		$finalResults = $fallbackRunner->run(
			$this,
			$response,
			$responses,
			$this->namespacePrefixParser,
			$this->cirrusSearchHookRunner
		);
		$status = Status::newGood( $finalResults );
		$this->appendMetrics( $fallbackRunner );
	}

	foreach ( $this->searchContext->getWarnings() as $warning ) {
		$status->warning( ...$warning );
	}
	return $status;
}
419 | |
/**
 * Get the pages with the given document ids. Note that the result is a status
 * containing _all_ pages found. It is possible to find more than one page per
 * id if the page is in multiple indexes.
 *
 * The lookup is an ids query sent through the search API. Offset and limit
 * configured on this searcher are ignored: the request size is the number of
 * ids multiplied by the number of index suffixes for the context namespaces.
 *
 * @param string[] $docIds array of document ids
 * @param string[]|bool $sourceFiltering source filtering to apply; when an
 *  array is given, "title" is appended if missing
 * @param bool $usePoolCounter false to disable the pool counter
 * @return Status containing pages found, containing an empty array if not found,
 *  or an error if there was an error
 */
public function get( array $docIds, $sourceFiltering, $usePoolCounter = true ) {
	$connection = $this->getOverriddenConnection();
	$namespaces = $this->searchContext->getNamespaces();
	$indexSuffix = $connection->pickIndexSuffixForNamespaces( $namespaces );

	// The worst case would be to have all ids duplicated in all available indices.
	// We set the limit accordingly
	$allSuffixes = $connection->getAllIndexSuffixesForNamespaces( $namespaces );
	$size = count( $allSuffixes ) * count( $docIds );

	$fetch = function () use ( $docIds, $sourceFiltering, $indexSuffix, $size, $connection ) {
		try {
			$this->startNewLog( 'get of {indexSuffix}.{docIds}', 'get', [
				'indexSuffix' => $indexSuffix,
				'docIds' => $docIds,
			] );
			// Shard timeout not supported on get requests so we just use the client side timeout
			$connection->setTimeout( $this->getClientTimeout( 'get' ) );
			// We use a search query instead of _get/_mget, these methods are
			// theoretically well suited for this kind of job but they are not
			// supported on aliases with multiple indices (content/general)
			$index = $connection->getIndex( $this->indexBaseName, $indexSuffix );
			$idsQuery = new \Elastica\Query( new \Elastica\Query\Ids( $docIds ) );
			if ( is_array( $sourceFiltering ) ) {
				// The title is a required field in the ApiTrait
				if ( !in_array( "title", $sourceFiltering ) ) {
					$sourceFiltering[] = "title";
				}
				$idsQuery->setParam( '_source', $sourceFiltering );
			}
			$idsQuery->addParam( 'stats', 'get' );
			// We ignore limits provided to the searcher
			// otherwise we could return fewer results than
			// the ids requested.
			$idsQuery->setFrom( 0 );
			$idsQuery->setSize( $size );
			$found = $index->search( $idsQuery, [ 'search_type' => 'query_then_fetch' ] );
			self::throwIfNotOk( $connection, $found->getResponse() );
			return $this->success( $found->getResults(), $connection );
		} catch ( \Elastica\Exception\NotFoundException $e ) {
			// NotFoundException just means the field didn't exist.
			// It is up to the caller to decide if that is an error.
			return $this->success( [], $connection );
		} catch ( \Elastica\Exception\ExceptionInterface $e ) {
			return $this->failure( $e, $connection );
		}
	};

	return $usePoolCounter
		? Util::doPoolCounterWork( $this->getPoolCounterType(), $this->user, $fetch )
		: $fetch();
}
486 | |
/**
 * Look up a namespace by name in the metastore namespace store.
 *
 * The lookup runs under the 'CirrusSearch-NamespaceLookup' pool counter.
 *
 * @param string $name namespace name to look up
 * @return Status on success wraps the results returned by the namespace
 *  store; Elastica exceptions are turned into a failure status
 */
private function findNamespace( $name ) {
	return Util::doPoolCounterWork(
		'CirrusSearch-NamespaceLookup',
		$this->user,
		function () use ( $name ) {
			// Resolve the connection before entering the try block: the catch
			// handler hands it to failure(), so it must be defined no matter
			// which statement inside the try throws.
			$connection = $this->getOverriddenConnection();
			try {
				$this->startNewLog( 'lookup namespace for {namespaceName}', 'namespace', [
					'namespaceName' => $name,
					'query' => $name,
				] );
				$connection->setTimeout( $this->getClientTimeout( 'namespace' ) );

				// A bit awkward, but accepted as this is the backup
				// implementation of namespace lookup. Deployments should
				// prefer to install php-intl and use utr30.
				$store = ( new MetaStoreIndex( $connection, new NullPrinter(), $this->config ) )
					->namespaceStore();
				$resultSet = $store->find( $name, [
					'timeout' => $this->getTimeout( 'namespace' ),
				] );
				return $this->success( $resultSet->getResults(), $connection );
			} catch ( \Elastica\Exception\ExceptionInterface $e ) {
				return $this->failure( $e, $connection );
			}
		} );
}
518 | |
/**
 * Turn the current search context into an Elastica search request.
 *
 * Limit, offset, index, sort and the timeout for the context's search type
 * are handed to a SearchRequestBuilder.
 *
 * @return \Elastica\Search
 */
protected function buildSearch() {
	$request = new SearchRequestBuilder(
		$this->searchContext,
		$this->getOverriddenConnection(),
		$this->indexBaseName
	);
	// Each setter result is re-assigned so this behaves exactly like the
	// chained form regardless of what instance the setters return.
	$request = $request->setLimit( $this->limit );
	$request = $request->setOffset( $this->offset );
	$request = $request->setIndex( $this->index );
	$request = $request->setSort( $this->sort );
	$searchType = $this->searchContext->getSearchType();
	$request = $request->setTimeout( $this->getTimeout( $searchType ) );

	return $request->build();
}
532 | |
/**
 * Perform a single-query search.
 *
 * The request built from the search context is wrapped in a one-element
 * multi search. Debug options can short-circuit with a query dump (before
 * anything is sent) or a raw result dump (after a successful response).
 *
 * @return Status
 */
protected function searchOne() {
	$search = $this->buildSearch();
	$description = $this->searchContext->getSearchType() . " search for '" .
		$this->searchContext->getOriginalSearchTerm() . "'";
	$msearch = MSearchRequests::build( self::MAINSEARCH_MSEARCH_KEY, $search );

	if ( $this->searchContext->getDebugOptions()->isCirrusDumpQuery() ) {
		return $msearch->dumpQuery( $description );
	}
	if ( !$this->searchContext->areResultsPossible() ) {
		return $this->emptyResultSet();
	}

	$mresults = $this->searchMulti( $msearch );
	if ( $mresults->hasFailure() ) {
		return $mresults->getFailure();
	}

	$wantsRaw = $this->searchContext->getDebugOptions()->isReturnRaw();
	if ( $wantsRaw ) {
		return $mresults->dumpResults( $description );
	}

	$resultsType = $this->searchContext->getResultsType();
	return $mresults->transformAndGetSingle( $resultsType, self::MAINSEARCH_MSEARCH_KEY );
}
559 | |
/**
 * Powers full-text-like searches including prefix search.
 *
 * Sends all requests of $msearches as one multi search, guarded by the pool
 * counter. When the search context has a positive cache TTL and the debug
 * options allow caching, the backend call is wrapped with a lookup in the
 * main WAN object cache; responses flagged as timed out are never stored.
 *
 * Must not be called when dumping the query (asserted).
 *
 * @param MSearchRequests $msearches
 * @return MSearchResponses search responses; a failure response when the
 *  limit is not positive, when the backend call fails, or when the number
 *  of result sets does not match the number of requests
 */
protected function searchMulti( MSearchRequests $msearches ) {
	$requests = $msearches->getRequests();
	$resultsType = $this->searchContext->getResultsType();
	$debugOptions = $this->searchContext->getDebugOptions();
	Assert::precondition( !$debugOptions->isCirrusDumpQuery(), 'Must not reach this method when dumping the query' );

	// TODO: should this be moved upper in the stack?
	if ( $this->limit <= 0 ) {
		$tooDeep = Status::newFatal( 'cirrussearch-offset-too-large',
			self::MAX_OFFSET_LIMIT, $this->offset );
		return $msearches->failure( $tooDeep );
	}

	$connection = $this->getOverriddenConnection();
	$log = new MultiSearchRequestLog(
		$connection->getClient(),
		"{queryType} search for '{query}'",
		$this->searchContext->getSearchType(),
		[
			'query' => $this->searchContext->getOriginalSearchTerm(),
			'limit' => $this->limit ?: null,
			// Used syntax
			'syntax' => $this->searchContext->getSyntaxUsed(),
		],
		$this->searchContext->getNamespaces() ?: []
	);

	// Similar to indexing support only the bulk code path, rather than
	// single and bulk. The extra overhead should be minimal, and the
	// reduced complexity is welcomed.
	$multiSearch = new MultiSearch( $connection->getClient() );
	$multiSearch->addSearches( $requests );

	$connection->setTimeout( $this->getClientTimeout( $this->searchContext->getSearchType() ) );

	if ( $this->config->get( 'CirrusSearchMoreAccurateScoringMode' ) ) {
		$multiSearch->setSearchType( \Elastica\Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH );
	}

	// Perform the search
	$work = function () use ( $multiSearch, $log, $connection ) {
		$poolType = $this->getPoolCounterType();
		$runQuery = function () use ( $multiSearch, $log, $connection ) {
			// @todo only reports the first error, also turns
			// a partial (single search) error into a complete
			// failure across the board. Should be addressed
			// at some point.
			return $this->runMSearch( $multiSearch, $log, $connection );
		};
		$regexBusyMessage = null;
		if ( $this->searchContext->isSyntaxUsed( 'regex' ) ) {
			$regexBusyMessage = 'cirrussearch-regex-too-busy-error';
		}
		return Util::doPoolCounterWork( $poolType, $this->user, $runQuery, $regexBusyMessage );
	};

	// Wrap with caching if needed, but don't cache debugging queries
	$skipCache = $debugOptions->mustNeverBeCached();
	if ( $this->searchContext->getCacheTtl() > 0 && !$skipCache ) {
		$uncachedWork = $work;
		$work = function () use ( $uncachedWork, $requests, $log, $resultsType ) {
			$services = MediaWikiServices::getInstance();
			$requestStats = Util::getStatsFactory();
			$cache = $services->getMainWANObjectCache();

			// The cache key covers path, options and query body of every
			// request, plus the class of the results type.
			$resultsTypeTag = $resultsType !== null ? get_class( $resultsType ) : "NONE";
			$keyParts = [];
			foreach ( $requests as $request ) {
				$keyParts[] = $request->getPath() .
					serialize( $request->getOptions() ) .
					serialize( $request->getQuery()->toArray() ) .
					$resultsTypeTag;
			}
			$cacheKey = $cache->makeKey( 'cirrussearch', 'search', 'v2', md5(
				implode( '|', $keyParts )
			) );

			$cached = $cache->get( $cacheKey );
			if ( !$cached ) {
				$this->recordQueryCacheMetrics( $requestStats, "miss" );
			} else {
				[ $logVariables, $cachedStatus ] = $cached;
				$this->recordQueryCacheMetrics( $requestStats, "hit" );
				$log->setCachedResult( $logVariables );
				$this->successViaCache( $log );

				if ( !$cachedStatus->isOK() ) {
					LoggerFactory::getInstance( 'CirrusSearch' )
						->warning( 'Cached a Status value that is not OK' );
					$this->recordQueryCacheMetrics( $requestStats, "nok" );
				} else {
					/** @var \Elastica\Multi\ResultSet $cachedMResultSet */
					$cachedMResultSet = $cachedStatus->getValue();
					if ( count( $cachedMResultSet->getResultSets() ) === count( $requests ) ) {
						return $cachedStatus;
					}
					LoggerFactory::getInstance( 'CirrusSearch' )
						->warning( 'Ignoring a cached Multi/ResultSet wanted {nb_queries} response(s) but received {nb_responses}',
							[
								'nb_queries' => count( $requests ),
								'nb_responses' => count( $cachedMResultSet->getResultSets() )
							] );
					$this->recordQueryCacheMetrics( $requestStats, "incoherent" );
				}
			}

			// Cache miss or unusable cache entry: query the backend.
			$freshStatus = $uncachedWork();
			if ( !$freshStatus->isOK() ) {
				return $freshStatus;
			}
			foreach ( $freshStatus->getValue()->getResultSets() as $resultSet ) {
				$responseData = $resultSet->getResponse()->getData();
				if ( !empty( $responseData['timed_out'] ) ) {
					// Partial results are returned to the caller but not stored.
					return $freshStatus;
				}
			}
			$this->recordQueryCacheMetrics( $requestStats, "set" );
			$cache->set(
				$cacheKey,
				[ $log->getLogVariables(), $freshStatus ],
				$this->searchContext->getCacheTtl()
			);

			return $freshStatus;
		};
	}

	$status = $work();

	// @todo Does this need anything special for multi-search changes?
	if ( !$status->isOK() ) {
		return $msearches->failure( $status );
	}

	/** @var \Elastica\Multi\ResultSet $response */
	$response = $status->getValue();
	if ( count( $response->getResultSets() ) !== count( $msearches->getRequests() ) ) {
		// Temp hack to investigate T231023 (use php serialize just in case it has some invalid
		// UTF8 sequences that would prevent this message from being sent to logstash
		LoggerFactory::getInstance( 'CirrusSearch' )
			->warning( "Incoherent response received (#searches != #responses) for {query}: {response}",
				[
					'query' => $this->searchContext->getOriginalSearchTerm(),
					'response' => serialize( $response->getResponse() )
				] );
		return $msearches->failure( Status::newFatal( 'cirrussearch-backend-error' ) );
	}

	$responses = $msearches->toMSearchResponses( $response->getResultSets() );
	if ( $responses->hasTimeout() ) {
		LoggerFactory::getInstance( 'CirrusSearch' )->warning(
			$log->getDescription() . " timed out and only returned partial results!",
			$log->getLogVariables()
		);
		$timeoutWarning = $this->searchContext->isSyntaxUsed( 'regex' )
			? 'cirrussearch-regex-timed-out'
			: 'cirrussearch-timed-out';
		$this->searchContext->addWarning( $timeoutWarning );
	}
	return $responses;
}
723 | |
/**
 * Strip a leading "<namespace>:" prefix from the query string when it resolves
 * to a known namespace.
 *
 * The text before the first colon is handed to findNamespace(), which reaches
 * out to Elasticsearch to perform a normalized lookup against the namespaces
 * (fast, apart from the network hop). When the lookup succeeds and yields at
 * least one match, the prefix and its colon are removed from $query and the
 * search context is restricted to the namespace id of the first match.
 * In every other case $query and the search context are left untouched.
 *
 * @param string &$query Query string; rewritten in place when a namespace is found
 */
public function updateNamespacesFromQuery( &$query ) {
	$separatorPos = strpos( $query, ':' );
	if ( $separatorPos === false ) {
		// Without a colon there is no candidate prefix to look up.
		return;
	}

	$lookup = $this->findNamespace( substr( $query, 0, $separatorPos ) );
	// The failure case is already logged, so only success is handled here.
	if ( !$lookup->isOK() ) {
		return;
	}

	$matches = $lookup->getValue();
	if ( !$matches ) {
		return;
	}

	// Only the first match is used.
	$bestMatch = $matches[ 0 ];
	$query = substr( $query, $separatorPos + 1 );
	$this->searchContext->setNamespaces( [ $bestMatch->namespace_id ] );
}
750 | |
/**
 * Accessor for the search context held by this searcher.
 *
 * @return SearchContext The instance stored in $this->searchContext (not a copy)
 */
public function getSearchContext() {
	return $this->searchContext;
}
757 | |
/**
 * Pick the pool counter name to use for the current request.
 *
 * @return string Pool counter type name
 */
private function getPoolCounterType(): string {
	// Pool counter overrides based on query syntax. The goal is to separate
	// expensive or high-volume traffic into dedicated pools with specific
	// limits. Prefix is only high volume when completion is disabled.
	// The first syntax (in this order) that is in use wins.
	$poolsBySyntax = [
		'deepcat' => 'CirrusSearch-ExpensiveFullText',
		'regex' => 'CirrusSearch-ExpensiveFullText',
		'prefix' => 'CirrusSearch-Prefix',
		'more_like' => 'CirrusSearch-MoreLike',
	];

	// Default pool counter for all search requests. Note that not all
	// possible requests go through Searcher, so this isn't globally
	// definitive.
	$selected = 'CirrusSearch-Search';
	foreach ( $poolsBySyntax as $syntax => $candidate ) {
		if ( $this->searchContext->isSyntaxUsed( $syntax ) ) {
			$selected = $candidate;
			break;
		}
	}

	// Put external automated requests into their own bucket. The main idea
	// here is to allow automated access, but prevent that automation from
	// capping out the pools used by interactive queries.
	// It's not clear when the automation bucket should not override other
	// bucketing decisions; for now it overrides everything except the
	// expensive full text pool, since those queries can be very expensive
	// and usually use a small pool. If both the automation and regex pools
	// filled with regexes it would be significantly more load than expected.
	if ( $selected === 'CirrusSearch-ExpensiveFullText' ) {
		// The automation check is deliberately not evaluated on this path.
		return $selected;
	}

	return $this->isAutomatedRequest() ? 'CirrusSearch-Automated' : $selected;
}
792 | |
/**
 * Report whether the main request looks like external automation.
 *
 * The decision is delegated to Util::looksLikeAutomation(), which receives
 * the search config, the request IP and all request headers. When the IP
 * cannot be obtained the request is never treated as automated.
 *
 * @return bool
 */
private function isAutomatedRequest(): bool {
	$request = RequestContext::getMain()->getRequest();

	try {
		$clientIp = $request->getIP();
	} catch ( MWException $e ) {
		// No IP, typically this means a CLI invocation. We are attempting
		// to segregate external automation; internal automation has its
		// own ability to control configuration and shouldn't be flagged.
		if ( MW_ENTRY_POINT !== 'cli' ) {
			// When can we get here? Is this ever run?
			LoggerFactory::getInstance( 'CirrusSearch' )->info(
				'No IP available during automated request check' );
		}
		return false;
	}

	return Util::looksLikeAutomation( $this->config, $clientIp, $request->getAllHeaders() );
}
812 | |
/**
 * Resolve the connection to use, honouring per-feature cluster overrides.
 *
 * Some queries, like more like this, are quite expensive and can cause
 * latency spikes. The 'CirrusSearchClusterOverrides' setting maps a syntax
 * feature to a cluster; the first configured feature that the current
 * search context uses redirects the query to that cluster.
 *
 * @return Connection The override connection, or $this->connection when no
 *  configured feature is in use
 */
private function getOverriddenConnection() {
	$clusterByFeature = $this->config->get( 'CirrusSearchClusterOverrides' );

	foreach ( $clusterByFeature as $syntaxFeature => $clusterName ) {
		if ( !$this->searchContext->isSyntaxUsed( $syntaxFeature ) ) {
			continue;
		}
		return Connection::getPool( $this->config, $clusterName );
	}

	return $this->connection;
}
828 | |
/**
 * Increment the "query_cache_total" counter for one cache outcome.
 *
 * @param StatsFactory $requestStats Factory the counter is obtained from
 * @param string $cacheStatus Value recorded under the "status" label
 * @param string|null $type Value recorded under the "type" label; when empty,
 *  the search type of the current search context is used instead
 */
protected function recordQueryCacheMetrics( StatsFactory $requestStats, string $cacheStatus, ?string $type = null ): void {
	if ( !$type ) {
		$type = $this->getSearchContext()->getSearchType();
	}

	$counter = $requestStats->getCounter( "query_cache_total" );
	$counter->setLabel( "type", $type )
		->setLabel( "status", $cacheStatus )
		->increment();
}
836 | |
/**
 * Create a request log bound to the client of the (possibly overridden)
 * connection used for this search.
 *
 * @param string $description
 * @param string $queryType
 * @param string[] $extra
 * @return SearchRequestLog
 */
protected function newLog( $description, $queryType, array $extra = [] ) {
	$client = $this->getOverriddenConnection()->getClient();

	return new SearchRequestLog( $client, $description, $queryType, $extra );
}
851 | |
/**
 * If we're supposed to create a raw result, create and return it,
 * or output it and finish.
 *
 * Thin wrapper around Util::processSearchRawReturn() that supplies the
 * debug options of the current search context.
 *
 * @param mixed $result Search result data
 * @param WebRequest $request Request context
 * @return string The new raw result.
 */
public function processRawReturn( $result, WebRequest $request ) {
	$debugOptions = $this->searchContext->getDebugOptions();

	return Util::processSearchRawReturn( $result, $request, $debugOptions );
}
863 | |
/**
 * Search titles in the archive index.
 *
 * The query is a bool query with two "should" clauses, at least one of which
 * must match: a best_fields multi-match over the boosted title fields and a
 * fuzzy match on title.plain.
 *
 * @param string $term
 * @return Status<Title[]>
 */
public function searchArchive( $term ) {
	$context = $this->searchContext;

	$context->setOriginalSearchTerm( $term );
	$term = $context->escaper()->fixupWholeQueryString( $term );
	$this->setResultsType( new TitleResultsType() );

	// This does not support cross-cluster search, but there is also no use case
	// for cross-wiki archive search.
	$this->index = $this->getOverriddenConnection()->getArchiveIndex( $this->indexBaseName );

	// Fuzzy clause: every term must match title.plain, with automatic fuzziness.
	$fuzzyTitle = new \Elastica\Query\MatchQuery();
	$fuzzyTitle->setFieldQuery( 'title.plain', $term );
	$fuzzyTitle->setFieldFuzziness( 'title.plain', 'AUTO' );
	$fuzzyTitle->setFieldOperator( 'title.plain', 'AND' );

	// Exact-ish clause: best single field wins, no tie breaker contribution.
	$boostedTitle = new MultiMatch();
	$boostedTitle->setType( 'best_fields' );
	$boostedTitle->setTieBreaker( 0 );
	$boostedTitle->setQuery( $term );
	$boostedTitle->setFields( [
		'title.near_match^100',
		'title.near_match_asciifolding^75',
		'title.plain^50',
		'title^25'
	] );
	$boostedTitle->setOperator( 'AND' );

	// Assemble the main query; the clauses are added in the original order.
	$mainQuery = new BoolQuery();
	$mainQuery->addShould( $boostedTitle );
	$mainQuery->addShould( $fuzzyTitle );
	$mainQuery->setMinimumShouldMatch( 1 );

	$this->sort = 'just_match';

	$context->setMainQuery( $mainQuery );
	$context->addSyntaxUsed( 'archive' );
	$context->setRescoreProfile( 'empty' );

	return $this->searchOne();
}
910 | |
/**
 * Tests if two search objects are equivalent.
 *
 * Two searches are equivalent when they are the same object, or when their
 * path, options and query array (with integer 'now' values normalized)
 * all agree.
 *
 * @param Search $a
 * @param Search $b
 * @return bool
 */
private function areSearchesTheSame( Search $a, Search $b ) {
	if ( $a === $b ) {
		// Same object, nothing to compare.
		return true;
	}

	// Path and options are not included in toArray(), so check them separately.
	if ( $a->getPath() !== $b->getPath() ) {
		return false;
	}
	if ( $a->getOptions() != $b->getOptions() ) {
		return false;
	}

	$left = $a->getQuery()->toArray();
	$right = $b->getQuery()->toArray();

	// The 'now' value contains a timestamp that may vary between the two
	// queries, so replace it with a fixed placeholder before comparing.
	$pinNow = static function ( &$leaf, $leafKey ) {
		if ( $leafKey === 'now' && is_int( $leaf ) ) {
			$leaf = 12345678;
		}
	};
	array_walk_recursive( $left, $pinNow );
	array_walk_recursive( $right, $pinNow );

	// Simplest form: requires both arrays to have the exact same ordering,
	// types, keys, etc. We could try much harder to remove edge cases,
	// but they probably don't matter too much. The main thing we are
	// looking for is whether the configuration used for interleaved search
	// had no effect on query building. If we get it wrong in some rare
	// cases it should have minimal effects on the interleaved search test.
	return $left === $right;
}
952 | |
/**
 * Build the second searcher used for interleaved search, if applicable.
 *
 * @return self|null A clone of this searcher whose config and search context
 *  use the 'CirrusSearchInterleaveConfig' overrides, or null when
 *  interleaving does not apply to this request
 */
private function buildInterleaveSearcher(): ?self {
	// Don't interleave when we aren't on the first page...
	if ( $this->offset > 0 ) {
		return null;
	}
	// ...or when the user has specified some custom magic query options
	// (override rescore profile, etc).
	if ( $this->searchContext->isDirty() ) {
		return null;
	}

	// Is interleaving configured?
	$interleaveOverrides = $this->config->get( 'CirrusSearchInterleaveConfig' );
	if ( $interleaveOverrides === null ) {
		return null;
	}

	$interleaveConfig = new HashSearchConfig( $interleaveOverrides, [ HashSearchConfig::FLAG_INHERIT ] );

	$interleaved = clone $this;
	$interleaved->config = $interleaveConfig;
	$interleaved->searchContext = $interleaved->searchContext->withConfig( $interleaveConfig );

	return $interleaved;
}
974 | |
/**
 * Wrap an empty result in a good Status that carries the warnings collected
 * on the search context.
 *
 * @return Status
 */
private function emptyResultSet() {
	$context = $this->searchContext;

	$emptyResult = $context->getResultsType()->createEmptyResult();
	if ( $emptyResult instanceof BaseCirrusSearchResultSet ) {
		// TODO: Keywords are very specific to full-text search, while
		// ResultsType and this method are much more general.
		// While awkward, this maintains BC until we decide what to do.
		$emptyResult = BaseCirrusSearchResultSet::emptyResultSet(
			$context->isSpecialKeywordUsed()
		);
	}

	$status = Status::newGood( $emptyResult );
	// Each warning is stored as an argument list for Status::warning().
	foreach ( $context->getWarnings() as $warningArgs ) {
		$status->warning( ...$warningArgs );
	}

	return $status;
}
994 | |
/**
 * Apply the debug options of the current search context to an Elastica query.
 *
 * @param Query $query
 * @return Query Whatever applyDebugOptions() returns for $query
 */
public function applyDebugOptionsToQuery( Query $query ) {
	$debugOptions = $this->searchContext->getDebugOptions();

	return $debugOptions->applyDebugOptions( $query );
}
1003 | |
/**
 * Create a new searcher for the given query.
 *
 * Offset, limit, config, namespaces and debug options are taken from $query;
 * the connection, user and helper services are shared with this instance.
 *
 * @param SearchQuery $query
 * @return self
 */
public function makeSearcher( SearchQuery $query ): self {
	return new self(
		$this->connection,
		$query->getOffset(),
		$query->getLimit(),
		$query->getSearchConfig(),
		$query->getNamespaces(),
		$this->user,
		// NOTE(review): presumably the constructor's index argument - confirm against __construct
		false,
		$query->getDebugOptions(),
		$this->namespacePrefixParser,
		$this->interwikiResolver,
		$this->titleHelper,
		$this->cirrusSearchHookRunner
	);
}
1010 | |
/**
 * Store the paging window, shrinking the limit so that offset + limit never
 * exceeds self::MAX_OFFSET_LIMIT.
 *
 * Note that the offset itself is stored as given; when it is larger than
 * the cap, the stored limit ends up negative.
 *
 * @param int $offset
 * @param int $limit
 */
private function setOffsetLimit( $offset, $limit ) {
	$this->offset = $offset;
	$this->limit = ( $offset + $limit > self::MAX_OFFSET_LIMIT )
		? self::MAX_OFFSET_LIMIT - $offset
		: $limit;
}
1023 | |
/**
 * Expose the effective offset and limit. Visible for testing only: the
 * precondition fails unless MW_PHPUNIT_TEST is defined.
 *
 * @return int[] 2 elements array: [ offset, limit ]
 */
public function getOffsetLimit() {
	$runningUnderPhpUnit = defined( 'MW_PHPUNIT_TEST' );
	Assert::precondition( $runningUnderPhpUnit,
		'getOffsetLimit must only be called for testing purposes' );

	return [ $this->offset, $this->limit ];
}
1033 | |
/**
 * Build a FullTextQueryBuilder defined in the $builderSettings:
 * format is:
 * [
 *  'builder_factory' => callback
 *  'settings' => ...
 * ]
 * where callback must be function that accepts the settings array and returns a FullTextQueryBuilder
 *
 * Legacy version:
 * [
 *  'builder_class' => ClassName
 *  'settings' => ...
 * ]
 * where ClassName must declare a constructor with these arguments:
 *  SearchConfig $config, KeywordFeature[] $features, $settings
 *
 * When both keys are present, 'builder_class' takes precedence.
 *
 * Visible for testing only
 * @param array $builderSettings
 * @param SearchConfig $config
 * @param KeywordFeature[] $features
 * @return FullTextQueryBuilder
 * @throws \ReflectionException
 * @throws \InvalidArgumentException When neither 'builder_class' nor a usable
 *  'builder_factory' is configured
 * @throws RuntimeException When the configured builder does not produce a
 *  FullTextQueryBuilder
 */
final public static function buildFullTextBuilder(
	array $builderSettings,
	SearchConfig $config,
	array $features
): FullTextQueryBuilder {
	if ( isset( $builderSettings['builder_class'] ) ) {
		$objectFactorySpecs = [
			'class' => $builderSettings['builder_class'],
			'args' => [
				$config,
				$features,
				$builderSettings['settings']
			]
		];
	} elseif ( !empty( $builderSettings['builder_factory'] ) ) {
		// !empty() keeps the previous truthiness check but does not emit an
		// "Undefined array key" warning when the key is absent, so a
		// misconfigured profile reaches the exception below cleanly.
		$objectFactorySpecs = [
			'factory' => $builderSettings['builder_factory'],
			'args' => [
				$builderSettings['settings']
			]
		];
	} else {
		throw new \InvalidArgumentException( 'Missing builder_class or builder_factory in the builderSettings' );
	}

	/** @var FullTextQueryBuilder $qb */
	// @phan-suppress-next-line PhanTypeInvalidCallableArraySize
	$qb = ObjectFactory::getObjectFromSpec( $objectFactorySpecs );
	if ( !( $qb instanceof FullTextQueryBuilder ) ) {
		throw new RuntimeException( 'Bad builder class configured.' );
	}

	return $qb;
}
1092 | } |