Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
54.53% covered (warning)
54.53%
259 / 475
38.89% covered (danger)
38.89%
14 / 36
CRAP
0.00% covered (danger)
0.00%
0 / 1
Searcher
54.53% covered (warning)
54.53%
259 / 475
38.89% covered (danger)
38.89%
14 / 36
1427.32
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
15 / 15
100.00% covered (success)
100.00%
1 / 1
2
 search
58.06% covered (warning)
58.06%
18 / 31
0.00% covered (danger)
0.00%
0 / 1
6.84
 setResultsType
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 isReturnRaw
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setSort
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 limitSearchToLocalWiki
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 nearMatchTitleSearch
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 countContentWords
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 prefixSearch
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 buildFullTextSearch
84.21% covered (warning)
84.21%
16 / 19
0.00% covered (danger)
0.00%
0 / 1
5.10
 searchTextInternal
71.43% covered (warning)
71.43%
35 / 49
0.00% covered (danger)
0.00%
0 / 1
18.57
 get
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
20
 getSuggest
0.00% covered (danger)
0.00%
0 / 7
0.00% covered (danger)
0.00%
0 / 1
2
 buildPoolWorkForGet
0.00% covered (danger)
0.00%
0 / 17
0.00% covered (danger)
0.00%
0 / 1
12
 findNamespace
0.00% covered (danger)
0.00%
0 / 19
0.00% covered (danger)
0.00%
0 / 1
6
 buildSearch
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
1
 searchOne
38.46% covered (danger)
38.46%
5 / 13
0.00% covered (danger)
0.00%
0 / 1
10.83
 searchMulti
38.89% covered (danger)
38.89%
42 / 108
0.00% covered (danger)
0.00%
0 / 1
121.65
 updateNamespacesFromQuery
0.00% covered (danger)
0.00%
0 / 13
0.00% covered (danger)
0.00%
0 / 1
20
 getSearchContext
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getPoolCounterType
76.47% covered (warning)
76.47%
13 / 17
0.00% covered (danger)
0.00%
0 / 1
6.47
 isAutomatedRequest
41.67% covered (danger)
41.67%
5 / 12
0.00% covered (danger)
0.00%
0 / 1
7.18
 isDeepWebScrapingRequest
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
3
 getOverriddenConnection
60.00% covered (warning)
60.00%
3 / 5
0.00% covered (danger)
0.00%
0 / 1
3.58
 recordQueryCacheMetrics
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
6
 newLog
0.00% covered (danger)
0.00%
0 / 6
0.00% covered (danger)
0.00%
0 / 1
2
 processRawReturn
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 searchArchive
100.00% covered (success)
100.00%
28 / 28
100.00% covered (success)
100.00%
1 / 1
1
 areSearchesTheSame
78.57% covered (warning)
78.57%
11 / 14
0.00% covered (danger)
0.00%
0 / 1
6.35
 buildInterleaveSearcher
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
4
 emptyResultSet
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
3
 applyDebugOptionsToQuery
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 makeSearcher
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 setOffsetLimit
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
 getOffsetLimit
100.00% covered (success)
100.00%
3 / 3
100.00% covered (success)
100.00%
1 / 1
1
 buildFullTextBuilder
90.48% covered (success)
90.48%
19 / 21
0.00% covered (danger)
0.00%
0 / 1
4.01
1<?php
2
3namespace CirrusSearch;
4
5use CirrusSearch\Fallbacks\FallbackRunner;
6use CirrusSearch\Fallbacks\SearcherFactory;
7use CirrusSearch\Maintenance\NullPrinter;
8use CirrusSearch\MetaStore\MetaStoreIndex;
9use CirrusSearch\Parser\BasicQueryClassifier;
10use CirrusSearch\Parser\FullTextKeywordRegistry;
11use CirrusSearch\Parser\NamespacePrefixParser;
12use CirrusSearch\Profile\SearchProfileService;
13use CirrusSearch\Query\CountContentWordsBuilder;
14use CirrusSearch\Query\FullTextQueryBuilder;
15use CirrusSearch\Query\KeywordFeature;
16use CirrusSearch\Query\NearMatchQueryBuilder;
17use CirrusSearch\Query\PrefixSearchQueryBuilder;
18use CirrusSearch\Search\BaseCirrusSearchResultSet;
19use CirrusSearch\Search\FullTextResultsType;
20use CirrusSearch\Search\MSearchRequests;
21use CirrusSearch\Search\MSearchResponses;
22use CirrusSearch\Search\ResultsType;
23use CirrusSearch\Search\SearchContext;
24use CirrusSearch\Search\SearchQuery;
25use CirrusSearch\Search\SearchRequestBuilder;
26use CirrusSearch\Search\SemanticResultsType;
27use CirrusSearch\Search\TeamDraftInterleaver;
28use CirrusSearch\Search\TitleHelper;
29use CirrusSearch\Search\TitleResultsType;
30use CirrusSearch\SecondTry\SecondTryRunner;
31use Elastica\Exception\RuntimeException;
32use Elastica\Multi\Search as MultiSearch;
33use Elastica\Query;
34use Elastica\Query\BoolQuery;
35use Elastica\Query\MultiMatch;
36use Elastica\Search;
37use MediaWiki\Context\RequestContext;
38use MediaWiki\Exception\MWException;
39use MediaWiki\Logger\LoggerFactory;
40use MediaWiki\MediaWikiServices;
41use MediaWiki\Request\WebRequest;
42use MediaWiki\Status\Status;
43use MediaWiki\Title\Title;
44use MediaWiki\User\User;
45use MediaWiki\WikiMap\WikiMap;
46use Wikimedia\Assert\Assert;
47use Wikimedia\ObjectFactory\ObjectFactory;
48use Wikimedia\Stats\StatsFactory;
49
50/**
51 * Performs searches using Elasticsearch.  Note that each instance of this class
52 * is single use only.
53 *
54 * @license GPL-2.0-or-later
55 */
56class Searcher extends ElasticsearchIntermediary implements SearcherFactory {
57    public const SUGGESTION_HIGHLIGHT_PRE = '<em>';
58    public const SUGGESTION_HIGHLIGHT_POST = '</em>';
59    public const HIGHLIGHT_PRE_MARKER = ''; // \uE000. Can't be a unicode literal until php7
60    public const HIGHLIGHT_PRE = '<span class="searchmatch">';
61    public const HIGHLIGHT_POST_MARKER = ''; // \uE001
62    public const HIGHLIGHT_POST = '</span>';
63
64    /**
65     * Maximum offset + limit depth allowed. As in the deepest possible result
66     * to return. Too deep will cause very slow queries. 10,000 feels plenty
67     * deep. This should be <= index.max_result_window in elasticsearch.
68     */
69    private const MAX_OFFSET_LIMIT = 10000;
70
71    /**
72     * Queries with offset + limit greater than this value are considered
73     * potentially automated and may, after considering other related signals,
74     * be placed into the automated pool counter bucket.
75     */
76    private const AUTOMATED_RESULT_DEPTH_THRESHOLD_SMALL = 100;
77
78    /**
79     * Queries with offset + limit greater than this value are considered
80     * expensive and will use the expensive pool counter
81     */
82    private const AUTOMATED_RESULT_DEPTH_THRESHOLD_LARGE = 1000;
83
84    /**
85     * Identifies the main search in MSearchRequests/MSearchResponses
86     */
87    public const MAINSEARCH_MSEARCH_KEY = '__main__';
88
89    /**
90     * Identifies the "tested" search request in MSearchRequests/MSearchResponses
91     */
92    private const INTERLEAVED_MSEARCH_KEY = '__interleaved__';
93
94    /**
95     * @var int search offset
96     */
97    protected $offset;
98
99    /**
100     * @var int maximum number of result
101     */
102    protected $limit;
103
104    /**
105     * @var string sort type
106     */
107    private $sort = 'relevance';
108
109    /**
110     * @var string index base name to use
111     */
112    protected $indexBaseName;
113
114    /**
115     * Search environment configuration
116     * @var SearchConfig
117     */
118    protected $config;
119
120    /**
121     * @var SearchContext
122     */
123    protected $searchContext;
124
125    /**
126     * Indexing type we'll be using.
127     * @var string|\Elastica\Index
128     */
129    private $index;
130
131    /**
132     * @var NamespacePrefixParser|null
133     */
134    private $namespacePrefixParser;
135    /**
136     * @var InterwikiResolver
137     */
138    protected $interwikiResolver;
139
140    /** @var TitleHelper */
141    protected $titleHelper;
142    /**
143     * @var CirrusSearchHookRunner
144     */
145    protected $cirrusSearchHookRunner;
146
147    /**
148     * @param Connection $conn
149     * @param int $offset Offset the results by this much
150     * @param int $limit Limit the results to this many
151     * @param SearchConfig $config Configuration settings
152     * @param int[]|null $namespaces Array of namespace numbers to search or null to search all namespaces.
153     * @param User|null $user user for which this search is being performed.  Attached to slow request logs.
154     * @param string|bool $index Base name for index to search from, defaults to $wgCirrusSearchIndexBaseName
155     * @param CirrusDebugOptions|null $options the debugging options to use or null to use defaults
156     * @param NamespacePrefixParser|null $namespacePrefixParser
157     * @param InterwikiResolver|null $interwikiResolver
158     * @param TitleHelper|null $titleHelper
159     * @param CirrusSearchHookRunner|null $cirrusSearchHookRunner
160     * @see CirrusDebugOptions::defaultOptions()
161     */
162    public function __construct(
163        Connection $conn, $offset,
164        $limit,
165        SearchConfig $config,
166        ?array $namespaces = null,
167        ?User $user = null,
168        $index = false,
169        ?CirrusDebugOptions $options = null,
170        ?NamespacePrefixParser $namespacePrefixParser = null,
171        ?InterwikiResolver $interwikiResolver = null,
172        ?TitleHelper $titleHelper = null,
173        ?CirrusSearchHookRunner $cirrusSearchHookRunner = null
174    ) {
175        parent::__construct(
176            $conn,
177            $user,
178            $config->get( 'CirrusSearchSlowSearch' ),
179            $config->get( 'CirrusSearchExtraBackendLatency' )
180        );
181        $this->config = $config;
182        $this->setOffsetLimit( $offset, $limit );
183        $this->indexBaseName = $index ?: $config->get( SearchConfig::INDEX_BASE_NAME );
184        // TODO: Make these params mandatory once WBCS stops extending this class
185        $this->namespacePrefixParser = $namespacePrefixParser;
186        $this->interwikiResolver = $interwikiResolver ?? MediaWikiServices::getInstance()->getService( InterwikiResolver::SERVICE );
187        $this->titleHelper = $titleHelper ?? new TitleHelper( WikiMap::getCurrentWikiId(), $this->interwikiResolver );
188        $this->cirrusSearchHookRunner = $cirrusSearchHookRunner ?? new CirrusSearchHookRunner(
189            MediaWikiServices::getInstance()->getHookContainer() );
190        $this->searchContext = new SearchContext( $this->config, $namespaces, $options, null, null, $this->cirrusSearchHookRunner );
191    }
192
193    /**
194     * Unified search public entry-point.
195     *
196     * NOTE: only fulltext search supported for now.
197     * @param SearchQuery $query
198     * @return Status
199     */
200    public function search( SearchQuery $query ) {
201        if ( $query->getDebugOptions()->isCirrusDumpQueryAST() ) {
202            return Status::newGood( [ 'ast' => $query->getParsedQuery()->toArray() ] );
203        }
204        // TODO: properly pass the profile context name and its params once we have a dispatch service.
205        $this->searchContext = SearchContext::fromSearchQuery( $query, FallbackRunner::create( $query, $this->interwikiResolver ),
206            $this->cirrusSearchHookRunner );
207        $this->setOffsetLimit( $query->getOffset(), $query->getLimit() );
208        $this->config = $query->getSearchConfig();
209        $this->sort = $query->getSort();
210
211        if ( $query->getSearchEngineEntryPoint() === SearchQuery::SEARCH_TEXT ) {
212            switch ( $this->searchContext->getProfileContext() ) {
213                case SearchProfileService::CONTEXT_SEMANTIC:
214                    $profileSettings = $this->config->getProfileService()
215                        ->loadProfileByName( SearchProfileService::FT_QUERY_BUILDER,
216                            $this->searchContext->getFulltextQueryBuilderProfile() );
217                    $this->searchContext->setResultsType(
218                        new SemanticResultsType(
219                            $this->titleHelper,
220                            $query->getExtraFieldsToExtract(),
221                            $profileSettings,
222                        )
223                    );
224                    break;
225                default:
226                    $this->searchContext->setResultsType(
227                        new FullTextResultsType(
228                            $this->searchContext->getFetchPhaseBuilder(),
229                            $query->getParsedQuery()->isQueryOfClass( BasicQueryClassifier::COMPLEX_QUERY ),
230                            $this->titleHelper,
231                            $query->getExtraFieldsToExtract(),
232                            $this->searchContext->getConfig()->getElement( 'CirrusSearchDeduplicateInMemory' ) === true
233                        )
234                    );
235            }
236            return $this->searchTextInternal( $query->getParsedQuery()->getQueryWithoutNsHeader() );
237        } else {
238            throw new \RuntimeException( 'Only ' . SearchQuery::SEARCH_TEXT . ' is supported for now' );
239        }
240    }
241
242    /**
243     * @param ResultsType $resultsType results type to return
244     */
245    public function setResultsType( $resultsType ) {
246        $this->searchContext->setResultsType( $resultsType );
247    }
248
249    /**
250     * Is this searcher used to return debugging info?
251     * @return bool true if the search will return raw output
252     */
253    public function isReturnRaw() {
254        return $this->searchContext->getDebugOptions()->isReturnRaw();
255    }
256
257    /**
258     * Set the type of sort to perform.  Must be 'relevance', 'title_asc', 'title_desc'.
259     * @param string $sort sort type
260     */
261    public function setSort( $sort ) {
262        $this->sort = $sort;
263    }
264
265    /**
266     * Should this search limit results to the local wiki?  If not called the default is false.
267     * @param bool $limitSearchToLocalWiki should the results be limited?
268     */
269    public function limitSearchToLocalWiki( $limitSearchToLocalWiki ) {
270        $this->searchContext->setLimitSearchToLocalWiki( $limitSearchToLocalWiki );
271    }
272
273    /**
274     * Perform a "near match" title search which is pretty much a prefix match without the prefixes.
275     * @param string $term text by which to search
276     * @return Status status containing results defined by resultsType on success
277     */
278    public function nearMatchTitleSearch( $term ) {
279        ( new NearMatchQueryBuilder() )->build( $this->searchContext, $term );
280        return $this->searchOne();
281    }
282
283    /**
284     * Perform a sum over the number of words in the content index
285     * @return Status status containing a single integer
286     */
287    public function countContentWords() {
288        ( new CountContentWordsBuilder() )->build( $this->searchContext );
289        $this->limit = 1;
290        return $this->searchOne();
291    }
292
293    /**
294     * Perform a prefix search.
295     * @param string $term text by which to search
296     * @return Status status containing results defined by resultsType on success
297     */
298    public function prefixSearch( string $term, SecondTryRunner $secondTryRunner ): Status {
299        ( new PrefixSearchQueryBuilder( $secondTryRunner ) )->build( $this->searchContext, $term );
300        return $this->searchOne();
301    }
302
303    /**
304     * Build full text search for articles with provided term. All the
305     * state is applied to $this->searchContext. The returned query
306     * builder can be used to build a degraded query if necessary.
307     *
308     * @param string $term term to search
309     * @return FullTextQueryBuilder
310     */
311    protected function buildFullTextSearch( $term ) {
312        // Convert the unicode character 'ideographic whitespace' into standard
313        // whitespace. Cirrussearch treats them both as normal whitespace, but
314        // the preceding isn't appropriately trimmed.
315        // No searching for nothing! That takes forever!
316        $term = trim( str_replace( "\xE3\x80\x80", " ", $term ) );
317        if ( $term === '' ) {
318            $this->searchContext->setResultsPossible( false );
319        }
320
321        $builderSettings = $this->config->getProfileService()
322            ->loadProfileByName( SearchProfileService::FT_QUERY_BUILDER,
323                $this->searchContext->getFulltextQueryBuilderProfile() );
324        $features = ( new FullTextKeywordRegistry( $this->config ) )->getKeywords();
325        $qb = self::buildFullTextBuilder( $builderSettings, $this->config, $features );
326
327        $qb->build( $this->searchContext, $term );
328
329        if ( $this->searchContext->getSearchQuery() !== null ) {
330            $degradeOnParseWarnings = [
331                // && test, test AND && test
332                'cirrussearch-parse-error-unexpected-token',
333                // test AND
334                'cirrussearch-parse-error-unexpected-end'
335            ];
336            // Quick hack to avoid sending bad queries to the backend
337            foreach ( $this->searchContext->getSearchQuery()->getParsedQuery()->getParseWarnings() as $warning ) {
338                if ( in_array( $warning->getMessage(), $degradeOnParseWarnings ) ) {
339                    $qb->buildDegraded( $this->searchContext );
340                    return $qb;
341                }
342            }
343        }
344
345        return $qb;
346    }
347
348    /**
349     * @param string $term
350     * @return Status
351     */
352    private function searchTextInternal( $term ) {
353        // Searcher needs to be cloned before any actual query building is done.
354        $interleaveSearcher = $this->buildInterleaveSearcher();
355
356        $qb = $this->buildFullTextSearch( $term );
357        $mainSearch = $this->buildSearch();
358        $searches = MSearchRequests::build( self::MAINSEARCH_MSEARCH_KEY, $mainSearch );
359        $description = "{$this->searchContext->getSearchType()} search for '{$this->searchContext->getOriginalSearchTerm()}'";
360
361        if ( !$this->searchContext->areResultsPossible() ) {
362            if ( $this->searchContext->getDebugOptions()->isCirrusDumpQuery() ) {
363                // return the empty array to suggest that no query will be run
364                return Status::newGood( [] );
365            }
366            $status = $this->emptyResultSet();
367            if ( $this->searchContext->getDebugOptions()->isCirrusDumpResult() ) {
368                return Status::newGood(
369                    ( new MSearchResponses( [ $status->getValue() ], [] ) )->dumpResults( $description )
370                );
371            }
372            return $status;
373        }
374
375        if ( $interleaveSearcher !== null ) {
376            $interleaveSearcher->buildFullTextSearch( $term );
377            $interleaveSearch = $interleaveSearcher->buildSearch();
378            if ( $this->areSearchesTheSame( $mainSearch, $interleaveSearch ) ) {
379                $interleaveSearcher = null;
380            } else {
381                $searches->addRequest( self::INTERLEAVED_MSEARCH_KEY, $interleaveSearch );
382            }
383        }
384
385        $fallbackRunner = $this->searchContext->getFallbackRunner();
386        $fallbackRunner->attachSearchRequests( $searches, $this->connection->getClient() );
387
388        if ( $this->searchContext->getDebugOptions()->isCirrusDumpQuery() ) {
389            return $searches->dumpQuery( $description );
390        }
391
392        $responses = $this->searchMulti( $searches );
393        if ( $responses->hasFailure() ) {
394            $status = $responses->getFailure();
395            if ( ElasticaErrorHandler::isParseError( $status ) ) {
396                // Rebuild the search context because we need a fresh fetchPhaseBuilder
397                $this->searchContext = $this->searchContext->withConfig( $this->config );
398                if ( $qb->buildDegraded( $this->searchContext ) ) {
399                    // If that doesn't work we're out of luck but it should.
400                    // There no guarantee it'll work properly with the syntax
401                    // we've built above but it'll do _something_ and we'll
402                    // still work on fixing all the parse errors that come in.
403                    $status = $this->searchOne();
404                }
405            }
406            return $status;
407        }
408
409        if ( $this->searchContext->getDebugOptions()->isCirrusDumpResult() ) {
410            return $responses->dumpResults( $description );
411        }
412
413        $rType = $this->getSearchContext()->getResultsType();
414        $mainSet = $responses->transformAsResultSet( $rType, self::MAINSEARCH_MSEARCH_KEY );
415        if ( $interleaveSearcher !== null ) {
416            $interleaver = new TeamDraftInterleaver( $this->searchContext->getOriginalSearchTerm() );
417            $testedSet = $responses->transformAsResultSet( $rType, self::INTERLEAVED_MSEARCH_KEY );
418            $response = $interleaver->interleave( $mainSet, $testedSet, $this->limit );
419        } else {
420            $response = $mainSet;
421        }
422
423        $status = Status::newGood();
424        if ( $this->namespacePrefixParser !== null ) {
425            $status = Status::newGood( $fallbackRunner->run( $this, $response, $responses,
426                $this->namespacePrefixParser, $this->cirrusSearchHookRunner ) );
427            $this->appendMetrics( $fallbackRunner );
428        }
429
430        foreach ( $this->searchContext->getWarnings() as $warning ) {
431            $status->warning( ...$warning );
432        }
433        return $status;
434    }
435
436    /**
437     * Get the page with $docId.  Note that the result is a status containing _all_ pages found.
438     * It is possible to find more then one page if the page is in multiple indexes.
439     * @param string[] $docIds array of document ids
440     * @param string[]|bool $sourceFiltering source filtering to apply
441     * @param bool $usePoolCounter false to disable the pool counter
442     * @return Status containing pages found, containing an empty array if not found,
443     *    or an error if there was an error
444     */
445    public function get( array $docIds, $sourceFiltering, $usePoolCounter = true ) {
446        $connection = $this->getOverriddenConnection();
447        $indexSuffix = $connection->pickIndexSuffixForNamespaces(
448            $this->searchContext->getNamespaces()
449        );
450
451        // The worst case would be to have all ids duplicated in all available indices.
452        // We set the limit accordingly
453        $size = count( $connection->getAllIndexSuffixesForNamespaces(
454            $this->searchContext->getNamespaces()
455        ) );
456        $size *= count( $docIds );
457
458        $query = new \Elastica\Query( new \Elastica\Query\Ids( $docIds ) );
459        if ( is_array( $sourceFiltering ) ) {
460            // The title is a required field in the ApiTrait. Maybe the API should be injecting
461            // that requirement?
462            if ( !in_array( "title", $sourceFiltering ) ) {
463                array_push( $sourceFiltering, "title" );
464            }
465            $query->setParam( '_source', $sourceFiltering );
466        }
467
468        $logMeta = [ 'docIds' => $docIds ];
469
470        $work = $this->buildPoolWorkForGet( $query, $logMeta, $indexSuffix, $size, $connection );
471
472        if ( $usePoolCounter ) {
473            return Util::doPoolCounterWork( $this->getPoolCounterType(), $this->user, $work );
474        } else {
475            return $work();
476        }
477    }
478
479    /**
480     * Get the completion documents associated with $docIds.
481     * @param int[] $docIds
482     * @return Status containing pages found, containing an empty array if not found,
483     *    or an error if there was an error
484     */
485    public function getSuggest( array $docIds ) {
486        $connection = $this->getOverriddenConnection();
487        $query = new \Elastica\Query( new \Elastica\Query\Terms( 'source_doc_id', $docIds ) );
488        $indexSuffix = Connection::TITLE_SUGGEST_INDEX_SUFFIX;
489        $logMeta = [ 'docIds' => $docIds ];
490
491        // Should be two docs per source doc id
492        $size = count( $docIds ) * 3;
493
494        $work = $this->buildPoolWorkForGet( $query, $logMeta, $indexSuffix, $size, $connection );
495        return Util::doPoolCounterWork( $this->getPoolCounterType(), $this->user, $work );
496    }
497
498    private function buildPoolWorkForGet(
499        \Elastica\Query $query,
500        array $logMeta,
501        string $indexSuffix,
502        int $size,
503        Connection $connection
504    ): \Closure {
505        return function () use ( $query, $logMeta, $indexSuffix, $size, $connection ) {
506            try {
507                $this->startNewLog( 'get of {indexSuffix}.{docIds}', 'get', [
508                    'indexSuffix' => $indexSuffix,
509                ] + $logMeta );
510                // Shard timeout not supported on get requests so we just use the client side timeout
511                $connection->setTimeout( $this->getClientTimeout( 'get' ) );
512                // We use a search query instead of _get/_mget, these methods are
513                // theorically well suited for this kind of job but they are not
514                // supported on aliases with multiple indices (content/general)
515                $index = $connection->getIndex( $this->indexBaseName, $indexSuffix );
516                $query->addParam( 'stats', 'get' );
517                // We ignore limits provided to the searcher
518                // otherwize we could return fewer results than
519                // the ids requested.
520                $query->setFrom( 0 );
521                $query->setSize( $size );
522                $resultSet = $index->search( $query, [ 'search_type' => 'query_then_fetch' ] );
523                self::throwIfNotOk( $connection, $resultSet->getResponse() );
524                return $this->success( $resultSet->getResults(), $connection );
525            } catch ( \Elastica\Exception\NotFoundException ) {
526                // NotFoundException just means the field didn't exist.
527                // It is up to the caller to decide if that is an error.
528                return $this->success( [], $connection );
529            } catch ( \Elastica\Exception\ExceptionInterface $e ) {
530                return $this->failure( $e, $connection );
531            }
532        };
533    }
534
535    /**
536     * @param string $name
537     * @return Status
538     */
539    private function findNamespace( $name ) {
540        return Util::doPoolCounterWork(
541            'CirrusSearch-NamespaceLookup',
542            $this->user,
543            function () use ( $name ) {
544                try {
545                    $this->startNewLog( 'lookup namespace for {namespaceName}', 'namespace', [
546                        'namespaceName' => $name,
547                        'query' => $name,
548                    ] );
549                    $connection = $this->getOverriddenConnection();
550                    $connection->setTimeout( $this->getClientTimeout( 'namespace' ) );
551
552                    // A bit awkward, but accepted as this is the backup
553                    // implementation of namespace lookup. Deployments should
554                    // prefer to install php-intl and use utr30.
555                    $store = ( new MetaStoreIndex( $connection, new NullPrinter(), $this->config ) )
556                        ->namespaceStore();
557                    $resultSet = $store->find( $name, [
558                        'timeout' => $this->getTimeout( 'namespace' ),
559                    ] );
560                    return $this->success( $resultSet->getResults(), $connection );
561                } catch ( \Elastica\Exception\ExceptionInterface $e ) {
562                    return $this->failure( $e, $connection );
563                }
564            } );
565    }
566
567    /**
568     * @return \Elastica\Search
569     */
570    protected function buildSearch() {
571        $builder = new SearchRequestBuilder(
572            $this->searchContext, $this->getOverriddenConnection(), $this->indexBaseName );
573        return $builder->setLimit( $this->limit )
574            ->setOffset( $this->offset )
575            ->setIndex( $this->index )
576            ->setSort( $this->sort )
577            ->setTimeout( $this->getTimeout( $this->searchContext->getSearchType() ) )
578            ->build();
579    }
580
581    /**
582     * Perform a single-query search.
583     * @return Status
584     */
585    protected function searchOne() {
586        $search = $this->buildSearch();
587        $description = "{$this->searchContext->getSearchType()} search for '{$this->searchContext->getOriginalSearchTerm()}'";
588        $msearch = MSearchRequests::build( self::MAINSEARCH_MSEARCH_KEY, $search );
589        if ( $this->searchContext->getDebugOptions()->isCirrusDumpQuery() ) {
590            return $msearch->dumpQuery( $description );
591        }
592        if ( !$this->searchContext->areResultsPossible() ) {
593            return $this->emptyResultSet();
594        }
595
596        $mresults = $this->searchMulti( $msearch );
597
598        if ( $mresults->hasFailure() ) {
599            return $mresults->getFailure();
600        }
601
602        if ( $this->searchContext->getDebugOptions()->isReturnRaw() ) {
603            return $mresults->dumpResults( $description );
604        }
605        return $mresults->transformAndGetSingle( $this->searchContext->getResultsType(), self::MAINSEARCH_MSEARCH_KEY );
606    }
607
608    /**
609     * Powers full-text-like searches including prefix search.
610     *
611     * @param MSearchRequests $msearches
612     * @return MSearchResponses search responses
613     */
614    protected function searchMulti( MSearchRequests $msearches ) {
615        $searches = $msearches->getRequests();
616        $contextResultsType = $this->searchContext->getResultsType();
617        $cirrusDebugOptions = $this->searchContext->getDebugOptions();
618        Assert::precondition( !$cirrusDebugOptions->isCirrusDumpQuery(), 'Must not reach this method when dumping the query' );
619
620        // TODO: should this be moved upper in the stack?
621        if ( $this->limit <= 0 ) {
622            return $msearches->failure( Status::newFatal( 'cirrussearch-offset-too-large',
623                self::MAX_OFFSET_LIMIT, $this->offset ) );
624        }
625
626        $connection = $this->getOverriddenConnection();
627        $log = new MultiSearchRequestLog(
628            $connection->getClient(),
629            "{queryType} search for '{query}'",
630            $this->searchContext->getSearchType(),
631            [
632                'query' => $this->searchContext->getOriginalSearchTerm(),
633                'limit' => $this->limit ?: null,
634                // Used syntax
635                'syntax' => $this->searchContext->getSyntaxUsed(),
636            ],
637            $this->searchContext->getNamespaces() ?? []
638        );
639
640        // Similar to indexing support only the bulk code path, rather than
641        // single and bulk. The extra overhead should be minimal, and the
642        // reduced complexity is welcomed.
643        $search = new MultiSearch( $connection->getClient() );
644        $search->addSearches( $searches );
645
646        $connection->setTimeout( $this->getClientTimeout( $this->searchContext->getSearchType() ) );
647
648        if ( $this->config->get( 'CirrusSearchMoreAccurateScoringMode' ) ) {
649            $search->setSearchType( \Elastica\Search::OPTION_SEARCH_TYPE_DFS_QUERY_THEN_FETCH );
650        }
651
652        // Perform the search
653        $work = function () use ( $search, $log, $connection ) {
654            return Util::doPoolCounterWork(
655                $this->getPoolCounterType(),
656                $this->user,
657                function () use ( $search, $log, $connection ) {
658                    // @todo only reports the first error, also turns
659                    // a partial (single search) error into a complete
660                    // failure across the board. Should be addressed
661                    // at some point.
662                    return $this->runMSearch( $search, $log, $connection );
663                },
664                $this->searchContext->isSyntaxUsed( 'regex' ) ?
665                    'cirrussearch-regex-too-busy-error' : null
666            );
667        };
668
669        // Wrap with caching if needed, but don't cache debugging queries
670        $skipCache = $cirrusDebugOptions->mustNeverBeCached();
671        if ( $this->searchContext->getCacheTtl() > 0 && !$skipCache ) {
672            $work = function () use ( $work, $searches, $log, $contextResultsType ) {
673                $services = MediaWikiServices::getInstance();
674                $requestStats = Util::getStatsFactory();
675                $cache = $services->getMainWANObjectCache();
676                $keyParts = [];
677                foreach ( $searches as $key => $search ) {
678                    $keyParts[] = $search->getPath() .
679                        serialize( $search->getOptions() ) .
680                        serialize( $search->getQuery()->toArray() ) .
681                        ( $contextResultsType !== null ? get_class( $contextResultsType ) : "NONE" );
682                }
683                $key = $cache->makeKey( 'cirrussearch', 'search', 'v2', md5(
684                    implode( '|', $keyParts )
685                ) );
686                $cacheResult = $cache->get( $key );
687                if ( $cacheResult ) {
688                    [ $logVariables, $multiResultSet ] = $cacheResult;
689                    $this->recordQueryCacheMetrics( $requestStats, "hit" );
690                    $log->setCachedResult( $logVariables );
691                    $this->successViaCache( $log );
692
693                    if ( $multiResultSet->isOK() ) {
694                        /** @var \Elastica\Multi\ResultSet $cachedMResultSet */
695                        $cachedMResultSet = $multiResultSet->getValue();
696                        if ( count( $cachedMResultSet->getResultSets() ) !== count( $searches ) ) {
697                            LoggerFactory::getInstance( 'CirrusSearch' )
698                                ->warning( 'Ignoring a cached Multi/ResultSet wanted {nb_queries} response(s) but received {nb_responses}',
699                                    [
700                                        'nb_queries' => count( $searches ),
701                                        'nb_responses' => count( $cachedMResultSet->getResultSets() )
702                                    ] );
703                            $this->recordQueryCacheMetrics( $requestStats, "incoherent" );
704                        } else {
705                            return $multiResultSet;
706                        }
707                    } else {
708                        LoggerFactory::getInstance( 'CirrusSearch' )
709                            ->warning( 'Cached a Status value that is not OK' );
710                        $this->recordQueryCacheMetrics( $requestStats, "nok" );
711                    }
712                } else {
713                    $this->recordQueryCacheMetrics( $requestStats, "miss" );
714                }
715
716                $multiResultSet = $work();
717
718                if ( $multiResultSet->isOK() ) {
719                    $isPartialResult = false;
720                    foreach ( $multiResultSet->getValue()->getResultSets() as $resultSet ) {
721                        $responseData = $resultSet->getResponse()->getData();
722                        if ( isset( $responseData['timed_out'] ) && $responseData['timed_out'] ) {
723                            $isPartialResult = true;
724                            break;
725                        }
726                    }
727                    if ( !$isPartialResult ) {
728                        $this->recordQueryCacheMetrics( $requestStats, "set" );
729                        $cache->set(
730                            $key,
731                            [ $log->getLogVariables(), $multiResultSet ],
732                            $this->searchContext->getCacheTtl()
733                        );
734                    }
735                }
736
737                return $multiResultSet;
738            };
739        }
740
741        $status = $work();
742
743        // @todo Does this need anything special for multi-search changes?
744        if ( !$status->isOK() ) {
745            return $msearches->failure( $status );
746        }
747
748        /** @var \Elastica\Multi\ResultSet $response */
749        $response = $status->getValue();
750        if ( count( $response->getResultSets() ) !== count( $msearches->getRequests() ) ) {
751            // Temp hack to investigate T231023 (use php serialize just in case it has some invalid
752            // UTF8 sequences that would prevent this message from being sent to logstash
753            LoggerFactory::getInstance( 'CirrusSearch' )
754                ->warning( "Incoherent response received (#searches != #responses) for {query}: {response}",
755                    [ 'query' => $this->searchContext->getOriginalSearchTerm(), 'response' => serialize( $response->getResponse() ) ] );
756            return $msearches->failure( Status::newFatal( 'cirrussearch-backend-error' ) );
757        }
758        $mreponses = $msearches->toMSearchResponses( $response->getResultSets() );
759        if ( $mreponses->hasTimeout() ) {
760            LoggerFactory::getInstance( 'CirrusSearch' )->warning(
761                $log->getDescription() . " timed out and only returned partial results!",
762                $log->getLogVariables()
763            );
764            $this->searchContext->addWarning( $this->searchContext->isSyntaxUsed( 'regex' )
765                ? 'cirrussearch-regex-timed-out'
766                : 'cirrussearch-timed-out'
767            );
768        }
769        return $mreponses;
770    }
771
772    /**
773     * Attempt to suck a leading namespace followed by a colon from the query string.
774     * Reaches out to Elasticsearch to perform normalized lookup against the namespaces.
775     * Should be fast but for the network hop.
776     *
777     * @param string &$query
778     */
779    public function updateNamespacesFromQuery( &$query ) {
780        $colon = strpos( $query, ':' );
781        if ( $colon === false ) {
782            return;
783        }
784        $namespaceName = substr( $query, 0, $colon );
785        $status = $this->findNamespace( $namespaceName );
786        // Failure case is already logged so just handle success case
787        if ( !$status->isOK() ) {
788            return;
789        }
790        $foundNamespace = $status->getValue();
791        if ( !$foundNamespace ) {
792            return;
793        }
794        $foundNamespace = $foundNamespace[ 0 ];
795        $query = substr( $query, $colon + 1 );
796        $this->searchContext->setNamespaces( [ $foundNamespace->namespace_id ] );
797    }
798
799    /**
800     * @return SearchContext
801     */
802    public function getSearchContext() {
803        return $this->searchContext;
804    }
805
806    private function getPoolCounterType(): string {
807        // Default pool counter for all search requests. Note that not all
808        // possible requests go through Searcher, so this isn't globally
809        // definitive.
810        $pool = 'CirrusSearch-Search';
811        // Pool counter overrides based on query syntax. Goal is to
812        // separate expensive or high-volume traffic into dedicated
813        // pools with specific limits. Prefix is only high volume
814        // when completion is disabled.
815        // TODO: Should this be configuration?
816        $poolCounterTypes = [
817            'deepcat' => 'CirrusSearch-ExpensiveFullText',
818            'regex' => 'CirrusSearch-ExpensiveFullText',
819            'prefix' => 'CirrusSearch-Prefix',
820            'more_like' => 'CirrusSearch-MoreLike',
821            'semantic' => 'CirrusSearch-Semantic',
822        ];
823        foreach ( $poolCounterTypes as $type => $counter ) {
824            if ( $this->searchContext->isSyntaxUsed( $type ) ) {
825                $pool = $counter;
826                break;
827            }
828        }
829        // Put external automated requests into their own bucket The main idea
830        // here is to allow automated access, but prevent that automation from
831        // capping out the pools used by interactive queries.
832        // It's not clear when the automation bucket should not override other
833        // bucketing decisions, for now override everything except Regex since
834        // those can be very expensive and usually use a small pool. If both
835        // the automation and regex pools filled with regexes it would be
836        // significantly more load than expected.
837        if ( $pool !== 'CirrusSearch-ExpensiveFullText' && $this->isAutomatedRequest() ) {
838            $pool = 'CirrusSearch-Automated';
839        } elseif ( $this->offset + $this->limit >= self::AUTOMATED_RESULT_DEPTH_THRESHOLD_LARGE ) {
840            // Deep pagination is always expensive, but if the request was
841            // already flagged as automated leave it in the automated bucket.
842            // We don't want to accidently split a bot's requests into both
843            // buckets, allowing them to use all the capacity of both.
844            $pool = 'CirrusSearch-ExpensiveFullText';
845        }
846        return $pool;
847    }
848
849    private function isAutomatedRequest(): bool {
850        $req = RequestContext::getMain()->getRequest();
851
852        if ( $this->isDeepWebScrapingRequest( $req ) ) {
853            return true;
854        }
855
856        try {
857            $ip = $req->getIP();
858        } catch ( MWException ) {
859            // No IP, typically this means a CLI invocation. We are attempting
860            // to segregate external automation, internal automation has its
861            // own ability to control configuration and shouldn't be flagged
862            if ( MW_ENTRY_POINT === 'cli' ) {
863                return false;
864            }
865            // When can we get here? Is this ever run?
866            LoggerFactory::getInstance( 'CirrusSearch' )->info(
867                'No IP available during automated request check' );
868            return false;
869        }
870        return Util::looksLikeAutomation(
871            $this->config, $ip, $req->getAllHeaders() );
872    }
873
874    /**
875     * Flags requests that look potentially automated
876     *
877     * See https://phabricator.wikimedia.org/T405482
878     */
879    private function isDeepWebScrapingRequest( WebRequest $req ): bool {
880        // Request issued via index.php (web). We could consider api requests,
881        // but the no cookie signal is only useful in the web context.
882        return MW_ENTRY_POINT === 'index'
883            // Request has no cookies (signal for simple bots).
884            && !$req->getHeader( 'Cookie' )
885            // Asks for results well beyond the typical
886            && $this->offset + $this->limit >= self::AUTOMATED_RESULT_DEPTH_THRESHOLD_SMALL;
887    }
888
889    /**
890     * Some queries, like more like this, are quite expensive and can cause
891     * latency spikes. This allows redirecting queries using particular
892     * features to specific clusters.
893     * @return Connection
894     */
895    private function getOverriddenConnection() {
896        $overrides = $this->config->get( 'CirrusSearchClusterOverrides' );
897        foreach ( $overrides as $feature => $cluster ) {
898            if ( $this->searchContext->isSyntaxUsed( $feature ) ) {
899                return Connection::getPool( $this->config, $cluster );
900            }
901        }
902        return $this->connection;
903    }
904
905    protected function recordQueryCacheMetrics( StatsFactory $requestStats, string $cacheStatus, ?string $type = null ): void {
906        $type = $type ?: $this->getSearchContext()->getSearchType();
907        $requestStats->getCounter( "query_cache_total" )
908            ->setLabel( "type", $type )
909            ->setLabel( "status", $cacheStatus )
910            ->increment();
911    }
912
913    /**
914     * @param string $description
915     * @param string $queryType
916     * @param string[] $extra
917     * @return SearchRequestLog
918     */
919    protected function newLog( $description, $queryType, array $extra = [] ) {
920        return new SearchRequestLog(
921            $this->getOverriddenConnection()->getClient(),
922            $description,
923            $queryType,
924            $extra
925        );
926    }
927
928    /**
929     * If we're supposed to create raw result, create and return it,
930     * or output it and finish.
931     *
932     * @template T the type of the result passed and the return value of this function
933     * @param T $result Search result data
934     * @param WebRequest $request Request context
935     * @return T
936     */
937    public function processRawReturn( $result, WebRequest $request ) {
938        return Util::processSearchRawReturn( $result, $request,
939            $this->searchContext->getDebugOptions() );
940    }
941
942    /**
943     * Search titles in archive
944     * @param string $term
945     * @return Status<Title[]>
946     */
947    public function searchArchive( $term ) {
948        $this->searchContext->setOriginalSearchTerm( $term );
949        $term = $this->searchContext->escaper()->fixupWholeQueryString( $term );
950        $this->setResultsType( new TitleResultsType() );
951
952        // This does not support cross-cluster search, but there is also no use case
953        // for cross-wiki archive search.
954        $this->index = $this->getOverriddenConnection()->getArchiveIndex( $this->indexBaseName );
955
956        // Setup the search query
957        $query = new BoolQuery();
958
959        $multi = new MultiMatch();
960        $multi->setType( 'best_fields' );
961        $multi->setTieBreaker( 0 );
962        $multi->setQuery( $term );
963        $multi->setFields( [
964            'title.near_match^100',
965            'title.near_match_asciifolding^75',
966            'title.plain^50',
967            'title^25'
968        ] );
969        $multi->setOperator( 'AND' );
970
971        $fuzzy = new \Elastica\Query\MatchQuery();
972        $fuzzy->setFieldQuery( 'title.plain', $term );
973        $fuzzy->setFieldFuzziness( 'title.plain', 'AUTO' );
974        $fuzzy->setFieldOperator( 'title.plain', 'AND' );
975
976        $query->addShould( $multi );
977        $query->addShould( $fuzzy );
978        $query->setMinimumShouldMatch( 1 );
979
980        $this->sort = 'just_match';
981
982        $this->searchContext->setMainQuery( $query );
983        $this->searchContext->addSyntaxUsed( 'archive' );
984        $this->searchContext->setRescoreProfile( 'empty' );
985
986        return $this->searchOne();
987    }
988
989    /**
990     * Tests if two search objects are equivalent
991     *
992     * @param Search $a
993     * @param Search $b
994     * @return bool
995     */
996    private function areSearchesTheSame( Search $a, Search $b ) {
997        // same object.
998        if ( $a === $b ) {
999            return true;
1000        }
1001
1002        // Check values not included in toArray()
1003        if ( $a->getPath() !== $b->getPath()
1004            || $a->getOptions() != $b->getOptions()
1005        ) {
1006            return false;
1007        }
1008
1009        $aArray = $a->getQuery()->toArray();
1010        $bArray = $b->getQuery()->toArray();
1011
1012        // normalize the 'now' value which contains a timestamp that
1013        // may vary.
1014        $fixNow = static function ( &$value, $key ) {
1015            if ( $key === 'now' && is_int( $value ) ) {
1016                $value = 12345678;
1017            }
1018        };
1019        array_walk_recursive( $aArray, $fixNow );
1020        array_walk_recursive( $bArray, $fixNow );
1021
1022        // Simplest form, requires both arrays to have exact same ordering,
1023        // types, keys, etc. We could try much harder to remove edge cases,
1024        // but they probably don't matter too much. The main thing we are
1025        // looking for is if configuration used for interleaved search didn't
1026        // have an effect query building. If we get it wrong in some rare
1027        // cases it should have minimal effects on the interleaved search test.
1028        return $aArray === $bArray;
1029    }
1030
1031    private function buildInterleaveSearcher(): ?self {
1032        // If we aren't on the first page, or the user has specified
1033        // some custom magic query options (override rescore profile,
1034        // etc) then don't interleave.
1035        if ( $this->offset > 0 || $this->searchContext->isDirty() ) {
1036            return null;
1037        }
1038
1039        // Is interleaving configured?
1040        $overrides = $this->config->get( 'CirrusSearchInterleaveConfig' );
1041        if ( $overrides === null ) {
1042            return null;
1043        }
1044
1045        $config = new HashSearchConfig( $overrides, [ HashSearchConfig::FLAG_INHERIT ] );
1046        $other = clone $this;
1047        $other->config = $config;
1048        $other->searchContext = $other->searchContext->withConfig( $config );
1049
1050        return $other;
1051    }
1052
1053    /**
1054     * @return Status
1055     */
1056    private function emptyResultSet() {
1057        $results = $this->searchContext->getResultsType()->createEmptyResult();
1058        if ( $results instanceof BaseCirrusSearchResultSet ) {
1059            // TODO: Keywords are very specific to full-text search, while
1060            // ResultsType and this method are much more general.
1061            // While awkward, this maintains BC until we decide what to do.
1062            $results = BaseCirrusSearchResultSet::emptyResultSet(
1063                $this->searchContext->isSpecialKeywordUsed()
1064            );
1065        }
1066        $status = Status::newGood( $results );
1067        foreach ( $this->searchContext->getWarnings() as $warning ) {
1068            $status->warning( ...$warning );
1069        }
1070        return $status;
1071    }
1072
1073    /**
1074     * Apply debug options to the elastica query
1075     * @param Query $query
1076     * @return Query
1077     */
1078    public function applyDebugOptionsToQuery( Query $query ) {
1079        return $this->searchContext->getDebugOptions()->applyDebugOptions( $query );
1080    }
1081
1082    public function makeSearcher( SearchQuery $query ): self {
1083        return new self( $this->connection, $query->getOffset(), $query->getLimit(),
1084            $query->getSearchConfig(), $query->getNamespaces(), $this->user,
1085            false, $query->getDebugOptions(), $this->namespacePrefixParser, $this->interwikiResolver,
1086            $this->titleHelper, $this->cirrusSearchHookRunner );
1087    }
1088
1089    /**
1090     * @param int $offset
1091     * @param int $limit
1092     */
1093    private function setOffsetLimit( $offset, $limit ) {
1094        $this->offset = $offset;
1095        if ( $offset + $limit > self::MAX_OFFSET_LIMIT ) {
1096            $this->limit = self::MAX_OFFSET_LIMIT - $offset;
1097        } else {
1098            $this->limit = $limit;
1099        }
1100    }
1101
1102    /**
1103     * Visible for testing
1104     * @return int[] 2 elements array
1105     */
1106    public function getOffsetLimit() {
1107        Assert::precondition( defined( 'MW_PHPUNIT_TEST' ),
1108            'getOffsetLimit must only be called for testing purposes' );
1109        return [ $this->offset, $this->limit ];
1110    }
1111
1112    /**
1113     * Build a FullTextQueryBuilder defined in the $builderSettings:
1114     * format is:
1115     * [
1116     *     'builder_factory' => callback
1117     *     'settings' => ...
1118     * ]
1119     * where callback must be function that accepts the settings array and returns a FullTextQueryBuilder
1120     *
1121     * Legacy version:
1122     * [
1123     *     'builder_class' => ClassName
1124     *     'settings' => ...
1125     * ]
1126     * where ClassName must declare a constructor with these arguments:
1127     *   SearchConfig $config, KeywordFeature[] $features, $settings
1128     *
1129     * Visible for testing only
1130     * @param array $builderSettings
1131     * @param SearchConfig $config
1132     * @param KeywordFeature[] $features
1133     * @return FullTextQueryBuilder
1134     * @throws \ReflectionException
1135     */
1136    final public static function buildFullTextBuilder(
1137        array $builderSettings,
1138        SearchConfig $config,
1139        array $features
1140    ): FullTextQueryBuilder {
1141        if ( isset( $builderSettings['builder_class'] ) ) {
1142            $objectFactorySpecs = [
1143                'class' => $builderSettings['builder_class'],
1144                'args' => [
1145                    $config,
1146                    $features,
1147                    $builderSettings['settings']
1148                ]
1149            ];
1150        } elseif ( $builderSettings['builder_factory'] ) {
1151            $objectFactorySpecs = [
1152                'factory' => $builderSettings['builder_factory'],
1153                'args' => [
1154                    $builderSettings['settings']
1155                ]
1156            ];
1157        } else {
1158            throw new \InvalidArgumentException( 'Missing builder_class or builder_factory in the builderSettings' );
1159        }
1160
1161        /** @var FullTextQueryBuilder $qb */
1162        // @phan-suppress-next-line PhanTypeInvalidCallableArraySize
1163        $qb = ObjectFactory::getObjectFromSpec( $objectFactorySpecs );
1164        if ( !( $qb instanceof FullTextQueryBuilder ) ) {
1165            throw new RuntimeException( 'Bad builder class configured.' );
1166        }
1167
1168        return $qb;
1169    }
1170}