Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
55.68% covered (warning)
55.68%
206 / 370
39.39% covered (danger)
39.39%
13 / 33
CRAP
0.00% covered (danger)
0.00%
0 / 1
Searcher
55.68% covered (warning)
55.68%
206 / 370
39.39% covered (danger)
39.39%
13 / 33
1143.62
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
5
 search
87.50% covered (warning)
87.50%
14 / 16
0.00% covered (danger)
0.00%
0 / 1
3.02
 setResultsType
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 isReturnRaw
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 setSort
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 limitSearchToLocalWiki
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 nearMatchTitleSearch
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 countContentWords
0.00% covered (danger)
0.00%
0 / 3
0.00% covered (danger)
0.00%
0 / 1
2
 prefixSearch
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 buildFullTextSearch
90.00% covered (success)
90.00%
9 / 10
0.00% covered (danger)
0.00%
0 / 1
2.00
 searchTextInternal
72.92% covered (warning)
72.92%
35 / 48
0.00% covered (danger)
0.00%
0 / 1
17.89
 get
0.00% covered (danger)
0.00%
0 / 27
0.00% covered (danger)
0.00%
0 / 1
42
 findNamespace
0.00% covered (danger)
0.00%
0 / 13
0.00% covered (danger)
0.00%
0 / 1
6
 buildSearch
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
1
 searchOne
38.46% covered (danger)
38.46%
5 / 13
0.00% covered (danger)
0.00%
0 / 1
10.83
 searchMulti
37.23% covered (danger)
37.23%
35 / 94
0.00% covered (danger)
0.00%
0 / 1
141.68
 updateNamespacesFromQuery
0.00% covered (danger)
0.00%
0 / 13
0.00% covered (danger)
0.00%
0 / 1
20
 getSearchContext
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getPoolCounterType
62.50% covered (warning)
62.50%
5 / 8
0.00% covered (danger)
0.00%
0 / 1
6.32
 isAutomatedRequest
44.44% covered (danger)
44.44%
4 / 9
0.00% covered (danger)
0.00%
0 / 1
4.54
 getOverriddenConnection
60.00% covered (warning)
60.00%
3 / 5
0.00% covered (danger)
0.00%
0 / 1
3.58
 getQueryCacheStatsKey
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 newLog
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 processRawReturn
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 searchArchive
100.00% covered (success)
100.00%
23 / 23
100.00% covered (success)
100.00%
1 / 1
1
 areSearchesTheSame
76.92% covered (warning)
76.92%
10 / 13
0.00% covered (danger)
0.00%
0 / 1
6.44
 buildInterleaveSearcher
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
4
 emptyResultSet
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
3
 applyDebugOptionsToQuery
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 makeSearcher
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
1
 setOffsetLimit
100.00% covered (success)
100.00%
4 / 4
100.00% covered (success)
100.00%
1 / 1
2
 getOffsetLimit
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 buildFullTextBuilder
81.82% covered (warning)
81.82%
9 / 11
0.00% covered (danger)
0.00%
0 / 1
4.10
1<?php
2
3namespace CirrusSearch;
4
5use CirrusSearch\Fallbacks\FallbackRunner;
6use CirrusSearch\Fallbacks\SearcherFactory;
7use CirrusSearch\Maintenance\NullPrinter;
8use CirrusSearch\MetaStore\MetaStoreIndex;
9use CirrusSearch\Parser\BasicQueryClassifier;
10use CirrusSearch\Parser\FullTextKeywordRegistry;
11use CirrusSearch\Parser\NamespacePrefixParser;
12use CirrusSearch\Profile\SearchProfileService;
13use CirrusSearch\Query\CountContentWordsBuilder;
14use CirrusSearch\Query\FullTextQueryBuilder;
15use CirrusSearch\Query\KeywordFeature;
16use CirrusSearch\Query\NearMatchQueryBuilder;
17use CirrusSearch\Query\PrefixSearchQueryBuilder;
18use CirrusSearch\Search\BaseCirrusSearchResultSet;
19use CirrusSearch\Search\FullTextResultsType;
20use CirrusSearch\Search\MSearchRequests;
21use CirrusSearch\Search\MSearchResponses;
22use CirrusSearch\Search\ResultsType;
23use CirrusSearch\Search\SearchContext;
24use CirrusSearch\Search\SearchQuery;
25use CirrusSearch\Search\SearchRequestBuilder;
26use CirrusSearch\Search\TeamDraftInterleaver;
27use CirrusSearch\Search\TitleHelper;
28use CirrusSearch\Search\TitleResultsType;
29use Elastica\Exception\RuntimeException;
30use Elastica\Multi\Search as MultiSearch;
31use Elastica\Query;
32use Elastica\Query\BoolQuery;
33use Elastica\Query\MultiMatch;
34use Elastica\Search;
35use MediaWiki\Logger\LoggerFactory;
36use MediaWiki\MediaWikiServices;
37use RequestContext;
38use Status;
39use Title;
40use User;
41use WebRequest;
42use WikiMap;
43use Wikimedia\Assert\Assert;
44use Wikimedia\ObjectFactory\ObjectFactory;
45
46/**
47 * Performs searches using Elasticsearch.  Note that each instance of this class
48 * is single use only.
49 *
50 * This program is free software; you can redistribute it and/or modify
51 * it under the terms of the GNU General Public License as published by
52 * the Free Software Foundation; either version 2 of the License, or
53 * (at your option) any later version.
54 *
55 * This program is distributed in the hope that it will be useful,
56 * but WITHOUT ANY WARRANTY; without even the implied warranty of
57 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
58 * GNU General Public License for more details.
59 *
60 * You should have received a copy of the GNU General Public License along
61 * with this program; if not, write to the Free Software Foundation, Inc.,
62 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
63 * http://www.gnu.org/copyleft/gpl.html
64 */
65class Searcher extends ElasticsearchIntermediary implements SearcherFactory {
66    public const SUGGESTION_HIGHLIGHT_PRE = '<em>';
67    public const SUGGESTION_HIGHLIGHT_POST = '</em>';
68    public const HIGHLIGHT_PRE_MARKER = ''; // \uE000. Can't be a unicode literal until php7
69    public const HIGHLIGHT_PRE = '<span class="searchmatch">';
70    public const HIGHLIGHT_POST_MARKER = ''; // \uE001
71    public const HIGHLIGHT_POST = '</span>';
72
73    /**
74     * Maximum offset + limit depth allowed. As in the deepest possible result
75     * to return. Too deep will cause very slow queries. 10,000 feels plenty
76     * deep. This should be <= index.max_result_window in elasticsearch.
77     */
78    private const MAX_OFFSET_LIMIT = 10000;
79
80    /**
81     * Identifies the main search in MSearchRequests/MSearchResponses
82     */
83    public const MAINSEARCH_MSEARCH_KEY = '__main__';
84
85    /**
86     * Identifies the "tested" search request in MSearchRequests/MSearchResponses
87     */
88    private const INTERLEAVED_MSEARCH_KEY = '__interleaved__';
89
90    /**
91     * @var int search offset
92     */
93    protected $offset;
94
95    /**
96     * @var int maximum number of result
97     */
98    protected $limit;
99
100    /**
101     * @var string sort type
102     */
103    private $sort = 'relevance';
104
105    /**
106     * @var string index base name to use
107     */
108    protected $indexBaseName;
109
110    /**
111     * Search environment configuration
112     * @var SearchConfig
113     */
114    protected $config;
115
116    /**
117     * @var SearchContext
118     */
119    protected $searchContext;
120
121    /**
122     * Indexing type we'll be using.
123     * @var string|\Elastica\Index
124     */
125    private $index;
126
127    /**
128     * @var NamespacePrefixParser|null
129     */
130    private $namespacePrefixParser;
131    /**
132     * @var InterwikiResolver
133     */
134    protected $interwikiResolver;
135
136    /** @var TitleHelper */
137    protected $titleHelper;
138    /**
139     * @var CirrusSearchHookRunner
140     */
141    protected $cirrusSearchHookRunner;
142
143    /**
144     * @param Connection $conn
145     * @param int $offset Offset the results by this much
146     * @param int $limit Limit the results to this many
147     * @param SearchConfig $config Configuration settings
148     * @param int[]|null $namespaces Array of namespace numbers to search or null to search all namespaces.
149     * @param User|null $user user for which this search is being performed.  Attached to slow request logs.
150     * @param string|bool $index Base name for index to search from, defaults to $wgCirrusSearchIndexBaseName
151     * @param CirrusDebugOptions|null $options the debugging options to use or null to use defaults
152     * @param NamespacePrefixParser|null $namespacePrefixParser
153     * @param InterwikiResolver|null $interwikiResolver
154     * @param TitleHelper|null $titleHelper
155     * @param CirrusSearchHookRunner|null $cirrusSearchHookRunner
156     * @see CirrusDebugOptions::defaultOptions()
157     */
158    public function __construct(
159        Connection $conn, $offset,
160        $limit,
161        SearchConfig $config,
162        array $namespaces = null,
163        User $user = null,
164        $index = false,
165        CirrusDebugOptions $options = null,
166        NamespacePrefixParser $namespacePrefixParser = null,
167        InterwikiResolver $interwikiResolver = null,
168        TitleHelper $titleHelper = null,
169        CirrusSearchHookRunner $cirrusSearchHookRunner = null
170    ) {
171        parent::__construct(
172            $conn,
173            $user,
174            $config->get( 'CirrusSearchSlowSearch' ),
175            $config->get( 'CirrusSearchExtraBackendLatency' )
176        );
177        $this->config = $config;
178        $this->setOffsetLimit( $offset, $limit );
179        $this->indexBaseName = $index ?: $config->get( SearchConfig::INDEX_BASE_NAME );
180        // TODO: Make these params mandatory once WBCS stops extending this class
181        $this->namespacePrefixParser = $namespacePrefixParser;
182        $this->interwikiResolver = $interwikiResolver ?: MediaWikiServices::getInstance()->getService( InterwikiResolver::SERVICE );
183        $this->titleHelper = $titleHelper ?: new TitleHelper( WikiMap::getCurrentWikiId(), $this->interwikiResolver );
184        $this->cirrusSearchHookRunner = $cirrusSearchHookRunner ?: new CirrusSearchHookRunner(
185            MediaWikiServices::getInstance()->getHookContainer() );
186        $this->searchContext = new SearchContext( $this->config, $namespaces, $options, null, null, $this->cirrusSearchHookRunner );
187    }
188
189    /**
190     * Unified search public entry-point.
191     *
192     * NOTE: only fulltext search supported for now.
193     * @param SearchQuery $query
194     * @return Status
195     */
196    public function search( SearchQuery $query ) {
197        if ( $query->getDebugOptions()->isCirrusDumpQueryAST() ) {
198            return Status::newGood( [ 'ast' => $query->getParsedQuery()->toArray() ] );
199        }
200        // TODO: properly pass the profile context name and its params once we have a dispatch service.
201        $this->searchContext = SearchContext::fromSearchQuery( $query, FallbackRunner::create( $query, $this->interwikiResolver ),
202            $this->cirrusSearchHookRunner );
203        $this->setOffsetLimit( $query->getOffset(), $query->getLimit() );
204        $this->config = $query->getSearchConfig();
205        $this->sort = $query->getSort();
206
207        if ( $query->getSearchEngineEntryPoint() === SearchQuery::SEARCH_TEXT ) {
208            $this->searchContext->setResultsType(
209                new FullTextResultsType(
210                    $this->searchContext->getFetchPhaseBuilder(),
211                    $query->getParsedQuery()->isQueryOfClass( BasicQueryClassifier::COMPLEX_QUERY ),
212                    $this->titleHelper,
213                    $query->getExtraFieldsToExtract()
214                )
215            );
216            return $this->searchTextInternal( $query->getParsedQuery()->getQueryWithoutNsHeader() );
217        } else {
218            throw new \RuntimeException( 'Only ' . SearchQuery::SEARCH_TEXT . ' is supported for now' );
219        }
220    }
221
222    /**
223     * @param ResultsType $resultsType results type to return
224     */
225    public function setResultsType( $resultsType ) {