MediaWiki master
ApiQuerySearch.php
Go to the documentation of this file.
1<?php
9namespace MediaWiki\Api;
10
21use Wikimedia\Timestamp\TimestampFormat as TS;
22
29 use \MediaWiki\Api\SearchApi;
30
31 private TitleMatcher $titleMatcher;
32
/**
 * Set up the module under the 'sr' parameter prefix and keep hold of the
 * injected search services.
 *
 * @param ApiQuery $query
 * @param string $moduleName
 * @param SearchEngineConfig $searchEngineConfig
 * @param SearchEngineFactory $searchEngineFactory
 * @param TitleMatcher $titleMatcher
 */
public function __construct(
	ApiQuery $query,
	string $moduleName,
	SearchEngineConfig $searchEngineConfig,
	SearchEngineFactory $searchEngineFactory,
	TitleMatcher $titleMatcher
) {
	parent::__construct( $query, $moduleName, 'sr' );

	// Only this class uses the title matcher (for 'nearmatch' searches)
	$this->titleMatcher = $titleMatcher;

	// These two services are also needed by the SearchApi trait
	$this->searchEngineFactory = $searchEngineFactory;
	$this->searchEngineConfig = $searchEngineConfig;
}
46
/**
 * Run the module as a plain list module: results go straight into the
 * API result, so no page set is handed to run().
 */
public function execute() {
	$this->run( null );
}
50
/**
 * Run the module as a generator, feeding the matched titles into the
 * given page set instead of the API result.
 *
 * @param mixed $resultPageSet Page set that run() populates
 */
public function executeGenerator( $resultPageSet ) {
	$pageSet = $resultPageSet;
	$this->run( $pageSet );
}
55
/**
 * Perform the search and publish the results.
 *
 * In list mode ($resultPageSet === null) each hit is added to the API
 * result under [ 'query', <module name> ], together with the requested
 * search metadata and any interwiki results. In generator mode the hit
 * titles are loaded into the page set and each title gets its result data
 * plus a 1-based 'index' attached as generator data.
 *
 * @param mixed|null $resultPageSet Page set to populate in generator mode
 *  (presumably an ApiPageSet -- confirm against the caller), or null to
 *  write into the API result.
 */
private function run( $resultPageSet = null ) {
	$params = $this->extractRequestParams();

	// Extract parameters
	$query = $params['search'];
	$what = $params['what'];
	$interwiki = $params['interwiki'];
	// Turn the multi-value lists into maps so membership is an isset() check
	$searchInfo = array_fill_keys( $params['info'], true );
	$prop = array_fill_keys( $params['prop'], true );

	// Create search engine instance and set options
	$search = $this->buildSearchEngine( $params );
	if ( isset( $params['sort'] ) ) {
		$search->setSort( $params['sort'] );
	}
	$search->setFeatureData( 'rewrite', (bool)$params['enablerewrites'] );
	$search->setFeatureData( 'interwiki', (bool)$interwiki );
	// Hint to some SearchEngines about what snippets we would like returned
	$search->setFeatureData( 'snippets', $this->decideSnippets( $prop ) );

	// Perform the actual search
	if ( $what === 'text' ) {
		$matches = $search->searchText( $query );
	} elseif ( $what === 'title' ) {
		$matches = $search->searchTitle( $query );
	} elseif ( $what === 'nearmatch' ) {
		// near matches must receive the user input as provided, otherwise
		// the near matches within namespaces are lost.
		$matches = $this->titleMatcher->getNearMatchResultSet( $params['search'] );
	} else {
		// We default to title searches; this is a terrible legacy
		// of the way we initially set up the MySQL fulltext-based
		// search engine with separate title and text fields.
		// In the future, the default should be for a combined index.
		$what = 'title';
		$matches = $search->searchTitle( $query );

		// Not all search engines support a separate title search,
		// for instance the Lucene-based engine we use on Wikipedia.
		// In this case, fall back to full-text search (which will
		// include titles in it!)
		if ( $matches === null ) {
			$what = 'text';
			$matches = $search->searchText( $query );
		}
	}

	// The engine may hand back either a result set or a Status wrapping one
	if ( $matches instanceof Status ) {
		$status = $matches;
		$matches = $status->getValue();
	} else {
		$status = null;
	}

	if ( $status ) {
		if ( $status->isOK() ) {
			// OK status: pass its messages to the error formatter and carry on
			$this->getMain()->getErrorFormatter()->addMessagesFromStatus(
				$this->getModuleName(),
				$status
			);
		} else {
			$this->dieStatus( $status );
		}
	} elseif ( $matches === null ) {
		// No status and no result set: this kind of search is unavailable
		$this->dieWithError( [ 'apierror-searchdisabled', $what ], "search-{$what}-disabled" );
	}

	$apiResult = $this->getResult();
	// Add search meta data to result
	if ( isset( $searchInfo['totalhits'] ) ) {
		$totalhits = $matches->getTotalHits();
		if ( $totalhits !== null ) {
			$apiResult->addValue( [ 'query', 'searchinfo' ],
				'totalhits', $totalhits );
			if ( $matches->isApproximateTotalHits() ) {
				$apiResult->addValue( [ 'query', 'searchinfo' ],
					'approximate_totalhits', $matches->isApproximateTotalHits() );
			}
		}
	}
	if ( isset( $searchInfo['suggestion'] ) && $matches->hasSuggestion() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestion', $matches->getSuggestionQuery() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestionsnippet', HtmlArmor::getHtml( $matches->getSuggestionSnippet() ) );
	}
	if ( isset( $searchInfo['rewrittenquery'] ) && $matches->hasRewrittenQuery() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquery', $matches->getQueryAfterRewrite() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquerysnippet', HtmlArmor::getHtml( $matches->getQueryAfterRewriteSnippet() ) );
	}

	$titles = [];
	$data = [];
	$count = 0;

	if ( $matches->hasMoreResults() ) {
		$this->setContinueEnumParameter( 'offset', $params['offset'] + $params['limit'] );
	}

	foreach ( $matches as $result ) {
		// Counted before the skip below, so skipped results still advance the offset
		$count++;
		// Silently skip broken and missing titles
		if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
			continue;
		}

		$vals = $this->getSearchResultData( $result, $prop );

		if ( $resultPageSet === null ) {
			if ( $vals ) {
				// Add item to results and see whether it fits
				$fit = $apiResult->addValue( [ 'query', $this->getModuleName() ], null, $vals );
				if ( !$fit ) {
					// Continue from the result that did not fit
					$this->setContinueEnumParameter( 'offset', $params['offset'] + $count - 1 );
					break;
				}
			}
		} else {
			// $titles and $data are kept in step; the shared key is reused below
			$titles[] = $result->getTitle();
			$data[] = $vals ?: [];
		}
	}

	// Here we assume interwiki results do not count with
	// regular search results. We may want to reconsider this
	// if we ever return a lot of interwiki results or want pagination
	// for them.
	// Interwiki results inside main result set
	$canAddInterwiki = (bool)$params['enablerewrites'] && ( $resultPageSet === null );
	if ( $canAddInterwiki ) {
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'additional',
			ISearchResultSet::INLINE_RESULTS );
	}

	// Interwiki results outside main result set
	if ( $interwiki && $resultPageSet === null ) {
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'interwiki',
			ISearchResultSet::SECONDARY_RESULTS );
	}

	if ( $resultPageSet === null ) {
		$apiResult->addIndexedTagName( [
			'query', $this->getModuleName()
		], 'p' );
	} else {
		// When generator data is merged for redirects, keep the lowest 'index'
		$resultPageSet->setRedirectMergePolicy( static function ( $current, $new ) {
			if ( !isset( $current['index'] ) || $new['index'] < $current['index'] ) {
				$current['index'] = $new['index'];
			}
			return $current;
		} );
		$resultPageSet->populateFromTitles( $titles );
		// 'index' is 1-based and continues from the requested offset
		$offset = $params['offset'] + 1;
		foreach ( $titles as $index => $title ) {
			$resultPageSet->setGeneratorData(
				$title,
				$data[ $index ] + [ 'index' => $index + $offset ]
			);
		}
	}
}
225
/**
 * Build the API representation of a single search result.
 *
 * @param SearchResult $result
 * @param array $prop Requested properties, as a map keyed by property name
 * @return array|null Result data, or null when the result has a broken
 *  title or a missing revision
 */
private function getSearchResultData( SearchResult $result, $prop ) {
	// Broken titles and missing revisions yield no data at all
	if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
		return null;
	}

	$title = $result->getTitle();
	$vals = [];
	ApiQueryBase::addTitleInfo( $vals, $title );
	$vals['pageid'] = $title->getArticleID();

	// Properties backed by exactly one SearchResult getter, listed in the
	// order they are written to the output
	$getters = [
		'size' => static fn () => $result->getByteSize(),
		'wordcount' => static fn () => $result->getWordCount(),
		'snippet' => static fn () => $result->getTextSnippet(),
		'timestamp' => static fn () => wfTimestamp( TS::ISO_8601, $result->getTimestamp() ),
		'titlesnippet' => static fn () => $result->getTitleSnippet(),
		'categorysnippet' => static fn () => $result->getCategorySnippet(),
	];
	foreach ( $getters as $name => $getter ) {
		if ( isset( $prop[$name] ) ) {
			$vals[$name] = $getter();
		}
	}

	// Redirect details are only available when the result came via a redirect
	$redirectTitle = $result->getRedirectTitle();
	if ( $redirectTitle !== null ) {
		if ( isset( $prop['redirecttitle'] ) ) {
			$vals['redirecttitle'] = $redirectTitle->getPrefixedText();
		}
		if ( isset( $prop['redirectsnippet'] ) ) {
			$vals['redirectsnippet'] = $result->getRedirectSnippet();
		}
	}

	// Likewise, section details need a section title on the result
	$sectionTitle = $result->getSectionTitle();
	if ( $sectionTitle !== null ) {
		if ( isset( $prop['sectiontitle'] ) ) {
			$vals['sectiontitle'] = $sectionTitle->getFragment();
		}
		if ( isset( $prop['sectionsnippet'] ) ) {
			$vals['sectionsnippet'] = $result->getSectionSnippet();
		}
	}

	if ( isset( $prop['isfilematch'] ) ) {
		$vals['isfilematch'] = $result->isFileMatch();
	}

	if ( isset( $prop['extensiondata'] ) ) {
		// Augmented data is organized as a map: augmentorName => data.
		// It is only emitted when there is something to emit.
		$extra = $result->getExtensionData();
		if ( $extra ) {
			$vals['extensiondata'] = ApiResult::addMetadataToResultVars( $extra );
		}
	}

	return $vals;
}
293
/**
 * Add the interwiki results of the given type to the API result.
 *
 * Each result is stored under [ 'query', $section . <module name>,
 * <interwiki prefix> ]. When at least one interwiki result set reports a
 * total hit count, the summed total is stored under
 * [ 'query', $section . 'searchinfo' ].
 *
 * @param ISearchResultSet $matches Main result set carrying the interwiki results
 * @param ApiResult $apiResult Result object to write into
 * @param array $prop Requested properties, as a map keyed by property name
 * @param string $section Prefix for the result keys (callers pass
 *  'additional' or 'interwiki')
 * @param mixed $type Interwiki result type, passed through to
 *  hasInterwikiResults() and getInterwikiResults()
 * @return int|null Summed total hits, or null when no interwiki result set
 *  reported a total
 */
private function addInterwikiResults(
	ISearchResultSet $matches, ApiResult $apiResult, $prop,
	$section, $type
) {
	$totalhits = null;
	$approximateTotalHits = false;
	if ( $matches->hasInterwikiResults( $type ) ) {
		foreach ( $matches->getInterwikiResults( $type ) as $interwikiMatches ) {
			// Include number of results if requested
			$interwikiTotalHits = $interwikiMatches->getTotalHits();
			if ( $interwikiTotalHits !== null ) {
				$totalhits = ( $totalhits ?? 0 ) + $interwikiTotalHits;
				$approximateTotalHits = $approximateTotalHits || $interwikiMatches->isApproximateTotalHits();
			}

			foreach ( $interwikiMatches as $result ) {
				$title = $result->getTitle();
				if ( $title === null ) {
					// Without a title there is nothing to report for this
					// result, and the calls below would fail on null.
					continue;
				}

				// getSearchResultData() returns null for results it skips;
				// start from an empty array so the interwiki fields below
				// are still reported for them.
				$vals = $this->getSearchResultData( $result, $prop ) ?? [];

				$vals['namespace'] = $result->getInterwikiNamespaceText();
				$vals['title'] = $title->getText();
				$vals['url'] = $title->getFullURL();

				// Add item to results and see whether it fits
				$fit = $apiResult->addValue( [
					'query',
					$section . $this->getModuleName(),
					$result->getInterwikiPrefix()
				], null, $vals );

				if ( !$fit ) {
					// We hit the limit. We can't really provide any meaningful
					// pagination info so just bail out
					break;
				}
			}
		}
		if ( $totalhits !== null ) {
			$apiResult->addValue( [ 'query', $section . 'searchinfo' ], 'totalhits', $totalhits );
			if ( $approximateTotalHits ) {
				$apiResult->addValue( [ 'query', $section . 'searchinfo' ], 'approximate_totalhits', true );
			}
			$apiResult->addIndexedTagName( [
				'query', $section . $this->getModuleName()
			], 'p' );
		}
	}
	return $totalhits;
}
352
/**
 * Work out which snippet fields the search engine should be asked for,
 * based on the requested properties.
 *
 * @param array $prop Requested properties, as a map keyed by property name
 * @return string[] Snippet field names, in a fixed order
 */
private function decideSnippets( array $prop ): array {
	// Field names align with definitions in ContentHandler::getFieldsForSearchIndex.
	// Except `redirect` which isn't explicitly created, but refers to the title of
	// pages that redirect to the result page.
	//
	// Each field lists the properties that trigger it. The title variants are
	// listed next to their snippet because some search engines generate the
	// title from the snippet, so the snippet must be requested to get the title.
	$triggers = [
		'title' => [ 'titlesnippet' ],
		'redirect' => [ 'redirectsnippet', 'redirecttitle' ],
		'category' => [ 'categorysnippet' ],
		'heading' => [ 'sectionsnippet', 'sectiontitle' ],
	];

	$fields = [];
	foreach ( $triggers as $field => $propNames ) {
		foreach ( $propNames as $propName ) {
			if ( isset( $prop[$propName] ) ) {
				$fields[] = $field;
				// One trigger is enough; never list a field twice
				break;
			}
		}
	}
	return $fields;
}
375
/**
 * Report the cache mode for this module's output.
 *
 * @param array $params Request parameters (not consulted)
 * @return string Always 'public'
 */
public function getCacheMode( $params ) {
	$mode = 'public';
	return $mode;
}
380
/**
 * Describe the parameters accepted by this module.
 *
 * The shared search parameters from buildCommonApiParams() come first; the
 * array union keeps their definition if a key is declared in both places.
 *
 * @return array Parameter definitions keyed by parameter name
 */
public function getAllowedParams() {
	$allowedParams = $this->buildCommonApiParams() + [
		'what' => [
			ParamValidator::PARAM_TYPE => [
				'title',
				'text',
				'nearmatch',
			]
		],
		'info' => [
			ParamValidator::PARAM_DEFAULT => 'totalhits|suggestion|rewrittenquery',
			ParamValidator::PARAM_TYPE => [
				'totalhits',
				'suggestion',
				'rewrittenquery',
			],
			ParamValidator::PARAM_ISMULTI => true,
		],
		'prop' => [
			ParamValidator::PARAM_DEFAULT => 'size|wordcount|timestamp|snippet',
			ParamValidator::PARAM_TYPE => [
				'size',
				'wordcount',
				'timestamp',
				'snippet',
				'titlesnippet',
				'redirecttitle',
				'redirectsnippet',
				'sectiontitle',
				'sectionsnippet',
				'isfilematch',
				'categorysnippet',
				'score', // deprecated
				'hasrelated', // deprecated
				'extensiondata',
			],
			ParamValidator::PARAM_ISMULTI => true,
			// Presumably enables per-value help messages with no
			// overrides -- confirm against the ApiBase documentation.
			ApiBase::PARAM_HELP_MSG_PER_VALUE => [],
			EnumDef::PARAM_DEPRECATED_VALUES => [
				'score' => true,
				'hasrelated' => true
			],
		],
		'interwiki' => false,
		'enablerewrites' => false,
	];

	// Generators only add info/properties if explicitly requested. T263841
	if ( $this->isInGeneratorMode() ) {
		$allowedParams['prop'][ParamValidator::PARAM_DEFAULT] = '';
		$allowedParams['info'][ParamValidator::PARAM_DEFAULT] = '';
	}

	// If we have more than one engine the list of available sorts is
	// difficult to represent. For now don't expose it.
	$alternatives = $this->searchEngineConfig->getSearchTypes();
	if ( count( $alternatives ) === 1 ) {
		$allowedParams['sort'] = [
			ParamValidator::PARAM_DEFAULT => SearchEngine::DEFAULT_SORT,
			ParamValidator::PARAM_TYPE => $this->searchEngineFactory->create()->getValidSorts(),
		];
	}

	return $allowedParams;
}
447
/**
 * List the search profile parameters this module exposes.
 *
 * @return array[] Map of parameter name to its 'profile-type' and
 *  'help-message' settings
 */
public function getSearchProfileParams() {
	$profileParams = [];

	// Query-independent scoring profile (article popularity, quality, etc)
	$profileParams['qiprofile'] = [
		'profile-type' => SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE,
		'help-message' => 'apihelp-query+search-param-qiprofile',
	];

	// Query-dependent scoring profile (match strictness, default and/or, etc.)
	$profileParams['qdprofile'] = [
		'profile-type' => SearchEngine::FT_QUERY_DEP_PROFILE_TYPE,
		'help-message' => 'apihelp-query+search-param-qdprofile',
	];

	return $profileParams;
}
463
/**
 * Provide usage examples for this module.
 *
 * @return string[] Map of example query string to the message key describing it
 */
protected function getExamplesMessages() {
	$examples = [];

	// Plain search with the default search type
	$examples['action=query&list=search&srsearch=meaning'] =
		'apihelp-query+search-example-simple';
	// Explicit full-text search
	$examples['action=query&list=search&srwhat=text&srsearch=meaning'] =
		'apihelp-query+search-example-text';
	// Search used as a generator feeding prop=info
	$examples['action=query&generator=search&gsrsearch=meaning&prop=info'] =
		'apihelp-query+search-example-generator';

	return $examples;
}
475
/**
 * Point to the on-wiki documentation for this module.
 *
 * @return string URL of the help page
 */
public function getHelpUrls() {
	$helpUrl = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Search';
	return $helpUrl;
}
480}
481
// Also expose the namespaced class under its un-namespaced name
class_alias( ApiQuerySearch::class, 'ApiQuerySearch' );
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
dieWithError( $msg, $code=null, $data=null, $httpCode=0)
Abort execution with an error.
Definition ApiBase.php:1511
getModuleName()
Get the name of the module being executed by this instance.
Definition ApiBase.php:543
getMain()
Get the main module.
Definition ApiBase.php:561
getResult()
Get the result object.
Definition ApiBase.php:682
const PARAM_HELP_MSG_PER_VALUE
((string|array|Message)[]) When PARAM_TYPE is an array, or 'string' with PARAM_ISMULTI,...
Definition ApiBase.php:207
dieStatus(StatusValue $status)
Throw an ApiUsageException based on the Status object.
Definition ApiBase.php:1562
extractRequestParams( $options=[])
Using getAllowedParams(), this function makes an array of the values provided by the user,...
Definition ApiBase.php:823
static addTitleInfo(&$arr, $title, $prefix='')
Add information (title and namespace) about a Title object to a result array.
setContinueEnumParameter( $paramName, $paramValue)
Overridden to set the generator param if in generator mode.
Query module to perform full text search within wiki titles and content.
execute()
Evaluates the parameters, performs the requested query, and sets up the result.
getAllowedParams()
Returns an array of allowed parameters (parameter name) => (default value) or (parameter name) => (ar...
getExamplesMessages()
Returns usage examples for this module. Return value has query strings as keys, with values being eith...
__construct(ApiQuery $query, string $moduleName, SearchEngineConfig $searchEngineConfig, SearchEngineFactory $searchEngineFactory, TitleMatcher $titleMatcher)
getHelpUrls()
Return links to more detailed help pages about the module. Since 1.25, returning boolean false is deprecated...
executeGenerator( $resultPageSet)
Execute this module as a generator.
getCacheMode( $params)
Get the cache mode for the data generated by this module. Override this in the module subclass....
This is the main query class.
Definition ApiQuery.php:36
static addMetadataToResultVars( $vars, $forceHash=true)
Add the correct metadata to an array of vars we want to export through the API.
Configuration handling class for SearchEngine.
Factory class for SearchEngine.
Contain a class for special pages.
An abstract base class representing a search engine result.
Service implementation of near match title search.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:44
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:18
Service for formatting and validating API parameters.
Type definition for enumeration types.
Definition EnumDef.php:32
A set of SearchEngine results.
const INLINE_RESULTS
Identifier for interwiki results that can be displayed even if no existing main wiki results exist.
const SECONDARY_RESULTS
Identifier for interwiki results that are displayed only together with existing main wiki results.
buildCommonApiParams( $isScrollable=true)
The set of api parameters that are shared between api calls that call the SearchEngine.
Definition SearchApi.php:57
buildSearchEngine(?array $params=null)
Build the search engine to use.
array $params
The job parameters.