MediaWiki master
ApiQuerySearch.php
Go to the documentation of this file.
1<?php
9namespace MediaWiki\Api;
10
21use Wikimedia\Timestamp\TimestampFormat as TS;
22
29 use SearchApi;
30
/**
 * @param ApiQuery $query Query module this module is attached to
 * @param string $moduleName Name this module is registered under
 * @param SearchEngineConfig $searchEngineConfig Kept on the instance for the SearchApi trait
 * @param SearchEngineFactory $searchEngineFactory Kept on the instance for the SearchApi trait
 * @param TitleMatcher $titleMatcher Used by run() for the 'nearmatch' search type
 */
public function __construct(
	ApiQuery $query,
	string $moduleName,
	SearchEngineConfig $searchEngineConfig,
	SearchEngineFactory $searchEngineFactory,
	private readonly TitleMatcher $titleMatcher,
) {
	// 'sr' is the prefix applied to this module's request parameters.
	parent::__construct( $query, $moduleName, 'sr' );

	// The SearchApi trait also needs these two services.
	$this->searchEngineFactory = $searchEngineFactory;
	$this->searchEngineConfig = $searchEngineConfig;
}
43
/**
 * Run the search as a plain list module: results go straight into the
 * API result and no page set is involved.
 */
public function execute() {
	$this->run( null );
}
47
/**
 * Run the search as a generator: the hits are handed to the given page
 * set instead of being written into the API result.
 *
 * @param object $resultPageSet Page set to populate (passed through to run())
 */
public function executeGenerator( $resultPageSet ) {
	$this->run( $resultPageSet );
}
52
/**
 * Perform the search and emit the hits, either directly into the API
 * result (list mode) or into a page set (generator mode).
 *
 * Aborts with an API error when the search engine returns a non-OK
 * status, or returns null for the requested search type.
 *
 * @param object|null $resultPageSet Page set to populate when running as
 *   a generator; null when running as a list module.
 */
private function run( $resultPageSet = null ) {
	$params = $this->extractRequestParams();

	// Extract parameters
	$query = $params['search'];
	$what = $params['what'];
	$interwiki = $params['interwiki'];
	$searchInfo = array_fill_keys( $params['info'], true );
	$prop = array_fill_keys( $params['prop'], true );

	// Create search engine instance and set options
	$search = $this->buildSearchEngine( $params );
	if ( isset( $params['sort'] ) ) {
		$search->setSort( $params['sort'] );
	}
	$search->setFeatureData( 'rewrite', (bool)$params['enablerewrites'] );
	$search->setFeatureData( 'interwiki', (bool)$interwiki );
	// Hint to some SearchEngines about what snippets we would like returned
	$search->setFeatureData( 'snippets', $this->decideSnippets( $prop ) );

	// Perform the actual search
	if ( $what === 'text' ) {
		$matches = $search->searchText( $query );
	} elseif ( $what === 'title' ) {
		$matches = $search->searchTitle( $query );
	} elseif ( $what === 'nearmatch' ) {
		// near matches must receive the user input as provided, otherwise
		// the near matches within namespaces are lost.
		$matches = $this->titleMatcher->getNearMatchResultSet( $params['search'] );
	} else {
		// We default to title searches; this is a terrible legacy
		// of the way we initially set up the MySQL fulltext-based
		// search engine with separate title and text fields.
		// In the future, the default should be for a combined index.
		$what = 'title';
		$matches = $search->searchTitle( $query );

		// Not all search engines support a separate title search,
		// for instance the Lucene-based engine we use on Wikipedia.
		// In this case, fall back to full-text search (which will
		// include titles in it!)
		if ( $matches === null ) {
			$what = 'text';
			$matches = $search->searchText( $query );
		}
	}

	// The engine may hand back either a result set or a Status wrapping one.
	if ( $matches instanceof Status ) {
		$status = $matches;
		$matches = $status->getValue();
	} else {
		$status = null;
	}

	if ( $status ) {
		if ( $status->isOK() ) {
			// OK status: surface whatever messages it carries, then carry on.
			$this->getMain()->getErrorFormatter()->addMessagesFromStatus(
				$this->getModuleName(),
				$status
			);
		} else {
			$this->dieStatus( $status );
		}
	} elseif ( $matches === null ) {
		$this->dieWithError( [ 'apierror-searchdisabled', $what ], "search-{$what}-disabled" );
	}

	$apiResult = $this->getResult();
	// Add search meta data to result
	if ( isset( $searchInfo['totalhits'] ) ) {
		$totalhits = $matches->getTotalHits();
		if ( $totalhits !== null ) {
			$apiResult->addValue( [ 'query', 'searchinfo' ],
				'totalhits', $totalhits );
			if ( $matches->isApproximateTotalHits() ) {
				$apiResult->addValue( [ 'query', 'searchinfo' ],
					'approximate_totalhits', $matches->isApproximateTotalHits() );
			}
		}
	}
	if ( isset( $searchInfo['suggestion'] ) && $matches->hasSuggestion() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestion', $matches->getSuggestionQuery() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestionsnippet', HtmlArmor::getHtml( $matches->getSuggestionSnippet() ) );
	}
	if ( isset( $searchInfo['rewrittenquery'] ) && $matches->hasRewrittenQuery() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquery', $matches->getQueryAfterRewrite() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquerysnippet', HtmlArmor::getHtml( $matches->getQueryAfterRewriteSnippet() ) );
	}

	$titles = [];
	$data = [];
	$count = 0;

	if ( $matches->hasMoreResults() ) {
		$this->setContinueEnumParameter( 'offset', $params['offset'] + $params['limit'] );
	}

	foreach ( $matches as $result ) {
		// Counts every hit, including skipped ones, so that the continuation
		// offset below refers to a position in the engine's result list.
		$count++;
		// Silently skip broken and missing titles
		if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
			continue;
		}

		$vals = $this->getSearchResultData( $result, $prop );

		if ( $resultPageSet === null ) {
			if ( $vals ) {
				// Add item to results and see whether it fits
				$fit = $apiResult->addValue( [ 'query', $this->getModuleName() ], null, $vals );
				if ( !$fit ) {
					// Continue from the hit that did not fit.
					$this->setContinueEnumParameter( 'offset', $params['offset'] + $count - 1 );
					break;
				}
			}
		} else {
			$titles[] = $result->getTitle();
			$data[] = $vals ?: [];
		}
	}

	// Here we assume interwiki results do not count with
	// regular search results. We may want to reconsider this
	// if we ever return a lot of interwiki results or want pagination
	// for them.
	// Interwiki results inside main result set
	$canAddInterwiki = (bool)$params['enablerewrites'] && ( $resultPageSet === null );
	if ( $canAddInterwiki ) {
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'additional',
			ISearchResultSet::INLINE_RESULTS );
	}

	// Interwiki results outside main result set
	if ( $interwiki && $resultPageSet === null ) {
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'interwiki',
			ISearchResultSet::SECONDARY_RESULTS );
	}

	if ( $resultPageSet === null ) {
		$apiResult->addIndexedTagName( [
			'query', $this->getModuleName()
		], 'p' );
	} else {
		// When redirects are merged, keep the smallest 'index' seen.
		$resultPageSet->setRedirectMergePolicy( static function ( $current, $new ) {
			if ( !isset( $current['index'] ) || $new['index'] < $current['index'] ) {
				$current['index'] = $new['index'];
			}
			return $current;
		} );
		$resultPageSet->populateFromTitles( $titles );
		// Generator indexes are 1-based and continue from the request offset.
		$offset = $params['offset'] + 1;
		foreach ( $titles as $index => $title ) {
			$resultPageSet->setGeneratorData(
				$title,
				$data[ $index ] + [ 'index' => $index + $offset ]
			);
		}
	}
}
222
/**
 * Convert a single search hit into the array emitted for it by the API.
 *
 * @param SearchResult $result Hit to convert
 * @param array $prop Requested properties, keyed by property name
 * @return array|null Data for the hit, or null when the hit has a broken
 *   title or a missing revision
 */
private function getSearchResultData( SearchResult $result, $prop ) {
	// Broken titles and missing revisions are dropped without an error
	if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
		return null;
	}

	$title = $result->getTitle();

	$data = [];
	ApiQueryBase::addTitleInfo( $data, $title );
	$data['pageid'] = $title->getArticleID();

	// Properties that map one-to-one onto a getter. The getter only runs
	// when its property was requested; the order here is the output order.
	$getters = [
		'size' => static fn () => $result->getByteSize(),
		'wordcount' => static fn () => $result->getWordCount(),
		'snippet' => static fn () => $result->getTextSnippet(),
		'timestamp' => static fn () => wfTimestamp( TS::ISO_8601, $result->getTimestamp() ),
		'titlesnippet' => static fn () => $result->getTitleSnippet(),
		'categorysnippet' => static fn () => $result->getCategorySnippet(),
	];
	foreach ( $getters as $name => $getter ) {
		if ( isset( $prop[$name] ) ) {
			$data[$name] = $getter();
		}
	}

	// Redirect details are only available when the hit has a redirect title
	$redirectTitle = $result->getRedirectTitle();
	if ( $redirectTitle !== null ) {
		if ( isset( $prop['redirecttitle'] ) ) {
			$data['redirecttitle'] = $redirectTitle->getPrefixedText();
		}
		if ( isset( $prop['redirectsnippet'] ) ) {
			$data['redirectsnippet'] = $result->getRedirectSnippet();
		}
	}

	// Likewise for section details
	$sectionTitle = $result->getSectionTitle();
	if ( $sectionTitle !== null ) {
		if ( isset( $prop['sectiontitle'] ) ) {
			$data['sectiontitle'] = $sectionTitle->getFragment();
		}
		if ( isset( $prop['sectionsnippet'] ) ) {
			$data['sectionsnippet'] = $result->getSectionSnippet();
		}
	}

	if ( isset( $prop['isfilematch'] ) ) {
		$data['isfilematch'] = $result->isFileMatch();
	}

	if ( isset( $prop['extensiondata'] ) ) {
		// Augmented data, organized as a map: augmentorName => data.
		// Left out of the output entirely when empty.
		$extensionData = $result->getExtensionData();
		if ( $extensionData ) {
			$data['extensiondata'] = ApiResult::addMetadataToResultVars( $extensionData );
		}
	}

	return $data;
}
290
/**
 * Add the interwiki results of one type to the API result.
 *
 * Hits are stored under [ 'query', $section . <module name>, <interwiki prefix> ];
 * the summed hit count goes under [ 'query', $section . 'searchinfo' ].
 *
 * @param ISearchResultSet $matches Result set carrying the interwiki results
 * @param ApiResult $apiResult Result object to add the data to
 * @param array $prop Requested properties, keyed by property name
 * @param string $section Prefix for the result keys
 * @param mixed $type Interwiki result type, as accepted by
 *   ISearchResultSet::hasInterwikiResults() and ::getInterwikiResults()
 * @return int|null Sum of the total hits reported by the interwiki result
 *   sets; null when there are no results of this type or none of the sets
 *   reports a total
 */
private function addInterwikiResults(
	ISearchResultSet $matches, ApiResult $apiResult, $prop,
	$section, $type
) {
	if ( !$matches->hasInterwikiResults( $type ) ) {
		return null;
	}

	$resultsPath = [ 'query', $section . $this->getModuleName() ];
	$infoPath = [ 'query', $section . 'searchinfo' ];

	$totalhits = null;
	$isApproximate = false;

	foreach ( $matches->getInterwikiResults( $type ) as $interwikiSet ) {
		// Accumulate the hit count of every set that reports one
		$setTotalHits = $interwikiSet->getTotalHits();
		if ( $setTotalHits !== null ) {
			$totalhits = ( $totalhits ?? 0 ) + $setTotalHits;
			if ( !$isApproximate ) {
				$isApproximate = (bool)$interwikiSet->isApproximateTotalHits();
			}
		}

		foreach ( $interwikiSet as $hit ) {
			$hitTitle = $hit->getTitle();

			// getSearchResultData() may return null; the interwiki fields
			// below are then the only data emitted for the hit.
			$entry = $this->getSearchResultData( $hit, $prop ) ?? [];
			$entry['namespace'] = $hit->getInterwikiNamespaceText();
			$entry['title'] = $hitTitle->getText();
			$entry['url'] = $hitTitle->getFullURL();

			// Add item to results and see whether it fits
			$hitPath = $resultsPath;
			$hitPath[] = $hit->getInterwikiPrefix();
			if ( !$apiResult->addValue( $hitPath, null, $entry ) ) {
				// We hit the limit. We can't really provide any meaningful
				// pagination info so just bail out of this result set
				break;
			}
		}
	}

	if ( $totalhits !== null ) {
		$apiResult->addValue( $infoPath, 'totalhits', $totalhits );
		if ( $isApproximate ) {
			$apiResult->addValue( $infoPath, 'approximate_totalhits', true );
		}
		$apiResult->addIndexedTagName( $resultsPath, 'p' );
	}

	return $totalhits;
}
349
/**
 * Work out which snippet fields the search engine should be asked for.
 *
 * @param array $prop Requested properties, keyed by property name
 * @return string[] Field names, in the order title, redirect, category, heading
 */
private function decideSnippets( array $prop ): array {
	// Field names align with definitions in ContentHandler::getFieldsForSearchIndex.
	// Except `redirect` which isn't explicitly created, but refers to the title of
	// pages that redirect to the result page.
	//
	// Each field is requested when any of its listed properties is requested.
	// The title variants sit next to the snippet variants because some search
	// engines generate the title variant from the snippet and thus must have
	// the snippet requested to provide the title.
	$fieldTriggers = [
		'title' => [ 'titlesnippet' ],
		'redirect' => [ 'redirectsnippet', 'redirecttitle' ],
		'category' => [ 'categorysnippet' ],
		'heading' => [ 'sectionsnippet', 'sectiontitle' ],
	];

	$fields = [];
	foreach ( $fieldTriggers as $field => $triggers ) {
		foreach ( $triggers as $trigger ) {
			if ( isset( $prop[$trigger] ) ) {
				$fields[] = $field;
				break;
			}
		}
	}
	return $fields;
}
372
/**
 * Cache mode for this module's output; always 'public'.
 *
 * @param mixed $params Unused
 * @return string
 */
public function getCacheMode( $params ) {
	$mode = 'public';
	return $mode;
}
377
/**
 * Parameter definitions for this module: the common search parameters
 * from buildCommonApiParams() plus the ones specific to this module.
 *
 * @return array Map of parameter name to its definition or default value
 */
public function getAllowedParams() {
	$whatValues = [
		'title',
		'text',
		'nearmatch',
	];
	$infoValues = [
		'totalhits',
		'suggestion',
		'rewrittenquery',
	];
	$propValues = [
		'size',
		'wordcount',
		'timestamp',
		'snippet',
		'titlesnippet',
		'redirecttitle',
		'redirectsnippet',
		'sectiontitle',
		'sectionsnippet',
		'isfilematch',
		'categorysnippet',
		'score', // deprecated
		'hasrelated', // deprecated
		'extensiondata',
	];

	$moduleParams = [
		'what' => [
			ParamValidator::PARAM_TYPE => $whatValues,
		],
		'info' => [
			ParamValidator::PARAM_DEFAULT => 'totalhits|suggestion|rewrittenquery',
			ParamValidator::PARAM_TYPE => $infoValues,
			ParamValidator::PARAM_ISMULTI => true,
		],
		'prop' => [
			ParamValidator::PARAM_DEFAULT => 'size|wordcount|timestamp|snippet',
			ParamValidator::PARAM_TYPE => $propValues,
			ParamValidator::PARAM_ISMULTI => true,
			EnumDef::PARAM_DEPRECATED_VALUES => [
				'score' => true,
				'hasrelated' => true,
			],
		],
		'interwiki' => false,
		'enablerewrites' => false,
	];

	// With `+`, a common parameter wins over a module one of the same name.
	$allowedParams = $this->buildCommonApiParams() + $moduleParams;

	// Generators only add info/properties if explicitly requested. T263841
	if ( $this->isInGeneratorMode() ) {
		foreach ( [ 'prop', 'info' ] as $name ) {
			$allowedParams[$name][ParamValidator::PARAM_DEFAULT] = '';
		}
	}

	// If we have more than one engine the list of available sorts is
	// difficult to represent. For now don't expose it.
	$searchTypes = $this->searchEngineConfig->getSearchTypes();
	if ( count( $searchTypes ) === 1 ) {
		$validSorts = $this->searchEngineFactory->create()->getValidSorts();
		$allowedParams['sort'] = [
			ParamValidator::PARAM_DEFAULT => SearchEngine::DEFAULT_SORT,
			ParamValidator::PARAM_TYPE => $validSorts,
		];
	}

	return $allowedParams;
}
444
/**
 * Search profile parameters offered by this module.
 *
 * @return array Map of parameter name to its 'profile-type' and 'help-message'
 */
public function getSearchProfileParams() {
	$profiles = [];

	// Query-independent scoring profile (article popularity, quality, etc)
	$profiles['qiprofile'] = [
		'profile-type' => SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE,
		'help-message' => 'apihelp-query+search-param-qiprofile',
	];

	// Query-dependent scoring profile (match strictness, default and/or, etc.)
	$profiles['qdprofile'] = [
		'profile-type' => SearchEngine::FT_QUERY_DEP_PROFILE_TYPE,
		'help-message' => 'apihelp-query+search-param-qdprofile',
	];

	return $profiles;
}
460
/**
 * Usage examples for this module.
 *
 * @return string[] Map of example query string to the message key describing it
 */
protected function getExamplesMessages() {
	$examples = [];

	// Default search
	$examples['action=query&list=search&srsearch=meaning']
		= 'apihelp-query+search-example-simple';
	// Explicit full-text search
	$examples['action=query&list=search&srwhat=text&srsearch=meaning']
		= 'apihelp-query+search-example-text';
	// Used as a generator
	$examples['action=query&generator=search&gsrsearch=meaning&prop=info']
		= 'apihelp-query+search-example-generator';

	return $examples;
}
472
/**
 * Link to the more detailed help page for this module.
 *
 * @return string
 */
public function getHelpUrls() {
	$helpUrl = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Search';
	return $helpUrl;
}
477}
478
// Keep the un-namespaced class name usable as an alias of the namespaced class.
\class_alias( ApiQuerySearch::class, 'ApiQuerySearch' );
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
dieWithError( $msg, $code=null, $data=null, $httpCode=0)
Abort execution with an error.
Definition ApiBase.php:1522
getModuleName()
Get the name of the module being executed by this instance.
Definition ApiBase.php:557
getMain()
Get the main module.
Definition ApiBase.php:575
getResult()
Get the result object.
Definition ApiBase.php:696
const PARAM_HELP_MSG_PER_VALUE
((string|array|Message)[]) When PARAM_TYPE is an array, or 'string' with PARAM_ISMULTI,...
Definition ApiBase.php:206
dieStatus(StatusValue $status)
Throw an ApiUsageException based on the Status object.
Definition ApiBase.php:1573
extractRequestParams( $options=[])
Using getAllowedParams(), this function makes an array of the values provided by the user,...
Definition ApiBase.php:837
static addTitleInfo(&$arr, $title, $prefix='')
Add information (title and namespace) about a Title object to a result array.
setContinueEnumParameter( $paramName, $paramValue)
Overridden to set the generator param if in generator mode.
Query module to perform full text search within wiki titles and content.
execute()
Evaluates the parameters, performs the requested query, and sets up the result.
__construct(ApiQuery $query, string $moduleName, SearchEngineConfig $searchEngineConfig, SearchEngineFactory $searchEngineFactory, private readonly TitleMatcher $titleMatcher,)
getAllowedParams()
Returns an array of allowed parameters (parameter name) => (default value) or (parameter name) => (ar...
getExamplesMessages()
Returns usage examples for this module. Return value has query strings as keys, with values being eith...
getHelpUrls()
Return links to more detailed help pages about the module. Since 1.25, returning boolean false is deprecated...
executeGenerator( $resultPageSet)
Execute this module as a generator.
getCacheMode( $params)
Get the cache mode for the data generated by this module. Override this in the module subclass....
This is the main query class.
Definition ApiQuery.php:36
static addMetadataToResultVars( $vars, $forceHash=true)
Add the correct metadata to an array of vars we want to export through the API.
Configuration handling class for SearchEngine.
Factory class for SearchEngine.
Contain a class for special pages.
An abstract base class representing a search engine result.
Service implementation of near match title search.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition Status.php:44
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:18
Service for formatting and validating API parameters.
Type definition for enumeration types.
Definition EnumDef.php:32
trait SearchApi
Traits for API components that use a SearchEngine.
Definition SearchApi.php:23
A set of SearchEngine results.
const INLINE_RESULTS
Identifier for interwiki results that can be displayed even if no existing main wiki results exist.
const SECONDARY_RESULTS
Identifier for interwiki results that are displayed only together with existing main wiki results.
buildCommonApiParams( $isScrollable=true)
The set of api parameters that are shared between api calls that call the SearchEngine.
Definition SearchApi.php:57
buildSearchEngine(?array $params=null)
Build the search engine to use.
array $params
The job parameters.