MediaWiki master
ApiQuerySearch.php
Go to the documentation of this file.
1<?php
23namespace MediaWiki\Api;
24
25use HtmlArmor;
29use SearchEngine;
32use SearchResult;
35
42 use \MediaWiki\Api\SearchApi;
43
/** Service used by run() to resolve the `nearmatch` search type. */
private TitleMatcher $titleMatcher;

/**
 * @param ApiQuery $query Parent query module, forwarded to the base constructor
 * @param string $moduleName Name of this module, forwarded to the base constructor
 * @param SearchEngineConfig $searchEngineConfig Kept for this class and the SearchApi trait
 * @param SearchEngineFactory $searchEngineFactory Kept for this class and the SearchApi trait
 * @param TitleMatcher $titleMatcher Kept for near-match lookups
 */
public function __construct(
	ApiQuery $query,
	string $moduleName,
	SearchEngineConfig $searchEngineConfig,
	SearchEngineFactory $searchEngineFactory,
	TitleMatcher $titleMatcher
) {
	// 'sr' is passed as the third base-constructor argument; the usage
	// examples of this module show it as the parameter prefix (srsearch, srwhat).
	parent::__construct( $query, $moduleName, 'sr' );

	$this->titleMatcher = $titleMatcher;

	// These two services are also read by the SearchApi trait.
	$this->searchEngineFactory = $searchEngineFactory;
	$this->searchEngineConfig = $searchEngineConfig;
}
59
/**
 * Entry point when the module is used as a list: runs the search without
 * a page set, so the hits are written to the API result.
 */
public function execute() {
	$this->run( null );
}
63
/**
 * Entry point when the module is used as a generator: runs the search and
 * hands the hits to the supplied page set.
 *
 * @param object $resultPageSet Page set that run() fills through
 *  populateFromTitles() and setGeneratorData()
 */
public function executeGenerator( $resultPageSet ) {
	$this->run( $resultPageSet );
}
67
/**
 * Run the search described by the request parameters and report the hits.
 *
 * In list mode ($resultPageSet is null) every usable hit is added to the API
 * result, followed by any interwiki results. In generator mode the hit
 * titles are handed to the page set and each title is tagged with its
 * 1-based position in the overall result list. Search metadata
 * (total hits, suggestion, rewritten query) is added when requested through
 * the 'info' parameter.
 *
 * @param object|null $resultPageSet Page set to populate when running as a
 *  generator, or null when running as a list module
 */
private function run( $resultPageSet = null ) {
	// Every later step reads $params, so it must be populated first.
	$params = $this->extractRequestParams();

	// Extract parameters
	$query = $params['search'];
	$what = $params['what'];
	$interwiki = $params['interwiki'];
	// Flip the multi-value lists into maps so membership can be tested with isset()
	$searchInfo = array_fill_keys( $params['info'], true );
	$prop = array_fill_keys( $params['prop'], true );

	// Create search engine instance and set options
	$search = $this->buildSearchEngine( $params );
	if ( isset( $params['sort'] ) ) {
		$search->setSort( $params['sort'] );
	}
	$search->setFeatureData( 'rewrite', (bool)$params['enablerewrites'] );
	$search->setFeatureData( 'interwiki', (bool)$interwiki );
	// Hint to some SearchEngines about what snippets we would like returned
	$search->setFeatureData( 'snippets', $this->decideSnippets( $prop ) );

	$nquery = $search->replacePrefixes( $query );
	if ( $nquery !== $query ) {
		$query = $nquery;
		wfDeprecatedMsg( 'SearchEngine::replacePrefixes() is overridden by ' .
			get_class( $search ) . ', this was deprecated in MediaWiki 1.32',
			'1.32' );
	}

	// Perform the actual search
	if ( $what === 'text' ) {
		$matches = $search->searchText( $query );
	} elseif ( $what === 'title' ) {
		$matches = $search->searchTitle( $query );
	} elseif ( $what === 'nearmatch' ) {
		// near matches must receive the user input as provided, otherwise
		// the near matches within namespaces are lost.
		$matches = $this->titleMatcher->getNearMatchResultSet( $params['search'] );
	} else {
		// We default to title searches; this is a terrible legacy
		// of the way we initially set up the MySQL fulltext-based
		// search engine with separate title and text fields.
		// In the future, the default should be for a combined index.
		$what = 'title';
		$matches = $search->searchTitle( $query );

		// Not all search engines support a separate title search,
		// for instance the Lucene-based engine we use on Wikipedia.
		// In this case, fall back to full-text search (which will
		// include titles in it!)
		if ( $matches === null ) {
			$what = 'text';
			$matches = $search->searchText( $query );
		}
	}

	// The engine may return either a result set or a Status wrapping one
	if ( $matches instanceof Status ) {
		$status = $matches;
		$matches = $status->getValue();
	} else {
		$status = null;
	}

	if ( $status ) {
		if ( $status->isOK() ) {
			// Usable result: forward whatever messages the status carries
			$this->getMain()->getErrorFormatter()->addMessagesFromStatus(
				$this->getModuleName(),
				$status
			);
		} else {
			$this->dieStatus( $status );
		}
	} elseif ( $matches === null ) {
		$this->dieWithError( [ 'apierror-searchdisabled', $what ], "search-{$what}-disabled" );
	}

	$apiResult = $this->getResult();
	// Add search meta data to result
	if ( isset( $searchInfo['totalhits'] ) ) {
		$totalhits = $matches->getTotalHits();
		if ( $totalhits !== null ) {
			$apiResult->addValue( [ 'query', 'searchinfo' ],
				'totalhits', $totalhits );
			if ( $matches->isApproximateTotalHits() ) {
				$apiResult->addValue( [ 'query', 'searchinfo' ],
					'approximate_totalhits', $matches->isApproximateTotalHits() );
			}
		}
	}
	if ( isset( $searchInfo['suggestion'] ) && $matches->hasSuggestion() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestion', $matches->getSuggestionQuery() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestionsnippet', HtmlArmor::getHtml( $matches->getSuggestionSnippet() ) );
	}
	if ( isset( $searchInfo['rewrittenquery'] ) && $matches->hasRewrittenQuery() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquery', $matches->getQueryAfterRewrite() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquerysnippet', HtmlArmor::getHtml( $matches->getQueryAfterRewriteSnippet() ) );
	}

	$titles = [];
	$data = [];
	$count = 0;

	if ( $matches->hasMoreResults() ) {
		$this->setContinueEnumParameter( 'offset', $params['offset'] + $params['limit'] );
	}

	foreach ( $matches as $result ) {
		// Skipped hits are counted too, so the continuation offset below
		// still refers to the position within the engine's result list.
		$count++;
		// Silently skip broken and missing titles
		if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
			continue;
		}

		$vals = $this->getSearchResultData( $result, $prop );

		if ( $resultPageSet === null ) {
			if ( $vals ) {
				// Add item to results and see whether it fits
				$fit = $apiResult->addValue( [ 'query', $this->getModuleName() ], null, $vals );
				if ( !$fit ) {
					// Resume from the hit that did not fit
					$this->setContinueEnumParameter( 'offset', $params['offset'] + $count - 1 );
					break;
				}
			}
		} else {
			$titles[] = $result->getTitle();
			$data[] = $vals ?: [];
		}
	}

	// Here we assume interwiki results do not count with
	// regular search results. We may want to reconsider this
	// if we ever return a lot of interwiki results or want pagination
	// for them.
	// Interwiki results inside main result set
	$canAddInterwiki = (bool)$params['enablerewrites'] && ( $resultPageSet === null );
	if ( $canAddInterwiki ) {
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'additional',
			ISearchResultSet::INLINE_RESULTS );
	}

	// Interwiki results outside main result set
	if ( $interwiki && $resultPageSet === null ) {
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'interwiki',
			ISearchResultSet::SECONDARY_RESULTS );
	}

	if ( $resultPageSet === null ) {
		$apiResult->addIndexedTagName( [
			'query', $this->getModuleName()
		], 'p' );
	} else {
		// When two entries are merged, keep the smaller 'index' of the two
		$resultPageSet->setRedirectMergePolicy( static function ( $current, $new ) {
			if ( !isset( $current['index'] ) || $new['index'] < $current['index'] ) {
				$current['index'] = $new['index'];
			}
			return $current;
		} );
		$resultPageSet->populateFromTitles( $titles );
		// 'index' is 1-based and continues across pages of results
		$offset = $params['offset'] + 1;
		foreach ( $titles as $index => $title ) {
			$resultPageSet->setGeneratorData(
				$title,
				$data[ $index ] + [ 'index' => $index + $offset ]
			);
		}
	}
}
242
/**
 * Convert a single search result into the array of values reported by the API.
 *
 * Title information and the page ID are always included; everything else is
 * only added when the matching key is present in $prop.
 *
 * @param SearchResult $result Result to convert
 * @param array $prop Requested properties, as a map keyed by property name
 * @return array|null Values for the result, or null when the result has a
 *  broken title or a missing revision
 */
private function getSearchResultData( SearchResult $result, $prop ) {
	// Nothing is reported for broken titles or missing revisions
	if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
		return null;
	}

	$page = $result->getTitle();

	$row = [];
	ApiQueryBase::addTitleInfo( $row, $page );
	$row['pageid'] = $page->getArticleID();

	if ( isset( $prop['size'] ) ) {
		$row['size'] = $result->getByteSize();
	}
	if ( isset( $prop['wordcount'] ) ) {
		$row['wordcount'] = $result->getWordCount();
	}
	if ( isset( $prop['snippet'] ) ) {
		$row['snippet'] = $result->getTextSnippet();
	}
	if ( isset( $prop['timestamp'] ) ) {
		$row['timestamp'] = wfTimestamp( TS_ISO_8601, $result->getTimestamp() );
	}
	if ( isset( $prop['titlesnippet'] ) ) {
		$row['titlesnippet'] = $result->getTitleSnippet();
	}
	if ( isset( $prop['categorysnippet'] ) ) {
		$row['categorysnippet'] = $result->getCategorySnippet();
	}

	// Redirect details are only available when the result has a redirect title
	$redirect = $result->getRedirectTitle();
	if ( $redirect !== null ) {
		if ( isset( $prop['redirecttitle'] ) ) {
			$row['redirecttitle'] = $redirect->getPrefixedText();
		}
		if ( isset( $prop['redirectsnippet'] ) ) {
			$row['redirectsnippet'] = $result->getRedirectSnippet();
		}
	}

	// Likewise, section details require a section title
	$section = $result->getSectionTitle();
	if ( $section !== null ) {
		if ( isset( $prop['sectiontitle'] ) ) {
			$row['sectiontitle'] = $section->getFragment();
		}
		if ( isset( $prop['sectionsnippet'] ) ) {
			$row['sectionsnippet'] = $result->getSectionSnippet();
		}
	}

	if ( isset( $prop['isfilematch'] ) ) {
		$row['isfilematch'] = $result->isFileMatch();
	}

	if ( isset( $prop['extensiondata'] ) ) {
		// Augmented data is organized as a map: augmentorName => data.
		// It is left out entirely when there is none.
		$augmented = $result->getExtensionData();
		if ( $augmented ) {
			$row['extensiondata'] = ApiResult::addMetadataToResultVars( $augmented );
		}
	}

	return $row;
}
310
/**
 * Add the interwiki results of one type to the API result.
 *
 * Hits are stored under query.<section><module name>.<interwiki prefix>;
 * the summed hit count, when any set reports one, is stored under
 * query.<section>searchinfo.
 *
 * @param ISearchResultSet $matches Result set whose interwiki results are read
 * @param ApiResult $apiResult Result object the values are added to
 * @param array $prop Requested properties, passed on to getSearchResultData()
 * @param string $section Prefix for the result keys (callers in this class
 *  pass 'additional' or 'interwiki')
 * @param mixed $type Interwiki result type, passed to hasInterwikiResults()
 *  and getInterwikiResults()
 * @return int|null Sum of the total hits reported by the interwiki sets, or
 *  null when there are no interwiki results or none reports a total
 */
private function addInterwikiResults(
	ISearchResultSet $matches, ApiResult $apiResult, $prop,
	$section, $type
) {
	if ( !$matches->hasInterwikiResults( $type ) ) {
		return null;
	}

	$moduleKey = $section . $this->getModuleName();
	$infoPath = [ 'query', $section . 'searchinfo' ];

	$totalhits = null;
	$approximateTotalHits = false;

	foreach ( $matches->getInterwikiResults( $type ) as $interwikiSet ) {
		// Only sets that report a total contribute to the sum
		$setHits = $interwikiSet->getTotalHits();
		if ( $setHits !== null ) {
			$totalhits += $setHits;
			// One approximate set makes the whole sum approximate
			if ( !$approximateTotalHits ) {
				$approximateTotalHits = (bool)$interwikiSet->isApproximateTotalHits();
			}
		}

		foreach ( $interwikiSet as $hit ) {
			$title = $hit->getTitle();
			// getSearchResultData() may return null; start from an empty
			// array in that case so the interwiki fields can still be set.
			$vals = $this->getSearchResultData( $hit, $prop ) ?? [];

			$vals['namespace'] = $hit->getInterwikiNamespaceText();
			$vals['title'] = $title->getText();
			$vals['url'] = $title->getFullURL();

			// Add item to results and see whether it fits
			$fit = $apiResult->addValue(
				[ 'query', $moduleKey, $hit->getInterwikiPrefix() ],
				null,
				$vals
			);
			if ( !$fit ) {
				// We hit the limit. We can't really provide any meaningful
				// pagination info so just bail out
				break;
			}
		}
	}

	if ( $totalhits !== null ) {
		$apiResult->addValue( $infoPath, 'totalhits', $totalhits );
		if ( $approximateTotalHits ) {
			$apiResult->addValue( $infoPath, 'approximate_totalhits', true );
		}
		$apiResult->addIndexedTagName( [ 'query', $moduleKey ], 'p' );
	}

	return $totalhits;
}
369
/**
 * Work out which index fields the search engine should produce snippets for.
 *
 * @param array $prop Requested properties, as a map keyed by property name
 * @return string[] Field names, in the order title, redirect, category, heading
 */
private function decideSnippets( array $prop ): array {
	// Field names align with definitions in ContentHandler::getFieldsForSearchIndex,
	// except `redirect`, which isn't explicitly created but refers to the title of
	// pages that redirect to the result page.
	//
	// The title variants are listed next to their snippets because some search
	// engines derive the title from the snippet and therefore need the snippet
	// requested in order to provide the title.
	$propsByField = [
		'title' => [ 'titlesnippet' ],
		'redirect' => [ 'redirectsnippet', 'redirecttitle' ],
		'category' => [ 'categorysnippet' ],
		'heading' => [ 'sectionsnippet', 'sectiontitle' ],
	];

	$fields = [];
	foreach ( $propsByField as $field => $propNames ) {
		foreach ( $propNames as $propName ) {
			if ( isset( $prop[$propName] ) ) {
				// One requested property is enough; list each field once
				$fields[] = $field;
				break;
			}
		}
	}

	return $fields;
}
392
/**
 * Report the cache mode for this module's output.
 *
 * @param array $params Request parameters (not consulted)
 * @return string Always 'public'
 */
public function getCacheMode( $params ) {
	return 'public';
}
396
/**
 * Describe the parameters accepted by this module.
 *
 * The shared search parameters from buildCommonApiParams() are extended with
 * the ones specific to this module. In generator mode 'prop' and 'info'
 * default to nothing, and 'sort' is only offered when exactly one search
 * type is configured.
 *
 * @return array Map of parameter name => default value or settings array
 */
public function getAllowedParams() {
	$allowedParams = $this->buildCommonApiParams() + [
		'what' => [
			ParamValidator::PARAM_TYPE => [
				'title',
				'text',
				'nearmatch',
			]
		],
		'info' => [
			ParamValidator::PARAM_DEFAULT => 'totalhits|suggestion|rewrittenquery',
			ParamValidator::PARAM_TYPE => [
				'totalhits',
				'suggestion',
				'rewrittenquery',
			],
			ParamValidator::PARAM_ISMULTI => true,
		],
		'prop' => [
			ParamValidator::PARAM_DEFAULT => 'size|wordcount|timestamp|snippet',
			ParamValidator::PARAM_TYPE => [
				'size',
				'wordcount',
				'timestamp',
				'snippet',
				'titlesnippet',
				'redirecttitle',
				'redirectsnippet',
				'sectiontitle',
				'sectionsnippet',
				'isfilematch',
				'categorysnippet',
				'score', // deprecated
				'hasrelated', // deprecated
				'extensiondata',
			],
			ParamValidator::PARAM_ISMULTI => true,
			// Ask the help system for a separate description of each value
			ApiBase::PARAM_HELP_MSG_PER_VALUE => [],
			EnumDef::PARAM_DEPRECATED_VALUES => [
				'score' => true,
				'hasrelated' => true
			],
		],
		'interwiki' => false,
		'enablerewrites' => false,
	];

	// Generators only add info/properties if explicitly requested. T263841
	if ( $this->isInGeneratorMode() ) {
		$allowedParams['prop'][ParamValidator::PARAM_DEFAULT] = '';
		$allowedParams['info'][ParamValidator::PARAM_DEFAULT] = '';
	}

	// If we have more than one engine the list of available sorts is
	// difficult to represent. For now don't expose it.
	$alternatives = $this->searchEngineConfig->getSearchTypes();
	if ( count( $alternatives ) === 1 ) {
		$allowedParams['sort'] = [
			ParamValidator::PARAM_DEFAULT => SearchEngine::DEFAULT_SORT,
			ParamValidator::PARAM_TYPE => $this->searchEngineFactory->create()->getValidSorts(),
		];
	}

	return $allowedParams;
}
462
/**
 * Describe the search profile parameters offered by this module.
 *
 * @return array Map of parameter name => profile settings; only 'qiprofile'
 *  is defined here
 */
public function getSearchProfileParams() {
	$qiprofile = [
		'profile-type' => SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE,
		'help-message' => 'apihelp-query+search-param-qiprofile',
	];

	return [ 'qiprofile' => $qiprofile ];
}
471
/**
 * List the usage examples for this module.
 *
 * @return string[] Map of example query string => help message key
 */
protected function getExamplesMessages() {
	$examples = [];

	// Plain search
	$examples['action=query&list=search&srsearch=meaning'] =
		'apihelp-query+search-example-simple';
	// Search restricted to page text
	$examples['action=query&list=search&srwhat=text&srsearch=meaning'] =
		'apihelp-query+search-example-text';
	// Search used as a generator
	$examples['action=query&generator=search&gsrsearch=meaning&prop=info'] =
		'apihelp-query+search-example-generator';

	return $examples;
}
482
/**
 * Point to the detailed help page for this module.
 *
 * @return string URL of the API:Search page on mediawiki.org
 */
public function getHelpUrls() {
	return 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Search';
}
486}
487
// Register the un-namespaced name 'ApiQuerySearch' as an alias of this class.
class_alias( ApiQuerySearch::class, 'ApiQuerySearch' );
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
array $params
The job parameters.
run()
Run the job.
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:30
dieWithError( $msg, $code=null, $data=null, $httpCode=0)
Abort execution with an error.
Definition ApiBase.php:1522
getModuleName()
Get the name of the module being executed by this instance.
Definition ApiBase.php:557
getMain()
Get the main module.
Definition ApiBase.php:575
getResult()
Get the result object.
Definition ApiBase.php:696
const PARAM_HELP_MSG_PER_VALUE
((string|array|Message)[]) When PARAM_TYPE is an array, or 'string' with PARAM_ISMULTI,...
Definition ApiBase.php:221
dieStatus(StatusValue $status)
Throw an ApiUsageException based on the Status object.
Definition ApiBase.php:1573
extractRequestParams( $options=[])
Using getAllowedParams(), this function makes an array of the values provided by the user,...
Definition ApiBase.php:837
static addTitleInfo(&$arr, $title, $prefix='')
Add information (title and namespace) about a Title object to a result array.
setContinueEnumParameter( $paramName, $paramValue)
Overridden to set the generator param if in generator mode.
Query module to perform full text search within wiki titles and content.
execute()
Evaluates the parameters, performs the requested query, and sets up the result.
getAllowedParams()
Returns an array of allowed parameters (parameter name) => (default value) or (parameter name) => (ar...
getExamplesMessages()
Returns usage examples for this module.
__construct(ApiQuery $query, string $moduleName, SearchEngineConfig $searchEngineConfig, SearchEngineFactory $searchEngineFactory, TitleMatcher $titleMatcher)
getHelpUrls()
Return links to more detailed help pages about the module.
executeGenerator( $resultPageSet)
Execute this module as a generator.
getCacheMode( $params)
Get the cache mode for the data generated by this module.
This is the main query class.
Definition ApiQuery.php:48
static addMetadataToResultVars( $vars, $forceHash=true)
Add the correct metadata to an array of vars we want to export through the API.
Service implementation of near match title search.
Generic operation result class. Has a warning/error list, a boolean status and an arbitrary value.
Definition Status.php:54
Configuration handling class for SearchEngine.
Factory class for SearchEngine.
Contains a class for special pages.
NOTE: this class is being refactored into an abstract base class.
Service for formatting and validating API parameters.
Type definition for enumeration types.
Definition EnumDef.php:32
A set of SearchEngine results.
const INLINE_RESULTS
Identifier for interwiki results that can be displayed even if no existing main wiki results exist.
const SECONDARY_RESULTS
Identifier for interwiki results that are displayed only together with existing main wiki results.
buildCommonApiParams( $isScrollable=true)
The set of api parameters that are shared between api calls that call the SearchEngine.
Definition SearchApi.php:71
buildSearchEngine(?array $params=null)
Build the search engine to use.