MediaWiki 1.40.4
ApiQuerySearch.php
Go to the documentation of this file.
1<?php
26
33 use SearchApi;
34
/** @var array|null Parameter definitions cached by getAllowedParams(); unset until its first call */
36 private $allowedParams;
37
/** @var SearchEngineConfig Injected via the constructor; also needed by the SearchApi trait */
39 private $searchEngineConfig;
40
/** @var SearchEngineFactory Injected via the constructor; also needed by the SearchApi trait */
42 private $searchEngineFactory;
43
/** @var TitleMatcher Injected via the constructor; used for 'nearmatch' searches */
44 private TitleMatcher $titleMatcher;
45
/**
 * Register the module under the 'sr' parameter prefix and keep the
 * injected search services.
 *
 * @param ApiQuery $query
 * @param string $moduleName
 * @param SearchEngineConfig $searchEngineConfig
 * @param SearchEngineFactory $searchEngineFactory
 * @param TitleMatcher $titleMatcher
 */
public function __construct(
	ApiQuery $query,
	$moduleName,
	SearchEngineConfig $searchEngineConfig,
	SearchEngineFactory $searchEngineFactory,
	TitleMatcher $titleMatcher
) {
	parent::__construct( $query, $moduleName, 'sr' );

	// Only used by this class, for 'nearmatch' lookups.
	$this->titleMatcher = $titleMatcher;

	// These two services are also needed in the SearchApi trait.
	$this->searchEngineFactory = $searchEngineFactory;
	$this->searchEngineConfig = $searchEngineConfig;
}
66
/**
 * Run the search in list mode: no page set is supplied, so hits are
 * written straight into the API result.
 */
public function execute() {
	$this->run( null );
}
70
/**
 * Run the search as a generator: hits are collected into the supplied
 * page set instead of being written to the API result.
 *
 * @param mixed $resultPageSet Page set to populate (presumably an ApiPageSet; the type is not visible here)
 */
public function executeGenerator( $resultPageSet ) {
	$this->run( $resultPageSet );
}
74
/**
 * Execute the search and deliver the hits.
 *
 * In list mode ($resultPageSet === null) every hit is added to the API
 * result, followed by optional interwiki sections. In generator mode the
 * hit titles are loaded into the page set and the per-hit data is attached
 * as generator data, together with a 1-based 'index' that accounts for the
 * request offset.
 *
 * @param mixed|null $resultPageSet Page set to populate in generator mode
 *  (presumably an ApiPageSet; the type is not visible here), or null in list mode
 */
private function run( $resultPageSet = null ) {
	$params = $this->extractRequestParams();

	// Extract parameters
	$query = $params['search'];
	$interwiki = $params['interwiki'];
	$searchInfo = array_fill_keys( $params['info'], true );
	$prop = array_fill_keys( $params['prop'], true );

	// Create search engine instance and set options
	$search = $this->buildSearchEngine( $params );
	if ( isset( $params['sort'] ) ) {
		$search->setSort( $params['sort'] );
	}
	$search->setFeatureData( 'rewrite', (bool)$params['enablerewrites'] );
	$search->setFeatureData( 'interwiki', (bool)$interwiki );
	// Hint to some SearchEngines about what snippets we would like returned
	$search->setFeatureData( 'snippets', $this->decideSnippets( $prop ) );

	// NOTE(review): this call is intentionally left in run() rather than moved
	// into a helper, so that the stack position wfDeprecatedMsg() reports
	// (presumably derived from its $callerOffset) does not shift.
	$nquery = $search->replacePrefixes( $query );
	if ( $nquery !== $query ) {
		$query = $nquery;
		wfDeprecatedMsg( 'SearchEngine::replacePrefixes() is overridden by ' .
			get_class( $search ) . ', this was deprecated in MediaWiki 1.32',
			'1.32' );
	}

	// Perform the actual search. $what is rebound because the legacy default
	// path decides between 'title' and 'text' on its own.
	[ $matches, $what ] = $this->performSearch( $search, $query, $params['search'], $params['what'] );

	if ( $matches instanceof Status ) {
		// Unwrap the Status: report its messages when usable, abort otherwise.
		$status = $matches;
		$matches = $status->getValue();
		if ( $status->isOK() ) {
			$this->getMain()->getErrorFormatter()->addMessagesFromStatus(
				$this->getModuleName(),
				$status
			);
		} else {
			$this->dieStatus( $status );
		}
	} elseif ( $matches === null ) {
		$this->dieWithError( [ 'apierror-searchdisabled', $what ], "search-{$what}-disabled" );
	}

	$apiResult = $this->getResult();
	// Add search meta data to result
	$this->addSearchMetadata( $apiResult, $matches, $searchInfo );

	$titles = [];
	$data = [];
	$count = 0;

	if ( $matches->hasMoreResults() ) {
		$this->setContinueEnumParameter( 'offset', $params['offset'] + $params['limit'] );
	}

	foreach ( $matches as $result ) {
		// Counted before the skip below so that continuation offsets stay
		// aligned with positions in the engine's result set.
		$count++;
		// Silently skip broken and missing titles
		if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
			continue;
		}

		$vals = $this->getSearchResultData( $result, $prop );

		if ( $resultPageSet === null ) {
			if ( $vals ) {
				// Add item to results and see whether it fits
				$fit = $apiResult->addValue( [ 'query', $this->getModuleName() ], null, $vals );
				if ( !$fit ) {
					// Resume at the hit that did not fit.
					$this->setContinueEnumParameter( 'offset', $params['offset'] + $count - 1 );
					break;
				}
			}
		} else {
			$titles[] = $result->getTitle();
			$data[] = $vals ?: [];
		}
	}

	// Here we assume interwiki results do not count with
	// regular search results. We may want to reconsider this
	// if we ever return a lot of interwiki results or want pagination
	// for them.
	// Interwiki results inside main result set
	$canAddInterwiki = (bool)$params['enablerewrites'] && ( $resultPageSet === null );
	if ( $canAddInterwiki ) {
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'additional',
			ISearchResultSet::INLINE_RESULTS );
	}

	// Interwiki results outside main result set
	if ( $interwiki && $resultPageSet === null ) {
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'interwiki',
			ISearchResultSet::SECONDARY_RESULTS );
	}

	if ( $resultPageSet === null ) {
		$apiResult->addIndexedTagName( [
			'query', $this->getModuleName()
		], 'p' );
	} else {
		// When entries are merged through redirects, keep the smallest index.
		$resultPageSet->setRedirectMergePolicy( static function ( $current, $new ) {
			if ( !isset( $current['index'] ) || $new['index'] < $current['index'] ) {
				$current['index'] = $new['index'];
			}
			return $current;
		} );
		$resultPageSet->populateFromTitles( $titles );
		$offset = $params['offset'] + 1;
		foreach ( $titles as $index => $title ) {
			$resultPageSet->setGeneratorData(
				$title,
				$data[ $index ] + [ 'index' => $index + $offset ]
			);
		}
	}
}

/**
 * Dispatch the query to the search backend selected by the 'what' parameter.
 *
 * @param mixed $search Search engine returned by buildSearchEngine()
 * @param string $query Query after SearchEngine::replacePrefixes()
 * @param string $rawQuery Query exactly as supplied by the user
 * @param string|null $what Requested search type: 'text', 'title', 'nearmatch',
 *  or anything else for the legacy default
 * @return array Two elements: the raw search outcome (result set, Status or null)
 *  and the search type that was effectively used
 */
private function performSearch( $search, $query, $rawQuery, $what ) {
	if ( $what === 'text' ) {
		return [ $search->searchText( $query ), $what ];
	}
	if ( $what === 'title' ) {
		return [ $search->searchTitle( $query ), $what ];
	}
	if ( $what === 'nearmatch' ) {
		// near matches must receive the user input as provided, otherwise
		// the near matches within namespaces are lost.
		return [ $this->titleMatcher->getNearMatchResultSet( $rawQuery ), $what ];
	}

	// We default to title searches; this is a terrible legacy
	// of the way we initially set up the MySQL fulltext-based
	// search engine with separate title and text fields.
	// In the future, the default should be for a combined index.
	$matches = $search->searchTitle( $query );
	if ( $matches !== null ) {
		return [ $matches, 'title' ];
	}

	// Not all search engines support a separate title search,
	// for instance the Lucene-based engine we use on Wikipedia.
	// In this case, fall back to full-text search (which will
	// include titles in it!)
	return [ $search->searchText( $query ), 'text' ];
}

/**
 * Add the requested 'searchinfo' fields (total hits, suggestion, rewritten
 * query) to the API result.
 *
 * @param ApiResult $apiResult Result object to write to
 * @param mixed $matches Result set produced by the search
 * @param array $searchInfo Map whose keys are the requested info names
 */
private function addSearchMetadata( ApiResult $apiResult, $matches, array $searchInfo ) {
	$path = [ 'query', 'searchinfo' ];

	if ( isset( $searchInfo['totalhits'] ) ) {
		$totalhits = $matches->getTotalHits();
		// A null count is left out of the output rather than reported.
		if ( $totalhits !== null ) {
			$apiResult->addValue( $path, 'totalhits', $totalhits );
		}
	}
	if ( isset( $searchInfo['suggestion'] ) && $matches->hasSuggestion() ) {
		$apiResult->addValue( $path, 'suggestion', $matches->getSuggestionQuery() );
		$apiResult->addValue( $path,
			'suggestionsnippet', HtmlArmor::getHtml( $matches->getSuggestionSnippet() ) );
	}
	if ( isset( $searchInfo['rewrittenquery'] ) && $matches->hasRewrittenQuery() ) {
		$apiResult->addValue( $path, 'rewrittenquery', $matches->getQueryAfterRewrite() );
		$apiResult->addValue( $path,
			'rewrittenquerysnippet', HtmlArmor::getHtml( $matches->getQueryAfterRewriteSnippet() ) );
	}
}
245
/**
 * Build the array of output fields for a single search result.
 *
 * 'pageid' is always set; every other field is included only when its name
 * is a key of $prop. Redirect and section fields additionally require the
 * result to have a redirect title / section title.
 *
 * @param SearchResult $result Result to describe
 * @param array $prop Map whose keys are the requested property names
 * @return array|null Field name => value, or null when the result has a
 *  broken title or a missing revision
 */
252 private function getSearchResultData( SearchResult $result, $prop ) {
253 // Silently skip broken and missing titles
254 if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
255 return null;
256 }
257
258 $vals = [];
259
260 $title = $result->getTitle();
// NOTE(review): the embedded listing numbers jump from 260 to 262 here, and the
// reference index at the end of this listing mentions addTitleInfo(), which no
// visible line calls. A statement adding title/namespace info was presumably
// dropped from this view; confirm against the real file before editing.
262 $vals['pageid'] = $title->getArticleID();
263
264 if ( isset( $prop['size'] ) ) {
265 $vals['size'] = $result->getByteSize();
266 }
267 if ( isset( $prop['wordcount'] ) ) {
268 $vals['wordcount'] = $result->getWordCount();
269 }
270 if ( isset( $prop['snippet'] ) ) {
271 $vals['snippet'] = $result->getTextSnippet();
272 }
273 if ( isset( $prop['timestamp'] ) ) {
274 $vals['timestamp'] = wfTimestamp( TS_ISO_8601, $result->getTimestamp() );
275 }
276 if ( isset( $prop['titlesnippet'] ) ) {
277 $vals['titlesnippet'] = $result->getTitleSnippet();
278 }
279 if ( isset( $prop['categorysnippet'] ) ) {
280 $vals['categorysnippet'] = $result->getCategorySnippet();
281 }
// Redirect fields only make sense when the hit was reached through a redirect.
282 if ( $result->getRedirectTitle() !== null ) {
283 if ( isset( $prop['redirecttitle'] ) ) {
284 $vals['redirecttitle'] = $result->getRedirectTitle()->getPrefixedText();
285 }
286 if ( isset( $prop['redirectsnippet'] ) ) {
287 $vals['redirectsnippet'] = $result->getRedirectSnippet();
288 }
289 }
// Likewise, section fields are only emitted when the result has a section title.
290 if ( $result->getSectionTitle() !== null ) {
291 if ( isset( $prop['sectiontitle'] ) ) {
292 $vals['sectiontitle'] = $result->getSectionTitle()->getFragment();
293 }
294 if ( isset( $prop['sectionsnippet'] ) ) {
295 $vals['sectionsnippet'] = $result->getSectionSnippet();
296 }
297 }
298 if ( isset( $prop['isfilematch'] ) ) {
299 $vals['isfilematch'] = $result->isFileMatch();
300 }
301
302 if ( isset( $prop['extensiondata'] ) ) {
303 $extra = $result->getExtensionData();
304 // Add augmented data to the result. The data would be organized as a map:
305 // augmentorName => data
// Empty extension data is omitted entirely instead of being emitted as an empty map.
306 if ( $extra ) {
307 $vals['extensiondata'] = ApiResult::addMetadataToResultVars( $extra );
308 }
309 }
310
311 return $vals;
312 }
313
/**
 * Add the interwiki results of one type as a separate section of the API result.
 *
 * Hits are stored under [ 'query', $section . <module name>, <interwiki prefix> ]
 * and the summed hit count under [ 'query', $section . 'searchinfo' ].
 *
 * @param ISearchResultSet $matches Result set that may carry interwiki results
 * @param ApiResult $apiResult Result object to write to
 * @param array $prop Map whose keys are the requested property names
 * @param string $section Prefix for the result keys ('additional' or 'interwiki' at the visible call sites)
 * @param mixed $type One of the ISearchResultSet::*_RESULTS constants
 * @return int|null Total hits summed over all interwiki result sets, or null
 *  when there are no interwiki results of this type
 */
private function addInterwikiResults(
	ISearchResultSet $matches, ApiResult $apiResult, $prop,
	$section, $type
) {
	if ( !$matches->hasInterwikiResults( $type ) ) {
		return null;
	}

	// Stays null when there is no result set to iterate, so that no
	// searchinfo is emitted in that case.
	$totalhits = null;
	foreach ( $matches->getInterwikiResults( $type ) as $interwikiMatches ) {
		// Include number of results if requested
		$totalhits += $interwikiMatches->getTotalHits();

		foreach ( $interwikiMatches as $result ) {
			$vals = $this->getSearchResultData( $result, $prop );
			if ( $vals === null ) {
				// Silently skip broken and missing titles, as the main result
				// loop does. Without this guard a partial entry would be
				// emitted and, presumably, $title below could be null.
				continue;
			}

			$title = $result->getTitle();
			$vals['namespace'] = $result->getInterwikiNamespaceText();
			$vals['title'] = $title->getText();
			$vals['url'] = $title->getFullURL();

			// Add item to results and see whether it fits
			$fit = $apiResult->addValue( [
				'query',
				$section . $this->getModuleName(),
				$result->getInterwikiPrefix()
			], null, $vals );

			if ( !$fit ) {
				// We hit the limit. We can't really provide any meaningful
				// pagination info so just bail out
				break;
			}
		}
	}

	if ( $totalhits !== null ) {
		$apiResult->addValue( [ 'query', $section . 'searchinfo' ], 'totalhits', $totalhits );
		$apiResult->addIndexedTagName( [
			'query', $section . $this->getModuleName()
		], 'p' );
	}

	return $totalhits;
}
364
/**
 * Work out which index fields the search engine should produce snippets for.
 *
 * @param array $prop Map whose keys are the requested property names
 * @return string[] Field names, always in the order title, redirect, category, heading
 */
private function decideSnippets( array $prop ): array {
	// Field names align with definitions in ContentHandler::getFieldsForSearchIndex.
	// Except `redirect` which isn't explicitly created, but refers to the title of
	// pages that redirect to the result page.
	//
	// The title variants (redirecttitle, sectiontitle) are listed alongside their
	// snippet counterparts because some search engines generate the title variant
	// from the snippet and thus must have the snippet requested to provide the title.
	$triggersByField = [
		'title' => [ 'titlesnippet' ],
		'redirect' => [ 'redirectsnippet', 'redirecttitle' ],
		'category' => [ 'categorysnippet' ],
		'heading' => [ 'sectionsnippet', 'sectiontitle' ],
	];

	$fields = [];
	foreach ( $triggersByField as $field => $triggers ) {
		foreach ( $triggers as $propName ) {
			if ( isset( $prop[$propName] ) ) {
				$fields[] = $field;
				// One matching prop is enough; never list a field twice.
				break;
			}
		}
	}

	return $fields;
}
387
/**
 * Report the cache mode for this module's output.
 *
 * @param array $params Request parameters (not consulted)
 * @return string Always 'public'
 */
public function getCacheMode( $params ) {
	// The mode does not depend on the request.
	return 'public';
}
391
/**
 * Build (once) and return the parameter definitions accepted by this module.
 *
 * The common search parameters from buildCommonApiParams() are combined with
 * the module-specific 'what', 'info', 'prop', 'interwiki' and 'enablerewrites'
 * parameters. A 'sort' parameter is added only when exactly one search type
 * is configured.
 *
 * @return array Parameter name => definition (or plain default value)
 */
392 public function getAllowedParams() {
// Return the memoized definitions when they were already built.
393 if ( $this->allowedParams !== null ) {
394 return $this->allowedParams;
395 }
396
// Array union: on a key clash the common parameters on the left take precedence.
397 $this->allowedParams = $this->buildCommonApiParams() + [
398 'what' => [
399 ParamValidator::PARAM_TYPE => [
400 'title',
401 'text',
402 'nearmatch',
403 ]
404 ],
405 'info' => [
406 ParamValidator::PARAM_DEFAULT => 'totalhits|suggestion|rewrittenquery',
407 ParamValidator::PARAM_TYPE => [
408 'totalhits',
409 'suggestion',
410 'rewrittenquery',
411 ],
412 ParamValidator::PARAM_ISMULTI => true,
413 ],
414 'prop' => [
415 ParamValidator::PARAM_DEFAULT => 'size|wordcount|timestamp|snippet',
416 ParamValidator::PARAM_TYPE => [
417 'size',
418 'wordcount',
419 'timestamp',
420 'snippet',
421 'titlesnippet',
422 'redirecttitle',
423 'redirectsnippet',
424 'sectiontitle',
425 'sectionsnippet',
426 'isfilematch',
427 'categorysnippet',
428 'score', // deprecated
429 'hasrelated', // deprecated
430 'extensiondata',
431 ],
432 ParamValidator::PARAM_ISMULTI => true,
// NOTE(review): the embedded listing numbers jump from 432 to 434 here, and the
// reference index at the end of this listing mentions PARAM_HELP_MSG_PER_VALUE,
// which no visible line uses. An array entry was presumably dropped from this
// view; confirm against the real file before editing.
434 EnumDef::PARAM_DEPRECATED_VALUES => [
435 'score' => true,
436 'hasrelated' => true
437 ],
438 ],
439 'interwiki' => false,
440 'enablerewrites' => false,
441 ];
442
443 // Generators only add info/properties if explicitly requested. T263841
444 if ( $this->isInGeneratorMode() ) {
445 $this->allowedParams['prop'][ParamValidator::PARAM_DEFAULT] = '';
446 $this->allowedParams['info'][ParamValidator::PARAM_DEFAULT] = '';
447 }
448
449 // If we have more than one engine the list of available sorts is
450 // difficult to represent. For now don't expose it.
451 $alternatives = $this->searchEngineConfig->getSearchTypes();
452 if ( count( $alternatives ) == 1 ) {
// The valid sort values are read from an engine created by the factory with no arguments.
453 $this->allowedParams['sort'] = [
454 ParamValidator::PARAM_DEFAULT => SearchEngine::DEFAULT_SORT,
455 ParamValidator::PARAM_TYPE => $this->searchEngineFactory->create()->getValidSorts(),
456 ];
457 }
458
459 return $this->allowedParams;
460 }
461
/**
 * Describe the search profile parameters this module exposes.
 *
 * @return array Parameter name => settings with 'profile-type' and 'help-message' keys
 */
public function getSearchProfileParams() {
	// The only profile offered here is the query-independent one.
	$queryIndependentProfile = [
		'profile-type' => SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE,
		'help-message' => 'apihelp-query+search-param-qiprofile',
	];

	return [ 'qiprofile' => $queryIndependentProfile ];
}
470
/**
 * List example requests for this module.
 *
 * @return string[] Example query string => message key describing it
 */
protected function getExamplesMessages() {
	$examples = [];

	// Plain search using the defaults.
	$examples['action=query&list=search&srsearch=meaning'] =
		'apihelp-query+search-example-simple';
	// Explicit full-text search.
	$examples['action=query&list=search&srwhat=text&srsearch=meaning'] =
		'apihelp-query+search-example-text';
	// Search used as a generator feeding prop=info.
	$examples['action=query&generator=search&gsrsearch=meaning&prop=info'] =
		'apihelp-query+search-example-generator';

	return $examples;
}
481
/**
 * Point to the on-wiki documentation for this module.
 *
 * @return string URL of the API:Search help page
 */
public function getHelpUrls() {
	$helpPage = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Search';

	return $helpPage;
}
485}
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
buildSearchEngine(array $params=null)
Build the search engine to use.
buildCommonApiParams( $isScrollable=true)
The set of api parameters that are shared between api calls that call the SearchEngine.
Definition SearchApi.php:66
dieWithError( $msg, $code=null, $data=null, $httpCode=0)
Abort execution with an error.
Definition ApiBase.php:1460
getMain()
Get the main module.
Definition ApiBase.php:522
const PARAM_HELP_MSG_PER_VALUE
((string|array|Message)[]) When PARAM_TYPE is an array, or 'string' with PARAM_ISMULTI,...
Definition ApiBase.php:204
getResult()
Get the result object.
Definition ApiBase.php:637
extractRequestParams( $options=[])
Using getAllowedParams(), this function makes an array of the values provided by the user,...
Definition ApiBase.php:773
getModuleName()
Get the name of the module being executed by this instance.
Definition ApiBase.php:506
dieStatus(StatusValue $status)
Throw an ApiUsageException based on the Status object.
Definition ApiBase.php:1521
static addTitleInfo(&$arr, $title, $prefix='')
Add information (title and namespace) about a Title object to a result array.
setContinueEnumParameter( $paramName, $paramValue)
Overridden to set the generator param if in generator mode.
Query module to perform full text search within wiki titles and content.
execute()
Evaluates the parameters, performs the requested query, and sets up the result.
getHelpUrls()
Return links to more detailed help pages about the module.
getExamplesMessages()
Returns usage examples for this module.
getCacheMode( $params)
Get the cache mode for the data generated by this module.
getAllowedParams()
Returns an array of allowed parameters (parameter name) => (default value) or (parameter name) => (ar...
executeGenerator( $resultPageSet)
Execute this module as a generator.
__construct(ApiQuery $query, $moduleName, SearchEngineConfig $searchEngineConfig, SearchEngineFactory $searchEngineFactory, TitleMatcher $titleMatcher)
This is the main query class.
Definition ApiQuery.php:42
This class represents the result of the API operations.
Definition ApiResult.php:35
static addMetadataToResultVars( $vars, $forceHash=true)
Add the correct metadata to an array of vars we want to export through the API.
addIndexedTagName( $path, $tag)
Set the tag name for numeric-keyed values in XML format.
addValue( $path, $name, $value, $flags=0)
Add value to the output data at the given path.
Service implementation of near match title search.
Configuration handling class for SearchEngine.
Factory class for SearchEngine.
NOTE: this class is being refactored into an abstract base class.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:46
Service for formatting and validating API parameters.
Type definition for enumeration types.
Definition EnumDef.php:32
trait SearchApi
Trait for API components that use a SearchEngine.
Definition SearchApi.php:30
A set of SearchEngine results.
return true
Definition router.php:92