MediaWiki REL1_39
ApiQuerySearch.php
Go to the documentation of this file.
1<?php
25
32 use SearchApi;
33
/** @var array|null Memoized return value of getAllowedParams(); null until it has been built once */
private $allowedParams;

/** @var SearchEngineConfig Injected through the constructor */
private $searchEngineConfig;

/** @var SearchEngineFactory Injected through the constructor */
private $searchEngineFactory;
42
/**
 * @param ApiQuery $query Query module this submodule is attached to
 * @param string $moduleName Name of this module
 * @param SearchEngineConfig $searchEngineConfig
 * @param SearchEngineFactory $searchEngineFactory
 */
public function __construct(
	ApiQuery $query,
	$moduleName,
	SearchEngineConfig $searchEngineConfig,
	SearchEngineFactory $searchEngineFactory
) {
	// 'sr' is presumably the parameter prefix (cf. "srsearch" in the usage examples).
	parent::__construct( $query, $moduleName, 'sr' );

	// Both services are stored on the instance because the SearchApi trait
	// needs them too.
	$this->searchEngineFactory = $searchEngineFactory;
	$this->searchEngineConfig = $searchEngineConfig;
}
60
/**
 * Run the search as a plain list module: no page set is involved, so the
 * matches are written straight to the API result.
 */
public function execute() {
	$this->run( null );
}
64
/**
 * Run the search as a generator, feeding the matching titles into the
 * given page set instead of the API result.
 *
 * @param object $resultPageSet Page set to populate (presumably an
 *  ApiPageSet; the type is not declared in this file)
 */
public function executeGenerator( $resultPageSet ) {
	$this->run( $resultPageSet );
}
68
/**
 * Run the search described by the request parameters and emit the results.
 *
 * Without a page set the matches are written to the API result under
 * [ 'query', <module name> ]. With a page set (generator mode) the matching
 * titles are handed to the page set together with per-title generator data,
 * and no interwiki results are emitted.
 *
 * @param object|null $resultPageSet Page set to populate in generator mode,
 *  or null to write to the API result (presumably an ApiPageSet; the type
 *  is not declared in this file)
 * @return void
 */
private function run( $resultPageSet = null ) {
	$params = $this->extractRequestParams();

	// Extract parameters
	$query = $params['search'];
	$what = $params['what'];
	$interwiki = $params['interwiki'];
	$enableRewrites = (bool)$params['enablerewrites'];
	// Flip the multi-value lists into maps so membership is an isset() check.
	$searchInfo = array_fill_keys( $params['info'], true );
	$prop = array_fill_keys( $params['prop'], true );

	// Create search engine instance and set options
	$search = $this->buildSearchEngine( $params );
	if ( isset( $params['sort'] ) ) {
		$search->setSort( $params['sort'] );
	}
	$search->setFeatureData( 'rewrite', $enableRewrites );
	$search->setFeatureData( 'interwiki', (bool)$interwiki );
	// Hint to some SearchEngines about what snippets we would like returned
	$search->setFeatureData( 'snippets', $this->decideSnippets( $prop ) );

	$nquery = $search->replacePrefixes( $query );
	if ( $nquery !== $query ) {
		$query = $nquery;
		wfDeprecatedMsg( 'SearchEngine::replacePrefixes() is overridden by ' .
			get_class( $search ) . ', this was deprecated in MediaWiki 1.32',
			'1.32' );
	}

	// Perform the actual search
	if ( $what === 'text' ) {
		$matches = $search->searchText( $query );
	} elseif ( $what === 'title' ) {
		$matches = $search->searchTitle( $query );
	} elseif ( $what === 'nearmatch' ) {
		// near matches must receive the user input as provided, otherwise
		// the near matches within namespaces are lost.
		$matches = $search->getNearMatcher( $this->getConfig() )
			->getNearMatchResultSet( $params['search'] );
	} else {
		// We default to title searches; this is a terrible legacy
		// of the way we initially set up the MySQL fulltext-based
		// search engine with separate title and text fields.
		// In the future, the default should be for a combined index.
		$what = 'title';
		$matches = $search->searchTitle( $query );

		// Not all search engines support a separate title search,
		// for instance the Lucene-based engine we use on Wikipedia.
		// In this case, fall back to full-text search (which will
		// include titles in it!)
		if ( $matches === null ) {
			$what = 'text';
			$matches = $search->searchText( $query );
		}
	}

	// The engine may hand back a Status wrapping the result set.
	if ( $matches instanceof Status ) {
		$status = $matches;
		$matches = $status->getValue();

		if ( !$status->isOK() ) {
			$this->dieStatus( $status );
		}
		// An OK status can still carry messages; pass them on to the client.
		$this->getMain()->getErrorFormatter()->addMessagesFromStatus(
			$this->getModuleName(),
			$status
		);
	}

	// Checked whether or not a Status was involved: an OK Status whose value
	// is null would otherwise reach the method calls on $matches below.
	if ( $matches === null ) {
		$this->dieWithError( [ 'apierror-searchdisabled', $what ], "search-{$what}-disabled" );
	}

	$apiResult = $this->getResult();
	$infoPath = [ 'query', 'searchinfo' ];

	// Add search meta data to result
	if ( isset( $searchInfo['totalhits'] ) ) {
		$totalhits = $matches->getTotalHits();
		if ( $totalhits !== null ) {
			$apiResult->addValue( $infoPath, 'totalhits', $totalhits );
		}
	}
	if ( isset( $searchInfo['suggestion'] ) && $matches->hasSuggestion() ) {
		$apiResult->addValue( $infoPath, 'suggestion', $matches->getSuggestionQuery() );
		$apiResult->addValue( $infoPath,
			'suggestionsnippet', HtmlArmor::getHtml( $matches->getSuggestionSnippet() ) );
	}
	if ( isset( $searchInfo['rewrittenquery'] ) && $matches->hasRewrittenQuery() ) {
		$apiResult->addValue( $infoPath, 'rewrittenquery', $matches->getQueryAfterRewrite() );
		$apiResult->addValue( $infoPath,
			'rewrittenquerysnippet', HtmlArmor::getHtml( $matches->getQueryAfterRewriteSnippet() ) );
	}

	$titles = [];
	$data = [];
	$count = 0;

	if ( $matches->hasMoreResults() ) {
		$this->setContinueEnumParameter( 'offset', $params['offset'] + $params['limit'] );
	}

	foreach ( $matches as $result ) {
		// Counts every result seen, including skipped ones, so that the
		// continuation offset below stays aligned with the engine's paging.
		$count++;

		// Silently skip broken and missing titles
		if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
			continue;
		}

		$vals = $this->getSearchResultData( $result, $prop );

		if ( $resultPageSet === null ) {
			if ( $vals ) {
				// Add item to results and see whether it fits
				$fit = $apiResult->addValue( [ 'query', $this->getModuleName() ], null, $vals );
				if ( !$fit ) {
					// Resume from the result that did not fit.
					$this->setContinueEnumParameter( 'offset', $params['offset'] + $count - 1 );
					break;
				}
			}
		} else {
			$titles[] = $result->getTitle();
			$data[] = $vals ?: [];
		}
	}

	if ( $resultPageSet === null ) {
		// Here we assume interwiki results do not count with
		// regular search results. We may want to reconsider this
		// if we ever return a lot of interwiki results or want pagination
		// for them.
		// Interwiki results inside main result set
		if ( $enableRewrites ) {
			$this->addInterwikiResults( $matches, $apiResult, $prop, 'additional',
				ISearchResultSet::INLINE_RESULTS );
		}

		// Interwiki results outside main result set
		if ( $interwiki ) {
			$this->addInterwikiResults( $matches, $apiResult, $prop, 'interwiki',
				ISearchResultSet::SECONDARY_RESULTS );
		}

		$apiResult->addIndexedTagName( [
			'query', $this->getModuleName()
		], 'p' );
		return;
	}

	// Generator mode: when entries are merged through a redirect, keep the
	// smallest index.
	$resultPageSet->setRedirectMergePolicy( static function ( $current, $new ) {
		if ( !isset( $current['index'] ) || $new['index'] < $current['index'] ) {
			$current['index'] = $new['index'];
		}
		return $current;
	} );
	$resultPageSet->populateFromTitles( $titles );
	// 'index' is 1-based and continues across pages via the offset.
	$offset = $params['offset'] + 1;
	foreach ( $titles as $index => $title ) {
		$resultPageSet->setGeneratorData(
			$title,
			$data[ $index ] + [ 'index' => $index + $offset ]
		);
	}
}
240
/**
 * Build the output row for a single search result.
 *
 * @param SearchResult $result Result to describe
 * @param array $prop Requested properties, as a map whose keys are the
 *  property names
 * @return array|null Row data, or null when the result has a broken title
 *  or a missing revision and should be skipped
 */
private function getSearchResultData( SearchResult $result, $prop ) {
	// Silently skip broken and missing titles
	if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
		return null;
	}

	$vals = [];

	$title = $result->getTitle();
	// Adds the title and namespace information; without it a row would
	// identify its page by pageid alone.
	ApiQueryBase::addTitleInfo( $vals, $title );
	$vals['pageid'] = $title->getArticleID();

	if ( isset( $prop['size'] ) ) {
		$vals['size'] = $result->getByteSize();
	}
	if ( isset( $prop['wordcount'] ) ) {
		$vals['wordcount'] = $result->getWordCount();
	}
	if ( isset( $prop['snippet'] ) ) {
		$vals['snippet'] = $result->getTextSnippet();
	}
	if ( isset( $prop['timestamp'] ) ) {
		$vals['timestamp'] = wfTimestamp( TS_ISO_8601, $result->getTimestamp() );
	}
	if ( isset( $prop['titlesnippet'] ) ) {
		$vals['titlesnippet'] = $result->getTitleSnippet();
	}
	if ( isset( $prop['categorysnippet'] ) ) {
		$vals['categorysnippet'] = $result->getCategorySnippet();
	}

	// Redirect details are only emitted when the result has a redirect title.
	$redirectTitle = $result->getRedirectTitle();
	if ( $redirectTitle !== null ) {
		if ( isset( $prop['redirecttitle'] ) ) {
			$vals['redirecttitle'] = $redirectTitle->getPrefixedText();
		}
		if ( isset( $prop['redirectsnippet'] ) ) {
			$vals['redirectsnippet'] = $result->getRedirectSnippet();
		}
	}

	// Likewise for section details.
	$sectionTitle = $result->getSectionTitle();
	if ( $sectionTitle !== null ) {
		if ( isset( $prop['sectiontitle'] ) ) {
			$vals['sectiontitle'] = $sectionTitle->getFragment();
		}
		if ( isset( $prop['sectionsnippet'] ) ) {
			$vals['sectionsnippet'] = $result->getSectionSnippet();
		}
	}

	if ( isset( $prop['isfilematch'] ) ) {
		$vals['isfilematch'] = $result->isFileMatch();
	}

	if ( isset( $prop['extensiondata'] ) ) {
		$extra = $result->getExtensionData();
		// Add augmented data to the result. The data would be organized as a map:
		// augmentorName => data
		if ( $extra ) {
			$vals['extensiondata'] = ApiResult::addMetadataToResultVars( $extra );
		}
	}

	return $vals;
}
308
/**
 * Add interwiki results of one type to the API result.
 *
 * Rows are stored under [ 'query', $section . <module name>, <interwiki prefix> ]
 * and the summed hit count under [ 'query', $section . 'searchinfo' ].
 *
 * @param ISearchResultSet $matches Result set holding the interwiki results
 * @param ApiResult $apiResult Result object to add to
 * @param array $prop Requested properties, as a map whose keys are the
 *  property names
 * @param string $section Prefix for the result keys (the callers in this
 *  class pass 'additional' or 'interwiki')
 * @param int $type Interwiki result type, one of the ISearchResultSet
 *  constants INLINE_RESULTS or SECONDARY_RESULTS (presumably integers)
 * @return int|null Total hits over all interwiki result sets of this type,
 *  or null when there are none
 */
private function addInterwikiResults(
	ISearchResultSet $matches, ApiResult $apiResult, $prop,
	$section, $type
) {
	if ( !$matches->hasInterwikiResults( $type ) ) {
		return null;
	}

	$totalhits = null;
	foreach ( $matches->getInterwikiResults( $type ) as $interwikiMatches ) {
		// Include number of results if requested
		$totalhits += $interwikiMatches->getTotalHits();

		foreach ( $interwikiMatches as $result ) {
			$vals = $this->getSearchResultData( $result, $prop );
			if ( $vals === null ) {
				// Broken title or missing revision: skip it silently, as the
				// main result loop does, rather than emitting a partial row.
				continue;
			}

			$title = $result->getTitle();
			$vals['namespace'] = $result->getInterwikiNamespaceText();
			$vals['title'] = $title->getText();
			$vals['url'] = $title->getFullURL();

			// Add item to results and see whether it fits
			$fit = $apiResult->addValue( [
				'query',
				$section . $this->getModuleName(),
				$result->getInterwikiPrefix()
			], null, $vals );

			if ( !$fit ) {
				// We hit the limit. We can't really provide any meaningful
				// pagination info so just bail out
				break;
			}
		}
	}

	if ( $totalhits !== null ) {
		$apiResult->addValue( [ 'query', $section . 'searchinfo' ], 'totalhits', $totalhits );
		$apiResult->addIndexedTagName( [
			'query', $section . $this->getModuleName()
		], 'p' );
	}

	return $totalhits;
}
359
/**
 * Work out which snippet fields the search engine should be asked for.
 *
 * @param array $prop Requested properties, as a map whose keys are the
 *  property names
 * @return string[] Field names, in the fixed order title, redirect,
 *  category, heading
 */
private function decideSnippets( array $prop ): array {
	// Field names align with definitions in ContentHandler::getFieldsForSearchIndex.
	// Except `redirect` which isn't explicitly created, but refers to the title of
	// pages that redirect to the result page.
	//
	// Each field lists the properties that trigger it. The title variants sit
	// next to their snippet variants because some search engines generate the
	// title from the snippet and thus must have the snippet requested.
	$triggersByField = [
		'title' => [ 'titlesnippet' ],
		'redirect' => [ 'redirectsnippet', 'redirecttitle' ],
		'category' => [ 'categorysnippet' ],
		'heading' => [ 'sectionsnippet', 'sectiontitle' ],
	];

	$wanted = [];
	foreach ( $triggersByField as $field => $triggers ) {
		foreach ( $triggers as $name ) {
			if ( isset( $prop[$name] ) ) {
				$wanted[] = $field;
				// One trigger is enough; never list a field twice.
				break;
			}
		}
	}
	return $wanted;
}
382
/**
 * Cache mode for the data generated by this module.
 *
 * @param array $params Request parameters (not consulted)
 * @return string Always 'public'
 */
public function getCacheMode( $params ) {
	$mode = 'public';
	return $mode;
}
386
/**
 * Build (once) and return the parameter definitions of this module.
 *
 * The result is memoized in $this->allowedParams. In generator mode the
 * 'prop' and 'info' defaults are emptied, and a 'sort' parameter is only
 * offered when exactly one search type is configured.
 *
 * @return array Map of parameter name to its definition or default value
 */
public function getAllowedParams() {
	if ( $this->allowedParams !== null ) {
		return $this->allowedParams;
	}

	$this->allowedParams = $this->buildCommonApiParams() + [
		'what' => [
			ParamValidator::PARAM_TYPE => [
				'title',
				'text',
				'nearmatch',
			]
		],
		'info' => [
			ParamValidator::PARAM_DEFAULT => 'totalhits|suggestion|rewrittenquery',
			ParamValidator::PARAM_TYPE => [
				'totalhits',
				'suggestion',
				'rewrittenquery',
			],
			ParamValidator::PARAM_ISMULTI => true,
		],
		'prop' => [
			ParamValidator::PARAM_DEFAULT => 'size|wordcount|timestamp|snippet',
			ParamValidator::PARAM_TYPE => [
				'size',
				'wordcount',
				'timestamp',
				'snippet',
				'titlesnippet',
				'redirecttitle',
				'redirectsnippet',
				'sectiontitle',
				'sectionsnippet',
				'isfilematch',
				'categorysnippet',
				'score', // deprecated
				'hasrelated', // deprecated
				'extensiondata',
			],
			ParamValidator::PARAM_ISMULTI => true,
			// Ask for per-value help messages for the values listed above.
			ApiBase::PARAM_HELP_MSG_PER_VALUE => [],
			EnumDef::PARAM_DEPRECATED_VALUES => [
				'score' => true,
				'hasrelated' => true
			],
		],
		'interwiki' => false,
		'enablerewrites' => false,
	];

	// Generators only add info/properties if explicitly requested. T263841
	if ( $this->isInGeneratorMode() ) {
		$this->allowedParams['prop'][ParamValidator::PARAM_DEFAULT] = '';
		$this->allowedParams['info'][ParamValidator::PARAM_DEFAULT] = '';
	}

	// If we have more than one engine the list of available sorts is
	// difficult to represent. For now don't expose it.
	$alternatives = $this->searchEngineConfig->getSearchTypes();
	if ( count( $alternatives ) === 1 ) {
		$this->allowedParams['sort'] = [
			ParamValidator::PARAM_DEFAULT => SearchEngine::DEFAULT_SORT,
			ParamValidator::PARAM_TYPE => $this->searchEngineFactory->create()->getValidSorts(),
		];
	}

	return $this->allowedParams;
}
456
/**
 * Describe the search profile parameters this module exposes.
 *
 * @return array Map of parameter name to an array with the keys
 *  'profile-type' and 'help-message'
 */
public function getSearchProfileParams() {
	$qiprofile = [
		'profile-type' => SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE,
		'help-message' => 'apihelp-query+search-param-qiprofile',
	];

	return [ 'qiprofile' => $qiprofile ];
}
465
/**
 * Usage examples for this module.
 *
 * @return string[] Map of example query string to the key of the message
 *  describing it
 */
protected function getExamplesMessages() {
	$examples = [];

	// Plain list search
	$examples['action=query&list=search&srsearch=meaning'] =
		'apihelp-query+search-example-simple';
	// Explicit full-text search
	$examples['action=query&list=search&srwhat=text&srsearch=meaning'] =
		'apihelp-query+search-example-text';
	// Search used as a generator
	$examples['action=query&generator=search&gsrsearch=meaning&prop=info'] =
		'apihelp-query+search-example-generator';

	return $examples;
}
476
/**
 * Link to the more detailed help page about this module.
 *
 * @return string URL of the help page
 */
public function getHelpUrls() {
	$helpUrl = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Search';
	return $helpUrl;
}
480}
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
buildSearchEngine(array $params=null)
Build the search engine to use.
buildCommonApiParams( $isScrollable=true)
The set of api parameters that are shared between api calls that call the SearchEngine.
Definition SearchApi.php:66
dieWithError( $msg, $code=null, $data=null, $httpCode=0)
Abort execution with an error.
Definition ApiBase.php:1454
getMain()
Get the main module.
Definition ApiBase.php:514
const PARAM_HELP_MSG_PER_VALUE
((string|array|Message)[]) When PARAM_TYPE is an array, this is an array mapping those values to $msg...
Definition ApiBase.php:196
getResult()
Get the result object.
Definition ApiBase.php:629
extractRequestParams( $options=[])
Using getAllowedParams(), this function makes an array of the values provided by the user,...
Definition ApiBase.php:765
getModuleName()
Get the name of the module being executed by this instance.
Definition ApiBase.php:498
dieStatus(StatusValue $status)
Throw an ApiUsageException based on the Status object.
Definition ApiBase.php:1515
static addTitleInfo(&$arr, $title, $prefix='')
Add information (title and namespace) about a Title object to a result array.
setContinueEnumParameter( $paramName, $paramValue)
Overridden to set the generator param if in generator mode.
Query module to perform full text search within wiki titles and content.
execute()
Evaluates the parameters, performs the requested query, and sets up the result.
getHelpUrls()
Return links to more detailed help pages about the module.
getExamplesMessages()
Returns usage examples for this module.
__construct(ApiQuery $query, $moduleName, SearchEngineConfig $searchEngineConfig, SearchEngineFactory $searchEngineFactory)
getCacheMode( $params)
Get the cache mode for the data generated by this module.
getAllowedParams()
Returns an array of allowed parameters (parameter name) => (default value) or (parameter name) => (ar...
executeGenerator( $resultPageSet)
Execute this module as a generator.
This is the main query class.
Definition ApiQuery.php:41
This class represents the result of the API operations.
Definition ApiResult.php:35
static addMetadataToResultVars( $vars, $forceHash=true)
Add the correct metadata to an array of vars we want to export through the API.
addIndexedTagName( $path, $tag)
Set the tag name for numeric-keyed values in XML format.
addValue( $path, $name, $value, $flags=0)
Add value to the output data at the given path.
Configuration handling class for SearchEngine.
Factory class for SearchEngine.
NOTE: this class is being refactored into an abstract base class.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:44
Service for formatting and validating API parameters.
Type definition for enumeration types.
Definition EnumDef.php:32
trait SearchApi
Traits for API components that use a SearchEngine.
Definition SearchApi.php:30
A set of SearchEngine results.
return true
Definition router.php:92