MediaWiki  master
ApiQuerySearch.php
Go to the documentation of this file.
1 <?php
25 
32  use SearchApi;
33 
/** @var array|null Parameter definitions cached by getAllowedParams(); null until first built */
private $allowedParams;

/** @var SearchEngineConfig Injected via the constructor; used to list the available search types */
private $searchEngineConfig;

/** @var SearchEngineFactory Injected via the constructor; used to create the engine whose sorts are advertised */
private $searchEngineFactory;
42 
/**
 * Register the module under the 'sr' parameter prefix and keep the injected
 * search services.
 *
 * @param ApiQuery $query Parent query module
 * @param string $moduleName Name this module is registered under
 * @param SearchEngineConfig $searchEngineConfig
 * @param SearchEngineFactory $searchEngineFactory
 */
public function __construct(
	ApiQuery $query,
	$moduleName,
	SearchEngineConfig $searchEngineConfig,
	SearchEngineFactory $searchEngineFactory
) {
	parent::__construct( $query, $moduleName, 'sr' );

	// Both services are also read by the SearchApi trait, so they must be
	// stored under exactly these property names.
	$this->searchEngineFactory = $searchEngineFactory;
	$this->searchEngineConfig = $searchEngineConfig;
}
60 
/**
 * Run the search in list mode: results are written to the API result.
 */
public function execute() {
	// No page set means run() emits the rows itself.
	$this->run( null );
}
64 
/**
 * Run the search in generator mode: matching titles are fed into the
 * supplied page set instead of being written to the API result.
 *
 * @param object $resultPageSet Page set to populate (concrete type is not declared in this file)
 */
public function executeGenerator( $resultPageSet ) {
	$this->run( $resultPageSet );
}
68 
/**
 * Perform the search described by the request parameters and publish the
 * outcome: into the API result in list mode, or into $resultPageSet when
 * running as a generator.
 *
 * Aborts with an API error when the engine reports a fatal status or when
 * the requested kind of search is unavailable (the engine returned null).
 *
 * @param object|null $resultPageSet Page set to populate in generator mode,
 *  null in list mode (presumably an ApiPageSet; the type is not declared here)
 */
private function run( $resultPageSet = null ) {
	$params = $this->extractRequestParams();

	// Extract parameters
	$query = $params['search'];
	$what = $params['what'];
	$interwiki = $params['interwiki'];
	// Flip the multi-value lists into lookup maps so isset() can be used below.
	$searchInfo = array_fill_keys( $params['info'], true );
	$prop = array_fill_keys( $params['prop'], true );

	// Create search engine instance and set options
	$search = $this->buildSearchEngine( $params );
	if ( isset( $params['sort'] ) ) {
		$search->setSort( $params['sort'] );
	}
	$search->setFeatureData( 'rewrite', (bool)$params['enablerewrites'] );
	$search->setFeatureData( 'interwiki', (bool)$interwiki );
	// Hint to some SearchEngines about what snippets we would like returned
	$search->setFeatureData( 'snippets', $this->decideSnippets( $prop ) );

	$nquery = $search->replacePrefixes( $query );
	if ( $nquery !== $query ) {
		$query = $nquery;
		wfDeprecatedMsg( 'SearchEngine::replacePrefixes() is overridden by ' .
			get_class( $search ) . ', this was deprecated in MediaWiki 1.32',
			'1.32' );
	}

	// Perform the actual search
	if ( $what === 'text' ) {
		$matches = $search->searchText( $query );
	} elseif ( $what === 'title' ) {
		$matches = $search->searchTitle( $query );
	} elseif ( $what === 'nearmatch' ) {
		// near matches must receive the user input as provided, otherwise
		// the near matches within namespaces are lost.
		$matches = $search->getNearMatcher( $this->getConfig() )
			->getNearMatchResultSet( $params['search'] );
	} else {
		// We default to title searches; this is a terrible legacy
		// of the way we initially set up the MySQL fulltext-based
		// search engine with separate title and text fields.
		// In the future, the default should be for a combined index.
		$what = 'title';
		$matches = $search->searchTitle( $query );

		// Not all search engines support a separate title search,
		// for instance the Lucene-based engine we use on Wikipedia.
		// In this case, fall back to full-text search (which will
		// include titles in it!)
		if ( $matches === null ) {
			$what = 'text';
			$matches = $search->searchText( $query );
		}
	}

	// Engines may hand back the result set directly or wrapped in a Status.
	if ( $matches instanceof Status ) {
		$status = $matches;
		if ( !$status->isOK() ) {
			$this->dieStatus( $status );
		}
		// An OK status can still carry warnings; surface them to the client.
		$this->getMain()->getErrorFormatter()->addMessagesFromStatus(
			$this->getModuleName(),
			$status
		);
		$matches = $status->getValue();
	}
	// Checked after unwrapping so that an OK status carrying no result set
	// produces a proper API error instead of a method call on null below.
	if ( $matches === null ) {
		$this->dieWithError( [ 'apierror-searchdisabled', $what ], "search-{$what}-disabled" );
	}

	$apiResult = $this->getResult();
	// Add search meta data to result
	if ( isset( $searchInfo['totalhits'] ) ) {
		$totalhits = $matches->getTotalHits();
		if ( $totalhits !== null ) {
			$apiResult->addValue( [ 'query', 'searchinfo' ],
				'totalhits', $totalhits );
		}
	}
	if ( isset( $searchInfo['suggestion'] ) && $matches->hasSuggestion() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestion', $matches->getSuggestionQuery() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestionsnippet', HtmlArmor::getHtml( $matches->getSuggestionSnippet() ) );
	}
	if ( isset( $searchInfo['rewrittenquery'] ) && $matches->hasRewrittenQuery() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquery', $matches->getQueryAfterRewrite() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquerysnippet', HtmlArmor::getHtml( $matches->getQueryAfterRewriteSnippet() ) );
	}

	$titles = [];
	$data = [];
	$count = 0;

	if ( $matches->hasMoreResults() ) {
		$this->setContinueEnumParameter( 'offset', $params['offset'] + $params['limit'] );
	}

	foreach ( $matches as $result ) {
		// Counts every result seen, including skipped ones, so that the
		// continuation offset below lines up with the engine's own offsets.
		$count++;
		// Silently skip broken and missing titles
		if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
			continue;
		}

		$vals = $this->getSearchResultData( $result, $prop );

		if ( $resultPageSet === null ) {
			if ( $vals ) {
				// Add item to results and see whether it fits
				$fit = $apiResult->addValue( [ 'query', $this->getModuleName() ], null, $vals );
				if ( !$fit ) {
					// Resume at the result that did not fit.
					$this->setContinueEnumParameter( 'offset', $params['offset'] + $count - 1 );
					break;
				}
			}
		} else {
			$titles[] = $result->getTitle();
			$data[] = $vals ?: [];
		}
	}

	// Here we assume interwiki results do not count with
	// regular search results. We may want to reconsider this
	// if we ever return a lot of interwiki results or want pagination
	// for them.
	// Interwiki results inside main result set
	$canAddInterwiki = (bool)$params['enablerewrites'] && ( $resultPageSet === null );
	if ( $canAddInterwiki ) {
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'additional',
			ISearchResultSet::INLINE_RESULTS );
	}

	// Interwiki results outside main result set
	if ( $interwiki && $resultPageSet === null ) {
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'interwiki',
			ISearchResultSet::SECONDARY_RESULTS );
	}

	if ( $resultPageSet === null ) {
		$apiResult->addIndexedTagName( [
			'query', $this->getModuleName()
		], 'p' );
	} else {
		// When several results collapse onto one page, keep the smallest
		// (i.e. best-ranked) index.
		$resultPageSet->setRedirectMergePolicy( static function ( $current, $new ) {
			if ( !isset( $current['index'] ) || $new['index'] < $current['index'] ) {
				$current['index'] = $new['index'];
			}
			return $current;
		} );
		$resultPageSet->populateFromTitles( $titles );
		// Reported indexes are 1-based and continue across pages of results.
		$offset = $params['offset'] + 1;
		foreach ( $titles as $index => $title ) {
			$resultPageSet->setGeneratorData(
				$title,
				$data[ $index ] + [ 'index' => $index + $offset ]
			);
		}
	}
}
240 
/**
 * Assemble the API output array for one search result.
 *
 * Title information and the page id are always included; every other field
 * is added only when its key is present in $prop.
 *
 * @param SearchResult $result Result to describe
 * @param array $prop Requested properties, as a map of property name => true
 * @return array|null Result data, or null when the result has a broken title
 *  or a missing revision
 */
private function getSearchResultData( SearchResult $result, $prop ) {
	// Silently skip broken and missing titles
	if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
		return null;
	}

	$vals = [];

	$title = $result->getTitle();
	// Adds the title and namespace entries for $title to $vals.
	ApiQueryBase::addTitleInfo( $vals, $title );
	$vals['pageid'] = $title->getArticleID();

	if ( isset( $prop['size'] ) ) {
		$vals['size'] = $result->getByteSize();
	}
	if ( isset( $prop['wordcount'] ) ) {
		$vals['wordcount'] = $result->getWordCount();
	}
	if ( isset( $prop['snippet'] ) ) {
		$vals['snippet'] = $result->getTextSnippet();
	}
	if ( isset( $prop['timestamp'] ) ) {
		$vals['timestamp'] = wfTimestamp( TS_ISO_8601, $result->getTimestamp() );
	}
	if ( isset( $prop['titlesnippet'] ) ) {
		$vals['titlesnippet'] = $result->getTitleSnippet();
	}
	if ( isset( $prop['categorysnippet'] ) ) {
		$vals['categorysnippet'] = $result->getCategorySnippet();
	}

	// Redirect fields only make sense when the match came through a redirect.
	$redirectTitle = $result->getRedirectTitle();
	if ( $redirectTitle !== null ) {
		if ( isset( $prop['redirecttitle'] ) ) {
			$vals['redirecttitle'] = $redirectTitle->getPrefixedText();
		}
		if ( isset( $prop['redirectsnippet'] ) ) {
			$vals['redirectsnippet'] = $result->getRedirectSnippet();
		}
	}

	// Likewise, section fields are emitted only when a section matched.
	$sectionTitle = $result->getSectionTitle();
	if ( $sectionTitle !== null ) {
		if ( isset( $prop['sectiontitle'] ) ) {
			$vals['sectiontitle'] = $sectionTitle->getFragment();
		}
		if ( isset( $prop['sectionsnippet'] ) ) {
			$vals['sectionsnippet'] = $result->getSectionSnippet();
		}
	}
	if ( isset( $prop['isfilematch'] ) ) {
		$vals['isfilematch'] = $result->isFileMatch();
	}

	if ( isset( $prop['extensiondata'] ) ) {
		$extra = $result->getExtensionData();
		// Add augmented data to the result. The data would be organized as a map:
		// augmentorName => data
		if ( $extra ) {
			$vals['extensiondata'] = ApiResult::addMetadataToResultVars( $extra );
		}
	}

	return $vals;
}
308 
/**
 * Add the interwiki results of one kind to the API result.
 *
 * Results are stored under query.{$section}{moduleName}.{interwikiPrefix};
 * the summed hit count goes to query.{$section}searchinfo.totalhits.
 *
 * @param ISearchResultSet $matches Main result set carrying the interwiki results
 * @param ApiResult $apiResult Result object to write to
 * @param array $prop Requested properties, as a map of property name => true
 * @param string $section Prefix for the output keys (e.g. 'additional', 'interwiki')
 * @param int $type Kind of interwiki results to read from $matches
 *  (presumably one of the ISearchResultSet::*_RESULTS constants)
 * @return int|null Sum of total hits over the interwiki result sets, or null
 *  when $matches has no interwiki results of this type
 */
private function addInterwikiResults(
	ISearchResultSet $matches, ApiResult $apiResult, $prop,
	$section, $type
) {
	$totalhits = null;
	if ( $matches->hasInterwikiResults( $type ) ) {
		foreach ( $matches->getInterwikiResults( $type ) as $interwikiMatches ) {
			// Include number of results if requested
			$totalhits += $interwikiMatches->getTotalHits();

			foreach ( $interwikiMatches as $result ) {
				$vals = $this->getSearchResultData( $result, $prop );
				if ( $vals === null ) {
					// getSearchResultData() rejected this result (broken title
					// or missing revision); skip it, as run() does for main
					// results, rather than dereference its title below.
					continue;
				}

				$title = $result->getTitle();
				$vals['namespace'] = $result->getInterwikiNamespaceText();
				$vals['title'] = $title->getText();
				$vals['url'] = $title->getFullURL();

				// Add item to results and see whether it fits
				$fit = $apiResult->addValue( [
					'query',
					$section . $this->getModuleName(),
					$result->getInterwikiPrefix()
				], null, $vals );

				if ( !$fit ) {
					// We hit the limit. We can't really provide any meaningful
					// pagination info so just bail out
					break;
				}
			}
		}
		if ( $totalhits !== null ) {
			$apiResult->addValue( [ 'query', $section . 'searchinfo' ], 'totalhits', $totalhits );
			$apiResult->addIndexedTagName( [
				'query', $section . $this->getModuleName()
			], 'p' );
		}
	}
	return $totalhits;
}
359 
/**
 * Work out which snippet fields the search engine should be asked for.
 *
 * Field names align with definitions in ContentHandler::getFieldsForSearchIndex,
 * except `redirect`, which isn't explicitly created but refers to the title of
 * pages that redirect to the result page.
 *
 * @param array $prop Requested properties, as a map of property name => true
 * @return string[] Field names, in the fixed order title, redirect, category, heading
 */
private function decideSnippets( array $prop ): array {
	// Each field is requested when any of its triggering properties is set.
	// The title variants (redirecttitle, sectiontitle) are listed alongside
	// their snippets because some search engines derive the title from the
	// snippet and so need the snippet requested to provide it.
	$triggersByField = [
		'title' => [ 'titlesnippet' ],
		'redirect' => [ 'redirectsnippet', 'redirecttitle' ],
		'category' => [ 'categorysnippet' ],
		'heading' => [ 'sectionsnippet', 'sectiontitle' ],
	];

	$wanted = [];
	foreach ( $triggersByField as $field => $triggers ) {
		foreach ( $triggers as $propName ) {
			if ( isset( $prop[$propName] ) ) {
				$wanted[] = $field;
				// One trigger is enough; never list a field twice.
				break;
			}
		}
	}
	return $wanted;
}
382 
/**
 * Report the cache mode for this module's output.
 *
 * @param array $params Request parameters (not consulted)
 * @return string Always 'public'
 */
public function getCacheMode( $params ) {
	$mode = 'public';
	return $mode;
}
386 
/**
 * Build (once per instance) and return the parameter definitions accepted
 * by this module.
 *
 * @return array Map of parameter name => default value or settings array
 */
public function getAllowedParams() {
	// Memoised: later calls return the definition built the first time.
	if ( $this->allowedParams !== null ) {
		return $this->allowedParams;
	}

	// Array union: on a key collision the shared search parameters win.
	$this->allowedParams = $this->buildCommonApiParams() + [
		'what' => [
			ParamValidator::PARAM_TYPE => [
				'title',
				'text',
				'nearmatch',
			]
		],
		'info' => [
			ParamValidator::PARAM_DEFAULT => 'totalhits|suggestion|rewrittenquery',
			ParamValidator::PARAM_TYPE => [
				'totalhits',
				'suggestion',
				'rewrittenquery',
			],
			ParamValidator::PARAM_ISMULTI => true,
		],
		'prop' => [
			ParamValidator::PARAM_DEFAULT => 'size|wordcount|timestamp|snippet',
			ParamValidator::PARAM_TYPE => [
				'size',
				'wordcount',
				'timestamp',
				'snippet',
				'titlesnippet',
				'redirecttitle',
				'redirectsnippet',
				'sectiontitle',
				'sectionsnippet',
				'isfilematch',
				'categorysnippet',
				'score', // deprecated
				'hasrelated', // deprecated
				'extensiondata',
			],
			ParamValidator::PARAM_ISMULTI => true,
			// Empty map: no per-value help message overrides are supplied.
			ApiBase::PARAM_HELP_MSG_PER_VALUE => [],
			EnumDef::PARAM_DEPRECATED_VALUES => [
				'score' => true,
				'hasrelated' => true
			],
		],
		'interwiki' => false,
		'enablerewrites' => false,
	];

	// Generators only add info/properties if explicitly requested. T263841
	if ( $this->isInGeneratorMode() ) {
		$this->allowedParams['prop'][ParamValidator::PARAM_DEFAULT] = '';
		$this->allowedParams['info'][ParamValidator::PARAM_DEFAULT] = '';
	}

	// If we have more than one engine the list of available sorts is
	// difficult to represent. For now don't expose it.
	$alternatives = $this->searchEngineConfig->getSearchTypes();
	if ( count( $alternatives ) === 1 ) {
		$this->allowedParams['sort'] = [
			ParamValidator::PARAM_DEFAULT => SearchEngine::DEFAULT_SORT,
			ParamValidator::PARAM_TYPE => $this->searchEngineFactory->create()->getValidSorts(),
		];
	}

	return $this->allowedParams;
}
456 
/**
 * Describe the search-profile parameters this module exposes.
 *
 * @return array Map of parameter name => settings; each entry names the
 *  profile type to look up and the help message key for the parameter
 */
public function getSearchProfileParams() {
	return [
		'qiprofile' => [
			// Profile type for query independent ranking features.
			'profile-type' => SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE,
			'help-message' => 'apihelp-query+search-param-qiprofile',
		],
	];
}
465 
/**
 * List usage examples for this module.
 *
 * @return string[] Map of example query string => help message key
 */
protected function getExamplesMessages() {
	$examples = [];

	// Plain list search with the default search type.
	$examples['action=query&list=search&srsearch=meaning'] =
		'apihelp-query+search-example-simple';
	// Explicit full-text search.
	$examples['action=query&list=search&srwhat=text&srsearch=meaning'] =
		'apihelp-query+search-example-text';
	// Search used as a generator feeding prop=info.
	$examples['action=query&generator=search&gsrsearch=meaning&prop=info'] =
		'apihelp-query+search-example-generator';

	return $examples;
}
476 
/**
 * Give the location of the detailed help page for this module.
 *
 * @return string URL of the help page
 */
public function getHelpUrls() {
	$helpPage = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Search';
	return $helpPage;
}
480 }
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
$matches
buildSearchEngine(array $params=null)
Build the search engine to use.
Definition: SearchApi.php:177
buildCommonApiParams( $isScrollable=true)
The set of api parameters that are shared between api calls that call the SearchEngine.
Definition: SearchApi.php:66
dieWithError( $msg, $code=null, $data=null, $httpCode=0)
Abort execution with an error.
Definition: ApiBase.php:1453
getMain()
Get the main module.
Definition: ApiBase.php:514
const PARAM_HELP_MSG_PER_VALUE
((string|array|Message)[]) When PARAM_TYPE is an array, this is an array mapping those values to $msg for ApiBase::makeMessage().
Definition: ApiBase.php:196
getResult()
Get the result object.
Definition: ApiBase.php:629
extractRequestParams( $options=[])
Using getAllowedParams(), this function makes an array of the values provided by the user, with the key being the name of the variable, and value - validated value from user or default.
Definition: ApiBase.php:765
getModuleName()
Get the name of the module being executed by this instance.
Definition: ApiBase.php:498
dieStatus(StatusValue $status)
Throw an ApiUsageException based on the Status object.
Definition: ApiBase.php:1516
static addTitleInfo(&$arr, $title, $prefix='')
Add information (title and namespace) about a Title object to a result array.
setContinueEnumParameter( $paramName, $paramValue)
Overridden to set the generator param if in generator mode.
Query module to perform full text search within wiki titles and content.
execute()
Evaluates the parameters, performs the requested query, and sets up the result.
getHelpUrls()
Return links to more detailed help pages about the module.
getExamplesMessages()
Returns usage examples for this module.
__construct(ApiQuery $query, $moduleName, SearchEngineConfig $searchEngineConfig, SearchEngineFactory $searchEngineFactory)
getCacheMode( $params)
Get the cache mode for the data generated by this module.
getAllowedParams()
Returns an array of allowed parameters (parameter name) => (default value) or (parameter name) => (array with PARAM_* constants as keys).
executeGenerator( $resultPageSet)
Execute this module as a generator.
This is the main query class.
Definition: ApiQuery.php:41
This class represents the result of the API operations.
Definition: ApiResult.php:35
static addMetadataToResultVars( $vars, $forceHash=true)
Add the correct metadata to an array of vars we want to export through the API.
Definition: ApiResult.php:1144
addIndexedTagName( $path, $tag)
Set the tag name for numeric-keyed values in XML format.
Definition: ApiResult.php:617
addValue( $path, $name, $value, $flags=0)
Add value to the output data at the given path.
Definition: ApiResult.php:394
static getHtml( $input)
Provide a string or HtmlArmor object and get safe HTML back.
Definition: HtmlArmor.php:54
Configuration handling class for SearchEngine.
Factory class for SearchEngine.
const DEFAULT_SORT
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features.
NOTE: this class is being refactored into an abstract base class.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition: Status.php:44
Service for formatting and validating API parameters.
Type definition for enumeration types.
Definition: EnumDef.php:32
trait SearchApi
Traits for API components that use a SearchEngine.
Definition: SearchApi.php:30
A set of SearchEngine results.
const INLINE_RESULTS
Identifier for interwiki results that can be displayed even if no existing main wiki results exist.
const SECONDARY_RESULTS
Identifier for interwiki results that are displayed only together with existing main wiki results.
return true
Definition: router.php:90