MediaWiki  master
ApiQuerySearch.php
Go to the documentation of this file.
1 <?php
27 
// Shared search plumbing; this class calls buildSearchEngine() and
// buildCommonApiParams() from the trait.
use SearchApi;

/** @var TitleMatcher Used for the 'nearmatch' search type */
private TitleMatcher $titleMatcher;
37 
/**
 * Set up the module with the 'sr' parameter prefix and store the injected
 * search services.
 *
 * @param ApiQuery $query
 * @param string $moduleName
 * @param SearchEngineConfig $searchEngineConfig
 * @param SearchEngineFactory $searchEngineFactory
 * @param TitleMatcher $titleMatcher
 */
public function __construct(
	ApiQuery $query,
	$moduleName,
	SearchEngineConfig $searchEngineConfig,
	SearchEngineFactory $searchEngineFactory,
	TitleMatcher $titleMatcher
) {
	parent::__construct( $query, $moduleName, 'sr' );

	// Only used by this class (near-match searches)
	$this->titleMatcher = $titleMatcher;

	// These two are also needed by the SearchApi trait
	$this->searchEngineFactory = $searchEngineFactory;
	$this->searchEngineConfig = $searchEngineConfig;
}
58 
/**
 * Run the search in list mode: no page set is supplied, so results are
 * written to the API result.
 */
public function execute() {
	$this->run( null );
}
62 
/**
 * Run the search in generator mode, feeding the matched titles into the
 * given page set.
 *
 * @param object $resultPageSet Page set to populate (presumably an
 *   ApiPageSet; confirm against the base class)
 */
public function executeGenerator( $resultPageSet ) {
	$pageSet = $resultPageSet;
	$this->run( $pageSet );
}
66 
/**
 * Perform the search and emit its results.
 *
 * In list mode ($resultPageSet is null) each result row is added to the API
 * result under [ 'query', <module name> ], together with optional search
 * metadata and interwiki results. In generator mode the matched titles are
 * loaded into $resultPageSet and each title gets generator data holding its
 * row plus an 'index'.
 *
 * @param object|null $resultPageSet Page set to populate in generator mode,
 *   or null for list mode. Must provide setRedirectMergePolicy(),
 *   populateFromTitles() and setGeneratorData() (presumably an ApiPageSet;
 *   confirm against the caller).
 */
private function run( $resultPageSet = null ) {
	$params = $this->extractRequestParams();

	// Extract parameters
	$query = $params['search'];
	$what = $params['what'];
	$interwiki = $params['interwiki'];
	$searchInfo = array_fill_keys( $params['info'], true );
	$prop = array_fill_keys( $params['prop'], true );

	// Create search engine instance and set options
	$search = $this->buildSearchEngine( $params );
	if ( isset( $params['sort'] ) ) {
		$search->setSort( $params['sort'] );
	}
	$search->setFeatureData( 'rewrite', (bool)$params['enablerewrites'] );
	$search->setFeatureData( 'interwiki', (bool)$interwiki );
	// Hint to some SearchEngines about what snippets we would like returned
	$search->setFeatureData( 'snippets', $this->decideSnippets( $prop ) );

	$nquery = $search->replacePrefixes( $query );
	if ( $nquery !== $query ) {
		$query = $nquery;
		wfDeprecatedMsg( 'SearchEngine::replacePrefixes() is overridden by ' .
			get_class( $search ) . ', this was deprecated in MediaWiki 1.32',
			'1.32' );
	}

	// Perform the actual search
	if ( $what === 'text' ) {
		$matches = $search->searchText( $query );
	} elseif ( $what === 'title' ) {
		$matches = $search->searchTitle( $query );
	} elseif ( $what === 'nearmatch' ) {
		// near matches must receive the user input as provided, otherwise
		// the near matches within namespaces are lost.
		$matches = $this->titleMatcher->getNearMatchResultSet( $params['search'] );
	} else {
		// We default to title searches; this is a terrible legacy
		// of the way we initially set up the MySQL fulltext-based
		// search engine with separate title and text fields.
		// In the future, the default should be for a combined index.
		$what = 'title';
		$matches = $search->searchTitle( $query );

		// Not all search engines support a separate title search,
		// for instance the Lucene-based engine we use on Wikipedia.
		// In this case, fall back to full-text search (which will
		// include titles in it!)
		if ( $matches === null ) {
			$what = 'text';
			$matches = $search->searchText( $query );
		}
	}

	// The engine may wrap the result set in a Status; unwrap it but keep
	// the Status around so its warnings/errors can be reported.
	$status = null;
	if ( $matches instanceof Status ) {
		$status = $matches;
		$matches = $status->getValue();
	}

	if ( $status ) {
		if ( $status->isOK() ) {
			$this->getMain()->getErrorFormatter()->addMessagesFromStatus(
				$this->getModuleName(),
				$status
			);
		} else {
			$this->dieStatus( $status );
		}
	} elseif ( $matches === null ) {
		$this->dieWithError( [ 'apierror-searchdisabled', $what ], "search-{$what}-disabled" );
	}

	$apiResult = $this->getResult();
	// Add search meta data to result
	if ( isset( $searchInfo['totalhits'] ) ) {
		$totalhits = $matches->getTotalHits();
		if ( $totalhits !== null ) {
			$apiResult->addValue( [ 'query', 'searchinfo' ],
				'totalhits', $totalhits );
		}
	}
	if ( isset( $searchInfo['suggestion'] ) && $matches->hasSuggestion() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestion', $matches->getSuggestionQuery() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestionsnippet', HtmlArmor::getHtml( $matches->getSuggestionSnippet() ) );
	}
	if ( isset( $searchInfo['rewrittenquery'] ) && $matches->hasRewrittenQuery() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquery', $matches->getQueryAfterRewrite() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquerysnippet', HtmlArmor::getHtml( $matches->getQueryAfterRewriteSnippet() ) );
	}

	$titles = [];
	$data = [];
	$count = 0;

	if ( $matches->hasMoreResults() ) {
		$this->setContinueEnumParameter( 'offset', $params['offset'] + $params['limit'] );
	}

	foreach ( $matches as $result ) {
		// Counts every result seen, including skipped ones, so the
		// continuation offset below stays aligned with the engine's offsets.
		$count++;
		// Silently skip broken and missing titles
		if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
			continue;
		}

		$vals = $this->getSearchResultData( $result, $prop );

		if ( $resultPageSet === null ) {
			if ( $vals ) {
				// Add item to results and see whether it fits
				$fit = $apiResult->addValue( [ 'query', $this->getModuleName() ], null, $vals );
				if ( !$fit ) {
					// Resume from the result that did not fit
					$this->setContinueEnumParameter( 'offset', $params['offset'] + $count - 1 );
					break;
				}
			}
		} else {
			$titles[] = $result->getTitle();
			$data[] = $vals ?: [];
		}
	}

	// Here we assume interwiki results do not count with
	// regular search results. We may want to reconsider this
	// if we ever return a lot of interwiki results or want pagination
	// for them.
	// Interwiki results inside main result set
	$canAddInterwiki = (bool)$params['enablerewrites'] && ( $resultPageSet === null );
	if ( $canAddInterwiki ) {
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'additional',
			ISearchResultSet::INLINE_RESULTS );
	}

	// Interwiki results outside main result set
	if ( $interwiki && $resultPageSet === null ) {
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'interwiki',
			ISearchResultSet::SECONDARY_RESULTS );
	}

	if ( $resultPageSet === null ) {
		$apiResult->addIndexedTagName( [
			'query', $this->getModuleName()
		], 'p' );
	} else {
		// When redirects are merged, keep the smallest 'index' of the two
		$resultPageSet->setRedirectMergePolicy( static function ( $current, $new ) {
			if ( !isset( $current['index'] ) || $new['index'] < $current['index'] ) {
				$current['index'] = $new['index'];
			}
			return $current;
		} );
		$resultPageSet->populateFromTitles( $titles );
		// 'index' is 1-based and continues from the requested offset
		$offset = $params['offset'] + 1;
		foreach ( $titles as $index => $title ) {
			$resultPageSet->setGeneratorData(
				$title,
				$data[ $index ] + [ 'index' => $index + $offset ]
			);
		}
	}
}
237 
/**
 * Convert one search result into the array emitted for it by the API.
 *
 * Title info and 'pageid' are always included; every other field is added
 * only when its name is a key of $prop.
 *
 * @param SearchResult $result
 * @param array $prop Requested properties, as array keys
 * @return array|null Null when the result has a broken title or a missing
 *   revision
 */
private function getSearchResultData( SearchResult $result, $prop ) {
	if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
		// Broken and missing titles are dropped without any error
		return null;
	}

	// Small helper: was the given property requested?
	$wants = static fn ( string $name ): bool => isset( $prop[$name] );

	$page = $result->getTitle();
	$row = [];
	ApiQueryBase::addTitleInfo( $row, $page );
	$row['pageid'] = $page->getArticleID();

	if ( $wants( 'size' ) ) {
		$row['size'] = $result->getByteSize();
	}
	if ( $wants( 'wordcount' ) ) {
		$row['wordcount'] = $result->getWordCount();
	}
	if ( $wants( 'snippet' ) ) {
		$row['snippet'] = $result->getTextSnippet();
	}
	if ( $wants( 'timestamp' ) ) {
		$row['timestamp'] = wfTimestamp( TS_ISO_8601, $result->getTimestamp() );
	}
	if ( $wants( 'titlesnippet' ) ) {
		$row['titlesnippet'] = $result->getTitleSnippet();
	}
	if ( $wants( 'categorysnippet' ) ) {
		$row['categorysnippet'] = $result->getCategorySnippet();
	}

	// Redirect fields only exist when the result has a redirect title
	if ( $result->getRedirectTitle() !== null ) {
		if ( $wants( 'redirecttitle' ) ) {
			$row['redirecttitle'] = $result->getRedirectTitle()->getPrefixedText();
		}
		if ( $wants( 'redirectsnippet' ) ) {
			$row['redirectsnippet'] = $result->getRedirectSnippet();
		}
	}

	// Likewise, section fields only exist when the result has a section title
	if ( $result->getSectionTitle() !== null ) {
		if ( $wants( 'sectiontitle' ) ) {
			$row['sectiontitle'] = $result->getSectionTitle()->getFragment();
		}
		if ( $wants( 'sectionsnippet' ) ) {
			$row['sectionsnippet'] = $result->getSectionSnippet();
		}
	}

	if ( $wants( 'isfilematch' ) ) {
		$row['isfilematch'] = $result->isFileMatch();
	}

	if ( $wants( 'extensiondata' ) ) {
		// Augmented data, organized as a map: augmentorName => data.
		// Left out entirely when there is none.
		$augmented = $result->getExtensionData();
		if ( $augmented ) {
			$row['extensiondata'] = ApiResult::addMetadataToResultVars( $augmented );
		}
	}

	return $row;
}
305 
/**
 * Add the interwiki results of one type to the API result.
 *
 * Rows go under [ 'query', $section . <module name>, <interwiki prefix> ],
 * and the summed hit count under [ 'query', $section . 'searchinfo' ].
 *
 * @param ISearchResultSet $matches Main result set holding the interwiki results
 * @param ApiResult $apiResult Result object to add values to
 * @param array $prop Requested properties, as array keys
 * @param string $section Prefix for the result section names
 * @param mixed $type Interwiki result type, passed through to
 *   hasInterwikiResults() and getInterwikiResults()
 * @return int|null Sum of getTotalHits() over the interwiki result sets, or
 *   null when there were none to iterate
 */
private function addInterwikiResults(
	ISearchResultSet $matches, ApiResult $apiResult, $prop,
	$section, $type
) {
	if ( !$matches->hasInterwikiResults( $type ) ) {
		return null;
	}

	$totalhits = null;
	foreach ( $matches->getInterwikiResults( $type ) as $interwikiMatches ) {
		// Include number of results if requested
		$totalhits += $interwikiMatches->getTotalHits();

		foreach ( $interwikiMatches as $result ) {
			$vals = $this->getSearchResultData( $result, $prop );
			if ( $vals === null ) {
				// Broken title or missing revision: skip it, as run() does
				// for main results. Previously a partial row was built from
				// null and $title was dereferenced without any check.
				continue;
			}

			$title = $result->getTitle();
			// Override the local title info with interwiki-specific values
			$vals['namespace'] = $result->getInterwikiNamespaceText();
			$vals['title'] = $title->getText();
			$vals['url'] = $title->getFullURL();

			// Add item to results and see whether it fits
			$fit = $apiResult->addValue( [
				'query',
				$section . $this->getModuleName(),
				$result->getInterwikiPrefix()
			], null, $vals );

			if ( !$fit ) {
				// We hit the limit. We can't really provide any meaningful
				// pagination info so just bail out
				break;
			}
		}
	}

	if ( $totalhits !== null ) {
		$apiResult->addValue( [ 'query', $section . 'searchinfo' ], 'totalhits', $totalhits );
		$apiResult->addIndexedTagName( [
			'query', $section . $this->getModuleName()
		], 'p' );
	}

	return $totalhits;
}
356 
/**
 * Work out which snippet fields to request from the search engine, based on
 * the requested properties.
 *
 * @param array $prop Requested properties, as array keys
 * @return string[] Field names, in the fixed order title, redirect,
 *   category, heading
 */
private function decideSnippets( array $prop ): array {
	// Field names align with definitions in ContentHandler::getFieldsForSearchIndex.
	// Except `redirect` which isn't explicitly created, but refers to the title of
	// pages that redirect to the result page.
	//
	// Each field is requested when any of its listed properties is present.
	// The snippet and title variants are paired because some search engines
	// generate the title variant from the snippet, so the snippet must be
	// requested in order to provide the title.
	$triggers = [
		'title' => [ 'titlesnippet' ],
		'redirect' => [ 'redirectsnippet', 'redirecttitle' ],
		'category' => [ 'categorysnippet' ],
		'heading' => [ 'sectionsnippet', 'sectiontitle' ],
	];

	$wanted = [];
	foreach ( $triggers as $field => $propNames ) {
		foreach ( $propNames as $propName ) {
			if ( isset( $prop[$propName] ) ) {
				$wanted[] = $field;
				// One matching property is enough; never add a field twice
				break;
			}
		}
	}
	return $wanted;
}
379 
/**
 * Report the cache mode for this module's output.
 *
 * @param array $params Request parameters (not used)
 * @return string Always 'public'
 */
public function getCacheMode( $params ) {
	$mode = 'public';
	return $mode;
}
383 
/**
 * Describe the parameters this module accepts.
 *
 * Starts from the common search parameters supplied by
 * buildCommonApiParams() and adds 'what', 'info', 'prop', 'interwiki' and
 * 'enablerewrites'. A 'sort' parameter is added only when exactly one
 * search type is configured.
 *
 * @return array Parameter name => definition or default value
 */
public function getAllowedParams() {
	// Array union: keys from the common parameters win over the ones below
	$allowedParams = $this->buildCommonApiParams() + [
		'what' => [
			ParamValidator::PARAM_TYPE => [
				'title',
				'text',
				'nearmatch',
			]
		],
		'info' => [
			ParamValidator::PARAM_DEFAULT => 'totalhits|suggestion|rewrittenquery',
			ParamValidator::PARAM_TYPE => [
				'totalhits',
				'suggestion',
				'rewrittenquery',
			],
			ParamValidator::PARAM_ISMULTI => true,
		],
		'prop' => [
			ParamValidator::PARAM_DEFAULT => 'size|wordcount|timestamp|snippet',
			ParamValidator::PARAM_TYPE => [
				'size',
				'wordcount',
				'timestamp',
				'snippet',
				'titlesnippet',
				'redirecttitle',
				'redirectsnippet',
				'sectiontitle',
				'sectionsnippet',
				'isfilematch',
				'categorysnippet',
				'score', // deprecated
				'hasrelated', // deprecated
				'extensiondata',
			],
			ParamValidator::PARAM_ISMULTI => true,
			// NOTE(review): this entry was missing from the listing (gap in
			// the embedded line numbering); confirm the value against the
			// upstream file.
			ApiBase::PARAM_HELP_MSG_PER_VALUE => [],
			EnumDef::PARAM_DEPRECATED_VALUES => [
				'score' => true,
				'hasrelated' => true
			],
		],
		'interwiki' => false,
		'enablerewrites' => false,
	];

	// Generators only add info/properties if explicitly requested. T263841
	if ( $this->isInGeneratorMode() ) {
		$allowedParams['prop'][ParamValidator::PARAM_DEFAULT] = '';
		$allowedParams['info'][ParamValidator::PARAM_DEFAULT] = '';
	}

	// If we have more than one engine the list of available sorts is
	// difficult to represent. For now don't expose it.
	$alternatives = $this->searchEngineConfig->getSearchTypes();
	if ( count( $alternatives ) === 1 ) {
		$allowedParams['sort'] = [
			ParamValidator::PARAM_DEFAULT => SearchEngine::DEFAULT_SORT,
			ParamValidator::PARAM_TYPE => $this->searchEngineFactory->create()->getValidSorts(),
		];
	}

	return $allowedParams;
}
449 
/**
 * Declare the search-profile parameters this module exposes: a single
 * 'qiprofile' parameter.
 *
 * @return array[] Parameter name => profile settings
 */
public function getSearchProfileParams() {
	return [
		'qiprofile' => [
			// NOTE(review): this entry was missing from the listing (gap in
			// the embedded line numbering); confirm the key name against the
			// SearchApi trait.
			'profile-type' => SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE,
			'help-message' => 'apihelp-query+search-param-qiprofile',
		],
	];
}
458 
/**
 * List example queries for this module.
 *
 * @return string[] Example query string => message key describing it
 */
protected function getExamplesMessages() {
	$examples = [];

	// Plain list search
	$examples['action=query&list=search&srsearch=meaning'] =
		'apihelp-query+search-example-simple';
	// Explicit full-text search
	$examples['action=query&list=search&srwhat=text&srsearch=meaning'] =
		'apihelp-query+search-example-text';
	// Search used as a generator for another prop module
	$examples['action=query&generator=search&gsrsearch=meaning&prop=info'] =
		'apihelp-query+search-example-generator';

	return $examples;
}
469 
/**
 * Give the URL of the help page for this module.
 *
 * @return string
 */
public function getHelpUrls() {
	$helpUrl = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Search';
	return $helpUrl;
}
473 }
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
$matches
buildSearchEngine(array $params=null)
Build the search engine to use.
Definition: SearchApi.php:174
buildCommonApiParams( $isScrollable=true)
The set of api parameters that are shared between api calls that call the SearchEngine.
Definition: SearchApi.php:64
dieWithError( $msg, $code=null, $data=null, $httpCode=0)
Abort execution with an error.
Definition: ApiBase.php:1516
getMain()
Get the main module.
Definition: ApiBase.php:547
const PARAM_HELP_MSG_PER_VALUE
((string|array|Message)[]) When PARAM_TYPE is an array, or 'string' with PARAM_ISMULTI,...
Definition: ApiBase.php:210
getResult()
Get the result object.
Definition: ApiBase.php:668
extractRequestParams( $options=[])
Using getAllowedParams(), this function makes an array of the values provided by the user,...
Definition: ApiBase.php:808
getModuleName()
Get the name of the module being executed by this instance.
Definition: ApiBase.php:529
dieStatus(StatusValue $status)
Throw an ApiUsageException based on the Status object.
Definition: ApiBase.php:1571
static addTitleInfo(&$arr, $title, $prefix='')
Add information (title and namespace) about a Title object to a result array.
setContinueEnumParameter( $paramName, $paramValue)
Overridden to set the generator param if in generator mode.
Query module to perform full text search within wiki titles and content.
execute()
Evaluates the parameters, performs the requested query, and sets up the result.
getHelpUrls()
Return links to more detailed help pages about the module.
getExamplesMessages()
Returns usage examples for this module.
getCacheMode( $params)
Get the cache mode for the data generated by this module.
getAllowedParams()
Returns an array of allowed parameters (parameter name) => (default value) or (parameter name) => (ar...
executeGenerator( $resultPageSet)
Execute this module as a generator.
__construct(ApiQuery $query, $moduleName, SearchEngineConfig $searchEngineConfig, SearchEngineFactory $searchEngineFactory, TitleMatcher $titleMatcher)
This is the main query class.
Definition: ApiQuery.php:43
This class represents the result of the API operations.
Definition: ApiResult.php:35
static addMetadataToResultVars( $vars, $forceHash=true)
Add the correct metadata to an array of vars we want to export through the API.
Definition: ApiResult.php:1144
addIndexedTagName( $path, $tag)
Set the tag name for numeric-keyed values in XML format.
Definition: ApiResult.php:617
addValue( $path, $name, $value, $flags=0)
Add value to the output data at the given path.
Definition: ApiResult.php:394
static getHtml( $input)
Provide a string or HtmlArmor object and get safe HTML back.
Definition: HtmlArmor.php:54
Service implementation of near match title search.
Generic operation result class. Has a warning/error list, a boolean status and an arbitrary value.
Definition: Status.php:58
Configuration handling class for SearchEngine.
Factory class for SearchEngine.
const DEFAULT_SORT
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features.
NOTE: this class is being refactored into an abstract base class.
Service for formatting and validating API parameters.
Type definition for enumeration types.
Definition: EnumDef.php:32
trait SearchApi
Traits for API components that use a SearchEngine.
Definition: SearchApi.php:30
A set of SearchEngine results.
const INLINE_RESULTS
Identifier for interwiki results that can be displayed even if no existing main wiki results exist.
const SECONDARY_RESULTS
Identifier for interwiki results that are displayed only together with existing main wiki results.
return true
Definition: router.php:90