MediaWiki master
ApiQuerySearch.php
Go to the documentation of this file.
1<?php
23namespace MediaWiki\Api;
24
25use HtmlArmor;
29use SearchEngine;
32use SearchResult;
35
42 use \MediaWiki\Api\SearchApi;
43
44 private TitleMatcher $titleMatcher;
45
/**
 * Set up the module and keep references to the search services it uses.
 *
 * @param ApiQuery $query Query module forwarded to the parent constructor
 * @param string $moduleName Module name forwarded to the parent constructor
 * @param SearchEngineConfig $searchEngineConfig Kept for this class and the SearchApi trait
 * @param SearchEngineFactory $searchEngineFactory Kept for this class and the SearchApi trait
 * @param TitleMatcher $titleMatcher Used by run() for 'nearmatch' searches
 */
public function __construct(
	ApiQuery $query,
	string $moduleName,
	SearchEngineConfig $searchEngineConfig,
	SearchEngineFactory $searchEngineFactory,
	TitleMatcher $titleMatcher
) {
	// 'sr' is the third parent argument; the usage examples spell the
	// parameters as srsearch / srwhat.
	parent::__construct( $query, $moduleName, 'sr' );

	$this->titleMatcher = $titleMatcher;

	// The remaining two services are also needed by the SearchApi trait.
	$this->searchEngineFactory = $searchEngineFactory;
	$this->searchEngineConfig = $searchEngineConfig;
}
59
/**
 * Run the search in list mode, i.e. without a page set to populate.
 */
public function execute() {
	// Passing null explicitly is the same as relying on run()'s default.
	$this->run( null );
}
63
/**
 * Run the search in generator mode.
 *
 * @param mixed $resultPageSet Page set that run() fills with the matching
 *  titles. NOTE(review): presumably an ApiPageSet - confirm against the parent class.
 */
public function executeGenerator( $resultPageSet ) {
	$this->run( $resultPageSet );
}
67
/**
 * Perform the search and publish the hits.
 *
 * In list mode ($resultPageSet is null) every usable hit is added to the API
 * result under [ 'query', <module name> ], followed by optional interwiki
 * sections. In generator mode the hit titles and their per-title data are
 * handed to the page set instead.
 *
 * @param mixed|null $resultPageSet Page set to fill in generator mode, or null
 *  for list mode. NOTE(review): presumably an ApiPageSet - confirm.
 * @return void
 */
private function run( $resultPageSet = null ) {
	// NOTE(review): reconstructed statement. The listing this was taken from
	// drops the line that assigns $params, yet every later statement reads it.
	$params = $this->extractRequestParams();

	// Extract parameters
	$query = $params['search'];
	$what = $params['what'];
	$interwiki = $params['interwiki'];
	// Flip the requested value lists into maps so they can be tested with isset()
	$searchInfo = array_fill_keys( $params['info'], true );
	$prop = array_fill_keys( $params['prop'], true );

	// Create search engine instance and set options
	$search = $this->buildSearchEngine( $params );
	if ( isset( $params['sort'] ) ) {
		$search->setSort( $params['sort'] );
	}
	$search->setFeatureData( 'rewrite', (bool)$params['enablerewrites'] );
	$search->setFeatureData( 'interwiki', (bool)$interwiki );
	// Hint to some SearchEngines about what snippets we would like returned
	$search->setFeatureData( 'snippets', $this->decideSnippets( $prop ) );

	$nquery = $search->replacePrefixes( $query );
	if ( $nquery !== $query ) {
		$query = $nquery;
		wfDeprecatedMsg( 'SearchEngine::replacePrefixes() is overridden by ' .
			get_class( $search ) . ', this was deprecated in MediaWiki 1.32',
			'1.32' );
	}

	// Perform the actual search
	if ( $what === 'text' ) {
		$matches = $search->searchText( $query );
	} elseif ( $what === 'title' ) {
		$matches = $search->searchTitle( $query );
	} elseif ( $what === 'nearmatch' ) {
		// near matches must receive the user input as provided, otherwise
		// the near matches within namespaces are lost.
		$matches = $this->titleMatcher->getNearMatchResultSet( $params['search'] );
	} else {
		// We default to title searches; this is a terrible legacy
		// of the way we initially set up the MySQL fulltext-based
		// search engine with separate title and text fields.
		// In the future, the default should be for a combined index.
		$what = 'title';
		$matches = $search->searchTitle( $query );

		// Not all search engines support a separate title search,
		// for instance the Lucene-based engine we use on Wikipedia.
		// In this case, fall back to full-text search (which will
		// include titles in it!)
		if ( $matches === null ) {
			$what = 'text';
			$matches = $search->searchText( $query );
		}
	}

	// The engine may wrap its result set in a Status; unwrap it but keep the
	// Status around so its messages can be reported.
	$status = null;
	if ( $matches instanceof Status ) {
		$status = $matches;
		$matches = $status->getValue();
	}

	if ( $status ) {
		if ( $status->isOK() ) {
			// Usable result: forward whatever messages the Status carries
			$this->getMain()->getErrorFormatter()->addMessagesFromStatus(
				$this->getModuleName(),
				$status
			);
		} else {
			$this->dieStatus( $status );
		}
	} elseif ( $matches === null ) {
		// No Status and no result set: this kind of search is not available
		$this->dieWithError( [ 'apierror-searchdisabled', $what ], "search-{$what}-disabled" );
	}

	$apiResult = $this->getResult();
	// Add search meta data to result
	if ( isset( $searchInfo['totalhits'] ) ) {
		$totalhits = $matches->getTotalHits();
		if ( $totalhits !== null ) {
			$apiResult->addValue( [ 'query', 'searchinfo' ],
				'totalhits', $totalhits );
		}
	}
	if ( isset( $searchInfo['suggestion'] ) && $matches->hasSuggestion() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestion', $matches->getSuggestionQuery() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestionsnippet', HtmlArmor::getHtml( $matches->getSuggestionSnippet() ) );
	}
	if ( isset( $searchInfo['rewrittenquery'] ) && $matches->hasRewrittenQuery() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquery', $matches->getQueryAfterRewrite() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquerysnippet', HtmlArmor::getHtml( $matches->getQueryAfterRewriteSnippet() ) );
	}

	$titles = [];
	$data = [];
	$count = 0;

	if ( $matches->hasMoreResults() ) {
		$this->setContinueEnumParameter( 'offset', $params['offset'] + $params['limit'] );
	}

	foreach ( $matches as $result ) {
		// Counted before the skip below, so skipped results still advance
		// the continuation offset computed from $count.
		$count++;
		// Silently skip broken and missing titles
		if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
			continue;
		}

		$vals = $this->getSearchResultData( $result, $prop );

		if ( $resultPageSet === null ) {
			if ( $vals ) {
				// Add item to results and see whether it fits
				$fit = $apiResult->addValue( [ 'query', $this->getModuleName() ], null, $vals );
				if ( !$fit ) {
					// Continue from the item that did not fit
					$this->setContinueEnumParameter( 'offset', $params['offset'] + $count - 1 );
					break;
				}
			}
		} else {
			$titles[] = $result->getTitle();
			$data[] = $vals ?: [];
		}
	}

	// Here we assume interwiki results do not count with
	// regular search results. We may want to reconsider this
	// if we ever return a lot of interwiki results or want pagination
	// for them.
	// Interwiki results inside main result set
	$canAddInterwiki = (bool)$params['enablerewrites'] && ( $resultPageSet === null );
	if ( $canAddInterwiki ) {
		// NOTE(review): the last argument is missing from the listing and was
		// reconstructed as INLINE_RESULTS - confirm against upstream.
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'additional',
			ISearchResultSet::INLINE_RESULTS );
	}

	// Interwiki results outside main result set
	if ( $interwiki && $resultPageSet === null ) {
		// NOTE(review): the last argument is missing from the listing and was
		// reconstructed as SECONDARY_RESULTS - confirm against upstream.
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'interwiki',
			ISearchResultSet::SECONDARY_RESULTS );
	}

	if ( $resultPageSet === null ) {
		$apiResult->addIndexedTagName( [
			'query', $this->getModuleName()
		], 'p' );
	} else {
		// When generator data for two titles is merged, keep the smaller index
		$resultPageSet->setRedirectMergePolicy( static function ( $current, $new ) {
			if ( !isset( $current['index'] ) || $new['index'] < $current['index'] ) {
				$current['index'] = $new['index'];
			}
			return $current;
		} );
		$resultPageSet->populateFromTitles( $titles );
		// 'index' is 1-based and continues from the request offset
		$offset = $params['offset'] + 1;
		foreach ( $titles as $index => $title ) {
			$resultPageSet->setGeneratorData(
				$title,
				$data[ $index ] + [ 'index' => $index + $offset ]
			);
		}
	}
}
238
/**
 * Assemble the output row for a single search result.
 *
 * @param SearchResult $result Result to describe
 * @param array $prop Requested properties as array keys (tested with isset())
 * @return array|null Row for the API result, or null when the result has a
 *  broken title or a missing revision
 */
private function getSearchResultData( SearchResult $result, $prop ) {
	// Silently skip broken and missing titles
	$unusable = $result->isBrokenTitle() || $result->isMissingRevision();
	if ( $unusable ) {
		return null;
	}

	// True when the caller asked for the named property
	$wants = static fn ( string $name ): bool => isset( $prop[$name] );

	$page = $result->getTitle();
	$row = [];
	ApiQueryBase::addTitleInfo( $row, $page );
	$row['pageid'] = $page->getArticleID();

	if ( $wants( 'size' ) ) {
		$row['size'] = $result->getByteSize();
	}
	if ( $wants( 'wordcount' ) ) {
		$row['wordcount'] = $result->getWordCount();
	}
	if ( $wants( 'snippet' ) ) {
		$row['snippet'] = $result->getTextSnippet();
	}
	if ( $wants( 'timestamp' ) ) {
		$rawTimestamp = $result->getTimestamp();
		$row['timestamp'] = wfTimestamp( TS_ISO_8601, $rawTimestamp );
	}
	if ( $wants( 'titlesnippet' ) ) {
		$row['titlesnippet'] = $result->getTitleSnippet();
	}
	if ( $wants( 'categorysnippet' ) ) {
		$row['categorysnippet'] = $result->getCategorySnippet();
	}

	// Redirect details are only reported when the result has a redirect title
	if ( $result->getRedirectTitle() !== null ) {
		if ( $wants( 'redirecttitle' ) ) {
			$row['redirecttitle'] = $result->getRedirectTitle()->getPrefixedText();
		}
		if ( $wants( 'redirectsnippet' ) ) {
			$row['redirectsnippet'] = $result->getRedirectSnippet();
		}
	}

	// Likewise, section details require a section title
	if ( $result->getSectionTitle() !== null ) {
		if ( $wants( 'sectiontitle' ) ) {
			$row['sectiontitle'] = $result->getSectionTitle()->getFragment();
		}
		if ( $wants( 'sectionsnippet' ) ) {
			$row['sectionsnippet'] = $result->getSectionSnippet();
		}
	}

	if ( $wants( 'isfilematch' ) ) {
		$row['isfilematch'] = $result->isFileMatch();
	}

	if ( $wants( 'extensiondata' ) ) {
		// Augmented data is organized as a map: augmentorName => data.
		// It is only added when there is something to report.
		$augmented = $result->getExtensionData();
		if ( $augmented ) {
			$row['extensiondata'] = ApiResult::addMetadataToResultVars( $augmented );
		}
	}

	return $row;
}
306
/**
 * Add the interwiki results of one type to the API result as their own section.
 *
 * Results are stored under [ 'query', $section . <module name>, <interwiki prefix> ]
 * and the summed hit count under [ 'query', $section . 'searchinfo' ].
 *
 * @param ISearchResultSet $matches Result set that may carry interwiki results
 * @param ApiResult $apiResult Result object to add the rows to
 * @param array $prop Requested properties as array keys
 * @param string $section Prefix for the result keys, e.g. 'additional' or 'interwiki'
 * @param mixed $type Interwiki result type handed to hasInterwikiResults() and
 *  getInterwikiResults(). NOTE(review): presumably one of the
 *  ISearchResultSet::*_RESULTS constants - confirm.
 * @return int|null Sum of the total hits over all interwiki result sets, or
 *  null when there are no interwiki results of this type
 */
private function addInterwikiResults(
	ISearchResultSet $matches, ApiResult $apiResult, $prop,
	$section, $type
) {
	if ( !$matches->hasInterwikiResults( $type ) ) {
		return null;
	}

	// Stays null when getInterwikiResults() yields nothing to iterate over
	$totalhits = null;
	foreach ( $matches->getInterwikiResults( $type ) as $interwikiMatches ) {
		// Include number of results if requested
		$totalhits += $interwikiMatches->getTotalHits();

		foreach ( $interwikiMatches as $result ) {
			$vals = $this->getSearchResultData( $result, $prop );
			if ( $vals === null ) {
				// Broken title or missing revision: skip it silently, as the
				// main result loop does, rather than emit a partial row or
				// dereference a title that may not exist.
				continue;
			}

			$title = $result->getTitle();
			// Overwrite the local title info with interwiki-specific values
			$vals['namespace'] = $result->getInterwikiNamespaceText();
			$vals['title'] = $title->getText();
			$vals['url'] = $title->getFullURL();

			// Add item to results and see whether it fits
			$fit = $apiResult->addValue( [
				'query',
				$section . $this->getModuleName(),
				$result->getInterwikiPrefix()
			], null, $vals );

			if ( !$fit ) {
				// We hit the limit. We can't really provide any meaningful
				// pagination info so just bail out
				break;
			}
		}
	}

	if ( $totalhits !== null ) {
		$apiResult->addValue( [ 'query', $section . 'searchinfo' ], 'totalhits', $totalhits );
		$apiResult->addIndexedTagName( [
			'query', $section . $this->getModuleName()
		], 'p' );
	}

	return $totalhits;
}
357
/**
 * Work out which snippet fields to request from the search engine.
 *
 * Field names align with definitions in ContentHandler::getFieldsForSearchIndex,
 * except `redirect`, which isn't explicitly created but refers to the title of
 * pages that redirect to the result page.
 *
 * @param array $prop Requested properties as array keys
 * @return string[] Field names, in the fixed order title, redirect, category, heading
 */
private function decideSnippets( array $prop ): array {
	// Each field is requested when any of its listed properties is present.
	// The *title properties are listed next to their snippet variants because
	// some search engines generate the title variant from the snippet and so
	// need the snippet requested in order to provide the title.
	$triggersByField = [
		'title' => [ 'titlesnippet' ],
		'redirect' => [ 'redirectsnippet', 'redirecttitle' ],
		'category' => [ 'categorysnippet' ],
		'heading' => [ 'sectionsnippet', 'sectiontitle' ],
	];

	$wanted = [];
	foreach ( $triggersByField as $field => $triggers ) {
		foreach ( $triggers as $trigger ) {
			if ( isset( $prop[$trigger] ) ) {
				$wanted[] = $field;
				// One matching property is enough; never list a field twice
				break;
			}
		}
	}

	return $wanted;
}
380
/**
 * Report the cache mode for this module's output.
 *
 * @param array $params Request parameters; not consulted here
 * @return string Always 'public'
 */
public function getCacheMode( $params ) {
	return 'public';
}
384
/**
 * Describe the parameters this module accepts.
 *
 * Combines the common search parameters from buildCommonApiParams() with the
 * module-specific ones, drops the 'prop'/'info' defaults in generator mode,
 * and exposes 'sort' only when exactly one search type is configured.
 *
 * @return array Parameter definitions keyed by parameter name
 */
public function getAllowedParams() {
	// Array union: entries from buildCommonApiParams() win on key collisions
	$allowedParams = $this->buildCommonApiParams() + [
		'what' => [
			ParamValidator::PARAM_TYPE => [
				'title',
				'text',
				'nearmatch',
			]
		],
		'info' => [
			ParamValidator::PARAM_DEFAULT => 'totalhits|suggestion|rewrittenquery',
			ParamValidator::PARAM_TYPE => [
				'totalhits',
				'suggestion',
				'rewrittenquery',
			],
			ParamValidator::PARAM_ISMULTI => true,
		],
		'prop' => [
			ParamValidator::PARAM_DEFAULT => 'size|wordcount|timestamp|snippet',
			ParamValidator::PARAM_TYPE => [
				'size',
				'wordcount',
				'timestamp',
				'snippet',
				'titlesnippet',
				'redirecttitle',
				'redirectsnippet',
				'sectiontitle',
				'sectionsnippet',
				'isfilematch',
				'categorysnippet',
				'score', // deprecated
				'hasrelated', // deprecated
				'extensiondata',
			],
			ParamValidator::PARAM_ISMULTI => true,
			// NOTE(review): restored entry. The listing skips one line here
			// while still referencing ApiBase::PARAM_HELP_MSG_PER_VALUE; the
			// empty-array value is assumed - confirm against upstream.
			ApiBase::PARAM_HELP_MSG_PER_VALUE => [],
			EnumDef::PARAM_DEPRECATED_VALUES => [
				'score' => true,
				'hasrelated' => true
			],
		],
		'interwiki' => false,
		'enablerewrites' => false,
	];

	// Generators only add info/properties if explicitly requested. T263841
	if ( $this->isInGeneratorMode() ) {
		$allowedParams['prop'][ParamValidator::PARAM_DEFAULT] = '';
		$allowedParams['info'][ParamValidator::PARAM_DEFAULT] = '';
	}

	// If we have more than one engine the list of available sorts is
	// difficult to represent. For now don't expose it.
	$alternatives = $this->searchEngineConfig->getSearchTypes();
	if ( count( $alternatives ) === 1 ) {
		$allowedParams['sort'] = [
			ParamValidator::PARAM_DEFAULT => SearchEngine::DEFAULT_SORT,
			ParamValidator::PARAM_TYPE => $this->searchEngineFactory->create()->getValidSorts(),
		];
	}

	return $allowedParams;
}
450
/**
 * List the search profile parameters offered by this module.
 *
 * @return array Map of parameter name to its 'profile-type' and 'help-message'
 */
public function getSearchProfileParams() {
	$qiprofile = [
		'profile-type' => SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE,
		'help-message' => 'apihelp-query+search-param-qiprofile',
	];

	return [ 'qiprofile' => $qiprofile ];
}
459
/**
 * Provide usage examples for this module.
 *
 * @return string[] Map of example query string to the message key describing it
 */
protected function getExamplesMessages() {
	$examples = [];

	// Default search
	$examples['action=query&list=search&srsearch=meaning'] =
		'apihelp-query+search-example-simple';
	// Explicit full-text search
	$examples['action=query&list=search&srwhat=text&srsearch=meaning'] =
		'apihelp-query+search-example-text';
	// Search used as a generator
	$examples['action=query&generator=search&gsrsearch=meaning&prop=info'] =
		'apihelp-query+search-example-generator';

	return $examples;
}
470
/**
 * Point to the on-wiki documentation for this module.
 *
 * @return string URL of the help page
 */
public function getHelpUrls() {
	$helpPage = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Search';

	return $helpPage;
}
474}
475
477class_alias( ApiQuerySearch::class, 'ApiQuerySearch' );
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
array $params
The job parameters.
run()
Run the job.
Marks HTML that shouldn't be escaped.
Definition HtmlArmor.php:30
dieWithError( $msg, $code=null, $data=null, $httpCode=0)
Abort execution with an error.
Definition ApiBase.php:1577
getModuleName()
Get the name of the module being executed by this instance.
Definition ApiBase.php:571
getMain()
Get the main module.
Definition ApiBase.php:589
getResult()
Get the result object.
Definition ApiBase.php:710
const PARAM_HELP_MSG_PER_VALUE
((string|array|Message)[]) When PARAM_TYPE is an array, or 'string' with PARAM_ISMULTI,...
Definition ApiBase.php:224
dieStatus(StatusValue $status)
Throw an ApiUsageException based on the Status object.
Definition ApiBase.php:1632
extractRequestParams( $options=[])
Using getAllowedParams(), this function makes an array of the values provided by the user,...
Definition ApiBase.php:851
static addTitleInfo(&$arr, $title, $prefix='')
Add information (title and namespace) about a Title object to a result array.
setContinueEnumParameter( $paramName, $paramValue)
Overridden to set the generator param if in generator mode.
Query module to perform full text search within wiki titles and content.
execute()
Evaluates the parameters, performs the requested query, and sets up the result.
getAllowedParams()
Returns an array of allowed parameters (parameter name) => (default value) or (parameter name) => (array with PARAM_* constants as keys).
getExamplesMessages()
Returns usage examples for this module.
__construct(ApiQuery $query, string $moduleName, SearchEngineConfig $searchEngineConfig, SearchEngineFactory $searchEngineFactory, TitleMatcher $titleMatcher)
getHelpUrls()
Return links to more detailed help pages about the module.
executeGenerator( $resultPageSet)
Execute this module as a generator.
getCacheMode( $params)
Get the cache mode for the data generated by this module.
This is the main query class.
Definition ApiQuery.php:48
static addMetadataToResultVars( $vars, $forceHash=true)
Add the correct metadata to an array of vars we want to export through the API.
Service implementation of near match title search.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:54
Configuration handling class for SearchEngine.
Factory class for SearchEngine.
Contain a class for special pages.
NOTE: this class is being refactored into an abstract base class.
Service for formatting and validating API parameters.
Type definition for enumeration types.
Definition EnumDef.php:32
A set of SearchEngine results.
const INLINE_RESULTS
Identifier for interwiki results that can be displayed even if no existing main wiki results exist.
const SECONDARY_RESULTS
Identifier for interwiki results that are displayed only together with existing main wiki results.
buildCommonApiParams( $isScrollable=true)
The set of api parameters that are shared between api calls that call the SearchEngine.
Definition SearchApi.php:72
buildSearchEngine(?array $params=null)
Build the search engine to use.