MediaWiki master
ApiQuerySearch.php
Go to the documentation of this file.
1<?php
27
34 use SearchApi;
35
36 private TitleMatcher $titleMatcher;
37
/**
 * Set up the module and keep hold of the search services it works with.
 *
 * @param ApiQuery $query Query module this list/generator module belongs to
 * @param string $moduleName Name this module is registered under
 * @param SearchEngineConfig $searchEngineConfig
 * @param SearchEngineFactory $searchEngineFactory
 * @param TitleMatcher $titleMatcher Used by run() for srwhat=nearmatch
 */
public function __construct(
	ApiQuery $query,
	$moduleName,
	SearchEngineConfig $searchEngineConfig,
	SearchEngineFactory $searchEngineFactory,
	TitleMatcher $titleMatcher
) {
	// 'sr' is the parameter prefix (cf. srsearch / srwhat in the examples).
	parent::__construct( $query, $moduleName, 'sr' );

	$this->titleMatcher = $titleMatcher;

	// These two services are also needed in the SearchApi trait.
	$this->searchEngineFactory = $searchEngineFactory;
	$this->searchEngineConfig = $searchEngineConfig;
}
58
/**
 * Run the search as a plain list module: results are written straight
 * into the API result rather than into a page set.
 */
public function execute() {
	// No page set: run() treats null as "list mode".
	$this->run( null );
}
62
/**
 * Run the search as a generator, feeding the matching titles into the
 * supplied page set.
 *
 * @param object $resultPageSet Page set to populate (presumably an ApiPageSet - confirm)
 */
public function executeGenerator( $resultPageSet ) {
	$pageSet = $resultPageSet;
	$this->run( $pageSet );
}
66
/**
 * Shared implementation behind execute() and executeGenerator().
 *
 * Reads the request parameters, runs the search with the configured
 * search engine and then either writes the hits into the API result
 * (list mode) or hands the titles to the page set (generator mode).
 *
 * @param object|null $resultPageSet Page set to populate when running as a
 *  generator, null in list mode (presumably an ApiPageSet - confirm)
 */
private function run( $resultPageSet = null ) {
	// Every later step reads from $params, so it has to be fetched first.
	$params = $this->extractRequestParams();

	// Extract parameters
	$query = $params['search'];
	$what = $params['what'];
	$interwiki = $params['interwiki'];
	// Turn the value lists into lookup maps so isset() can be used below.
	$searchInfo = array_fill_keys( $params['info'], true );
	$prop = array_fill_keys( $params['prop'], true );

	// Create search engine instance and set options
	$search = $this->buildSearchEngine( $params );
	if ( isset( $params['sort'] ) ) {
		$search->setSort( $params['sort'] );
	}
	$search->setFeatureData( 'rewrite', (bool)$params['enablerewrites'] );
	$search->setFeatureData( 'interwiki', (bool)$interwiki );
	// Hint to some SearchEngines about what snippets we would like returned
	$search->setFeatureData( 'snippets', $this->decideSnippets( $prop ) );

	$nquery = $search->replacePrefixes( $query );
	if ( $nquery !== $query ) {
		$query = $nquery;
		wfDeprecatedMsg( 'SearchEngine::replacePrefixes() is overridden by ' .
			get_class( $search ) . ', this was deprecated in MediaWiki 1.32',
			'1.32' );
	}

	// Perform the actual search
	if ( $what === 'text' ) {
		$matches = $search->searchText( $query );
	} elseif ( $what === 'title' ) {
		$matches = $search->searchTitle( $query );
	} elseif ( $what === 'nearmatch' ) {
		// near matches must receive the user input as provided, otherwise
		// the near matches within namespaces are lost.
		$matches = $this->titleMatcher->getNearMatchResultSet( $params['search'] );
	} else {
		// We default to title searches; this is a terrible legacy
		// of the way we initially set up the MySQL fulltext-based
		// search engine with separate title and text fields.
		// In the future, the default should be for a combined index.
		$what = 'title';
		$matches = $search->searchTitle( $query );

		// Not all search engines support a separate title search,
		// for instance the Lucene-based engine we use on Wikipedia.
		// In this case, fall back to full-text search (which will
		// include titles in it!)
		if ( $matches === null ) {
			$what = 'text';
			$matches = $search->searchText( $query );
		}
	}

	// The engine may hand back either a result set or a Status wrapping one.
	if ( $matches instanceof Status ) {
		$status = $matches;
		$matches = $status->getValue();
	} else {
		$status = null;
	}

	if ( $status ) {
		if ( $status->isOK() ) {
			// Usable result: pass any messages carried by the status to the error formatter.
			$this->getMain()->getErrorFormatter()->addMessagesFromStatus(
				$this->getModuleName(),
				$status
			);
		} else {
			$this->dieStatus( $status );
		}
	} elseif ( $matches === null ) {
		$this->dieWithError( [ 'apierror-searchdisabled', $what ], "search-{$what}-disabled" );
	}

	$apiResult = $this->getResult();
	// Add search meta data to result
	if ( isset( $searchInfo['totalhits'] ) ) {
		$totalhits = $matches->getTotalHits();
		if ( $totalhits !== null ) {
			$apiResult->addValue( [ 'query', 'searchinfo' ],
				'totalhits', $totalhits );
		}
	}
	if ( isset( $searchInfo['suggestion'] ) && $matches->hasSuggestion() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestion', $matches->getSuggestionQuery() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'suggestionsnippet', HtmlArmor::getHtml( $matches->getSuggestionSnippet() ) );
	}
	if ( isset( $searchInfo['rewrittenquery'] ) && $matches->hasRewrittenQuery() ) {
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquery', $matches->getQueryAfterRewrite() );
		$apiResult->addValue( [ 'query', 'searchinfo' ],
			'rewrittenquerysnippet', HtmlArmor::getHtml( $matches->getQueryAfterRewriteSnippet() ) );
	}

	$titles = [];
	$data = [];
	$count = 0;

	if ( $matches->hasMoreResults() ) {
		$this->setContinueEnumParameter( 'offset', $params['offset'] + $params['limit'] );
	}

	foreach ( $matches as $result ) {
		// Counted before the skip below so that continuation offsets
		// include skipped rows as well.
		$count++;
		// Silently skip broken and missing titles
		if ( $result->isBrokenTitle() || $result->isMissingRevision() ) {
			continue;
		}

		$vals = $this->getSearchResultData( $result, $prop );

		if ( $resultPageSet === null ) {
			if ( $vals ) {
				// Add item to results and see whether it fits
				$fit = $apiResult->addValue( [ 'query', $this->getModuleName() ], null, $vals );
				if ( !$fit ) {
					// Continue from the row that did not fit.
					$this->setContinueEnumParameter( 'offset', $params['offset'] + $count - 1 );
					break;
				}
			}
		} else {
			// Generator mode: collect titles now, attach the data afterwards.
			$titles[] = $result->getTitle();
			$data[] = $vals ?: [];
		}
	}

	// Here we assume interwiki results do not count with
	// regular search results. We may want to reconsider this
	// if we ever return a lot of interwiki results or want pagination
	// for them.
	// Interwiki results inside main result set
	$canAddInterwiki = (bool)$params['enablerewrites'] && ( $resultPageSet === null );
	if ( $canAddInterwiki ) {
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'additional',
			ISearchResultSet::INLINE_RESULTS );
	}

	// Interwiki results outside main result set
	if ( $interwiki && $resultPageSet === null ) {
		$this->addInterwikiResults( $matches, $apiResult, $prop, 'interwiki',
			ISearchResultSet::SECONDARY_RESULTS );
	}

	if ( $resultPageSet === null ) {
		$apiResult->addIndexedTagName( [
			'query', $this->getModuleName()
		], 'p' );
	} else {
		// When generator data is merged across a redirect, keep the lowest index.
		$resultPageSet->setRedirectMergePolicy( static function ( $current, $new ) {
			if ( !isset( $current['index'] ) || $new['index'] < $current['index'] ) {
				$current['index'] = $new['index'];
			}
			return $current;
		} );
		$resultPageSet->populateFromTitles( $titles );
		// The reported index is 1-based and continues from the request offset.
		$offset = $params['offset'] + 1;
		foreach ( $titles as $index => $title ) {
			$resultPageSet->setGeneratorData(
				$title,
				$data[ $index ] + [ 'index' => $index + $offset ]
			);
		}
	}
}
237
/**
 * Build the API representation of a single search hit.
 *
 * Title information and the page id are always included; everything
 * else is added only when the matching key is present in $prop.
 *
 * @param SearchResult $result Hit to describe
 * @param array $prop Requested properties, keyed by property name
 * @return array|null Result data, or null when the hit has a broken
 *  title or a missing revision
 */
private function getSearchResultData( SearchResult $result, $prop ) {
	// Broken and missing titles are silently dropped.
	$unusable = $result->isBrokenTitle() || $result->isMissingRevision();
	if ( $unusable ) {
		return null;
	}

	$page = $result->getTitle();

	$data = [];
	ApiQueryBase::addTitleInfo( $data, $page );
	$data['pageid'] = $page->getArticleID();

	if ( isset( $prop['size'] ) ) {
		$data['size'] = $result->getByteSize();
	}

	if ( isset( $prop['wordcount'] ) ) {
		$data['wordcount'] = $result->getWordCount();
	}

	if ( isset( $prop['snippet'] ) ) {
		$data['snippet'] = $result->getTextSnippet();
	}

	if ( isset( $prop['timestamp'] ) ) {
		$rawTimestamp = $result->getTimestamp();
		$data['timestamp'] = wfTimestamp( TS_ISO_8601, $rawTimestamp );
	}

	if ( isset( $prop['titlesnippet'] ) ) {
		$data['titlesnippet'] = $result->getTitleSnippet();
	}

	if ( isset( $prop['categorysnippet'] ) ) {
		$data['categorysnippet'] = $result->getCategorySnippet();
	}

	// Redirect details are only reported when the hit has a redirect title.
	$redirect = $result->getRedirectTitle();
	if ( $redirect !== null ) {
		if ( isset( $prop['redirecttitle'] ) ) {
			$data['redirecttitle'] = $redirect->getPrefixedText();
		}
		if ( isset( $prop['redirectsnippet'] ) ) {
			$data['redirectsnippet'] = $result->getRedirectSnippet();
		}
	}

	// Likewise for section details.
	$section = $result->getSectionTitle();
	if ( $section !== null ) {
		if ( isset( $prop['sectiontitle'] ) ) {
			$data['sectiontitle'] = $section->getFragment();
		}
		if ( isset( $prop['sectionsnippet'] ) ) {
			$data['sectionsnippet'] = $result->getSectionSnippet();
		}
	}

	if ( isset( $prop['isfilematch'] ) ) {
		$data['isfilematch'] = $result->isFileMatch();
	}

	if ( isset( $prop['extensiondata'] ) ) {
		// Augmented data is organized as a map: augmentorName => data.
		// It is only added to the result when there is something in it.
		$augmented = $result->getExtensionData();
		if ( $augmented ) {
			$data['extensiondata'] = ApiResult::addMetadataToResultVars( $augmented );
		}
	}

	return $data;
}
305
/**
 * Add the interwiki results of one type to the API result.
 *
 * Hits are written under [ 'query', $section . <module name>, <interwiki prefix> ]
 * and the summed hit count under [ 'query', $section . 'searchinfo' ].
 *
 * @param ISearchResultSet $matches Result set that may carry interwiki results
 * @param ApiResult $apiResult Result object to write to
 * @param array $prop Requested properties, keyed by property name
 * @param string $section Prefix for the output keys ('additional' or
 *  'interwiki' at the call sites in run())
 * @param mixed $type Interwiki result type; run() passes
 *  ISearchResultSet::INLINE_RESULTS or ISearchResultSet::SECONDARY_RESULTS
 * @return int|null Sum of the total hits over all interwiki result sets,
 *  or null when there are no interwiki results of this type
 */
private function addInterwikiResults(
	ISearchResultSet $matches, ApiResult $apiResult, $prop,
	$section, $type
) {
	$totalhits = null;
	if ( $matches->hasInterwikiResults( $type ) ) {
		foreach ( $matches->getInterwikiResults( $type ) as $interwikiMatches ) {
			// Accumulate the number of results. Spelled out instead of
			// relying on null arithmetic; the resulting value is unchanged.
			$totalhits = ( $totalhits ?? 0 ) + ( $interwikiMatches->getTotalHits() ?? 0 );

			foreach ( $interwikiMatches as $result ) {
				$vals = $this->getSearchResultData( $result, $prop );
				if ( $vals === null ) {
					// Broken title or missing revision: skip it, as run() does
					// for local results, rather than using a title that may
					// not exist.
					continue;
				}

				$title = $result->getTitle();
				// Overwrite the local title info with interwiki-aware values.
				$vals['namespace'] = $result->getInterwikiNamespaceText();
				$vals['title'] = $title->getText();
				$vals['url'] = $title->getFullURL();

				// Add item to results and see whether it fits
				$fit = $apiResult->addValue( [
					'query',
					$section . $this->getModuleName(),
					$result->getInterwikiPrefix()
				], null, $vals );

				if ( !$fit ) {
					// We hit the limit. We can't really provide any meaningful
					// pagination info so just bail out
					break;
				}
			}
		}
		if ( $totalhits !== null ) {
			$apiResult->addValue( [ 'query', $section . 'searchinfo' ], 'totalhits', $totalhits );
			$apiResult->addIndexedTagName( [
				'query', $section . $this->getModuleName()
			], 'p' );
		}
	}
	return $totalhits;
}
356
/**
 * Work out which snippet fields the search engine should be asked for.
 *
 * @param array $prop Requested properties, keyed by property name
 * @return string[] Field names, in the fixed order title, redirect, category, heading
 */
private function decideSnippets( array $prop ): array {
	// Field names align with definitions in ContentHandler::getFieldsForSearchIndex.
	// Except `redirect` which isn't explicitly created, but refers to the title of
	// pages that redirect to the result page.
	//
	// Each field lists the props that cause it to be requested. The title
	// variants are listed next to their snippet because some search engines
	// generate the title variant from the snippet and thus must have the
	// snippet requested to provide the title.
	$triggersByField = [
		'title' => [ 'titlesnippet' ],
		'redirect' => [ 'redirectsnippet', 'redirecttitle' ],
		'category' => [ 'categorysnippet' ],
		'heading' => [ 'sectionsnippet', 'sectiontitle' ],
	];

	$wanted = [];
	foreach ( $triggersByField as $field => $triggers ) {
		foreach ( $triggers as $trigger ) {
			if ( isset( $prop[$trigger] ) ) {
				$wanted[] = $field;
				// One matching prop is enough; move on to the next field.
				continue 2;
			}
		}
	}

	return $wanted;
}
379
/**
 * Report the cache mode for the data generated by this module.
 *
 * @param array $params Request parameters (not consulted)
 * @return string Always 'public'
 */
public function getCacheMode( $params ) {
	$mode = 'public';
	return $mode;
}
383
/**
 * Describe the parameters accepted by this module.
 *
 * Starts from the common search parameters supplied by
 * buildCommonApiParams() and adds the module-specific ones on top.
 *
 * @return array Parameter name => default value or parameter definition
 */
public function getAllowedParams() {
	$allowedParams = $this->buildCommonApiParams() + [
		'what' => [
			ParamValidator::PARAM_TYPE => [
				'title',
				'text',
				'nearmatch',
			]
		],
		'info' => [
			ParamValidator::PARAM_DEFAULT => 'totalhits|suggestion|rewrittenquery',
			ParamValidator::PARAM_TYPE => [
				'totalhits',
				'suggestion',
				'rewrittenquery',
			],
			ParamValidator::PARAM_ISMULTI => true,
		],
		'prop' => [
			ParamValidator::PARAM_DEFAULT => 'size|wordcount|timestamp|snippet',
			ParamValidator::PARAM_TYPE => [
				'size',
				'wordcount',
				'timestamp',
				'snippet',
				'titlesnippet',
				'redirecttitle',
				'redirectsnippet',
				'sectiontitle',
				'sectionsnippet',
				'isfilematch',
				'categorysnippet',
				'score', // deprecated
				'hasrelated', // deprecated
				'extensiondata',
			],
			ParamValidator::PARAM_ISMULTI => true,
			// Restored: enables per-value help messages for the prop values.
			ApiBase::PARAM_HELP_MSG_PER_VALUE => [],
			EnumDef::PARAM_DEPRECATED_VALUES => [
				'score' => true,
				'hasrelated' => true
			],
		],
		'interwiki' => false,
		'enablerewrites' => false,
	];

	// Generators only add info/properties if explicitly requested. T263841
	if ( $this->isInGeneratorMode() ) {
		$allowedParams['prop'][ParamValidator::PARAM_DEFAULT] = '';
		$allowedParams['info'][ParamValidator::PARAM_DEFAULT] = '';
	}

	// If we have more than one engine the list of available sorts is
	// difficult to represent. For now don't expose it.
	$alternatives = $this->searchEngineConfig->getSearchTypes();
	if ( count( $alternatives ) === 1 ) {
		$allowedParams['sort'] = [
			ParamValidator::PARAM_DEFAULT => SearchEngine::DEFAULT_SORT,
			ParamValidator::PARAM_TYPE => $this->searchEngineFactory->create()->getValidSorts(),
		];
	}

	return $allowedParams;
}
449
/**
 * Declare the search profile parameters offered by this module.
 *
 * @return array Parameter name => settings ('profile-type' and 'help-message')
 */
public function getSearchProfileParams() {
	$qiProfile = [
		'profile-type' => SearchEngine::FT_QUERY_INDEP_PROFILE_TYPE,
		'help-message' => 'apihelp-query+search-param-qiprofile',
	];

	return [ 'qiprofile' => $qiProfile ];
}
458
/**
 * Provide usage examples for this module.
 *
 * @return string[] Example query string => message key describing it
 */
protected function getExamplesMessages() {
	$examples = [];

	// Plain list usage with the default search type.
	$examples['action=query&list=search&srsearch=meaning'] =
		'apihelp-query+search-example-simple';
	// Explicit full-text search.
	$examples['action=query&list=search&srwhat=text&srsearch=meaning'] =
		'apihelp-query+search-example-text';
	// Generator usage, combined with prop=info.
	$examples['action=query&generator=search&gsrsearch=meaning&prop=info'] =
		'apihelp-query+search-example-generator';

	return $examples;
}
469
/**
 * Point to the more detailed on-wiki help page for this module.
 *
 * @return string URL of the help page
 */
public function getHelpUrls() {
	$helpUrl = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Search';
	return $helpUrl;
}
473}
wfDeprecatedMsg( $msg, $version=false, $component=false, $callerOffset=2)
Log a deprecation warning with arbitrary message text.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
buildSearchEngine(array $params=null)
Build the search engine to use.
buildCommonApiParams( $isScrollable=true)
The set of api parameters that are shared between api calls that call the SearchEngine.
Definition SearchApi.php:65
array $params
The job parameters.
run()
Run the job.
dieWithError( $msg, $code=null, $data=null, $httpCode=0)
Abort execution with an error.
Definition ApiBase.php:1567
getMain()
Get the main module.
Definition ApiBase.php:579
const PARAM_HELP_MSG_PER_VALUE
((string|array|Message)[]) When PARAM_TYPE is an array, or 'string' with PARAM_ISMULTI,...
Definition ApiBase.php:215
getResult()
Get the result object.
Definition ApiBase.php:700
extractRequestParams( $options=[])
Using getAllowedParams(), this function makes an array of the values provided by the user, with the key being the name of the variable, and value - validated value from user or default.
Definition ApiBase.php:841
getModuleName()
Get the name of the module being executed by this instance.
Definition ApiBase.php:561
dieStatus(StatusValue $status)
Throw an ApiUsageException based on the Status object.
Definition ApiBase.php:1622
static addTitleInfo(&$arr, $title, $prefix='')
Add information (title and namespace) about a Title object to a result array.
setContinueEnumParameter( $paramName, $paramValue)
Overridden to set the generator param if in generator mode.
Query module to perform full text search within wiki titles and content.
execute()
Evaluates the parameters, performs the requested query, and sets up the result.
getHelpUrls()
Return links to more detailed help pages about the module.
getExamplesMessages()
Returns usage examples for this module.
getCacheMode( $params)
Get the cache mode for the data generated by this module.
getAllowedParams()
Returns an array of allowed parameters (parameter name) => (default value) or (parameter name) => (array with PARAM_* constants as keys).
executeGenerator( $resultPageSet)
Execute this module as a generator.
__construct(ApiQuery $query, $moduleName, SearchEngineConfig $searchEngineConfig, SearchEngineFactory $searchEngineFactory, TitleMatcher $titleMatcher)
This is the main query class.
Definition ApiQuery.php:43
This class represents the result of the API operations.
Definition ApiResult.php:36
static addMetadataToResultVars( $vars, $forceHash=true)
Add the correct metadata to an array of vars we want to export through the API.
addIndexedTagName( $path, $tag)
Set the tag name for numeric-keyed values in XML format.
addValue( $path, $name, $value, $flags=0)
Add value to the output data at the given path.
Service implementation of near match title search.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition Status.php:54
Configuration handling class for SearchEngine.
Factory class for SearchEngine.
NOTE: this class is being refactored into an abstract base class.
Service for formatting and validating API parameters.
Type definition for enumeration types.
Definition EnumDef.php:32
trait SearchApi
Traits for API components that use a SearchEngine.
Definition SearchApi.php:31
A set of SearchEngine results.