MediaWiki  master
SearchHandler.php
Go to the documentation of this file.
1 <?php
2 
3 namespace MediaWiki\Rest\Handler;
4 
5 use Config;
6 use InvalidArgumentException;
13 use SearchEngine;
16 use SearchResult;
18 use Status;
22 
26 class SearchHandler extends Handler {
27 
30 
33 
/** Search mode: search page body and titles. */
37  public const FULLTEXT_MODE = 'fulltext';
38 
/** Search mode: search title completion matches. */
42  public const COMPLETION_MODE = 'completion';
43 
48 
/** @var string Active search mode; starts as null and is assigned in postInitSetup(). */
52  private $mode = null;
53 
/** Limit results to 50 pages per default. */
55  private const LIMIT = 50;
56 
/** Hard limit results to 100 pages. */
58  private const MAX_LIMIT = 100;
59 
/** Default to first page. */
61  private const OFFSET = 0;
62 
70 
/**
 * @param Config $config Site configuration; only 'SearchSuggestCacheExpiry' is read here
 * @param SearchEngineFactory $searchEngineFactory Used to create the search engine per request
 * @param SearchEngineConfig $searchEngineConfig Supplies the default namespaces to search
 */
public function __construct(
	Config $config,
	SearchEngineFactory $searchEngineFactory,
	SearchEngineConfig $searchEngineConfig
) {
	$this->searchEngineFactory = $searchEngineFactory;
	$this->searchEngineConfig = $searchEngineConfig;

	// @todo Avoid injecting the entire config, see T246377
	$this->completionCacheExpiry = $config->get( 'SearchSuggestCacheExpiry' );
}
87 
/**
 * Resolve the search mode from the route configuration.
 *
 * Falls back to FULLTEXT_MODE when the route configuration has no 'mode' entry.
 *
 * @throws InvalidArgumentException If the configured mode is not one of SUPPORTED_MODES
 */
protected function postInitSetup() {
	$this->mode = $this->getConfig()['mode'] ?? self::FULLTEXT_MODE;

	// Strict membership test: a loose in_array() would let non-string values
	// such as boolean true pass validation and then be treated inconsistently
	// by later mode comparisons.
	if ( !in_array( $this->mode, self::SUPPORTED_MODES, true ) ) {
		throw new InvalidArgumentException(
			"Unsupported search mode `{$this->mode}` configured. Supported modes: " .
			implode( ', ', self::SUPPORTED_MODES )
		);
	}
}
98 
/**
 * Create a search engine configured for this request: default namespaces,
 * the validated 'limit' parameter, and the fixed OFFSET.
 *
 * @return SearchEngine
 */
private function createSearchEngine() {
	$params = $this->getValidatedParams();

	$engine = $this->searchEngineFactory->create();
	$engine->setNamespaces( $this->searchEngineConfig->defaultNamespaces() );
	$engine->setLimitOffset( $params['limit'], self::OFFSET );

	return $engine;
}
110 
/**
 * Indicates whether this route requires write access.
 *
 * @return bool Always false
 */
public function needsWriteAccess() {
	return false;
}
114 
/**
 * Get search results when $results is either a result set or a Status object.
 *
 * @param mixed $results Falsy value, Status, or an object offering extractResults()
 * @return array Extracted results; empty when there is nothing to extract
 * @throws LocalizedHttpException If a Status carries errors (warnings are ignored)
 */
private function getSearchResultsOrThrow( $results ) {
	if ( !$results ) {
		return [];
	}

	if ( !( $results instanceof Status ) ) {
		return $results->extractResults();
	}

	if ( !$results->isOK() ) {
		[ $errorStatus ] = $results->splitByErrorType();
		// Only throw for errors, suppress warnings (for now)
		if ( $errorStatus->getErrors() ) {
			$message = $errorStatus->getMessage();
			throw new LocalizedHttpException(
				new MessageValue( "rest-search-error", [ $message->getKey() ] )
			);
		}
	}

	$value = $results->getValue();
	return $value instanceof ISearchResultSet ? $value->extractResults() : [];
}
144 
/**
 * Execute search and return info about pages for further processing.
 *
 * In completion mode the page infos are built from completion suggestions;
 * otherwise title and text searches are both run and their results merged,
 * title matches first.
 *
 * @param SearchEngine $searchEngine
 * @return array Page infos keyed by page ID
 * @throws LocalizedHttpException If a title or text search reports errors
 */
private function doSearch( $searchEngine ) {
	$query = $this->getValidatedParams()['q'];

	// Strict comparison, consistent with the mode check in execute()
	if ( $this->mode === self::COMPLETION_MODE ) {
		$completionSearch = $searchEngine->completionSearchWithVariants( $query );
		return $this->buildPageInfosFromSuggestions( $completionSearch->getSuggestions() );
	}

	$titleSearch = $searchEngine->searchTitle( $query );
	$textSearch = $searchEngine->searchText( $query );

	$titleSearchResults = $this->getSearchResultsOrThrow( $titleSearch );
	$textSearchResults = $this->getSearchResultsOrThrow( $textSearch );

	$mergedResults = array_merge( $titleSearchResults, $textSearchResults );
	return $this->buildPageInfosFromSearchResults( $mergedResults );
}
169 
/**
 * Remove duplicate pages and turn suggestions into an array with the
 * information needed for further processing.
 *
 * Suggestions without an existing title, or whose page the current
 * authority probably cannot read, are dropped. The first suggestion
 * seen for a page ID wins.
 *
 * @param array $suggestions
 * @return array Map of page ID to [ title, suggestion, null ]
 */
private function buildPageInfosFromSuggestions( array $suggestions ): array {
	$byPageId = [];

	foreach ( $suggestions as $suggestion ) {
		$suggestedTitle = $suggestion->getSuggestedTitle();
		if ( !$suggestedTitle || !$suggestedTitle->exists() ) {
			continue;
		}

		$id = $suggestedTitle->getArticleID();
		if ( isset( $byPageId[$id] ) ) {
			continue;
		}

		if ( $this->getAuthority()->probablyCan( 'read', $suggestedTitle ) ) {
			$byPageId[$id] = [ $suggestedTitle, $suggestion, null ];
		}
	}

	return $byPageId;
}
195 
/**
 * Remove duplicate pages and turn search results into an array with the
 * information needed for further processing.
 *
 * Results with a broken title or a missing revision, or whose page the
 * current authority probably cannot read, are dropped. The first result
 * seen for a page ID wins.
 *
 * @param array $searchResults
 * @return array Map of page ID to [ title, null, search result ]
 */
private function buildPageInfosFromSearchResults( array $searchResults ): array {
	$byPageId = [];

	foreach ( $searchResults as $hit ) {
		if ( $hit->isBrokenTitle() || $hit->isMissingRevision() ) {
			continue;
		}

		$hitTitle = $hit->getTitle();
		$id = $hitTitle->getArticleID();
		if ( isset( $byPageId[$id] ) ) {
			continue;
		}

		if ( $this->getAuthority()->probablyCan( 'read', $hitTitle ) ) {
			$byPageId[$id] = [ $hitTitle, null, $hit ];
		}
	}

	return $byPageId;
}
221 
/**
 * Turn an array of page infos into a serializable array with common
 * information about each page.
 *
 * The excerpt comes from the suggestion text when a suggestion is present,
 * otherwise from the search result's text snippet; an empty excerpt
 * becomes null. Input keys are preserved.
 *
 * @param array $pageInfos Entries of the form [ title, suggestion|null, result|null ]
 * @return array Entries with 'id', 'key', 'title' and 'excerpt'
 */
private function buildResultFromPageInfos( array $pageInfos ): array {
	$serialized = [];

	foreach ( $pageInfos as $index => $info ) {
		[ $title, $sugg, $result ] = $info;

		$entry = [
			'id' => $title->getArticleID(),
			'key' => $title->getPrefixedDBkey(),
			'title' => $title->getPrefixedText(),
		];

		if ( $sugg ) {
			$excerpt = $sugg->getText();
		} else {
			$excerpt = $result->getTextSnippet();
		}
		$entry['excerpt'] = $excerpt ?: null;

		$serialized[$index] = $entry;
	}

	return $serialized;
}
241 
/**
 * Convert a SearchResultThumbnail object into a serializable array.
 *
 * @param SearchResultThumbnail|null $thumbnail
 * @return array|null Null when no thumbnail was given
 */
private function serializeThumbnail( ?SearchResultThumbnail $thumbnail ) : ?array {
	if ( $thumbnail !== null ) {
		return [
			'mimetype' => $thumbnail->getMimeType(),
			'size' => $thumbnail->getSize(),
			'width' => $thumbnail->getWidth(),
			'height' => $thumbnail->getHeight(),
			'duration' => $thumbnail->getDuration(),
			'url' => $thumbnail->getUrl(),
		];
	}

	return null;
}
263 
/**
 * Turn page identities into a serializable array with a 'description'
 * field for each page.
 *
 * Every page starts with a null description; the
 * SearchResultProvideDescription hook is then run with the identities and
 * the description array.
 *
 * @param array $pageIdentities
 * @return array Entries of the form [ 'description' => ... ]
 */
private function buildDescriptionsFromPageIdentities( array $pageIdentities ) {
	$pageIds = array_keys( $pageIdentities );
	$descriptions = array_fill_keys( $pageIds, null );

	$this->getHookRunner()->onSearchResultProvideDescription( $pageIdentities, $descriptions );

	$wrap = static function ( $description ) {
		return [ 'description' => $description ];
	};

	return array_map( $wrap, $descriptions );
}
283 
/**
 * Turn page identities into a serializable array with a 'thumbnail'
 * field for each page.
 *
 * Every page starts with a null thumbnail; the
 * SearchResultProvideThumbnail hook is then run with the identities and
 * the thumbnail array, and each entry is serialized.
 *
 * @param array $pageIdentities
 * @return array Entries of the form [ 'thumbnail' => array|null ]
 */
private function buildThumbnailsFromPageIdentities( array $pageIdentities ) {
	$pageIds = array_keys( $pageIdentities );
	$thumbnails = array_fill_keys( $pageIds, null );

	$this->getHookRunner()->onSearchResultProvideThumbnail( $pageIdentities, $thumbnails );

	$wrap = function ( $thumbnail ) {
		return [ 'thumbnail' => $this->serializeThumbnail( $thumbnail ) ];
	};

	return array_map( $wrap, $thumbnails );
}
304 
/**
 * Run the search and build the JSON response.
 *
 * The response body has the shape [ 'pages' => [ ... ] ], where each page
 * merges the common page fields with its description and thumbnail.
 * In completion mode, a Cache-Control header is added when a cache expiry
 * is configured.
 *
 * @return \MediaWiki\Rest\Response
 * @throws LocalizedHttpException If the underlying search reports errors
 */
public function execute() {
	$searchEngine = $this->createSearchEngine();
	$pageInfos = $this->doSearch( $searchEngine );

	// Reduce each page info to a lightweight identity for the hook handlers.
	$pageIdentities = array_map( function ( $pageInfo ) {
		list( $title ) = $pageInfo;
		return new \MediaWiki\Rest\Entity\SearchResultPageIdentityValue(
			$title->getArticleID(),
			$title->getNamespace(),
			$title->getDBkey()
		);
	}, $pageInfos );

	// array_map() over several arrays walks them in parallel, merging the
	// three partial records of each page into one entry.
	$result = array_map( "array_merge",
		$this->buildResultFromPageInfos( $pageInfos ),
		$this->buildDescriptionsFromPageIdentities( $pageIdentities ),
		$this->buildThumbnailsFromPageIdentities( $pageIdentities )
	);

	$response = $this->getResponseFactory()->createJson( [ 'pages' => $result ] );

	if ( $this->mode === self::COMPLETION_MODE && $this->completionCacheExpiry ) {
		// Type-ahead completion matches should be cached by the client and
		// in the CDN, especially for short prefixes.
		// See also $wgSearchSuggestCacheExpiry and ApiOpenSearch
		$response->setHeader( 'Cache-Control', 'public, max-age=' . $this->completionCacheExpiry );
	}

	return $response;
}
338 
/**
 * Fetch ParamValidator settings for parameters.
 *
 * 'q' is a required query-string parameter; 'limit' is an optional integer
 * between 1 and MAX_LIMIT that defaults to LIMIT.
 *
 * @return array[] Settings keyed by parameter name
 */
public function getParamSettings() {
	$querySettings = [
		self::PARAM_SOURCE => 'query',
		ParamValidator::PARAM_TYPE => 'string',
		ParamValidator::PARAM_REQUIRED => true,
	];

	$limitSettings = [
		self::PARAM_SOURCE => 'query',
		ParamValidator::PARAM_TYPE => 'integer',
		ParamValidator::PARAM_REQUIRED => false,
		ParamValidator::PARAM_DEFAULT => self::LIMIT,
		IntegerDef::PARAM_MIN => 1,
		IntegerDef::PARAM_MAX => self::MAX_LIMIT,
	];

	return [
		'q' => $querySettings,
		'limit' => $limitSettings,
	];
}
356 }
MediaWiki\Rest\Handler
Definition: AbstractContributionHandler.php:3
MediaWiki\Rest\Handler\getResponseFactory
getResponseFactory()
Get the ResponseFactory which can be used to generate Response objects.
Definition: Handler.php:171
MediaWiki\Rest\Handler\SearchHandler\buildPageInfosFromSearchResults
buildPageInfosFromSearchResults(array $searchResults)
Remove duplicate pages and turn search results into array with information needed for further process...
Definition: SearchHandler.php:205
MediaWiki\Rest\Handler\SearchHandler\buildDescriptionsFromPageIdentities
buildDescriptionsFromPageIdentities(array $pageIdentities)
Turn page info into serializable array with description field for the page.
Definition: SearchHandler.php:274
MediaWiki\Rest\Handler\SearchHandler\execute
execute()
Definition: SearchHandler.php:309
MediaWiki\Rest\Handler\SearchHandler\buildResultFromPageInfos
buildResultFromPageInfos(array $pageInfos)
Turn array of page info into serializable array with common information about the page.
Definition: SearchHandler.php:229
MediaWiki\Rest\Handler\SearchHandler\buildPageInfosFromSuggestions
buildPageInfosFromSuggestions(array $suggestions)
Remove duplicate pages and turn suggestions into array with information needed for further processing...
Definition: SearchHandler.php:179
MediaWiki\Rest\Handler\SearchHandler\LIMIT
const LIMIT
Limit results to 50 pages per default.
Definition: SearchHandler.php:55
MediaWiki\Rest\Handler\SearchHandler\getParamSettings
getParamSettings()
Fetch ParamValidator settings for parameters.
Definition: SearchHandler.php:339
SearchEngineFactory
Factory class for SearchEngine.
Definition: SearchEngineFactory.php:13
Wikimedia\Message\MessageValue
Value object representing a message for i18n.
Definition: MessageValue.php:16
MediaWiki\Rest\Handler
Base class for REST route handlers.
Definition: Handler.php:17
Status
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition: Status.php:44
MediaWiki\Rest\Handler\SearchHandler\buildThumbnailsFromPageIdentities
buildThumbnailsFromPageIdentities(array $pageIdentities)
Turn page info into serializable array with thumbnail information for the page.
Definition: SearchHandler.php:295
Config
Interface for configuration instances.
Definition: Config.php:30
MediaWiki\Search\Entity\SearchResultThumbnail\getDuration
getDuration()
Duration of the representation in seconds or null if not applicable.
Definition: SearchResultThumbnail.php:125
MediaWiki\Rest\Handler\SearchHandler\serializeThumbnail
serializeThumbnail(?SearchResultThumbnail $thumbnail)
Converts SearchResultThumbnail object into serializable array.
Definition: SearchHandler.php:249
SearchResult
NOTE: this class is being refactored into an abstract base class.
Definition: SearchResult.php:38
MediaWiki\Search\Entity\SearchResultThumbnail\getHeight
getHeight()
Height of the representation in pixels or null if not applicable.
Definition: SearchResultThumbnail.php:101
MediaWiki\Rest\Handler\SearchHandler\OFFSET
const OFFSET
Default to first page.
Definition: SearchHandler.php:61
MediaWiki\Rest\Handler\SearchHandler\$mode
string $mode
Definition: SearchHandler.php:52
MediaWiki\Rest\Handler\SearchHandler\FULLTEXT_MODE
const FULLTEXT_MODE
Search page body and titles.
Definition: SearchHandler.php:37
MediaWiki\Rest\Handler\SearchHandler\needsWriteAccess
needsWriteAccess()
Indicates whether this route requires write access.
Definition: SearchHandler.php:111
MediaWiki\Search\Entity\SearchResultThumbnail\getWidth
getWidth()
Width of the representation in pixels or null if not applicable.
Definition: SearchResultThumbnail.php:93
SearchSuggestion
Search suggestion.
Definition: SearchSuggestion.php:25
MediaWiki\Rest\Response
Definition: Response.php:8
MediaWiki\Rest\Handler\SearchHandler\SUPPORTED_MODES
const SUPPORTED_MODES
Supported modes.
Definition: SearchHandler.php:47
$title
$title
Definition: testCompression.php:38
MediaWiki\Rest\Handler\getValidatedParams
getValidatedParams()
Fetch the validated parameters.
Definition: Handler.php:282
ISearchResultSet
A set of SearchEngine results.
Definition: ISearchResultSet.php:12
MediaWiki\Rest\Handler\SearchHandler\COMPLETION_MODE
const COMPLETION_MODE
Search title completion matches.
Definition: SearchHandler.php:42
MediaWiki\Rest\Handler\getConfig
getConfig()
Get the configuration array for the current route.
Definition: Handler.php:160
MediaWiki\Rest\Handler\getHookRunner
getHookRunner()
Get a HookRunner for running core hooks.
Definition: Handler.php:316
MediaWiki\Rest\Handler\$config
array $config
Definition: Handler.php:38
MediaWiki\Rest\Handler\SearchHandler\MAX_LIMIT
const MAX_LIMIT
Hard limit results to 100 pages.
Definition: SearchHandler.php:58
MediaWiki\Rest\Handler\SearchHandler\__construct
__construct(Config $config, SearchEngineFactory $searchEngineFactory, SearchEngineConfig $searchEngineConfig)
Definition: SearchHandler.php:76
SearchEngine
Contains a class for special pages. Stable to extend.
Definition: SearchEngine.php:37
MediaWiki\Search\Entity\SearchResultThumbnail
Class that stores information about thumbnail, e.
Definition: SearchResultThumbnail.php:9
MediaWiki\Rest\Entity\SearchResultPageIdentityValue
Lightweight value class representing a page identity.
Definition: SearchResultPageIdentityValue.php:11
MediaWiki\Rest\Handler\SearchHandler\createSearchEngine
createSearchEngine()
Definition: SearchHandler.php:102
MediaWiki\Search\Entity\SearchResultThumbnail\getSize
getSize()
Size of the representation in bytes or null if not applicable.
Definition: SearchResultThumbnail.php:117
Wikimedia\ParamValidator\TypeDef\IntegerDef
Type definition for integer types.
Definition: IntegerDef.php:23
MediaWiki\Rest\Handler\SearchHandler
Handler class for Core REST API endpoint that handles basic search.
Definition: SearchHandler.php:26
MediaWiki\Rest\Handler\SearchHandler\postInitSetup
postInitSetup()
The handler can override this to do any necessary setup after init() is called to inject the dependen...
Definition: SearchHandler.php:88
SearchEngineConfig
Configuration handling class for SearchEngine.
Definition: SearchEngineConfig.php:12
MediaWiki\Rest\Handler\SearchHandler\doSearch
doSearch( $searchEngine)
Execute search and return info about pages for further processing.
Definition: SearchHandler.php:152
MediaWiki\Search\Entity\SearchResultThumbnail\getMimeType
getMimeType()
Internet mime type for the representation, like "image/png" or "audio/mp3".
Definition: SearchResultThumbnail.php:109
MediaWiki\Rest\Handler\SearchHandler\getSearchResultsOrThrow
getSearchResultsOrThrow( $results)
Get SearchResults when results are either SearchResultSet or Status objects.
Definition: SearchHandler.php:121
MediaWiki\Rest\Handler\SearchHandler\$searchEngineConfig
SearchEngineConfig $searchEngineConfig
Definition: SearchHandler.php:32
MediaWiki\Rest\Handler\getAuthority
getAuthority()
Get the current acting authority.
Definition: Handler.php:149
MediaWiki\Rest\Handler\SearchHandler\$searchEngineFactory
SearchEngineFactory $searchEngineFactory
Definition: SearchHandler.php:29
MediaWiki\Rest\Handler\SearchHandler\$completionCacheExpiry
int|null $completionCacheExpiry
Expiry time for use as max-age value in the cache-control header of completion search responses.
Definition: SearchHandler.php:69
Wikimedia\ParamValidator\ParamValidator
Service for formatting and validating API parameters.
Definition: ParamValidator.php:42
MediaWiki\Search\Entity\SearchResultThumbnail\getUrl
getUrl()
Full URL to the contents of the file.
Definition: SearchResultThumbnail.php:85
MediaWiki\Rest\LocalizedHttpException
@newable
Definition: LocalizedHttpException.php:10