MediaWiki  master
SearchHandler.php
Go to the documentation of this file.
1 <?php
2 
3 namespace MediaWiki\Rest\Handler;
4 
5 use Config;
6 use InvalidArgumentException;
13 use SearchEngine;
16 use SearchResult;
18 use Status;
19 use Title;
23 
27 class SearchHandler extends Handler {
28 
31 
34 
/** Search page body and titles. */
38  public const FULLTEXT_MODE = 'fulltext';
39 
/** Search title completion matches. */
43  public const COMPLETION_MODE = 'completion';
44 
// NOTE(review): self::SUPPORTED_MODES is referenced by postInitSetup() but its
// declaration is not visible in this listing - confirm it exists in the real file.
49 
/** @var string|null Search mode; stays null until postInitSetup() assigns it. */
53  private $mode = null;
54 
/** Limit results to 50 pages per default. */
56  private const LIMIT = 50;
57 
/** Hard limit results to 100 pages. */
59  private const MAX_LIMIT = 100;
60 
/** Default to first page. */
62  private const OFFSET = 0;
63 
71 
/**
 * Store the injected search services and read the completion cache expiry.
 *
 * The two search classes are referenced by fully-qualified name so this
 * signature does not rely on `use` statements.
 *
 * @param Config $config Only 'SearchSuggestCacheExpiry' is read from it
 * @param \SearchEngineFactory $searchEngineFactory
 * @param \SearchEngineConfig $searchEngineConfig
 */
public function __construct(
	Config $config,
	\SearchEngineFactory $searchEngineFactory,
	\SearchEngineConfig $searchEngineConfig
) {
	$this->searchEngineFactory = $searchEngineFactory;
	$this->searchEngineConfig = $searchEngineConfig;

	// @todo Avoid injecting the entire config, see T246377
	$this->completionCacheExpiry = $config->get( 'SearchSuggestCacheExpiry' );
}
88 
/**
 * Resolve the search mode from the route configuration and validate it.
 *
 * Uses self::FULLTEXT_MODE when the route config has no 'mode' entry.
 *
 * @throws InvalidArgumentException If the mode is not one of self::SUPPORTED_MODES
 */
protected function postInitSetup() {
	$this->mode = $this->getConfig()['mode'] ?? self::FULLTEXT_MODE;

	// Strict matching: with loose comparison a non-string value such as `true`
	// compares equal to any non-empty mode string and would pass validation.
	if ( !in_array( $this->mode, self::SUPPORTED_MODES, true ) ) {
		throw new InvalidArgumentException(
			"Unsupported search mode `{$this->mode}` configured. Supported modes: " .
			implode( ', ', self::SUPPORTED_MODES )
		);
	}
}
99 
/**
 * Create a search engine configured for this request: default namespaces,
 * the validated 'limit' parameter, and self::OFFSET as the starting offset.
 *
 * @return SearchEngine Engine created by the injected factory
 */
private function createSearchEngine() {
	$maxResults = $this->getValidatedParams()['limit'];
	$namespaces = null;

	$engine = $this->searchEngineFactory->create();
	$namespaces = $this->searchEngineConfig->defaultNamespaces();
	$engine->setNamespaces( $namespaces );
	$engine->setLimitOffset( $maxResults, self::OFFSET );

	return $engine;
}
111 
/**
 * Indicates whether this route requires write access; this handler reports
 * that it does not.
 *
 * @return bool Always false
 */
public function needsWriteAccess() {
	return false;
}
115 
/**
 * Unwrap the individual results from what a search call returned, which may
 * be a result set, a Status wrapping one, or a falsy value.
 *
 * @param ISearchResultSet|Status|null $results
 * @return array Whatever extractResults() yields, or an empty array when
 *  there is nothing to extract
 * @throws LocalizedHttpException If a non-OK Status carries errors
 */
private function getSearchResultsOrThrow( $results ) {
	if ( !$results ) {
		return [];
	}

	if ( !( $results instanceof Status ) ) {
		return $results->extractResults();
	}

	if ( !$results->isOK() ) {
		[ $errorStatus ] = $results->splitByErrorType();
		// Only throw for errors, suppress warnings (for now)
		if ( $errorStatus->getErrors() ) {
			$message = $errorStatus->getMessage();
			throw new LocalizedHttpException(
				new MessageValue( "rest-search-error", [ $message->getKey() ] )
			);
		}
	}

	$wrapped = $results->getValue();

	return $wrapped instanceof ISearchResultSet ? $wrapped->extractResults() : [];
}
145 
/**
 * Execute search and return info about pages for further processing.
 *
 * In completion mode the page infos come from completion suggestions;
 * otherwise title matches and text matches are concatenated (title matches
 * first) before being turned into page infos.
 *
 * @param SearchEngine $searchEngine
 * @return array Page infos as built by the buildPageInfosFrom*() helpers
 * @throws LocalizedHttpException Propagated from getSearchResultsOrThrow()
 */
private function doSearch( $searchEngine ) {
	$query = $this->getValidatedParams()['q'];

	// Identity comparison, consistent with the mode check in execute().
	if ( $this->mode === self::COMPLETION_MODE ) {
		$completionSearch = $searchEngine->completionSearchWithVariants( $query );
		return $this->buildPageInfosFromSuggestions( $completionSearch->getSuggestions() );
	}

	$titleSearch = $searchEngine->searchTitle( $query );
	$textSearch = $searchEngine->searchText( $query );

	$titleSearchResults = $this->getSearchResultsOrThrow( $titleSearch );
	$textSearchResults = $this->getSearchResultsOrThrow( $textSearch );

	$mergedResults = array_merge( $titleSearchResults, $textSearchResults );
	return $this->buildPageInfosFromSearchResults( $mergedResults );
}
170 
/**
 * Remove duplicate pages and turn suggestions into an array with the
 * information needed for further processing.
 *
 * A suggestion is kept only if its title exists, its page ID has not been
 * seen yet, and the current authority can probably read it.
 *
 * @param array $suggestions
 * @return array[] Page ID => [ title, suggestion, null ]
 */
private function buildPageInfosFromSuggestions( array $suggestions ): array {
	$infosByPageId = [];

	foreach ( $suggestions as $suggestion ) {
		$suggestedTitle = $suggestion->getSuggestedTitle();
		if ( !$suggestedTitle || !$suggestedTitle->exists() ) {
			continue;
		}

		$id = $suggestedTitle->getArticleID();
		if ( isset( $infosByPageId[$id] ) ) {
			// First occurrence of a page wins.
			continue;
		}

		if ( $this->getAuthority()->probablyCan( 'read', $suggestedTitle ) ) {
			$infosByPageId[$id] = [ $suggestedTitle, $suggestion, null ];
		}
	}

	return $infosByPageId;
}
196 
/**
 * Remove duplicate pages and turn search results into an array with the
 * information needed for further processing.
 *
 * Results with a broken title or a missing revision are skipped, as are
 * pages the current authority probably cannot read.
 *
 * @param array $searchResults
 * @return array[] Page ID => [ title, null, search result ]
 */
private function buildPageInfosFromSearchResults( array $searchResults ): array {
	$infosByPageId = [];

	foreach ( $searchResults as $hit ) {
		if ( $hit->isBrokenTitle() || $hit->isMissingRevision() ) {
			continue;
		}

		$hitTitle = $hit->getTitle();
		$id = $hitTitle->getArticleID();
		if ( isset( $infosByPageId[$id] ) ) {
			// First occurrence of a page wins.
			continue;
		}

		if ( $this->getAuthority()->probablyCan( 'read', $hitTitle ) ) {
			$infosByPageId[$id] = [ $hitTitle, null, $hit ];
		}
	}

	return $infosByPageId;
}
222 
/**
 * Turn an array of page infos into a serializable array with common
 * information about each page. Keys of the input are preserved.
 *
 * @param array[] $pageInfos Entries of the form [ title, suggestion|null, result|null ]
 * @return array[] Entries with 'id', 'key', 'title' and 'excerpt'
 */
private function buildResultFromPageInfos( array $pageInfos ): array {
	$serialized = [];

	foreach ( $pageInfos as $index => $info ) {
		[ $page, $suggestion, $searchResult ] = $info;

		$entry = [
			'id' => $page->getArticleID(),
			'key' => $page->getPrefixedDBkey(),
			'title' => $page->getPrefixedText(),
		];

		// The suggestion text takes precedence; a falsy excerpt is reported as null.
		$text = $suggestion ? $suggestion->getText() : $searchResult->getTextSnippet();
		$entry['excerpt'] = $text ?: null;

		$serialized[$index] = $entry;
	}

	return $serialized;
}
242 
/**
 * Converts a SearchResultThumbnail object into a serializable array.
 *
 * @param SearchResultThumbnail|null $thumbnail
 * @return array|null Null when no thumbnail was given
 */
private function serializeThumbnail( ?SearchResultThumbnail $thumbnail ): ?array {
	if ( $thumbnail === null ) {
		return null;
	}

	$fields = [];
	$fields['mimetype'] = $thumbnail->getMimeType();
	$fields['size'] = $thumbnail->getSize();
	$fields['width'] = $thumbnail->getWidth();
	$fields['height'] = $thumbnail->getHeight();
	$fields['duration'] = $thumbnail->getDuration();
	$fields['url'] = $thumbnail->getUrl();

	return $fields;
}
264 
/**
 * Turn page identities into a serializable array with a description field
 * for each page.
 *
 * Every key of $pageIdentities starts out with a null description; the
 * SearchResultProvideDescription hook is then given the chance to fill it.
 *
 * @param array $pageIdentities
 * @return array[] Same keys as the descriptions array after the hook ran,
 *  each value being [ 'description' => ... ]
 */
private function buildDescriptionsFromPageIdentities( array $pageIdentities ) {
	$descriptionsByKey = array_fill_keys( array_keys( $pageIdentities ), null );

	$this->getHookRunner()->onSearchResultProvideDescription( $pageIdentities, $descriptionsByKey );

	$wrapped = [];
	foreach ( $descriptionsByKey as $key => $text ) {
		$wrapped[$key] = [ 'description' => $text ];
	}

	return $wrapped;
}
284 
/**
 * Turn page identities into a serializable array with thumbnail information
 * for each page.
 *
 * Every key of $pageIdentities starts out with a null thumbnail; the
 * SearchResultProvideThumbnail hook is then given the chance to fill it.
 *
 * @param array $pageIdentities
 * @return array[] Same keys as the thumbnails array after the hook ran,
 *  each value being [ 'thumbnail' => array|null ]
 */
private function buildThumbnailsFromPageIdentities( array $pageIdentities ) {
	$thumbnailsByKey = array_fill_keys( array_keys( $pageIdentities ), null );

	$this->getHookRunner()->onSearchResultProvideThumbnail( $pageIdentities, $thumbnailsByKey );

	$wrapped = [];
	foreach ( $thumbnailsByKey as $key => $thumb ) {
		$wrapped[$key] = [ 'thumbnail' => $this->serializeThumbnail( $thumb ) ];
	}

	return $wrapped;
}
305 
/**
 * Run the search and build the JSON response.
 *
 * The response body is [ 'pages' => list ], where each entry combines the
 * common page fields with 'description' and 'thumbnail'. In completion mode
 * with a non-empty cache expiry, a public Cache-Control header is added.
 *
 * @return Response
 * @throws LocalizedHttpException Propagated from doSearch()
 */
public function execute() {
	$searchEngine = $this->createSearchEngine();
	$pageInfos = $this->doSearch( $searchEngine );

	$pageIdentities = array_map( static function ( $pageInfo ) {
		[ $title ] = $pageInfo;
		return $title->exists() ? $title->toPageIdentity() : null;
	}, $pageInfos );

	// Remove empty entries resulting from non-proper pages like e.g. special pages
	// in the search result.
	$pageIdentities = array_filter( $pageIdentities );

	$pages = $this->buildResultFromPageInfos( $pageInfos );
	$descriptions = $this->buildDescriptionsFromPageIdentities( $pageIdentities );
	$thumbnails = $this->buildThumbnailsFromPageIdentities( $pageIdentities );

	// Combine by key, not by position. After the filtering above the
	// description and thumbnail arrays may hold fewer entries than $pages.
	// A positional zip would attach them to the wrong page and pad the
	// remainder with null, which array_merge() does not accept.
	$result = [];
	foreach ( $pages as $pageKey => $page ) {
		$result[] = array_merge(
			$page,
			$descriptions[$pageKey] ?? [ 'description' => null ],
			$thumbnails[$pageKey] ?? [ 'thumbnail' => null ]
		);
	}

	$response = $this->getResponseFactory()->createJson( [ 'pages' => $result ] );

	if ( $this->mode === self::COMPLETION_MODE && $this->completionCacheExpiry ) {
		// Type-ahead completion matches should be cached by the client and
		// in the CDN, especially for short prefixes.
		// See also $wgSearchSuggestCacheExpiry and ApiOpenSearch
		$response->setHeader( 'Cache-Control', 'public, max-age=' . $this->completionCacheExpiry );
	}

	return $response;
}
342 
/**
 * Fetch ParamValidator settings for parameters.
 *
 * 'q' is a required query-string parameter of type string. 'limit' is an
 * optional integer query-string parameter defaulting to self::LIMIT and
 * bounded to the range 1..self::MAX_LIMIT.
 *
 * @return array[]
 */
public function getParamSettings() {
	$querySettings = [
		self::PARAM_SOURCE => 'query',
		ParamValidator::PARAM_TYPE => 'string',
		ParamValidator::PARAM_REQUIRED => true,
	];

	$limitSettings = [
		self::PARAM_SOURCE => 'query',
		ParamValidator::PARAM_TYPE => 'integer',
		ParamValidator::PARAM_REQUIRED => false,
		ParamValidator::PARAM_DEFAULT => self::LIMIT,
		IntegerDef::PARAM_MIN => 1,
		IntegerDef::PARAM_MAX => self::MAX_LIMIT,
	];

	return [
		'q' => $querySettings,
		'limit' => $limitSettings,
	];
}
360 }
MediaWiki\Rest\Handler
Definition: AbstractContributionHandler.php:3
MediaWiki\Rest\Handler\getResponseFactory
getResponseFactory()
Get the ResponseFactory which can be used to generate Response objects.
Definition: Handler.php:170
MediaWiki\Rest\Handler\SearchHandler\buildPageInfosFromSearchResults
buildPageInfosFromSearchResults(array $searchResults)
Remove duplicate pages and turn search results into array with information needed for further process...
Definition: SearchHandler.php:206
MediaWiki\Rest\Handler\SearchHandler\buildDescriptionsFromPageIdentities
buildDescriptionsFromPageIdentities(array $pageIdentities)
Turn page info into serializable array with description field for the page.
Definition: SearchHandler.php:275
MediaWiki\Rest\Handler\SearchHandler\execute
execute()
Definition: SearchHandler.php:310
MediaWiki\Rest\Handler\SearchHandler\buildResultFromPageInfos
buildResultFromPageInfos(array $pageInfos)
Turn array of page info into serializable array with common information about the page.
Definition: SearchHandler.php:230
MediaWiki\Rest\Handler\SearchHandler\buildPageInfosFromSuggestions
buildPageInfosFromSuggestions(array $suggestions)
Remove duplicate pages and turn suggestions into array with information needed for further processing...
Definition: SearchHandler.php:180
MediaWiki\Rest\Handler\SearchHandler\LIMIT
const LIMIT
Limit results to 50 pages per default.
Definition: SearchHandler.php:56
MediaWiki\Rest\Handler\SearchHandler\getParamSettings
getParamSettings()
Fetch ParamValidator settings for parameters.
Definition: SearchHandler.php:343
SearchEngineFactory
Factory class for SearchEngine.
Definition: SearchEngineFactory.php:12
Wikimedia\Message\MessageValue
Value object representing a message for i18n.
Definition: MessageValue.php:16
MediaWiki\Rest\Handler
Base class for REST route handlers.
Definition: Handler.php:17
Status
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition: Status.php:44
MediaWiki\Rest\Handler\SearchHandler\buildThumbnailsFromPageIdentities
buildThumbnailsFromPageIdentities(array $pageIdentities)
Turn page info into serializable array with thumbnail information for the page.
Definition: SearchHandler.php:296
Config
Interface for configuration instances.
Definition: Config.php:30
MediaWiki\Search\Entity\SearchResultThumbnail\getDuration
getDuration()
Duration of the representation in seconds or null if not applicable.
Definition: SearchResultThumbnail.php:125
MediaWiki\Rest\Handler\SearchHandler\serializeThumbnail
serializeThumbnail(?SearchResultThumbnail $thumbnail)
Converts SearchResultThumbnail object into serializable array.
Definition: SearchHandler.php:250
SearchResult
NOTE: this class is being refactored into an abstract base class.
Definition: SearchResult.php:38
MediaWiki\Search\Entity\SearchResultThumbnail\getHeight
getHeight()
Height of the representation in pixels or null if not applicable.
Definition: SearchResultThumbnail.php:101
MediaWiki\Rest\Handler\SearchHandler\OFFSET
const OFFSET
Default to first page.
Definition: SearchHandler.php:62
MediaWiki\Rest\Handler\SearchHandler\$mode
string $mode
Definition: SearchHandler.php:53
MediaWiki\Rest\Handler\SearchHandler\FULLTEXT_MODE
const FULLTEXT_MODE
Search page body and titles.
Definition: SearchHandler.php:38
MediaWiki\Rest\Handler\SearchHandler\needsWriteAccess
needsWriteAccess()
Indicates whether this route requires write access.
Definition: SearchHandler.php:112
MediaWiki\Search\Entity\SearchResultThumbnail\getWidth
getWidth()
Width of the representation in pixels or null if not applicable.
Definition: SearchResultThumbnail.php:93
SearchSuggestion
Search suggestion.
Definition: SearchSuggestion.php:25
MediaWiki\Rest\Response
Definition: Response.php:8
MediaWiki\Rest\Handler\SearchHandler\SUPPORTED_MODES
const SUPPORTED_MODES
Supported modes.
Definition: SearchHandler.php:48
$title
$title
Definition: testCompression.php:38
MediaWiki\Rest\Handler\getValidatedParams
getValidatedParams()
Fetch the validated parameters.
Definition: Handler.php:282
ISearchResultSet
A set of SearchEngine results.
Definition: ISearchResultSet.php:12
MediaWiki\Rest\Handler\SearchHandler\COMPLETION_MODE
const COMPLETION_MODE
Search title completion matches.
Definition: SearchHandler.php:43
MediaWiki\Rest\Handler\getConfig
getConfig()
Get the configuration array for the current route.
Definition: Handler.php:159
MediaWiki\Rest\Handler\getHookRunner
getHookRunner()
Get a HookRunner for running core hooks.
Definition: Handler.php:316
MediaWiki\Rest\Handler\$config
array $config
Definition: Handler.php:38
Page\ProperPageIdentity
Interface for objects representing a page that is (or could be, or used to be) an editable page on a ...
Definition: ProperPageIdentity.php:43
MediaWiki\Rest\Handler\SearchHandler\MAX_LIMIT
const MAX_LIMIT
Hard limit results to 100 pages.
Definition: SearchHandler.php:59
MediaWiki\Rest\Handler\SearchHandler\__construct
__construct(Config $config, SearchEngineFactory $searchEngineFactory, SearchEngineConfig $searchEngineConfig)
Definition: SearchHandler.php:77
SearchEngine
Contain a class for special pages.
Definition: SearchEngine.php:37
MediaWiki\Search\Entity\SearchResultThumbnail
Class that stores information about a thumbnail.
Definition: SearchResultThumbnail.php:9
MediaWiki\Rest\Handler\SearchHandler\createSearchEngine
createSearchEngine()
Definition: SearchHandler.php:103
MediaWiki\Search\Entity\SearchResultThumbnail\getSize
getSize()
Size of the representation in bytes or null if not applicable.
Definition: SearchResultThumbnail.php:117
Title
Represents a title within MediaWiki.
Definition: Title.php:48
Wikimedia\ParamValidator\TypeDef\IntegerDef
Type definition for integer types.
Definition: IntegerDef.php:23
MediaWiki\Rest\Handler\SearchHandler
Handler class for Core REST API endpoint that handles basic search.
Definition: SearchHandler.php:27
MediaWiki\Rest\Handler\SearchHandler\postInitSetup
postInitSetup()
The handler can override this to do any necessary setup after init() is called to inject the dependen...
Definition: SearchHandler.php:89
SearchEngineConfig
Configuration handling class for SearchEngine.
Definition: SearchEngineConfig.php:12
MediaWiki\Rest\Handler\SearchHandler\doSearch
doSearch( $searchEngine)
Execute search and return info about pages for further processing.
Definition: SearchHandler.php:153
MediaWiki\Search\Entity\SearchResultThumbnail\getMimeType
getMimeType()
Internet mime type for the representation, like "image/png" or "audio/mp3".
Definition: SearchResultThumbnail.php:109
MediaWiki\Rest\Handler\SearchHandler\getSearchResultsOrThrow
getSearchResultsOrThrow( $results)
Get SearchResults when results are either SearchResultSet or Status objects.
Definition: SearchHandler.php:122
MediaWiki\Rest\Handler\SearchHandler\$searchEngineConfig
SearchEngineConfig $searchEngineConfig
Definition: SearchHandler.php:33
MediaWiki\Rest\Handler\getAuthority
getAuthority()
Get the current acting authority.
Definition: Handler.php:148
MediaWiki\Rest\Handler\SearchHandler\$searchEngineFactory
SearchEngineFactory $searchEngineFactory
Definition: SearchHandler.php:30
MediaWiki\Rest\Handler\SearchHandler\$completionCacheExpiry
int|null $completionCacheExpiry
Expiry time for use as max-age value in the cache-control header of completion search responses.
Definition: SearchHandler.php:70
Wikimedia\ParamValidator\ParamValidator
Service for formatting and validating API parameters.
Definition: ParamValidator.php:42
MediaWiki\Search\Entity\SearchResultThumbnail\getUrl
getUrl()
Full URL to the contents of the file.
Definition: SearchResultThumbnail.php:85
MediaWiki\Rest\LocalizedHttpException
@newable
Definition: LocalizedHttpException.php:10