Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
99.37% covered (success)
99.37%
157 / 158
93.33% covered (success)
93.33%
14 / 15
CRAP
0.00% covered (danger)
0.00%
0 / 1
SearchHandler
99.37% covered (success)
99.37%
157 / 158
93.33% covered (success)
93.33%
14 / 15
55
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
1
 postInitSetup
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
2
 createSearchEngine
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 needsWriteAccess
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getSearchResultsOrThrow
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
6
 doSearch
100.00% covered (success)
100.00%
10 / 10
100.00% covered (success)
100.00%
1 / 1
2
 buildPageObjects
100.00% covered (success)
100.00%
18 / 18
100.00% covered (success)
100.00%
1 / 1
10
 buildSinglePage
100.00% covered (success)
100.00%
16 / 16
100.00% covered (success)
100.00%
1 / 1
9
 buildResultFromPageInfos
100.00% covered (success)
100.00%
22 / 22
100.00% covered (success)
100.00%
1 / 1
10
 serializeThumbnail
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
2
 buildDescriptionsFromPageIdentities
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 buildThumbnailsFromPageIdentities
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 execute
100.00% covered (success)
100.00%
24 / 24
100.00% covered (success)
100.00%
1 / 1
7
 getParamSettings
100.00% covered (success)
100.00%
17 / 17
100.00% covered (success)
100.00%
1 / 1
1
 getResponseBodySchemaFileName
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
1<?php
2
3namespace MediaWiki\Rest\Handler;
4
5use InvalidArgumentException;
6use MediaWiki\Config\Config;
7use MediaWiki\MainConfigNames;
8use MediaWiki\Page\CacheKeyHelper;
9use MediaWiki\Page\PageIdentity;
10use MediaWiki\Page\PageStore;
11use MediaWiki\Page\RedirectLookup;
12use MediaWiki\Permissions\PermissionManager;
13use MediaWiki\Rest\Handler;
14use MediaWiki\Rest\Handler\Helper\RestStatusTrait;
15use MediaWiki\Rest\LocalizedHttpException;
16use MediaWiki\Rest\Response;
17use MediaWiki\Rest\ResponseHeaders;
18use MediaWiki\Search\Entity\SearchResultThumbnail;
19use MediaWiki\Search\ISearchResultSet;
20use MediaWiki\Search\SearchEngine;
21use MediaWiki\Search\SearchEngineConfig;
22use MediaWiki\Search\SearchEngineFactory;
23use MediaWiki\Search\SearchResult;
24use MediaWiki\Search\SearchResultThumbnailProvider;
25use MediaWiki\Search\SearchSuggestion;
26use MediaWiki\Title\TitleFormatter;
27use StatusValue;
28use Wikimedia\Message\MessageValue;
29use Wikimedia\ParamValidator\ParamValidator;
30use Wikimedia\ParamValidator\TypeDef\IntegerDef;
31
32/**
33 * Handler class for Core REST API endpoint that handles basic search
34 */
35class SearchHandler extends Handler {
36    use RestStatusTrait;
37
38    private SearchEngineFactory $searchEngineFactory;
39    private SearchEngineConfig $searchEngineConfig;
40    private SearchResultThumbnailProvider $searchResultThumbnailProvider;
41    private PermissionManager $permissionManager;
42    private RedirectLookup $redirectLookup;
43    private PageStore $pageStore;
44    private TitleFormatter $titleFormatter;
45
46    /**
47     * Search page body and titles.
48     */
49    public const FULLTEXT_MODE = 'fulltext';
50
51    /**
52     * Search title completion matches.
53     */
54    public const COMPLETION_MODE = 'completion';
55
56    /**
57     * Supported modes
58     */
59    private const SUPPORTED_MODES = [ self::FULLTEXT_MODE, self::COMPLETION_MODE ];
60
61    /**
62     * @var string
63     */
64    private $mode = null;
65
66    /** Limit results to 50 pages by default */
67    private const LIMIT = 50;
68
69    /** Hard limit results to 100 pages */
70    private const MAX_LIMIT = 100;
71
72    /** Default to first page */
73    private const OFFSET = 0;
74
75    /**
76     * Expiry time for use as max-age value in the cache-control header
77     * of completion search responses.
78     * @see $wgSearchSuggestCacheExpiry
79     * @var int|null
80     */
81    private $completionCacheExpiry;
82
83    public function __construct(
84        Config $config,
85        SearchEngineFactory $searchEngineFactory,
86        SearchEngineConfig $searchEngineConfig,
87        SearchResultThumbnailProvider $searchResultThumbnailProvider,
88        PermissionManager $permissionManager,
89        RedirectLookup $redirectLookup,
90        PageStore $pageStore,
91        TitleFormatter $titleFormatter
92    ) {
93        $this->searchEngineFactory = $searchEngineFactory;
94        $this->searchEngineConfig = $searchEngineConfig;
95        $this->searchResultThumbnailProvider = $searchResultThumbnailProvider;
96        $this->permissionManager = $permissionManager;
97        $this->redirectLookup = $redirectLookup;
98        $this->pageStore = $pageStore;
99        $this->titleFormatter = $titleFormatter;
100
101        // @todo Avoid injecting the entire config, see T246377
102        $this->completionCacheExpiry = $config->get( MainConfigNames::SearchSuggestCacheExpiry );
103    }
104
105    protected function postInitSetup() {
106        $this->mode = $this->getConfig()['mode'] ?? self::FULLTEXT_MODE;
107
108        if ( !in_array( $this->mode, self::SUPPORTED_MODES ) ) {
109            throw new InvalidArgumentException(
110                "Unsupported search mode `{$this->mode}` configured. Supported modes: " .
111                implode( ', ', self::SUPPORTED_MODES )
112            );
113        }
114    }
115
116    /**
117     * @return SearchEngine
118     */
119    private function createSearchEngine() {
120        $limit = $this->getValidatedParams()['limit'];
121
122        $searchEngine = $this->searchEngineFactory->create();
123        $searchEngine->setNamespaces( $this->searchEngineConfig->defaultNamespaces() );
124        $searchEngine->setLimitOffset( $limit, self::OFFSET );
125        return $searchEngine;
126    }
127
128    /** @inheritDoc */
129    public function needsWriteAccess() {
130        return false;
131    }
132
133    /**
134     * Get SearchResults when results are either SearchResultSet or Status objects
135     * @param ISearchResultSet|StatusValue|null $results
136     * @return SearchResult[]
137     * @throws LocalizedHttpException
138     */
139    private function getSearchResultsOrThrow( $results ) {
140        if ( $results ) {
141            if ( $results instanceof StatusValue ) {
142                $status = $results;
143                if ( !$status->isOK() ) {
144                    if ( $status->getMessages( 'error' ) ) { // Only throw for errors, suppress warnings (for now)
145                        $this->throwExceptionForStatus( $status, 'rest-search-error', 500 );
146                    }
147                }
148                $statusValue = $status->getValue();
149                if ( $statusValue instanceof ISearchResultSet ) {
150                    return $statusValue->extractResults();
151                }
152            } else {
153                return $results->extractResults();
154            }
155        }
156        return [];
157    }
158
159    /**
160     * Execute search and return info about pages for further processing.
161     *
162     * @param SearchEngine $searchEngine
163     * @return array[]
164     * @throws LocalizedHttpException
165     */
166    private function doSearch( $searchEngine ) {
167        $query = $this->getValidatedParams()['q'];
168
169        if ( $this->mode == self::COMPLETION_MODE ) {
170            $completionSearch = $searchEngine->completionSearchWithVariants( $query );
171            return $this->buildPageObjects( $completionSearch->getSuggestions() );
172        } else {
173            $titleSearch = $searchEngine->searchTitle( $query );
174            $textSearch = $searchEngine->searchText( $query );
175
176            $titleSearchResults = $this->getSearchResultsOrThrow( $titleSearch );
177            $textSearchResults = $this->getSearchResultsOrThrow( $textSearch );
178
179            $mergedResults = array_merge( $titleSearchResults, $textSearchResults );
180            return $this->buildPageObjects( $mergedResults );
181        }
182    }
183
184    /**
185     * Build an array of pageInfo objects.
186     * @param SearchSuggestion[]|SearchResult[] $searchResponse
187     *
188     * @phpcs:ignore Generic.Files.LineLength
189     * @phan-return array{int:array{pageIdentity:PageIdentity,suggestion:?SearchSuggestion,result:?SearchResult,redirect:?PageIdentity}} $pageInfos
190     * @return array Associative array mapping pageID to pageInfo objects:
191     *   - pageIdentity: PageIdentity of page to return as the match
192     *   - suggestion: SearchSuggestion or null if $searchResponse is SearchResults[]
193     *   - result: SearchResult or null if $searchResponse is SearchSuggestions[]
194     *   - redirect: PageIdentity or null if the SearchResult|SearchSuggestion was not a redirect
195     */
196    private function buildPageObjects( array $searchResponse ): array {
197        $pageInfos = [];
198        foreach ( $searchResponse as $response ) {
199            $isSearchResult = $response instanceof SearchResult;
200            if ( $isSearchResult ) {
201                if ( $response->isBrokenTitle() || $response->isMissingRevision() ) {
202                    continue;
203                }
204                $title = $response->getTitle();
205            } else {
206                $title = $response->getSuggestedTitle();
207            }
208            $pageObj = $this->buildSinglePage( $title, $response );
209            if ( $pageObj ) {
210                $pageNsAndID = CacheKeyHelper::getKeyForPage( $pageObj['pageIdentity'] );
211                // This handles the edge case where we have both the redirect source and redirect target page come back
212                // in our search results. In such event, we prefer (and thus replace) with  the redirect target page.
213                if ( isset( $pageInfos[$pageNsAndID] ) ) {
214                    if ( $pageInfos[$pageNsAndID]['redirect'] !== null ) {
215                        $pageInfos[$pageNsAndID]['result'] = $isSearchResult ? $response : null;
216                        $pageInfos[$pageNsAndID]['suggestion'] = $isSearchResult ? null : $response;
217                    }
218                    continue;
219                }
220                $pageInfos[$pageNsAndID] = $pageObj;
221            }
222        }
223        return $pageInfos;
224    }
225
226    /**
227     * Build one pageInfo object from either a SearchResult or SearchSuggestion.
228     * @param PageIdentity $title
229     * @param SearchResult|SearchSuggestion $result
230     *
231     * @phpcs:ignore Generic.Files.LineLength
232     * @phan-return (false|array{pageIdentity:PageIdentity,suggestion:?SearchSuggestion,result:?SearchResult,redirect:?PageIdentity,anchor:?string}) $pageInfos
233     * @return bool|array Objects representing a given page:
234     *   - pageIdentity: PageIdentity of page to return as the match
235     *   - suggestion: SearchSuggestion or null if $searchResponse is SearchResults
236     *   - result: SearchResult or null if $searchResponse is SearchSuggestions
237     *   - redirect: PageIdentity|null depending on if the SearchResult|SearchSuggestion was a redirect
238     *      - anchor: string|null if the SearchResult|SearchSuggestion was a redirect, this is the page anchor (if any)
239     */
240    private function buildSinglePage( $title, $result ) {
241        $redirectTarget = $title->canExist() ? $this->redirectLookup->getRedirectTarget( $title ) : null;
242        // Our page has a redirect that is not in a virtual namespace and is not an interwiki link.
243        // See T301346, T303352
244        if ( $redirectTarget && $redirectTarget->getNamespace() > -1 && !$redirectTarget->isExternal() ) {
245            $redirectSource = $title;
246            $anchor = $redirectTarget->getFragment();
247            $title = $this->pageStore->getPageForLink( $redirectTarget );
248        } else {
249            $redirectSource = null;
250            $anchor = null;
251        }
252        if ( !$title || !$this->getAuthority()->probablyCan( 'read', $title ) ) {
253            return false;
254        }
255        return [
256            'pageIdentity' => $title,
257            'suggestion' => $result instanceof SearchSuggestion ? $result : null,
258            'result' => $result instanceof SearchResult ? $result : null,
259            'redirect' => $redirectSource,
260            'anchor' => $anchor
261        ];
262    }
263
264    /**
265     * Turn array of page info into serializable array with common information about the page
266     * @param array $pageInfos Page Info objects
267     * @param array $thumbsAndDesc Associative array mapping pageId to array of description and thumbnail
268     * @phpcs:ignore Generic.Files.LineLength
269     * @phan-param array<int,array{pageIdentity:PageIdentity,suggestion:SearchSuggestion,result:SearchResult,redirect:?PageIdentity,anchor:?string}> $pageInfos
270     * @phan-param array<int,array{description:array,thumbnail:array}> $thumbsAndDesc
271     *
272     * @phpcs:ignore Generic.Files.LineLength
273     * @phan-return array<int,array{id:int,key:string,title:string,excerpt:?string,matched_title:?string,anchor:?string, description:?array, thumbnail:?array}> $pages
274     * @return array[] of [ id, key, title, excerpt, matched_title, anchor ]
275     */
276    private function buildResultFromPageInfos( array $pageInfos, array $thumbsAndDesc ): array {
277        $pages = [];
278        foreach ( $pageInfos as $pageInfo ) {
279            [
280                'pageIdentity' => $page,
281                'suggestion' => $sugg,
282                'result' => $result,
283                'redirect' => $redirect,
284                'anchor' => $anchor
285            ] = $pageInfo;
286            $excerpt = $sugg ? $sugg->getText() : $result->getTextSnippet();
287            $id = ( $page instanceof PageIdentity && $page->canExist() ) ? $page->getId() : 0;
288            $pages[] = [
289                'id' => $id,
290                'key' => $this->titleFormatter->getPrefixedDBkey( $page ),
291                'title' => $this->titleFormatter->getPrefixedText( $page ),
292                'excerpt' => $excerpt ?: null,
293                'matched_title' => $redirect ? $this->titleFormatter->getPrefixedText( $redirect ) : null,
294                'anchor' => $anchor ?: null,
295                'description' => $id > 0 ? $thumbsAndDesc[$id]['description'] : null,
296                'thumbnail' => $id > 0 ? $thumbsAndDesc[$id]['thumbnail'] : null,
297            ];
298        }
299        return $pages;
300    }
301
302    /**
303     * Converts SearchResultThumbnail object into serializable array
304     *
305     * @param SearchResultThumbnail|null $thumbnail
306     *
307     * @return array|null
308     */
309    private function serializeThumbnail( ?SearchResultThumbnail $thumbnail ): ?array {
310        if ( $thumbnail == null ) {
311            return null;
312        }
313
314        return [
315            'mimetype' => $thumbnail->getMimeType(),
316            'width' => $thumbnail->getWidth(),
317            'height' => $thumbnail->getHeight(),
318            'duration' => $thumbnail->getDuration(),
319            'url' => $thumbnail->getUrl(),
320        ];
321    }
322
323    /**
324     * Turn page info into serializable array with description field for the page.
325     *
326     * The information about description should be provided by extension by implementing
327     * 'SearchResultProvideDescription' hook. Description is set to null if no extensions
328     * implement the hook.
329     * @param PageIdentity[] $pageIdentities
330     *
331     * @return array
332     */
333    private function buildDescriptionsFromPageIdentities( array $pageIdentities ) {
334        $descriptions = array_fill_keys( array_keys( $pageIdentities ), null );
335
336        $this->getHookRunner()->onSearchResultProvideDescription( $pageIdentities, $descriptions );
337
338        return array_map( static function ( $description ) {
339            return [ 'description' => $description ];
340        }, $descriptions );
341    }
342
343    /**
344     * Turn page info into serializable array with thumbnail information for the page.
345     *
346     * The information about thumbnail should be provided by extension by implementing
347     * 'SearchResultProvideThumbnail' hook. Thumbnail is set to null if no extensions implement
348     * the hook.
349     *
350     * @param PageIdentity[] $pageIdentities
351     *
352     * @return array
353     */
354    private function buildThumbnailsFromPageIdentities( array $pageIdentities ) {
355        $thumbnails = $this->searchResultThumbnailProvider->getThumbnails( $pageIdentities );
356        $thumbnails += array_fill_keys( array_keys( $pageIdentities ), null );
357
358        return array_map( function ( $thumbnail ) {
359            return [ 'thumbnail' => $this->serializeThumbnail( $thumbnail ) ];
360        }, $thumbnails );
361    }
362
363    /**
364     * @return Response
365     * @throws LocalizedHttpException
366     */
367    public function execute() {
368        $searchEngine = $this->createSearchEngine();
369        $pageInfos = $this->doSearch( $searchEngine );
370
371        // We can only pass validated "real" PageIdentities to our hook handlers below
372        $pageIdentities = array_reduce(
373            array_values( $pageInfos ),
374            static function ( $realPages, $item ) {
375                $page = $item['pageIdentity'];
376                if ( $page instanceof PageIdentity && $page->exists() ) {
377                    $realPages[$item['pageIdentity']->getId()] = $item['pageIdentity'];
378                }
379                return $realPages;
380            }, []
381        );
382
383        $descriptions = $this->buildDescriptionsFromPageIdentities( $pageIdentities );
384        $thumbs = $this->buildThumbnailsFromPageIdentities( $pageIdentities );
385
386        $thumbsAndDescriptions = [];
387        foreach ( $descriptions as $pageId => $description ) {
388            $thumbsAndDescriptions[$pageId] = $description + $thumbs[$pageId];
389        }
390
391        $result = $this->buildResultFromPageInfos( $pageInfos, $thumbsAndDescriptions );
392
393        $response = $this->getResponseFactory()->createJson( [ 'pages' => $result ] );
394
395        if ( $this->mode === self::COMPLETION_MODE && $this->completionCacheExpiry ) {
396            // Type-ahead completion matches should be cached by the client and
397            // in the CDN, especially for short prefixes.
398            // See also $wgSearchSuggestCacheExpiry and ApiOpenSearch
399            if ( $this->permissionManager->isEveryoneAllowed( 'read' ) ) {
400                $cacheControl = 'public, max-age=' . $this->completionCacheExpiry;
401            } else {
402                $cacheControl = 'no-store, max-age=0';
403            }
404            $response->setHeader( ResponseHeaders::CACHE_CONTROL, $cacheControl );
405        }
406
407        return $response;
408    }
409
410    /** @inheritDoc */
411    public function getParamSettings() {
412        return [
413            'q' => [
414                self::PARAM_SOURCE => 'query',
415                ParamValidator::PARAM_TYPE => 'string',
416                ParamValidator::PARAM_REQUIRED => true,
417                Handler::PARAM_DESCRIPTION => new MessageValue( 'rest-param-desc-search-q' ),
418            ],
419            'limit' => [
420                self::PARAM_SOURCE => 'query',
421                ParamValidator::PARAM_TYPE => 'integer',
422                ParamValidator::PARAM_REQUIRED => false,
423                ParamValidator::PARAM_DEFAULT => self::LIMIT,
424                IntegerDef::PARAM_MIN => 1,
425                IntegerDef::PARAM_MAX => self::MAX_LIMIT,
426                Handler::PARAM_DESCRIPTION => new MessageValue( 'rest-param-desc-search-limit' ),
427            ],
428        ];
429    }
430
431    public function getResponseBodySchemaFileName( string $method ): ?string {
432        return __DIR__ . '/Schema/SearchResults.json';
433    }
434}