Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
52.27% |
115 / 220 |
|
15.91% |
7 / 44 |
CRAP | |
0.00% |
0 / 1 |
SearchEngine | |
52.27% |
115 / 220 |
|
15.91% |
7 / 44 |
1076.18 | |
0.00% |
0 / 1 |
searchText | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
doSearchText | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
searchArchiveTitle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
doSearchArchiveTitle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
searchTitle | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
doSearchTitle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
maybePaginate | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
supports | |
50.00% |
2 / 4 |
|
0.00% |
0 / 1 |
6.00 | |||
setFeatureData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFeatureData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
normalizeText | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getNearMatcher | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
defaultNearMatcher | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
legalSearchChars | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setLimitOffset | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
setNamespaces | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
setShowSuggestion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getValidSorts | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setSort | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getSort | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
replacePrefixes | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
parseNamespacePrefixes | |
96.97% |
32 / 33 |
|
0.00% |
0 / 1 |
11 | |||
userHighlightPrefs | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
update | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
updateTitle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
delete | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTextFromContent | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
textAlreadyUpdatedForIndex | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
normalizeNamespaces | |
60.00% |
3 / 5 |
|
0.00% |
0 / 1 |
2.26 | |||
completionSearchBackendOverfetch | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
completionSearchBackend | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
3.02 | |||
completionSearch | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
completionSearchWithVariants | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
30 | |||
extractTitles | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
processCompletionResults | |
78.79% |
26 / 33 |
|
0.00% |
0 / 1 |
7.47 | |||
defaultPrefixSearch | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
simplePrefixSearch | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getProfiles | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
makeSearchFieldMapping | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getSearchIndexFields | |
81.82% |
18 / 22 |
|
0.00% |
0 / 1 |
7.29 | |||
augmentSearchResults | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
56 | |||
setHookContainer | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getHookContainer | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getHookRunner | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 |
1 | <?php |
2 | /** |
3 | * Basic search engine |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Search |
22 | */ |
23 | |
24 | /** |
25 | * @defgroup Search Search |
26 | */ |
27 | |
28 | use MediaWiki\Config\Config; |
29 | use MediaWiki\Content\Content; |
30 | use MediaWiki\HookContainer\HookContainer; |
31 | use MediaWiki\HookContainer\HookRunner; |
32 | use MediaWiki\MediaWikiServices; |
33 | use MediaWiki\Search\TitleMatcher; |
34 | use MediaWiki\Status\Status; |
35 | use MediaWiki\Title\Title; |
36 | use MediaWiki\User\User; |
37 | |
/**
 * Base class for search engine backends.
 *
 * Provides the public search entry points (full text, title, archive title and
 * completion search) together with default, mostly no-op implementations that
 * concrete engines are expected to override.
 *
 * @stable to extend
 * @ingroup Search
 */
abstract class SearchEngine {
	public const DEFAULT_SORT = 'relevance';

	/** @var string */
	public $prefix = '';

	/** @var int[]|null */
	public $namespaces = [ NS_MAIN ];

	/** @var int Maximum number of results to return */
	protected $limit = 10;

	/** @var int Number of results to skip before the first returned one */
	protected $offset = 0;

	/**
	 * @var string[]
	 * @deprecated since 1.34
	 */
	protected $searchTerms = [];

	/** @var bool */
	protected $showSuggestion = true;
	/** @var string */
	private $sort = self::DEFAULT_SORT;

	/** @var array Feature values */
	protected $features = [];

	/** @var HookContainer */
	private $hookContainer;

	/** @var HookRunner */
	private $hookRunner;

	/** Profile type for completionSearch */
	public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';

	/** Profile type for query independent ranking features */
	public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';

	/** Integer flag for legalSearchChars: includes all chars allowed in a search query */
	protected const CHARS_ALL = 1;

	/** Integer flag for legalSearchChars: includes all chars allowed in a search term */
	protected const CHARS_NO_SYNTAX = 2;

	/**
	 * Perform a full text search query and return a result set.
	 * If full text searches are not supported or disabled, return null.
	 *
	 * @note As of 1.32 overriding this function is deprecated. It will
	 * be converted to final in 1.34. Override self::doSearchText().
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null
	 */
	public function searchText( $term ) {
		return $this->maybePaginate( function () use ( $term ) {
			return $this->doSearchText( $term );
		} );
	}

	/**
	 * Perform a full text search query and return a result set.
	 * The base implementation performs no search and returns null.
	 *
	 * @stable to override
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null
	 * @since 1.32
	 */
	protected function doSearchText( $term ) {
		return null;
	}

	/**
	 * Perform a title search in the article archive.
	 * NOTE: these results still should be filtered by
	 * matching against PageArchive, permissions checks etc
	 * The results returned by this methods are only suggestions and
	 * may not end up being shown to the user.
	 *
	 * @note As of 1.32 overriding this function is deprecated. It will
	 * be converted to final in 1.34. Override self::doSearchArchiveTitle().
	 *
	 * @param string $term Raw search term
	 * @return Status
	 * @since 1.29
	 */
	public function searchArchiveTitle( $term ) {
		return $this->doSearchArchiveTitle( $term );
	}

	/**
	 * Perform a title search in the article archive.
	 * The base implementation returns a good Status wrapping an empty array.
	 *
	 * @stable to override
	 *
	 * @param string $term Raw search term
	 * @return Status
	 * @since 1.32
	 */
	protected function doSearchArchiveTitle( $term ) {
		return Status::newGood( [] );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 * If title searches are not supported or disabled, return null.
	 * STUB
	 *
	 * @note As of 1.32 overriding this function is deprecated. It will
	 * be converted to final in 1.34. Override self::doSearchTitle().
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|null
	 */
	public function searchTitle( $term ) {
		return $this->maybePaginate( function () use ( $term ) {
			return $this->doSearchTitle( $term );
		} );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 * The base implementation performs no search and returns null.
	 *
	 * @stable to override
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|null
	 * @since 1.32
	 */
	protected function doSearchTitle( $term ) {
		return null;
	}

	/**
	 * Performs an overfetch and shrink operation to determine if
	 * the next page is available for search engines that do not
	 * explicitly implement their own pagination.
	 *
	 * Engines implementing PaginatingSearchEngine are called as-is. For all
	 * others the limit is raised by one for the duration of the call and the
	 * returned result set (if any) is shrunk back to the configured limit.
	 *
	 * @param Closure $fn Takes no arguments
	 * @return ISearchResultSet|Status<ISearchResultSet>|null Result of calling $fn
	 */
	private function maybePaginate( Closure $fn ) {
		if ( $this instanceof PaginatingSearchEngine ) {
			return $fn();
		}
		$this->limit++;
		try {
			$resultSetOrStatus = $fn();
		} finally {
			// Always undo the overfetch, even if the search threw
			$this->limit--;
		}

		// The result set may be returned directly or wrapped in a Status
		$resultSet = null;
		if ( $resultSetOrStatus instanceof ISearchResultSet ) {
			$resultSet = $resultSetOrStatus;
		} elseif ( $resultSetOrStatus instanceof Status &&
			$resultSetOrStatus->getValue() instanceof ISearchResultSet
		) {
			$resultSet = $resultSetOrStatus->getValue();
		}
		if ( $resultSet ) {
			$resultSet->shrink( $this->limit );
		}

		return $resultSetOrStatus;
	}

	/**
	 * Tell whether this engine supports the named feature.
	 * The base implementation only reports 'search-update' as supported.
	 *
	 * @since 1.18
	 * @stable to override
	 *
	 * @param string $feature
	 * @return bool
	 */
	public function supports( $feature ) {
		switch ( $feature ) {
			case 'search-update':
				return true;
			case 'title-suffix-filter':
			default:
				return false;
		}
	}

	/**
	 * Way to pass custom data for engines
	 * @since 1.18
	 * @param string $feature
	 * @param mixed $data
	 */
	public function setFeatureData( $feature, $data ) {
		$this->features[$feature] = $data;
	}

	/**
	 * Way to retrieve custom data set by setFeatureData
	 * or by the engine itself.
	 * @since 1.29
	 * @param string $feature feature name
	 * @return mixed the feature value or null if unset
	 */
	public function getFeatureData( $feature ) {
		return $this->features[$feature] ?? null;
	}

	/**
	 * When overridden in derived class, performs database-specific conversions
	 * on text to be used for searching or updating search index.
	 * Default implementation only applies the content language's word
	 * segmentation to $string.
	 *
	 * @param string $string String to process
	 * @return string
	 */
	public function normalizeText( $string ) {
		// Some languages such as Chinese require word segmentation
		return MediaWikiServices::getInstance()->getContentLanguage()->segmentByWord( $string );
	}

	/**
	 * Get service class to finding near matches.
	 *
	 * @param Config $config Not used by this implementation
	 * @return TitleMatcher
	 * @deprecated since 1.40, use MediaWikiServices::getInstance()->getTitleMatcher()
	 */
	public function getNearMatcher( Config $config ) {
		return MediaWikiServices::getInstance()->getTitleMatcher();
	}

	/**
	 * Get near matcher for default SearchEngine.
	 *
	 * @return TitleMatcher
	 * @deprecated since 1.40, MediaWikiServices::getInstance()->getTitleMatcher()
	 */
	protected static function defaultNearMatcher() {
		wfDeprecated( __METHOD__, '1.40' );
		return MediaWikiServices::getInstance()->getTitleMatcher();
	}

	/**
	 * Get chars legal for search
	 * @param int $type type of search chars (see self::CHARS_ALL
	 * and self::CHARS_NO_SYNTAX). Defaults to CHARS_ALL.
	 * The base implementation returns the same set for every type.
	 * @return string
	 */
	public function legalSearchChars( $type = self::CHARS_ALL ) {
		return "A-Za-z_'.0-9\\x80-\\xFF\\-";
	}

	/**
	 * Set the maximum number of results to return
	 * and how many to skip before returning the first.
	 * Both values are converted with intval().
	 *
	 * @param int $limit
	 * @param int $offset
	 */
	public function setLimitOffset( $limit, $offset = 0 ) {
		$this->limit = intval( $limit );
		$this->offset = intval( $offset );
	}

	/**
	 * Set which namespaces the search should include.
	 * Give an array of namespace index numbers.
	 *
	 * Negative ids are always kept; any other id is kept only if it is a key
	 * of the configured searchable namespaces. An empty or null input results
	 * in an empty namespace list. Keys of the input array are preserved.
	 *
	 * @param int[]|null $namespaces
	 */
	public function setNamespaces( $namespaces ) {
		if ( $namespaces ) {
			// Filter namespaces to only keep valid ones
			$validNs = MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces();
			$namespaces = array_filter( $namespaces, static function ( $ns ) use( $validNs ) {
				return $ns < 0 || isset( $validNs[$ns] );
			} );
		} else {
			$namespaces = [];
		}
		$this->namespaces = $namespaces;
	}

	/**
	 * Set whether the searcher should try to build a suggestion. Note: some searchers
	 * don't support building a suggestion in the first place and others don't respect
	 * this flag.
	 *
	 * @param bool $showSuggestion Should the searcher try to build suggestions
	 */
	public function setShowSuggestion( $showSuggestion ) {
		$this->showSuggestion = $showSuggestion;
	}

	/**
	 * Get the valid sort directions. All search engines support 'relevance' but others
	 * might support more. The default in all implementations must be 'relevance.'
	 *
	 * @since 1.25
	 * @stable to override
	 *
	 * @return string[] the valid sort directions for setSort
	 */
	public function getValidSorts() {
		return [ self::DEFAULT_SORT ];
	}

	/**
	 * Set the sort direction of the search results. Must be one returned by
	 * SearchEngine::getValidSorts()
	 *
	 * @since 1.25
	 * @param string $sort sort direction for query result
	 * @throws InvalidArgumentException if $sort is not a valid sort direction
	 */
	public function setSort( $sort ) {
		$validSorts = $this->getValidSorts();
		// Strict comparison: sort names are plain strings, no type juggling wanted
		if ( !in_array( $sort, $validSorts, true ) ) {
			throw new InvalidArgumentException( "Invalid sort: $sort. " .
				"Must be one of: " . implode( ', ', $validSorts ) );
		}
		$this->sort = $sort;
	}

	/**
	 * Get the sort direction of the search results
	 *
	 * @since 1.25
	 * @return string
	 */
	public function getSort() {
		return $this->sort;
	}

	/**
	 * Parse some common prefixes: all (search everything)
	 * or namespace names and set the list of namespaces
	 * of this class accordingly.
	 *
	 * The base implementation returns $query unchanged.
	 *
	 * @deprecated since 1.32; should be handled internally by the search engine
	 * @param string $query
	 * @return string
	 */
	public function replacePrefixes( $query ) {
		return $query;
	}

	/**
	 * Parse some common prefixes: all (search everything)
	 * or namespace names
	 *
	 * @param string $query
	 * @param bool $withAllKeyword activate support of the "all:" keyword and its
	 * translations to activate searching on all namespaces.
	 * @param bool $withPrefixSearchExtractNamespaceHook call the PrefixSearchExtractNamespace hook
	 * if classic namespace identification did not match.
	 * @return false|array false if no namespace was extracted, an array
	 * with the parsed query at index 0 and an array of namespaces at index
	 * 1 (or null for all namespaces).
	 */
	public static function parseNamespacePrefixes(
		$query,
		$withAllKeyword = true,
		$withPrefixSearchExtractNamespaceHook = false
	) {
		$colonPos = strpos( $query, ':' );
		if ( $colonPos === false ) { // nothing to do
			return false;
		}
		$parsed = $query;
		$extractedNamespace = null;

		$allQuery = false;
		if ( $withAllKeyword ) {
			$allkeywords = [ wfMessage( 'searchall' )->inContentLanguage()->text() . ":" ];
			// force all: so that we have a common syntax for all the wikis
			if ( !in_array( 'all:', $allkeywords, true ) ) {
				$allkeywords[] = 'all:';
			}

			foreach ( $allkeywords as $kw ) {
				if ( str_starts_with( $query, $kw ) ) {
					$parsed = substr( $query, strlen( $kw ) );
					$allQuery = true;
					break;
				}
			}
		}

		if ( !$allQuery ) {
			// Namespace names use underscores where the user may have typed spaces
			$prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
			$services = MediaWikiServices::getInstance();
			$index = $services->getContentLanguage()->getNsIndex( $prefix );
			if ( $index !== false ) {
				$extractedNamespace = [ $index ];
				// Everything after the first colon is the actual query
				$parsed = substr( $query, $colonPos + 1 );
			} elseif ( $withPrefixSearchExtractNamespaceHook ) {
				$hookNamespaces = [ NS_MAIN ];
				$hookQuery = $query;
				( new HookRunner( $services->getHookContainer() ) )
					->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery );
				// The hook signals a match by rewriting the query
				if ( $hookQuery !== $query ) {
					$parsed = $hookQuery;
					$extractedNamespace = $hookNamespaces;
				} else {
					return false;
				}
			} else {
				return false;
			}
		}

		return [ $parsed, $extractedNamespace ];
	}

	/**
	 * Find snippet highlight settings for all users
	 * @return array Contextlines, contextchars
	 * @deprecated since 1.34; use the SearchHighlighter constants directly
	 * @see SearchHighlighter::DEFAULT_CONTEXT_CHARS
	 * @see SearchHighlighter::DEFAULT_CONTEXT_LINES
	 */
	public static function userHighlightPrefs() {
		$contextlines = SearchHighlighter::DEFAULT_CONTEXT_LINES;
		$contextchars = SearchHighlighter::DEFAULT_CONTEXT_CHARS;
		return [ $contextlines, $contextchars ];
	}

	/**
	 * Create or update the search index record for the given page.
	 * Title and text should be pre-processed.
	 * STUB
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 */
	public function update( $id, $title, $text ) {
		// no-op
	}

	/**
	 * Update a search index record's title only.
	 * Title should be pre-processed.
	 * STUB
	 *
	 * @param int $id
	 * @param string $title
	 */
	public function updateTitle( $id, $title ) {
		// no-op
	}

	/**
	 * Delete an indexed page
	 * Title should be pre-processed.
	 * STUB
	 *
	 * @param int $id Page id that was deleted
	 * @param string $title Title of page that was deleted
	 */
	public function delete( $id, $title ) {
		// no-op
	}

	/**
	 * Get the raw text for updating the index from a content object
	 * Nicer search backends could possibly do something cooler than
	 * just returning raw text
	 *
	 * @todo This isn't ideal, we'd really like to have content-specific handling here
	 * @param Title $t Title we're indexing
	 * @param Content|null $c Content of the page to index
	 * @return string Empty string when no content is given
	 * @deprecated since 1.34 use Content::getTextForSearchIndex directly
	 */
	public function getTextFromContent( Title $t, ?Content $c = null ) {
		return $c ? $c->getTextForSearchIndex() : '';
	}

	/**
	 * If an implementation of SearchEngine handles all of its own text processing
	 * in getTextFromContent() and doesn't require SearchUpdate::updateText()'s
	 * rather silly handling, it should return true here instead.
	 *
	 * @return bool
	 * @deprecated since 1.34 no longer needed since getTextFromContent is being deprecated
	 */
	public function textAlreadyUpdatedForIndex() {
		return false;
	}

	/**
	 * Makes search simple string if it was namespaced.
	 * Sets namespaces of the search to namespaces extracted from string.
	 * @param string $search
	 * @return string Simplified search string
	 */
	protected function normalizeNamespaces( $search ) {
		$queryAndNs = self::parseNamespacePrefixes( $search, false, true );
		if ( $queryAndNs !== false ) {
			$this->setNamespaces( $queryAndNs[1] );
			return $queryAndNs[0];
		}
		return $search;
	}

	/**
	 * Perform an overfetch of completion search results. This allows
	 * determining if another page of results is available.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	protected function completionSearchBackendOverfetch( $search ) {
		$this->limit++;
		try {
			return $this->completionSearchBackend( $search );
		} finally {
			// Always undo the overfetch, even if the backend threw
			$this->limit--;
		}
	}

	/**
	 * Perform a completion search.
	 * Does not resolve namespaces and does not check variants.
	 * Search engine implementations may want to override this function.
	 *
	 * @stable to override
	 *
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	protected function completionSearchBackend( $search ) {
		$results = [];

		$search = trim( $search );

		if ( !in_array( NS_SPECIAL, $this->namespaces ) && // We do not run hook on Special: search
			!$this->getHookRunner()->onPrefixSearchBackend(
				$this->namespaces, $search, $this->limit, $results, $this->offset )
		) {
			// False means hook worked.
			// FIXME: Yes, the API is weird. That's why it is going to be deprecated.

			return SearchSuggestionSet::fromStrings( $results );
		} else {
			// Hook did not do the job, use default simple search
			$results = $this->simplePrefixSearch( $search );
			return SearchSuggestionSet::fromTitles( $results );
		}
	}

	/**
	 * Perform a completion search.
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	public function completionSearch( $search ) {
		if ( trim( $search ) === '' ) {
			return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
		}
		$search = $this->normalizeNamespaces( $search );
		$suggestions = $this->completionSearchBackendOverfetch( $search );
		return $this->processCompletionResults( $search, $suggestions );
	}

	/**
	 * Perform a completion search with variants.
	 *
	 * When the primary search returns fewer than limit + 1 results, the
	 * language variants of $search are tried in turn until the gap is filled.
	 * The limit and offset configured on this engine are restored once the
	 * fallback searches are done, so the final result processing and later
	 * calls see the caller's values.
	 *
	 * @stable to override
	 *
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	public function completionSearchWithVariants( $search ) {
		if ( trim( $search ) === '' ) {
			return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
		}
		$search = $this->normalizeNamespaces( $search );

		$results = $this->completionSearchBackendOverfetch( $search );
		$fallbackLimit = 1 + $this->limit - $results->getSize();
		if ( $fallbackLimit > 0 ) {
			$services = MediaWikiServices::getInstance();
			$fallbackSearches = $services->getLanguageConverterFactory()
				->getLanguageConverter( $services->getContentLanguage() )
				->autoConvertToAllVariants( $search );
			$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

			// The fallback searches temporarily narrow the limit (and reset the
			// offset); remember the caller's values so they can be put back.
			$originalLimit = $this->limit;
			$originalOffset = $this->offset;
			try {
				foreach ( $fallbackSearches as $fbs ) {
					$this->setLimitOffset( $fallbackLimit );
					$fallbackSearchResult = $this->completionSearch( $fbs );
					$results->appendAll( $fallbackSearchResult );
					$fallbackLimit -= $fallbackSearchResult->getSize();
					if ( $fallbackLimit <= 0 ) {
						break;
					}
				}
			} finally {
				$this->setLimitOffset( $originalLimit, $originalOffset );
			}
		}
		return $this->processCompletionResults( $search, $results );
	}

	/**
	 * Extract titles from completion results
	 * @param SearchSuggestionSet $completionResults
	 * @return Title[]
	 */
	public function extractTitles( SearchSuggestionSet $completionResults ) {
		return $completionResults->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} );
	}

	/**
	 * Process completion search results.
	 * Resolves the titles and rescores.
	 * @param string $search
	 * @param SearchSuggestionSet $suggestions
	 * @return SearchSuggestionSet
	 */
	protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
		// We over-fetched to determine pagination. Shrink back down if we have extra results
		// and mark if pagination is possible
		$suggestions->shrink( $this->limit );

		$search = trim( $search );
		// preload the titles with LinkBatch
		$linkBatchFactory = MediaWikiServices::getInstance()->getLinkBatchFactory();
		$lb = $linkBatchFactory->newLinkBatch( $suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} ) );
		$lb->setCaller( __METHOD__ );
		$lb->execute();

		$diff = $suggestions->filter( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle()->isKnown();
		} );
		if ( $diff > 0 ) {
			MediaWikiServices::getInstance()->getStatsdDataFactory()
				->updateCount( 'search.completion.missing', $diff );
		}

		// SearchExactMatchRescorer should probably be refactored to work directly on top of a SearchSuggestionSet
		// instead of converting it to array and trying to infer if it has re-scored anything by inspected the head
		// of the returned array.
		$results = $suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle()->getPrefixedText();
		} );

		$rescorer = new SearchExactMatchRescorer();
		if ( $this->offset === 0 ) {
			// Rescore results with an exact title match
			// NOTE: in some cases like cross-namespace redirects
			// (frequently used as shortcuts e.g. WP:WP on huwiki) some
			// backends like Cirrus will return no results. We should still
			// try an exact title match to workaround this limitation
			$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
		} else {
			// No need to rescore if offset is not 0
			// The exact match must have been returned at position 0
			// if it existed.
			$rescoredResults = $results;
		}

		if ( count( $rescoredResults ) > 0 ) {
			// Strict search: titles are strings and numeric-looking titles
			// (e.g. "1e3" and "1000") must not be treated as equal.
			$found = array_search( $rescoredResults[0], $results, true );
			if ( $found === false ) {
				// If the first result is not in the previous array it
				// means that we found a new exact match
				$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
				$suggestions->prepend( $exactMatch );
				if ( $rescorer->getReplacedRedirect() !== null ) {
					// the exact match rescorer replaced one of the suggestion found by the search engine
					// let's remove it from our suggestions set to avoid showing duplicates
					$suggestions->remove( SearchSuggestion::fromTitle( 0,
						Title::newFromText( $rescorer->getReplacedRedirect() ) ) );
				}
				$suggestions->shrink( $this->limit );
			} else {
				// if the first result is not the same we need to rescore
				if ( $found > 0 ) {
					$suggestions->rescore( $found );
				}
			}
		}

		return $suggestions;
	}

	/**
	 * Simple prefix search for subpages.
	 * @param string $search
	 * @return Title[]
	 */
	public function defaultPrefixSearch( $search ) {
		if ( trim( $search ) === '' ) {
			return [];
		}

		$search = $this->normalizeNamespaces( $search );
		return $this->simplePrefixSearch( $search );
	}

	/**
	 * Call out to simple search backend.
	 * Defaults to TitlePrefixSearch.
	 * @param string $search
	 * @return Title[]
	 */
	protected function simplePrefixSearch( $search ) {
		// Use default database prefix search
		$backend = new TitlePrefixSearch;
		return $backend->defaultSearchBackend( $this->namespaces, $search, $this->limit, $this->offset );
	}

	/**
	 * Get a list of supported profiles.
	 * Some search engine implementations may expose specific profiles to fine-tune
	 * its behaviors.
	 * The profile can be passed as a feature data with setFeatureData( $profileType, $profileName )
	 * The array returned by this function contains the following keys:
	 * - name: the profile name to use with setFeatureData
	 * - desc-message: the i18n description
	 * - default: set to true if this profile is the default
	 *
	 * @since 1.28
	 * @stable to override
	 *
	 * @param string $profileType the type of profiles
	 * @param User|null $user the user requesting the list of profiles
	 * @return array|null the list of profiles or null if none available
	 * @phan-return null|array{name:string,desc-message:string,default?:bool}
	 */
	public function getProfiles( $profileType, ?User $user = null ) {
		return null;
	}

	/**
	 * Create a search field definition.
	 * Specific search engines should override this method to create search fields.
	 * The base implementation always returns a NullIndexField.
	 * @stable to override
	 *
	 * @param string $name
	 * @param string $type One of the types in SearchIndexField::INDEX_TYPE_*
	 * @return SearchIndexField
	 * @since 1.28
	 */
	public function makeSearchFieldMapping( $name, $type ) {
		return new NullIndexField();
	}

	/**
	 * Get fields for search index
	 * @since 1.28
	 * @return SearchIndexField[] Index field definitions for all content handlers
	 * @throws InvalidArgumentException if two handlers define a field of the
	 *  same name and the definitions cannot be merged
	 */
	public function getSearchIndexFields() {
		// Resolve the factory once instead of on every loop iteration
		$contentHandlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
		$models = $contentHandlerFactory->getContentModels();
		$fields = [];
		$seenHandlers = new SplObjectStorage();
		foreach ( $models as $model ) {
			try {
				$handler = $contentHandlerFactory->getContentHandler( $model );
			} catch ( MWUnknownContentModelException $e ) {
				// If we can find no handler, ignore it
				continue;
			}
			// Several models can have the same handler, so avoid processing it repeatedly
			if ( $seenHandlers->contains( $handler ) ) {
				// We already did this one
				continue;
			}
			$seenHandlers->attach( $handler );
			$handlerFields = $handler->getFieldsForSearchIndex( $this );
			foreach ( $handlerFields as $fieldName => $fieldData ) {
				if ( empty( $fields[$fieldName] ) ) {
					$fields[$fieldName] = $fieldData;
				} else {
					// TODO: do we allow some clashes with the same type or reject all of them?
					$mergeDef = $fields[$fieldName]->merge( $fieldData );
					if ( !$mergeDef ) {
						throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
					}
					$fields[$fieldName] = $mergeDef;
				}
			}
		}
		// Hook to allow extensions to produce search mapping fields
		$this->getHookRunner()->onSearchIndexFields( $fields, $this );
		return $fields;
	}

	/**
	 * Augment search results with extra data.
	 *
	 * Augmentors are collected through the SearchResultsAugment hook; row
	 * augmentors are wrapped in PerRowAugmentor so that everything can be run
	 * as a set augmentor.
	 *
	 * @param ISearchResultSet $resultSet Result set to augment in place
	 * @throws InvalidArgumentException if the same name is registered both as
	 *  a row augmentor and as a set augmentor
	 */
	public function augmentSearchResults( ISearchResultSet $resultSet ) {
		$setAugmentors = [];
		$rowAugmentors = [];
		$this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors );
		if ( !$setAugmentors && !$rowAugmentors ) {
			// We're done here
			return;
		}

		// Convert row augmentors to set augmentor
		foreach ( $rowAugmentors as $name => $row ) {
			if ( isset( $setAugmentors[$name] ) ) {
				throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
			}
			$setAugmentors[$name] = new PerRowAugmentor( $row );
		}

		/**
		 * @var string $name
		 * @var ResultSetAugmentor $augmentor
		 */
		foreach ( $setAugmentors as $name => $augmentor ) {
			$data = $augmentor->augmentAll( $resultSet );
			if ( $data ) {
				$resultSet->setAugmentedData( $name, $data );
			}
		}
	}

	/**
	 * Set the hook container and create the matching hook runner.
	 *
	 * @since 1.35
	 * @internal
	 * @param HookContainer $hookContainer
	 */
	public function setHookContainer( HookContainer $hookContainer ) {
		$this->hookContainer = $hookContainer;
		$this->hookRunner = new HookRunner( $hookContainer );
	}

	/**
	 * Get a HookContainer, for running extension hooks or for hook metadata.
	 *
	 * @since 1.35
	 * @return HookContainer
	 */
	protected function getHookContainer(): HookContainer {
		// This fallback shouldn't be hit in core, but it is needed for CirrusSearch
		// which commonly creates a CirrusSearch object without cirrus being
		// configured in $wgSearchType/$wgSearchTypeAlternatives.
		$this->hookContainer ??= MediaWikiServices::getInstance()->getHookContainer();
		return $this->hookContainer;
	}

	/**
	 * Get a HookRunner for running core hooks.
	 *
	 * @internal This is for use by core only. Hook interfaces may be removed
	 *   without notice.
	 * @since 1.35
	 * @return HookRunner
	 */
	protected function getHookRunner(): HookRunner {
		$this->hookRunner ??= new HookRunner( $this->getHookContainer() );
		return $this->hookRunner;
	}

}