Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
61.78% |
139 / 225 |
|
18.18% |
8 / 44 |
CRAP | |
0.00% |
0 / 1 |
SearchEngine | |
61.78% |
139 / 225 |
|
18.18% |
8 / 44 |
598.96 | |
0.00% |
0 / 1 |
searchText | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
doSearchText | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
searchArchiveTitle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
doSearchArchiveTitle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
searchTitle | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
doSearchTitle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
maybePaginate | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
42 | |||
supports | |
50.00% |
2 / 4 |
|
0.00% |
0 / 1 |
6.00 | |||
setFeatureData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getFeatureData | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
normalizeText | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getNearMatcher | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
defaultNearMatcher | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
legalSearchChars | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setLimitOffset | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
setNamespaces | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
setShowSuggestion | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getValidSorts | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
setSort | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
getSort | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
replacePrefixes | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
parseNamespacePrefixes | |
96.97% |
32 / 33 |
|
0.00% |
0 / 1 |
11 | |||
userHighlightPrefs | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
update | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
updateTitle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
delete | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getTextFromContent | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
6 | |||
textAlreadyUpdatedForIndex | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
normalizeNamespaces | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
completionSearchBackendOverfetch | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
completionSearchBackend | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
3.02 | |||
completionSearch | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
completionSearchWithVariants | |
90.91% |
20 / 22 |
|
0.00% |
0 / 1 |
5.02 | |||
extractTitles | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
processCompletionResults | |
80.00% |
28 / 35 |
|
0.00% |
0 / 1 |
7.39 | |||
defaultPrefixSearch | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
simplePrefixSearch | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getProfiles | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
makeSearchFieldMapping | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getSearchIndexFields | |
81.82% |
18 / 22 |
|
0.00% |
0 / 1 |
7.29 | |||
augmentSearchResults | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
56 | |||
setHookContainer | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
getHookContainer | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getHookRunner | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 |
1 | <?php |
2 | /** |
3 | * Basic search engine |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Search |
22 | */ |
23 | |
24 | /** |
25 | * @defgroup Search Search |
26 | */ |
27 | |
28 | use MediaWiki\Config\Config; |
29 | use MediaWiki\Content\Content; |
30 | use MediaWiki\Exception\MWUnknownContentModelException; |
31 | use MediaWiki\HookContainer\HookContainer; |
32 | use MediaWiki\HookContainer\HookRunner; |
33 | use MediaWiki\MediaWikiServices; |
34 | use MediaWiki\Search\TitleMatcher; |
35 | use MediaWiki\Status\Status; |
36 | use MediaWiki\Title\Title; |
37 | use MediaWiki\User\User; |
38 | |
39 | /** |
40 | * Abstract base class for search engine implementations |
41 | * @stable to extend |
42 | * @ingroup Search |
43 | */ |
44 | abstract class SearchEngine { |
45 | public const DEFAULT_SORT = 'relevance'; |
46 | |
47 | /** @var string */ |
48 | public $prefix = ''; |
49 | |
50 | /** @var int[]|null */ |
51 | public $namespaces = [ NS_MAIN ]; |
52 | |
53 | /** @var int */ |
54 | protected $limit = 10; |
55 | |
56 | /** @var int */ |
57 | protected $offset = 0; |
58 | |
59 | /** |
60 | * @var string[] |
61 | * @deprecated since 1.34 |
62 | */ |
63 | protected $searchTerms = []; |
64 | |
65 | /** @var bool */ |
66 | protected $showSuggestion = true; |
67 | /** @var string */ |
68 | private $sort = self::DEFAULT_SORT; |
69 | |
70 | /** @var array Feature values */ |
71 | protected $features = []; |
72 | |
73 | /** @var HookContainer */ |
74 | private $hookContainer; |
75 | |
76 | /** @var HookRunner */ |
77 | private $hookRunner; |
78 | |
79 | /** Profile type for completionSearch */ |
80 | public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile'; |
81 | |
82 | /** Profile type for query independent ranking features */ |
83 | public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile'; |
84 | |
85 | /** Integer flag for legalSearchChars: includes all chars allowed in a search query */ |
86 | protected const CHARS_ALL = 1; |
87 | |
88 | /** Integer flag for legalSearchChars: includes all chars allowed in a search term */ |
89 | protected const CHARS_NO_SYNTAX = 2; |
90 | |
91 | /** |
92 | * Perform a full text search query and return a result set. |
93 | * If full text searches are not supported or disabled, return null. |
94 | * |
95 | * @note As of 1.32 overriding this function is deprecated. It will |
96 | * be converted to final in 1.34. Override self::doSearchText(). |
97 | * |
98 | * @param string $term Raw search term |
99 | * @return ISearchResultSet|Status|null |
100 | */ |
101 | public function searchText( $term ) { |
102 | return $this->maybePaginate( function () use ( $term ) { |
103 | return $this->doSearchText( $term ); |
104 | } ); |
105 | } |
106 | |
107 | /** |
108 | * Perform a full text search query and return a result set. |
109 | * |
110 | * @stable to override |
111 | * |
112 | * @param string $term Raw search term |
113 | * @return ISearchResultSet|Status|null |
114 | * @since 1.32 |
115 | */ |
116 | protected function doSearchText( $term ) { |
117 | return null; |
118 | } |
119 | |
120 | /** |
121 | * Perform a title search in the article archive. |
122 | * NOTE: these results still should be filtered by |
123 | * matching against PageArchive, permissions checks etc |
124 | * The results returned by this method are only suggestions and |
125 | * may not end up being shown to the user. |
126 | * |
127 | * @note As of 1.32 overriding this function is deprecated. It will |
128 | * be converted to final in 1.34. Override self::doSearchArchiveTitle(). |
129 | * |
130 | * @param string $term Raw search term |
131 | * @return Status |
132 | * @since 1.29 |
133 | */ |
134 | public function searchArchiveTitle( $term ) { |
135 | return $this->doSearchArchiveTitle( $term ); |
136 | } |
137 | |
138 | /** |
139 | * Perform a title search in the article archive. |
140 | * |
141 | * @stable to override |
142 | * |
143 | * @param string $term Raw search term |
144 | * @return Status |
145 | * @since 1.32 |
146 | */ |
147 | protected function doSearchArchiveTitle( $term ) { |
148 | return Status::newGood( [] ); |
149 | } |
150 | |
151 | /** |
152 | * Perform a title-only search query and return a result set. |
153 | * If title searches are not supported or disabled, return null. |
154 | * STUB |
155 | * |
156 | * @note As of 1.32 overriding this function is deprecated. It will |
157 | * be converted to final in 1.34. Override self::doSearchTitle(). |
158 | * |
159 | * @param string $term Raw search term |
160 | * @return ISearchResultSet|null |
161 | */ |
162 | public function searchTitle( $term ) { |
163 | return $this->maybePaginate( function () use ( $term ) { |
164 | return $this->doSearchTitle( $term ); |
165 | } ); |
166 | } |
167 | |
168 | /** |
169 | * Perform a title-only search query and return a result set. |
170 | * |
171 | * @stable to override |
172 | * |
173 | * @param string $term Raw search term |
174 | * @return ISearchResultSet|null |
175 | * @since 1.32 |
176 | */ |
177 | protected function doSearchTitle( $term ) { |
178 | return null; |
179 | } |
180 | |
181 | /** |
182 | * Performs an overfetch and shrink operation to determine if |
183 | * the next page is available for search engines that do not |
184 | * explicitly implement their own pagination. |
185 | * |
186 | * @param Closure $fn Takes no arguments |
187 | * @return ISearchResultSet|Status<ISearchResultSet>|null Result of calling $fn |
188 | */ |
189 | private function maybePaginate( Closure $fn ) { |
190 | if ( $this instanceof PaginatingSearchEngine ) { |
191 | return $fn(); |
192 | } |
193 | $this->limit++; |
194 | try { |
195 | $resultSetOrStatus = $fn(); |
196 | } finally { |
197 | $this->limit--; |
198 | } |
199 | |
200 | $resultSet = null; |
201 | if ( $resultSetOrStatus instanceof ISearchResultSet ) { |
202 | $resultSet = $resultSetOrStatus; |
203 | } elseif ( $resultSetOrStatus instanceof Status && |
204 | $resultSetOrStatus->getValue() instanceof ISearchResultSet |
205 | ) { |
206 | $resultSet = $resultSetOrStatus->getValue(); |
207 | } |
208 | if ( $resultSet ) { |
209 | $resultSet->shrink( $this->limit ); |
210 | } |
211 | |
212 | return $resultSetOrStatus; |
213 | } |
214 | |
215 | /** |
216 | * @since 1.18 |
217 | * @stable to override |
218 | * |
219 | * @param string $feature |
220 | * @return bool |
221 | */ |
222 | public function supports( $feature ) { |
223 | switch ( $feature ) { |
224 | case 'search-update': |
225 | return true; |
226 | case 'title-suffix-filter': |
227 | default: |
228 | return false; |
229 | } |
230 | } |
231 | |
232 | /** |
233 | * Way to pass custom data for engines |
234 | * @since 1.18 |
235 | * @param string $feature |
236 | * @param mixed $data |
237 | */ |
238 | public function setFeatureData( $feature, $data ) { |
239 | $this->features[$feature] = $data; |
240 | } |
241 | |
242 | /** |
243 | * Way to retrieve custom data set by setFeatureData |
244 | * or by the engine itself. |
245 | * @since 1.29 |
246 | * @param string $feature feature name |
247 | * @return mixed the feature value or null if unset |
248 | */ |
249 | public function getFeatureData( $feature ) { |
250 | return $this->features[$feature] ?? null; |
251 | } |
252 | |
253 | /** |
254 | * When overridden in derived class, performs database-specific conversions |
255 | * on text to be used for searching or updating search index. |
256 | * The default implementation only asks the content language to segment the text by word. |
257 | * |
258 | * @param string $string String to process |
259 | * @return string |
260 | */ |
261 | public function normalizeText( $string ) { |
262 | // Some languages such as Chinese require word segmentation |
263 | return MediaWikiServices::getInstance()->getContentLanguage()->segmentByWord( $string ); |
264 | } |
265 | |
266 | /** |
267 | * Get the service class for finding near matches. |
268 | * |
269 | * @return TitleMatcher |
270 | * @deprecated since 1.40, use MediaWikiServices::getInstance()->getTitleMatcher() |
271 | */ |
272 | public function getNearMatcher( Config $config ) { |
273 | return MediaWikiServices::getInstance()->getTitleMatcher(); |
274 | } |
275 | |
276 | /** |
277 | * Get near matcher for default SearchEngine. |
278 | * |
279 | * @return TitleMatcher |
280 | * @deprecated since 1.40, use MediaWikiServices::getInstance()->getTitleMatcher() |
281 | */ |
282 | protected static function defaultNearMatcher() { |
283 | wfDeprecated( __METHOD__, '1.40' ); |
284 | return MediaWikiServices::getInstance()->getTitleMatcher(); |
285 | } |
286 | |
287 | /** |
288 | * Get chars legal for search |
289 | * @param int $type type of search chars (see self::CHARS_ALL |
290 | * and self::CHARS_NO_SYNTAX). Defaults to CHARS_ALL |
291 | * @return string |
292 | */ |
293 | public function legalSearchChars( $type = self::CHARS_ALL ) { |
294 | return "A-Za-z_'.0-9\\x80-\\xFF\\-"; |
295 | } |
296 | |
297 | /** |
298 | * Set the maximum number of results to return |
299 | * and how many to skip before returning the first. |
300 | * |
301 | * @param int $limit |
302 | * @param int $offset |
303 | */ |
304 | public function setLimitOffset( $limit, $offset = 0 ) { |
305 | $this->limit = intval( $limit ); |
306 | $this->offset = intval( $offset ); |
307 | } |
308 | |
309 | /** |
310 | * Set which namespaces the search should include. |
311 | * Give an array of namespace index numbers. |
312 | * |
313 | * @param int[]|null $namespaces |
314 | */ |
315 | public function setNamespaces( $namespaces ) { |
316 | if ( $namespaces ) { |
317 | // Filter namespaces to only keep valid ones |
318 | $validNs = MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces(); |
319 | $namespaces = array_filter( $namespaces, static function ( $ns ) use( $validNs ) { |
320 | return $ns < 0 || isset( $validNs[$ns] ); |
321 | } ); |
322 | } else { |
323 | $namespaces = []; |
324 | } |
325 | $this->namespaces = $namespaces; |
326 | } |
327 | |
328 | /** |
329 | * Set whether the searcher should try to build a suggestion. Note: some searchers |
330 | * don't support building a suggestion in the first place and others don't respect |
331 | * this flag. |
332 | * |
333 | * @param bool $showSuggestion Should the searcher try to build suggestions |
334 | */ |
335 | public function setShowSuggestion( $showSuggestion ) { |
336 | $this->showSuggestion = $showSuggestion; |
337 | } |
338 | |
339 | /** |
340 | * Get the valid sort directions. All search engines support 'relevance' but others |
341 | * might support more. The default in all implementations must be 'relevance'. |
342 | * |
343 | * @since 1.25 |
344 | * @stable to override |
345 | * |
346 | * @return string[] the valid sort directions for setSort |
347 | */ |
348 | public function getValidSorts() { |
349 | return [ self::DEFAULT_SORT ]; |
350 | } |
351 | |
352 | /** |
353 | * Set the sort direction of the search results. Must be one returned by |
354 | * SearchEngine::getValidSorts() |
355 | * |
356 | * @since 1.25 |
357 | * @param string $sort sort direction for query result |
358 | */ |
359 | public function setSort( $sort ) { |
360 | if ( !in_array( $sort, $this->getValidSorts() ) ) { |
361 | throw new InvalidArgumentException( "Invalid sort: $sort. " . |
362 | "Must be one of: " . implode( ', ', $this->getValidSorts() ) ); |
363 | } |
364 | $this->sort = $sort; |
365 | } |
366 | |
367 | /** |
368 | * Get the sort direction of the search results |
369 | * |
370 | * @since 1.25 |
371 | * @return string |
372 | */ |
373 | public function getSort() { |
374 | return $this->sort; |
375 | } |
376 | |
377 | /** |
378 | * Nominally parses some common prefixes (all, or namespace names) and sets |
379 | * the list of namespaces of this class accordingly; this base implementation |
380 | * does no parsing and returns the query unchanged. |
381 | * |
382 | * @deprecated since 1.32; should be handled internally by the search engine |
383 | * @param string $query |
384 | * @return string |
385 | */ |
386 | public function replacePrefixes( $query ) { |
387 | return $query; |
388 | } |
389 | |
390 | /** |
391 | * Parse some common prefixes: all (search everything) |
392 | * or namespace names |
393 | * |
394 | * @param string $query |
395 | * @param bool $withAllKeyword activate support of the "all:" keyword and its |
396 | * translations to activate searching on all namespaces. |
397 | * @param bool $withPrefixSearchExtractNamespaceHook call the PrefixSearchExtractNamespace hook |
398 | * if classic namespace identification did not match. |
399 | * @return false|array false if no namespace was extracted, an array |
400 | * with the parsed query at index 0 and an array of namespaces at index |
401 | * 1 (or null for all namespaces). |
402 | */ |
403 | public static function parseNamespacePrefixes( |
404 | $query, |
405 | $withAllKeyword = true, |
406 | $withPrefixSearchExtractNamespaceHook = false |
407 | ) { |
408 | $parsed = $query; |
409 | if ( strpos( $query, ':' ) === false ) { // nothing to do |
410 | return false; |
411 | } |
412 | $extractedNamespace = null; |
413 | |
414 | $allQuery = false; |
415 | if ( $withAllKeyword ) { |
416 | $allkeywords = []; |
417 | |
418 | $allkeywords[] = wfMessage( 'searchall' )->inContentLanguage()->text() . ":"; |
419 | // force all: so that we have a common syntax for all the wikis |
420 | if ( !in_array( 'all:', $allkeywords ) ) { |
421 | $allkeywords[] = 'all:'; |
422 | } |
423 | |
424 | foreach ( $allkeywords as $kw ) { |
425 | if ( str_starts_with( $query, $kw ) ) { |
426 | $parsed = substr( $query, strlen( $kw ) ); |
427 | $allQuery = true; |
428 | break; |
429 | } |
430 | } |
431 | } |
432 | |
433 | if ( !$allQuery && strpos( $query, ':' ) !== false ) { |
434 | $prefix = str_replace( ' ', '_', substr( $query, 0, strpos( $query, ':' ) ) ); |
435 | $services = MediaWikiServices::getInstance(); |
436 | $index = $services->getContentLanguage()->getNsIndex( $prefix ); |
437 | if ( $index !== false ) { |
438 | $extractedNamespace = [ $index ]; |
439 | $parsed = substr( $query, strlen( $prefix ) + 1 ); |
440 | } elseif ( $withPrefixSearchExtractNamespaceHook ) { |
441 | $hookNamespaces = [ NS_MAIN ]; |
442 | $hookQuery = $query; |
443 | ( new HookRunner( $services->getHookContainer() ) ) |
444 | ->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery ); |
445 | if ( $hookQuery !== $query ) { |
446 | $parsed = $hookQuery; |
447 | $extractedNamespace = $hookNamespaces; |
448 | } else { |
449 | return false; |
450 | } |
451 | } else { |
452 | return false; |
453 | } |
454 | } |
455 | |
456 | return [ $parsed, $extractedNamespace ]; |
457 | } |
458 | |
459 | /** |
460 | * Find snippet highlight settings for all users |
461 | * @return array Contextlines, contextchars |
462 | * @deprecated since 1.34; use the SearchHighlighter constants directly |
463 | * @see SearchHighlighter::DEFAULT_CONTEXT_CHARS |
464 | * @see SearchHighlighter::DEFAULT_CONTEXT_LINES |
465 | */ |
466 | public static function userHighlightPrefs() { |
467 | $contextlines = SearchHighlighter::DEFAULT_CONTEXT_LINES; |
468 | $contextchars = SearchHighlighter::DEFAULT_CONTEXT_CHARS; |
469 | return [ $contextlines, $contextchars ]; |
470 | } |
471 | |
472 | /** |
473 | * Create or update the search index record for the given page. |
474 | * Title and text should be pre-processed. |
475 | * STUB |
476 | * |
477 | * @param int $id |
478 | * @param string $title |
479 | * @param string $text |
480 | */ |
481 | public function update( $id, $title, $text ) { |
482 | // no-op |
483 | } |
484 | |
485 | /** |
486 | * Update a search index record's title only. |
487 | * Title should be pre-processed. |
488 | * STUB |
489 | * |
490 | * @param int $id |
491 | * @param string $title |
492 | */ |
493 | public function updateTitle( $id, $title ) { |
494 | // no-op |
495 | } |
496 | |
497 | /** |
498 | * Delete an indexed page |
499 | * Title should be pre-processed. |
500 | * STUB |
501 | * |
502 | * @param int $id Page id that was deleted |
503 | * @param string $title Title of page that was deleted |
504 | */ |
505 | public function delete( $id, $title ) { |
506 | // no-op |
507 | } |
508 | |
509 | /** |
510 | * Get the raw text for updating the index from a content object |
511 | * Nicer search backends could possibly do something cooler than |
512 | * just returning raw text |
513 | * |
514 | * @todo This isn't ideal, we'd really like to have content-specific handling here |
515 | * @param Title $t Title we're indexing |
516 | * @param Content|null $c Content of the page to index |
517 | * @return string |
518 | * @deprecated since 1.34 use Content::getTextForSearchIndex directly |
519 | */ |
520 | public function getTextFromContent( Title $t, ?Content $c = null ) { |
521 | return $c ? $c->getTextForSearchIndex() : ''; |
522 | } |
523 | |
524 | /** |
525 | * If an implementation of SearchEngine handles all of its own text processing |
526 | * in getTextFromContent() and doesn't require SearchUpdate::updateText()'s |
527 | * rather silly handling, it should return true here instead. |
528 | * |
529 | * @return bool |
530 | * @deprecated since 1.34 no longer needed since getTextFromContent is being deprecated |
531 | */ |
532 | public function textAlreadyUpdatedForIndex() { |
533 | return false; |
534 | } |
535 | |
536 | /** |
537 | * Strips the namespace prefix from the search string, if it has one. |
538 | * Sets namespaces of the search to namespaces extracted from string. |
539 | * @param string $search |
540 | * @return string Simplified search string |
541 | */ |
542 | protected function normalizeNamespaces( $search ) { |
543 | $queryAndNs = self::parseNamespacePrefixes( $search, false, true ); |
544 | if ( $queryAndNs !== false ) { |
545 | $this->setNamespaces( $queryAndNs[1] ); |
546 | return $queryAndNs[0]; |
547 | } |
548 | return $search; |
549 | } |
550 | |
551 | /** |
552 | * Perform an overfetch of completion search results. This allows |
553 | * determining if another page of results is available. |
554 | * |
555 | * @param string $search |
556 | * @return SearchSuggestionSet |
557 | */ |
558 | protected function completionSearchBackendOverfetch( $search ) { |
559 | $this->limit++; |
560 | try { |
561 | return $this->completionSearchBackend( $search ); |
562 | } finally { |
563 | $this->limit--; |
564 | } |
565 | } |
566 | |
567 | /** |
568 | * Perform a completion search. |
569 | * Does not resolve namespaces and does not check variants. |
570 | * Search engine implementations may want to override this function. |
571 | * |
572 | * @stable to override |
573 | * |
574 | * @param string $search |
575 | * @return SearchSuggestionSet |
576 | */ |
577 | protected function completionSearchBackend( $search ) { |
578 | $results = []; |
579 | |
580 | $search = trim( $search ); |
581 | |
582 | if ( !in_array( NS_SPECIAL, $this->namespaces ) && // We do not run hook on Special: search |
583 | !$this->getHookRunner()->onPrefixSearchBackend( |
584 | $this->namespaces, $search, $this->limit, $results, $this->offset ) |
585 | ) { |
586 | // False means hook worked. |
587 | // FIXME: Yes, the API is weird. That's why it is going to be deprecated. |
588 | |
589 | return SearchSuggestionSet::fromStrings( $results ); |
590 | } else { |
591 | // Hook did not do the job, use default simple search |
592 | $results = $this->simplePrefixSearch( $search ); |
593 | return SearchSuggestionSet::fromTitles( $results ); |
594 | } |
595 | } |
596 | |
597 | /** |
598 | * Perform a completion search. |
599 | * @param string $search |
600 | * @return SearchSuggestionSet |
601 | */ |
602 | public function completionSearch( $search ) { |
603 | if ( trim( $search ) === '' ) { |
604 | return SearchSuggestionSet::emptySuggestionSet(); // Return empty result |
605 | } |
606 | $search = $this->normalizeNamespaces( $search ); |
607 | $suggestions = $this->completionSearchBackendOverfetch( $search ); |
608 | return $this->processCompletionResults( $search, $suggestions ); |
609 | } |
610 | |
611 | /** |
612 | * Perform a completion search with variants. |
613 | * @stable to override |
614 | * |
615 | * @param string $search |
616 | * @return SearchSuggestionSet |
617 | */ |
618 | public function completionSearchWithVariants( $search ) { |
619 | if ( trim( $search ) === '' ) { |
620 | return SearchSuggestionSet::emptySuggestionSet(); // Return empty result |
621 | } |
622 | $search = $this->normalizeNamespaces( $search ); |
623 | |
624 | $results = $this->completionSearchBackendOverfetch( $search ); |
625 | $fallbackLimit = 1 + $this->limit - $results->getSize(); |
626 | if ( $fallbackLimit > 0 ) { |
627 | $services = MediaWikiServices::getInstance(); |
628 | $fallbackSearches = $services->getLanguageConverterFactory() |
629 | ->getLanguageConverter( $services->getContentLanguage() ) |
630 | ->autoConvertToAllVariants( $search ); |
631 | $fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] ); |
632 | |
633 | $origLimit = $this->limit; |
634 | $origOffset = $this->offset; |
635 | foreach ( $fallbackSearches as $fbs ) { |
636 | try { |
637 | $this->setLimitOffset( $fallbackLimit ); |
638 | $fallbackSearchResult = $this->completionSearch( $fbs ); |
639 | $results->appendAll( $fallbackSearchResult ); |
640 | $fallbackLimit -= $fallbackSearchResult->getSize(); |
641 | } finally { |
642 | $this->setLimitOffset( $origLimit, $origOffset ); |
643 | } |
644 | if ( $fallbackLimit <= 0 ) { |
645 | break; |
646 | } |
647 | } |
648 | } |
649 | return $this->processCompletionResults( $search, $results ); |
650 | } |
651 | |
652 | /** |
653 | * Extract titles from completion results |
654 | * @param SearchSuggestionSet $completionResults |
655 | * @return Title[] |
656 | */ |
657 | public function extractTitles( SearchSuggestionSet $completionResults ) { |
658 | return $completionResults->map( static function ( SearchSuggestion $sugg ) { |
659 | return $sugg->getSuggestedTitle(); |
660 | } ); |
661 | } |
662 | |
663 | /** |
664 | * Process completion search results. |
665 | * Resolves the titles and rescores. |
666 | * @param string $search |
667 | * @param SearchSuggestionSet $suggestions |
668 | * @return SearchSuggestionSet |
669 | */ |
670 | protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) { |
671 | // We over-fetched to determine pagination. Shrink back down if we have extra results |
672 | // and mark if pagination is possible |
673 | $suggestions->shrink( $this->limit ); |
674 | |
675 | $search = trim( $search ); |
676 | // preload the titles with LinkBatch |
677 | $suggestedTitles = $suggestions->map( static function ( SearchSuggestion $sugg ) { |
678 | return $sugg->getSuggestedTitle(); |
679 | } ); |
680 | $linkBatchFactory = MediaWikiServices::getInstance()->getLinkBatchFactory(); |
681 | $linkBatchFactory->newLinkBatch( $suggestedTitles ) |
682 | ->setCaller( __METHOD__ ) |
683 | ->execute(); |
684 | |
685 | $diff = $suggestions->filter( static function ( SearchSuggestion $sugg ) { |
686 | return $sugg->getSuggestedTitle()->isKnown(); |
687 | } ); |
688 | if ( $diff > 0 ) { |
689 | $statsFactory = MediaWikiServices::getInstance()->getStatsFactory(); |
690 | $statsFactory->getCounter( 'search_completion_missing_total' ) |
691 | ->incrementBy( $diff ); |
692 | } |
693 | |
694 | // SearchExactMatchRescorer should probably be refactored to work directly on top of a SearchSuggestionSet |
695 | // instead of converting it to an array and trying to infer if it has re-scored anything by inspecting the head |
696 | // of the returned array. |
697 | $results = $suggestions->map( static function ( SearchSuggestion $sugg ) { |
698 | return $sugg->getSuggestedTitle()->getPrefixedText(); |
699 | } ); |
700 | |
701 | $rescorer = new SearchExactMatchRescorer(); |
702 | if ( $this->offset === 0 ) { |
703 | // Rescore results with an exact title match |
704 | // NOTE: in some cases like cross-namespace redirects |
705 | // (frequently used as shortcuts e.g. WP:WP on huwiki) some |
706 | // backends like Cirrus will return no results. We should still |
707 | // try an exact title match to workaround this limitation |
708 | $rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit ); |
709 | } else { |
710 | // No need to rescore if offset is not 0 |
711 | // The exact match must have been returned at position 0 |
712 | // if it existed. |
713 | $rescoredResults = $results; |
714 | } |
715 | |
716 | if ( count( $rescoredResults ) > 0 ) { |
717 | $found = array_search( $rescoredResults[0], $results ); |
718 | if ( $found === false ) { |
719 | // If the first result is not in the previous array it |
720 | // means that we found a new exact match |
721 | $exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) ); |
722 | $suggestions->prepend( $exactMatch ); |
723 | if ( $rescorer->getReplacedRedirect() !== null ) { |
724 | // the exact match rescorer replaced one of the suggestions found by the search engine |
725 | // let's remove it from our suggestions set to avoid showing duplicates |
726 | $suggestions->remove( SearchSuggestion::fromTitle( 0, |
727 | Title::newFromText( $rescorer->getReplacedRedirect() ) ) ); |
728 | } |
729 | $suggestions->shrink( $this->limit ); |
730 | } else { |
731 | // if the first result is not the same we need to rescore |
732 | if ( $found > 0 ) { |
733 | $suggestions->rescore( $found ); |
734 | } |
735 | } |
736 | } |
737 | |
738 | return $suggestions; |
739 | } |
740 | |
741 | /** |
742 | * Simple prefix search for subpages. |
743 | * @param string $search |
744 | * @return Title[] |
745 | */ |
746 | public function defaultPrefixSearch( $search ) { |
747 | if ( trim( $search ) === '' ) { |
748 | return []; |
749 | } |
750 | |
751 | $search = $this->normalizeNamespaces( $search ); |
752 | return $this->simplePrefixSearch( $search ); |
753 | } |
754 | |
755 | /** |
756 | * Call out to simple search backend. |
757 | * Defaults to TitlePrefixSearch. |
758 | * @param string $search |
759 | * @return Title[] |
760 | */ |
761 | protected function simplePrefixSearch( $search ) { |
762 | // Use default database prefix search |
763 | $backend = new TitlePrefixSearch; |
764 | return $backend->defaultSearchBackend( $this->namespaces, $search, $this->limit, $this->offset ); |
765 | } |
766 | |
767 | /** |
768 | * Get a list of supported profiles. |
769 | * Some search engine implementations may expose specific profiles to fine-tune |
770 | * their behavior. |
771 | * The profile can be passed as feature data with setFeatureData( $profileType, $profileName ) |
772 | * The array returned by this function contains the following keys: |
773 | * - name: the profile name to use with setFeatureData |
774 | * - desc-message: the i18n description |
775 | * - default: set to true if this profile is the default |
776 | * |
777 | * @since 1.28 |
778 | * @stable to override |
779 | * |
780 | * @param string $profileType the type of profiles |
781 | * @param User|null $user the user requesting the list of profiles |
782 | * @return array|null the list of profiles or null if none available |
783 | * @phan-return null|array{name:string,desc-message:string,default?:bool} |
784 | */ |
785 | public function getProfiles( $profileType, ?User $user = null ) { |
786 | return null; |
787 | } |
788 | |
789 | /** |
790 | * Create a search field definition. |
791 | * Specific search engines should override this method to create search fields. |
792 | * @stable to override |
793 | * |
794 | * @param string $name |
795 | * @param string $type One of the types in SearchIndexField::INDEX_TYPE_* |
796 | * @return SearchIndexField |
797 | * @since 1.28 |
798 | */ |
799 | public function makeSearchFieldMapping( $name, $type ) { |
800 | return new NullIndexField(); |
801 | } |
802 | |
803 | /** |
804 | * Get fields for search index |
805 | * @since 1.28 |
806 | * @return SearchIndexField[] Index field definitions for all content handlers |
807 | */ |
808 | public function getSearchIndexFields() { |
809 | $models = MediaWikiServices::getInstance()->getContentHandlerFactory()->getContentModels(); |
810 | $fields = []; |
811 | $seenHandlers = new SplObjectStorage(); |
812 | foreach ( $models as $model ) { |
813 | try { |
814 | $handler = MediaWikiServices::getInstance() |
815 | ->getContentHandlerFactory() |
816 | ->getContentHandler( $model ); |
817 | } catch ( MWUnknownContentModelException ) { |
818 | // If we can find no handler, ignore it |
819 | continue; |
820 | } |
821 | // Several models can have the same handler, so avoid processing it repeatedly |
822 | if ( $seenHandlers->contains( $handler ) ) { |
823 | // We already did this one |
824 | continue; |
825 | } |
826 | $seenHandlers->attach( $handler ); |
827 | $handlerFields = $handler->getFieldsForSearchIndex( $this ); |
828 | foreach ( $handlerFields as $fieldName => $fieldData ) { |
829 | if ( empty( $fields[$fieldName] ) ) { |
830 | $fields[$fieldName] = $fieldData; |
831 | } else { |
832 | // TODO: do we allow some clashes with the same type or reject all of them? |
833 | $mergeDef = $fields[$fieldName]->merge( $fieldData ); |
834 | if ( !$mergeDef ) { |
835 | throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" ); |
836 | } |
837 | $fields[$fieldName] = $mergeDef; |
838 | } |
839 | } |
840 | } |
841 | // Hook to allow extensions to produce search mapping fields |
842 | $this->getHookRunner()->onSearchIndexFields( $fields, $this ); |
843 | return $fields; |
844 | } |
845 | |
846 | /** |
847 | * Augment search results with extra data. |
848 | */ |
849 | public function augmentSearchResults( ISearchResultSet $resultSet ) { |
850 | $setAugmentors = []; |
851 | $rowAugmentors = []; |
852 | $this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors ); |
853 | if ( !$setAugmentors && !$rowAugmentors ) { |
854 | // We're done here |
855 | return; |
856 | } |
857 | |
858 | // Convert row augmentors to set augmentor |
859 | foreach ( $rowAugmentors as $name => $row ) { |
860 | if ( isset( $setAugmentors[$name] ) ) { |
861 | throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" ); |
862 | } |
863 | $setAugmentors[$name] = new PerRowAugmentor( $row ); |
864 | } |
865 | |
866 | /** |
867 | * @var string $name |
868 | * @var ResultSetAugmentor $augmentor |
869 | */ |
870 | foreach ( $setAugmentors as $name => $augmentor ) { |
871 | $data = $augmentor->augmentAll( $resultSet ); |
872 | if ( $data ) { |
873 | $resultSet->setAugmentedData( $name, $data ); |
874 | } |
875 | } |
876 | } |
877 | |
878 | /** |
879 | * @since 1.35 |
880 | * @internal |
881 | * @param HookContainer $hookContainer |
882 | */ |
883 | public function setHookContainer( HookContainer $hookContainer ) { |
884 | $this->hookContainer = $hookContainer; |
885 | $this->hookRunner = new HookRunner( $hookContainer ); |
886 | } |
887 | |
888 | /** |
889 | * Get a HookContainer, for running extension hooks or for hook metadata. |
890 | * |
891 | * @since 1.35 |
892 | * @return HookContainer |
893 | */ |
894 | protected function getHookContainer(): HookContainer { |
895 | if ( !$this->hookContainer ) { |
896 | // This shouldn't be hit in core, but it is needed for CirrusSearch |
897 | // which commonly creates a CirrusSearch object without cirrus being |
898 | // configured in $wgSearchType/$wgSearchTypeAlternatives. |
899 | $this->hookContainer = MediaWikiServices::getInstance()->getHookContainer(); |
900 | } |
901 | return $this->hookContainer; |
902 | } |
903 | |
904 | /** |
905 | * Get a HookRunner for running core hooks. |
906 | * |
907 | * @internal This is for use by core only. Hook interfaces may be removed |
908 | * without notice. |
909 | * @since 1.35 |
910 | * @return HookRunner |
911 | */ |
912 | protected function getHookRunner(): HookRunner { |
913 | if ( !$this->hookRunner ) { |
914 | $this->hookRunner = new HookRunner( $this->getHookContainer() ); |
915 | } |
916 | return $this->hookRunner; |
917 | } |
918 | |
919 | } |