MediaWiki master
SearchEngine.php
Go to the documentation of this file.
1<?php
10namespace MediaWiki\Search;
11
12use Closure;
13use InvalidArgumentException;
22use SplObjectStorage;
23
33abstract class SearchEngine {
/** Sort order used until setSort() selects another one. */
public const DEFAULT_SORT = 'relevance';

/** @var string Defaults to the empty string. */
public $prefix = '';

/** @var array Namespace IDs to search; replaced wholesale by setNamespaces(). */
public $namespaces = [ NS_MAIN ];

/** @var int Maximum number of results to return; see setLimitOffset(). */
protected $limit = 10;

/** @var int Number of results to skip before the first returned one; see setLimitOffset(). */
protected $offset = 0;

/**
 * @var array
 * NOTE(review): not written by any code visible in this listing — presumably
 * maintained by subclasses; confirm.
 */
protected $searchTerms = [];

/** @var bool Whether the searcher should try to build a suggestion. */
protected $showSuggestion = true;

/** @var string Active sort order; only changed through setSort(). */
private $sort = self::DEFAULT_SORT;

/** @var array Feature values keyed by feature name; see setFeatureData(). */
protected $features = [];

/** @var HookContainer|null Injected by setHookContainer() or resolved lazily by getHookContainer(). */
private $hookContainer;

/** @var HookRunner|null Built alongside the hook container, or lazily by getHookRunner(). */
private $hookRunner;

/** Profile type for completionSearch. */
public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';

/** Profile type for query independent ranking features (ex: article popularity). */
public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';

/** Profile type for query dependent ranking features (ex: field weights). */
public const FT_QUERY_DEP_PROFILE_TYPE = 'fulltextQueryDepProfile';

/** Integer flag for legalSearchChars: includes all chars allowed in a search query. */
protected const CHARS_ALL = 1;

/** Integer flag for legalSearchChars: includes all chars allowed in a search term. */
protected const CHARS_NO_SYNTAX = 2;

82
/**
 * Perform a full text search query and return a result set.
 *
 * The actual work is done by doSearchText(); the call is routed through
 * maybePaginate() so that engines which do not paginate on their own get
 * their result set trimmed back to the configured limit.
 *
 * @param string $term Raw search term
 * @return mixed Whatever doSearchText() returns (null in this base class)
 */
public function searchText( $term ) {
	$runSearch = fn () => $this->doSearchText( $term );

	return $this->maybePaginate( $runSearch );
}
98
/**
 * Perform a full text search query and return a result set.
 *
 * This base implementation performs no search and always returns null.
 *
 * @param string $term Raw search term
 * @return null
 */
protected function doSearchText( $term ) {
	return null;
}
111
/**
 * Perform a title search in the article archive.
 *
 * Thin public wrapper around doSearchArchiveTitle(); unlike searchText()
 * and searchTitle() it does not go through maybePaginate().
 *
 * @param string $term Raw search term
 * @return Status Result of doSearchArchiveTitle()
 */
public function searchArchiveTitle( $term ) {
	$status = $this->doSearchArchiveTitle( $term );

	return $status;
}
129
/**
 * Perform a title search in the article archive.
 *
 * This base implementation finds nothing: it returns a good Status whose
 * value is an empty array.
 *
 * @param string $term Raw search term
 * @return Status
 */
protected function doSearchArchiveTitle( $term ) {
	$noMatches = [];

	return Status::newGood( $noMatches );
}
142
/**
 * Perform a title-only search query and return a result set.
 *
 * Delegates to doSearchTitle() through maybePaginate(), which trims the
 * result set to the configured limit for engines that do not paginate
 * themselves.
 *
 * @param string $term Raw search term
 * @return mixed Whatever doSearchTitle() returns (null in this base class)
 */
public function searchTitle( $term ) {
	$runSearch = fn () => $this->doSearchTitle( $term );

	return $this->maybePaginate( $runSearch );
}
159
/**
 * Perform a title-only search query and return a result set.
 *
 * This base implementation performs no search and always returns null.
 *
 * @param string $term Raw search term
 * @return null
 */
protected function doSearchTitle( $term ) {
	return null;
}
172
/**
 * Run a search callback, over-fetching by one result when the engine does
 * not paginate on its own.
 *
 * Engines implementing PaginatingSearchEngine are called as-is. For every
 * other engine the limit is raised by one for the duration of the call,
 * restored afterwards (even if the callback throws), and the resulting set
 * is shrunk back to the real limit.
 *
 * @param Closure $fn Callback performing the search
 * @return mixed The callback's return value, unchanged apart from the
 *  shrink applied to a contained ISearchResultSet
 */
private function maybePaginate( Closure $fn ) {
	if ( $this instanceof PaginatingSearchEngine ) {
		return $fn();
	}

	// Ask for one extra row so the result set can tell whether more exist.
	$this->limit++;
	try {
		$outcome = $fn();
	} finally {
		$this->limit--;
	}

	// The callback may hand back the set directly or wrapped in a Status.
	$set = null;
	if ( $outcome instanceof ISearchResultSet ) {
		$set = $outcome;
	} elseif ( $outcome instanceof Status ) {
		$wrapped = $outcome->getValue();
		if ( $wrapped instanceof ISearchResultSet ) {
			$set = $wrapped;
		}
	}

	if ( $set !== null ) {
		$set->shrink( $this->limit );
	}

	return $outcome;
}
206
/**
 * Report whether this engine supports the named feature.
 *
 * Only 'search-update' is reported as supported here; every other value,
 * including 'title-suffix-filter', yields false.
 *
 * @param string $feature Feature name
 * @return bool
 */
public function supports( $feature ) {
	// Loose comparison on purpose: it matches the semantics of a switch
	// statement on the same value.
	if ( $feature == 'search-update' ) {
		return true;
	}

	return false;
}
223
/**
 * Way to pass custom data for engines.
 *
 * Stores $data under $feature, overwriting any value stored before.
 *
 * @param string $feature Feature name, used as the storage key
 * @param mixed $data Value to remember for that feature
 */
public function setFeatureData( $feature, $data ) {
	$this->features[$feature] = $data;
}
233
/**
 * Way to retrieve custom data set by setFeatureData or by the engine itself.
 *
 * @param string $feature Feature name
 * @return mixed The stored value, or null when nothing (or null) is stored
 */
public function getFeatureData( $feature ) {
	$stored = $this->features[$feature] ?? null;

	return $stored;
}
244
/**
 * Prepare text for searching by running it through the content language's
 * word segmentation.
 *
 * @param string $string Text to normalize
 * @return string Result of Language::segmentByWord()
 */
public function normalizeText( $string ) {
	// Word segmentation matters for languages such as Chinese.
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	return $contentLanguage->segmentByWord( $string );
}
257
/**
 * Get the service used for finding near matches.
 *
 * @param Config $config Not used by this implementation
 * @return mixed The title matcher service from MediaWikiServices
 */
public function getNearMatcher( Config $config ) {
	$services = MediaWikiServices::getInstance();

	return $services->getTitleMatcher();
}
267
/**
 * Get near matcher for default SearchEngine.
 *
 * Logs a deprecation warning (deprecated since 1.40) and then returns the
 * same title matcher service that getNearMatcher() returns.
 *
 * @deprecated since 1.40
 * @return mixed The title matcher service from MediaWikiServices
 */
protected static function defaultNearMatcher() {
	wfDeprecated( __METHOD__, '1.40' );

	$services = MediaWikiServices::getInstance();

	return $services->getTitleMatcher();
}
278
/**
 * Get chars legal for search.
 *
 * The returned text is a fixed set of character ranges: ASCII letters,
 * digits, underscore, apostrophe, dot, bytes 0x80-0xFF and the hyphen.
 * NOTE(review): the escaping suggests it is meant to be embedded in a
 * regex character class — confirm against callers.
 *
 * @param int $type One of the CHARS_* flags; ignored by this implementation
 * @return string
 */
public function legalSearchChars( $type = self::CHARS_ALL ) {
	$legalChars = "A-Za-z_'.0-9\\x80-\\xFF\\-";

	return $legalChars;
}
288
/**
 * Set the maximum number of results to return and how many to skip before
 * returning the first.
 *
 * Both values are coerced to integers before being stored.
 *
 * @param int $limit Maximum number of results
 * @param int $offset Number of results to skip; defaults to 0
 */
public function setLimitOffset( $limit, $offset = 0 ) {
	$this->limit = (int)$limit;
	$this->offset = (int)$offset;
}
300
/**
 * Set which namespaces the search should include.
 *
 * A falsy argument (null, empty array, ...) clears the list. Otherwise the
 * list is filtered: negative IDs are always kept, other IDs only when they
 * appear as keys of the configured searchable namespaces. Array keys of the
 * input are preserved by the filtering.
 *
 * @param int[]|null $namespaces Namespace IDs
 */
public function setNamespaces( $namespaces ) {
	if ( !$namespaces ) {
		$this->namespaces = [];
		return;
	}

	$searchable = MediaWikiServices::getInstance()
		->getSearchEngineConfig()
		->searchableNamespaces();

	$this->namespaces = array_filter(
		$namespaces,
		static fn ( $id ) => $id < 0 || isset( $searchable[$id] )
	);
}
319
328 $this->showSuggestion = $showSuggestion;
329 }
330
/**
 * Get the valid sort directions.
 *
 * This base implementation only knows DEFAULT_SORT.
 *
 * @return string[]
 */
public function getValidSorts() {
	$sorts = [ self::DEFAULT_SORT ];

	return $sorts;
}
343
/**
 * Set the sort direction of the search results.
 *
 * @param string $sort Must be one of the values returned by getValidSorts()
 * @throws InvalidArgumentException When $sort is not a valid sort
 */
public function setSort( $sort ) {
	$validSorts = $this->getValidSorts();

	if ( in_array( $sort, $validSorts ) ) {
		$this->sort = $sort;
		return;
	}

	$allowed = implode( ', ', $validSorts );
	throw new InvalidArgumentException( "Invalid sort: $sort. " . "Must be one of: " . $allowed );
}
358
/**
 * Get the sort direction of the search results.
 *
 * @return string The value last accepted by setSort(), or DEFAULT_SORT
 */
public function getSort() {
	return $this->sort;
}
368
/**
 * Parse some common prefixes: all (search everything) or namespace names.
 *
 * Resolution order:
 *  1. No colon in the query: nothing to parse, return false.
 *  2. With $withAllKeyword, a leading "all:" (or the localized 'searchall'
 *     message followed by a colon) is stripped and the namespace is null.
 *  3. The text before the first colon is looked up as a namespace name in
 *     the content language (spaces treated as underscores).
 *  4. With $withPrefixSearchExtractNamespaceHook, the
 *     PrefixSearchExtractNamespace hook gets a chance; its answer is used
 *     only if it changed the query.
 *
 * @param string $query Raw query string
 * @param bool $withAllKeyword Whether to recognize the "all:" keyword
 * @param bool $withPrefixSearchExtractNamespaceHook Whether to run the hook
 *  when no namespace name matched
 * @return array|false False when no prefix was recognized; otherwise
 *  [ remaining query, list of namespace IDs or null for "all" ]
 */
public static function parseNamespacePrefixes(
	$query,
	$withAllKeyword = true,
	$withPrefixSearchExtractNamespaceHook = false
) {
	$colonPos = strpos( $query, ':' );
	if ( $colonPos === false ) {
		// nothing to do
		return false;
	}

	if ( $withAllKeyword ) {
		$allKeywords = [ wfMessage( 'searchall' )->inContentLanguage()->text() . ":" ];
		// force all: so that we have a common syntax for all the wikis
		if ( !in_array( 'all:', $allKeywords ) ) {
			$allKeywords[] = 'all:';
		}

		foreach ( $allKeywords as $keyword ) {
			if ( str_starts_with( $query, $keyword ) ) {
				return [ substr( $query, strlen( $keyword ) ), null ];
			}
		}
	}

	$prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
	$services = MediaWikiServices::getInstance();
	$nsIndex = $services->getContentLanguage()->getNsIndex( $prefix );
	if ( $nsIndex !== false ) {
		// Skip the prefix and the colon that follows it.
		return [ substr( $query, strlen( $prefix ) + 1 ), [ $nsIndex ] ];
	}

	if ( !$withPrefixSearchExtractNamespaceHook ) {
		return false;
	}

	$hookNamespaces = [ NS_MAIN ];
	$hookQuery = $query;
	$hookRunner = new HookRunner( $services->getHookContainer() );
	$hookRunner->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery );

	// An untouched query means no hook handler recognized a prefix.
	return $hookQuery !== $query ? [ $hookQuery, $hookNamespaces ] : false;
}
437
445 public static function userHighlightPrefs() {
448 return [ $contextlines, $contextchars ];
449 }
450
/**
 * Create or update the search index record for the given page.
 *
 * Does nothing in this base class.
 *
 * @param int $id Page ID
 * @param string $title Page title
 * @param string $text Page text
 */
public function update( $id, $title, $text ) {
	// Intentionally empty.
}
463
/**
 * Update a search index record's title only.
 *
 * Does nothing in this base class.
 *
 * @param int $id Page ID
 * @param string $title Page title
 */
public function updateTitle( $id, $title ) {
	// Intentionally empty.
}
475
/**
 * Remove a page from the search index.
 *
 * Does nothing in this base class.
 *
 * @param int $id Page ID
 * @param string $title Page title
 */
public function delete( $id, $title ) {
	// Intentionally empty.
}
487
/**
 * Strip a recognized namespace prefix from the search string.
 *
 * When parseNamespacePrefixes() recognizes a prefix (the "all:" keyword is
 * not considered, the PrefixSearchExtractNamespace hook is), the extracted
 * namespaces replace the engine's current namespaces as a side effect.
 *
 * @param string $search Raw search string
 * @return string The search string without the prefix, or $search
 *  unchanged when no prefix was recognized
 */
protected function normalizeNamespaces( $search ) {
	$parsed = self::parseNamespacePrefixes( $search, false, true );
	if ( $parsed === false ) {
		return $search;
	}

	[ $bareQuery, $extractedNamespaces ] = $parsed;
	$this->setNamespaces( $extractedNamespaces );

	return $bareQuery;
}
502
/**
 * Perform an overfetch of completion search results.
 *
 * Runs completionSearchBackend() with the limit temporarily raised by one;
 * the limit is restored even when the backend throws.
 *
 * @param string $search Search string
 * @return SearchSuggestionSet
 */
protected function completionSearchBackendOverfetch( $search ) {
	$this->limit++;
	try {
		$suggestions = $this->completionSearchBackend( $search );
	} finally {
		$this->limit--;
	}

	return $suggestions;
}
518
/**
 * Perform a completion search.
 *
 * Unless the Special namespace is among the searched namespaces, the
 * PrefixSearchBackend hook is given the first chance to produce results.
 * A false return from the hook means a handler did the work; anything else
 * falls through to the default simple prefix search.
 *
 * @param string $search Search string; surrounding whitespace is trimmed
 * @return SearchSuggestionSet
 */
protected function completionSearchBackend( $search ) {
	$search = trim( $search );

	// We do not run hook on Special: search
	$searchesSpecial = in_array( NS_SPECIAL, $this->namespaces );
	if ( !$searchesSpecial ) {
		$hookTitles = [];
		$hookDeclined = $this->getHookRunner()->onPrefixSearchBackend(
			$this->namespaces, $search, $this->limit, $hookTitles, $this->offset );

		if ( !$hookDeclined ) {
			// False means hook worked.
			// FIXME: Yes, the API is weird. That's why it is going to be deprecated.
			return SearchSuggestionSet::fromStrings( $hookTitles );
		}
	}

	// Hook did not do the job, use default simple search
	$titles = $this->simplePrefixSearch( $search );

	return SearchSuggestionSet::fromTitles( $titles );
}
548
/**
 * Perform a completion search.
 *
 * A blank search short-circuits to an empty suggestion set. Otherwise any
 * namespace prefix is resolved (which may change the searched namespaces),
 * the backend is queried with one extra result, and the suggestions are
 * post-processed by processCompletionResults().
 *
 * @param string $search Search string
 * @return SearchSuggestionSet
 */
public function completionSearch( $search ) {
	if ( trim( $search ) === '' ) {
		return SearchSuggestionSet::emptySuggestionSet();
	}

	$bareSearch = $this->normalizeNamespaces( $search );
	$rawSuggestions = $this->completionSearchBackendOverfetch( $bareSearch );

	return $this->processCompletionResults( $bareSearch, $rawSuggestions );
}
562
/**
 * Perform a completion search with variants.
 *
 * Works like completionSearch(), but when the primary search leaves room
 * under the limit, the search string is converted to every language
 * variant of the content language and each distinct variant is searched in
 * turn until the room is used up. Limit and offset are restored after each
 * fallback search, even if it throws.
 *
 * @param string $search Search string
 * @return SearchSuggestionSet
 */
public function completionSearchWithVariants( $search ) {
	if ( trim( $search ) === '' ) {
		return SearchSuggestionSet::emptySuggestionSet();
	}
	$search = $this->normalizeNamespaces( $search );

	$suggestions = $this->completionSearchBackendOverfetch( $search );

	// The "1 +" mirrors the one-result over-fetch of the primary search.
	$remaining = 1 + $this->limit - $suggestions->getSize();
	if ( $remaining <= 0 ) {
		return $this->processCompletionResults( $search, $suggestions );
	}

	$services = MediaWikiServices::getInstance();
	$converter = $services->getLanguageConverterFactory()
		->getLanguageConverter( $services->getContentLanguage() );
	$variants = $converter->autoConvertToAllVariants( $search );
	// Drop duplicates and the form that was already searched.
	$variants = array_diff( array_unique( $variants ), [ $search ] );

	$savedLimit = $this->limit;
	$savedOffset = $this->offset;
	foreach ( $variants as $variant ) {
		try {
			$this->setLimitOffset( $remaining );
			$variantSuggestions = $this->completionSearch( $variant );
			$suggestions->appendAll( $variantSuggestions );
			$remaining -= $variantSuggestions->getSize();
		} finally {
			$this->setLimitOffset( $savedLimit, $savedOffset );
		}

		if ( $remaining <= 0 ) {
			break;
		}
	}

	return $this->processCompletionResults( $search, $suggestions );
}
603
/**
 * Extract titles from completion results.
 *
 * @param SearchSuggestionSet $completionResults Suggestions to read from
 * @return array One getSuggestedTitle() value per suggestion
 */
public function extractTitles( SearchSuggestionSet $completionResults ) {
	return $completionResults->map(
		static fn ( SearchSuggestion $sugg ) => $sugg->getSuggestedTitle()
	);
}
614
/**
 * Process completion search results.
 *
 * Steps, in order:
 *  1. Shrink the over-fetched set back to the configured limit.
 *  2. Preload the suggested titles with a LinkBatch.
 *  3. Filter on Title::isKnown() and count what the filter reports in the
 *     'search_completion_missing_total' counter.
 *  4. On the first page (offset 0), let SearchExactMatchRescorer look for
 *     an exact title match and move or prepend it to the top.
 *
 * @param string $search Search string; surrounding whitespace is trimmed
 * @param SearchSuggestionSet $suggestions Suggestions to process; modified
 *  in place
 * @return SearchSuggestionSet The same $suggestions instance
 */
protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
	// We over-fetched to determine pagination. Shrink back down if we have extra results
	// and mark if pagination is possible
	$suggestions->shrink( $this->limit );

	$search = trim( $search );
	// preload the titles with LinkBatch
	$suggestedTitles = $suggestions->map( static function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle();
	} );
	$linkBatchFactory = MediaWikiServices::getInstance()->getLinkBatchFactory();
	$linkBatchFactory->newLinkBatch( $suggestedTitles )
		->setCaller( __METHOD__ )
		->execute();

	// The value returned by filter() is used as the number of suggestions
	// it dropped; only a positive count is reported.
	$diff = $suggestions->filter( static function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->isKnown();
	} );
	if ( $diff > 0 ) {
		$statsFactory = MediaWikiServices::getInstance()->getStatsFactory();
		$statsFactory->getCounter( 'search_completion_missing_total' )
			->incrementBy( $diff );
	}

	// SearchExactMatchRescorer should probably be refactored to work directly on top of a SearchSuggestionSet
	// instead of converting it to array and trying to infer if it has re-scored anything by inspecting the head
	// of the returned array.
	$results = $suggestions->map( static function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->getPrefixedText();
	} );

	$rescorer = new SearchExactMatchRescorer();
	if ( $this->offset === 0 ) {
		// Rescore results with an exact title match
		// NOTE: in some cases like cross-namespace redirects
		// (frequently used as shortcuts e.g. WP:WP on huwiki) some
		// backends like Cirrus will return no results. We should still
		// try an exact title match to workaround this limitation
		$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
	} else {
		// No need to rescore if offset is not 0
		// The exact match must have been returned at position 0
		// if it existed.
		$rescoredResults = $results;
	}

	if ( count( $rescoredResults ) > 0 ) {
		// Strict search is required: titles are strings, and a loose
		// comparison treats numeric-looking titles such as "1.0" and "1"
		// (or "1e3" and "1000") as equal even though they are different
		// pages, which would promote the wrong suggestion.
		$found = array_search( $rescoredResults[0], $results, true );
		if ( $found === false ) {
			// If the first result is not in the previous array it
			// means that we found a new exact match
			$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
			$suggestions->prepend( $exactMatch );
			if ( $rescorer->getReplacedRedirect() !== null ) {
				// the exact match rescorer replaced one of the suggestion found by the search engine
				// let's remove it from our suggestions set to avoid showing duplicates
				$suggestions->remove( SearchSuggestion::fromTitle( 0,
					Title::newFromText( $rescorer->getReplacedRedirect() ) ) );
			}
			$suggestions->shrink( $this->limit );
		} elseif ( $found > 0 ) {
			// The best result already exists further down the list:
			// move it to the first position.
			$suggestions->rescore( $found );
		}
	}

	return $suggestions;
}
692
/**
 * Simple prefix search for subpages.
 *
 * A blank search yields an empty array. Otherwise any namespace prefix is
 * resolved (which may change the searched namespaces) and the remaining
 * string is handed to simplePrefixSearch().
 *
 * @param string $search Search string
 * @return array Result of simplePrefixSearch(), or [] for a blank search
 */
public function defaultPrefixSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return [];
	}

	$bareSearch = $this->normalizeNamespaces( $search );

	return $this->simplePrefixSearch( $bareSearch );
}
706
/**
 * Call out to simple search backend.
 *
 * Runs TitlePrefixSearch's default backend with the engine's current
 * namespaces, limit and offset.
 *
 * @param string $search Search string
 * @return array Whatever TitlePrefixSearch::defaultSearchBackend() returns
 */
protected function simplePrefixSearch( $search ) {
	// Use default database prefix search
	$prefixSearch = new TitlePrefixSearch;

	return $prefixSearch->defaultSearchBackend(
		$this->namespaces,
		$search,
		$this->limit,
		$this->offset
	);
}
718
/**
 * Get a list of supported profiles.
 *
 * This base implementation knows no profiles and always returns null.
 *
 * @param string $profileType Profile type, e.g. one of the *_PROFILE_TYPE
 *  constants; ignored here
 * @param User|null $user Ignored here
 * @return null
 */
public function getProfiles( $profileType, ?User $user = null ) {
	return null;
}
740
/**
 * Create a search field definition.
 *
 * This base implementation ignores both arguments and returns a
 * NullIndexField, i.e. the engine does not implement the field.
 *
 * @param string $name Field name
 * @param string $type Field type
 * @return NullIndexField
 */
public function makeSearchFieldMapping( $name, $type ) {
	$field = new NullIndexField();

	return $field;
}
754
760 public function getSearchIndexFields() {
761 $models = MediaWikiServices::getInstance()->getContentHandlerFactory()->getContentModels();
762 $fields = [];
763 $seenHandlers = new SplObjectStorage();
764 foreach ( $models as $model ) {
765 try {
767 ->getContentHandlerFactory()
768 ->getContentHandler( $model );
770 // If we can find no handler, ignore it
771 continue;
772 }
773 // Several models can have the same handler, so avoid processing it repeatedly
774 if ( $seenHandlers->offsetExists( $handler ) ) {
775 // We already did this one
776 continue;
777 }
778 $seenHandlers->offsetSet( $handler );
779 $handlerFields = $handler->getFieldsForSearchIndex( $this );
780 foreach ( $handlerFields as $fieldName => $fieldData ) {
781 if ( empty( $fields[$fieldName] ) ) {
782 $fields[$fieldName] = $fieldData;
783 } else {
784 // TODO: do we allow some clashes with the same type or reject all of them?
785 $mergeDef = $fields[$fieldName]->merge( $fieldData );
786 if ( !$mergeDef ) {
787 throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
788 }
789 $fields[$fieldName] = $mergeDef;
790 }
791 }
792 }
793 // Hook to allow extensions to produce search mapping fields
794 $this->getHookRunner()->onSearchIndexFields( $fields, $this );
795 return $fields;
796 }
797
/**
 * Augment search results with extra data.
 *
 * Collects set-level and row-level augmentors from the
 * SearchResultsAugment hook, wraps each row-level augmentor in a
 * PerRowAugmentor so that everything can be run at set level, and stores
 * every non-empty augmentation on the result set under the augmentor's
 * name.
 *
 * @param ISearchResultSet $resultSet Result set to augment in place
 * @throws InvalidArgumentException When the same name is registered as both
 *  a row augmentor and a set augmentor
 */
public function augmentSearchResults( ISearchResultSet $resultSet ) {
	$setAugmentors = [];
	$rowAugmentors = [];
	$this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors );

	if ( !( $setAugmentors || $rowAugmentors ) ) {
		// Nobody registered anything.
		return;
	}

	// Convert row augmentors to set augmentor
	foreach ( $rowAugmentors as $name => $rowAugmentor ) {
		if ( isset( $setAugmentors[$name] ) ) {
			throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
		}
		$setAugmentors[$name] = new PerRowAugmentor( $rowAugmentor );
	}

	foreach ( $setAugmentors as $name => $augmentor ) {
		$augmented = $augmentor->augmentAll( $resultSet );
		if ( $augmented ) {
			$resultSet->setAugmentedData( $name, $augmented );
		}
	}
}
829
/**
 * Inject the hook container and rebuild the hook runner on top of it.
 *
 * @param HookContainer $hookContainer Container to use from now on
 */
public function setHookContainer( HookContainer $hookContainer ) {
	$runner = new HookRunner( $hookContainer );

	$this->hookContainer = $hookContainer;
	$this->hookRunner = $runner;
}
839
/**
 * Get a HookContainer, for running extension hooks or for hook metadata.
 *
 * Falls back to the global service container when none was injected, and
 * remembers the result for later calls.
 *
 * @return HookContainer
 */
protected function getHookContainer(): HookContainer {
	// This shouldn't be hit in core, but it is needed for CirrusSearch
	// which commonly creates a CirrusSearch object without cirrus being
	// configured in $wgSearchType/$wgSearchTypeAlternatives.
	$this->hookContainer ??= MediaWikiServices::getInstance()->getHookContainer();

	return $this->hookContainer;
}
855
/**
 * Get a HookRunner for running core hooks.
 *
 * The runner is created on first use from getHookContainer() and then
 * reused.
 *
 * @return HookRunner
 */
protected function getHookRunner(): HookRunner {
	$this->hookRunner ??= new HookRunner( $this->getHookContainer() );

	return $this->hookRunner;
}
870
871}
872
874class_alias( SearchEngine::class, 'SearchEngine' );
const NS_MAIN
Definition Defines.php:51
const NS_SPECIAL
Definition Defines.php:40
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:69
Exception thrown when an unregistered content model is requested.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook... This is case-sensitive (First character may be autom...
Contain a class for special pages.
updateTitle( $id, $title)
Update a search index record's title only.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
getHookContainer()
Get a HookContainer, for running extension hooks or for hook metadata.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
setSort( $sort)
Set the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
setNamespaces( $namespaces)
Set which namespaces the search should include.
simplePrefixSearch( $search)
Call out to simple search backend.
completionSearchBackend( $search)
Perform a completion search.
searchTitle( $term)
Perform a title-only search query and return a result set.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
doSearchText( $term)
Perform a full text search query and return a result set.
completionSearchWithVariants( $search)
Perform a completion search with variants.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
searchArchiveTitle( $term)
Perform a title search in the article archive.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
setFeatureData( $feature, $data)
Way to pass custom data for engines.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
update( $id, $title, $text)
Create or update the search index record for the given page.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
normalizeNamespaces( $search)
Turns the search into a plain string if it was namespaced.
getSearchIndexFields()
Get fields for search index.
const FT_QUERY_DEP_PROFILE_TYPE
Profile type for query dependent ranking features (ex: field weights)
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features (ex: article popularity)
defaultPrefixSearch( $search)
Simple prefix search for subpages.
array $features
Feature values.
setHookContainer(HookContainer $hookContainer)
static userHighlightPrefs()
Find snippet highlight settings for all users.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getProfiles( $profileType, ?User $user=null)
Get a list of supported profiles.
completionSearch( $search)
Perform a completion search.
getSort()
Get the sort direction of the search results.
getValidSorts()
Get the valid sort directions.
getHookRunner()
Get a HookRunner for running core hooks.
getNearMatcher(Config $config)
Get the service class for finding near matches.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
searchText( $term)
Perform a full text search query and return a result set.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and adding the page f...
filter( $callback)
Filter the suggestions array.
rescore( $key)
Move the suggestion at index $key to the first position.
map( $callback)
Call array_map on the suggestions array.
static fromStrings(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a string array.
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a title array.
shrink( $limit)
Remove any extra elements in the suggestions set.
remove(SearchSuggestion $suggestion)
Remove a suggestion from the set.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
static fromTitle( $score, Title $title)
Create suggestion from Title.
Performs prefix search, returning Title objects.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition Status.php:44
Represents a title within MediaWiki.
Definition Title.php:69
User class for the MediaWiki software.
Definition User.php:130
Interface for configuration instances.
Definition Config.php:18
A set of SearchEngine results.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...
Definition of a mapping for the search index field.