MediaWiki master
SearchEngine.php
Go to the documentation of this file.
1<?php
36
/**
 * Base class for search engines.
 *
 * Holds the per-request search state (namespaces, limit, offset, sort order,
 * feature data) and provides default implementations that concrete engines
 * are expected to override: the do*() search methods return empty results and
 * the index maintenance methods (update/updateTitle/delete) are no-ops.
 */
abstract class SearchEngine {
	public const DEFAULT_SORT = 'relevance';

	/** @var string */
	public $prefix = '';

	/** @var int[]|null Namespaces the search is restricted to */
	public $namespaces = [ NS_MAIN ];

	/** @var int Maximum number of results to return */
	protected $limit = 10;

	/** @var int Number of results to skip before the first returned one */
	protected $offset = 0;

	/** @var string[] */
	protected $searchTerms = [];

	/** @var bool Whether the searcher should try to build a suggestion */
	protected $showSuggestion = true;

	/** @var string Current sort, always one of getValidSorts() */
	private $sort = self::DEFAULT_SORT;

	/** @var array Feature values, keyed by feature name */
	protected $features = [];

	/** @var HookContainer|null Lazily initialised, see getHookContainer() */
	private $hookContainer;

	/** @var HookRunner|null Lazily initialised, see getHookRunner() */
	private $hookRunner;

	/** Profile type for completionSearch */
	public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';

	/** Profile type for query independent ranking features */
	public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';

	/** Integer flag for legalSearchChars: includes all chars allowed in a search query */
	protected const CHARS_ALL = 1;

	/** Integer flag for legalSearchChars: includes all chars allowed in a search term */
	protected const CHARS_NO_SYNTAX = 2;

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * Delegates to doSearchText() and applies the generic pagination
	 * handling of maybePaginate().
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null
	 */
	public function searchText( $term ) {
		return $this->maybePaginate( function () use ( $term ) {
			return $this->doSearchText( $term );
		} );
	}

	/**
	 * Perform a full text search query and return a result set.
	 * The base implementation finds nothing and returns null.
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null
	 */
	protected function doSearchText( $term ) {
		return null;
	}

	/**
	 * Perform a title search in the article archive.
	 * No pagination handling is applied here; the call goes straight
	 * to doSearchArchiveTitle().
	 *
	 * @param string $term Raw search term
	 * @return Status
	 */
	public function searchArchiveTitle( $term ) {
		return $this->doSearchArchiveTitle( $term );
	}

	/**
	 * Perform a title search in the article archive.
	 * The base implementation returns a good Status wrapping an empty array.
	 *
	 * @param string $term Raw search term
	 * @return Status
	 */
	protected function doSearchArchiveTitle( $term ) {
		return Status::newGood( [] );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * Delegates to doSearchTitle() and applies the generic pagination
	 * handling of maybePaginate().
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null
	 */
	public function searchTitle( $term ) {
		return $this->maybePaginate( function () use ( $term ) {
			return $this->doSearchTitle( $term );
		} );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 * The base implementation finds nothing and returns null.
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null
	 */
	protected function doSearchTitle( $term ) {
		return null;
	}

	/**
	 * Run a search callback, over-fetching by one result unless the engine
	 * is a PaginatingSearchEngine (which handles pagination itself).
	 *
	 * The limit is temporarily raised by one while the callback runs and is
	 * always restored, even if the callback throws. When a result set is
	 * found (directly or wrapped in a Status) it is shrunk back to the
	 * configured limit.
	 *
	 * @param Closure $fn Callback performing the actual search
	 * @return ISearchResultSet|Status|null Whatever $fn returned
	 */
	private function maybePaginate( Closure $fn ) {
		if ( $this instanceof PaginatingSearchEngine ) {
			return $fn();
		}
		$this->limit++;
		try {
			$resultSetOrStatus = $fn();
		} finally {
			$this->limit--;
		}

		$resultSet = null;
		if ( $resultSetOrStatus instanceof ISearchResultSet ) {
			$resultSet = $resultSetOrStatus;
		} elseif ( $resultSetOrStatus instanceof Status &&
			$resultSetOrStatus->getValue() instanceof ISearchResultSet
		) {
			$resultSet = $resultSetOrStatus->getValue();
		}
		if ( $resultSet ) {
			$resultSet->shrink( $this->limit );
		}

		return $resultSetOrStatus;
	}

	/**
	 * Tell whether this engine supports a named feature.
	 * The base engine only reports support for 'search-update'.
	 *
	 * @param string $feature
	 * @return bool
	 */
	public function supports( $feature ) {
		switch ( $feature ) {
			case 'search-update':
				return true;
			case 'title-suffix-filter':
			default:
				return false;
		}
	}

	/**
	 * Way to pass custom data for engines.
	 *
	 * @param string $feature
	 * @param mixed $data
	 */
	public function setFeatureData( $feature, $data ) {
		$this->features[$feature] = $data;
	}

	/**
	 * Way to retrieve custom data set by setFeatureData or by the engine itself.
	 *
	 * @param string $feature
	 * @return mixed|null The stored data, or null if nothing was stored
	 */
	public function getFeatureData( $feature ) {
		return $this->features[$feature] ?? null;
	}

	/**
	 * Normalize text to be used for searching or updating the search index.
	 * The base implementation only applies the content language's word
	 * segmentation; derived classes may override it.
	 *
	 * @param string $string
	 * @return string
	 */
	public function normalizeText( $string ) {
		// Some languages such as Chinese require word segmentation
		return MediaWikiServices::getInstance()->getContentLanguage()->segmentByWord( $string );
	}

	/**
	 * Get the service class for finding near matches.
	 *
	 * @param Config $config Not used by the base implementation
	 * @return mixed The title matcher service from MediaWikiServices
	 */
	public function getNearMatcher( Config $config ) {
		return MediaWikiServices::getInstance()->getTitleMatcher();
	}

	/**
	 * Get near matcher for default SearchEngine.
	 *
	 * @deprecated since 1.40
	 * @return mixed The title matcher service from MediaWikiServices
	 */
	protected static function defaultNearMatcher() {
		wfDeprecated( __METHOD__, '1.40' );
		return MediaWikiServices::getInstance()->getTitleMatcher();
	}

	/**
	 * Get chars legal for search.
	 *
	 * @param int $type One of the CHARS_* flags; the base implementation
	 *  returns the same character class for every value
	 * @return string Fragment suitable for use inside a regex character class
	 */
	public function legalSearchChars( $type = self::CHARS_ALL ) {
		return "A-Za-z_'.0-9\\x80-\\xFF\\-";
	}

	/**
	 * Set the maximum number of results to return
	 * and how many to skip before returning the first.
	 *
	 * @param int $limit
	 * @param int $offset
	 */
	public function setLimitOffset( $limit, $offset = 0 ) {
		$this->limit = intval( $limit );
		$this->offset = intval( $offset );
	}

	/**
	 * Set which namespaces the search should include.
	 * Non-negative namespaces that are not searchable are dropped; a falsy
	 * argument results in an empty namespace list.
	 *
	 * @param int[]|null $namespaces
	 */
	public function setNamespaces( $namespaces ) {
		if ( $namespaces ) {
			// Filter namespaces to only keep valid ones
			$validNs = MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces();
			$namespaces = array_filter( $namespaces, static function ( $ns ) use( $validNs ) {
				return $ns < 0 || isset( $validNs[$ns] );
			} );
		} else {
			$namespaces = [];
		}
		$this->namespaces = $namespaces;
	}

	/**
	 * Set whether the searcher should try to build a suggestion.
	 *
	 * @param bool $showSuggestion
	 */
	public function setShowSuggestion( $showSuggestion ) {
		$this->showSuggestion = $showSuggestion;
	}

	/**
	 * Get the valid sort directions.
	 * The base engine only knows DEFAULT_SORT.
	 *
	 * @return string[]
	 */
	public function getValidSorts() {
		return [ self::DEFAULT_SORT ];
	}

	/**
	 * Set the sort direction of the search results.
	 *
	 * @param string $sort Must be one of getValidSorts()
	 * @throws InvalidArgumentException if $sort is not a valid sort
	 */
	public function setSort( $sort ) {
		$validSorts = $this->getValidSorts();
		// Strict comparison so that non-string values can never match a sort name by type juggling
		if ( !in_array( $sort, $validSorts, true ) ) {
			throw new InvalidArgumentException( "Invalid sort: $sort. " .
				"Must be one of: " . implode( ', ', $validSorts ) );
		}
		$this->sort = $sort;
	}

	/**
	 * Get the sort direction of the search results.
	 *
	 * @return string
	 */
	public function getSort() {
		return $this->sort;
	}

	/**
	 * Hook for engines that parse prefixes out of the query.
	 * The base implementation returns the query unchanged.
	 *
	 * @param string $query
	 * @return string
	 */
	public function replacePrefixes( $query ) {
		return $query;
	}

	/**
	 * Parse some common prefixes: all (search everything) or namespace names.
	 *
	 * @param string $query
	 * @param bool $withAllKeyword Whether the "all:" keyword (and its localised
	 *  form from the 'searchall' message) is recognised
	 * @param bool $withPrefixSearchExtractNamespaceHook Whether the
	 *  PrefixSearchExtractNamespace hook is run when the prefix is not a
	 *  namespace name
	 * @return false|array False when no prefix was extracted; otherwise
	 *  [ remaining query, int[]|null namespaces ] where the namespaces are
	 *  null for an "all" query
	 */
	public static function parseNamespacePrefixes(
		$query,
		$withAllKeyword = true,
		$withPrefixSearchExtractNamespaceHook = false
	) {
		$colonPos = strpos( $query, ':' );
		if ( $colonPos === false ) { // nothing to do
			return false;
		}
		$parsed = $query;
		$extractedNamespace = null;

		$allQuery = false;
		if ( $withAllKeyword ) {
			$allkeywords = [];

			$allkeywords[] = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
			// force all: so that we have a common syntax for all the wikis
			if ( !in_array( 'all:', $allkeywords, true ) ) {
				$allkeywords[] = 'all:';
			}

			foreach ( $allkeywords as $kw ) {
				if ( str_starts_with( $query, $kw ) ) {
					$parsed = substr( $query, strlen( $kw ) );
					$allQuery = true;
					break;
				}
			}
		}

		if ( !$allQuery ) {
			// Everything before the first colon is the candidate namespace name
			$prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
			$services = MediaWikiServices::getInstance();
			$index = $services->getContentLanguage()->getNsIndex( $prefix );
			if ( $index !== false ) {
				$extractedNamespace = [ $index ];
				$parsed = substr( $query, strlen( $prefix ) + 1 );
			} elseif ( $withPrefixSearchExtractNamespaceHook ) {
				$hookNamespaces = [ NS_MAIN ];
				$hookQuery = $query;
				( new HookRunner( $services->getHookContainer() ) )
					->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery );
				// The hook signals success by rewriting the query
				if ( $hookQuery !== $query ) {
					$parsed = $hookQuery;
					$extractedNamespace = $hookNamespaces;
				} else {
					return false;
				}
			} else {
				return false;
			}
		}

		return [ $parsed, $extractedNamespace ];
	}

	/**
	 * Find snippet highlight settings for all users.
	 *
	 * @return int[] [ context lines, context chars ]
	 */
	public static function userHighlightPrefs() {
		// Hard-coded values. NOTE(review): 2 / 75 are the upstream MediaWiki
		// defaults; the assignments were missing from this copy of the file,
		// which made the method return two undefined variables.
		$contextlines = 2;
		$contextchars = 75;
		return [ $contextlines, $contextchars ];
	}

	/**
	 * Create or update the search index record for the given page.
	 * No-op in the base class.
	 *
	 * @param int $id
	 * @param mixed $title
	 * @param string $text
	 */
	public function update( $id, $title, $text ) {
		// no-op
	}

	/**
	 * Update a search index record's title only.
	 * No-op in the base class.
	 *
	 * @param int $id
	 * @param mixed $title
	 */
	public function updateTitle( $id, $title ) {
		// no-op
	}

	/**
	 * Delete an indexed page.
	 * No-op in the base class.
	 *
	 * @param int $id
	 * @param mixed $title
	 */
	public function delete( $id, $title ) {
		// no-op
	}

	/**
	 * Get the raw text for updating the index from a content object.
	 *
	 * @param Title $t Not used by the base implementation
	 * @param Content|null $c
	 * @return string The content's search index text, or '' without content
	 */
	public function getTextFromContent( Title $t, ?Content $c = null ) {
		return $c ? $c->getTextForSearchIndex() : '';
	}

	/**
	 * Whether getTextFromContent() already did all the text processing
	 * needed for the index. The base class reports false.
	 *
	 * @return bool
	 */
	public function textAlreadyUpdatedForIndex() {
		return false;
	}

	/**
	 * Makes search simple string if it was namespaced.
	 * Sets namespaces of the search to namespaces extracted from string.
	 *
	 * @param string $search
	 * @return string Search string without the namespace prefix
	 */
	protected function normalizeNamespaces( $search ) {
		$queryAndNs = self::parseNamespacePrefixes( $search, false, true );
		if ( $queryAndNs !== false ) {
			$this->setNamespaces( $queryAndNs[1] );
			return $queryAndNs[0];
		}
		return $search;
	}

	/**
	 * Perform an overfetch of completion search results.
	 * Raises the limit by one for the duration of the backend call so that
	 * callers can tell whether more results exist; the limit is always
	 * restored.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	protected function completionSearchBackendOverfetch( $search ) {
		$this->limit++;
		try {
			return $this->completionSearchBackend( $search );
		} finally {
			$this->limit--;
		}
	}

	/**
	 * Perform a completion search.
	 * Does not resolve namespaces and does not check variants.
	 * Search engine implementations may want to override this function.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	protected function completionSearchBackend( $search ) {
		$results = [];

		$search = trim( $search );

		if ( !in_array( NS_SPECIAL, $this->namespaces ) && // We do not run hook on Special: search
			!$this->getHookRunner()->onPrefixSearchBackend(
				$this->namespaces, $search, $this->limit, $results, $this->offset )
		) {
			// False means hook worked.
			// FIXME: Yes, the API is weird. That's why it is going to be deprecated.

			return SearchSuggestionSet::fromStrings( $results );
		} else {
			// Hook did not do the job, use default simple search
			$results = $this->simplePrefixSearch( $search );
			return SearchSuggestionSet::fromTitles( $results );
		}
	}

	/**
	 * Perform a completion search.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet Empty when $search is blank
	 */
	public function completionSearch( $search ) {
		if ( trim( $search ) === '' ) {
			return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
		}
		$search = $this->normalizeNamespaces( $search );
		$suggestions = $this->completionSearchBackendOverfetch( $search );
		return $this->processCompletionResults( $search, $suggestions );
	}

	/**
	 * Perform a completion search with variants.
	 *
	 * When the primary search does not fill the (over-fetched) limit, the
	 * language variants of the query are searched as well, each with a limit
	 * reduced to the number of results still missing.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet Empty when $search is blank
	 */
	public function completionSearchWithVariants( $search ) {
		if ( trim( $search ) === '' ) {
			return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
		}
		$search = $this->normalizeNamespaces( $search );

		$results = $this->completionSearchBackendOverfetch( $search );
		$fallbackLimit = 1 + $this->limit - $results->getSize();
		if ( $fallbackLimit > 0 ) {
			$services = MediaWikiServices::getInstance();
			$fallbackSearches = $services->getLanguageConverterFactory()
				->getLanguageConverter( $services->getContentLanguage() )
				->autoConvertToAllVariants( $search );
			$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

			// The fallback searches run with a reduced limit and offset 0.
			// Remember the caller's values so the final processing below (and
			// any later use of this engine) sees the requested limit/offset
			// rather than the last fallback limit.
			$originalLimit = $this->limit;
			$originalOffset = $this->offset;
			try {
				foreach ( $fallbackSearches as $fbs ) {
					$this->setLimitOffset( $fallbackLimit );
					$fallbackSearchResult = $this->completionSearch( $fbs );
					$results->appendAll( $fallbackSearchResult );
					$fallbackLimit -= $fallbackSearchResult->getSize();
					if ( $fallbackLimit <= 0 ) {
						break;
					}
				}
			} finally {
				$this->setLimitOffset( $originalLimit, $originalOffset );
			}
		}
		return $this->processCompletionResults( $search, $results );
	}

	/**
	 * Extract titles from completion results.
	 *
	 * @param SearchSuggestionSet $completionResults
	 * @return array The suggested title of every suggestion
	 */
	public function extractTitles( SearchSuggestionSet $completionResults ) {
		return $completionResults->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} );
	}

	/**
	 * Process completion search results.
	 * Shrinks the over-fetched set, drops suggestions for unknown titles and,
	 * for the first page only, moves or adds an exact title match to the top.
	 *
	 * @param string $search
	 * @param SearchSuggestionSet $suggestions Modified in place
	 * @return SearchSuggestionSet The same, processed, set
	 */
	protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
		// We over-fetched to determine pagination. Shrink back down if we have extra results
		// and mark if pagination is possible
		$suggestions->shrink( $this->limit );

		$search = trim( $search );
		// preload the titles with LinkBatch
		$linkBatchFactory = MediaWikiServices::getInstance()->getLinkBatchFactory();
		$lb = $linkBatchFactory->newLinkBatch( $suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} ) );
		$lb->setCaller( __METHOD__ );
		$lb->execute();

		$diff = $suggestions->filter( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle()->isKnown();
		} );
		if ( $diff > 0 ) {
			MediaWikiServices::getInstance()->getStatsdDataFactory()
				->updateCount( 'search.completion.missing', $diff );
		}

		// SearchExactMatchRescorer should probably be refactored to work directly on top of a
		// SearchSuggestionSet instead of converting it to array and trying to infer if it has
		// re-scored anything by inspecting the head of the returned array.
		$results = $suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle()->getPrefixedText();
		} );

		$rescorer = new SearchExactMatchRescorer();
		if ( $this->offset === 0 ) {
			// Rescore results with an exact title match
			// NOTE: in some cases like cross-namespace redirects
			// (frequently used as shortcuts e.g. WP:WP on huwiki) some
			// backends like Cirrus will return no results. We should still
			// try an exact title match to workaround this limitation
			$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
		} else {
			// No need to rescore if offset is not 0
			// The exact match must have been returned at position 0
			// if it existed.
			$rescoredResults = $results;
		}

		if ( count( $rescoredResults ) > 0 ) {
			// Strict search: both sides are title strings, and a loose comparison
			// would treat numeric-looking titles such as "1e3" and "1000" as equal.
			$found = array_search( $rescoredResults[0], $results, true );
			if ( $found === false ) {
				// If the first result is not in the previous array it
				// means that we found a new exact match
				$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
				$suggestions->prepend( $exactMatch );
				if ( $rescorer->getReplacedRedirect() !== null ) {
					// the exact match rescorer replaced one of the suggestion found by the search engine
					// let's remove it from our suggestions set to avoid showing duplicates
					$suggestions->remove( SearchSuggestion::fromTitle( 0,
						Title::newFromText( $rescorer->getReplacedRedirect() ) ) );
				}
				$suggestions->shrink( $this->limit );
			} else {
				// if the first result is not the same we need to rescore
				if ( $found > 0 ) {
					$suggestions->rescore( $found );
				}
			}
		}

		return $suggestions;
	}

	/**
	 * Simple prefix search for subpages.
	 *
	 * @param string $search
	 * @return array Empty when $search is blank, otherwise the result of
	 *  simplePrefixSearch()
	 */
	public function defaultPrefixSearch( $search ) {
		if ( trim( $search ) === '' ) {
			return [];
		}

		$search = $this->normalizeNamespaces( $search );
		return $this->simplePrefixSearch( $search );
	}

	/**
	 * Call out to simple search backend.
	 * Defaults to TitlePrefixSearch.
	 *
	 * @param string $search
	 * @return array
	 */
	protected function simplePrefixSearch( $search ) {
		// Use default database prefix search
		$backend = new TitlePrefixSearch;
		return $backend->defaultSearchBackend( $this->namespaces, $search, $this->limit, $this->offset );
	}

	/**
	 * Get a list of supported profiles.
	 * The base engine has none and returns null.
	 *
	 * @param string $profileType e.g. one of the *_PROFILE_TYPE constants
	 * @param User|null $user
	 * @return array|null
	 */
	public function getProfiles( $profileType, ?User $user = null ) {
		return null;
	}

	/**
	 * Create a search field definition.
	 * The base engine does not implement any field and returns a NullIndexField.
	 *
	 * @param string $name
	 * @param mixed $type
	 * @return NullIndexField
	 */
	public function makeSearchFieldMapping( $name, $type ) {
		return new NullIndexField();
	}

	/**
	 * Get fields for search index.
	 * Collects the fields declared by every registered content handler,
	 * merging definitions that share a name, then lets extensions add theirs
	 * through the SearchIndexFields hook.
	 *
	 * @return array Field definitions keyed by field name
	 * @throws InvalidArgumentException if two definitions of a field cannot be merged
	 */
	public function getSearchIndexFields() {
		$contentHandlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
		$models = $contentHandlerFactory->getContentModels();
		$fields = [];
		$seenHandlers = new SplObjectStorage();
		foreach ( $models as $model ) {
			try {
				$handler = $contentHandlerFactory->getContentHandler( $model );
			} catch ( MWUnknownContentModelException $e ) {
				// If we can find no handler, ignore it
				continue;
			}
			// Several models can have the same handler, so avoid processing it repeatedly
			if ( $seenHandlers->contains( $handler ) ) {
				// We already did this one
				continue;
			}
			$seenHandlers->attach( $handler );
			$handlerFields = $handler->getFieldsForSearchIndex( $this );
			foreach ( $handlerFields as $fieldName => $fieldData ) {
				if ( empty( $fields[$fieldName] ) ) {
					$fields[$fieldName] = $fieldData;
				} else {
					// TODO: do we allow some clashes with the same type or reject all of them?
					$mergeDef = $fields[$fieldName]->merge( $fieldData );
					if ( !$mergeDef ) {
						throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
					}
					$fields[$fieldName] = $mergeDef;
				}
			}
		}
		// Hook to allow extensions to produce search mapping fields
		$this->getHookRunner()->onSearchIndexFields( $fields, $this );
		return $fields;
	}

	/**
	 * Augment search results with extra data.
	 * Augmentors are collected through the SearchResultsAugment hook; row
	 * augmentors are wrapped so that they can be run like set augmentors.
	 *
	 * @param ISearchResultSet $resultSet Receives the augmented data
	 * @throws InvalidArgumentException if a name has both a row and a set augmentor
	 */
	public function augmentSearchResults( ISearchResultSet $resultSet ) {
		$setAugmentors = [];
		$rowAugmentors = [];
		$this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors );
		if ( !$setAugmentors && !$rowAugmentors ) {
			// We're done here
			return;
		}

		// Convert row augmentors to set augmentor
		foreach ( $rowAugmentors as $name => $row ) {
			if ( isset( $setAugmentors[$name] ) ) {
				throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
			}
			$setAugmentors[$name] = new PerRowAugmentor( $row );
		}

		foreach ( $setAugmentors as $name => $augmentor ) {
			$data = $augmentor->augmentAll( $resultSet );
			if ( $data ) {
				$resultSet->setAugmentedData( $name, $data );
			}
		}
	}

	/**
	 * Set the hook container and (re)build the hook runner from it.
	 *
	 * @param HookContainer $hookContainer
	 */
	public function setHookContainer( HookContainer $hookContainer ) {
		$this->hookContainer = $hookContainer;
		$this->hookRunner = new HookRunner( $hookContainer );
	}

	/**
	 * Get a HookContainer, for running extension hooks or for hook metadata.
	 *
	 * @return HookContainer
	 */
	protected function getHookContainer(): HookContainer {
		if ( !$this->hookContainer ) {
			// This shouldn't be hit in core, but it is needed for CirrusSearch
			// which commonly creates a CirrusSearch object without cirrus being
			// configured in $wgSearchType/$wgSearchTypeAlternatives.
			$this->hookContainer = MediaWikiServices::getInstance()->getHookContainer();
		}
		return $this->hookContainer;
	}

	/**
	 * Get a HookRunner for running core hooks.
	 * Created on first use from getHookContainer().
	 *
	 * @return HookRunner
	 */
	protected function getHookRunner(): HookRunner {
		if ( !$this->hookRunner ) {
			$this->hookRunner = new HookRunner( $this->getHookContainer() );
		}
		return $this->hookRunner;
	}

}
const NS_MAIN
Definition Defines.php:65
const NS_SPECIAL
Definition Defines.php:54
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Exception thrown when an unregistered content model is requested.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Service locator for MediaWiki core services.
Service implementation of near match title search.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition Status.php:54
Represents a title within MediaWiki.
Definition Title.php:78
internal since 1.36
Definition User.php:93
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook... This is case-sensitive (First character may be autom...
Contain a class for special pages.
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getNearMatcher(Config $config)
Get the service class for finding near matches.
getHookRunner()
Get a HookRunner for running core hooks.
searchTitle( $term)
Perform a title-only search query and return a result set.
supports( $feature)
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
update( $id, $title, $text)
Create or update the search index record for the given page.
setNamespaces( $namespaces)
Set which namespaces the search should include.
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
array $features
Feature values.
replacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names and set the list of namespaces...
string[] $searchTerms
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
defaultPrefixSearch( $search)
Simple prefix search for subpages.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
searchArchiveTitle( $term)
Perform a title search in the article archive.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
setFeatureData( $feature, $data)
Way to pass custom data for engines.
completionSearchBackend( $search)
Perform a completion search.
getTextFromContent(Title $t, Content $c=null)
Get the raw text for updating the index from a content object Nicer search backends could possibly do...
getProfiles( $profileType, User $user=null)
Get a list of supported profiles.
int[]|null $namespaces
getSort()
Get the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
getSearchIndexFields()
Get fields for search index.
getValidSorts()
Get the valid sort directions.
static userHighlightPrefs()
Find snippet highlight settings for all users.
updateTitle( $id, $title)
Update a search index record's title only.
completionSearchWithVariants( $search)
Perform a completion search with variants.
doSearchText( $term)
Perform a full text search query and return a result set.
normalizeNamespaces( $search)
Makes search simple string if it was namespaced.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
getHookContainer()
Get a HookContainer, for running extension hooks or for hook metadata.
completionSearch( $search)
Perform a completion search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
simplePrefixSearch( $search)
Call out to simple search backend.
setSort( $sort)
Set the sort direction of the search results.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features.
setHookContainer(HookContainer $hookContainer)
searchText( $term)
Perform a full text search query and return a result set.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
A set of search suggestions.
filter( $callback)
Filter the suggestions array.
rescore( $key)
Move the suggestion at index $key to the first position.
shrink( $limit)
Remove any extra elements in the suggestions set.
static fromStrings(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a string array.
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a title array.
map( $callback)
Call array_map on the suggestions array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
remove(SearchSuggestion $suggestion)
Remove a suggestion from the set.
A search suggestion.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
Performs prefix search, returning Title objects.
A set of SearchEngine results.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Interface for configuration instances.
Definition Config.php:32
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...
Augment search results.