MediaWiki master
SearchEngine.php
Go to the documentation of this file.
1<?php
36
42abstract class SearchEngine {
/** Sort order used when none has been set through setSort(). */
public const DEFAULT_SORT = 'relevance';

/**
 * @var string
 * NOTE(review): not referenced by any method visible in this class; purpose unconfirmed.
 */
public $prefix = '';

/** @var int[]|null Namespaces the search should include; replaced by setNamespaces() */
public $namespaces = [ NS_MAIN ];

/** @var int Maximum number of results to return; see setLimitOffset() */
protected $limit = 10;

/** @var int How many results to skip before returning the first; see setLimitOffset() */
protected $offset = 0;

/** @var string[] */
protected $searchTerms = [];

/** @var bool Whether the searcher should try to build a suggestion */
protected $showSuggestion = true;

/** @var string Currently selected sort order; validated by setSort() */
private $sort = self::DEFAULT_SORT;

/** @var array Feature values, keyed by feature name; see setFeatureData() */
protected $features = [];

/** @var HookContainer|null Set by setHookContainer() or lazily by getHookContainer() */
private $hookContainer;

/** @var HookRunner|null Set by setHookContainer() or lazily by getHookRunner() */
private $hookRunner;

/** Profile type for completionSearch */
public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';

/** Profile type for query independent ranking features */
public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';

/** Integer flag for legalSearchChars: includes all chars allowed in a search query */
protected const CHARS_ALL = 1;

/** Integer flag for legalSearchChars: includes all chars allowed in a search term */
protected const CHARS_NO_SYNTAX = 2;

/**
 * Perform a full text search query and return a result set.
 *
 * The actual work is done by doSearchText(); the call is routed through
 * maybePaginate() so that engines which do not paginate themselves still
 * get pagination handling.
 *
 * @param string $term Search term (type assumed; passed through unchanged)
 * @return mixed Whatever doSearchText() returned
 */
public function searchText( $term ) {
	$runFullTextSearch = fn () => $this->doSearchText( $term );

	return $this->maybePaginate( $runFullTextSearch );
}
103
/**
 * Perform a full text search query and return a result set.
 *
 * This base implementation performs no search at all and always yields null.
 *
 * @param string $term Search term (ignored here)
 * @return null
 */
protected function doSearchText( $term ) {
	// Nothing to search with in the base class.
	return null;
}
116
/**
 * Perform a title search in the article archive.
 *
 * Thin public wrapper around doSearchArchiveTitle(); no pagination handling
 * is applied on this path.
 *
 * @param string $term Search term (type assumed; passed through unchanged)
 * @return mixed Whatever doSearchArchiveTitle() returned
 */
public function searchArchiveTitle( $term ) {
	$archiveResult = $this->doSearchArchiveTitle( $term );

	return $archiveResult;
}
134
/**
 * Perform a title search in the article archive.
 *
 * This base implementation finds nothing: it reports success with an empty list.
 *
 * @param string $term Search term (ignored here)
 * @return Status Good status whose value is an empty array
 */
protected function doSearchArchiveTitle( $term ) {
	$noMatches = [];

	return Status::newGood( $noMatches );
}
147
/**
 * Perform a title-only search query and return a result set.
 *
 * The actual work is done by doSearchTitle(); the call is routed through
 * maybePaginate() so that engines which do not paginate themselves still
 * get pagination handling.
 *
 * @param string $term Search term (type assumed; passed through unchanged)
 * @return mixed Whatever doSearchTitle() returned
 */
public function searchTitle( $term ) {
	$runTitleSearch = fn () => $this->doSearchTitle( $term );

	return $this->maybePaginate( $runTitleSearch );
}
164
/**
 * Perform a title-only search query and return a result set.
 *
 * This base implementation performs no search at all and always yields null.
 *
 * @param string $term Search term (ignored here)
 * @return null
 */
protected function doSearchTitle( $term ) {
	// Nothing to search with in the base class.
	return null;
}
177
/**
 * Run a search callback, adding pagination support for engines that lack it.
 *
 * Engines marked with PaginatingSearchEngine are trusted to handle this on
 * their own and the callback result is returned untouched. For every other
 * engine the limit is raised by one for the duration of the callback, and a
 * resulting ISearchResultSet (returned directly or wrapped in a Status) is
 * then shrunk back to the real limit.
 *
 * @param Closure $fn Callback performing the actual search
 * @return mixed The callback's return value
 */
private function maybePaginate( Closure $fn ) {
	if ( $this instanceof PaginatingSearchEngine ) {
		return $fn();
	}

	// Over-fetch by one; the limit is restored even if the callback throws.
	++$this->limit;
	try {
		$outcome = $fn();
	} finally {
		--$this->limit;
	}

	// Locate the result set, whether returned bare or inside a Status.
	if ( $outcome instanceof ISearchResultSet ) {
		$set = $outcome;
	} elseif ( $outcome instanceof Status && $outcome->getValue() instanceof ISearchResultSet ) {
		$set = $outcome->getValue();
	} else {
		$set = null;
	}

	if ( $set ) {
		$set->shrink( $this->limit );
	}

	return $outcome;
}
211
/**
 * Tell whether this engine supports a named feature.
 *
 * Only 'search-update' is reported as supported by the base class; every
 * other name, including 'title-suffix-filter', is reported as unsupported.
 *
 * @param string $feature Feature name
 * @return bool
 */
public function supports( $feature ) {
	// Loose comparison on purpose: it mirrors the matching rules of a switch statement.
	return $feature == 'search-update';
}
228
/**
 * Way to pass custom data for engines.
 *
 * Stores the value under the feature name, replacing any earlier value.
 *
 * @param string $feature Feature name, used as the storage key
 * @param mixed $data Value to remember for that feature
 */
public function setFeatureData( $feature, $data ) {
	$key = $feature;
	$this->features[$key] = $data;
}
238
/**
 * Way to retrieve custom data set by setFeatureData or by the engine itself.
 *
 * @param string $feature Feature name
 * @return mixed|null The stored value, or null when nothing (or null) is stored
 */
public function getFeatureData( $feature ) {
	$stored = $this->features[$feature] ?? null;

	return $stored;
}
249
/**
 * Normalize text to be used for searching.
 *
 * The base implementation only asks the content language to segment the
 * string by word, which some languages such as Chinese require. Derived
 * classes may override this with engine-specific conversions.
 *
 * @param string $string Text to normalize
 * @return string Result of the content language's segmentByWord()
 */
public function normalizeText( $string ) {
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	return $contentLanguage->segmentByWord( $string );
}
262
/**
 * Get the service class for finding near matches.
 *
 * @param Config $config Not used by this implementation
 * @return mixed The title matcher service from MediaWikiServices
 */
public function getNearMatcher( Config $config ) {
	$services = MediaWikiServices::getInstance();

	return $services->getTitleMatcher();
}
272
/**
 * Get near matcher for default SearchEngine.
 *
 * @deprecated since 1.40; every call logs a deprecation warning
 * @return mixed The title matcher service from MediaWikiServices
 */
protected static function defaultNearMatcher() {
	wfDeprecated( __METHOD__, '1.40' );

	$services = MediaWikiServices::getInstance();

	return $services->getTitleMatcher();
}
283
/**
 * Get chars legal for search.
 *
 * The returned string lists characters and ranges; NOTE(review): presumably
 * meant for use inside a regex character class - confirm against callers.
 *
 * @param int $type One of the CHARS_* flags; the base class returns the same
 *  set for every flag
 * @return string
 */
public function legalSearchChars( $type = self::CHARS_ALL ) {
	$legalChars = "A-Za-z_'.0-9\\x80-\\xFF\\-";

	return $legalChars;
}
293
/**
 * Set the maximum number of results to return and how many to skip before
 * returning the first.
 *
 * Both values are converted to integers before being stored.
 *
 * @param int $limit Maximum number of results
 * @param int $offset Number of results to skip
 */
public function setLimitOffset( $limit, $offset = 0 ) {
	$this->limit = (int)$limit;
	$this->offset = (int)$offset;
}
305
/**
 * Set which namespaces the search should include.
 *
 * An empty or otherwise falsy argument clears the list. Otherwise only
 * negative namespace ids and ids known to the search engine config as
 * searchable are kept; original array keys are preserved.
 *
 * @param int[]|null $namespaces Namespace ids to search in
 */
public function setNamespaces( $namespaces ) {
	if ( !$namespaces ) {
		$this->namespaces = [];
		return;
	}

	// Filter namespaces to only keep valid ones
	$searchable = MediaWikiServices::getInstance()
		->getSearchEngineConfig()
		->searchableNamespaces();

	$this->namespaces = array_filter(
		$namespaces,
		static fn ( $ns ) => $ns < 0 || isset( $searchable[$ns] )
	);
}
324
/**
 * Set whether the searcher should try to build a suggestion.
 *
 * The value is stored as given, without conversion.
 *
 * @param bool $showSuggestion
 */
public function setShowSuggestion( $showSuggestion ) {
	$wantSuggestion = $showSuggestion;
	$this->showSuggestion = $wantSuggestion;
}
335
/**
 * Get the valid sort directions.
 *
 * The base class only knows the default sort.
 *
 * @return string[]
 */
public function getValidSorts() {
	$validSorts = [];
	$validSorts[] = self::DEFAULT_SORT;

	return $validSorts;
}
348
/**
 * Set the sort direction of the search results.
 *
 * The value must be one of getValidSorts(). The check is type-strict, so
 * values such as true or 0 can no longer slip through PHP's loose
 * comparison rules and end up stored as the sort.
 *
 * @param string $sort Sort direction
 * @throws InvalidArgumentException When $sort is not a valid sort
 */
public function setSort( $sort ) {
	$validSorts = $this->getValidSorts();
	if ( !in_array( $sort, $validSorts, true ) ) {
		throw new InvalidArgumentException( "Invalid sort: $sort. " .
			"Must be one of: " . implode( ', ', $validSorts ) );
	}
	$this->sort = $sort;
}
364
/**
 * Get the sort direction of the search results.
 *
 * @return string The value last accepted by setSort(), or the default sort
 */
public function getSort() {
	$currentSort = $this->sort;

	return $currentSort;
}
374
/**
 * Parse some common prefixes: all (search everything) or namespace names
 * and set the list of namespaces of this class accordingly.
 *
 * The base implementation does none of that: the query comes back unchanged
 * and no state is modified.
 *
 * @param string $query
 * @return string The query, unchanged
 */
public function replacePrefixes( $query ) {
	$unchanged = $query;

	return $unchanged;
}
387
/**
 * Parse some common prefixes: all (search everything) or namespace names.
 *
 * @param string $query Raw search query
 * @param bool $withAllKeyword Whether the localized 'searchall' keyword and
 *  the literal "all:" keyword should be recognised
 * @param bool $withPrefixSearchExtractNamespaceHook Whether the
 *  PrefixSearchExtractNamespace hook is consulted when the text before the
 *  first colon is not a namespace name
 * @return false|array False when no prefix was recognised, otherwise a
 *  two-element array: the query without its prefix, then either null (an
 *  "all" keyword matched) or the list of extracted namespace ids
 */
public static function parseNamespacePrefixes(
	$query,
	$withAllKeyword = true,
	$withPrefixSearchExtractNamespaceHook = false
) {
	$colonPos = strpos( $query, ':' );
	if ( $colonPos === false ) {
		// No colon at all: nothing to do
		return false;
	}

	if ( $withAllKeyword ) {
		$keywords = [ wfMessage( 'searchall' )->inContentLanguage()->text() . ":" ];
		// force all: so that we have a common syntax for all the wikis
		if ( !in_array( 'all:', $keywords ) ) {
			$keywords[] = 'all:';
		}

		foreach ( $keywords as $keyword ) {
			if ( str_starts_with( $query, $keyword ) ) {
				// "All" query: strip the keyword, no specific namespace is extracted
				return [ substr( $query, strlen( $keyword ) ), null ];
			}
		}
	}

	// Everything before the first colon, with spaces turned into underscores
	// (a length-preserving replacement, so the colon position still applies).
	$prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
	$services = MediaWikiServices::getInstance();
	$nsIndex = $services->getContentLanguage()->getNsIndex( $prefix );
	if ( $nsIndex !== false ) {
		return [ substr( $query, $colonPos + 1 ), [ $nsIndex ] ];
	}

	if ( !$withPrefixSearchExtractNamespaceHook ) {
		return false;
	}

	// Let the hook try; it reports success by rewriting the query it was handed.
	$hookNamespaces = [ NS_MAIN ];
	$hookQuery = $query;
	( new HookRunner( $services->getHookContainer() ) )
		->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery );
	if ( $hookQuery === $query ) {
		return false;
	}

	return [ $hookQuery, $hookNamespaces ];
}
456
/**
 * Find snippet highlight settings for all users.
 *
 * The values are hard-coded rather than read from user preferences. Both
 * variables are assigned here so the function no longer returns undefined
 * variables.
 *
 * @return int[] Two-element list: number of context lines, then number of
 *  context characters
 */
public static function userHighlightPrefs() {
	// NOTE(review): 2 and 75 are MediaWiki's stock snippet defaults - confirm
	// they match SearchHighlighter's defaults in this tree.
	$contextlines = 2;
	$contextchars = 75;
	return [ $contextlines, $contextchars ];
}
469
/**
 * Create or update the search index record for the given page.
 *
 * The base class keeps no index, so this does nothing.
 *
 * @param int $id Page identifier (type assumed; unused here)
 * @param string $title Page title (type assumed; unused here)
 * @param string $text Page text (type assumed; unused here)
 */
public function update( $id, $title, $text ) {
	// Intentionally empty: no index to maintain.
}
482
/**
 * Update a search index record's title only.
 *
 * The base class keeps no index, so this does nothing.
 *
 * @param int $id Page identifier (type assumed; unused here)
 * @param string $title Page title (type assumed; unused here)
 */
public function updateTitle( $id, $title ) {
	// Intentionally empty: no index to maintain.
}
494
/**
 * Remove a page from the search index.
 *
 * The base class keeps no index, so this does nothing.
 *
 * @param int $id Page identifier (type assumed; unused here)
 * @param string $title Page title (type assumed; unused here)
 */
public function delete( $id, $title ) {
	// Intentionally empty: no index to maintain.
}
506
/**
 * Get the raw text for updating the index from a content object.
 *
 * The nullable type is spelled out explicitly; relying on a null default to
 * make a typed parameter nullable is deprecated as of PHP 8.4.
 *
 * @param Title $t Title the content belongs to (unused by this implementation)
 * @param Content|null $c Content to extract text from
 * @return string The content's text for the search index, or an empty
 *  string when no content is given
 */
public function getTextFromContent( Title $t, ?Content $c = null ) {
	return $c !== null ? $c->getTextForSearchIndex() : '';
}
521
/**
 * Tell whether the text from getTextFromContent() is already fully processed
 * for indexing.
 *
 * An engine that handles all of its own text processing in
 * getTextFromContent() would override this; the base class answers no.
 *
 * @return bool Always false here
 */
public function textAlreadyUpdatedForIndex() {
	$alreadyProcessed = false;

	return $alreadyProcessed;
}
533
/**
 * Makes search simple string if it was namespaced.
 *
 * When a namespace prefix is recognised (the "all" keyword is not considered,
 * the PrefixSearchExtractNamespace hook is), the namespaces of this engine
 * are replaced by the extracted ones and the query is returned without its
 * prefix. Otherwise nothing changes.
 *
 * @param string $search Raw search string
 * @return string Search string with the namespace prefix removed, if any
 */
protected function normalizeNamespaces( $search ) {
	$parsed = self::parseNamespacePrefixes( $search, false, true );
	if ( $parsed === false ) {
		return $search;
	}

	[ $strippedSearch, $extractedNamespaces ] = $parsed;
	$this->setNamespaces( $extractedNamespaces );

	return $strippedSearch;
}
548
/**
 * Perform an overfetch of completion search results.
 *
 * The limit is raised by one while the backend runs and put back afterwards,
 * even if the backend throws.
 *
 * @param string $search Search string
 * @return SearchSuggestionSet Whatever completionSearchBackend() produced
 */
protected function completionSearchBackendOverfetch( $search ) {
	++$this->limit;
	try {
		$suggestions = $this->completionSearchBackend( $search );
	} finally {
		--$this->limit;
	}

	return $suggestions;
}
564
/**
 * Perform a completion search.
 *
 * Unless the Special namespace is among the searched namespaces, the
 * PrefixSearchBackend hook gets the first chance to answer. If the hook is
 * skipped or does not take over, the simple prefix search is used.
 *
 * @param string $search Search string; surrounding whitespace is trimmed
 * @return SearchSuggestionSet
 */
protected function completionSearchBackend( $search ) {
	$search = trim( $search );
	$results = [];

	// We do not run hook on Special: search.
	// A false return value from the hook means the hook worked.
	// FIXME: Yes, the API is weird. That's why it is going to be deprecated.
	$answeredByHook = !in_array( NS_SPECIAL, $this->namespaces )
		&& !$this->getHookRunner()->onPrefixSearchBackend(
			$this->namespaces, $search, $this->limit, $results, $this->offset );

	if ( $answeredByHook ) {
		return SearchSuggestionSet::fromStrings( $results );
	}

	// Hook did not do the job, use default simple search
	$titles = $this->simplePrefixSearch( $search );

	return SearchSuggestionSet::fromTitles( $titles );
}
594
/**
 * Perform a completion search.
 *
 * A blank search yields an empty suggestion set. Otherwise a namespace
 * prefix is stripped (updating the searched namespaces), the backend is
 * queried with one extra result, and the outcome is post-processed.
 *
 * @param string $search Search string
 * @return SearchSuggestionSet
 */
public function completionSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		// Return empty result
		return SearchSuggestionSet::emptySuggestionSet();
	}

	$normalized = $this->normalizeNamespaces( $search );
	$rawSuggestions = $this->completionSearchBackendOverfetch( $normalized );

	return $this->processCompletionResults( $normalized, $rawSuggestions );
}
608
/**
 * Perform a completion search with variants.
 *
 * The search is first run as given. If that leaves room below the limit
 * (plus the one over-fetched result), the language converter's variants of
 * the search string are tried in turn until the remaining room is used up.
 *
 * The fallback searches temporarily lower the limit and reset the offset
 * through setLimitOffset(). Both are restored before the combined results
 * are processed, so the final shrink and rescore use the limit and offset
 * the caller configured and the engine's state is left unchanged. Without
 * this, the result was cut to the last fallback limit, which could drop
 * valid results or exceed the requested limit.
 *
 * @param string $search Search string
 * @return SearchSuggestionSet
 */
public function completionSearchWithVariants( $search ) {
	if ( trim( $search ) === '' ) {
		return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
	}
	$search = $this->normalizeNamespaces( $search );

	$results = $this->completionSearchBackendOverfetch( $search );
	// "1 +" keeps the over-fetched extra result in the budget.
	$fallbackLimit = 1 + $this->limit - $results->getSize();
	if ( $fallbackLimit > 0 ) {
		$services = MediaWikiServices::getInstance();
		$fallbackSearches = $services->getLanguageConverterFactory()
			->getLanguageConverter( $services->getContentLanguage() )
			->autoConvertToAllVariants( $search );
		$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

		$callerLimit = $this->limit;
		$callerOffset = $this->offset;
		try {
			foreach ( $fallbackSearches as $fbs ) {
				$this->setLimitOffset( $fallbackLimit );
				$fallbackSearchResult = $this->completionSearch( $fbs );
				$results->appendAll( $fallbackSearchResult );
				$fallbackLimit -= $fallbackSearchResult->getSize();
				if ( $fallbackLimit <= 0 ) {
					break;
				}
			}
		} finally {
			// Undo the temporary limit/offset, even if a fallback search threw.
			$this->setLimitOffset( $callerLimit, $callerOffset );
		}
	}
	return $this->processCompletionResults( $search, $results );
}
643
/**
 * Extract titles from completion results.
 *
 * @param SearchSuggestionSet $completionResults
 * @return array One entry per suggestion, each the suggestion's suggested title
 */
public function extractTitles( SearchSuggestionSet $completionResults ) {
	$toTitle = static fn ( SearchSuggestion $sugg ) => $sugg->getSuggestedTitle();

	return $completionResults->map( $toTitle );
}
654
/**
 * Process completion search results.
 *
 * Shrinks the over-fetched set back to the limit, preloads the titles with
 * a link batch, filters on whether each title is known, and (for offset 0
 * only) gives an exact title match the first position.
 *
 * Titles are compared as exact strings: with PHP's loose comparison,
 * numeric-looking titles such as "1e3" and "1000" would be treated as equal
 * and the wrong suggestion would be taken for the exact match.
 *
 * @param string $search Search string; surrounding whitespace is trimmed
 * @param SearchSuggestionSet $suggestions Set to process; modified in place
 * @return SearchSuggestionSet The same set that was passed in
 */
protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
	// We over-fetched to determine pagination. Shrink back down if we have extra results
	// and mark if pagination is possible
	$suggestions->shrink( $this->limit );

	$search = trim( $search );
	$services = MediaWikiServices::getInstance();

	// preload the titles with LinkBatch
	$lb = $services->getLinkBatchFactory()->newLinkBatch(
		$suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} )
	);
	$lb->setCaller( __METHOD__ );
	$lb->execute();

	// NOTE(review): the filter result is used as a count of dropped suggestions - confirm.
	$diff = $suggestions->filter( static function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->isKnown();
	} );
	if ( $diff > 0 ) {
		$services->getStatsdDataFactory()
			->updateCount( 'search.completion.missing', $diff );
	}

	// SearchExactMatchRescorer should probably be refactored to work directly on top of a
	// SearchSuggestionSet instead of converting it to array and trying to infer if it has
	// re-scored anything by inspecting the head of the returned array.
	$results = $suggestions->map( static function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->getPrefixedText();
	} );

	$rescorer = new SearchExactMatchRescorer();
	if ( $this->offset === 0 ) {
		// Rescore results with an exact title match
		// NOTE: in some cases like cross-namespace redirects
		// (frequently used as shortcuts e.g. WP:WP on huwiki) some
		// backends like Cirrus will return no results. We should still
		// try an exact title match to workaround this limitation
		$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
	} else {
		// No need to rescore if offset is not 0
		// The exact match must have been returned at position 0
		// if it existed.
		$rescoredResults = $results;
	}

	if ( count( $rescoredResults ) > 0 ) {
		// Strict search: titles are strings and must match exactly.
		$found = array_search( $rescoredResults[0], $results, true );
		if ( $found === false ) {
			// If the first result is not in the previous array it
			// means that we found a new exact match
			$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
			$suggestions->prepend( $exactMatch );
			if ( $rescorer->getReplacedRedirect() !== null ) {
				// the exact match rescorer replaced one of the suggestion found by the search engine
				// let's remove it from our suggestions set to avoid showing duplicates
				$suggestions->remove( SearchSuggestion::fromTitle( 0,
					Title::newFromText( $rescorer->getReplacedRedirect() ) ) );
			}
			$suggestions->shrink( $this->limit );
		} elseif ( $found > 0 ) {
			// The best result exists but is not first: move it to the top
			$suggestions->rescore( $found );
		}
	}

	return $suggestions;
}
730
/**
 * Simple prefix search for subpages.
 *
 * A blank search yields an empty array. Otherwise a namespace prefix is
 * stripped (updating the searched namespaces) and the simple prefix search
 * backend is queried.
 *
 * @param string $search Search string
 * @return array Results of simplePrefixSearch(), or an empty array
 */
public function defaultPrefixSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return [];
	}

	$normalized = $this->normalizeNamespaces( $search );

	return $this->simplePrefixSearch( $normalized );
}
744
/**
 * Call out to simple search backend.
 *
 * Uses the default database prefix search with the currently configured
 * namespaces, limit and offset.
 *
 * @param string $search Search string
 * @return array Whatever TitlePrefixSearch::defaultSearchBackend() returns
 */
protected function simplePrefixSearch( $search ) {
	// Use default database prefix search
	$prefixSearch = new TitlePrefixSearch();

	return $prefixSearch->defaultSearchBackend(
		$this->namespaces,
		$search,
		$this->limit,
		$this->offset
	);
}
756
/**
 * Get a list of supported profiles.
 *
 * The base class supports none and always answers null. The nullable type
 * is spelled out explicitly; relying on a null default to make a typed
 * parameter nullable is deprecated as of PHP 8.4.
 *
 * @param string $profileType Type of profile requested, e.g. one of the
 *  *_PROFILE_TYPE constants
 * @param User|null $user User requesting the profiles (unused here)
 * @return null
 */
public function getProfiles( $profileType, ?User $user = null ) {
	return null;
}
778
/**
 * Create a search field definition.
 *
 * The base class does not implement any field and always hands out a null
 * index field, whatever name and type are requested.
 *
 * @param string $name Field name (unused here)
 * @param string $type Field type (type assumed; unused here)
 * @return NullIndexField
 */
public function makeSearchFieldMapping( $name, $type ) {
	$placeholderField = new NullIndexField();

	return $placeholderField;
}
792
/**
 * Get fields for search index.
 *
 * Collects the fields declared by the content handler of every known
 * content model, merging definitions that share a name, and finally lets
 * the SearchIndexFields hook add to or change the list.
 *
 * @return array Field definitions keyed by field name
 * @throws InvalidArgumentException When two definitions of the same field
 *  cannot be merged
 */
public function getSearchIndexFields() {
	$handlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
	$models = $handlerFactory->getContentModels();
	$fields = [];
	$processedHandlers = new SplObjectStorage();

	foreach ( $models as $model ) {
		try {
			$handler = $handlerFactory->getContentHandler( $model );
		} catch ( MWUnknownContentModelException $e ) {
			// If we can find no handler, ignore it
			continue;
		}

		// Several models can have the same handler, so avoid processing it repeatedly
		if ( $processedHandlers->contains( $handler ) ) {
			continue;
		}
		$processedHandlers->attach( $handler );

		foreach ( $handler->getFieldsForSearchIndex( $this ) as $fieldName => $fieldData ) {
			if ( empty( $fields[$fieldName] ) ) {
				// First definition of this field wins the slot as-is
				$fields[$fieldName] = $fieldData;
				continue;
			}

			// TODO: do we allow some clashes with the same type or reject all of them?
			$merged = $fields[$fieldName]->merge( $fieldData );
			if ( !$merged ) {
				throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
			}
			$fields[$fieldName] = $merged;
		}
	}

	// Hook to allow extensions to produce search mapping fields
	$this->getHookRunner()->onSearchIndexFields( $fields, $this );

	return $fields;
}
835
/**
 * Augment search results with extra data.
 *
 * Augmentors are collected through the SearchResultsAugment hook. Row
 * augmentors are wrapped so they can be run like set augmentors; each
 * augmentor is then run over the result set and any non-empty data it
 * produces is stored on the set under the augmentor's name.
 *
 * @param ISearchResultSet $resultSet Result set to augment in place
 * @throws InvalidArgumentException When a name is registered both as a row
 *  and as a set augmentor
 */
public function augmentSearchResults( ISearchResultSet $resultSet ) {
	$setAugmentors = [];
	$rowAugmentors = [];
	$this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors );

	$hasAugmentors = $setAugmentors || $rowAugmentors;
	if ( !$hasAugmentors ) {
		// We're done here
		return;
	}

	// Convert row augmentors to set augmentor
	foreach ( $rowAugmentors as $name => $rowAugmentor ) {
		if ( isset( $setAugmentors[$name] ) ) {
			throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
		}
		$setAugmentors[$name] = new PerRowAugmentor( $rowAugmentor );
	}

	foreach ( $setAugmentors as $name => $augmentor ) {
		$augmented = $augmentor->augmentAll( $resultSet );
		if ( !$augmented ) {
			// Nothing produced by this augmentor
			continue;
		}
		$resultSet->setAugmentedData( $name, $augmented );
	}
}
869
/**
 * Set the hook container and build the matching hook runner.
 *
 * @param HookContainer $hookContainer
 */
public function setHookContainer( HookContainer $hookContainer ) {
	$runner = new HookRunner( $hookContainer );

	$this->hookContainer = $hookContainer;
	$this->hookRunner = $runner;
}
879
/**
 * Get a HookContainer, for running extension hooks or for hook metadata.
 *
 * Falls back to the global service container when none has been set.
 *
 * @return HookContainer
 */
protected function getHookContainer(): HookContainer {
	// The fallback shouldn't be hit in core, but it is needed for CirrusSearch
	// which commonly creates a CirrusSearch object without cirrus being
	// configured in $wgSearchType/$wgSearchTypeAlternatives.
	$this->hookContainer ??= MediaWikiServices::getInstance()->getHookContainer();

	return $this->hookContainer;
}
895
/**
 * Get a HookRunner for running core hooks.
 *
 * The runner is created on first use from getHookContainer() and then reused.
 *
 * @return HookRunner
 */
protected function getHookRunner(): HookRunner {
	$this->hookRunner ??= new HookRunner( $this->getHookContainer() );

	return $this->hookRunner;
}
910
911}
const NS_MAIN
Definition Defines.php:65
const NS_SPECIAL
Definition Defines.php:54
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Exception thrown when an unregistered content model is requested.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Service locator for MediaWiki core services.
Service implementation of near match title search.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:54
Represents a title within MediaWiki.
Definition Title.php:79
internal since 1.36
Definition User.php:94
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook... This is case-sensitive (First character may be autom...
Contain a class for special pages.
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getNearMatcher(Config $config)
Get the service class for finding near matches.
getHookRunner()
Get a HookRunner for running core hooks.
searchTitle( $term)
Perform a title-only search query and return a result set.
supports( $feature)
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
update( $id, $title, $text)
Create or update the search index record for the given page.
setNamespaces( $namespaces)
Set which namespaces the search should include.
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
array $features
Feature values.
replacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names and set the list of namespaces...
string[] $searchTerms
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
defaultPrefixSearch( $search)
Simple prefix search for subpages.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
searchArchiveTitle( $term)
Perform a title search in the article archive.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
setFeatureData( $feature, $data)
Way to pass custom data for engines.
completionSearchBackend( $search)
Perform a completion search.
getTextFromContent(Title $t, Content $c=null)
Get the raw text for updating the index from a content object. Nicer search backends could possibly do...
getProfiles( $profileType, User $user=null)
Get a list of supported profiles.
int[]|null $namespaces
getSort()
Get the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
getSearchIndexFields()
Get fields for search index.
getValidSorts()
Get the valid sort directions.
static userHighlightPrefs()
Find snippet highlight settings for all users.
updateTitle( $id, $title)
Update a search index record's title only.
completionSearchWithVariants( $search)
Perform a completion search with variants.
doSearchText( $term)
Perform a full text search query and return a result set.
normalizeNamespaces( $search)
Makes search simple string if it was namespaced.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
getHookContainer()
Get a HookContainer, for running extension hooks or for hook metadata.
completionSearch( $search)
Perform a completion search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
simplePrefixSearch( $search)
Call out to simple search backend.
setSort( $sort)
Set the sort direction of the search results.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features.
setHookContainer(HookContainer $hookContainer)
searchText( $term)
Perform a full text search query and return a result set.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
A set of search suggestions.
filter( $callback)
Filter the suggestions array.
rescore( $key)
Move the suggestion at index $key to the first position.
shrink( $limit)
Remove any extra elements in the suggestions set.
static fromStrings(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a string array.
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a title array.
map( $callback)
Call array_map on the suggestions array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
remove(SearchSuggestion $suggestion)
Remove a suggestion from the set.
A search suggestion.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
Performs prefix search, returning Title objects.
Base interface for representing page content.
Definition Content.php:37
A set of SearchEngine results.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Interface for configuration instances.
Definition Config.php:32
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...
Augment search results.