MediaWiki master
SearchEngine.php
Go to the documentation of this file.
1<?php
37
43abstract class SearchEngine {
/** Sort order that $sort is initialised with, and the only one getValidSorts() offers here. */
44 public const DEFAULT_SORT = 'relevance';
45
/** @var string NOTE(review): not read anywhere in the visible part of this class; purpose unconfirmed. */
47 public $prefix = '';
48
/** @var int[]|null Namespaces the search should include; starts out as the main namespace only. */
50 public $namespaces = [ NS_MAIN ];
51
/** @var int Maximum number of results to return (see setLimitOffset()). */
53 protected $limit = 10;
54
/** @var int Number of results to skip before returning the first (see setLimitOffset()). */
56 protected $offset = 0;
57
/** @var string[] NOTE(review): not used in the visible part of this class; presumably filled by subclasses. */
62 protected $searchTerms = [];
63
/** @var bool Whether the searcher should try to build a suggestion (see setShowSuggestion()). */
65 protected $showSuggestion = true;
/** @var string Current sort order; only changed through setSort(). */
67 private $sort = self::DEFAULT_SORT;
68
/** @var array Feature values, keyed by feature name (see setFeatureData() / getFeatureData()). */
70 protected $features = [];
71
/** @var HookContainer|null Set by setHookContainer() or lazily by getHookContainer(). */
73 private $hookContainer;
74
/** @var HookRunner|null Set by setHookContainer() or lazily by getHookRunner(). */
76 private $hookRunner;
77
/** Profile type for completionSearch. */
79 public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';
80
/** Profile type for query independent ranking features. */
82 public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';
83
/** Integer flag for legalSearchChars: includes all chars allowed in a search query. */
85 protected const CHARS_ALL = 1;
86
/** Integer flag for legalSearchChars: includes all chars allowed in a search term. */
88 protected const CHARS_NO_SYNTAX = 2;
89
/**
 * Perform a full text search query and return a result set.
 *
 * The query itself is run by doSearchText(); the call goes through
 * maybePaginate() so that engines which do not paginate themselves get
 * their result set trimmed to the configured limit.
 *
 * @param string $term Search term, forwarded untouched to doSearchText()
 * @return mixed Whatever doSearchText() produced
 */
public function searchText( $term ) {
	$runQuery = fn () => $this->doSearchText( $term );

	return $this->maybePaginate( $runQuery );
}
105
/**
 * Perform a full text search query and return a result set.
 *
 * Base implementation: runs no query and always yields null. Concrete
 * engines are expected to override it.
 *
 * @param string $term Search term (ignored here)
 * @return null
 */
protected function doSearchText( $term ) {
	// Nothing to search with in the base class
	return null;
}
118
/**
 * Perform a title search in the article archive.
 *
 * Thin public entry point; the work is done by doSearchArchiveTitle().
 *
 * @param string $term Search term, forwarded untouched
 * @return mixed Whatever doSearchArchiveTitle() produced
 */
public function searchArchiveTitle( $term ) {
	$archiveMatches = $this->doSearchArchiveTitle( $term );

	return $archiveMatches;
}
136
/**
 * Perform a title search in the article archive.
 *
 * Base implementation: reports success with an empty list of matches.
 *
 * @param string $term Search term (ignored here)
 * @return Status Good status whose value is an empty array
 */
protected function doSearchArchiveTitle( $term ) {
	$noMatches = [];

	return Status::newGood( $noMatches );
}
149
/**
 * Perform a title-only search query and return a result set.
 *
 * The query itself is run by doSearchTitle(); the call goes through
 * maybePaginate() so that engines which do not paginate themselves get
 * their result set trimmed to the configured limit.
 *
 * @param string $term Search term, forwarded untouched to doSearchTitle()
 * @return mixed Whatever doSearchTitle() produced
 */
public function searchTitle( $term ) {
	$runQuery = fn () => $this->doSearchTitle( $term );

	return $this->maybePaginate( $runQuery );
}
166
/**
 * Perform a title-only search query and return a result set.
 *
 * Base implementation: runs no query and always yields null. Concrete
 * engines are expected to override it.
 *
 * @param string $term Search term (ignored here)
 * @return null
 */
protected function doSearchTitle( $term ) {
	// Nothing to search with in the base class
	return null;
}
179
/**
 * Run a search callback, over-fetching by one row unless the engine paginates itself.
 *
 * Engines implementing PaginatingSearchEngine are called as-is. For all
 * others the limit is raised by one for the duration of the call and the
 * resulting ISearchResultSet (returned directly or wrapped in a Status) is
 * then shrunk back to the configured limit.
 *
 * @param Closure $fn Callback performing the actual search
 * @return mixed The callback's return value; a contained result set may have been shrunk
 */
private function maybePaginate( Closure $fn ) {
	if ( $this instanceof PaginatingSearchEngine ) {
		return $fn();
	}

	// Temporarily request one extra row; the limit is restored even if $fn throws
	$this->limit += 1;
	try {
		$outcome = $fn();
	} finally {
		$this->limit -= 1;
	}

	// Unwrap a Status only when the outcome is not already a result set itself
	$unwrap = !( $outcome instanceof ISearchResultSet ) && $outcome instanceof Status;
	$candidate = $unwrap ? $outcome->getValue() : $outcome;

	if ( $candidate instanceof ISearchResultSet ) {
		$candidate->shrink( $this->limit );
	}

	return $outcome;
}
213
/**
 * Tell whether this engine supports the given feature.
 *
 * The base class only reports 'search-update' as supported; every other
 * feature name (including 'title-suffix-filter') is reported unsupported.
 *
 * @param string $feature Feature name
 * @return bool
 */
public function supports( $feature ) {
	// Loose comparison is deliberate here: it keeps the matching rules of a switch statement
	return $feature == 'search-update';
}
230
/**
 * Way to pass custom data for engines.
 *
 * Stores $data under $feature, replacing any value stored earlier.
 *
 * @param string $feature Key under which the data is stored
 * @param mixed $data Value to remember
 */
public function setFeatureData( $feature, $data ) {
	$featureData = [ $feature => $data ];
	// array_replace keeps existing keys as-is and overwrites only $feature
	$this->features = array_replace( $this->features, $featureData );
}
240
/**
 * Way to retrieve custom data set by setFeatureData or by the engine itself.
 *
 * @param string $feature Key to look up
 * @return mixed|null Stored value, or null when nothing (or null) is stored for the key
 */
public function getFeatureData( $feature ) {
	if ( isset( $this->features[$feature] ) ) {
		return $this->features[$feature];
	}

	return null;
}
251
/**
 * Normalise text for searching.
 *
 * The base implementation only applies the content language's word
 * segmentation, which some languages such as Chinese require.
 *
 * @param string $string Text to normalise
 * @return string Result of Language::segmentByWord()
 */
public function normalizeText( $string ) {
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	return $contentLanguage->segmentByWord( $string );
}
264
/**
 * Get service class for finding near matches.
 *
 * @param Config $config Not used by this implementation; the matcher comes from the service container
 * @return mixed The title matcher service
 */
public function getNearMatcher( Config $config ) {
	$services = MediaWikiServices::getInstance();

	return $services->getTitleMatcher();
}
274
/**
 * Get near matcher for default SearchEngine.
 *
 * Logs a deprecation warning (deprecated since 1.40) and then returns the
 * same title matcher service as getNearMatcher().
 *
 * @deprecated since 1.40
 * @return mixed The title matcher service
 */
protected static function defaultNearMatcher() {
	wfDeprecated( __METHOD__, '1.40' );

	$services = MediaWikiServices::getInstance();
	return $services->getTitleMatcher();
}
285
/**
 * Get chars legal for search.
 *
 * The base implementation returns the same character-class fragment for
 * every $type.
 *
 * @param int $type One of the CHARS_* flags (ignored here)
 * @return string Character list, presumably meant for use inside a regex character class
 */
public function legalSearchChars( $type = self::CHARS_ALL ) {
	$charClass = "A-Za-z_'.0-9\\x80-\\xFF\\-";

	return $charClass;
}
295
/**
 * Set the maximum number of results to return and how many to skip
 * before returning the first.
 *
 * Both values are converted to integers before being stored.
 *
 * @param int $limit Maximum number of results
 * @param int $offset Number of results to skip
 */
public function setLimitOffset( $limit, $offset = 0 ) {
	$this->limit = (int)$limit;
	$this->offset = (int)$offset;
}
307
/**
 * Set which namespaces the search should include.
 *
 * An empty or falsy argument clears the list. Otherwise only negative
 * namespace IDs and IDs listed as searchable by the search engine config
 * are kept; original array keys are preserved.
 *
 * @param int[]|null $namespaces Namespace IDs
 */
public function setNamespaces( $namespaces ) {
	if ( !$namespaces ) {
		$this->namespaces = [];
		return;
	}

	$searchable = MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces();
	// Keep only valid namespaces
	$this->namespaces = array_filter(
		$namespaces,
		static fn ( $ns ) => $ns < 0 || isset( $searchable[$ns] )
	);
}
326
/**
 * Set whether the searcher should try to build a suggestion.
 *
 * The value is stored as given, without conversion.
 *
 * @param bool $showSuggestion
 */
public function setShowSuggestion( $showSuggestion ) {
	// Stored verbatim; no cast is applied
	$this->showSuggestion = $showSuggestion;
}
337
/**
 * Get the valid sort directions.
 *
 * The base class offers only the default sort.
 *
 * @return string[]
 */
public function getValidSorts() {
	$sorts = [];
	$sorts[] = self::DEFAULT_SORT;

	return $sorts;
}
350
/**
 * Set the sort direction of the search results.
 *
 * The value must be exactly one of the entries returned by getValidSorts().
 * The check is strict so that type juggling (for example a boolean true)
 * cannot slip an invalid value through.
 *
 * @param string $sort Sort direction
 * @throws InvalidArgumentException When $sort is not a valid sort
 */
public function setSort( $sort ) {
	// Fetch once: the list feeds both the check and the error message
	$validSorts = $this->getValidSorts();
	if ( !in_array( $sort, $validSorts, true ) ) {
		throw new InvalidArgumentException( "Invalid sort: $sort. " .
			"Must be one of: " . implode( ', ', $validSorts ) );
	}
	$this->sort = $sort;
}
365
/**
 * Get the sort direction of the search results.
 *
 * @return string Value last accepted by setSort(), or the default sort
 */
public function getSort() {
	$currentSort = $this->sort;

	return $currentSort;
}
375
/**
 * Parse some common prefixes: all (search everything) or namespace names
 * and set the list of namespaces accordingly.
 *
 * The base implementation does none of that: it hands the query back
 * unchanged and leaves the namespace list alone.
 *
 * @param string $query Search query
 * @return string The query, unchanged
 */
public function replacePrefixes( $query ) {
	// Deliberately a pass-through in the base class
	return $query;
}
388
/**
 * Parse some common prefixes: all (search everything) or namespace names.
 *
 * @param string $query Search query, possibly starting with "prefix:"
 * @param bool $withAllKeyword Also recognise the localised "searchall" keyword and "all:"
 * @param bool $withPrefixSearchExtractNamespaceHook When the prefix is not a namespace
 *  name, give the PrefixSearchExtractNamespace hook a chance to resolve it
 * @return array|false False when no prefix could be resolved; otherwise
 *  [ remaining query, namespaces ], where namespaces is null for an "all" query
 */
public static function parseNamespacePrefixes(
	$query,
	$withAllKeyword = true,
	$withPrefixSearchExtractNamespaceHook = false
) {
	$colonPos = strpos( $query, ':' );
	if ( $colonPos === false ) {
		// No separator, hence no prefix to parse
		return false;
	}

	if ( $withAllKeyword ) {
		$localisedAll = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
		// 'all:' is always accepted so that every wiki shares a common syntax
		$allKeywords = $localisedAll === 'all:'
			? [ $localisedAll ]
			: [ $localisedAll, 'all:' ];

		foreach ( $allKeywords as $keyword ) {
			if ( str_starts_with( $query, $keyword ) ) {
				// "All" query: strip the keyword, no namespace restriction
				return [ substr( $query, strlen( $keyword ) ), null ];
			}
		}
	}

	// Spaces in the prefix are treated like underscores when resolving the namespace
	$prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
	$services = MediaWikiServices::getInstance();
	$nsIndex = $services->getContentLanguage()->getNsIndex( $prefix );
	if ( $nsIndex !== false ) {
		return [ substr( $query, strlen( $prefix ) + 1 ), [ $nsIndex ] ];
	}

	if ( !$withPrefixSearchExtractNamespaceHook ) {
		return false;
	}

	$hookNamespaces = [ NS_MAIN ];
	$hookQuery = $query;
	$hookRunner = new HookRunner( $services->getHookContainer() );
	$hookRunner->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery );
	if ( $hookQuery === $query ) {
		// The hook left the query alone, so the prefix stays unresolved
		return false;
	}

	return [ $hookQuery, $hookNamespaces ];
}
457
/**
 * Find snippet highlight settings for all users.
 *
 * The settings are hard-coded rather than read from user preferences.
 *
 * NOTE(review): the two assignments were missing from this listing, which
 * left both variables undefined. The values 2 and 75 are believed to match
 * upstream MediaWiki; confirm against the real source file.
 *
 * @return int[] [ number of context lines, number of context characters ]
 */
public static function userHighlightPrefs() {
	$contextlines = 2;
	$contextchars = 75;

	return [ $contextlines, $contextchars ];
}
470
/**
 * Create or update the search index record for the given page.
 *
 * The base implementation does nothing.
 *
 * @param int $id Page identifier (presumably the page ID; confirm with callers)
 * @param string $title Page title
 * @param string $text Text to index
 */
public function update( $id, $title, $text ) {
	// Intentionally empty: the base engine keeps no index
}
483
/**
 * Update a search index record's title only.
 *
 * The base implementation does nothing.
 *
 * @param int $id Page identifier (presumably the page ID; confirm with callers)
 * @param string $title New page title
 */
public function updateTitle( $id, $title ) {
	// Intentionally empty: the base engine keeps no index
}
495
/**
 * Remove a page from the search index.
 *
 * The base implementation does nothing.
 *
 * @param int $id Page identifier (presumably the page ID; confirm with callers)
 * @param string $title Page title
 */
public function delete( $id, $title ) {
	// Intentionally empty: the base engine keeps no index
}
507
/**
 * Get the raw text for updating the index from a content object.
 *
 * @param Title $t Title the content belongs to (not used by this implementation)
 * @param Content|null $c Content to extract text from
 * @return string Text for the search index, or an empty string when no content is given
 */
public function getTextFromContent( Title $t, ?Content $c = null ) {
	if ( $c === null ) {
		return '';
	}

	return $c->getTextForSearchIndex();
}
522
/**
 * Tell whether getTextFromContent() already does all of the engine's own
 * text processing.
 *
 * The base implementation always answers no.
 *
 * @return bool Always false here
 */
public function textAlreadyUpdatedForIndex() {
	$alreadyProcessed = false;

	return $alreadyProcessed;
}
534
/**
 * Makes search simple string if it was namespaced.
 *
 * When a namespace prefix can be resolved (without the "all" keyword, with
 * the PrefixSearchExtractNamespace hook enabled), the engine's namespace
 * list is replaced accordingly and the prefix is stripped from the query.
 *
 * @param string $search Search query, possibly prefixed with a namespace
 * @return string Query without the resolved prefix, or the original query
 */
protected function normalizeNamespaces( $search ) {
	$parsed = self::parseNamespacePrefixes( $search, false, true );
	if ( $parsed === false ) {
		// Nothing resolved: leave both query and namespaces alone
		return $search;
	}

	[ $strippedQuery, $namespaces ] = $parsed;
	$this->setNamespaces( $namespaces );

	return $strippedQuery;
}
549
/**
 * Perform an overfetch of completion search results.
 *
 * Raises the limit by one while completionSearchBackend() runs and restores
 * it afterwards, even when the backend throws.
 *
 * @param string $search Search query
 * @return SearchSuggestionSet Whatever completionSearchBackend() produced
 */
protected function completionSearchBackendOverfetch( $search ) {
	$this->limit += 1;
	try {
		$suggestions = $this->completionSearchBackend( $search );
	} finally {
		$this->limit -= 1;
	}

	return $suggestions;
}
565
/**
 * Perform a completion search.
 *
 * Unless the Special namespace is being searched, the PrefixSearchBackend
 * hook gets the first chance to answer. If it does not, the simple prefix
 * search is used instead.
 *
 * @param string $search Search query; surrounding whitespace is trimmed
 * @return SearchSuggestionSet
 */
protected function completionSearchBackend( $search ) {
	$search = trim( $search );

	// The hook is not run when searching the Special: namespace
	if ( !in_array( NS_SPECIAL, $this->namespaces ) ) {
		$hookResults = [];
		$hookPassed = $this->getHookRunner()->onPrefixSearchBackend(
			$this->namespaces, $search, $this->limit, $hookResults, $this->offset );
		if ( !$hookPassed ) {
			// A false return value means the hook handled the search.
			// FIXME: Yes, the API is weird. That's why it is going to be deprecated.
			return SearchSuggestionSet::fromStrings( $hookResults );
		}
	}

	// Hook did not do the job (or was skipped): use the default simple search
	$titles = $this->simplePrefixSearch( $search );

	return SearchSuggestionSet::fromTitles( $titles );
}
595
/**
 * Perform a completion search.
 *
 * A blank query yields an empty suggestion set. Otherwise the namespace
 * prefix is resolved, the backend is queried with an overfetch of one, and
 * the results are post-processed by processCompletionResults().
 *
 * @param string $search Search query
 * @return SearchSuggestionSet
 */
public function completionSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return SearchSuggestionSet::emptySuggestionSet();
	}

	$normalized = $this->normalizeNamespaces( $search );

	return $this->processCompletionResults(
		$normalized,
		$this->completionSearchBackendOverfetch( $normalized )
	);
}
609
/**
 * Perform a completion search with variants.
 *
 * Runs the regular completion search first. If that leaves room within the
 * limit (plus the one overfetched row), language variants of the query are
 * searched as well and their suggestions appended.
 *
 * The variant searches temporarily lower the limit and reset the offset via
 * setLimitOffset(). Both are restored before the final post-processing, so
 * the combined set is shrunk to the caller's limit and the engine is not
 * left with a modified limit/offset.
 *
 * @param string $search Search query
 * @return SearchSuggestionSet
 */
public function completionSearchWithVariants( $search ) {
	if ( trim( $search ) === '' ) {
		return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
	}
	$search = $this->normalizeNamespaces( $search );

	$results = $this->completionSearchBackendOverfetch( $search );
	$fallbackLimit = 1 + $this->limit - $results->getSize();
	if ( $fallbackLimit > 0 ) {
		$services = MediaWikiServices::getInstance();
		$fallbackSearches = $services->getLanguageConverterFactory()
			->getLanguageConverter( $services->getContentLanguage() )
			->autoConvertToAllVariants( $search );
		// Drop duplicates and the query that has already been searched
		$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

		// Remember the caller's paging so the fallback searches cannot clobber it
		$callerLimit = $this->limit;
		$callerOffset = $this->offset;
		try {
			foreach ( $fallbackSearches as $fbs ) {
				$this->setLimitOffset( $fallbackLimit );
				$fallbackSearchResult = $this->completionSearch( $fbs );
				$results->appendAll( $fallbackSearchResult );
				$fallbackLimit -= $fallbackSearchResult->getSize();
				if ( $fallbackLimit <= 0 ) {
					break;
				}
			}
		} finally {
			$this->setLimitOffset( $callerLimit, $callerOffset );
		}
	}
	return $this->processCompletionResults( $search, $results );
}
644
/**
 * Extract titles from completion results.
 *
 * @param SearchSuggestionSet $completionResults
 * @return array Result of mapping every suggestion to its suggested title
 */
public function extractTitles( SearchSuggestionSet $completionResults ) {
	$toTitle = static fn ( SearchSuggestion $sugg ) => $sugg->getSuggestedTitle();

	return $completionResults->map( $toTitle );
}
655
/**
 * Process completion search results.
 *
 * Shrinks the over-fetched set to the limit, preloads the titles, drops
 * suggestions whose title is not known, and, on the first page only, moves
 * or inserts an exact title match at the top.
 *
 * @param string $search Search query; surrounding whitespace is trimmed
 * @param SearchSuggestionSet $suggestions Set to process; modified in place
 * @return SearchSuggestionSet The same set that was passed in
 */
protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
	// We over-fetched to determine pagination. Shrink back down if we have extra results
	// and mark if pagination is possible
	$suggestions->shrink( $this->limit );

	$search = trim( $search );
	$services = MediaWikiServices::getInstance();

	// preload the titles with LinkBatch
	$lb = $services->getLinkBatchFactory()->newLinkBatch(
		$suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} )
	);
	$lb->setCaller( __METHOD__ );
	$lb->execute();

	// Keep only suggestions pointing at known titles; $diff is what filter() reports back
	$diff = $suggestions->filter( static function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->isKnown();
	} );
	if ( $diff > 0 ) {
		$services->getStatsdDataFactory()
			->updateCount( 'search.completion.missing', $diff );
	}

	// SearchExactMatchRescorer should probably be refactored to work directly on top of a
	// SearchSuggestionSet instead of converting it to array and trying to infer if it has
	// re-scored anything by inspecting the head of the returned array.
	$results = $suggestions->map( static function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->getPrefixedText();
	} );

	$rescorer = new SearchExactMatchRescorer();
	if ( $this->offset === 0 ) {
		// Rescore results with an exact title match
		// NOTE: in some cases like cross-namespace redirects
		// (frequently used as shortcuts e.g. WP:WP on huwiki) some
		// backends like Cirrus will return no results. We should still
		// try an exact title match to workaround this limitation
		$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
	} else {
		// No need to rescore if offset is not 0
		// The exact match must have been returned at position 0
		// if it existed.
		$rescoredResults = $results;
	}

	if ( count( $rescoredResults ) > 0 ) {
		// Strict search: titles are strings, and loose comparison would treat
		// numeric-looking titles such as "1e3" and "1000" as the same entry
		$found = array_search( $rescoredResults[0], $results, true );
		if ( $found === false ) {
			// If the first result is not in the previous array it
			// means that we found a new exact match
			$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
			$suggestions->prepend( $exactMatch );
			if ( $rescorer->getReplacedRedirect() !== null ) {
				// the exact match rescorer replaced one of the suggestions found by the search engine
				// let's remove it from our suggestions set to avoid showing duplicates
				$suggestions->remove( SearchSuggestion::fromTitle( 0,
					Title::newFromText( $rescorer->getReplacedRedirect() ) ) );
			}
			$suggestions->shrink( $this->limit );
		} elseif ( $found > 0 ) {
			// The best result exists but is not first: move it to the top
			$suggestions->rescore( $found );
		}
	}

	return $suggestions;
}
731
/**
 * Simple prefix search for subpages.
 *
 * A blank query yields an empty array. Otherwise the namespace prefix is
 * resolved and the simple prefix search backend is queried.
 *
 * @param string $search Search query
 * @return array Titles found by simplePrefixSearch(), or an empty array
 */
public function defaultPrefixSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return [];
	}

	$normalized = $this->normalizeNamespaces( $search );

	return $this->simplePrefixSearch( $normalized );
}
745
/**
 * Call out to simple search backend.
 *
 * Uses the default database prefix search with the engine's current
 * namespaces, limit and offset.
 *
 * @param string $search Search query
 * @return array Whatever TitlePrefixSearch::defaultSearchBackend() produced
 */
protected function simplePrefixSearch( $search ) {
	$prefixSearch = new TitlePrefixSearch();

	return $prefixSearch->defaultSearchBackend(
		$this->namespaces,
		$search,
		$this->limit,
		$this->offset
	);
}
757
/**
 * Get a list of supported profiles.
 *
 * The base implementation knows no profiles and always answers null.
 *
 * @param string $profileType Profile type, e.g. one of the *_PROFILE_TYPE constants (ignored here)
 * @param User|null $user User requesting the profiles (ignored here)
 * @return null
 */
public function getProfiles( $profileType, ?User $user = null ) {
	// No profiles are defined at this level
	return null;
}
779
/**
 * Create a search field definition.
 *
 * The base implementation returns a null index field for every request,
 * meaning the engine does not implement the field.
 *
 * @param string $name Field name (ignored here)
 * @param string $type Field type (ignored here)
 * @return NullIndexField
 */
public function makeSearchFieldMapping( $name, $type ) {
	$unsupportedField = new NullIndexField();

	return $unsupportedField;
}
793
/**
 * Get fields for search index.
 *
 * Collects the index fields of every distinct content handler, merging
 * definitions that share a name, and finally lets extensions adjust the
 * list through the SearchIndexFields hook.
 *
 * @return array Field definitions keyed by field name
 * @throws InvalidArgumentException When two definitions of the same field cannot be merged
 */
public function getSearchIndexFields() {
	$handlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
	$fields = [];
	$seenHandlers = new SplObjectStorage();

	foreach ( $handlerFactory->getContentModels() as $model ) {
		try {
			$handler = $handlerFactory->getContentHandler( $model );
		} catch ( MWUnknownContentModelException $e ) {
			// No handler for this model: skip it
			continue;
		}

		// Several models can share one handler; process each handler only once
		if ( $seenHandlers->contains( $handler ) ) {
			continue;
		}
		$seenHandlers->attach( $handler );

		foreach ( $handler->getFieldsForSearchIndex( $this ) as $fieldName => $fieldData ) {
			if ( empty( $fields[$fieldName] ) ) {
				$fields[$fieldName] = $fieldData;
				continue;
			}

			// TODO: do we allow some clashes with the same type or reject all of them?
			$merged = $fields[$fieldName]->merge( $fieldData );
			if ( !$merged ) {
				throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
			}
			$fields[$fieldName] = $merged;
		}
	}

	// Hook to allow extensions to produce search mapping fields
	$this->getHookRunner()->onSearchIndexFields( $fields, $this );

	return $fields;
}
836
/**
 * Augment search results with extra data.
 *
 * Augmentors are collected through the SearchResultsAugment hook. Per-row
 * augmentors are wrapped into set augmentors, then every augmentor is run
 * and any non-empty data is attached to the result set under its name.
 *
 * @param ISearchResultSet $resultSet Result set to augment
 * @throws InvalidArgumentException When a name is used by both a row and a set augmentor
 */
public function augmentSearchResults( ISearchResultSet $resultSet ) {
	$setAugmentors = [];
	$rowAugmentors = [];
	$this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors );

	if ( !( $setAugmentors || $rowAugmentors ) ) {
		// Nobody registered anything
		return;
	}

	// Wrap each row augmentor so that it can be driven like a set augmentor
	foreach ( $rowAugmentors as $name => $rowAugmentor ) {
		if ( isset( $setAugmentors[$name] ) ) {
			throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
		}
		$setAugmentors[$name] = new PerRowAugmentor( $rowAugmentor );
	}

	foreach ( $setAugmentors as $name => $augmentor ) {
		$augmented = $augmentor->augmentAll( $resultSet );
		if ( !$augmented ) {
			continue;
		}
		$resultSet->setAugmentedData( $name, $augmented );
	}
}
870
/**
 * Inject the hook container and build the matching hook runner.
 *
 * @param HookContainer $hookContainer
 */
public function setHookContainer( HookContainer $hookContainer ) {
	$runner = new HookRunner( $hookContainer );

	$this->hookContainer = $hookContainer;
	$this->hookRunner = $runner;
}
880
/**
 * Get a HookContainer, for running extension hooks or for hook metadata.
 *
 * Falls back to the global service container when none has been injected.
 *
 * @return HookContainer
 */
protected function getHookContainer(): HookContainer {
	// This fallback shouldn't be hit in core, but it is needed for CirrusSearch
	// which commonly creates a CirrusSearch object without cirrus being
	// configured in $wgSearchType/$wgSearchTypeAlternatives.
	$this->hookContainer ??= MediaWikiServices::getInstance()->getHookContainer();

	return $this->hookContainer;
}
896
/**
 * Get a HookRunner for running core hooks.
 *
 * The runner is created on first use from getHookContainer() and then reused.
 *
 * @return HookRunner
 */
protected function getHookRunner(): HookRunner {
	$this->hookRunner ??= new HookRunner( $this->getHookContainer() );

	return $this->hookRunner;
}
911
912}
const NS_MAIN
Definition Defines.php:65
const NS_SPECIAL
Definition Defines.php:54
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Exception thrown when an unregistered content model is requested.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Service locator for MediaWiki core services.
Service implementation of near match title search.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:54
Represents a title within MediaWiki.
Definition Title.php:78
internal since 1.36
Definition User.php:93
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook... This is case-sensitive (First character may be autom...
Contain a class for special pages.
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getNearMatcher(Config $config)
Get service class for finding near matches.
getHookRunner()
Get a HookRunner for running core hooks.
searchTitle( $term)
Perform a title-only search query and return a result set.
supports( $feature)
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
update( $id, $title, $text)
Create or update the search index record for the given page.
setNamespaces( $namespaces)
Set which namespaces the search should include.
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
getTextFromContent(Title $t, ?Content $c=null)
Get the raw text for updating the index from a content object. Nicer search backends could possibly do...
array $features
Feature values.
replacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names and set the list of namespaces...
string[] $searchTerms
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
defaultPrefixSearch( $search)
Simple prefix search for subpages.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
searchArchiveTitle( $term)
Perform a title search in the article archive.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
setFeatureData( $feature, $data)
Way to pass custom data for engines.
completionSearchBackend( $search)
Perform a completion search.
int[] null $namespaces
getSort()
Get the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
getSearchIndexFields()
Get fields for search index.
getValidSorts()
Get the valid sort directions.
static userHighlightPrefs()
Find snippet highlight settings for all users.
updateTitle( $id, $title)
Update a search index record's title only.
completionSearchWithVariants( $search)
Perform a completion search with variants.
doSearchText( $term)
Perform a full text search query and return a result set.
normalizeNamespaces( $search)
Makes search simple string if it was namespaced.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
getHookContainer()
Get a HookContainer, for running extension hooks or for hook metadata.
completionSearch( $search)
Perform a completion search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
getProfiles( $profileType, ?User $user=null)
Get a list of supported profiles.
simplePrefixSearch( $search)
Call out to simple search backend.
setSort( $sort)
Set the sort direction of the search results.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features.
setHookContainer(HookContainer $hookContainer)
searchText( $term)
Perform a full text search query and return a result set.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
A set of search suggestions.
filter( $callback)
Filter the suggestions array.
rescore( $key)
Move the suggestion at index $key to the first position.
shrink( $limit)
Remove any extra elements in the suggestions set.
static fromStrings(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a string array.
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a title array.
map( $callback)
Call array_map on the suggestions array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
remove(SearchSuggestion $suggestion)
Remove a suggestion from the set.
A search suggestion.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
Performs prefix search, returning Title objects.
A set of SearchEngine results.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Interface for configuration instances.
Definition Config.php:32
Base interface for representing page content.
Definition Content.php:39
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...
Augment search results.