MediaWiki REL1_39
SearchEngine.php
Go to the documentation of this file.
1<?php
31
/**
 * Base class for search engines.
 *
 * Most operations here are defaults or no-ops (for example doSearchText()
 * returns null and update() does nothing); concrete engines override the
 * pieces they actually implement.
 */
abstract class SearchEngine {
	public const DEFAULT_SORT = 'relevance';

	/** @var string */
	public $prefix = '';

	/** @var int[]|null Namespaces the search is restricted to */
	public $namespaces = [ NS_MAIN ];

	/** @var int Maximum number of results to return */
	protected $limit = 10;

	/** @var int Number of results to skip before the first returned one */
	protected $offset = 0;

	/** @var string[] */
	protected $searchTerms = [];

	/** @var bool Whether the searcher should try to build a suggestion */
	protected $showSuggestion = true;

	/** @var string Current sort; one of getValidSorts() once set via setSort() */
	private $sort = self::DEFAULT_SORT;

	/** @var array Feature values */
	protected $features = [];

	/** @var HookContainer|null Lazily populated by getHookContainer() */
	private $hookContainer;

	/** @var HookRunner|null Lazily populated by getHookRunner() */
	private $hookRunner;

	/** Profile type for completionSearch */
	public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';

	/** Profile type for query independent ranking features */
	public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';

	/** Integer flag for legalSearchChars: includes all chars allowed in a search query */
	protected const CHARS_ALL = 1;

	/** Integer flag for legalSearchChars: includes all chars allowed in a search term */
	protected const CHARS_NO_SYNTAX = 2;

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * Delegates to doSearchText() through maybePaginate(), so an engine that
	 * is not a PaginatingSearchEngine is asked for one extra result.
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null Whatever doSearchText() returns
	 */
	public function searchText( $term ) {
		return $this->maybePaginate( function () use ( $term ) {
			return $this->doSearchText( $term );
		} );
	}

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null The base implementation returns null
	 */
	protected function doSearchText( $term ) {
		return null;
	}

	/**
	 * Perform a title search in the article archive.
	 *
	 * @param string $term Raw search term
	 * @return Status Whatever doSearchArchiveTitle() returns
	 */
	public function searchArchiveTitle( $term ) {
		return $this->doSearchArchiveTitle( $term );
	}

	/**
	 * Perform a title search in the article archive.
	 *
	 * @param string $term Raw search term
	 * @return Status The base implementation returns a good Status wrapping an empty array
	 */
	protected function doSearchArchiveTitle( $term ) {
		return Status::newGood( [] );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * Delegates to doSearchTitle() through maybePaginate().
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null Whatever doSearchTitle() returns
	 */
	public function searchTitle( $term ) {
		return $this->maybePaginate( function () use ( $term ) {
			return $this->doSearchTitle( $term );
		} );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null The base implementation returns null
	 */
	protected function doSearchTitle( $term ) {
		return null;
	}

	/**
	 * Run a search callback, over-fetching by one result unless the engine
	 * paginates on its own.
	 *
	 * Engines that are a PaginatingSearchEngine get the callback invoked
	 * unchanged. For every other engine the limit is raised by one for the
	 * duration of the call and the resulting set (returned directly or wrapped
	 * in a Status) is shrunk back to the configured limit.
	 *
	 * @param Closure $fn Callback performing the actual search
	 * @return mixed The callback's return value
	 */
	private function maybePaginate( Closure $fn ) {
		if ( $this instanceof PaginatingSearchEngine ) {
			return $fn();
		}

		$this->limit++;
		try {
			$resultSetOrStatus = $fn();
		} finally {
			// Always undo the temporary bump, even if the search throws.
			$this->limit--;
		}

		$resultSet = null;
		if ( $resultSetOrStatus instanceof ISearchResultSet ) {
			$resultSet = $resultSetOrStatus;
		} elseif ( $resultSetOrStatus instanceof Status &&
			$resultSetOrStatus->getValue() instanceof ISearchResultSet
		) {
			$resultSet = $resultSetOrStatus->getValue();
		}
		if ( $resultSet ) {
			$resultSet->shrink( $this->limit );
		}

		return $resultSetOrStatus;
	}

	/**
	 * Report whether this engine supports the named feature.
	 *
	 * The base implementation only claims support for 'search-update'.
	 *
	 * @param string $feature Feature name
	 * @return bool
	 */
	public function supports( $feature ) {
		switch ( $feature ) {
			case 'search-update':
				return true;
			case 'title-suffix-filter':
			default:
				return false;
		}
	}

	/**
	 * Way to pass custom data for engines.
	 *
	 * @param string $feature Feature name, used as the storage key
	 * @param mixed $data Value stored for the feature
	 */
	public function setFeatureData( $feature, $data ) {
		$this->features[$feature] = $data;
	}

	/**
	 * Way to retrieve custom data set by setFeatureData or by the engine itself.
	 *
	 * @param string $feature Feature name
	 * @return mixed|null The stored value, or null when nothing is stored
	 */
	public function getFeatureData( $feature ) {
		return $this->features[$feature] ?? null;
	}

	/**
	 * Normalize text to be used for searching.
	 *
	 * The base implementation only applies the content language's word
	 * segmentation; derived classes may override it.
	 *
	 * @param string $string
	 * @return string
	 */
	public function normalizeText( $string ) {
		// Some languages such as Chinese require word segmentation
		return MediaWikiServices::getInstance()->getContentLanguage()->segmentByWord( $string );
	}

	/**
	 * Get the service class for finding near matches.
	 *
	 * @param Config $config Configuration handed to the matcher
	 * @return SearchNearMatcher
	 */
	public function getNearMatcher( Config $config ) {
		return new SearchNearMatcher( $config,
			MediaWikiServices::getInstance()->getContentLanguage(),
			$this->getHookContainer()
		);
	}

	/**
	 * Get near matcher for default SearchEngine.
	 *
	 * Builds a fresh engine through the service container and asks it for a
	 * matcher using the main config.
	 *
	 * @return SearchNearMatcher
	 */
	protected static function defaultNearMatcher() {
		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();
		return $services->newSearchEngine()->getNearMatcher( $config );
	}

	/**
	 * Get chars legal for search.
	 *
	 * @param int $type One of the CHARS_* constants; the base implementation
	 *  returns the same set regardless of this value
	 * @return string Character ranges, as used inside a regex character class
	 */
	public function legalSearchChars( $type = self::CHARS_ALL ) {
		return "A-Za-z_'.0-9\\x80-\\xFF\\-";
	}

	/**
	 * Set the maximum number of results to return
	 * and how many to skip before returning the first.
	 *
	 * Both values are coerced with intval().
	 *
	 * @param int $limit
	 * @param int $offset
	 */
	public function setLimitOffset( $limit, $offset = 0 ) {
		$this->limit = intval( $limit );
		$this->offset = intval( $offset );
	}

	/**
	 * Set which namespaces the search should include.
	 *
	 * Negative namespace ids are always kept; other ids are kept only when
	 * they are a key of the configured searchable namespaces. A falsy argument
	 * results in an empty list.
	 *
	 * @param int[]|null $namespaces
	 */
	public function setNamespaces( $namespaces ) {
		if ( $namespaces ) {
			// Filter namespaces to only keep valid ones
			$validNs = MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces();
			$namespaces = array_filter( $namespaces, static function ( $ns ) use( $validNs ) {
				return $ns < 0 || isset( $validNs[$ns] );
			} );
		} else {
			$namespaces = [];
		}
		$this->namespaces = $namespaces;
	}

	/**
	 * Set whether the searcher should try to build a suggestion.
	 *
	 * @param bool $showSuggestion
	 */
	public function setShowSuggestion( $showSuggestion ) {
		$this->showSuggestion = $showSuggestion;
	}

	/**
	 * Get the valid sort directions.
	 *
	 * @return string[] The base implementation only offers DEFAULT_SORT
	 */
	public function getValidSorts() {
		return [ self::DEFAULT_SORT ];
	}

	/**
	 * Set the sort direction of the search results.
	 *
	 * @param string $sort Must be one of getValidSorts()
	 * @throws InvalidArgumentException When $sort is not a valid sort
	 */
	public function setSort( $sort ) {
		if ( !in_array( $sort, $this->getValidSorts() ) ) {
			throw new InvalidArgumentException( "Invalid sort: $sort. " .
				"Must be one of: " . implode( ', ', $this->getValidSorts() ) );
		}
		$this->sort = $sort;
	}

	/**
	 * Get the sort direction of the search results.
	 *
	 * @return string
	 */
	public function getSort() {
		return $this->sort;
	}

	/**
	 * Hook point for rewriting prefixes in a query.
	 *
	 * The base implementation returns the query untouched.
	 *
	 * @param string $query
	 * @return string
	 */
	public function replacePrefixes( $query ) {
		return $query;
	}

	/**
	 * Parse some common prefixes: all (search everything) or namespace names.
	 *
	 * @param string $query Raw query, possibly starting with "all:" or a
	 *  namespace name followed by ':'
	 * @param bool $withAllKeyword Whether the "all:" keyword (and its localized
	 *  'searchall' form) is recognised
	 * @param bool $withPrefixSearchExtractNamespaceHook Whether to run the
	 *  PrefixSearchExtractNamespace hook when the prefix is not a namespace name
	 * @return false|array False when no prefix was recognised, otherwise
	 *  [ remaining query, extracted namespace ids or null for "all" ]
	 */
	public static function parseNamespacePrefixes(
		$query,
		$withAllKeyword = true,
		$withPrefixSearchExtractNamespaceHook = false
	) {
		$parsed = $query;
		if ( strpos( $query, ':' ) === false ) { // nothing to do
			return false;
		}
		$extractedNamespace = null;

		$allQuery = false;
		if ( $withAllKeyword ) {
			$allkeywords = [];

			$allkeywords[] = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
			// force all: so that we have a common syntax for all the wikis
			if ( !in_array( 'all:', $allkeywords ) ) {
				$allkeywords[] = 'all:';
			}

			foreach ( $allkeywords as $kw ) {
				if ( str_starts_with( $query, $kw ) ) {
					$parsed = substr( $query, strlen( $kw ) );
					$allQuery = true;
					break;
				}
			}
		}

		// The early return above guarantees that $query contains a ':' here,
		// so only the "all" keyword needs to be ruled out.
		if ( !$allQuery ) {
			$prefix = str_replace( ' ', '_', substr( $query, 0, strpos( $query, ':' ) ) );
			$index = MediaWikiServices::getInstance()->getContentLanguage()->getNsIndex( $prefix );
			if ( $index !== false ) {
				$extractedNamespace = [ $index ];
				$parsed = substr( $query, strlen( $prefix ) + 1 );
			} elseif ( $withPrefixSearchExtractNamespaceHook ) {
				$hookNamespaces = [ NS_MAIN ];
				$hookQuery = $query;
				Hooks::runner()->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery );
				if ( $hookQuery !== $query ) {
					$parsed = $hookQuery;
					$extractedNamespace = $hookNamespaces;
				} else {
					return false;
				}
			} else {
				return false;
			}
		}

		return [ $parsed, $extractedNamespace ];
	}

	/**
	 * Find snippet highlight settings for all users.
	 *
	 * @return int[] [ context lines, context characters ]
	 */
	public static function userHighlightPrefs() {
		// NOTE(review): these two assignments were absent from the listing this
		// block was recovered from, which left the return statement reading
		// undefined variables. The values are believed to match upstream
		// MediaWiki's hard-coded defaults - confirm against upstream.
		$contextlines = 2;
		$contextchars = 75;
		return [ $contextlines, $contextchars ];
	}

	/**
	 * Create or update the search index record for the given page.
	 *
	 * No-op in the base class.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 */
	public function update( $id, $title, $text ) {
		// no-op
	}

	/**
	 * Update a search index record's title only.
	 *
	 * No-op in the base class.
	 *
	 * @param int $id
	 * @param string $title
	 */
	public function updateTitle( $id, $title ) {
		// no-op
	}

	/**
	 * Delete an indexed page.
	 *
	 * No-op in the base class.
	 *
	 * @param int $id
	 * @param string $title
	 */
	public function delete( $id, $title ) {
		// no-op
	}

	/**
	 * Get the raw text for updating the index from a content object.
	 *
	 * @param Title $t Title the content belongs to (unused by the base implementation)
	 * @param Content|null $c Content to extract the text from
	 * @return string The content's search index text, or '' when $c is null
	 */
	public function getTextFromContent( Title $t, ?Content $c = null ) {
		return $c ? $c->getTextForSearchIndex() : '';
	}

	/**
	 * Whether getTextFromContent() already produces fully processed index text.
	 *
	 * @return bool The base implementation returns false
	 */
	public function textAlreadyUpdatedForIndex() {
		return false;
	}

	/**
	 * Strip a namespace prefix from the search string, if there is one.
	 *
	 * When a prefix is recognised the engine's namespaces are replaced by the
	 * extracted ones as a side effect.
	 *
	 * @param string $search
	 * @return string The search string without its namespace prefix
	 */
	protected function normalizeNamespaces( $search ) {
		$queryAndNs = self::parseNamespacePrefixes( $search, false, true );
		if ( $queryAndNs !== false ) {
			$this->setNamespaces( $queryAndNs[1] );
			return $queryAndNs[0];
		}
		return $search;
	}

	/**
	 * Perform an overfetch of completion search results.
	 *
	 * Runs completionSearchBackend() with the limit temporarily raised by one.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	protected function completionSearchBackendOverfetch( $search ) {
		$this->limit++;
		try {
			return $this->completionSearchBackend( $search );
		} finally {
			$this->limit--;
		}
	}

	/**
	 * Perform a completion search.
	 *
	 * Gives the PrefixSearchBackend hook a chance to provide the results
	 * (except when searching the Special: namespace) and otherwise falls back
	 * to simplePrefixSearch().
	 *
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	protected function completionSearchBackend( $search ) {
		$results = [];

		$search = trim( $search );

		if ( !in_array( NS_SPECIAL, $this->namespaces ) && // We do not run hook on Special: search
			!$this->getHookRunner()->onPrefixSearchBackend(
				$this->namespaces, $search, $this->limit, $results, $this->offset )
		) {
			// False means hook worked.
			// FIXME: Yes, the API is weird. That's why it is going to be deprecated.

			return SearchSuggestionSet::fromStrings( $results );
		} else {
			// Hook did not do the job, use default simple search
			$results = $this->simplePrefixSearch( $search );
			return SearchSuggestionSet::fromTitles( $results );
		}
	}

	/**
	 * Perform a completion search.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet An empty set when $search is blank
	 */
	public function completionSearch( $search ) {
		if ( trim( $search ) === '' ) {
			return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
		}
		$search = $this->normalizeNamespaces( $search );
		$suggestions = $this->completionSearchBackendOverfetch( $search );
		return $this->processCompletionResults( $search, $suggestions );
	}

	/**
	 * Perform a completion search with variants.
	 *
	 * When the primary search does not fill the (over-fetched) limit, the
	 * language variants of the search string are tried in turn until enough
	 * results have been collected.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet An empty set when $search is blank
	 */
	public function completionSearchWithVariants( $search ) {
		if ( trim( $search ) === '' ) {
			return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
		}
		$search = $this->normalizeNamespaces( $search );

		$results = $this->completionSearchBackendOverfetch( $search );
		$fallbackLimit = 1 + $this->limit - $results->getSize();
		if ( $fallbackLimit > 0 ) {
			$services = MediaWikiServices::getInstance();
			$fallbackSearches = $services->getLanguageConverterFactory()
				->getLanguageConverter( $services->getContentLanguage() )
				->autoConvertToAllVariants( $search );
			$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

			// The fallback searches run with a reduced limit and offset 0.
			// Remember the caller's settings so they are back in place both
			// for the final processCompletionResults() call and afterwards.
			$origLimit = $this->limit;
			$origOffset = $this->offset;
			foreach ( $fallbackSearches as $fbs ) {
				try {
					$this->setLimitOffset( $fallbackLimit );
					$fallbackSearchResult = $this->completionSearch( $fbs );
					$results->appendAll( $fallbackSearchResult );
					$fallbackLimit -= $fallbackSearchResult->getSize();
				} finally {
					$this->setLimitOffset( $origLimit, $origOffset );
				}
				if ( $fallbackLimit <= 0 ) {
					break;
				}
			}
		}
		return $this->processCompletionResults( $search, $results );
	}

	/**
	 * Extract titles from completion results.
	 *
	 * @param SearchSuggestionSet $completionResults
	 * @return array The suggested title of each suggestion
	 */
	public function extractTitles( SearchSuggestionSet $completionResults ) {
		return $completionResults->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} );
	}

	/**
	 * Process completion search results.
	 *
	 * Shrinks the over-fetched set to the limit, drops suggestions whose title
	 * is not known, and, for the first page only, makes sure an exact title
	 * match ends up in first position.
	 *
	 * @param string $search
	 * @param SearchSuggestionSet $suggestions Modified in place
	 * @return SearchSuggestionSet The same set that was passed in
	 */
	protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
		// We over-fetched to determine pagination. Shrink back down if we have extra results
		// and mark if pagination is possible
		$suggestions->shrink( $this->limit );

		$search = trim( $search );
		// preload the titles with LinkBatch
		$linkBatchFactory = MediaWikiServices::getInstance()->getLinkBatchFactory();
		$lb = $linkBatchFactory->newLinkBatch( $suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} ) );
		$lb->setCaller( __METHOD__ );
		$lb->execute();

		// NOTE(review): filter() is treated here as returning the number of
		// suggestions it removed - confirm against SearchSuggestionSet.
		$diff = $suggestions->filter( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle()->isKnown();
		} );
		if ( $diff > 0 ) {
			MediaWikiServices::getInstance()->getStatsdDataFactory()
				->updateCount( 'search.completion.missing', $diff );
		}

		$results = $suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle()->getPrefixedText();
		} );

		if ( $this->offset === 0 ) {
			// Rescore results with an exact title match
			// NOTE: in some cases like cross-namespace redirects
			// (frequently used as shortcuts e.g. WP:WP on huwiki) some
			// backends like Cirrus will return no results. We should still
			// try an exact title match to workaround this limitation
			$rescorer = new SearchExactMatchRescorer();
			$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
		} else {
			// No need to rescore if offset is not 0
			// The exact match must have been returned at position 0
			// if it existed.
			$rescoredResults = $results;
		}

		if ( count( $rescoredResults ) > 0 ) {
			$found = array_search( $rescoredResults[0], $results );
			if ( $found === false ) {
				// If the first result is not in the previous array it
				// means that we found a new exact match
				$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
				$suggestions->prepend( $exactMatch );
				$suggestions->shrink( $this->limit );
			} else {
				// if the first result is not the same we need to rescore
				if ( $found > 0 ) {
					$suggestions->rescore( $found );
				}
			}
		}

		return $suggestions;
	}

	/**
	 * Simple prefix search for subpages.
	 *
	 * @param string $search
	 * @return Title[] An empty array when $search is blank
	 */
	public function defaultPrefixSearch( $search ) {
		if ( trim( $search ) === '' ) {
			return [];
		}

		$search = $this->normalizeNamespaces( $search );
		return $this->simplePrefixSearch( $search );
	}

	/**
	 * Call out to simple search backend.
	 *
	 * @param string $search
	 * @return Title[]
	 */
	protected function simplePrefixSearch( $search ) {
		// Use default database prefix search
		$backend = new TitlePrefixSearch;
		return $backend->defaultSearchBackend( $this->namespaces, $search, $this->limit, $this->offset );
	}

	/**
	 * Get a list of supported profiles.
	 *
	 * @param string $profileType Profile type, e.g. one of the *_PROFILE_TYPE constants
	 * @param User|null $user
	 * @return array|null The base implementation returns null
	 */
	public function getProfiles( $profileType, ?User $user = null ) {
		return null;
	}

	/**
	 * Create a search field definition.
	 *
	 * @param string $name
	 * @param string $type
	 * @return NullIndexField The base implementation always returns a null
	 *  field, meaning the engine does not implement the field
	 */
	public function makeSearchFieldMapping( $name, $type ) {
		return new NullIndexField();
	}

	/**
	 * Get fields for search index.
	 *
	 * Collects the fields declared by every distinct content handler, merging
	 * definitions that share a name, then lets the SearchIndexFields hook
	 * adjust the result.
	 *
	 * @return array Field definitions keyed by field name
	 * @throws InvalidArgumentException When two definitions of the same field cannot be merged
	 */
	public function getSearchIndexFields() {
		$handlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
		$models = $handlerFactory->getContentModels();
		$fields = [];
		$seenHandlers = new SplObjectStorage();
		foreach ( $models as $model ) {
			try {
				$handler = $handlerFactory->getContentHandler( $model );
			} catch ( MWUnknownContentModelException $e ) {
				// If we can find no handler, ignore it
				continue;
			}
			// Several models can have the same handler, so avoid processing it repeatedly
			if ( $seenHandlers->contains( $handler ) ) {
				// We already did this one
				continue;
			}
			$seenHandlers->attach( $handler );
			$handlerFields = $handler->getFieldsForSearchIndex( $this );
			foreach ( $handlerFields as $fieldName => $fieldData ) {
				if ( empty( $fields[$fieldName] ) ) {
					$fields[$fieldName] = $fieldData;
				} else {
					// TODO: do we allow some clashes with the same type or reject all of them?
					$mergeDef = $fields[$fieldName]->merge( $fieldData );
					if ( !$mergeDef ) {
						throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
					}
					$fields[$fieldName] = $mergeDef;
				}
			}
		}
		// Hook to allow extensions to produce search mapping fields
		$this->getHookRunner()->onSearchIndexFields( $fields, $this );
		return $fields;
	}

	/**
	 * Augment search results with extra data.
	 *
	 * @param ISearchResultSet $resultSet Receives the augmented data
	 * @throws InvalidArgumentException When a name is registered both as a
	 *  row augmentor and as a set augmentor
	 */
	public function augmentSearchResults( ISearchResultSet $resultSet ) {
		$setAugmentors = [];
		$rowAugmentors = [];
		$this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors );
		if ( !$setAugmentors && !$rowAugmentors ) {
			// We're done here
			return;
		}

		// Convert row augmentors to set augmentor
		foreach ( $rowAugmentors as $name => $row ) {
			if ( isset( $setAugmentors[$name] ) ) {
				throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
			}
			$setAugmentors[$name] = new PerRowAugmentor( $row );
		}

		// Every entry is now a set augmentor; only non-empty data is recorded.
		foreach ( $setAugmentors as $name => $augmentor ) {
			$data = $augmentor->augmentAll( $resultSet );
			if ( $data ) {
				$resultSet->setAugmentedData( $name, $data );
			}
		}
	}

	/**
	 * Inject the hook container and build the matching hook runner.
	 *
	 * @param HookContainer $hookContainer
	 */
	public function setHookContainer( HookContainer $hookContainer ) {
		$this->hookContainer = $hookContainer;
		$this->hookRunner = new HookRunner( $hookContainer );
	}

	/**
	 * Get a HookContainer, for running extension hooks or for hook metadata.
	 *
	 * @return HookContainer
	 */
	protected function getHookContainer(): HookContainer {
		if ( !$this->hookContainer ) {
			// This shouldn't be hit in core, but it is needed for CirrusSearch
			// which commonly creates a CirrusSearch object without cirrus being
			// configured in $wgSearchType/$wgSearchTypeAlternatives.
			$this->hookContainer = MediaWikiServices::getInstance()->getHookContainer();
		}
		return $this->hookContainer;
	}

	/**
	 * Get a HookRunner for running core hooks.
	 *
	 * @return HookRunner
	 */
	protected function getHookRunner(): HookRunner {
		if ( !$this->hookRunner ) {
			$this->hookRunner = new HookRunner( $this->getHookContainer() );
		}
		return $this->hookRunner;
	}

}
const NS_MAIN
Definition Defines.php:64
const NS_SPECIAL
Definition Defines.php:53
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
Definition WebStart.php:82
Exception thrown when an unregistered content model is requested.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookContainer for dispatch to extensions.
Service locator for MediaWiki core services.
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook... This is case-sensitive (First character may be autom...
Contain a class for special pages.
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getNearMatcher(Config $config)
Get the service class for finding near matches.
getHookRunner()
Get a HookRunner for running core hooks.
searchTitle( $term)
Perform a title-only search query and return a result set.
supports( $feature)
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
update( $id, $title, $text)
Create or update the search index record for the given page.
setNamespaces( $namespaces)
Set which namespaces the search should include.
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
array $features
Feature values.
replacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names and set the list of namespaces...
string[] $searchTerms
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
defaultPrefixSearch( $search)
Simple prefix search for subpages.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
searchArchiveTitle( $term)
Perform a title search in the article archive.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
setFeatureData( $feature, $data)
Way to pass custom data for engines.
completionSearchBackend( $search)
Perform a completion search.
getTextFromContent(Title $t, Content $c=null)
Get the raw text for updating the index from a content object. Nicer search backends could possibly do...
getProfiles( $profileType, User $user=null)
Get a list of supported profiles.
int[]|null $namespaces
getSort()
Get the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
getSearchIndexFields()
Get fields for search index.
getValidSorts()
Get the valid sort directions.
static userHighlightPrefs()
Find snippet highlight settings for all users.
updateTitle( $id, $title)
Update a search index record's title only.
completionSearchWithVariants( $search)
Perform a completion search with variants.
doSearchText( $term)
Perform a full text search query and return a result set.
normalizeNamespaces( $search)
Strips the namespace prefix from a namespaced search string, leaving the plain query.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
getHookContainer()
Get a HookContainer, for running extension hooks or for hook metadata.
completionSearch( $search)
Perform a completion search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
simplePrefixSearch( $search)
Call out to simple search backend.
setSort( $sort)
Set the sort direction of the search results.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features.
setHookContainer(HookContainer $hookContainer)
searchText( $term)
Perform a full text search query and return a result set.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
Implementation of near match title search.
Search suggestion sets.
filter( $callback)
Filter the suggestions array.
rescore( $key)
Move the suggestion at index $key to the first position.
shrink( $limit)
Remove any extra elements in the suggestions set.
static fromStrings(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a string array.
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a title array.
map( $callback)
Call array_map on the suggestions array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
Search suggestion.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:44
Performs prefix search, returning Title objects.
Represents a title within MediaWiki.
Definition Title.php:49
internal since 1.36
Definition User.php:70
Interface for configuration instances.
Definition Config.php:30
Base interface for content objects.
Definition Content.php:35
A set of SearchEngine results.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...
Augment search results.