MediaWiki master
SearchEngine.php
Go to the documentation of this file.
1<?php
38
44abstract class SearchEngine {
/** @var string Sort used when no other sort has been requested */
public const DEFAULT_SORT = 'relevance';

/** @var string */
public $prefix = '';

/** @var int[]|null Namespaces the search should include; only NS_MAIN by default */
public $namespaces = [ NS_MAIN ];

/** @var int Maximum number of results to return */
protected $limit = 10;

/** @var int How many results to skip before returning the first */
protected $offset = 0;

/** @var string[] */
protected $searchTerms = [];

/** @var bool Whether the searcher should try to build a suggestion */
protected $showSuggestion = true;

/** @var string Currently selected sort, see setSort() / getSort() */
private $sort = self::DEFAULT_SORT;

/** @var array Feature values, keyed by feature name */
protected $features = [];

/** @var HookContainer|null Injected via setHookContainer() or fetched lazily */
private $hookContainer;

/** @var HookRunner|null Built from the hook container on first use */
private $hookRunner;

/** Profile type for completionSearch */
public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';

/** Profile type for query independent ranking features */
public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';

/** Integer flag for legalSearchChars: includes all chars allowed in a search query */
protected const CHARS_ALL = 1;

/** Integer flag for legalSearchChars: includes all chars allowed in a search term */
protected const CHARS_NO_SYNTAX = 2;
90
/**
 * Perform a full text search query and return a result set.
 *
 * The work itself is done by doSearchText(); the call is routed through
 * maybePaginate() so that non-paginating engines get their result set
 * trimmed back to the configured limit.
 *
 * @param string $term Raw search term
 * @return mixed Whatever doSearchText() returned (null in this base class)
 */
public function searchText( $term ) {
	$runFullText = function () use ( $term ) {
		return $this->doSearchText( $term );
	};

	return $this->maybePaginate( $runFullText );
}
106
/**
 * Perform a full text search query and return a result set.
 *
 * The base implementation does no searching and always yields null;
 * concrete engines are expected to override it.
 *
 * @param string $term Raw search term
 * @return null
 */
protected function doSearchText( $term ) {
	// No full text search in the base engine.
	return null;
}
119
/**
 * Perform a title search in the article archive.
 *
 * Thin public wrapper that forwards to doSearchArchiveTitle().
 *
 * @param string $term Raw search term
 * @return Status Result of doSearchArchiveTitle()
 */
public function searchArchiveTitle( $term ) {
	$status = $this->doSearchArchiveTitle( $term );

	return $status;
}
137
/**
 * Perform a title search in the article archive.
 *
 * The base implementation finds nothing: it returns a good Status whose
 * value is an empty array.
 *
 * @param string $term Raw search term
 * @return Status
 */
protected function doSearchArchiveTitle( $term ) {
	$noMatches = [];

	return Status::newGood( $noMatches );
}
150
/**
 * Perform a title-only search query and return a result set.
 *
 * The work itself is done by doSearchTitle(); the call is routed through
 * maybePaginate() so that non-paginating engines get their result set
 * trimmed back to the configured limit.
 *
 * @param string $term Raw search term
 * @return mixed Whatever doSearchTitle() returned (null in this base class)
 */
public function searchTitle( $term ) {
	$runTitleSearch = function () use ( $term ) {
		return $this->doSearchTitle( $term );
	};

	return $this->maybePaginate( $runTitleSearch );
}
167
/**
 * Perform a title-only search query and return a result set.
 *
 * The base implementation does no searching and always yields null;
 * concrete engines are expected to override it.
 *
 * @param string $term Raw search term
 * @return null
 */
protected function doSearchTitle( $term ) {
	// No title search in the base engine.
	return null;
}
180
/**
 * Run a search callback, over-fetching by one result for engines that do
 * not handle pagination themselves.
 *
 * Engines implementing PaginatingSearchEngine are called as-is. For all
 * others the limit is temporarily raised by one while the callback runs,
 * and the resulting set (either returned directly or wrapped in a Status)
 * is then shrunk back to the real limit.
 *
 * @param Closure $fn Callback performing the actual search
 * @return mixed The callback's return value, unchanged apart from the shrink
 */
private function maybePaginate( Closure $fn ) {
	if ( $this instanceof PaginatingSearchEngine ) {
		return $fn();
	}

	// Ask for one extra row; the limit is restored even if the callback throws.
	$this->limit++;
	try {
		$searchOutcome = $fn();
	} finally {
		$this->limit--;
	}

	if ( $searchOutcome instanceof ISearchResultSet ) {
		$searchOutcome->shrink( $this->limit );
	} elseif ( $searchOutcome instanceof Status ) {
		$wrapped = $searchOutcome->getValue();
		if ( $wrapped instanceof ISearchResultSet ) {
			$wrapped->shrink( $this->limit );
		}
	}

	return $searchOutcome;
}
214
/**
 * Tell whether this engine supports the given feature.
 *
 * Only 'search-update' is reported as supported here; every other feature
 * name, including 'title-suffix-filter', is reported as unsupported.
 *
 * @param string $feature Feature name
 * @return bool
 */
public function supports( $feature ) {
	// Loose comparison on purpose: it mirrors the matching rules of a
	// switch statement.
	return $feature == 'search-update';
}
231
/**
 * Way to pass custom data for engines.
 *
 * Stores $data under $feature, replacing any value stored earlier for the
 * same feature.
 *
 * @param string $feature Feature name, used as array key
 * @param mixed $data Value to remember
 */
public function setFeatureData( $feature, $data ) {
	$this->features[$feature] = $data;
}
241
/**
 * Way to retrieve custom data set by setFeatureData or by the engine itself.
 *
 * @param string $feature Feature name
 * @return mixed The stored value, or null when nothing (or null) is stored
 */
public function getFeatureData( $feature ) {
	if ( isset( $this->features[$feature] ) ) {
		return $this->features[$feature];
	}

	return null;
}
252
/**
 * Prepare text for use in searches and index updates.
 *
 * The base implementation only runs the content language's word
 * segmentation over the string.
 *
 * @param string $string
 * @return string Result of the content language's segmentByWord()
 */
public function normalizeText( $string ) {
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	// Some languages such as Chinese require word segmentation
	return $contentLanguage->segmentByWord( $string );
}
265
/**
 * Get the service class for finding near matches.
 *
 * @param Config $config Not used by this implementation
 * @return mixed The title matcher service from MediaWikiServices
 */
public function getNearMatcher( Config $config ) {
	$services = MediaWikiServices::getInstance();

	return $services->getTitleMatcher();
}
275
/**
 * Get near matcher for default SearchEngine.
 *
 * @deprecated since 1.40; emits a deprecation warning on every call
 * @return mixed The title matcher service from MediaWikiServices
 */
protected static function defaultNearMatcher() {
	wfDeprecated( __METHOD__, '1.40' );

	$services = MediaWikiServices::getInstance();

	return $services->getTitleMatcher();
}
286
/**
 * Get chars legal for search.
 *
 * The base implementation returns the same character list regardless of
 * $type.
 *
 * @param int $type One of the CHARS_* flags; ignored here
 * @return string Character list, presumably meant for use inside a regex
 *   character class (TODO confirm against callers)
 */
public function legalSearchChars( $type = self::CHARS_ALL ) {
	$legalChars = "A-Za-z_'.0-9\\x80-\\xFF\\-";

	return $legalChars;
}
296
/**
 * Set the maximum number of results to return and how many to skip before
 * returning the first.
 *
 * Both values are converted to integers before being stored.
 *
 * @param int $limit
 * @param int $offset
 */
public function setLimitOffset( $limit, $offset = 0 ) {
	$this->offset = (int)$offset;
	$this->limit = (int)$limit;
}
308
/**
 * Set which namespaces the search should include.
 *
 * An empty or otherwise falsy argument clears the list. Otherwise only
 * negative namespace ids and ids reported as searchable by the search
 * engine config are kept; the original array keys are preserved.
 *
 * @param int[]|null $namespaces
 */
public function setNamespaces( $namespaces ) {
	if ( !$namespaces ) {
		$this->namespaces = [];
		return;
	}

	// Filter namespaces to only keep valid ones
	$searchable = MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces();
	$this->namespaces = array_filter(
		$namespaces,
		static fn ( $ns ) => $ns < 0 || isset( $searchable[$ns] )
	);
}
327
/**
 * Set whether the searcher should try to build a suggestion.
 *
 * The value is stored as given, without any conversion.
 *
 * @param bool $showSuggestion
 */
public function setShowSuggestion( $showSuggestion ) {
	$this->showSuggestion = $showSuggestion;
}
338
/**
 * Get the valid sort directions.
 *
 * The base engine only knows the default sort.
 *
 * @return string[]
 */
public function getValidSorts() {
	$sorts = [ self::DEFAULT_SORT ];

	return $sorts;
}
351
/**
 * Set the sort direction of the search results.
 *
 * The value must be one of the entries returned by getValidSorts(). The
 * check is strict, so values that only loosely equal a valid sort (for
 * instance boolean true) are rejected instead of being stored.
 *
 * @param string $sort
 * @throws InvalidArgumentException When $sort is not a valid sort
 */
public function setSort( $sort ) {
	// Fetch once: used both for validation and for the error message.
	$validSorts = $this->getValidSorts();
	if ( !in_array( $sort, $validSorts, true ) ) {
		throw new InvalidArgumentException( "Invalid sort: $sort. " .
			"Must be one of: " . implode( ', ', $validSorts ) );
	}
	$this->sort = $sort;
}
366
/**
 * Get the sort direction of the search results.
 *
 * @return string The value last accepted by setSort(), or the default sort
 */
public function getSort() {
	$currentSort = $this->sort;

	return $currentSort;
}
376
/**
 * Hook point for parsing prefixes out of a query.
 *
 * The base implementation does no parsing at all and hands the query back
 * unchanged.
 *
 * @param string $query
 * @return string The unmodified query
 */
public function replacePrefixes( $query ) {
	// Nothing to replace in the base engine.
	return $query;
}
389
/**
 * Parse some common prefixes: all (search everything) or namespace names.
 *
 * @param string $query Raw query, possibly starting with a prefix
 * @param bool $withAllKeyword Whether the "all:" keyword (and its content
 *   language translation) is recognised
 * @param bool $withPrefixSearchExtractNamespaceHook Whether the
 *   PrefixSearchExtractNamespace hook is consulted when the text before the
 *   first colon is not a known namespace name
 * @return false|array False when no prefix was recognised. Otherwise a pair
 *   [ remaining query, namespaces ] where namespaces is null for the "all"
 *   keyword and an array of namespace ids otherwise.
 */
public static function parseNamespacePrefixes(
	$query,
	$withAllKeyword = true,
	$withPrefixSearchExtractNamespaceHook = false
) {
	$colonPos = strpos( $query, ':' );
	if ( $colonPos === false ) {
		// Without a colon there can be no prefix.
		return false;
	}

	if ( $withAllKeyword ) {
		$localizedAll = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
		$allKeywords = [ $localizedAll ];
		// force all: so that we have a common syntax for all the wikis
		if ( $localizedAll !== 'all:' ) {
			$allKeywords[] = 'all:';
		}

		foreach ( $allKeywords as $keyword ) {
			if ( str_starts_with( $query, $keyword ) ) {
				// "all" selects no specific namespace, hence the null.
				return [ substr( $query, strlen( $keyword ) ), null ];
			}
		}
	}

	// Spaces and underscores are interchangeable in namespace names.
	$prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
	$services = MediaWikiServices::getInstance();
	$nsIndex = $services->getContentLanguage()->getNsIndex( $prefix );
	if ( $nsIndex !== false ) {
		return [ substr( $query, strlen( $prefix ) + 1 ), [ $nsIndex ] ];
	}

	if ( !$withPrefixSearchExtractNamespaceHook ) {
		return false;
	}

	$hookNamespaces = [ NS_MAIN ];
	$hookQuery = $query;
	( new HookRunner( $services->getHookContainer() ) )
		->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery );
	if ( $hookQuery === $query ) {
		// The hook left the query alone, so nothing was extracted.
		return false;
	}

	return [ $hookQuery, $hookNamespaces ];
}
458
/**
 * Find snippet highlight settings for all users.
 *
 * The same fixed values are returned for everybody; no user preference is
 * consulted.
 *
 * @return int[] Pair of [ context lines, context chars ]
 */
public static function userHighlightPrefs() {
	// Both variables must be assigned before they are returned.
	// NOTE(review): 2 lines / 75 chars are the upstream defaults; confirm
	// they still agree with the highlighter's own defaults.
	$contextlines = 2;
	$contextchars = 75;
	return [ $contextlines, $contextchars ];
}
471
/**
 * Create or update the search index record for the given page.
 *
 * The base implementation does nothing.
 *
 * @param int $id Presumably the page id (TODO confirm)
 * @param string $title
 * @param string $text
 */
public function update( $id, $title, $text ) {
	// Intentionally empty: the base engine keeps no index.
}
484
/**
 * Update a search index record's title only.
 *
 * The base implementation does nothing.
 *
 * @param int $id Presumably the page id (TODO confirm)
 * @param string $title
 */
public function updateTitle( $id, $title ) {
	// Intentionally empty: the base engine keeps no index.
}
496
/**
 * Counterpart of update() for removing a page from the search index.
 *
 * The base implementation does nothing.
 *
 * @param int $id Presumably the page id (TODO confirm)
 * @param string $title
 */
public function delete( $id, $title ) {
	// Intentionally empty: the base engine keeps no index.
}
508
/**
 * Get the raw text for updating the index from a content object.
 *
 * @param Title $t Not used by this implementation
 * @param Content|null $c Content to extract the text from
 * @return string The content's search index text, or '' when no content is given
 */
public function getTextFromContent( Title $t, ?Content $c = null ) {
	if ( $c === null ) {
		return '';
	}

	return $c->getTextForSearchIndex();
}
523
/**
 * Tell whether getTextFromContent() already returns text that is fully
 * processed for the index.
 *
 * The base engine always answers false.
 *
 * @return bool
 */
public function textAlreadyUpdatedForIndex() {
	// The base engine does no text processing of its own.
	return false;
}
535
/**
 * Makes search simple string if it was namespaced.
 *
 * When a namespace prefix is recognised, the engine's namespaces are set
 * to the extracted ones as a side effect. The "all:" keyword is not
 * recognised here, but the PrefixSearchExtractNamespace hook is consulted.
 *
 * @param string $search
 * @return string The query without its namespace prefix, or the original
 *   string when no prefix was found
 */
protected function normalizeNamespaces( $search ) {
	$parsed = self::parseNamespacePrefixes( $search, false, true );
	if ( $parsed === false ) {
		return $search;
	}

	[ $strippedQuery, $extractedNamespaces ] = $parsed;
	$this->setNamespaces( $extractedNamespaces );

	return $strippedQuery;
}
550
/**
 * Perform an overfetch of completion search results.
 *
 * The limit is raised by one for the duration of the backend call and is
 * restored afterwards, even when the backend throws.
 *
 * @param string $search
 * @return SearchSuggestionSet
 */
protected function completionSearchBackendOverfetch( $search ) {
	$this->limit++;
	try {
		$suggestions = $this->completionSearchBackend( $search );
	} finally {
		$this->limit--;
	}

	return $suggestions;
}
566
/**
 * Perform a completion search.
 *
 * Unless NS_SPECIAL is among the namespaces, the PrefixSearchBackend hook
 * gets the first chance to answer. If the hook is skipped or does not take
 * over, the simple prefix search is used instead.
 *
 * @param string $search
 * @return SearchSuggestionSet
 */
protected function completionSearchBackend( $search ) {
	$search = trim( $search );
	$results = [];

	// We do not run hook on Special: search
	$isSpecialSearch = in_array( NS_SPECIAL, $this->namespaces );
	if ( !$isSpecialSearch ) {
		$hookPassed = $this->getHookRunner()->onPrefixSearchBackend(
			$this->namespaces, $search, $this->limit, $results, $this->offset );
		if ( !$hookPassed ) {
			// False means hook worked.
			// FIXME: Yes, the API is weird. That's why it is going to be deprecated.
			return SearchSuggestionSet::fromStrings( $results );
		}
	}

	// Hook did not do the job, use default simple search
	$titles = $this->simplePrefixSearch( $search );

	return SearchSuggestionSet::fromTitles( $titles );
}
596
/**
 * Perform a completion search.
 *
 * A blank query short-circuits to an empty suggestion set. Otherwise the
 * namespace prefix is stripped, the backend is queried with one extra
 * result, and the outcome is post-processed.
 *
 * @param string $search
 * @return SearchSuggestionSet
 */
public function completionSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		// Return empty result
		return SearchSuggestionSet::emptySuggestionSet();
	}

	$search = $this->normalizeNamespaces( $search );

	return $this->processCompletionResults(
		$search,
		$this->completionSearchBackendOverfetch( $search )
	);
}
610
/**
 * Perform a completion search with variants.
 *
 * After the regular over-fetching search, any remaining room (up to limit
 * plus one) is filled by running completionSearch() on the language
 * variants of the query. Limit and offset are restored after every variant
 * search.
 *
 * @param string $search
 * @return SearchSuggestionSet
 */
public function completionSearchWithVariants( $search ) {
	if ( trim( $search ) === '' ) {
		// Return empty result
		return SearchSuggestionSet::emptySuggestionSet();
	}
	$search = $this->normalizeNamespaces( $search );
	$results = $this->completionSearchBackendOverfetch( $search );

	// Room left, counting the one extra over-fetched result.
	$remaining = 1 + $this->limit - $results->getSize();
	if ( $remaining <= 0 ) {
		return $this->processCompletionResults( $search, $results );
	}

	$services = MediaWikiServices::getInstance();
	$converter = $services->getLanguageConverterFactory()
		->getLanguageConverter( $services->getContentLanguage() );
	$variants = array_diff(
		array_unique( $converter->autoConvertToAllVariants( $search ) ),
		[ $search ]
	);

	$savedLimit = $this->limit;
	$savedOffset = $this->offset;
	foreach ( $variants as $variant ) {
		try {
			$this->setLimitOffset( $remaining );
			$variantResults = $this->completionSearch( $variant );
			$results->appendAll( $variantResults );
			$remaining -= $variantResults->getSize();
		} finally {
			$this->setLimitOffset( $savedLimit, $savedOffset );
		}

		if ( $remaining <= 0 ) {
			break;
		}
	}

	return $this->processCompletionResults( $search, $results );
}
651
/**
 * Extract titles from completion results.
 *
 * @param SearchSuggestionSet $completionResults
 * @return array The suggested title of every suggestion in the set
 */
public function extractTitles( SearchSuggestionSet $completionResults ) {
	$toTitle = static fn ( SearchSuggestion $sugg ) => $sugg->getSuggestedTitle();

	return $completionResults->map( $toTitle );
}
662
/**
 * Process completion search results.
 *
 * Shrinks the over-fetched set to the limit, drops suggestions whose title
 * is not known, and on the first page (offset 0) lets the exact match
 * rescorer promote or add an exact title match.
 *
 * @param string $search
 * @param SearchSuggestionSet $suggestions Modified in place
 * @return SearchSuggestionSet The same set that was passed in
 */
protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
	// We over-fetched to determine pagination. Shrink back down if we have extra results
	// and mark if pagination is possible
	$suggestions->shrink( $this->limit );

	$search = trim( $search );
	$services = MediaWikiServices::getInstance();

	// preload the titles with LinkBatch
	$lb = $services->getLinkBatchFactory()->newLinkBatch(
		$suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} )
	);
	$lb->setCaller( __METHOD__ );
	$lb->execute();

	$diff = $suggestions->filter( static function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->isKnown();
	} );
	if ( $diff > 0 ) {
		$services->getStatsdDataFactory()
			->updateCount( 'search.completion.missing', $diff );
	}

	// SearchExactMatchRescorer should probably be refactored to work directly on top of a
	// SearchSuggestionSet instead of converting it to array and trying to infer if it has
	// re-scored anything by inspecting the head of the returned array.
	$results = $suggestions->map( static function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->getPrefixedText();
	} );

	$rescorer = new SearchExactMatchRescorer();
	if ( $this->offset === 0 ) {
		// Rescore results with an exact title match
		// NOTE: in some cases like cross-namespace redirects
		// (frequently used as shortcuts e.g. WP:WP on huwiki) some
		// backends like Cirrus will return no results. We should still
		// try an exact title match to workaround this limitation
		$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
	} else {
		// No need to rescore if offset is not 0
		// The exact match must have been returned at position 0
		// if it existed.
		$rescoredResults = $results;
	}

	if ( count( $rescoredResults ) > 0 ) {
		// Strict search: titles are compared as plain strings, so that
		// numeric-looking titles such as "1e3" and "1000" are not treated
		// as the same entry.
		$found = array_search( $rescoredResults[0], $results, true );
		if ( $found === false ) {
			// If the first result is not in the previous array it
			// means that we found a new exact match
			$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
			$suggestions->prepend( $exactMatch );
			if ( $rescorer->getReplacedRedirect() !== null ) {
				// the exact match rescorer replaced one of the suggestion found by the search engine
				// let's remove it from our suggestions set to avoid showing duplicates
				$suggestions->remove( SearchSuggestion::fromTitle( 0,
					Title::newFromText( $rescorer->getReplacedRedirect() ) ) );
			}
			$suggestions->shrink( $this->limit );
		} else {
			// if the first result is not the same we need to rescore
			if ( $found > 0 ) {
				$suggestions->rescore( $found );
			}
		}
	}

	return $suggestions;
}
738
/**
 * Simple prefix search for subpages.
 *
 * A blank query yields an empty array. Otherwise the namespace prefix is
 * stripped (which may change the engine's namespaces) and the simple
 * prefix search is run on what is left.
 *
 * @param string $search
 * @return array Result of simplePrefixSearch(), or [] for a blank query
 */
public function defaultPrefixSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return [];
	}

	$withoutPrefix = $this->normalizeNamespaces( $search );

	return $this->simplePrefixSearch( $withoutPrefix );
}
752
/**
 * Call out to simple search backend.
 *
 * Uses the engine's current namespaces, limit and offset.
 *
 * @param string $search
 * @return array Result of TitlePrefixSearch::defaultSearchBackend()
 */
protected function simplePrefixSearch( $search ) {
	// Use default database prefix search
	$prefixSearch = new TitlePrefixSearch();

	return $prefixSearch->defaultSearchBackend(
		$this->namespaces,
		$search,
		$this->limit,
		$this->offset
	);
}
764
/**
 * Get a list of supported profiles.
 *
 * The base engine has no profiles and always answers null.
 *
 * @param string $profileType Presumably one of the *_PROFILE_TYPE constants
 *   (TODO confirm)
 * @param User|null $user Not used by this implementation
 * @return null
 */
public function getProfiles( $profileType, ?User $user = null ) {
	// No profiles in the base engine.
	return null;
}
786
/**
 * Create a search field definition.
 *
 * The base engine ignores both arguments and hands back a NullIndexField.
 *
 * @param string $name
 * @param string $type
 * @return NullIndexField
 */
public function makeSearchFieldMapping( $name, $type ) {
	$field = new NullIndexField();

	return $field;
}
800
/**
 * Get fields for search index.
 *
 * Collects the field definitions of every distinct content handler, merges
 * definitions that share a name, and finally lets the SearchIndexFields
 * hook adjust the result.
 *
 * @return array Field definitions keyed by field name
 * @throws InvalidArgumentException When two definitions of the same field
 *   cannot be merged
 */
public function getSearchIndexFields() {
	$handlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
	$seenHandlers = new SplObjectStorage();
	$fields = [];

	foreach ( $handlerFactory->getContentModels() as $model ) {
		try {
			$handler = $handlerFactory->getContentHandler( $model );
		} catch ( MWUnknownContentModelException $e ) {
			// If we can find no handler, ignore it
			continue;
		}

		// Several models can have the same handler, so avoid processing it repeatedly
		if ( $seenHandlers->contains( $handler ) ) {
			continue;
		}
		$seenHandlers->attach( $handler );

		foreach ( $handler->getFieldsForSearchIndex( $this ) as $fieldName => $fieldData ) {
			if ( empty( $fields[$fieldName] ) ) {
				$fields[$fieldName] = $fieldData;
				continue;
			}

			// TODO: do we allow some clashes with the same type or reject all of them?
			$merged = $fields[$fieldName]->merge( $fieldData );
			if ( !$merged ) {
				throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
			}
			$fields[$fieldName] = $merged;
		}
	}

	// Hook to allow extensions to produce search mapping fields
	$this->getHookRunner()->onSearchIndexFields( $fields, $this );

	return $fields;
}
843
/**
 * Augment search results with extra data.
 *
 * Augmentors are collected through the SearchResultsAugment hook. Row
 * augmentors are wrapped in a PerRowAugmentor so that everything can be run
 * as a set augmentor; any non-empty data is stored on the result set under
 * the augmentor's name.
 *
 * @param ISearchResultSet $resultSet
 * @throws InvalidArgumentException When a name is used by both a row and a
 *   set augmentor
 */
public function augmentSearchResults( ISearchResultSet $resultSet ) {
	$setAugmentors = [];
	$rowAugmentors = [];
	$this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors );

	if ( !( $setAugmentors || $rowAugmentors ) ) {
		// We're done here
		return;
	}

	// Convert row augmentors to set augmentor
	foreach ( $rowAugmentors as $name => $rowAugmentor ) {
		if ( isset( $setAugmentors[$name] ) ) {
			throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
		}
		$setAugmentors[$name] = new PerRowAugmentor( $rowAugmentor );
	}

	foreach ( $setAugmentors as $name => $augmentor ) {
		$augmented = $augmentor->augmentAll( $resultSet );
		if ( !$augmented ) {
			// Nothing to attach for this augmentor.
			continue;
		}
		$resultSet->setAugmentedData( $name, $augmented );
	}
}
875
/**
 * Inject the hook container and build a matching hook runner.
 *
 * Any previously stored container and runner are replaced.
 *
 * @param HookContainer $hookContainer
 */
public function setHookContainer( HookContainer $hookContainer ) {
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->hookContainer = $hookContainer;
}
885
/**
 * Get a HookContainer, for running extension hooks or for hook metadata.
 *
 * Falls back to the container from MediaWikiServices when none has been
 * injected, and remembers it for later calls.
 *
 * @return HookContainer
 */
protected function getHookContainer(): HookContainer {
	if ( $this->hookContainer ) {
		return $this->hookContainer;
	}

	// This shouldn't be hit in core, but it is needed for CirrusSearch
	// which commonly creates a CirrusSearch object without cirrus being
	// configured in $wgSearchType/$wgSearchTypeAlternatives.
	$this->hookContainer = MediaWikiServices::getInstance()->getHookContainer();

	return $this->hookContainer;
}
901
/**
 * Get a HookRunner for running core hooks.
 *
 * The runner is created on first use from getHookContainer() and then
 * reused.
 *
 * @return HookRunner
 */
protected function getHookRunner(): HookRunner {
	if ( $this->hookRunner ) {
		return $this->hookRunner;
	}

	$this->hookRunner = new HookRunner( $this->getHookContainer() );

	return $this->hookRunner;
}
916
917}
const NS_MAIN
Definition Defines.php:65
const NS_SPECIAL
Definition Defines.php:54
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Exception thrown when an unregistered content model is requested.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Service locator for MediaWiki core services.
Service implementation of near match title search.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:54
Represents a title within MediaWiki.
Definition Title.php:78
User class for the MediaWiki software.
Definition User.php:121
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook... This is case-sensitive (First character may be autom...
Contain a class for special pages.
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getNearMatcher(Config $config)
Get the service class for finding near matches.
getHookRunner()
Get a HookRunner for running core hooks.
searchTitle( $term)
Perform a title-only search query and return a result set.
supports( $feature)
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
update( $id, $title, $text)
Create or update the search index record for the given page.
setNamespaces( $namespaces)
Set which namespaces the search should include.
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
getTextFromContent(Title $t, ?Content $c=null)
Get the raw text for updating the index from a content object Nicer search backends could possibly do...
array $features
Feature values.
replacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names and set the list of namespaces...
string[] $searchTerms
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
defaultPrefixSearch( $search)
Simple prefix search for subpages.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
searchArchiveTitle( $term)
Perform a title search in the article archive.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
setFeatureData( $feature, $data)
Way to pass custom data for engines.
completionSearchBackend( $search)
Perform a completion search.
int[]|null $namespaces
getSort()
Get the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
getSearchIndexFields()
Get fields for search index.
getValidSorts()
Get the valid sort directions.
static userHighlightPrefs()
Find snippet highlight settings for all users.
updateTitle( $id, $title)
Update a search index record's title only.
completionSearchWithVariants( $search)
Perform a completion search with variants.
doSearchText( $term)
Perform a full text search query and return a result set.
normalizeNamespaces( $search)
Makes search simple string if it was namespaced.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
getHookContainer()
Get a HookContainer, for running extension hooks or for hook metadata.
completionSearch( $search)
Perform a completion search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
getProfiles( $profileType, ?User $user=null)
Get a list of supported profiles.
simplePrefixSearch( $search)
Call out to simple search backend.
setSort( $sort)
Set the sort direction of the search results.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features.
setHookContainer(HookContainer $hookContainer)
searchText( $term)
Perform a full text search query and return a result set.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
A set of search suggestions.
filter( $callback)
Filter the suggestions array.
rescore( $key)
Move the suggestion at index $key to the first position.
shrink( $limit)
Remove any extra elements in the suggestions set.
static fromStrings(array $titles, $hasMoreResults=false)
Builds a new set of suggestions based on a string array.
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestions based on a title array.
map( $callback)
Call array_map on the suggestions array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
remove(SearchSuggestion $suggestion)
Remove a suggestion from the set.
A search suggestion.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
Performs prefix search, returning Title objects.
A set of SearchEngine results.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Interface for configuration instances.
Definition Config.php:32
Content objects represent page content, e.g.
Definition Content.php:42
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...
Augment search results.