MediaWiki master
SearchEngine.php
Go to the documentation of this file.
1<?php
23
29abstract class SearchEngine {
/** Sort value used until setSort() is called; also the only entry returned by the base getValidSorts(). */
30 public const DEFAULT_SORT = 'relevance';
31
/** Initialised to an empty string; not read or written by any method visible in this listing. */
33 public $prefix = '';
34
/** Namespaces to search (int[]|null per the generated docs); replaced by setNamespaces(). */
36 public $namespaces = [ NS_MAIN ];
37
/** Maximum number of results; set by setLimitOffset() and temporarily raised by one while over-fetching. */
39 protected $limit = 10;
40
/** Number of results to skip; set by setLimitOffset(). */
42 protected $offset = 0;
43
/** string[] per the generated docs; not touched by any method visible in this listing. */
48 protected $searchTerms = [];
49
/** Whether the searcher should try to build a suggestion; set by setShowSuggestion(). */
51 protected $showSuggestion = true;
/** Current sort value; validated and written by setSort(), read by getSort(). */
53 private $sort = self::DEFAULT_SORT;
54
/** Feature values, keyed by feature name; see setFeatureData() / getFeatureData(). */
56 protected $features = [];
57
/** HookContainer injected through setHookContainer() or lazily fetched by getHookContainer(). */
59 private $hookContainer;
60
/** HookRunner created by setHookContainer() or lazily by getHookRunner(). */
62 private $hookRunner;
63
/** Profile type for completionSearch. */
65 public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';
66
/** Profile type for query independent ranking features (ex: article popularity). */
68 public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';
69
/** Profile type for query dependent ranking features (ex: field weights). */
71 public const FT_QUERY_DEP_PROFILE_TYPE = 'fulltextQueryDepProfile';
72
/** Integer flag for legalSearchChars: includes all chars allowed in a search query. */
74 protected const CHARS_ALL = 1;
75
/** Integer flag for legalSearchChars: includes all chars allowed in a search term. */
77 protected const CHARS_NO_SYNTAX = 2;
78
/**
 * Perform a full text search query and return a result set.
 *
 * The actual work is done by doSearchText(); the call is routed through
 * maybePaginate() so that engines which do not paginate themselves are asked
 * for one extra result.
 *
 * @param mixed $term Search term, passed unchanged to doSearchText()
 * @return mixed Whatever doSearchText() returned (null in this base class)
 */
public function searchText( $term ) {
	return $this->maybePaginate( fn () => $this->doSearchText( $term ) );
}
94
/**
 * Perform a full text search query and return a result set.
 *
 * Base implementation: runs no search at all and always answers null.
 *
 * @param mixed $term Search term (ignored here)
 * @return null
 */
protected function doSearchText( $term ) {
	return null;
}
107
/**
 * Perform a title search in the article archive.
 *
 * Thin public wrapper around doSearchArchiveTitle(); no pagination handling
 * is applied here.
 *
 * @param mixed $term Search term, passed unchanged to doSearchArchiveTitle()
 * @return mixed Whatever doSearchArchiveTitle() returned
 */
public function searchArchiveTitle( $term ) {
	$archiveMatches = $this->doSearchArchiveTitle( $term );

	return $archiveMatches;
}
125
/**
 * Perform a title search in the article archive.
 *
 * Base implementation: performs no lookup and reports success with an
 * empty array as the value.
 *
 * @param mixed $term Search term (ignored here)
 * @return Status Good status whose value is an empty array
 */
protected function doSearchArchiveTitle( $term ) {
	$noMatches = [];

	return Status::newGood( $noMatches );
}
138
/**
 * Perform a title-only search query and return a result set.
 *
 * The actual work is done by doSearchTitle(); the call is routed through
 * maybePaginate() so that engines which do not paginate themselves are asked
 * for one extra result.
 *
 * @param mixed $term Search term, passed unchanged to doSearchTitle()
 * @return mixed Whatever doSearchTitle() returned (null in this base class)
 */
public function searchTitle( $term ) {
	return $this->maybePaginate( fn () => $this->doSearchTitle( $term ) );
}
155
/**
 * Perform a title-only search query and return a result set.
 *
 * Base implementation: runs no search at all and always answers null.
 *
 * @param mixed $term Search term (ignored here)
 * @return null
 */
protected function doSearchTitle( $term ) {
	return null;
}
168
/**
 * Run a search callback, over-fetching by one result unless the engine is a
 * PaginatingSearchEngine.
 *
 * For other engines the limit is raised by one for the duration of the call
 * (and always restored, even if the callback throws). If the callback yields
 * an ISearchResultSet, directly or as the value of a Status, that set is
 * shrunk back to the configured limit.
 *
 * @param Closure $fn Callback that performs the search; called without arguments
 * @return mixed The callback's return value (a contained result set may have been shrunk in place)
 */
private function maybePaginate( Closure $fn ) {
	if ( $this instanceof PaginatingSearchEngine ) {
		// The engine reports pagination on its own; nothing to adjust.
		return $fn();
	}

	$this->limit++;
	try {
		$outcome = $fn();
	} finally {
		$this->limit--;
	}

	// Unwrap a Status only when the outcome is not itself a result set.
	$isWrapped = !( $outcome instanceof ISearchResultSet ) && $outcome instanceof Status;
	$candidate = $isWrapped ? $outcome->getValue() : $outcome;

	if ( $candidate instanceof ISearchResultSet ) {
		$candidate->shrink( $this->limit );
	}

	return $outcome;
}
202
/**
 * Tell whether this engine supports the named feature.
 *
 * The base engine answers true for 'search-update' only; 'title-suffix-filter'
 * and any other feature name are reported as unsupported.
 *
 * @param mixed $feature Feature name to test
 * @return bool
 */
public function supports( $feature ) {
	// Loose comparison on purpose: it is what a switch statement performs,
	// so non-string arguments are treated exactly as before.
	if ( $feature == 'search-update' ) {
		return true;
	}

	return false;
}
219
/**
 * Way to pass custom data for engines.
 *
 * Stores $data under $feature, replacing any value stored earlier for the
 * same feature.
 *
 * @param string|int $feature Feature name, used as array key
 * @param mixed $data Value to remember for that feature
 */
public function setFeatureData( $feature, $data ) {
	$this->features[$feature] = $data;
}
229
/**
 * Way to retrieve custom data set by setFeatureData or by the engine itself.
 *
 * @param string|int $feature Feature name
 * @return mixed|null The stored value, or null when nothing (or null) is stored
 */
public function getFeatureData( $feature ) {
	if ( isset( $this->features[$feature] ) ) {
		return $this->features[$feature];
	}

	return null;
}
240
/**
 * When overridden in a derived class, performs database-specific conversions
 * on text to be used for searching.
 *
 * The base implementation only asks the content language to segment the
 * string by word, which some languages such as Chinese require.
 *
 * @param string $string Text to normalize
 * @return mixed Result of the content language's segmentByWord()
 */
public function normalizeText( $string ) {
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	return $contentLanguage->segmentByWord( $string );
}
253
/**
 * Get the service class for finding near matches.
 *
 * @param Config $config Not used by this implementation
 * @return mixed The title matcher service obtained from MediaWikiServices
 */
public function getNearMatcher( Config $config ) {
	$services = MediaWikiServices::getInstance();

	return $services->getTitleMatcher();
}
263
/**
 * Get near matcher for default SearchEngine.
 *
 * Logs a deprecation warning (deprecated since 1.40) before handing back the
 * title matcher service.
 *
 * @return mixed The title matcher service obtained from MediaWikiServices
 */
protected static function defaultNearMatcher() {
	wfDeprecated( __METHOD__, '1.40' );

	$services = MediaWikiServices::getInstance();

	return $services->getTitleMatcher();
}
274
/**
 * Get chars legal for search.
 *
 * The base implementation returns the same character-class fragment whatever
 * $type is.
 *
 * @param int $type One of the CHARS_* flags (ignored here)
 * @return string Character-class fragment listing the allowed characters
 */
public function legalSearchChars( $type = self::CHARS_ALL ) {
	$allowedChars = "A-Za-z_'.0-9\\x80-\\xFF\\-";

	return $allowedChars;
}
284
/**
 * Set the maximum number of results to return and how many to skip before
 * returning the first.
 *
 * Both values are converted to integers before being stored.
 *
 * @param mixed $limit Maximum number of results
 * @param mixed $offset Number of results to skip; defaults to 0
 */
public function setLimitOffset( $limit, $offset = 0 ) {
	$this->limit = (int)$limit;
	$this->offset = (int)$offset;
}
296
/**
 * Set which namespaces the search should include.
 *
 * A falsy argument (null, empty array, ...) clears the list. Otherwise only
 * negative ids and ids that are keys of the configured searchable namespaces
 * are kept; the surviving entries keep their original array keys.
 *
 * @param int[]|null $namespaces Namespace ids to search
 */
public function setNamespaces( $namespaces ) {
	if ( !$namespaces ) {
		$this->namespaces = [];
		return;
	}

	$searchable = MediaWikiServices::getInstance()
		->getSearchEngineConfig()
		->searchableNamespaces();

	$this->namespaces = array_filter(
		$namespaces,
		static function ( $nsId ) use ( $searchable ) {
			return $nsId < 0 || isset( $searchable[$nsId] );
		}
	);
}
315
/**
 * Set whether the searcher should try to build a suggestion.
 *
 * The value is stored as given, without conversion.
 *
 * @param bool $showSuggestion
 */
public function setShowSuggestion( $showSuggestion ) {
	$this->showSuggestion = $showSuggestion;
}
326
/**
 * Get the valid sort directions.
 *
 * The base engine knows a single one, DEFAULT_SORT.
 *
 * @return string[] Values accepted by setSort()
 */
public function getValidSorts() {
	$sorts = [];
	$sorts[] = self::DEFAULT_SORT;

	return $sorts;
}
339
/**
 * Set the sort direction of the search results.
 *
 * The value must be identical (type and content) to one of the entries of
 * getValidSorts(). A loose comparison would let values such as boolean true
 * through, since true == 'relevance', and store them as the sort.
 *
 * @param string $sort Sort direction to use
 * @throws InvalidArgumentException When $sort is not one of getValidSorts()
 */
public function setSort( $sort ) {
	// Fetch once: the list is used both for validation and for the message.
	$validSorts = $this->getValidSorts();

	if ( !in_array( $sort, $validSorts, true ) ) {
		throw new InvalidArgumentException( "Invalid sort: $sort. " .
			"Must be one of: " . implode( ', ', $validSorts ) );
	}

	$this->sort = $sort;
}
354
/**
 * Get the sort direction of the search results.
 *
 * @return string The value last accepted by setSort(), or DEFAULT_SORT if it was never called
 */
public function getSort() {
	$currentSort = $this->sort;

	return $currentSort;
}
364
/**
 * Parse some common prefixes: all (search everything) or namespace names.
 *
 * Outcomes, in the order they are tried:
 *  - no colon in the query: false;
 *  - $withAllKeyword and the query starts with the localized 'searchall'
 *    keyword or with 'all:': the remainder of the query and null;
 *  - the text before the first colon (spaces turned into underscores) is a
 *    namespace name of the content language: the remainder and that index;
 *  - $withPrefixSearchExtractNamespaceHook and the PrefixSearchExtractNamespace
 *    hook rewrote the query: the rewritten query and the hook's namespaces;
 *  - anything else: false.
 *
 * @param string $query Raw search query
 * @param bool $withAllKeyword Whether the "all:" keywords are recognised
 * @param bool $withPrefixSearchExtractNamespaceHook Whether to fall back to the hook
 * @return array|false [ string $parsedQuery, int[]|null $namespaces ], or false when nothing was extracted
 */
public static function parseNamespacePrefixes(
	$query,
	$withAllKeyword = true,
	$withPrefixSearchExtractNamespaceHook = false
) {
	if ( !str_contains( $query, ':' ) ) {
		// No prefix can be present without a colon.
		return false;
	}

	if ( $withAllKeyword ) {
		$keywords = [ wfMessage( 'searchall' )->inContentLanguage()->text() . ":" ];
		// Always accept all: so that every wiki shares one common syntax.
		if ( !in_array( 'all:', $keywords ) ) {
			$keywords[] = 'all:';
		}

		foreach ( $keywords as $keyword ) {
			if ( str_starts_with( $query, $keyword ) ) {
				return [ substr( $query, strlen( $keyword ) ), null ];
			}
		}
	}

	$rawPrefix = substr( $query, 0, strpos( $query, ':' ) );
	$prefix = str_replace( ' ', '_', $rawPrefix );
	$services = MediaWikiServices::getInstance();
	$nsIndex = $services->getContentLanguage()->getNsIndex( $prefix );

	if ( $nsIndex !== false ) {
		// Drop the namespace name and its colon from the query.
		return [ substr( $query, strlen( $prefix ) + 1 ), [ $nsIndex ] ];
	}

	if ( !$withPrefixSearchExtractNamespaceHook ) {
		return false;
	}

	$hookNamespaces = [ NS_MAIN ];
	$hookQuery = $query;
	$hookRunner = new HookRunner( $services->getHookContainer() );
	$hookRunner->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery );

	if ( $hookQuery === $query ) {
		// The hook left the query alone: nothing was extracted.
		return false;
	}

	return [ $hookQuery, $hookNamespaces ];
}
433
/**
 * Find snippet highlight settings for all users.
 *
 * Both settings are hard-coded.
 * NOTE(review): the two assignments are absent from this listing; the values
 * 2 and 75 are taken from upstream MediaWiki and should be confirmed.
 *
 * @return int[] [ number of context lines, number of context characters ]
 */
public static function userHighlightPrefs() {
	$contextlines = 2;
	$contextchars = 75;

	return [ $contextlines, $contextchars ];
}
446
/**
 * Create or update the search index record for the given page.
 *
 * The base engine keeps no index, so this does nothing.
 *
 * @param mixed $id Ignored here
 * @param mixed $title Ignored here
 * @param mixed $text Ignored here
 */
public function update( $id, $title, $text ) {
	// Intentionally empty.
}
459
/**
 * Update a search index record's title only.
 *
 * The base engine keeps no index, so this does nothing.
 *
 * @param mixed $id Ignored here
 * @param mixed $title Ignored here
 */
public function updateTitle( $id, $title ) {
	// Intentionally empty.
}
471
/**
 * Counterpart of update() for removals.
 *
 * The base engine keeps no index, so this does nothing.
 *
 * @param mixed $id Ignored here
 * @param mixed $title Ignored here
 */
public function delete( $id, $title ) {
	// Intentionally empty.
}
483
/**
 * Makes search simple string if it was namespaced.
 *
 * When parseNamespacePrefixes() (without the "all:" keyword, with the hook
 * fallback) extracts a namespace, the engine's namespaces are replaced by the
 * extracted ones and the remaining query is returned. Otherwise the input is
 * returned untouched and the namespaces are left alone.
 *
 * @param string $search Raw search query
 * @return string Query with any recognised namespace prefix removed
 */
protected function normalizeNamespaces( $search ) {
	$parsed = self::parseNamespacePrefixes( $search, false, true );
	if ( $parsed === false ) {
		return $search;
	}

	[ $bareQuery, $extractedNamespaces ] = $parsed;
	$this->setNamespaces( $extractedNamespaces );

	return $bareQuery;
}
498
/**
 * Perform an overfetch of completion search results.
 *
 * Calls completionSearchBackend() with the limit raised by one; the limit is
 * restored afterwards, also when the backend throws.
 *
 * @param string $search Search query
 * @return SearchSuggestionSet Whatever completionSearchBackend() returned
 */
protected function completionSearchBackendOverfetch( $search ) {
	$this->limit++;
	try {
		$suggestions = $this->completionSearchBackend( $search );
	} finally {
		$this->limit--;
	}

	return $suggestions;
}
514
/**
 * Perform a completion search.
 *
 * Unless NS_SPECIAL is among the searched namespaces, the PrefixSearchBackend
 * hook gets the first chance to answer. The hook signals that it handled the
 * search by returning false, in which case the strings it collected are
 * returned. In every other case the simple prefix search is used.
 *
 * @param string $search Search query; trimmed before use
 * @return SearchSuggestionSet
 */
protected function completionSearchBackend( $search ) {
	$results = [];
	$search = trim( $search );

	// The hook is never run for searches that include the Special namespace.
	$useSimpleSearch = in_array( NS_SPECIAL, $this->namespaces ) ||
		$this->getHookRunner()->onPrefixSearchBackend(
			$this->namespaces, $search, $this->limit, $results, $this->offset );

	if ( $useSimpleSearch ) {
		// Hook did not do the job, use default simple search
		$titles = $this->simplePrefixSearch( $search );
		return SearchSuggestionSet::fromTitles( $titles );
	}

	// False from the hook means it worked.
	// FIXME: Yes, the API is weird. That's why it is going to be deprecated.
	return SearchSuggestionSet::fromStrings( $results );
}
544
/**
 * Perform a completion search.
 *
 * A query that is empty after trimming yields an empty suggestion set.
 * Otherwise a namespace prefix is stripped from the query, the backend is
 * over-fetched by one result and the outcome is post-processed.
 *
 * @param string $search Search query
 * @return SearchSuggestionSet
 */
public function completionSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return SearchSuggestionSet::emptySuggestionSet();
	}

	$search = $this->normalizeNamespaces( $search );

	return $this->processCompletionResults(
		$search,
		$this->completionSearchBackendOverfetch( $search )
	);
}
558
/**
 * Perform a completion search with variants.
 *
 * Runs the regular over-fetched completion search first. While fewer than
 * limit + 1 results are available, the query's language-variant conversions
 * (excluding the query itself) are searched as well and their results
 * appended. Limit and offset are restored after every fallback search.
 *
 * @param string $search Search query
 * @return SearchSuggestionSet
 */
public function completionSearchWithVariants( $search ) {
	if ( trim( $search ) === '' ) {
		return SearchSuggestionSet::emptySuggestionSet();
	}
	$search = $this->normalizeNamespaces( $search );

	$results = $this->completionSearchBackendOverfetch( $search );
	$remaining = 1 + $this->limit - $results->getSize();
	if ( $remaining <= 0 ) {
		// Enough results already; no variant lookups needed.
		return $this->processCompletionResults( $search, $results );
	}

	$services = MediaWikiServices::getInstance();
	$converter = $services->getLanguageConverterFactory()
		->getLanguageConverter( $services->getContentLanguage() );
	$variants = array_diff(
		array_unique( $converter->autoConvertToAllVariants( $search ) ),
		[ $search ]
	);

	$savedLimit = $this->limit;
	$savedOffset = $this->offset;
	foreach ( $variants as $variant ) {
		try {
			// Only ask for what is still missing, starting from offset 0.
			$this->setLimitOffset( $remaining );
			$variantResults = $this->completionSearch( $variant );
			$results->appendAll( $variantResults );
			$remaining -= $variantResults->getSize();
		} finally {
			$this->setLimitOffset( $savedLimit, $savedOffset );
		}
		if ( $remaining <= 0 ) {
			break;
		}
	}

	return $this->processCompletionResults( $search, $results );
}
599
/**
 * Extract titles from completion results.
 *
 * @param SearchSuggestionSet $completionResults
 * @return array Result of mapping each suggestion to its getSuggestedTitle() value
 */
public function extractTitles( SearchSuggestionSet $completionResults ) {
	return $completionResults->map(
		static fn ( SearchSuggestion $sugg ) => $sugg->getSuggestedTitle()
	);
}
610
/**
 * Process completion search results.
 *
 * Shrinks the over-fetched set back to the configured limit, preloads the
 * suggested titles with a LinkBatch, filters on Title::isKnown() and, for the
 * first page of results, makes sure an exact title match ends up at the head
 * of the set.
 *
 * @param string $search Search query; trimmed before it is given to the rescorer
 * @param SearchSuggestionSet $suggestions Suggestions to process; modified in place
 * @return SearchSuggestionSet The same $suggestions instance
 */
protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
	// We over-fetched to determine pagination. Shrink back down if we have extra results
	// and mark if pagination is possible
	$suggestions->shrink( $this->limit );

	$search = trim( $search );
	// preload the titles with LinkBatch
	$suggestedTitles = $suggestions->map(
		static fn ( SearchSuggestion $sugg ) => $sugg->getSuggestedTitle()
	);
	$services = MediaWikiServices::getInstance();
	$services->getLinkBatchFactory()
		->newLinkBatch( $suggestedTitles )
		->setCaller( __METHOD__ )
		->execute();

	// NOTE(review): filter()'s return value is used as a count here, presumably
	// the number of suggestions it dropped; confirm against SearchSuggestionSet.
	$diff = $suggestions->filter(
		static fn ( SearchSuggestion $sugg ) => $sugg->getSuggestedTitle()->isKnown()
	);
	if ( $diff > 0 ) {
		$services->getStatsFactory()
			->getCounter( 'search_completion_missing_total' )
			->incrementBy( $diff );
	}

	// SearchExactMatchRescorer should probably be refactored to work directly on top of a
	// SearchSuggestionSet instead of converting it to array and trying to infer if it has
	// re-scored anything by inspecting the head of the returned array.
	$results = $suggestions->map(
		static fn ( SearchSuggestion $sugg ) => $sugg->getSuggestedTitle()->getPrefixedText()
	);

	$rescorer = new SearchExactMatchRescorer();
	if ( $this->offset === 0 ) {
		// Rescore results with an exact title match
		// NOTE: in some cases like cross-namespace redirects
		// (frequently used as shortcuts e.g. WP:WP on huwiki) some
		// backends like Cirrus will return no results. We should still
		// try an exact title match to workaround this limitation
		$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
	} else {
		// No need to rescore if offset is not 0
		// The exact match must have been returned at position 0
		// if it existed.
		$rescoredResults = $results;
	}

	if ( count( $rescoredResults ) > 0 ) {
		// Strict search: titles are strings, and a loose comparison treats
		// numeric-looking titles such as "1e3" and "1000" as equal, which would
		// hide a genuinely new exact match behind an unrelated suggestion.
		$found = array_search( $rescoredResults[0], $results, true );
		if ( $found === false ) {
			// If the first result is not in the previous array it
			// means that we found a new exact match
			$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
			$suggestions->prepend( $exactMatch );
			$replacedRedirect = $rescorer->getReplacedRedirect();
			if ( $replacedRedirect !== null ) {
				// the exact match rescorer replaced one of the suggestion found by the search engine
				// let's remove it from our suggestions set to avoid showing duplicates
				$suggestions->remove( SearchSuggestion::fromTitle( 0,
					Title::newFromText( $replacedRedirect ) ) );
			}
			$suggestions->shrink( $this->limit );
		} elseif ( $found > 0 ) {
			// The exact match exists but is not first: move it to the head.
			$suggestions->rescore( $found );
		}
	}

	return $suggestions;
}
688
/**
 * Simple prefix search for subpages.
 *
 * A query that is empty after trimming yields an empty array. Otherwise a
 * namespace prefix is stripped from the query and the simple prefix search
 * is run on the remainder.
 *
 * @param string $search Search query
 * @return array Empty array, or whatever simplePrefixSearch() returned
 */
public function defaultPrefixSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return [];
	}

	$bareQuery = $this->normalizeNamespaces( $search );

	return $this->simplePrefixSearch( $bareQuery );
}
702
/**
 * Call out to simple search backend.
 *
 * Uses a fresh TitlePrefixSearch with the engine's current namespaces, limit
 * and offset.
 *
 * @param string $search Search query
 * @return mixed Result of TitlePrefixSearch::defaultSearchBackend()
 */
protected function simplePrefixSearch( $search ) {
	// Use default database prefix search
	$prefixSearch = new TitlePrefixSearch();

	return $prefixSearch->defaultSearchBackend(
		$this->namespaces,
		$search,
		$this->limit,
		$this->offset
	);
}
714
/**
 * Get a list of supported profiles.
 *
 * The base engine has none and always answers null.
 *
 * @param string $profileType Profile type, e.g. one of the *_PROFILE_TYPE constants (ignored here)
 * @param User|null $user Ignored here
 * @return null
 */
public function getProfiles( $profileType, ?User $user = null ) {
	return null;
}
736
/**
 * Create a search field definition.
 *
 * The base engine implements no fields, so a NullIndexField is returned for
 * every name and type.
 *
 * @param mixed $name Field name (ignored here)
 * @param mixed $type Field type (ignored here)
 * @return NullIndexField
 */
public function makeSearchFieldMapping( $name, $type ) {
	$placeholder = new NullIndexField();

	return $placeholder;
}
750
/**
 * Get fields for search index.
 *
 * Collects the fields declared by every distinct content handler, merging
 * definitions that share a name, then lets extensions add or alter fields
 * through the SearchIndexFields hook.
 *
 * @return array Field definitions keyed by field name
 * @throws InvalidArgumentException When two handlers declare a field of the
 *  same name and the definitions cannot be merged
 */
public function getSearchIndexFields() {
	// The factory does not change between iterations; look it up once.
	$handlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
	$fields = [];
	$seenHandlers = new SplObjectStorage();

	foreach ( $handlerFactory->getContentModels() as $model ) {
		try {
			$handler = $handlerFactory->getContentHandler( $model );
		} catch ( MWUnknownContentModelException ) {
			// If we can find no handler, ignore it
			continue;
		}
		// Several models can have the same handler, so avoid processing it repeatedly
		if ( $seenHandlers->contains( $handler ) ) {
			// We already did this one
			continue;
		}
		$seenHandlers->attach( $handler );

		$handlerFields = $handler->getFieldsForSearchIndex( $this );
		foreach ( $handlerFields as $fieldName => $fieldData ) {
			if ( empty( $fields[$fieldName] ) ) {
				$fields[$fieldName] = $fieldData;
				continue;
			}
			// TODO: do we allow some clashes with the same type or reject all of them?
			$mergeDef = $fields[$fieldName]->merge( $fieldData );
			if ( !$mergeDef ) {
				throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
			}
			$fields[$fieldName] = $mergeDef;
		}
	}

	// Hook to allow extensions to produce search mapping fields
	$this->getHookRunner()->onSearchIndexFields( $fields, $this );

	return $fields;
}
793
/**
 * Augment search results with extra data.
 *
 * Augmentors are gathered through the SearchResultsAugment hook. Row
 * augmentors are wrapped in a PerRowAugmentor so that everything can be run
 * as a set augmentor; each augmentor's non-empty output is stored on the
 * result set under the augmentor's name.
 *
 * @param ISearchResultSet $resultSet Result set to augment in place
 * @throws InvalidArgumentException When one name is registered both as row and as set augmentor
 */
public function augmentSearchResults( ISearchResultSet $resultSet ) {
	$setAugmentors = [];
	$rowAugmentors = [];
	$this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors );

	if ( !( $setAugmentors || $rowAugmentors ) ) {
		// Nothing registered, nothing to do.
		return;
	}

	// Convert row augmentors to set augmentor
	foreach ( $rowAugmentors as $name => $rowAugmentor ) {
		if ( isset( $setAugmentors[$name] ) ) {
			throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
		}
		$setAugmentors[$name] = new PerRowAugmentor( $rowAugmentor );
	}

	foreach ( $setAugmentors as $name => $augmentor ) {
		$augmented = $augmentor->augmentAll( $resultSet );
		if ( !$augmented ) {
			// Empty output is not recorded.
			continue;
		}
		$resultSet->setAugmentedData( $name, $augmented );
	}
}
825
/**
 * Inject the hook container and rebuild the hook runner on top of it.
 *
 * @param HookContainer $hookContainer
 */
public function setHookContainer( HookContainer $hookContainer ) {
	$runner = new HookRunner( $hookContainer );

	$this->hookContainer = $hookContainer;
	$this->hookRunner = $runner;
}
835
/**
 * Get a HookContainer, for running extension hooks or for hook metadata.
 *
 * Falls back to the global service container when none was injected. This
 * shouldn't be hit in core, but it is needed for CirrusSearch, which commonly
 * creates a CirrusSearch object without cirrus being configured in
 * $wgSearchType/$wgSearchTypeAlternatives.
 *
 * @return HookContainer
 */
protected function getHookContainer(): HookContainer {
	$this->hookContainer ??= MediaWikiServices::getInstance()->getHookContainer();

	return $this->hookContainer;
}
851
/**
 * Get a HookRunner for running core hooks.
 *
 * The runner is created on first use from getHookContainer() and then reused.
 *
 * @return HookRunner
 */
protected function getHookRunner(): HookRunner {
	$this->hookRunner ??= new HookRunner( $this->getHookContainer() );

	return $this->hookRunner;
}
866
867}
const NS_MAIN
Definition Defines.php:51
const NS_SPECIAL
Definition Defines.php:40
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:68
Exception thrown when an unregistered content model is requested.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Service locator for MediaWiki core services.
Service implementation of near match title search.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition Status.php:44
Represents a title within MediaWiki.
Definition Title.php:69
User class for the MediaWiki software.
Definition User.php:108
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook... This is case-sensitive (First character may be autom...
Contain a class for special pages.
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getNearMatcher(Config $config)
Get the service class for finding near matches.
getHookRunner()
Get a HookRunner for running core hooks.
searchTitle( $term)
Perform a title-only search query and return a result set.
supports( $feature)
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
const FT_QUERY_DEP_PROFILE_TYPE
Profile type for query dependent ranking features (ex: field weights)
update( $id, $title, $text)
Create or update the search index record for the given page.
setNamespaces( $namespaces)
Set which namespaces the search should include.
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
array $features
Feature values.
string[] $searchTerms
defaultPrefixSearch( $search)
Simple prefix search for subpages.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
searchArchiveTitle( $term)
Perform a title search in the article archive.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
setFeatureData( $feature, $data)
Way to pass custom data for engines.
completionSearchBackend( $search)
Perform a completion search.
int[]|null $namespaces
getSort()
Get the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
getSearchIndexFields()
Get fields for search index.
getValidSorts()
Get the valid sort directions.
static userHighlightPrefs()
Find snippet highlight settings for all users.
updateTitle( $id, $title)
Update a search index record's title only.
completionSearchWithVariants( $search)
Perform a completion search with variants.
doSearchText( $term)
Perform a full text search query and return a result set.
normalizeNamespaces( $search)
Makes search simple string if it was namespaced.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
getHookContainer()
Get a HookContainer, for running extension hooks or for hook metadata.
completionSearch( $search)
Perform a completion search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
getProfiles( $profileType, ?User $user=null)
Get a list of supported profiles.
simplePrefixSearch( $search)
Call out to simple search backend.
setSort( $sort)
Set the sort direction of the search results.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features (ex: article popularity)
setHookContainer(HookContainer $hookContainer)
searchText( $term)
Perform a full text search query and return a result set.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
A set of search suggestions.
filter( $callback)
Filter the suggestions array.
rescore( $key)
Move the suggestion at index $key to the first position.
shrink( $limit)
Remove any extra elements in the suggestions set.
static fromStrings(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a string array.
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a title array.
map( $callback)
Call array_map on the suggestions array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
remove(SearchSuggestion $suggestion)
Remove a suggestion from the set.
A search suggestion.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
Performs prefix search, returning Title objects.
A set of SearchEngine results.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Interface for configuration instances.
Definition Config.php:18
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...
Augment search results.