MediaWiki master
SearchEngine.php
Go to the documentation of this file.
1<?php
10namespace MediaWiki\Search;
11
12use Closure;
13use InvalidArgumentException;
22use SplObjectStorage;
23
33abstract class SearchEngine {
/** Sort order used until setSort() selects another one. */
public const DEFAULT_SORT = 'relevance';

/** Profile type for completionSearch. */
public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';

/** Profile type for query independent ranking features (ex: article popularity). */
public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';

/** Profile type for query dependent ranking features (ex: field weights). */
public const FT_QUERY_DEP_PROFILE_TYPE = 'fulltextQueryDepProfile';

/** Name of the feature data entry holding an identifier generated by some search backends. */
public const SEARCH_ID = 'searchId';

/** Integer flag for legalSearchChars: includes all chars allowed in a search query. */
protected const CHARS_ALL = 1;

/** Integer flag for legalSearchChars: includes all chars allowed in a search term. */
protected const CHARS_NO_SYNTAX = 2;

/** Prefix value; not read or written by any method of this class. */
public $prefix = '';

/** Namespaces the search should include; starts with the main namespace only. */
public $namespaces = [ NS_MAIN ];

/** Maximum number of results to return (see setLimitOffset()). */
protected $limit = 10;

/** Number of results to skip before returning the first (see setLimitOffset()). */
protected $offset = 0;

/** Search terms; not touched by this class itself — presumably filled in by subclasses. */
protected $searchTerms = [];

/** Whether the searcher should try to build a suggestion (see setShowSuggestion()). */
protected $showSuggestion = true;

/** Currently selected sort, validated by setSort(). */
private $sort = self::DEFAULT_SORT;

/** Feature values, keyed by feature name (see setFeatureData()/getFeatureData()). */
protected $features = [];

/** HookContainer injected through setHookContainer() or lazily fetched by getHookContainer(). */
private $hookContainer;

/** HookRunner built on top of the hook container. */
private $hookRunner;
89
/**
 * Perform a full text search query and return a result set.
 *
 * The actual work is done by doSearchText(); the call is routed through
 * maybePaginate() so that pagination handling is applied to the outcome.
 *
 * @param mixed $term Search term, handed unchanged to doSearchText()
 * @return mixed Whatever doSearchText() produced, as returned by maybePaginate()
 */
public function searchText( $term ) {
	$runSearch = fn () => $this->doSearchText( $term );

	return $this->maybePaginate( $runSearch );
}
105
/**
 * Perform a full text search query and return a result set.
 *
 * This base implementation performs no search at all and yields null.
 *
 * @param mixed $term Search term (unused here)
 * @return null
 */
protected function doSearchText( $term ) {
	// Nothing to search with in the base class.
	return null;
}
118
/**
 * Perform a title search in the article archive.
 *
 * Thin public entry point around doSearchArchiveTitle().
 *
 * @param mixed $term Search term, handed unchanged to doSearchArchiveTitle()
 * @return mixed The value returned by doSearchArchiveTitle()
 */
public function searchArchiveTitle( $term ) {
	$archiveResult = $this->doSearchArchiveTitle( $term );

	return $archiveResult;
}
136
/**
 * Perform a title search in the article archive.
 *
 * This base implementation finds nothing: it returns a good Status whose
 * value is an empty array.
 *
 * @param mixed $term Search term (unused here)
 * @return Status
 */
protected function doSearchArchiveTitle( $term ) {
	$noMatches = [];

	return Status::newGood( $noMatches );
}
149
/**
 * Perform a title-only search query and return a result set.
 *
 * The actual work is done by doSearchTitle(); the call is routed through
 * maybePaginate() so that pagination handling is applied to the outcome.
 *
 * @param mixed $term Search term, handed unchanged to doSearchTitle()
 * @return mixed Whatever doSearchTitle() produced, as returned by maybePaginate()
 */
public function searchTitle( $term ) {
	$runSearch = fn () => $this->doSearchTitle( $term );

	return $this->maybePaginate( $runSearch );
}
166
/**
 * Perform a title-only search query and return a result set.
 *
 * This base implementation performs no search at all and yields null.
 *
 * @param mixed $term Search term (unused here)
 * @return null
 */
protected function doSearchTitle( $term ) {
	// Nothing to search with in the base class.
	return null;
}
179
/**
 * Run a search callback, over-fetching by one result unless the engine
 * paginates on its own.
 *
 * Engines that are a PaginatingSearchEngine get the callback's outcome
 * untouched. For every other engine the limit is raised by one for the
 * duration of the callback, and the resulting set (either returned directly
 * or wrapped in a Status) is shrunk back to the real limit afterwards.
 *
 * @param Closure $fn Callback performing the actual search
 * @return mixed The callback's return value (the same object, possibly shrunk)
 */
private function maybePaginate( Closure $fn ) {
	if ( $this instanceof PaginatingSearchEngine ) {
		return $fn();
	}

	// Temporarily ask for one extra result; always restore the limit,
	// even when the callback throws.
	++$this->limit;
	try {
		$outcome = $fn();
	} finally {
		--$this->limit;
	}

	// Unwrap a Status to reach the result set it may carry.
	$candidate = ( $outcome instanceof Status && !( $outcome instanceof ISearchResultSet ) )
		? $outcome->getValue()
		: $outcome;

	if ( $candidate instanceof ISearchResultSet ) {
		$candidate->shrink( $this->limit );
	}

	return $outcome;
}
213
/**
 * Tell whether this engine supports a given feature.
 *
 * Only 'search-update' is reported as supported; 'title-suffix-filter' and
 * every other feature name are not.
 *
 * @param mixed $feature Feature name
 * @return bool
 */
public function supports( $feature ) {
	// Loose comparison on purpose: it mirrors the semantics of a switch statement.
	if ( $feature == 'search-update' ) {
		return true;
	}

	// 'title-suffix-filter' and anything unknown.
	return false;
}
230
/**
 * Way to pass custom data for engines.
 *
 * Stores $data under the key $feature, replacing any previous value.
 *
 * @param mixed $feature Feature name used as array key
 * @param mixed $data Value to remember for that feature
 */
public function setFeatureData( $feature, $data ) {
	$this->features = [ $feature => $data ] + $this->features;
}
240
/**
 * Way to retrieve custom data set by setFeatureData or by the engine itself.
 *
 * @param mixed $feature Feature name
 * @return mixed|null The stored value, or null when nothing (or null) is stored
 */
public function getFeatureData( $feature ) {
	if ( isset( $this->features[$feature] ) ) {
		return $this->features[$feature];
	}

	return null;
}
251
/**
 * Normalize text to be used for searching or updates.
 *
 * The base implementation only asks the content language to segment the
 * string by word, which some languages such as Chinese require.
 *
 * @param mixed $string Text to normalize
 * @return mixed Result of the content language's segmentByWord()
 */
public function normalizeText( $string ) {
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	return $contentLanguage->segmentByWord( $string );
}
264
/**
 * Get the service class used for finding near matches.
 *
 * @param Config $config Not used by this implementation
 * @return mixed The title matcher service from MediaWikiServices
 */
public function getNearMatcher( Config $config ) {
	$services = MediaWikiServices::getInstance();

	return $services->getTitleMatcher();
}
274
/**
 * Get near matcher for default SearchEngine.
 *
 * Flagged as deprecated since 1.40 on every call.
 *
 * @return mixed The title matcher service from MediaWikiServices
 */
protected static function defaultNearMatcher() {
	wfDeprecated( __METHOD__, '1.40' );

	$services = MediaWikiServices::getInstance();

	return $services->getTitleMatcher();
}
285
/**
 * Get chars legal for search.
 *
 * The base implementation returns the same character list regardless of $type.
 *
 * @param int $type One of the CHARS_* flags (ignored here)
 * @return string Character list, apparently written as the inside of a regex character class
 */
public function legalSearchChars( $type = self::CHARS_ALL ) {
	$legalChars = "A-Za-z_'.0-9\\x80-\\xFF\\-";

	return $legalChars;
}
295
/**
 * Set the maximum number of results to return and how many to skip before
 * returning the first.
 *
 * Both values are converted to integers before being stored.
 *
 * @param mixed $limit Maximum number of results
 * @param mixed $offset Number of results to skip
 */
public function setLimitOffset( $limit, $offset = 0 ) {
	[ $this->limit, $this->offset ] = [ (int)$limit, (int)$offset ];
}
307
/**
 * Set which namespaces the search should include.
 *
 * A falsy argument clears the list. Otherwise only negative ids and ids that
 * are keys of the configured searchable namespaces are kept; the original
 * array keys are preserved.
 *
 * @param mixed $namespaces Namespace ids, or a falsy value for none
 */
public function setNamespaces( $namespaces ) {
	if ( !$namespaces ) {
		$this->namespaces = [];
		return;
	}

	// Filter namespaces to only keep valid ones
	$searchable = MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces();
	$this->namespaces = array_filter(
		$namespaces,
		static function ( $nsId ) use ( $searchable ) {
			return $nsId < 0 || isset( $searchable[$nsId] );
		}
	);
}
326
/**
 * Set whether the searcher should try to build a suggestion.
 *
 * @param mixed $showSuggestion Stored as-is in $this->showSuggestion
 */
public function setShowSuggestion( $showSuggestion ) {
	$this->showSuggestion = $showSuggestion;
}
337
/**
 * Get the valid sort directions.
 *
 * The base implementation only knows the default sort.
 *
 * @return array
 */
public function getValidSorts() {
	$sorts = [];
	$sorts[] = self::DEFAULT_SORT;

	return $sorts;
}
350
/**
 * Set the sort direction of the search results.
 *
 * The value must be identical (strict comparison) to one of the entries
 * returned by getValidSorts(). Strictness keeps values such as `true` from
 * being accepted merely because they compare loosely equal to a sort name.
 *
 * @param mixed $sort Requested sort
 * @throws InvalidArgumentException When $sort is not one of getValidSorts()
 */
public function setSort( $sort ) {
	// Fetch once: the list is used for both the check and the error message.
	$validSorts = $this->getValidSorts();
	if ( !in_array( $sort, $validSorts, true ) ) {
		throw new InvalidArgumentException( "Invalid sort: $sort. " .
			"Must be one of: " . implode( ', ', $validSorts ) );
	}
	$this->sort = $sort;
}
365
/**
 * Get the sort direction of the search results.
 *
 * @return mixed The value last accepted by setSort(), or the default sort
 */
public function getSort() {
	$currentSort = $this->sort;

	return $currentSort;
}
375
/**
 * Parse some common prefixes: all (search everything) or namespace names.
 *
 * @param string $query Raw query
 * @param bool $withAllKeyword Whether the localized "searchall" keyword and
 *  the literal "all:" prefix are recognized
 * @param bool $withPrefixSearchExtractNamespaceHook Whether the
 *  PrefixSearchExtractNamespace hook is consulted when the text before the
 *  first colon is not a namespace name
 * @return false|array False when no prefix was recognized; otherwise a
 *  two-element array [ remaining query, namespaces ], where namespaces is
 *  null for the "all" keyword and an array of namespace ids otherwise
 */
public static function parseNamespacePrefixes(
	$query,
	$withAllKeyword = true,
	$withPrefixSearchExtractNamespaceHook = false
) {
	if ( !str_contains( $query, ':' ) ) {
		// nothing to do
		return false;
	}

	if ( $withAllKeyword ) {
		$localizedAll = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
		// force all: so that we have a common syntax for all the wikis
		$allKeywords = $localizedAll === 'all:' ? [ $localizedAll ] : [ $localizedAll, 'all:' ];

		foreach ( $allKeywords as $keyword ) {
			if ( str_starts_with( $query, $keyword ) ) {
				// "all" query: strip the keyword, no specific namespace extracted
				return [ substr( $query, strlen( $keyword ) ), null ];
			}
		}
	}

	// Text before the first colon, with spaces turned into underscores
	// (same length as the raw text, so it can be used to cut the query).
	$nsName = str_replace( ' ', '_', substr( $query, 0, strpos( $query, ':' ) ) );
	$services = MediaWikiServices::getInstance();
	$nsIndex = $services->getContentLanguage()->getNsIndex( $nsName );
	if ( $nsIndex !== false ) {
		return [ substr( $query, strlen( $nsName ) + 1 ), [ $nsIndex ] ];
	}

	if ( !$withPrefixSearchExtractNamespaceHook ) {
		return false;
	}

	// Let the hook have a go; it signals success by altering the query.
	$hookNamespaces = [ NS_MAIN ];
	$hookQuery = $query;
	$hookRunner = new HookRunner( $services->getHookContainer() );
	$hookRunner->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery );
	if ( $hookQuery === $query ) {
		return false;
	}

	return [ $hookQuery, $hookNamespaces ];
}
444
/**
 * Find snippet highlight settings for all users.
 *
 * NOTE(review): the two assignments were absent from this listing, which left
 * the method returning undefined variables. The values below are restored
 * from the upstream MediaWiki implementation as remembered — please confirm.
 *
 * @return array Two-element array [ context lines, context chars ]
 */
public static function userHighlightPrefs() {
	// Hard-coded values, not read from any user preference.
	$contextlines = 2;
	$contextchars = 75;
	return [ $contextlines, $contextchars ];
}
457
/**
 * Create or update the search index record for the given page.
 *
 * The base implementation does nothing.
 *
 * @param mixed $id
 * @param mixed $title
 * @param mixed $text
 */
public function update( $id, $title, $text ) {
	// Intentionally empty: no index to maintain in the base class.
}
470
/**
 * Update a search index record's title only.
 *
 * The base implementation does nothing.
 *
 * @param mixed $id
 * @param mixed $title
 */
public function updateTitle( $id, $title ) {
	// Intentionally empty: no index to maintain in the base class.
}
482
/**
 * Index-deletion entry point taking a page id and title.
 *
 * The base implementation does nothing.
 *
 * @param mixed $id
 * @param mixed $title
 */
public function delete( $id, $title ) {
	// Intentionally empty: no index to maintain in the base class.
}
494
/**
 * Makes search simple string if it was namespaced.
 *
 * When a namespace prefix is recognized (the "all" keyword is not considered,
 * the PrefixSearchExtractNamespace hook is), the extracted namespaces are
 * applied through setNamespaces() and the remainder of the query is returned.
 *
 * @param string $search Raw search string
 * @return string The query without its namespace prefix, or $search unchanged
 */
protected function normalizeNamespaces( $search ) {
	$parsed = self::parseNamespacePrefixes( $search, false, true );
	if ( $parsed === false ) {
		return $search;
	}

	[ $remainder, $namespaces ] = $parsed;
	$this->setNamespaces( $namespaces );

	return $remainder;
}
509
/**
 * Perform an overfetch of completion search results.
 *
 * The limit is raised by one while completionSearchBackend() runs and is
 * restored afterwards, even if the backend throws.
 *
 * @param string $search
 * @return mixed The value returned by completionSearchBackend()
 */
protected function completionSearchBackendOverfetch( $search ) {
	++$this->limit;
	try {
		$suggestions = $this->completionSearchBackend( $search );
	} finally {
		--$this->limit;
	}

	return $suggestions;
}
525
/**
 * Perform a completion search.
 *
 * The base implementation trims the input, runs simplePrefixSearch() and
 * wraps the titles it returns in a SearchSuggestionSet.
 *
 * @param string $search
 * @return SearchSuggestionSet
 */
protected function completionSearchBackend( $search ) {
	$titles = $this->simplePrefixSearch( trim( $search ) );

	return SearchSuggestionSet::fromTitles( $titles );
}
541
/**
 * Perform a completion search.
 *
 * Blank input short-circuits to an empty suggestion set. Otherwise the
 * namespace prefix is normalized, the backend is queried with one extra
 * result, and the outcome goes through processCompletionResults().
 *
 * @param string $search
 * @return SearchSuggestionSet
 */
public function completionSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		// Return empty result
		return SearchSuggestionSet::emptySuggestionSet();
	}

	$search = $this->normalizeNamespaces( $search );

	return $this->processCompletionResults(
		$search,
		$this->completionSearchBackendOverfetch( $search )
	);
}
555
/**
 * Perform a completion search with variants.
 *
 * After the regular over-fetching backend query, any room left under the
 * limit is filled by running completionSearch() on each language variant of
 * the query (as produced by the content language's converter), until the
 * room is used up. Limit and offset are restored after every variant search.
 *
 * @param string $search
 * @return SearchSuggestionSet
 */
public function completionSearchWithVariants( $search ) {
	if ( trim( $search ) === '' ) {
		// Return empty result
		return SearchSuggestionSet::emptySuggestionSet();
	}
	$search = $this->normalizeNamespaces( $search );

	$results = $this->completionSearchBackendOverfetch( $search );
	// "1 +" keeps the over-fetch so pagination can still be detected.
	$remaining = 1 + $this->limit - $results->getSize();
	if ( $remaining > 0 ) {
		$services = MediaWikiServices::getInstance();
		$converter = $services->getLanguageConverterFactory()
			->getLanguageConverter( $services->getContentLanguage() );
		$variants = $converter->autoConvertToAllVariants( $search );
		// Distinct variants only, and never the query that was already run.
		$variants = array_diff( array_unique( $variants ), [ $search ] );

		$savedLimit = $this->limit;
		$savedOffset = $this->offset;
		foreach ( $variants as $variant ) {
			try {
				$this->setLimitOffset( $remaining );
				$variantResults = $this->completionSearch( $variant );
				$results->appendAll( $variantResults );
				$remaining -= $variantResults->getSize();
			} finally {
				$this->setLimitOffset( $savedLimit, $savedOffset );
			}
			if ( $remaining <= 0 ) {
				break;
			}
		}
	}

	return $this->processCompletionResults( $search, $results );
}
596
/**
 * Extract titles from completion results.
 *
 * @param SearchSuggestionSet $completionResults
 * @return mixed Result of mapping every suggestion to its suggested title
 */
public function extractTitles( SearchSuggestionSet $completionResults ) {
	$toTitle = static fn ( SearchSuggestion $sugg ) => $sugg->getSuggestedTitle();

	return $completionResults->map( $toTitle );
}
607
/**
 * Process completion search results.
 *
 * Shrinks the over-fetched set to the limit, preloads the suggested titles,
 * drops suggestions whose title is not known, and — for the first page
 * only — promotes an exact title match to the top.
 *
 * @param string $search Search string; trimmed before the exact-match rescoring
 * @param SearchSuggestionSet $suggestions Set to process; modified in place
 * @return SearchSuggestionSet The same $suggestions instance
 */
protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
	// We over-fetched to determine pagination. Shrink back down if we have extra results
	// and mark if pagination is possible
	$suggestions->shrink( $this->limit );

	$search = trim( $search );
	// preload the titles with LinkBatch
	$suggestedTitles = $suggestions->map(
		static fn ( SearchSuggestion $sugg ) => $sugg->getSuggestedTitle()
	);
	$linkBatchFactory = MediaWikiServices::getInstance()->getLinkBatchFactory();
	$linkBatchFactory->newLinkBatch( $suggestedTitles )
		->setCaller( __METHOD__ )
		->execute();

	// Keep only suggestions pointing at known titles. The filter() result is
	// used as a count here — presumably the number of dropped suggestions;
	// confirm against SearchSuggestionSet::filter().
	$diff = $suggestions->filter(
		static fn ( SearchSuggestion $sugg ) => $sugg->getSuggestedTitle()->isKnown()
	);
	if ( $diff > 0 ) {
		$statsFactory = MediaWikiServices::getInstance()->getStatsFactory();
		$statsFactory->getCounter( 'search_completion_missing_total' )
			->incrementBy( (float)$diff );
	}

	// SearchExactMatchRescorer should probably be refactored to work directly on top of a
	// SearchSuggestionSet instead of converting it to array and trying to infer if it has
	// re-scored anything by inspecting the head of the returned array.
	$results = $suggestions->map(
		static fn ( SearchSuggestion $sugg ) => $sugg->getSuggestedTitle()->getPrefixedText()
	);

	$rescorer = new SearchExactMatchRescorer();
	if ( $this->offset === 0 ) {
		// Rescore results with an exact title match
		// NOTE: in some cases like cross-namespace redirects
		// (frequently used as shortcuts e.g. WP:WP on huwiki) some
		// backends like Cirrus will return no results. We should still
		// try an exact title match to workaround this limitation
		$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
	} else {
		// No need to rescore if offset is not 0
		// The exact match must have been returned at position 0
		// if it existed.
		$rescoredResults = $results;
	}

	if ( count( $rescoredResults ) > 0 ) {
		// Strict search: with loose comparison two different numeric-looking
		// titles (e.g. "1e3" and "1000") would be treated as the same entry,
		// hiding a genuinely new exact match.
		$found = array_search( $rescoredResults[0], $results, true );
		if ( $found === false ) {
			// If the first result is not in the previous array it
			// means that we found a new exact match
			$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
			$suggestions->prepend( $exactMatch );
			if ( $rescorer->getReplacedRedirect() !== null ) {
				// the exact match rescorer replaced one of the suggestion found by the search engine
				// let's remove it from our suggestions set to avoid showing duplicates
				$suggestions->remove( SearchSuggestion::fromTitle( 0,
					Title::newFromText( $rescorer->getReplacedRedirect() ) ) );
			}
			$suggestions->shrink( $this->limit );
		} elseif ( $found > 0 ) {
			// if the first result is not the same we need to rescore
			$suggestions->rescore( $found );
		}
	}

	return $suggestions;
}
685
/**
 * Simple prefix search for subpages.
 *
 * Blank input yields an empty array; otherwise the namespace prefix is
 * normalized and the query is handed to simplePrefixSearch().
 *
 * @param string $search
 * @return mixed Empty array, or the value returned by simplePrefixSearch()
 */
public function defaultPrefixSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return [];
	}

	$normalized = $this->normalizeNamespaces( $search );

	return $this->simplePrefixSearch( $normalized );
}
699
/**
 * Call out to simple search backend.
 *
 * Runs a TitlePrefixSearch with the currently configured namespaces, limit
 * and offset.
 *
 * @param string $search
 * @return mixed The value returned by TitlePrefixSearch::defaultSearchBackend()
 */
protected function simplePrefixSearch( $search ) {
	// Use default database prefix search
	$prefixSearch = new TitlePrefixSearch();

	return $prefixSearch->defaultSearchBackend(
		$this->namespaces,
		$search,
		$this->limit,
		$this->offset
	);
}
711
/**
 * Get a list of supported profiles.
 *
 * The base implementation has no profiles and always answers null.
 *
 * @param mixed $profileType Profile type (unused here)
 * @param User|null $user (unused here)
 * @return null
 */
public function getProfiles( $profileType, ?User $user = null ) {
	// No profiles in the base class.
	return null;
}
733
/**
 * Create a search field definition.
 *
 * The base implementation always answers with a NullIndexField, i.e. the
 * engine does not implement the field.
 *
 * @param mixed $name Field name (unused here)
 * @param mixed $type Field type (unused here)
 * @return NullIndexField
 */
public function makeSearchFieldMapping( $name, $type ) {
	$placeholder = new NullIndexField();

	return $placeholder;
}
747
/**
 * Get fields for search index.
 *
 * Collects the fields declared by the content handler of every known content
 * model, merging definitions that share a name, then lets extensions add
 * their own through the SearchIndexFields hook.
 *
 * @return array Field definitions keyed by field name
 * @throws InvalidArgumentException When two definitions of the same field cannot be merged
 */
public function getSearchIndexFields() {
	// Look the factory up once; it is needed for the model list and for every handler.
	$contentHandlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
	$models = $contentHandlerFactory->getContentModels();
	$fields = [];
	$seenHandlers = new SplObjectStorage();
	foreach ( $models as $model ) {
		try {
			$handler = $contentHandlerFactory->getContentHandler( $model );
		} catch ( UnknownContentModelException ) {
			// If we can find no handler, ignore it
			continue;
		}
		// Several models can have the same handler, so avoid processing it repeatedly
		if ( $seenHandlers->offsetExists( $handler ) ) {
			// We already did this one
			continue;
		}
		$seenHandlers->offsetSet( $handler );
		$handlerFields = $handler->getFieldsForSearchIndex( $this );
		foreach ( $handlerFields as $fieldName => $fieldData ) {
			if ( empty( $fields[$fieldName] ) ) {
				$fields[$fieldName] = $fieldData;
			} else {
				// TODO: do we allow some clashes with the same type or reject all of them?
				$mergeDef = $fields[$fieldName]->merge( $fieldData );
				if ( !$mergeDef ) {
					throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
				}
				$fields[$fieldName] = $mergeDef;
			}
		}
	}
	// Hook to allow extensions to produce search mapping fields
	$this->getHookRunner()->onSearchIndexFields( $fields, $this );
	return $fields;
}
790
/**
 * Augment search results with extra data.
 *
 * Augmentors are collected through the SearchResultsAugment hook. Per-row
 * augmentors are wrapped in a PerRowAugmentor so that everything can be run
 * as a set augmentor; any non-empty data produced is attached to the result
 * set under the augmentor's name.
 *
 * @param ISearchResultSet $resultSet
 * @throws InvalidArgumentException When one name has both a row and a set augmentor
 */
public function augmentSearchResults( ISearchResultSet $resultSet ) {
	$setAugmentors = [];
	$rowAugmentors = [];
	$this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors );
	if ( !( $setAugmentors || $rowAugmentors ) ) {
		// We're done here
		return;
	}

	// Convert row augmenters to set augmentor
	foreach ( $rowAugmentors as $augmentorName => $rowAugmentor ) {
		if ( isset( $setAugmentors[$augmentorName] ) ) {
			throw new InvalidArgumentException( "Both row and set augmentors are defined for $augmentorName" );
		}
		$setAugmentors[$augmentorName] = new PerRowAugmentor( $rowAugmentor );
	}

	foreach ( $setAugmentors as $augmentorName => $setAugmentor ) {
		$augmented = $setAugmentor->augmentAll( $resultSet );
		if ( !$augmented ) {
			// Nothing worth attaching for this augmentor.
			continue;
		}
		$resultSet->setAugmentedData( $augmentorName, $augmented );
	}
}
822
/**
 * Inject the HookContainer to use; a matching HookRunner is built right away.
 *
 * @param HookContainer $hookContainer
 */
public function setHookContainer( HookContainer $hookContainer ) {
	$runner = new HookRunner( $hookContainer );

	$this->hookContainer = $hookContainer;
	$this->hookRunner = $runner;
}
832
/**
 * Get a HookContainer, for running extension hooks or for hook metadata.
 *
 * Falls back to the global service when none has been injected.
 *
 * @return HookContainer
 */
protected function getHookContainer(): HookContainer {
	// This shouldn't be hit in core, but it is needed for CirrusSearch
	// which commonly creates a CirrusSearch object without cirrus being
	// configured in $wgSearchType/$wgSearchTypeAlternatives.
	$this->hookContainer ??= MediaWikiServices::getInstance()->getHookContainer();

	return $this->hookContainer;
}
848
/**
 * Get a HookRunner for running core hooks.
 *
 * The runner is created on first use from getHookContainer() and then reused.
 *
 * @return HookRunner
 */
protected function getHookRunner(): HookRunner {
	$this->hookRunner ??= new HookRunner( $this->getHookContainer() );

	return $this->hookRunner;
}
863
864}
865
// Keep the un-namespaced class name available as an alias of the namespaced class.
class_alias(
	SearchEngine::class,
	'SearchEngine'
);
const NS_MAIN
Definition Defines.php:51
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:71
Exception thrown when an unregistered content model is requested.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
This is case-sensitive (First character may be automatically capitalized by Title::secureAndSpit() la...
Contain a class for special pages.
updateTitle( $id, $title)
Update a search index record's title only.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
getHookContainer()
Get a HookContainer, for running extension hooks or for hook metadata.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
setSort( $sort)
Set the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
setNamespaces( $namespaces)
Set which namespaces the search should include.
simplePrefixSearch( $search)
Call out to simple search backend.
completionSearchBackend( $search)
Perform a completion search.
searchTitle( $term)
Perform a title-only search query and return a result set.
const SEARCH_ID
Name of the feature data entry holding an identifier generated by some search backends.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
doSearchText( $term)
Perform a full text search query and return a result set.
completionSearchWithVariants( $search)
Perform a completion search with variants.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
searchArchiveTitle( $term)
Perform a title search in the article archive.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
setFeatureData( $feature, $data)
Way to pass custom data for engines.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
update( $id, $title, $text)
Create or update the search index record for the given page.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
normalizeNamespaces( $search)
Makes search simple string if it was namespaced.
getSearchIndexFields()
Get fields for search index.
const FT_QUERY_DEP_PROFILE_TYPE
Profile type for query dependent ranking features (ex: field weights)
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features (ex: article popularity)
defaultPrefixSearch( $search)
Simple prefix search for subpages.
array $features
Feature values.
setHookContainer(HookContainer $hookContainer)
static userHighlightPrefs()
Find snippet highlight settings for all users.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getProfiles( $profileType, ?User $user=null)
Get a list of supported profiles.
completionSearch( $search)
Perform a completion search.
getSort()
Get the sort direction of the search results.
getValidSorts()
Get the valid sort directions.
getHookRunner()
Get a HookRunner for running core hooks.
getNearMatcher(Config $config)
Get the service class used for finding near matches.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
searchText( $term)
Perform a full text search query and return a result set.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
filter( $callback)
Filter the suggestions array.
rescore( $key)
Move the suggestion at index $key to the first position.
map( $callback)
Call array_map on the suggestions array.
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a title array.
shrink( $limit)
Remove any extra elements in the suggestions set.
remove(SearchSuggestion $suggestion)
Remove a suggestion from the set.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
static fromTitle( $score, Title $title)
Create suggestion from Title.
Performs prefix search, returning Title objects.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition Status.php:44
Represents a title within MediaWiki.
Definition Title.php:69
User class for the MediaWiki software.
Definition User.php:130
Interface for configuration instances.
Definition Config.php:18
A set of SearchEngine results.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...
Definition of a mapping for the search index field.