MediaWiki REL1_35
SearchEngine.php
Go to the documentation of this file.
1<?php
31
37abstract class SearchEngine {
/** Sort order used until setSort() selects another one. */
public const DEFAULT_SORT = 'relevance';

/** @var string */
public $prefix = '';

/** @var int[]|null Namespaces to search; starts out as the main namespace only. */
public $namespaces = [ NS_MAIN ];

/** @var int Maximum number of results to return, see setLimitOffset(). */
protected $limit = 10;

/** @var int Number of results to skip, see setLimitOffset(). */
protected $offset = 0;

/** @var string[] */
protected $searchTerms = [];

/** @var bool Whether a suggestion should be built, see setShowSuggestion(). */
protected $showSuggestion = true;

/** @var string Current sort order, see setSort() / getSort(). */
private $sort = self::DEFAULT_SORT;

/** @var array Feature values, keyed by feature name. */
protected $features = [];

/**
 * Declared explicitly because setHookContainer() and getHookContainer()
 * read and write it; without the declaration it would be created as a
 * dynamic property on first write and raise a notice on first read.
 *
 * @var HookContainer|null
 */
private $hookContainer;

/** @var HookRunner|null Created on demand by getHookRunner(). */
private $hookRunner;

/** Profile type for completionSearch. */
public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';

/** Profile type for query independent ranking features. */
public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';

/** Integer flag for legalSearchChars: includes all chars allowed in a search query. */
protected const CHARS_ALL = 1;

/** Integer flag for legalSearchChars: includes all chars allowed in a search term. */
protected const CHARS_NO_SYNTAX = 2;
82
/**
 * Perform a full text search query and return a result set.
 *
 * The work is done by doSearchText(); the call is routed through
 * maybePaginate() so that engines without native pagination still get
 * their results trimmed to the configured limit.
 *
 * @param string $term Raw search term
 * @return mixed Whatever doSearchText() produced (null in the base class)
 */
public function searchText( $term ) {
	$runFullTextSearch = function () use ( $term ) {
		return $this->doSearchText( $term );
	};

	return $this->maybePaginate( $runFullTextSearch );
}
98
/**
 * Perform a full text search query and return a result set.
 *
 * The base class has no full text backend, so it always answers null;
 * concrete engines override this method.
 *
 * @param string $term Raw search term
 * @return null
 */
protected function doSearchText( $term ) {
	// Nothing to search with at this level.
	return null;
}
111
/**
 * Perform a title search in the article archive.
 *
 * Thin public entry point; the lookup itself lives in
 * doSearchArchiveTitle().
 *
 * @param string $term Raw search term
 * @return Status As produced by doSearchArchiveTitle()
 */
public function searchArchiveTitle( $term ) {
	$archiveStatus = $this->doSearchArchiveTitle( $term );

	return $archiveStatus;
}
129
/**
 * Perform a title search in the article archive.
 *
 * The base implementation reports success with an empty list of matches.
 *
 * @param string $term Raw search term
 * @return Status Good status wrapping an empty array
 */
protected function doSearchArchiveTitle( $term ) {
	$noMatches = [];

	return Status::newGood( $noMatches );
}
142
/**
 * Perform a title-only search query and return a result set.
 *
 * The work is done by doSearchTitle(); the call is routed through
 * maybePaginate() so that engines without native pagination still get
 * their results trimmed to the configured limit.
 *
 * @param string $term Raw search term
 * @return mixed Whatever doSearchTitle() produced (null in the base class)
 */
public function searchTitle( $term ) {
	$runTitleSearch = function () use ( $term ) {
		return $this->doSearchTitle( $term );
	};

	return $this->maybePaginate( $runTitleSearch );
}
159
/**
 * Perform a title-only search query and return a result set.
 *
 * The base class has no title search backend, so it always answers null;
 * concrete engines override this method.
 *
 * @param string $term Raw search term
 * @return null
 */
protected function doSearchTitle( $term ) {
	// Nothing to search with at this level.
	return null;
}
172
/**
 * Performs an overfetch and shrink operation to determine if the next
 * page is available, for search engines that do not paginate themselves.
 *
 * Engines implementing PaginatingSearchEngine are called as-is. For all
 * others the limit is raised by one while $fn runs, then restored, and a
 * returned result set (bare or wrapped in a Status) is shrunk back to the
 * real limit.
 *
 * @param Closure $fn Callback that runs the actual search
 * @return mixed The unmodified return value of $fn
 */
private function maybePaginate( Closure $fn ) {
	if ( $this instanceof PaginatingSearchEngine ) {
		// The engine handles pagination on its own.
		return $fn();
	}

	// Ask for one result more than requested; always undo the bump,
	// even when the search throws.
	$this->limit++;
	try {
		$outcome = $fn();
	} finally {
		$this->limit--;
	}

	// The result set may come back directly or as the value of a Status.
	if ( $outcome instanceof ISearchResultSet ) {
		$set = $outcome;
	} elseif ( $outcome instanceof Status && $outcome->getValue() instanceof ISearchResultSet ) {
		$set = $outcome->getValue();
	} else {
		$set = null;
	}

	if ( $set ) {
		$set->shrink( $this->limit );
	}

	return $outcome;
}
206
/**
 * Tell whether this engine supports the named feature.
 *
 * Only 'search-update' is reported as supported here. Every other name,
 * including 'title-suffix-filter', yields false.
 *
 * @param string $feature Feature name
 * @return bool
 */
public function supports( $feature ) {
	// Loose comparison on purpose: it is what a switch/case match performs.
	return $feature == 'search-update';
}
223
/**
 * Way to pass custom data for engines.
 *
 * Stores $data under $feature, replacing any value stored before.
 *
 * @param string $feature Feature name, used as the storage key
 * @param mixed $data Value to remember
 */
public function setFeatureData( $feature, $data ) {
	// Later calls for the same feature overwrite earlier ones.
	$this->features[$feature] = $data;
}
233
/**
 * Way to retrieve custom data set by setFeatureData or by the engine itself.
 *
 * @param string $feature Feature name
 * @return mixed|null The stored value, or null when nothing (or null) is stored
 */
public function getFeatureData( $feature ) {
	return isset( $this->features[$feature] ) ? $this->features[$feature] : null;
}
244
/**
 * Prepare text for use in searching or updating the search index.
 *
 * The base implementation only applies the content language's word
 * segmentation, which some languages such as Chinese require.
 *
 * @param string $string Text to normalize
 * @return string Result of the content language's segmentByWord()
 */
public function normalizeText( $string ) {
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	return $contentLanguage->segmentByWord( $string );
}
257
/**
 * Get service class for finding near matches.
 *
 * @param Config $config Configuration handed to the matcher
 * @return SearchNearMatcher Built with the content language and this
 *  engine's hook container
 */
public function getNearMatcher( Config $config ) {
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();
	$hookContainer = $this->getHookContainer();

	return new SearchNearMatcher( $config, $contentLanguage, $hookContainer );
}
269
/**
 * Get near matcher for default SearchEngine.
 *
 * Creates a fresh engine through the service container and asks it for a
 * matcher configured with the main config.
 *
 * @return SearchNearMatcher
 */
protected static function defaultNearMatcher() {
	$serviceContainer = MediaWikiServices::getInstance();
	$mainConfig = $serviceContainer->getMainConfig();
	$engine = $serviceContainer->newSearchEngine();

	return $engine->getNearMatcher( $mainConfig );
}
279
/**
 * Get chars legal for search.
 *
 * The base implementation ignores $type and always returns the same
 * character list.
 *
 * @param int $type One of the CHARS_* flags
 * @return string Character list, written as the inside of a regex
 *  character class
 */
public function legalSearchChars( $type = self::CHARS_ALL ) {
	$charList = "A-Za-z_'.0-9\\x80-\\xFF\\-";

	return $charList;
}
289
/**
 * Set the maximum number of results to return and how many to skip
 * before returning the first.
 *
 * Both values are converted to integers before being stored.
 *
 * @param int $limit Maximum number of results
 * @param int $offset Number of results to skip
 */
public function setLimitOffset( $limit, $offset = 0 ) {
	$this->limit = (int)$limit;
	$this->offset = (int)$offset;
}
301
/**
 * Set which namespaces the search should include.
 *
 * An empty or null argument clears the list. Otherwise only negative
 * namespace ids and ids present in the configured searchable namespaces
 * are kept; array keys of the input are preserved.
 *
 * @param int[]|null $namespaces Namespace ids
 */
public function setNamespaces( $namespaces ) {
	if ( !$namespaces ) {
		$this->namespaces = [];
		return;
	}

	$searchable = MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces();
	$this->namespaces = array_filter(
		$namespaces,
		function ( $ns ) use ( $searchable ) {
			// Negative ids always pass; everything else must be searchable.
			return $ns < 0 || isset( $searchable[$ns] );
		}
	);
}
320
/**
 * Set whether the searcher should try to build a suggestion.
 *
 * The value is stored as given, without conversion.
 *
 * @param bool $showSuggestion
 */
public function setShowSuggestion( $showSuggestion ) {
	// Stored verbatim; no casting is applied.
	$this->showSuggestion = $showSuggestion;
}
331
/**
 * Get the valid sort directions.
 *
 * The base class only knows the default sort.
 *
 * @return string[]
 */
public function getValidSorts() {
	$sorts = [ self::DEFAULT_SORT ];

	return $sorts;
}
344
/**
 * Set the sort direction of the search results.
 *
 * The value must be exactly one of the entries of getValidSorts().
 *
 * @param string $sort Sort direction
 * @throws InvalidArgumentException When $sort is not a valid sort
 */
public function setSort( $sort ) {
	$validSorts = $this->getValidSorts();

	// Strict matching: a loose in_array() treats e.g. true as equal to
	// any non-empty sort name, which would store a non-string sort.
	if ( !in_array( $sort, $validSorts, true ) ) {
		throw new InvalidArgumentException( "Invalid sort: $sort. " .
			"Must be one of: " . implode( ', ', $validSorts ) );
	}

	$this->sort = $sort;
}
360
/**
 * Get the sort direction of the search results.
 *
 * @return string The value last accepted by setSort(), or the default sort
 */
public function getSort() {
	$currentSort = $this->sort;

	return $currentSort;
}
370
/**
 * Parse some common prefixes: all (search everything) or namespace names
 * and set the list of namespaces of this class accordingly.
 *
 * The base implementation does no parsing at all and hands the query
 * back untouched.
 *
 * @param string $query Search query
 * @return string The same query
 */
public function replacePrefixes( $query ) {
	// Intentionally a pass-through.
	return $query;
}
383
/**
 * Parse some common prefixes: all (search everything) or namespace names.
 *
 * Resolution order:
 *  1. no colon in the query: false;
 *  2. (only if $withAllKeyword) query starts with the localized
 *     'searchall' keyword or with 'all:': the prefix is stripped and the
 *     namespace slot is null;
 *  3. the text before the first colon, with spaces turned into
 *     underscores, is a namespace name in the content language: the
 *     prefix is stripped and that namespace is returned;
 *  4. (only if $withPrefixSearchExtractNamespaceHook) the
 *     PrefixSearchExtractNamespace hook rewrote the query: the hook's
 *     query and namespaces are returned;
 *  5. otherwise false.
 *
 * @param string $query Search query
 * @param bool $withAllKeyword Whether the "all" keywords are recognised
 * @param bool $withPrefixSearchExtractNamespaceHook Whether the hook is
 *  consulted when the prefix is not a namespace name
 * @return false|array False when no prefix was recognised, otherwise
 *  [ remaining query, int[]|null namespaces ]
 */
public static function parseNamespacePrefixes(
	$query,
	$withAllKeyword = true,
	$withPrefixSearchExtractNamespaceHook = false
) {
	$colonPos = strpos( $query, ':' );
	if ( $colonPos === false ) {
		// No prefix can be present without a colon.
		return false;
	}

	if ( $withAllKeyword ) {
		$allKeywords = [ wfMessage( 'searchall' )->inContentLanguage()->text() . ":" ];
		// Force all: so that every wiki shares a common syntax.
		if ( !in_array( 'all:', $allKeywords ) ) {
			$allKeywords[] = 'all:';
		}

		foreach ( $allKeywords as $keyword ) {
			$keywordLength = strlen( $keyword );
			if ( strncmp( $query, $keyword, $keywordLength ) == 0 ) {
				return [ substr( $query, $keywordLength ), null ];
			}
		}
	}

	$prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
	$nsIndex = MediaWikiServices::getInstance()->getContentLanguage()->getNsIndex( $prefix );
	if ( $nsIndex !== false ) {
		return [ substr( $query, strlen( $prefix ) + 1 ), [ $nsIndex ] ];
	}

	if ( !$withPrefixSearchExtractNamespaceHook ) {
		return false;
	}

	$hookNamespaces = [ NS_MAIN ];
	$hookQuery = $query;
	Hooks::runner()->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery );
	if ( $hookQuery === $query ) {
		// The hook left the query alone, so nothing was extracted.
		return false;
	}

	return [ $hookQuery, $hookNamespaces ];
}
453
/**
 * Find snippet highlight settings for all users.
 *
 * Both values are hard-coded. The two assignments were absent from this
 * listing (which left the return statement reading undefined variables)
 * and are restored here with the values upstream MediaWiki uses.
 *
 * @return int[] [ number of context lines, number of context characters ]
 */
public static function userHighlightPrefs() {
	$contextlines = 2;
	$contextchars = 75;

	return [ $contextlines, $contextchars ];
}
466
/**
 * Create or update the search index record for the given page.
 *
 * The base implementation does nothing.
 *
 * @param int $id Presumably the page id; confirm against callers
 * @param string $title
 * @param string $text
 */
public function update( $id, $title, $text ) {
	// Deliberately empty.
}
479
/**
 * Update a search index record's title only.
 *
 * The base implementation does nothing.
 *
 * @param int $id Presumably the page id; confirm against callers
 * @param string $title
 */
public function updateTitle( $id, $title ) {
	// Deliberately empty.
}
491
/**
 * Counterpart of update() for removing a record. NOTE(review): purpose
 * inferred from the name only; the base implementation does nothing.
 *
 * @param int $id Presumably the page id; confirm against callers
 * @param string $title
 */
public function delete( $id, $title ) {
	// Deliberately empty.
}
503
/**
 * Get the raw text for updating the index from a content object.
 *
 * The title argument is not used by the base implementation.
 *
 * @param Title $t Title the content belongs to
 * @param Content|null $c Content to extract text from
 * @return string Result of getTextForSearchIndex(), or '' without content
 */
public function getTextFromContent( Title $t, Content $c = null ) {
	if ( !$c ) {
		return '';
	}

	return $c->getTextForSearchIndex();
}
518
/**
 * Tell whether the text from getTextFromContent() is already fully
 * processed for the index. The base class always answers false.
 *
 * @return bool
 */
public function textAlreadyUpdatedForIndex() {
	// The base engine does no text processing of its own.
	return false;
}
530
/**
 * Makes search simple string if it was namespaced.
 *
 * When a namespace prefix is recognised (the "all" keywords are not
 * considered, the extraction hook is), the engine's namespaces are
 * replaced and the remaining query is returned. Otherwise the input comes
 * back unchanged and the namespaces are left alone.
 *
 * @param string $search Search query
 * @return string Query without the namespace prefix
 */
protected function normalizeNamespaces( $search ) {
	$parsed = self::parseNamespacePrefixes( $search, false, true );
	if ( $parsed === false ) {
		return $search;
	}

	[ $strippedQuery, $extractedNamespaces ] = $parsed;
	$this->setNamespaces( $extractedNamespaces );

	return $strippedQuery;
}
545
/**
 * Perform an overfetch of completion search results.
 *
 * Runs completionSearchBackend() with the limit raised by one and
 * restores the limit afterwards, even if the backend throws.
 *
 * @param string $search Search query
 * @return SearchSuggestionSet
 */
protected function completionSearchBackendOverfetch( $search ) {
	$this->limit++;
	try {
		$overfetched = $this->completionSearchBackend( $search );
	} finally {
		$this->limit--;
	}

	return $overfetched;
}
561
/**
 * Perform a completion search.
 *
 * The PrefixSearchBackend hook gets the first chance, unless the
 * Special: namespace is among the searched namespaces. A falsy hook
 * return means the hook supplied the results; in every other case the
 * default simple prefix search is used.
 *
 * @param string $search Search query, trimmed before use
 * @return SearchSuggestionSet
 */
protected function completionSearchBackend( $search ) {
	$hookResults = [];
	$search = trim( $search );

	// The hook is never run for Special: searches.
	// FIXME: Yes, the hook's return convention is weird. That's why it is
	// going to be deprecated.
	$useSimpleSearch = in_array( NS_SPECIAL, $this->namespaces ) ||
		$this->getHookRunner()->onPrefixSearchBackend(
			$this->namespaces, $search, $this->limit, $hookResults, $this->offset );

	if ( $useSimpleSearch ) {
		// Hook did not do the job, use default simple search.
		$titles = $this->simplePrefixSearch( $search );
		return SearchSuggestionSet::fromTitles( $titles );
	}

	// False from the hook means the hook worked.
	return SearchSuggestionSet::fromStrings( $hookResults );
}
591
/**
 * Perform a completion search.
 *
 * Blank input yields an empty suggestion set. Otherwise a namespace
 * prefix is resolved, the backend is queried with one extra result and
 * the outcome is post-processed.
 *
 * @param string $search Search query
 * @return SearchSuggestionSet
 */
public function completionSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		// Nothing to complete.
		return SearchSuggestionSet::emptySuggestionSet();
	}

	$normalized = $this->normalizeNamespaces( $search );
	$overfetched = $this->completionSearchBackendOverfetch( $normalized );

	return $this->processCompletionResults( $normalized, $overfetched );
}
605
/**
 * Perform a completion search with variants.
 *
 * The query is searched as given first. If that leaves room below the
 * limit (plus the one overfetched result), the content language's
 * variants of the query are searched one after another until the room is
 * used up, and their suggestions are appended.
 *
 * The variant searches need a reduced limit and start at offset 0; both
 * settings are restored after every variant search so that the caller's
 * limit and offset are the ones applied by the final post-processing and
 * remain in effect after this method returns.
 *
 * @param string $search Search query
 * @return SearchSuggestionSet
 */
public function completionSearchWithVariants( $search ) {
	if ( trim( $search ) === '' ) {
		return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
	}
	$search = $this->normalizeNamespaces( $search );

	$results = $this->completionSearchBackendOverfetch( $search );
	$fallbackLimit = 1 + $this->limit - $results->getSize();
	if ( $fallbackLimit > 0 ) {
		$fallbackSearches = MediaWikiServices::getInstance()->getContentLanguage()->
			autoConvertToAllVariants( $search );
		$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

		$origLimit = $this->limit;
		$origOffset = $this->offset;
		foreach ( $fallbackSearches as $fbs ) {
			try {
				$this->setLimitOffset( $fallbackLimit );
				$fallbackSearchResult = $this->completionSearch( $fbs );
				$results->appendAll( $fallbackSearchResult );
				$fallbackLimit -= $fallbackSearchResult->getSize();
			} finally {
				// Never leak the temporary limit/offset, even on failure.
				$this->setLimitOffset( $origLimit, $origOffset );
			}
			if ( $fallbackLimit <= 0 ) {
				break;
			}
		}
	}
	return $this->processCompletionResults( $search, $results );
}
638
/**
 * Extract titles from completion results.
 *
 * @param SearchSuggestionSet $completionResults
 * @return array Result of mapping every suggestion to its suggested title
 */
public function extractTitles( SearchSuggestionSet $completionResults ) {
	$toTitle = function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle();
	};

	return $completionResults->map( $toTitle );
}
649
/**
 * Process completion search results.
 *
 * Steps, in order: shrink the overfetched set to the limit, preload the
 * titles with a LinkBatch, filter on Title::isKnown() and report the
 * filter's return value as the 'search.completion.missing' count, and,
 * for the first page only, let SearchExactMatchRescorer move or insert an
 * exact title match at the top.
 *
 * @param string $search Search query, trimmed before rescoring
 * @param SearchSuggestionSet $suggestions Modified in place
 * @return SearchSuggestionSet The same set that was passed in
 */
protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
	// We over-fetched to determine pagination. Shrink back down if we have
	// extra results and mark if pagination is possible.
	$suggestions->shrink( $this->limit );

	$search = trim( $search );

	// Preload the titles with LinkBatch.
	$toTitle = function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle();
	};
	$batch = new LinkBatch( $suggestions->map( $toTitle ) );
	$batch->setCaller( __METHOD__ );
	$batch->execute();

	$isKnown = function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->isKnown();
	};
	// NOTE(review): filter()'s return value is used as a count here;
	// presumably the number of removed suggestions. Confirm.
	$missingCount = $suggestions->filter( $isKnown );
	if ( $missingCount > 0 ) {
		MediaWikiServices::getInstance()->getStatsdDataFactory()
			->updateCount( 'search.completion.missing', $missingCount );
	}

	$toPrefixedText = function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->getPrefixedText();
	};
	$titleTexts = $suggestions->map( $toPrefixedText );

	if ( $this->offset === 0 ) {
		// Rescore results with an exact title match.
		// NOTE: in some cases like cross-namespace redirects
		// (frequently used as shortcuts e.g. WP:WP on huwiki) some
		// backends like Cirrus will return no results. We should still
		// try an exact title match to work around this limitation.
		$rescorer = new SearchExactMatchRescorer();
		$ranked = $rescorer->rescore( $search, $this->namespaces, $titleTexts, $this->limit );
	} else {
		// No need to rescore if offset is not 0: the exact match must
		// have been returned at position 0 if it existed.
		$ranked = $titleTexts;
	}

	if ( count( $ranked ) === 0 ) {
		return $suggestions;
	}

	$position = array_search( $ranked[0], $titleTexts );
	if ( $position === false ) {
		// The top entry was not among the previous results, so the
		// rescorer found a new exact match.
		$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $ranked[0] ) );
		$suggestions->prepend( $exactMatch );
		$suggestions->shrink( $this->limit );
	} elseif ( $position > 0 ) {
		// The top entry exists but is not first yet: move it up.
		$suggestions->rescore( $position );
	}

	return $suggestions;
}
715
/**
 * Simple prefix search for subpages.
 *
 * Blank input yields an empty array; otherwise a namespace prefix is
 * resolved and the simple prefix search is run on the remaining query.
 *
 * @param string $search Search query
 * @return array Result of simplePrefixSearch(), or [] for blank input
 */
public function defaultPrefixSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return [];
	}

	$normalized = $this->normalizeNamespaces( $search );

	return $this->simplePrefixSearch( $normalized );
}
729
/**
 * Call out to simple search backend.
 *
 * Uses the default database prefix search with this engine's current
 * namespaces, limit and offset.
 *
 * @param string $search Search query
 * @return array Result of TitlePrefixSearch::defaultSearchBackend()
 */
protected function simplePrefixSearch( $search ) {
	$prefixSearch = new TitlePrefixSearch();

	return $prefixSearch->defaultSearchBackend(
		$this->namespaces,
		$search,
		$this->limit,
		$this->offset
	);
}
741
/**
 * Get a list of supported profiles.
 *
 * The base implementation has no profiles and always returns null.
 *
 * @param string $profileType Type of profile, e.g. one of the
 *  *_PROFILE_TYPE constants
 * @param User|null $user
 * @return null
 */
public function getProfiles( $profileType, User $user = null ) {
	// No profiles at this level.
	return null;
}
763
/**
 * Create a search field definition.
 *
 * The base implementation ignores both arguments and returns a
 * NullIndexField, meaning the engine does not implement the field.
 *
 * @param string $name Field name
 * @param mixed $type Field type
 * @return NullIndexField
 */
public function makeSearchFieldMapping( $name, $type ) {
	$placeholder = new NullIndexField();

	return $placeholder;
}
777
/**
 * Get fields for search index.
 *
 * Collects the fields of every registered content handler (each handler
 * is consulted once, even if several models share it), merges fields
 * that share a name, and finally lets the SearchIndexFields hook adjust
 * the list.
 *
 * @return array Field definitions keyed by field name
 * @throws InvalidArgumentException When two definitions of the same
 *  field cannot be merged
 */
public function getSearchIndexFields() {
	$handlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
	$fields = [];
	$processedHandlers = new SplObjectStorage();

	foreach ( $handlerFactory->getContentModels() as $model ) {
		try {
			$handler = $handlerFactory->getContentHandler( $model );
		} catch ( MWUnknownContentModelException $e ) {
			// If we can find no handler, ignore the model.
			continue;
		}

		// Several models can have the same handler; process each only once.
		if ( $processedHandlers->contains( $handler ) ) {
			continue;
		}
		$processedHandlers->attach( $handler );

		foreach ( $handler->getFieldsForSearchIndex( $this ) as $fieldName => $fieldData ) {
			if ( empty( $fields[$fieldName] ) ) {
				$fields[$fieldName] = $fieldData;
				continue;
			}

			// TODO: do we allow some clashes with the same type or reject all of them?
			$merged = $fields[$fieldName]->merge( $fieldData );
			if ( !$merged ) {
				throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
			}
			$fields[$fieldName] = $merged;
		}
	}

	// Hook to allow extensions to produce search mapping fields.
	$this->getHookRunner()->onSearchIndexFields( $fields, $this );

	return $fields;
}
821
/**
 * Augment search results with extra data.
 *
 * Augmentors are collected through the SearchResultsAugment hook.
 * Per-row augmentors are wrapped in PerRowAugmentor so that everything
 * can be run as a set augmentor; non-empty results are stored on the
 * result set under the augmentor's name.
 *
 * @param ISearchResultSet $resultSet Result set to augment
 * @throws InvalidArgumentException When a name is registered as both a
 *  row and a set augmentor
 */
public function augmentSearchResults( ISearchResultSet $resultSet ) {
	$setAugmentors = [];
	$rowAugmentors = [];
	$this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors );

	if ( !( $setAugmentors || $rowAugmentors ) ) {
		// Nobody wants to augment anything.
		return;
	}

	// Convert row augmentors to set augmentors.
	foreach ( $rowAugmentors as $augmentorName => $rowAugmentor ) {
		if ( isset( $setAugmentors[$augmentorName] ) ) {
			throw new InvalidArgumentException( "Both row and set augmentors are defined for $augmentorName" );
		}
		$setAugmentors[$augmentorName] = new PerRowAugmentor( $rowAugmentor );
	}

	foreach ( $setAugmentors as $augmentorName => $setAugmentor ) {
		$augmented = $setAugmentor->augmentAll( $resultSet );
		if ( $augmented ) {
			$resultSet->setAugmentedData( $augmentorName, $augmented );
		}
	}
}
855
/**
 * Inject the hook container to use.
 *
 * A new HookRunner is built for it immediately, replacing any earlier
 * one.
 *
 * @param HookContainer $hookContainer
 */
public function setHookContainer( HookContainer $hookContainer ) {
	$runner = new HookRunner( $hookContainer );

	$this->hookContainer = $hookContainer;
	$this->hookRunner = $runner;
}
865
/**
 * Get a HookContainer, for running extension hooks or for hook metadata.
 *
 * Falls back to the global service container's hook container when none
 * was injected, and remembers it for later calls.
 *
 * @return HookContainer
 */
protected function getHookContainer() : HookContainer {
	if ( $this->hookContainer ) {
		return $this->hookContainer;
	}

	$this->hookContainer = MediaWikiServices::getInstance()->getHookContainer();

	return $this->hookContainer;
}
878
/**
 * Get a HookRunner for running core hooks.
 *
 * The runner is created on first use from getHookContainer() and reused
 * afterwards.
 *
 * @return HookRunner
 */
protected function getHookRunner() : HookRunner {
	if ( $this->hookRunner ) {
		return $this->hookRunner;
	}

	$this->hookRunner = new HookRunner( $this->getHookContainer() );

	return $this->hookRunner;
}
893
894}
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
if(ini_get('mbstring.func_overload')) if(!defined('MW_ENTRY_POINT'))
Pre-config setup: Before loading LocalSettings.php.
Definition Setup.php:85
Class representing a list of titles. The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:35
Exception thrown when an unregistered content model is requested.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
MediaWikiServices is the service locator for the application scope of MediaWiki.
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook... This is case-sensitive (First character may be autom...
Contain a class for special pages Stable to extend.
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getNearMatcher(Config $config)
Get service class for finding near matches.
getHookRunner()
Get a HookRunner for running core hooks.
searchTitle( $term)
Perform a title-only search query and return a result set.
supports( $feature)
maybePaginate(Closure $fn)
Performs an overfetch and shrink operation to determine if the next page is available for search engi...
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
update( $id, $title, $text)
Create or update the search index record for the given page.
setNamespaces( $namespaces)
Set which namespaces the search should include.
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
array $features
Feature values.
replacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names and set the list of namespaces...
string[] $searchTerms
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
defaultPrefixSearch( $search)
Simple prefix search for subpages.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
searchArchiveTitle( $term)
Perform a title search in the article archive.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
setFeatureData( $feature, $data)
Way to pass custom data for engines.
completionSearchBackend( $search)
Perform a completion search.
getTextFromContent(Title $t, Content $c=null)
Get the raw text for updating the index from a content object. Nicer search backends could possibly do...
getProfiles( $profileType, User $user=null)
Get a list of supported profiles.
int[]|null $namespaces
getSort()
Get the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
getSearchIndexFields()
Get fields for search index.
getValidSorts()
Get the valid sort directions.
static userHighlightPrefs()
Find snippet highlight settings for all users.
updateTitle( $id, $title)
Update a search index record's title only.
completionSearchWithVariants( $search)
Perform a completion search with variants.
doSearchText( $term)
Perform a full text search query and return a result set.
normalizeNamespaces( $search)
Makes search simple string if it was namespaced.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
HookRunner $hookRunner
getHookContainer()
Get a HookContainer, for running extension hooks or for hook metadata.
HookContainer $hookContainer
completionSearch( $search)
Perform a completion search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
simplePrefixSearch( $search)
Call out to simple search backend.
setSort( $sort)
Set the sort direction of the search results.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features.
setHookContainer(HookContainer $hookContainer)
searchText( $term)
Perform a full text search query and return a result set.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
Implementation of near match title search.
Search suggestion sets.
filter( $callback)
Filter the suggestions array.
rescore( $key)
Move the suggestion at index $key to the first position.
shrink( $limit)
Remove any extra elements in the suggestions set.
static fromStrings(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a string array.
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a title array.
map( $callback)
Call array_map on the suggestions array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
Search suggestion.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:44
Performs prefix search, returning Title objects.
Represents a title within MediaWiki.
Definition Title.php:42
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition User.php:60
const NS_MAIN
Definition Defines.php:70
const NS_SPECIAL
Definition Defines.php:59
Interface for configuration instances.
Definition Config.php:30
Base interface for content objects.
Definition Content.php:35
A set of SearchEngine results.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...
Augment search results.