MediaWiki REL1_34
SearchEngine.php
Go to the documentation of this file.
1<?php
29
/**
 * Abstract base for search engine implementations.
 *
 * Holds the query state shared by all engines (namespaces, limit/offset,
 * sort order, feature data) and supplies default behaviour that concrete
 * engines are expected to override: the do*() search methods return
 * nothing useful, the index update methods are no-ops, and completion
 * search falls back to the database title prefix search.
 */
abstract class SearchEngine {
	/** Sort order used when none has been set explicitly. */
	const DEFAULT_SORT = 'relevance';

	/** @var string */
	public $prefix = '';

	/** @var int[]|null Namespaces the search is restricted to; see setNamespaces(). */
	public $namespaces = [ NS_MAIN ];

	/** @var int Maximum number of results to return. */
	protected $limit = 10;

	/** @var int Number of results to skip before returning the first. */
	protected $offset = 0;

	/** @var string[] */
	protected $searchTerms = [];

	/** @var bool Whether the searcher should try to build a suggestion. */
	protected $showSuggestion = true;

	/** @var string Current sort order; always one of getValidSorts(). */
	private $sort = self::DEFAULT_SORT;

	/** @var array Feature values, keyed by feature name. */
	protected $features = [];

	/** Profile type for completionSearch. */
	const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';

	/** Profile type for query independent ranking features. */
	const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';

	/** Integer flag for legalSearchChars: includes all chars allowed in a search query. */
	const CHARS_ALL = 1;

	/** Integer flag for legalSearchChars: includes all chars allowed in a search term. */
	const CHARS_NO_SYNTAX = 2;

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * Delegates to doSearchText() through maybePaginate().
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null Whatever doSearchText() returned
	 */
	public function searchText( $term ) {
		return $this->maybePaginate( function () use ( $term ) {
			return $this->doSearchText( $term );
		} );
	}

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * The base implementation performs no search and returns null.
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null
	 */
	protected function doSearchText( $term ) {
		return null;
	}

	/**
	 * Perform a title search in the article archive.
	 *
	 * @param string $term Raw search term
	 * @return Status Whatever doSearchArchiveTitle() returned
	 */
	public function searchArchiveTitle( $term ) {
		return $this->doSearchArchiveTitle( $term );
	}

	/**
	 * Perform a title search in the article archive.
	 *
	 * The base implementation returns a good Status wrapping an empty array.
	 *
	 * @param string $term Raw search term
	 * @return Status
	 */
	protected function doSearchArchiveTitle( $term ) {
		return Status::newGood( [] );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * Delegates to doSearchTitle() through maybePaginate().
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null Whatever doSearchTitle() returned
	 */
	public function searchTitle( $term ) {
		return $this->maybePaginate( function () use ( $term ) {
			return $this->doSearchTitle( $term );
		} );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * The base implementation performs no search and returns null.
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null
	 */
	protected function doSearchTitle( $term ) {
		return null;
	}

	/**
	 * Performs an overfetch and shrink operation to determine if the next
	 * page is available, for engines that do not paginate themselves.
	 *
	 * Engines implementing PaginatingSearchEngine are called as-is. For all
	 * others the limit is raised by one for the duration of the callback and
	 * the resulting set (bare, or wrapped in a Status) is shrunk back to the
	 * configured limit.
	 *
	 * @param Closure $fn Callback running the actual search
	 * @return ISearchResultSet|Status|null The callback's return value
	 */
	private function maybePaginate( Closure $fn ) {
		if ( $this instanceof PaginatingSearchEngine ) {
			return $fn();
		}

		// Ask for one extra row; always undo the bump, even if the search throws.
		$this->limit++;
		try {
			$resultSetOrStatus = $fn();
		} finally {
			$this->limit--;
		}

		$resultSet = null;
		if ( $resultSetOrStatus instanceof ISearchResultSet ) {
			$resultSet = $resultSetOrStatus;
		} elseif ( $resultSetOrStatus instanceof Status &&
			$resultSetOrStatus->getValue() instanceof ISearchResultSet
		) {
			$resultSet = $resultSetOrStatus->getValue();
		}
		if ( $resultSet ) {
			$resultSet->shrink( $this->limit );
		}

		return $resultSetOrStatus;
	}

	/**
	 * Report whether this engine supports a given feature.
	 *
	 * The base class supports 'search-update' only.
	 *
	 * @param string $feature Feature name
	 * @return bool
	 */
	public function supports( $feature ) {
		switch ( $feature ) {
			case 'search-update':
				return true;
			case 'title-suffix-filter':
			default:
				return false;
		}
	}

	/**
	 * Way to pass custom data for engines.
	 *
	 * @param string $feature Feature name
	 * @param mixed $data Value stored under that name
	 */
	public function setFeatureData( $feature, $data ) {
		$this->features[$feature] = $data;
	}

	/**
	 * Way to retrieve custom data set by setFeatureData or by the engine itself.
	 *
	 * @param string $feature Feature name
	 * @return mixed|null The stored value, or null when nothing is stored
	 */
	public function getFeatureData( $feature ) {
		return $this->features[$feature] ?? null;
	}

	/**
	 * When overridden in a derived class, performs database-specific
	 * conversions on text to be used for searching or updating the index.
	 *
	 * The base implementation only applies the content language's word
	 * segmentation.
	 *
	 * @param string $string Text to normalize
	 * @return string
	 */
	public function normalizeText( $string ) {
		// Some languages such as Chinese require word segmentation
		return MediaWikiServices::getInstance()->getContentLanguage()->segmentByWord( $string );
	}

	/**
	 * Get service class for finding near matches.
	 *
	 * @param Config $config Configuration handed to the matcher
	 * @return SearchNearMatcher
	 */
	public function getNearMatcher( Config $config ) {
		return new SearchNearMatcher( $config,
			MediaWikiServices::getInstance()->getContentLanguage() );
	}

	/**
	 * Get near matcher for default SearchEngine.
	 *
	 * Uses the main config and a freshly created engine from the service
	 * container.
	 *
	 * @return SearchNearMatcher
	 */
	protected static function defaultNearMatcher() {
		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();
		return $services->newSearchEngine()->getNearMatcher( $config );
	}

	/**
	 * Get chars legal for search.
	 *
	 * The base implementation ignores $type and always returns the same
	 * character-class fragment.
	 *
	 * @param int $type One of the CHARS_* constants
	 * @return string
	 */
	public function legalSearchChars( $type = self::CHARS_ALL ) {
		return "A-Za-z_'.0-9\\x80-\\xFF\\-";
	}

	/**
	 * Set the maximum number of results to return and how many to skip
	 * before returning the first.
	 *
	 * Both values are coerced with intval().
	 *
	 * @param int $limit
	 * @param int $offset
	 */
	public function setLimitOffset( $limit, $offset = 0 ) {
		$this->limit = intval( $limit );
		$this->offset = intval( $offset );
	}

	/**
	 * Set which namespaces the search should include.
	 *
	 * Negative namespace ids are kept as-is; all others are kept only when
	 * the search engine config lists them as searchable. A falsy argument
	 * results in an empty list.
	 *
	 * @param int[]|null $namespaces
	 */
	public function setNamespaces( $namespaces ) {
		if ( $namespaces ) {
			// Filter namespaces to only keep valid ones
			$validNs = MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces();
			$namespaces = array_filter( $namespaces, static function ( $ns ) use ( $validNs ) {
				return $ns < 0 || isset( $validNs[$ns] );
			} );
		} else {
			$namespaces = [];
		}
		$this->namespaces = $namespaces;
	}

	/**
	 * Set whether the searcher should try to build a suggestion.
	 *
	 * @param bool $showSuggestion
	 */
	public function setShowSuggestion( $showSuggestion ) {
		$this->showSuggestion = $showSuggestion;
	}

	/**
	 * Get the valid sort directions.
	 *
	 * @return string[] The base class only knows DEFAULT_SORT
	 */
	public function getValidSorts() {
		return [ self::DEFAULT_SORT ];
	}

	/**
	 * Set the sort direction of the search results.
	 *
	 * @param string $sort One of the values returned by getValidSorts()
	 * @throws InvalidArgumentException When $sort is not a valid sort
	 */
	public function setSort( $sort ) {
		$validSorts = $this->getValidSorts();
		// Strict comparison: loosely, 0 == 'relevance' on PHP 7 and would be accepted.
		if ( !in_array( $sort, $validSorts, true ) ) {
			throw new InvalidArgumentException( "Invalid sort: $sort. " .
				"Must be one of: " . implode( ', ', $validSorts ) );
		}
		$this->sort = $sort;
	}

	/**
	 * Get the sort direction of the search results.
	 *
	 * @return string
	 */
	public function getSort() {
		return $this->sort;
	}

	/**
	 * Prefix-handling hook for engines; the base implementation returns the
	 * query unchanged.
	 *
	 * @param string $query
	 * @return string
	 */
	public function replacePrefixes( $query ) {
		return $query;
	}

	/**
	 * Parse some common prefixes: all (search everything) or namespace names.
	 *
	 * @param string $query Raw query
	 * @param bool $withAllKeyword Recognise the localized 'searchall' keyword
	 *  and the literal "all:" prefix
	 * @param bool $withPrefixSearchExtractNamespaceHook Run the
	 *  PrefixSearchExtractNamespace hook when the prefix is not a namespace name
	 * @return false|array False when no prefix was recognised; otherwise
	 *  [ remaining query, extracted namespaces ], where the namespaces are
	 *  null for an "all" query
	 */
	public static function parseNamespacePrefixes(
		$query,
		$withAllKeyword = true,
		$withPrefixSearchExtractNamespaceHook = false
	) {
		$colonPos = strpos( $query, ':' );
		if ( $colonPos === false ) { // nothing to do
			return false;
		}

		if ( $withAllKeyword ) {
			$allkeywords = [ wfMessage( 'searchall' )->inContentLanguage()->text() . ":" ];
			// force all: so that we have a common syntax for all the wikis
			if ( !in_array( 'all:', $allkeywords, true ) ) {
				$allkeywords[] = 'all:';
			}

			foreach ( $allkeywords as $kw ) {
				if ( strncmp( $query, $kw, strlen( $kw ) ) === 0 ) {
					// "all" query: no namespace restriction
					return [ substr( $query, strlen( $kw ) ), null ];
				}
			}
		}

		// Everything before the first colon is the candidate namespace name.
		$prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
		$index = MediaWikiServices::getInstance()->getContentLanguage()->getNsIndex( $prefix );
		if ( $index !== false ) {
			return [ substr( $query, $colonPos + 1 ), [ $index ] ];
		}

		if ( $withPrefixSearchExtractNamespaceHook ) {
			$hookNamespaces = [ NS_MAIN ];
			$hookQuery = $query;
			Hooks::run( 'PrefixSearchExtractNamespace', [ &$hookNamespaces, &$hookQuery ] );
			// The hook signals a match by rewriting the query.
			if ( $hookQuery !== $query ) {
				return [ $hookQuery, $hookNamespaces ];
			}
		}

		return false;
	}

	/**
	 * Find snippet highlight settings for all users.
	 *
	 * @return int[] [ context lines, context chars ]
	 */
	public static function userHighlightPrefs() {
		// NOTE(review): the two assignments were missing from the listing this
		// class was recovered from (the return referenced undefined variables).
		// 2 and 75 are believed to be the upstream hard-coded values — confirm.
		$contextlines = 2;
		$contextchars = 75;
		return [ $contextlines, $contextchars ];
	}

	/**
	 * Create or update the search index record for the given page.
	 *
	 * No-op in the base class.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 */
	public function update( $id, $title, $text ) {
		// no-op
	}

	/**
	 * Update a search index record's title only.
	 *
	 * No-op in the base class.
	 *
	 * @param int $id
	 * @param string $title
	 */
	public function updateTitle( $id, $title ) {
		// no-op
	}

	/**
	 * Index deletion hook; presumably removes the page's record from the
	 * search index in concrete engines. No-op in the base class.
	 *
	 * @param int $id
	 * @param string $title
	 */
	public function delete( $id, $title ) {
		// no-op
	}

	/**
	 * Get the raw text for updating the index from a content object.
	 *
	 * @param Title $t Unused by the base implementation
	 * @param Content|null $c
	 * @return string The content's search-index text, or '' without content
	 */
	public function getTextFromContent( Title $t, Content $c = null ) {
		return $c ? $c->getTextForSearchIndex() : '';
	}

	/**
	 * Whether this engine handles all of its own text processing in
	 * getTextFromContent(). The base class answers false.
	 *
	 * @return bool
	 */
	public function textAlreadyUpdatedForIndex() {
		return false;
	}

	/**
	 * Makes search simple string if it was namespaced.
	 *
	 * When a namespace prefix is recognised, the engine's namespaces are
	 * replaced by the extracted ones as a side effect.
	 *
	 * @param string $search
	 * @return string The query with any recognised prefix removed
	 */
	protected function normalizeNamespaces( $search ) {
		$queryAndNs = self::parseNamespacePrefixes( $search, false, true );
		if ( $queryAndNs === false ) {
			return $search;
		}
		$this->setNamespaces( $queryAndNs[1] );
		return $queryAndNs[0];
	}

	/**
	 * Perform an overfetch of completion search results.
	 *
	 * The limit is raised by one only for the duration of the backend call.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	protected function completionSearchBackendOverfetch( $search ) {
		$this->limit++;
		try {
			return $this->completionSearchBackend( $search );
		} finally {
			$this->limit--;
		}
	}

	/**
	 * Perform a completion search.
	 *
	 * Runs the PrefixSearchBackend hook (except when searching the Special
	 * namespace) and falls back to the simple prefix search when no hook
	 * handled the query.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	protected function completionSearchBackend( $search ) {
		$results = [];

		$search = trim( $search );

		if ( !in_array( NS_SPECIAL, $this->namespaces ) && // We do not run hook on Special: search
			!Hooks::run( 'PrefixSearchBackend',
				[ $this->namespaces, $search, $this->limit, &$results, $this->offset ]
			) ) {
			// False means hook worked.
			// FIXME: Yes, the API is weird. That's why it is going to be deprecated.

			return SearchSuggestionSet::fromStrings( $results );
		}

		// Hook did not do the job, use default simple search
		$results = $this->simplePrefixSearch( $search );
		return SearchSuggestionSet::fromTitles( $results );
	}

	/**
	 * Perform a completion search.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet Empty for a blank query
	 */
	public function completionSearch( $search ) {
		if ( trim( $search ) === '' ) {
			return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
		}
		$search = $this->normalizeNamespaces( $search );
		$suggestions = $this->completionSearchBackendOverfetch( $search );
		return $this->processCompletionResults( $search, $suggestions );
	}

	/**
	 * Perform a completion search with variants.
	 *
	 * When the primary search leaves room under the limit, the query is
	 * converted to the content language's variants and each variant is
	 * searched until the remaining room is used up.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet Empty for a blank query
	 */
	public function completionSearchWithVariants( $search ) {
		if ( trim( $search ) === '' ) {
			return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
		}
		$search = $this->normalizeNamespaces( $search );

		$results = $this->completionSearchBackendOverfetch( $search );
		$fallbackLimit = 1 + $this->limit - $results->getSize();
		if ( $fallbackLimit > 0 ) {
			$fallbackSearches = MediaWikiServices::getInstance()->getContentLanguage()->
				autoConvertToAllVariants( $search );
			$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

			// The fallback searches run with a reduced limit and offset 0.
			// Restore the caller's values afterwards, otherwise the final
			// processCompletionResults() would shrink to the fallback limit.
			$origLimit = $this->limit;
			$origOffset = $this->offset;
			foreach ( $fallbackSearches as $fbs ) {
				try {
					$this->setLimitOffset( $fallbackLimit );
					$fallbackSearchResult = $this->completionSearch( $fbs );
					$results->appendAll( $fallbackSearchResult );
					$fallbackLimit -= $fallbackSearchResult->getSize();
				} finally {
					$this->setLimitOffset( $origLimit, $origOffset );
				}
				if ( $fallbackLimit <= 0 ) {
					break;
				}
			}
		}
		return $this->processCompletionResults( $search, $results );
	}

	/**
	 * Extract titles from completion results.
	 *
	 * @param SearchSuggestionSet $completionResults
	 * @return Title[]
	 */
	public function extractTitles( SearchSuggestionSet $completionResults ) {
		return $completionResults->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} );
	}

	/**
	 * Process completion search results.
	 *
	 * Shrinks the over-fetched set to the limit, drops suggestions whose
	 * title is not known, and on the first page promotes (or inserts) an
	 * exact title match to the top.
	 *
	 * @param string $search
	 * @param SearchSuggestionSet $suggestions Modified in place
	 * @return SearchSuggestionSet The same set that was passed in
	 */
	protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
		// We over-fetched to determine pagination. Shrink back down if we have extra results
		// and mark if pagination is possible
		$suggestions->shrink( $this->limit );

		$search = trim( $search );
		// preload the titles with LinkBatch
		$lb = new LinkBatch( $suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} ) );
		$lb->setCaller( __METHOD__ );
		$lb->execute();

		// filter() reports how many suggestions it dropped
		$diff = $suggestions->filter( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle()->isKnown();
		} );
		if ( $diff > 0 ) {
			MediaWikiServices::getInstance()->getStatsdDataFactory()
				->updateCount( 'search.completion.missing', $diff );
		}

		$results = $suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle()->getPrefixedText();
		} );

		if ( $this->offset === 0 ) {
			// Rescore results with an exact title match
			// NOTE: in some cases like cross-namespace redirects
			// (frequently used as shortcuts e.g. WP:WP on huwiki) some
			// backends like Cirrus will return no results. We should still
			// try an exact title match to workaround this limitation
			$rescorer = new SearchExactMatchRescorer();
			$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
		} else {
			// No need to rescore if offset is not 0
			// The exact match must have been returned at position 0
			// if it existed.
			$rescoredResults = $results;
		}

		if ( count( $rescoredResults ) > 0 ) {
			// Strict search: loosely, numeric-looking titles such as "1e3"
			// and "1000" would compare equal.
			$found = array_search( $rescoredResults[0], $results, true );
			if ( $found === false ) {
				// If the first result is not in the previous array it
				// means that we found a new exact match
				$exactTitle = Title::newFromText( $rescoredResults[0] );
				// newFromText() can fail; fromTitle() requires a Title
				if ( $exactTitle !== null ) {
					$suggestions->prepend( SearchSuggestion::fromTitle( 0, $exactTitle ) );
					$suggestions->shrink( $this->limit );
				}
			} elseif ( $found > 0 ) {
				// if the first result is not the same we need to rescore
				$suggestions->rescore( $found );
			}
		}

		return $suggestions;
	}

	/**
	 * Simple prefix search for subpages.
	 *
	 * @param string $search
	 * @return Title[] Empty for a blank query
	 */
	public function defaultPrefixSearch( $search ) {
		if ( trim( $search ) === '' ) {
			return [];
		}

		$search = $this->normalizeNamespaces( $search );
		return $this->simplePrefixSearch( $search );
	}

	/**
	 * Call out to simple search backend.
	 *
	 * @param string $search
	 * @return Title[]
	 */
	protected function simplePrefixSearch( $search ) {
		// Use default database prefix search
		$backend = new TitlePrefixSearch;
		return $backend->defaultSearchBackend( $this->namespaces, $search, $this->limit, $this->offset );
	}

	/**
	 * Get a list of supported profiles.
	 *
	 * The base implementation has none and returns null.
	 *
	 * @param string $profileType
	 * @param User|null $user
	 * @return array|null
	 */
	public function getProfiles( $profileType, User $user = null ) {
		return null;
	}

	/**
	 * Create a search field definition.
	 *
	 * The base implementation always returns a NullIndexField, meaning the
	 * engine does not implement the field.
	 *
	 * @param string $name
	 * @param int $type
	 * @return NullIndexField
	 */
	public function makeSearchFieldMapping( $name, $type ) {
		return new NullIndexField();
	}

	/**
	 * Get fields for search index.
	 *
	 * Collects the fields of every distinct content handler, merging
	 * definitions that share a name, then lets extensions add their own
	 * through the SearchIndexFields hook.
	 *
	 * @return array Field definitions keyed by field name
	 * @throws InvalidArgumentException When two definitions of the same
	 *  field cannot be merged
	 */
	public function getSearchIndexFields() {
		$models = ContentHandler::getContentModels();
		$fields = [];
		$seenHandlers = new SplObjectStorage();
		foreach ( $models as $model ) {
			try {
				$handler = ContentHandler::getForModelID( $model );
			} catch ( MWUnknownContentModelException $e ) {
				// If we can find no handler, ignore it
				continue;
			}
			// Several models can have the same handler, so avoid processing it repeatedly
			if ( $seenHandlers->contains( $handler ) ) {
				// We already did this one
				continue;
			}
			$seenHandlers->attach( $handler );
			$handlerFields = $handler->getFieldsForSearchIndex( $this );
			foreach ( $handlerFields as $fieldName => $fieldData ) {
				if ( empty( $fields[$fieldName] ) ) {
					$fields[$fieldName] = $fieldData;
				} else {
					// TODO: do we allow some clashes with the same type or reject all of them?
					$mergeDef = $fields[$fieldName]->merge( $fieldData );
					if ( !$mergeDef ) {
						throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
					}
					$fields[$fieldName] = $mergeDef;
				}
			}
		}
		// Hook to allow extensions to produce search mapping fields
		Hooks::run( 'SearchIndexFields', [ &$fields, $this ] );
		return $fields;
	}

	/**
	 * Augment search results with extra data.
	 *
	 * Augmentors are collected through the SearchResultsAugment hook; row
	 * augmentors are wrapped in PerRowAugmentor so that everything can be
	 * run as a set augmentor.
	 *
	 * @param ISearchResultSet $resultSet Receives the augmented data
	 * @throws InvalidArgumentException When a name is registered as both a
	 *  row and a set augmentor
	 */
	public function augmentSearchResults( ISearchResultSet $resultSet ) {
		$setAugmentors = [];
		$rowAugmentors = [];
		Hooks::run( "SearchResultsAugment", [ &$setAugmentors, &$rowAugmentors ] );
		if ( !$setAugmentors && !$rowAugmentors ) {
			// We're done here
			return;
		}

		// Convert row augmentors to set augmentor
		foreach ( $rowAugmentors as $name => $row ) {
			if ( isset( $setAugmentors[$name] ) ) {
				throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
			}
			$setAugmentors[$name] = new PerRowAugmentor( $row );
		}

		foreach ( $setAugmentors as $name => $augmentor ) {
			$data = $augmentor->augmentAll( $resultSet );
			// Only record augmentors that produced something
			if ( $data ) {
				$resultSet->setAugmentedData( $name, $data );
			}
		}
	}
}
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
Class representing a list of titles. The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:34
Exception thrown when an unregistered content model is requested.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook... This is case-sensitive (First character may be autom...
Contain a class for special pages.
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getNearMatcher(Config $config)
Get the service class for finding near matches.
searchTitle( $term)
Perform a title-only search query and return a result set.
supports( $feature)
maybePaginate(Closure $fn)
Performs an overfetch and shrink operation to determine if the next page is available for search engi...
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
update( $id, $title, $text)
Create or update the search index record for the given page.
setNamespaces( $namespaces)
Set which namespaces the search should include.
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
array $features
Feature values.
replacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names and set the list of namespaces...
string[] $searchTerms
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
defaultPrefixSearch( $search)
Simple prefix search for subpages.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
searchArchiveTitle( $term)
Perform a title search in the article archive.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
setFeatureData( $feature, $data)
Way to pass custom data for engines.
completionSearchBackend( $search)
Perform a completion search.
getTextFromContent(Title $t, Content $c=null)
Get the raw text for updating the index from a content object. Nicer search backends could possibly do...
getProfiles( $profileType, User $user=null)
Get a list of supported profiles.
int[]|null $namespaces
getSort()
Get the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
getSearchIndexFields()
Get fields for search index.
getValidSorts()
Get the valid sort directions.
static userHighlightPrefs()
Find snippet highlight settings for all users.
updateTitle( $id, $title)
Update a search index record's title only.
completionSearchWithVariants( $search)
Perform a completion search with variants.
doSearchText( $term)
Perform a full text search query and return a result set.
normalizeNamespaces( $search)
Makes search simple string if it was namespaced.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
completionSearch( $search)
Perform a completion search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
simplePrefixSearch( $search)
Call out to simple search backend.
setSort( $sort)
Set the sort direction of the search results.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features.
searchText( $term)
Perform a full text search query and return a result set.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
Implementation of near match title search.
Search suggestion sets.
filter( $callback)
Filter the suggestions array.
rescore( $key)
Move the suggestion at index $key to the first position.
shrink( $limit)
Remove any extra elements in the suggestions set.
static fromStrings(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a string array.
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a title array.
map( $callback)
Call array_map on the suggestions array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
Search suggestion.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
static fromTitle( $score, Title $title)
Create suggestion from Title.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Definition Status.php:40
Performs prefix search, returning Title objects.
Represents a title within MediaWiki.
Definition Title.php:42
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition User.php:51
const NS_MAIN
Definition Defines.php:69
const NS_SPECIAL
Definition Defines.php:58
Interface for configuration instances.
Definition Config.php:28
Base interface for content objects.
Definition Content.php:34
A set of SearchEngine results.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...
Augment search results.
$sort