MediaWiki  master
SearchEngine.php
Go to the documentation of this file.
1 <?php
36 
/**
 * Abstract base for search backends.
 *
 * Holds the per-query options (namespaces, limit/offset, sort, feature data)
 * and supplies default implementations that either do nothing or fall back to
 * the database title-prefix search. Concrete engines override the do*() and
 * completion methods.
 */
abstract class SearchEngine {
	/** Sort order used when none has been set through setSort(). */
	public const DEFAULT_SORT = 'relevance';

	/** @var string */
	public $prefix = '';

	/** @var int[]|null Namespaces the search is restricted to. */
	public $namespaces = [ NS_MAIN ];

	/** @var int Maximum number of results to return. */
	protected $limit = 10;

	/** @var int Number of results to skip before the first returned one. */
	protected $offset = 0;

	/** @var string[] */
	protected $searchTerms = [];

	/** @var bool Whether the searcher should try to build a suggestion. */
	protected $showSuggestion = true;

	/** @var string Current sort order; always one of getValidSorts(). */
	private $sort = self::DEFAULT_SORT;

	/** @var array Feature values, keyed by feature name. */
	protected $features = [];

	/** @var HookContainer|null Lazily populated by getHookContainer(). */
	private $hookContainer;

	/** @var HookRunner|null Lazily populated by getHookRunner(). */
	private $hookRunner;

	/** Profile type for completionSearch. */
	public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';

	/** Profile type for query independent ranking features. */
	public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';

	/** Integer flag for legalSearchChars: includes all chars allowed in a search query. */
	protected const CHARS_ALL = 1;

	/** Integer flag for legalSearchChars: includes all chars allowed in a search term. */
	protected const CHARS_NO_SYNTAX = 2;

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * Delegates to doSearchText() through maybePaginate().
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null Whatever doSearchText() produced
	 */
	public function searchText( $term ) {
		return $this->maybePaginate( function () use ( $term ) {
			return $this->doSearchText( $term );
		} );
	}

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * The base implementation finds nothing and returns null.
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null
	 */
	protected function doSearchText( $term ) {
		return null;
	}

	/**
	 * Perform a title search in the article archive.
	 *
	 * @param string $term Raw search term
	 * @return Status Result of doSearchArchiveTitle()
	 */
	public function searchArchiveTitle( $term ) {
		return $this->doSearchArchiveTitle( $term );
	}

	/**
	 * Perform a title search in the article archive.
	 *
	 * The base implementation returns a good Status wrapping an empty array.
	 *
	 * @param string $term Raw search term
	 * @return Status
	 */
	protected function doSearchArchiveTitle( $term ) {
		return Status::newGood( [] );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * Delegates to doSearchTitle() through maybePaginate().
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null Whatever doSearchTitle() produced
	 */
	public function searchTitle( $term ) {
		return $this->maybePaginate( function () use ( $term ) {
			return $this->doSearchTitle( $term );
		} );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * The base implementation finds nothing and returns null.
	 *
	 * @param string $term Raw search term
	 * @return ISearchResultSet|Status|null
	 */
	protected function doSearchTitle( $term ) {
		return null;
	}

	/**
	 * Run a search callback, over-fetching by one result unless the engine
	 * paginates by itself.
	 *
	 * Engines that are instances of PaginatingSearchEngine are called as-is.
	 * For all others the limit is raised by one while the callback runs and
	 * the resulting set (bare, or wrapped in a Status) is shrunk back to the
	 * configured limit afterwards.
	 *
	 * @param Closure $fn Callback performing the actual search
	 * @return ISearchResultSet|Status|null The callback's return value
	 */
	private function maybePaginate( Closure $fn ) {
		if ( $this instanceof PaginatingSearchEngine ) {
			return $fn();
		}
		$this->limit++;
		try {
			$resultSetOrStatus = $fn();
		} finally {
			// Always undo the temporary over-fetch, even if the search threw.
			$this->limit--;
		}

		$resultSet = null;
		if ( $resultSetOrStatus instanceof ISearchResultSet ) {
			$resultSet = $resultSetOrStatus;
		} elseif ( $resultSetOrStatus instanceof Status &&
			$resultSetOrStatus->getValue() instanceof ISearchResultSet
		) {
			$resultSet = $resultSetOrStatus->getValue();
		}
		if ( $resultSet ) {
			$resultSet->shrink( $this->limit );
		}

		return $resultSetOrStatus;
	}

	/**
	 * Tell whether this engine supports a named feature.
	 *
	 * The base class reports support for 'search-update' only.
	 *
	 * @param string $feature Feature name
	 * @return bool
	 */
	public function supports( $feature ) {
		switch ( $feature ) {
			case 'search-update':
				return true;
			case 'title-suffix-filter':
			default:
				return false;
		}
	}

	/**
	 * Way to pass custom data for engines.
	 *
	 * @param string $feature Feature name
	 * @param mixed $data Value stored under that name
	 */
	public function setFeatureData( $feature, $data ) {
		$this->features[$feature] = $data;
	}

	/**
	 * Way to retrieve custom data set by setFeatureData or by the engine itself.
	 *
	 * @param string $feature Feature name
	 * @return mixed The stored value, or null when nothing is stored
	 */
	public function getFeatureData( $feature ) {
		return $this->features[$feature] ?? null;
	}

	/**
	 * Normalize text to be used for searching or for updating the search index.
	 *
	 * The base implementation only applies the content language's word
	 * segmentation.
	 *
	 * @param string $string
	 * @return string
	 */
	public function normalizeText( $string ) {
		// Some languages such as Chinese require word segmentation
		return MediaWikiServices::getInstance()->getContentLanguage()->segmentByWord( $string );
	}

	/**
	 * Get the service class for finding near matches.
	 *
	 * @param Config $config Not used by the base implementation
	 * @return mixed The title matcher service from MediaWikiServices
	 */
	public function getNearMatcher( Config $config ) {
		return MediaWikiServices::getInstance()->getTitleMatcher();
	}

	/**
	 * Get near matcher for default SearchEngine.
	 *
	 * @deprecated since 1.40; emits a deprecation warning when called
	 * @return mixed The title matcher service from MediaWikiServices
	 */
	protected static function defaultNearMatcher() {
		wfDeprecated( __METHOD__, '1.40' );
		return MediaWikiServices::getInstance()->getTitleMatcher();
	}

	/**
	 * Get chars legal for search.
	 *
	 * @param int $type One of the CHARS_* flags; ignored by the base implementation
	 * @return string Character-class fragment listing the allowed characters
	 */
	public function legalSearchChars( $type = self::CHARS_ALL ) {
		return "A-Za-z_'.0-9\\x80-\\xFF\\-";
	}

	/**
	 * Set the maximum number of results to return and how many to skip
	 * before returning the first.
	 *
	 * Both values are coerced with intval().
	 *
	 * @param int $limit
	 * @param int $offset
	 */
	public function setLimitOffset( $limit, $offset = 0 ) {
		$this->limit = intval( $limit );
		$this->offset = intval( $offset );
	}

	/**
	 * Set which namespaces the search should include.
	 *
	 * Negative namespace ids are kept as-is; all others are kept only when
	 * they are keys of the configured searchable namespaces. A falsy argument
	 * results in an empty list.
	 *
	 * @param int[]|null $namespaces
	 */
	public function setNamespaces( $namespaces ) {
		if ( $namespaces ) {
			// Filter namespaces to only keep valid ones
			$validNs = MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces();
			$namespaces = array_filter( $namespaces, static function ( $ns ) use( $validNs ) {
				return $ns < 0 || isset( $validNs[$ns] );
			} );
		} else {
			$namespaces = [];
		}
		$this->namespaces = $namespaces;
	}

	/**
	 * Set whether the searcher should try to build a suggestion.
	 *
	 * @param bool $showSuggestion
	 */
	public function setShowSuggestion( $showSuggestion ) {
		$this->showSuggestion = $showSuggestion;
	}

	/**
	 * Get the valid sort directions.
	 *
	 * @return string[] The base class only knows DEFAULT_SORT
	 */
	public function getValidSorts() {
		return [ self::DEFAULT_SORT ];
	}

	/**
	 * Set the sort direction of the search results.
	 *
	 * @param string $sort Must be one of getValidSorts()
	 * @throws InvalidArgumentException When $sort is not a valid sort
	 */
	public function setSort( $sort ) {
		if ( !in_array( $sort, $this->getValidSorts() ) ) {
			throw new InvalidArgumentException( "Invalid sort: $sort. " .
				"Must be one of: " . implode( ', ', $this->getValidSorts() ) );
		}
		$this->sort = $sort;
	}

	/**
	 * Get the sort direction of the search results.
	 *
	 * @return string
	 */
	public function getSort() {
		return $this->sort;
	}

	/**
	 * Hook point for engines that rewrite prefixes in the query.
	 *
	 * The base implementation returns the query unchanged.
	 *
	 * @param string $query
	 * @return string
	 */
	public function replacePrefixes( $query ) {
		return $query;
	}

	/**
	 * Parse some common prefixes: all (search everything) or namespace names.
	 *
	 * @param string $query Raw query string
	 * @param bool $withAllKeyword Recognise the localized 'searchall' keyword
	 *  and the literal "all:" prefix
	 * @param bool $withPrefixSearchExtractNamespaceHook Run the
	 *  PrefixSearchExtractNamespace hook when the prefix is not a namespace name
	 * @return false|array False when no prefix was recognised; otherwise
	 *  [ remaining query, namespace ids or null ]. The second element is null
	 *  for an "all" query.
	 */
	public static function parseNamespacePrefixes(
		$query,
		$withAllKeyword = true,
		$withPrefixSearchExtractNamespaceHook = false
	) {
		// Locate the separator once; everything below depends on it.
		$colonPos = strpos( $query, ':' );
		if ( $colonPos === false ) {
			// nothing to do
			return false;
		}

		if ( $withAllKeyword ) {
			$allKeywords = [ wfMessage( 'searchall' )->inContentLanguage()->text() . ':' ];
			// force all: so that we have a common syntax for all the wikis
			if ( !in_array( 'all:', $allKeywords, true ) ) {
				$allKeywords[] = 'all:';
			}

			foreach ( $allKeywords as $kw ) {
				if ( str_starts_with( $query, $kw ) ) {
					// "all" query: strip the keyword, no namespace restriction
					return [ substr( $query, strlen( $kw ) ), null ];
				}
			}
		}

		// str_replace swaps single bytes, so the prefix keeps length $colonPos.
		$prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
		$services = MediaWikiServices::getInstance();
		$index = $services->getContentLanguage()->getNsIndex( $prefix );
		if ( $index !== false ) {
			return [ substr( $query, $colonPos + 1 ), [ $index ] ];
		}

		if ( !$withPrefixSearchExtractNamespaceHook ) {
			return false;
		}

		$hookNamespaces = [ NS_MAIN ];
		$hookQuery = $query;
		( new HookRunner( $services->getHookContainer() ) )
			->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery );
		if ( $hookQuery === $query ) {
			// The hook left the query alone: no prefix was extracted.
			return false;
		}

		return [ $hookQuery, $hookNamespaces ];
	}

	/**
	 * Find snippet highlight settings for all users.
	 *
	 * NOTE(review): the rendered listing dropped the two lines that defined
	 * these values; 2 and 75 are the hard-coded upstream defaults - confirm
	 * against the original file.
	 *
	 * @return int[] [ context lines, context characters ]
	 */
	public static function userHighlightPrefs() {
		$contextlines = 2;
		$contextchars = 75;
		return [ $contextlines, $contextchars ];
	}

	/**
	 * Create or update the search index record for the given page.
	 *
	 * No-op in the base class.
	 *
	 * @param int $id
	 * @param string $title
	 * @param string $text
	 */
	public function update( $id, $title, $text ) {
		// no-op
	}

	/**
	 * Update a search index record's title only.
	 *
	 * No-op in the base class.
	 *
	 * @param int $id
	 * @param string $title
	 */
	public function updateTitle( $id, $title ) {
		// no-op
	}

	/**
	 * Delete an indexed page.
	 *
	 * No-op in the base class.
	 *
	 * @param int $id
	 * @param string $title
	 */
	public function delete( $id, $title ) {
		// no-op
	}

	/**
	 * Get the raw text for updating the index from a content object.
	 *
	 * @param Title $t Not used by the base implementation
	 * @param Content|null $c
	 * @return string Text for the search index, or '' when there is no content
	 */
	public function getTextFromContent( Title $t, ?Content $c = null ) {
		return $c ? $c->getTextForSearchIndex() : '';
	}

	/**
	 * Whether getTextFromContent() already did all text processing for the
	 * index. The base class answers false.
	 *
	 * @return bool
	 */
	public function textAlreadyUpdatedForIndex() {
		return false;
	}

	/**
	 * Strip a namespace prefix from the search string, if any, and restrict
	 * the engine's namespaces accordingly.
	 *
	 * @param string $search
	 * @return string The search string without the recognised prefix
	 */
	protected function normalizeNamespaces( $search ) {
		$queryAndNs = self::parseNamespacePrefixes( $search, false, true );
		if ( $queryAndNs !== false ) {
			$this->setNamespaces( $queryAndNs[1] );
			return $queryAndNs[0];
		}
		return $search;
	}

	/**
	 * Perform an overfetch of completion search results.
	 *
	 * Runs completionSearchBackend() with the limit temporarily raised by one.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	protected function completionSearchBackendOverfetch( $search ) {
		$this->limit++;
		try {
			return $this->completionSearchBackend( $search );
		} finally {
			$this->limit--;
		}
	}

	/**
	 * Perform a completion search.
	 *
	 * Gives the PrefixSearchBackend hook the first chance (except when the
	 * Special namespace is searched) and falls back to simplePrefixSearch().
	 *
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	protected function completionSearchBackend( $search ) {
		$results = [];

		$search = trim( $search );

		if ( !in_array( NS_SPECIAL, $this->namespaces ) && // We do not run hook on Special: search
			!$this->getHookRunner()->onPrefixSearchBackend(
				$this->namespaces, $search, $this->limit, $results, $this->offset )
		) {
			// False means hook worked.
			// FIXME: Yes, the API is weird. That's why it is going to be deprecated.

			return SearchSuggestionSet::fromStrings( $results );
		} else {
			// Hook did not do the job, use default simple search
			$results = $this->simplePrefixSearch( $search );
			return SearchSuggestionSet::fromTitles( $results );
		}
	}

	/**
	 * Perform a completion search.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet Empty set for a blank search string
	 */
	public function completionSearch( $search ) {
		if ( trim( $search ) === '' ) {
			return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
		}
		$search = $this->normalizeNamespaces( $search );
		$suggestions = $this->completionSearchBackendOverfetch( $search );
		return $this->processCompletionResults( $search, $suggestions );
	}

	/**
	 * Perform a completion search with variants.
	 *
	 * When the primary search leaves room under the limit, the language
	 * converter's variants of the search string are tried as fallbacks.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet Empty set for a blank search string
	 */
	public function completionSearchWithVariants( $search ) {
		if ( trim( $search ) === '' ) {
			return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
		}
		$search = $this->normalizeNamespaces( $search );

		$results = $this->completionSearchBackendOverfetch( $search );
		$fallbackLimit = 1 + $this->limit - $results->getSize();
		if ( $fallbackLimit > 0 ) {
			$services = MediaWikiServices::getInstance();
			$fallbackSearches = $services->getLanguageConverterFactory()
				->getLanguageConverter( $services->getContentLanguage() )
				->autoConvertToAllVariants( $search );
			$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

			// The fallback searches run with a reduced limit and offset 0.
			// Remember the caller's values so the final processing step (and
			// any later use of this engine) still sees them.
			$origLimit = $this->limit;
			$origOffset = $this->offset;
			foreach ( $fallbackSearches as $fbs ) {
				try {
					$this->setLimitOffset( $fallbackLimit );
					$fallbackSearchResult = $this->completionSearch( $fbs );
					$results->appendAll( $fallbackSearchResult );
					$fallbackLimit -= $fallbackSearchResult->getSize();
				} finally {
					$this->setLimitOffset( $origLimit, $origOffset );
				}
				if ( $fallbackLimit <= 0 ) {
					break;
				}
			}
		}
		return $this->processCompletionResults( $search, $results );
	}

	/**
	 * Extract titles from completion results.
	 *
	 * @param SearchSuggestionSet $completionResults
	 * @return array Suggested title of every suggestion, in set order
	 */
	public function extractTitles( SearchSuggestionSet $completionResults ) {
		return $completionResults->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} );
	}

	/**
	 * Process completion search results.
	 *
	 * Shrinks the over-fetched set, drops suggestions whose title is not
	 * known, and makes sure an exact title match ends up first.
	 *
	 * @param string $search
	 * @param SearchSuggestionSet $suggestions Modified in place
	 * @return SearchSuggestionSet The same, processed, set
	 */
	protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
		// We over-fetched to determine pagination. Shrink back down if we have extra results
		// and mark if pagination is possible
		$suggestions->shrink( $this->limit );

		$search = trim( $search );
		// preload the titles with LinkBatch
		$linkBatchFactory = MediaWikiServices::getInstance()->getLinkBatchFactory();
		$lb = $linkBatchFactory->newLinkBatch( $suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} ) );
		$lb->setCaller( __METHOD__ );
		$lb->execute();

		// NOTE(review): filter()'s return value is treated as the number of
		// suggestions that were dropped - confirm against SearchSuggestionSet.
		$diff = $suggestions->filter( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle()->isKnown();
		} );
		if ( $diff > 0 ) {
			MediaWikiServices::getInstance()->getStatsdDataFactory()
				->updateCount( 'search.completion.missing', $diff );
		}

		$results = $suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle()->getPrefixedText();
		} );

		if ( $this->offset === 0 ) {
			// Rescore results with an exact title match
			// NOTE: in some cases like cross-namespace redirects
			// (frequently used as shortcuts e.g. WP:WP on huwiki) some
			// backends like Cirrus will return no results. We should still
			// try an exact title match to workaround this limitation
			$rescorer = new SearchExactMatchRescorer();
			$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
		} else {
			// No need to rescore if offset is not 0
			// The exact match must have been returned at position 0
			// if it existed.
			$rescoredResults = $results;
		}

		if ( count( $rescoredResults ) > 0 ) {
			// Strict comparison: titles are strings, and a loose search would
			// treat numeric-looking titles such as "1e3" and "1000" as equal.
			$found = array_search( $rescoredResults[0], $results, true );
			if ( $found === false ) {
				// If the first result is not in the previous array it
				// means that we found a new exact match
				$exactTitle = Title::newFromText( $rescoredResults[0] );
				// newFromText() yields null for unparsable text; skip the
				// prepend rather than hand null to fromTitle().
				if ( $exactTitle !== null ) {
					$suggestions->prepend( SearchSuggestion::fromTitle( 0, $exactTitle ) );
					$suggestions->shrink( $this->limit );
				}
			} elseif ( $found > 0 ) {
				// if the first result is not the same we need to rescore
				$suggestions->rescore( $found );
			}
		}

		return $suggestions;
	}

	/**
	 * Simple prefix search for subpages.
	 *
	 * @param string $search
	 * @return array Empty array for a blank search string, otherwise the
	 *  result of simplePrefixSearch()
	 */
	public function defaultPrefixSearch( $search ) {
		if ( trim( $search ) === '' ) {
			return [];
		}

		$search = $this->normalizeNamespaces( $search );
		return $this->simplePrefixSearch( $search );
	}

	/**
	 * Call out to simple search backend.
	 *
	 * @param string $search
	 * @return array Result of TitlePrefixSearch::defaultSearchBackend()
	 */
	protected function simplePrefixSearch( $search ) {
		// Use default database prefix search
		$backend = new TitlePrefixSearch();
		return $backend->defaultSearchBackend( $this->namespaces, $search, $this->limit, $this->offset );
	}

	/**
	 * Get a list of supported profiles.
	 *
	 * The base implementation supports none and returns null.
	 *
	 * @param string $profileType e.g. one of the *_PROFILE_TYPE constants
	 * @param User|null $user
	 * @return array|null
	 */
	public function getProfiles( $profileType, ?User $user = null ) {
		return null;
	}

	/**
	 * Create a search field definition.
	 *
	 * The base implementation always returns a NullIndexField.
	 *
	 * @param string $name
	 * @param string $type
	 * @return NullIndexField
	 */
	public function makeSearchFieldMapping( $name, $type ) {
		return new NullIndexField();
	}

	/**
	 * Get fields for search index.
	 *
	 * Collects the fields of every distinct content handler, merging
	 * definitions that share a name, then lets the SearchIndexFields hook
	 * contribute.
	 *
	 * @return array Field definitions keyed by field name
	 * @throws InvalidArgumentException When two definitions of a field cannot be merged
	 */
	public function getSearchIndexFields() {
		$handlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
		$models = $handlerFactory->getContentModels();
		$fields = [];
		$seenHandlers = new SplObjectStorage();
		foreach ( $models as $model ) {
			try {
				$handler = $handlerFactory->getContentHandler( $model );
			} catch ( MWUnknownContentModelException $e ) {
				// If we can find no handler, ignore it
				continue;
			}
			// Several models can have the same handler, so avoid processing it repeatedly
			if ( $seenHandlers->contains( $handler ) ) {
				// We already did this one
				continue;
			}
			$seenHandlers->attach( $handler );
			$handlerFields = $handler->getFieldsForSearchIndex( $this );
			foreach ( $handlerFields as $fieldName => $fieldData ) {
				if ( empty( $fields[$fieldName] ) ) {
					$fields[$fieldName] = $fieldData;
				} else {
					// TODO: do we allow some clashes with the same type or reject all of them?
					$mergeDef = $fields[$fieldName]->merge( $fieldData );
					if ( !$mergeDef ) {
						throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
					}
					$fields[$fieldName] = $mergeDef;
				}
			}
		}
		// Hook to allow extensions to produce search mapping fields
		$this->getHookRunner()->onSearchIndexFields( $fields, $this );
		return $fields;
	}

	/**
	 * Augment search results with extra data.
	 *
	 * Augmentors are collected through the SearchResultsAugment hook; row
	 * augmentors are wrapped in PerRowAugmentor so all can be run per set.
	 *
	 * @param ISearchResultSet $resultSet
	 * @throws InvalidArgumentException When a name has both a row and a set augmentor
	 */
	public function augmentSearchResults( ISearchResultSet $resultSet ) {
		$setAugmentors = [];
		$rowAugmentors = [];
		$this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors );
		if ( !$setAugmentors && !$rowAugmentors ) {
			// We're done here
			return;
		}

		// Convert row augmentors to set augmentor
		foreach ( $rowAugmentors as $name => $row ) {
			if ( isset( $setAugmentors[$name] ) ) {
				throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
			}
			$setAugmentors[$name] = new PerRowAugmentor( $row );
		}

		foreach ( $setAugmentors as $name => $augmentor ) {
			$data = $augmentor->augmentAll( $resultSet );
			if ( $data ) {
				$resultSet->setAugmentedData( $name, $data );
			}
		}
	}

	/**
	 * Inject the hook container and build the matching hook runner.
	 *
	 * @param HookContainer $hookContainer
	 */
	public function setHookContainer( HookContainer $hookContainer ) {
		$this->hookContainer = $hookContainer;
		$this->hookRunner = new HookRunner( $hookContainer );
	}

	/**
	 * Get a HookContainer, for running extension hooks or for hook metadata.
	 *
	 * @return HookContainer
	 */
	protected function getHookContainer(): HookContainer {
		if ( !$this->hookContainer ) {
			// This shouldn't be hit in core, but it is needed for CirrusSearch
			// which commonly creates a CirrusSearch object without cirrus being
			// configured in $wgSearchType/$wgSearchTypeAlternatives.
			$this->hookContainer = MediaWikiServices::getInstance()->getHookContainer();
		}
		return $this->hookContainer;
	}

	/**
	 * Get a HookRunner for running core hooks.
	 *
	 * @return HookRunner
	 */
	protected function getHookRunner(): HookRunner {
		if ( !$this->hookRunner ) {
			$this->hookRunner = new HookRunner( $this->getHookContainer() );
		}
		return $this->hookRunner;
	}

}
const NS_MAIN
Definition: Defines.php:64
const NS_SPECIAL
Definition: Defines.php:53
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
if(!defined('MW_SETUP_CALLBACK'))
Definition: WebStart.php:88
Exception thrown when an unregistered content model is requested.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:568
Service locator for MediaWiki core services.
Service implementation of near match title search.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition: Status.php:58
Represents a title within MediaWiki.
Definition: Title.php:76
internal since 1.36
Definition: User.php:98
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook...
Contain a class for special pages.
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getNearMatcher(Config $config)
Get the service class for finding near matches.
getHookRunner()
Get a HookRunner for running core hooks.
searchTitle( $term)
Perform a title-only search query and return a result set.
supports( $feature)
bool $showSuggestion
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
update( $id, $title, $text)
Create or update the search index record for the given page.
setNamespaces( $namespaces)
Set which namespaces the search should include.
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
array $features
Feature values.
replacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names and set the list of namespaces...
string[] $searchTerms
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
defaultPrefixSearch( $search)
Simple prefix search for subpages.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
searchArchiveTitle( $term)
Perform a title search in the article archive.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
setFeatureData( $feature, $data)
Way to pass custom data for engines.
completionSearchBackend( $search)
Perform a completion search.
const DEFAULT_SORT
getTextFromContent(Title $t, Content $c=null)
Get the raw text for updating the index from a content object. Nicer search backends could possibly do...
getProfiles( $profileType, User $user=null)
Get a list of supported profiles.
int[]|null $namespaces
getSort()
Get the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
getSearchIndexFields()
Get fields for search index.
getValidSorts()
Get the valid sort directions.
static userHighlightPrefs()
Find snippet highlight settings for all users.
updateTitle( $id, $title)
Update a search index record's title only.
string $prefix
completionSearchWithVariants( $search)
Perform a completion search with variants.
doSearchText( $term)
Perform a full text search query and return a result set.
normalizeNamespaces( $search)
Turns the search into a plain string if it was namespace-prefixed.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
getHookContainer()
Get a HookContainer, for running extension hooks or for hook metadata.
completionSearch( $search)
Perform a completion search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
simplePrefixSearch( $search)
Call out to simple search backend.
setSort( $sort)
Set the sort direction of the search results.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features.
setHookContainer(HookContainer $hookContainer)
searchText( $term)
Perform a full text search query and return a result set.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
A set of search suggestions.
filter( $callback)
Filter the suggestions array.
rescore( $key)
Move the suggestion at index $key to the first position.
shrink( $limit)
Remove any extra elements in the suggestions set.
static fromStrings(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a string array.
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a title array.
map( $callback)
Call array_map on the suggestions array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
A search suggestion.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
static fromTitle( $score, Title $title)
Create suggestion from Title.
Performs prefix search, returning Title objects.
Base interface for representing page content.
Definition: Content.php:39
A set of SearchEngine results.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Interface for configuration instances.
Definition: Config.php:32
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...