MediaWiki  master
SearchEngine.php
Go to the documentation of this file.
1 <?php
31 
37 abstract class SearchEngine {
/** Sort name that getValidSorts() reports as the only valid sort in this base class. */
38  public const DEFAULT_SORT = 'relevance';
39 
/** @var string */
41  public $prefix = '';
42 
/** @var int[]|null Namespaces to search; replaced by setNamespaces() */
44  public $namespaces = [ NS_MAIN ];
45 
/** @var int Maximum number of results; set by setLimitOffset() */
47  protected $limit = 10;
48 
/** @var int Number of results to skip; set by setLimitOffset() */
50  protected $offset = 0;
51 
/** @var string[] */
56  protected $searchTerms = [];
57 
/** @var bool Whether a suggestion should be built; set by setShowSuggestion() */
59  protected $showSuggestion = true;
61 
/** @var array Feature values, keyed by feature name (see setFeatureData()) */
63  protected $features = [];
64 
/** @var HookContainer|null Resolved lazily by getHookContainer() when unset */
66  private $hookContainer;
67 
/** @var HookRunner|null Built lazily by getHookRunner() when unset */
69  private $hookRunner;
70 
/** Profile type for completionSearch. */
72  public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';
73 
/** Profile type for query independent ranking features. */
75  public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';
76 
/** Integer flag for legalSearchChars: includes all chars allowed in a search query. */
78  protected const CHARS_ALL = 1;
79 
/** Integer flag for legalSearchChars: includes all chars allowed in a search term. */
81  protected const CHARS_NO_SYNTAX = 2;
82 
/**
 * Perform a full text search query and return a result set.
 *
 * The actual query is doSearchText(); it is run through maybePaginate().
 *
 * @param string $term Search term, forwarded unchanged
 * @return mixed Whatever doSearchText() produced
 */
public function searchText( $term ) {
	$runQuery = function () use ( $term ) {
		return $this->doSearchText( $term );
	};

	return $this->maybePaginate( $runQuery );
}
98 
/**
 * Perform a full text search query and return a result set.
 *
 * This base implementation performs no search.
 *
 * @param string $term Search term (unused here)
 * @return null Always null in this implementation
 */
protected function doSearchText( $term ) {
	return null;
}
111 
/**
 * Perform a title search in the article archive.
 *
 * Thin public wrapper around doSearchArchiveTitle().
 *
 * @param string $term Search term, forwarded unchanged
 * @return mixed Whatever doSearchArchiveTitle() produced
 */
public function searchArchiveTitle( $term ) {
	$outcome = $this->doSearchArchiveTitle( $term );

	return $outcome;
}
129 
/**
 * Perform a title search in the article archive.
 *
 * This base implementation finds nothing.
 *
 * @param string $term Search term (unused here)
 * @return Status Good status whose value is an empty array
 */
protected function doSearchArchiveTitle( $term ) {
	$noMatches = [];

	return Status::newGood( $noMatches );
}
142 
/**
 * Perform a title-only search query and return a result set.
 *
 * The actual query is doSearchTitle(); it is run through maybePaginate().
 *
 * @param string $term Search term, forwarded unchanged
 * @return mixed Whatever doSearchTitle() produced
 */
public function searchTitle( $term ) {
	$runQuery = function () use ( $term ) {
		return $this->doSearchTitle( $term );
	};

	return $this->maybePaginate( $runQuery );
}
159 
/**
 * Perform a title-only search query and return a result set.
 *
 * This base implementation performs no search.
 *
 * @param string $term Search term (unused here)
 * @return null Always null in this implementation
 */
protected function doSearchTitle( $term ) {
	return null;
}
172 
/**
 * Performs an overfetch and shrink operation to determine if the next page is
 * available, for search engines that are not PaginatingSearchEngine instances.
 *
 * @param Closure $fn Callback that runs the query; called with no arguments
 * @return mixed The callback's return value; a contained result set is shrunk in place
 */
private function maybePaginate( Closure $fn ) {
	// Self-paginating engines get the callback result untouched.
	if ( $this instanceof PaginatingSearchEngine ) {
		return $fn();
	}

	// Ask for one result more than requested; the limit is put back even if
	// the callback throws.
	++$this->limit;
	try {
		$outcome = $fn();
	} finally {
		--$this->limit;
	}

	// The result set is either the outcome itself or the value of a Status.
	$set = null;
	if ( $outcome instanceof ISearchResultSet ) {
		$set = $outcome;
	} elseif ( $outcome instanceof Status ) {
		$wrapped = $outcome->getValue();
		if ( $wrapped instanceof ISearchResultSet ) {
			$set = $wrapped;
		}
	}

	// Trim the extra result off again, using the restored limit.
	if ( $set ) {
		$set->shrink( $this->limit );
	}

	return $outcome;
}
206 
/**
 * Report whether this engine supports a named feature.
 *
 * @param string $feature Feature name
 * @return bool True only for 'search-update' in this base class
 */
public function supports( $feature ) {
	// Loose comparison on purpose: it mirrors switch/case matching.
	if ( $feature == 'search-update' ) {
		return true;
	}

	// 'title-suffix-filter' and every other feature are unsupported here.
	return false;
}
223 
/**
 * Way to pass custom data for engines.
 *
 * @param string $feature Key under which the data is stored
 * @param mixed $data Value to store; replaces any earlier value for the key
 */
public function setFeatureData( $feature, $data ) {
	$this->features[$feature] = $data;
}
233 
/**
 * Way to retrieve custom data set by setFeatureData or by the engine itself.
 *
 * @param string $feature Key to look up
 * @return mixed The stored value, or null when nothing (or null) is stored
 */
public function getFeatureData( $feature ) {
	if ( isset( $this->features[$feature] ) ) {
		return $this->features[$feature];
	}

	return null;
}
244 
/**
 * Normalize text to be used for searching or updating the search index.
 *
 * This implementation only applies the content language's word segmentation.
 *
 * @param string $string Text to normalize
 * @return mixed Result of Language::segmentByWord() for the content language
 */
public function normalizeText( $string ) {
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	// Some languages such as Chinese require word segmentation
	return $contentLanguage->segmentByWord( $string );
}
257 
/**
 * Get the service class for finding near matches.
 *
 * @param Config $config Configuration handed to the matcher
 * @return SearchNearMatcher New matcher bound to the content language and
 *  this engine's hook container
 */
public function getNearMatcher( Config $config ) {
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();
	$hookContainer = $this->getHookContainer();

	return new SearchNearMatcher( $config, $contentLanguage, $hookContainer );
}
269 
/**
 * Get near matcher for default SearchEngine.
 *
 * @return mixed Result of getNearMatcher() on a newly created search engine,
 *  using the main config
 */
protected static function defaultNearMatcher() {
	$services = MediaWikiServices::getInstance();
	$mainConfig = $services->getMainConfig();
	$engine = $services->newSearchEngine();

	return $engine->getNearMatcher( $mainConfig );
}
279 
/**
 * Get chars legal for search.
 *
 * @param int $type One of the CHARS_* flags; ignored by this implementation
 * @return string Character-class body (no surrounding brackets)
 */
public function legalSearchChars( $type = self::CHARS_ALL ) {
	$charClass = "A-Za-z_'.0-9\\x80-\\xFF\\-";

	return $charClass;
}
289 
/**
 * Set the maximum number of results to return and how many to skip before
 * returning the first.
 *
 * Both values are converted to integers before being stored.
 *
 * @param int $limit Maximum number of results
 * @param int $offset Number of results to skip
 */
public function setLimitOffset( $limit, $offset = 0 ) {
	$this->limit = (int)$limit;
	$this->offset = (int)$offset;
}
301 
/**
 * Set which namespaces the search should include.
 *
 * A falsy argument stores an empty list. Otherwise only negative namespace
 * ids and ids that are keys of the searchable-namespace list are kept; array
 * keys of the input are preserved.
 *
 * @param int[]|null $namespaces Namespace ids to search
 */
public function setNamespaces( $namespaces ) {
	if ( !$namespaces ) {
		$this->namespaces = [];
		return;
	}

	// Filter namespaces to only keep valid ones
	$searchable = MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces();
	$isAllowed = static function ( $ns ) use ( $searchable ) {
		return $ns < 0 || isset( $searchable[$ns] );
	};

	$this->namespaces = array_filter( $namespaces, $isAllowed );
}
320 
/**
 * Set whether the searcher should try to build a suggestion.
 *
 * @param bool $showSuggestion Stored as given, without conversion
 */
public function setShowSuggestion( $showSuggestion ) {
	$this->showSuggestion = $showSuggestion;
}
331 
/**
 * Get the valid sort directions.
 *
 * @return string[] Only DEFAULT_SORT in this base class
 */
public function getValidSorts() {
	$sorts = [];
	$sorts[] = self::DEFAULT_SORT;

	return $sorts;
}
344 
/**
 * Set the sort direction of the search results.
 *
 * The value must be identical to one of the entries of getValidSorts().
 *
 * @param string $sort Sort name
 * @throws InvalidArgumentException When $sort is not one of the valid sorts
 */
public function setSort( $sort ) {
	$validSorts = $this->getValidSorts();

	// Strict comparison: a loose in_array() would let `true` (and `0` on
	// PHP < 8) pass as a match for any sort name.
	if ( !in_array( $sort, $validSorts, true ) ) {
		throw new InvalidArgumentException( "Invalid sort: $sort. " .
			"Must be one of: " . implode( ', ', $validSorts ) );
	}

	$this->sort = $sort;
}
360 
/**
 * Get the sort direction of the search results.
 *
 * @return mixed Current value of the sort property (as stored by setSort())
 */
public function getSort() {
	$current = $this->sort;

	return $current;
}
370 
/**
 * Parse some common prefixes: all (search everything) or namespace names and
 * set the list of namespaces of this class accordingly.
 *
 * This base implementation does none of that and hands the query back as is.
 *
 * @param string $query Search query
 * @return string The unchanged query
 */
public function replacePrefixes( $query ) {
	return $query;
}
383 
/**
 * Parse some common prefixes: all (search everything) or namespace names.
 *
 * @param string $query Search query
 * @param bool $withAllKeyword Whether the localized 'searchall' keyword and
 *  the literal "all:" are recognized as prefixes
 * @param bool $withPrefixSearchExtractNamespaceHook Whether the
 *  PrefixSearchExtractNamespace hook is run when the text before the first
 *  colon is not a namespace name
 * @return false|array False when no prefix was recognized; otherwise
 *  [ remaining query, extracted namespaces ], where the namespaces are null
 *  for an "all" prefix
 */
public static function parseNamespacePrefixes(
	$query,
	$withAllKeyword = true,
	$withPrefixSearchExtractNamespaceHook = false
) {
	$colonPos = strpos( $query, ':' );
	if ( $colonPos === false ) { // nothing to do
		return false;
	}

	if ( $withAllKeyword ) {
		$localizedAll = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
		$allKeywords = [ $localizedAll ];
		// force all: so that we have a common syntax for all the wikis
		if ( $localizedAll !== 'all:' ) {
			$allKeywords[] = 'all:';
		}

		foreach ( $allKeywords as $keyword ) {
			if ( str_starts_with( $query, $keyword ) ) {
				// "All" query: strip the keyword, no namespace restriction.
				return [ substr( $query, strlen( $keyword ) ), null ];
			}
		}
	}

	// Everything before the first colon is a candidate namespace name.
	$nsName = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
	$nsIndex = MediaWikiServices::getInstance()->getContentLanguage()->getNsIndex( $nsName );
	if ( $nsIndex !== false ) {
		return [ substr( $query, strlen( $nsName ) + 1 ), [ $nsIndex ] ];
	}

	if ( !$withPrefixSearchExtractNamespaceHook ) {
		return false;
	}

	$hookNamespaces = [ NS_MAIN ];
	$hookQuery = $query;
	Hooks::runner()->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery );
	if ( $hookQuery === $query ) {
		// The hook left the query alone, so nothing was extracted.
		return false;
	}

	return [ $hookQuery, $hookNamespaces ];
}
452 
460  public static function userHighlightPrefs() {
463  return [ $contextlines, $contextchars ];
464  }
465 
/**
 * Create or update the search index record for the given page.
 *
 * This base implementation does nothing.
 *
 * @param mixed $id Unused here
 * @param mixed $title Unused here
 * @param mixed $text Unused here
 */
public function update( $id, $title, $text ) {
	// Intentionally empty.
}
478 
/**
 * Update a search index record's title only.
 *
 * This base implementation does nothing.
 *
 * @param mixed $id Unused here
 * @param mixed $title Unused here
 */
public function updateTitle( $id, $title ) {
	// Intentionally empty.
}
490 
/**
 * Delete hook of the search engine; this base implementation does nothing.
 *
 * @param mixed $id Unused here
 * @param mixed $title Unused here
 */
public function delete( $id, $title ) {
	// Intentionally empty.
}
502 
/**
 * Get the raw text for updating the index from a content object.
 *
 * @param Title $t Title the content belongs to; not used by this implementation
 * @param Content|null $c Content to extract text from
 * @return string Result of Content::getTextForSearchIndex(), or '' when no
 *  content is given
 */
public function getTextFromContent( Title $t, ?Content $c = null ) {
	// The nullable type is spelled out: relying on "= null" to make the
	// parameter nullable is deprecated as of PHP 8.4.
	return $c ? $c->getTextForSearchIndex() : '';
}
517 
/**
 * Tell whether the text returned by getTextFromContent() has already been
 * fully processed for the index by this engine.
 *
 * @return bool Always false in this base class
 */
public function textAlreadyUpdatedForIndex() {
	return false;
}
529 
/**
 * Makes search simple string if it was namespaced.
 *
 * When parseNamespacePrefixes() recognizes a prefix, the extracted namespaces
 * are applied via setNamespaces() as a side effect.
 *
 * @param string $search Search string, possibly with a namespace prefix
 * @return string The string without the recognized prefix, or the input
 *  unchanged when no prefix was recognized
 */
protected function normalizeNamespaces( $search ) {
	$parsed = self::parseNamespacePrefixes( $search, false, true );
	if ( $parsed === false ) {
		return $search;
	}

	[ $strippedSearch, $namespaces ] = $parsed;
	$this->setNamespaces( $namespaces );

	return $strippedSearch;
}
544 
/**
 * Perform an overfetch of completion search results.
 *
 * Runs completionSearchBackend() with the limit raised by one; the limit is
 * restored afterwards, also when the backend throws.
 *
 * @param string $search Search term
 * @return mixed Whatever completionSearchBackend() returned
 */
protected function completionSearchBackendOverfetch( $search ) {
	++$this->limit;
	try {
		$suggestions = $this->completionSearchBackend( $search );
	} finally {
		--$this->limit;
	}

	return $suggestions;
}
560 
/**
 * Perform a completion search.
 *
 * Unless the Special namespace is among the searched namespaces, the
 * PrefixSearchBackend hook is run first; when it returns false its results
 * are used. In every other case the simple prefix search is used.
 *
 * @param string $search Search term; trimmed before use
 * @return SearchSuggestionSet Suggestions built from the hook's strings or
 *  from the simple prefix search results
 */
protected function completionSearchBackend( $search ) {
	$search = trim( $search );
	$hookResults = [];

	// We do not run hook on Special: search. A false return value means the
	// hook worked.
	// FIXME: Yes, the API is weird. That's why it is going to be deprecated.
	$handledByHook = !in_array( NS_SPECIAL, $this->namespaces )
		&& !$this->getHookRunner()->onPrefixSearchBackend(
			$this->namespaces, $search, $this->limit, $hookResults, $this->offset );

	if ( $handledByHook ) {
		return SearchSuggestionSet::fromStrings( $hookResults );
	}

	// Hook did not do the job, use default simple search
	$titles = $this->simplePrefixSearch( $search );

	return SearchSuggestionSet::fromTitles( $titles );
}
590 
/**
 * Perform a completion search.
 *
 * @param string $search Search term; a recognized namespace prefix is
 *  stripped and applied via normalizeNamespaces()
 * @return SearchSuggestionSet Empty set for a blank term, otherwise the
 *  processed completion results
 */
public function completionSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		// Nothing to complete.
		return SearchSuggestionSet::emptySuggestionSet();
	}

	$normalized = $this->normalizeNamespaces( $search );
	$overfetched = $this->completionSearchBackendOverfetch( $normalized );

	return $this->processCompletionResults( $normalized, $overfetched );
}
604 
/**
 * Perform a completion search with variants.
 *
 * When the primary search leaves room below the (overfetched) limit, the
 * search is repeated for the other language variants of the term until the
 * room is used up.
 *
 * @param string $search Search term; a recognized namespace prefix is
 *  stripped and applied via normalizeNamespaces()
 * @return SearchSuggestionSet Empty set for a blank term, otherwise the
 *  processed, combined completion results
 */
public function completionSearchWithVariants( $search ) {
	if ( trim( $search ) === '' ) {
		return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
	}
	$search = $this->normalizeNamespaces( $search );

	$results = $this->completionSearchBackendOverfetch( $search );
	// One more than the remaining room, to keep the overfetch for pagination.
	$fallbackLimit = 1 + $this->limit - $results->getSize();
	if ( $fallbackLimit > 0 ) {
		$services = MediaWikiServices::getInstance();
		$fallbackSearches = $services->getLanguageConverterFactory()
			->getLanguageConverter( $services->getContentLanguage() )
			->autoConvertToAllVariants( $search );
		$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

		// The fallback searches run with a reduced limit and offset 0. Remember
		// the caller's values so they can be put back: otherwise the final
		// processCompletionResults() call would shrink the combined results to
		// the last fallback limit, and later searches on this engine would
		// silently use the wrong limit and offset.
		$origLimit = $this->limit;
		$origOffset = $this->offset;

		foreach ( $fallbackSearches as $fbs ) {
			try {
				$this->setLimitOffset( $fallbackLimit );
				$fallbackSearchResult = $this->completionSearch( $fbs );
				$results->appendAll( $fallbackSearchResult );
				$fallbackLimit -= $fallbackSearchResult->getSize();
			} finally {
				// Restore even when a fallback search throws.
				$this->setLimitOffset( $origLimit, $origOffset );
			}
			if ( $fallbackLimit <= 0 ) {
				break;
			}
		}
	}
	return $this->processCompletionResults( $search, $results );
}
639 
/**
 * Extract titles from completion results.
 *
 * @param SearchSuggestionSet $completionResults Suggestions to read from
 * @return mixed Result of mapping every suggestion to its suggested title
 */
public function extractTitles( SearchSuggestionSet $completionResults ) {
	$toTitle = static function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle();
	};

	return $completionResults->map( $toTitle );
}
650 
/**
 * Process completion search results.
 *
 * Shrinks the overfetched set back to the limit, filters it by whether the
 * suggested titles are known, and, for the first page only, makes sure an
 * exact title match ends up in first position.
 *
 * @param string $search Search term; trimmed before the exact-match lookup
 * @param SearchSuggestionSet $suggestions Suggestions to process; modified in place
 * @return SearchSuggestionSet The set that was passed in
 */
protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
	// We over-fetched to determine pagination. Shrink back down if we have extra results
	// and mark if pagination is possible
	$suggestions->shrink( $this->limit );

	$search = trim( $search );
	// preload the titles with LinkBatch
	$linkBatchFactory = MediaWikiServices::getInstance()->getLinkBatchFactory();
	$lb = $linkBatchFactory->newLinkBatch( $suggestions->map( static function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle();
	} ) );
	$lb->setCaller( __METHOD__ );
	$lb->execute();

	// NOTE(review): $diff is presumably the number of suggestions removed by
	// the filter; confirm against SearchSuggestionSet::filter().
	$diff = $suggestions->filter( static function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->isKnown();
	} );
	if ( $diff > 0 ) {
		MediaWikiServices::getInstance()->getStatsdDataFactory()
			->updateCount( 'search.completion.missing', $diff );
	}

	$results = $suggestions->map( static function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->getPrefixedText();
	} );

	if ( $this->offset === 0 ) {
		// Rescore results with an exact title match
		// NOTE: in some cases like cross-namespace redirects
		// (frequently used as shortcuts e.g. WP:WP on huwiki) some
		// backends like Cirrus will return no results. We should still
		// try an exact title match to workaround this limitation
		$rescorer = new SearchExactMatchRescorer();
		$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
	} else {
		// No need to rescore if offset is not 0
		// The exact match must have been returned at position 0
		// if it existed.
		$rescoredResults = $results;
	}

	if ( count( $rescoredResults ) > 0 ) {
		// Strict search: titles are strings, and a loose comparison treats
		// numeric-looking titles such as "1e3" and "1000" as equal, which
		// would point at the wrong suggestion or hide a new exact match.
		$found = array_search( $rescoredResults[0], $results, true );
		if ( $found === false ) {
			// If the first result is not in the previous array it
			// means that we found a new exact match
			$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
			$suggestions->prepend( $exactMatch );
			$suggestions->shrink( $this->limit );
		} else {
			// if the first result is not the same we need to rescore
			if ( $found > 0 ) {
				$suggestions->rescore( $found );
			}
		}
	}

	return $suggestions;
}
717 
/**
 * Simple prefix search for subpages.
 *
 * @param string $search Search term; a recognized namespace prefix is
 *  stripped and applied via normalizeNamespaces()
 * @return array Empty array for a blank term, otherwise the result of
 *  simplePrefixSearch()
 */
public function defaultPrefixSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return [];
	}

	$normalized = $this->normalizeNamespaces( $search );

	return $this->simplePrefixSearch( $normalized );
}
731 
/**
 * Call out to simple search backend.
 *
 * @param string $search Search term
 * @return mixed Result of TitlePrefixSearch::defaultSearchBackend() for the
 *  current namespaces, limit and offset
 */
protected function simplePrefixSearch( $search ) {
	// Use default database prefix search
	$prefixSearch = new TitlePrefixSearch();

	return $prefixSearch->defaultSearchBackend(
		$this->namespaces,
		$search,
		$this->limit,
		$this->offset
	);
}
743 
/**
 * Get a list of supported profiles.
 *
 * This base implementation supports none.
 *
 * @param string $profileType Profile type, e.g. one of the *_PROFILE_TYPE
 *  constants; not used by this implementation
 * @param User|null $user Not used by this implementation
 * @return null Always null in this base class
 */
public function getProfiles( $profileType, ?User $user = null ) {
	// The nullable type is spelled out: relying on "= null" to make the
	// parameter nullable is deprecated as of PHP 8.4.
	return null;
}
765 
/**
 * Create a search field definition.
 *
 * This base implementation ignores both arguments.
 *
 * @param string $name Field name
 * @param mixed $type Field type
 * @return NullIndexField Always a new null field
 */
public function makeSearchFieldMapping( $name, $type ) {
	$field = new NullIndexField();

	return $field;
}
779 
/**
 * Get fields for search index.
 *
 * Collects the fields of every distinct content handler, merges fields that
 * share a name, then lets the SearchIndexFields hook adjust the list.
 *
 * @return array Field definitions keyed by field name
 * @throws InvalidArgumentException When two same-named fields cannot be merged
 */
public function getSearchIndexFields() {
	$handlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
	$fields = [];
	$visited = new SplObjectStorage();

	foreach ( $handlerFactory->getContentModels() as $model ) {
		try {
			$handler = MediaWikiServices::getInstance()
				->getContentHandlerFactory()
				->getContentHandler( $model );
		} catch ( MWUnknownContentModelException $e ) {
			// If we can find no handler, ignore it
			continue;
		}

		// Several models can have the same handler, so avoid processing it repeatedly
		if ( $visited->contains( $handler ) ) {
			continue;
		}
		$visited->attach( $handler );

		foreach ( $handler->getFieldsForSearchIndex( $this ) as $fieldName => $fieldData ) {
			if ( empty( $fields[$fieldName] ) ) {
				// First definition of this field wins the slot.
				$fields[$fieldName] = $fieldData;
				continue;
			}

			// TODO: do we allow some clashes with the same type or reject all of them?
			$merged = $fields[$fieldName]->merge( $fieldData );
			if ( !$merged ) {
				throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
			}
			$fields[$fieldName] = $merged;
		}
	}

	// Hook to allow extensions to produce search mapping fields
	$this->getHookRunner()->onSearchIndexFields( $fields, $this );

	return $fields;
}
822 
/**
 * Augment search results with extra data.
 *
 * Augmentors are collected through the SearchResultsAugment hook; row
 * augmentors are wrapped in PerRowAugmentor so that all of them can be applied
 * to the whole set.
 *
 * @param ISearchResultSet $resultSet Result set that receives the augmented data
 * @throws InvalidArgumentException When a name has both a row and a set augmentor
 */
public function augmentSearchResults( ISearchResultSet $resultSet ) {
	$setAugmentors = [];
	$rowAugmentors = [];
	$this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors );

	if ( !( $setAugmentors || $rowAugmentors ) ) {
		// We're done here
		return;
	}

	// Convert row augmentors to set augmentor
	foreach ( $rowAugmentors as $name => $rowAugmentor ) {
		if ( isset( $setAugmentors[$name] ) ) {
			throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
		}
		$setAugmentors[$name] = new PerRowAugmentor( $rowAugmentor );
	}

	foreach ( $setAugmentors as $name => $augmentor ) {
		$augmented = $augmentor->augmentAll( $resultSet );
		if ( !$augmented ) {
			// Nothing to attach for this augmentor.
			continue;
		}
		$resultSet->setAugmentedData( $name, $augmented );
	}
}
856 
863  $this->hookContainer = $hookContainer;
864  $this->hookRunner = new HookRunner( $hookContainer );
865  }
866 
/**
 * Get a HookContainer, for running extension hooks or for hook metadata.
 *
 * @return HookContainer The container set on this engine, or the one from
 *  MediaWikiServices (which is then remembered) when none was set
 */
protected function getHookContainer(): HookContainer {
	if ( $this->hookContainer ) {
		return $this->hookContainer;
	}

	// This shouldn't be hit in core, but it is needed for CirrusSearch
	// which commonly creates a CirrusSearch object without cirrus being
	// configured in $wgSearchType/$wgSearchTypeAlternatives.
	$this->hookContainer = MediaWikiServices::getInstance()->getHookContainer();

	return $this->hookContainer;
}
882 
/**
 * Get a HookRunner for running core hooks.
 *
 * @return HookRunner The runner set on this engine, or a new one built from
 *  getHookContainer() (which is then remembered) when none was set
 */
protected function getHookRunner(): HookRunner {
	if ( $this->hookRunner ) {
		return $this->hookRunner;
	}

	$this->hookRunner = new HookRunner( $this->getHookContainer() );

	return $this->hookRunner;
}
897 
898 }
const NS_MAIN
Definition: Defines.php:64
const NS_SPECIAL
Definition: Defines.php:53
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
if(!defined('MW_SETUP_CALLBACK'))
The persistent session ID (if any) loaded at startup.
Definition: WebStart.php:82
static runner()
Get a HookRunner instance for calling hooks using the new interfaces.
Definition: Hooks.php:173
Exception thrown when an unregistered content model is requested.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:561
Service locator for MediaWiki core services.
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook...
Contain a class for special pages.
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getNearMatcher(Config $config)
Get the service class for finding near matches.
getHookRunner()
Get a HookRunner for running core hooks.
searchTitle( $term)
Perform a title-only search query and return a result set.
supports( $feature)
maybePaginate(Closure $fn)
Performs an overfetch and shrink operation to determine if the next page is available for search engi...
bool $showSuggestion
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
update( $id, $title, $text)
Create or update the search index record for the given page.
setNamespaces( $namespaces)
Set which namespaces the search should include.
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
array $features
Feature values.
replacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names and set the list of namespaces...
string[] $searchTerms
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
defaultPrefixSearch( $search)
Simple prefix search for subpages.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
searchArchiveTitle( $term)
Perform a title search in the article archive.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
setFeatureData( $feature, $data)
Way to pass custom data for engines.
completionSearchBackend( $search)
Perform a completion search.
const DEFAULT_SORT
getTextFromContent(Title $t, Content $c=null)
Get the raw text for updating the index from a content object. Nicer search backends could possibly do...
getProfiles( $profileType, User $user=null)
Get a list of supported profiles.
int[]|null $namespaces
getSort()
Get the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
getSearchIndexFields()
Get fields for search index.
getValidSorts()
Get the valid sort directions.
static userHighlightPrefs()
Find snippet highlight settings for all users.
updateTitle( $id, $title)
Update a search index record's title only.
string $prefix
completionSearchWithVariants( $search)
Perform a completion search with variants.
doSearchText( $term)
Perform a full text search query and return a result set.
normalizeNamespaces( $search)
Makes search simple string if it was namespaced.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
HookRunner $hookRunner
getHookContainer()
Get a HookContainer, for running extension hooks or for hook metadata.
HookContainer $hookContainer
completionSearch( $search)
Perform a completion search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
simplePrefixSearch( $search)
Call out to simple search backend.
setSort( $sort)
Set the sort direction of the search results.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features.
setHookContainer(HookContainer $hookContainer)
searchText( $term)
Perform a full text search query and return a result set.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
Implementation of near match title search.
Search suggestion sets.
filter( $callback)
Filter the suggestions array.
rescore( $key)
Move the suggestion at index $key to the first position.
shrink( $limit)
Remove any extra elements in the suggestions set.
static fromStrings(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a string array.
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a title array.
map( $callback)
Call array_map on the suggestions array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
Search suggestion.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
static fromTitle( $score, Title $title)
Create suggestion from Title.
static newGood( $value=null)
Factory function for good results.
Definition: StatusValue.php:82
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition: Status.php:44
Performs prefix search, returning Title objects.
Represents a title within MediaWiki.
Definition: Title.php:49
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:370
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:69
Interface for configuration instances.
Definition: Config.php:30
Base interface for content objects.
Definition: Content.php:35
A set of SearchEngine results.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...