MediaWiki  master
SearchEngine.php
Go to the documentation of this file.
1 <?php
33 
/**
 * Abstract base class for search engines.
 *
 * Most methods here are deliberately minimal defaults (no-ops, null or empty
 * results) that a concrete engine overrides. The base class itself implements
 * the shared plumbing: over-fetching for pagination, namespace prefix parsing,
 * completion (prefix) search post-processing and hook access.
 */
abstract class SearchEngine {
	/** Sort order used until setSort() is called. */
	public const DEFAULT_SORT = 'relevance';

	/** @var string */
	public $prefix = '';

	/** @var int[]|null Namespaces to search in; see setNamespaces() */
	public $namespaces = [ NS_MAIN ];

	/** @var int Maximum number of results to return; see setLimitOffset() */
	protected $limit = 10;

	/** @var int Number of results to skip; see setLimitOffset() */
	protected $offset = 0;

	/** @var string[] */
	protected $searchTerms = [];

	/** @var bool Whether the searcher should try to build a suggestion */
	protected $showSuggestion = true;

	/** @var string Current sort, always one of getValidSorts() once set through setSort() */
	private $sort = self::DEFAULT_SORT;

	/** @var array Feature values, keyed by feature name */
	protected $features = [];

	/** @var HookContainer|null Lazily initialised, see getHookContainer() */
	private $hookContainer;

	/** @var HookRunner|null Lazily initialised, see getHookRunner() */
	private $hookRunner;

	/** Profile type for completionSearch */
	public const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';

	/** Profile type for query independent ranking features */
	public const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';

	/** Integer flag for legalSearchChars: includes all chars allowed in a search query */
	protected const CHARS_ALL = 1;

	/** Integer flag for legalSearchChars: includes all chars allowed in a search term */
	protected const CHARS_NO_SYNTAX = 2;

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * Delegates to doSearchText(), over-fetching by one result when the engine
	 * does not handle pagination itself (see maybePaginate()).
	 *
	 * @param string $term Search term
	 * @return mixed Whatever doSearchText() returned (null in this base class)
	 */
	public function searchText( $term ) {
		return $this->maybePaginate( function () use ( $term ) {
			return $this->doSearchText( $term );
		} );
	}

	/**
	 * Perform a full text search query and return a result set.
	 *
	 * Base implementation does nothing; engines override this.
	 *
	 * @param string $term Search term
	 * @return null
	 */
	protected function doSearchText( $term ) {
		return null;
	}

	/**
	 * Perform a title search in the article archive.
	 *
	 * @param string $term Search term
	 * @return Status Whatever doSearchArchiveTitle() returned
	 */
	public function searchArchiveTitle( $term ) {
		return $this->doSearchArchiveTitle( $term );
	}

	/**
	 * Perform a title search in the article archive.
	 *
	 * Base implementation reports success with an empty list.
	 *
	 * @param string $term Search term
	 * @return Status Good status whose value is an empty array
	 */
	protected function doSearchArchiveTitle( $term ) {
		return Status::newGood( [] );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * Delegates to doSearchTitle(), over-fetching by one result when the engine
	 * does not handle pagination itself (see maybePaginate()).
	 *
	 * @param string $term Search term
	 * @return mixed Whatever doSearchTitle() returned (null in this base class)
	 */
	public function searchTitle( $term ) {
		return $this->maybePaginate( function () use ( $term ) {
			return $this->doSearchTitle( $term );
		} );
	}

	/**
	 * Perform a title-only search query and return a result set.
	 *
	 * Base implementation does nothing; engines override this.
	 *
	 * @param string $term Search term
	 * @return null
	 */
	protected function doSearchTitle( $term ) {
		return null;
	}

	/**
	 * Run a search callback, adding pagination detection when the engine does
	 * not provide it.
	 *
	 * Engines implementing PaginatingSearchEngine are called as-is. For all
	 * others the limit is raised by one for the duration of the call, and the
	 * returned result set (bare, or wrapped in a Status) is then shrunk back to
	 * the real limit.
	 *
	 * @param Closure $fn Takes no arguments and performs the search
	 * @return mixed The unmodified return value of $fn (the contained result set
	 *  may have been shrunk in place)
	 */
	private function maybePaginate( Closure $fn ) {
		if ( $this instanceof PaginatingSearchEngine ) {
			return $fn();
		}

		// Ask for one extra row; "finally" guarantees the limit is restored even
		// if the search throws.
		$this->limit++;
		try {
			$resultSetOrStatus = $fn();
		} finally {
			$this->limit--;
		}

		$resultSet = null;
		if ( $resultSetOrStatus instanceof ISearchResultSet ) {
			$resultSet = $resultSetOrStatus;
		} elseif ( $resultSetOrStatus instanceof Status &&
			$resultSetOrStatus->getValue() instanceof ISearchResultSet
		) {
			$resultSet = $resultSetOrStatus->getValue();
		}
		if ( $resultSet ) {
			$resultSet->shrink( $this->limit );
		}

		return $resultSetOrStatus;
	}

	/**
	 * Tell whether this engine supports a named feature.
	 *
	 * The base implementation only claims support for 'search-update'.
	 *
	 * @param string $feature Feature name
	 * @return bool
	 */
	public function supports( $feature ) {
		switch ( $feature ) {
			case 'search-update':
				return true;
			case 'title-suffix-filter':
			default:
				return false;
		}
	}

	/**
	 * Way to pass custom data for engines.
	 *
	 * @param string $feature Feature name, used as the storage key
	 * @param mixed $data Value to store
	 */
	public function setFeatureData( $feature, $data ) {
		$this->features[$feature] = $data;
	}

	/**
	 * Way to retrieve custom data set by setFeatureData or by the engine itself.
	 *
	 * @param string $feature Feature name
	 * @return mixed|null The stored value, or null when nothing is stored
	 */
	public function getFeatureData( $feature ) {
		return $this->features[$feature] ?? null;
	}

	/**
	 * Normalize text to be used for searching. Intended to be overridden by
	 * engines needing backend-specific conversions.
	 *
	 * The base implementation only applies the content language's word
	 * segmentation, which some languages such as Chinese require.
	 *
	 * @param string $string Text to normalize
	 * @return string
	 */
	public function normalizeText( $string ) {
		return MediaWikiServices::getInstance()->getContentLanguage()->segmentByWord( $string );
	}

	/**
	 * Get service class for finding near matches.
	 *
	 * @param Config $config Not used by this base implementation
	 * @return mixed The title matcher service from MediaWikiServices
	 */
	public function getNearMatcher( Config $config ) {
		return MediaWikiServices::getInstance()->getTitleMatcher();
	}

	/**
	 * Get near matcher for default SearchEngine.
	 *
	 * @deprecated since 1.40; emits a deprecation warning on every call
	 * @return mixed The title matcher service from MediaWikiServices
	 */
	protected static function defaultNearMatcher() {
		wfDeprecated( __METHOD__, '1.40' );
		return MediaWikiServices::getInstance()->getTitleMatcher();
	}

	/**
	 * Get chars legal for search.
	 *
	 * @param int $type One of the CHARS_* flags; the base implementation
	 *  returns the same value for every flag
	 * @return string
	 */
	public function legalSearchChars( $type = self::CHARS_ALL ) {
		return "A-Za-z_'.0-9\\x80-\\xFF\\-";
	}

	/**
	 * Set the maximum number of results to return and how many to skip before
	 * returning the first.
	 *
	 * Both values are coerced with intval().
	 *
	 * @param int $limit
	 * @param int $offset
	 */
	public function setLimitOffset( $limit, $offset = 0 ) {
		$this->limit = intval( $limit );
		$this->offset = intval( $offset );
	}

	/**
	 * Set which namespaces the search should include.
	 *
	 * Negative namespace IDs are always kept; any other ID is kept only when it
	 * is one of the configured searchable namespaces. A falsy argument (null,
	 * empty array) results in an empty namespace list.
	 *
	 * @param int[]|null $namespaces
	 */
	public function setNamespaces( $namespaces ) {
		if ( $namespaces ) {
			// Filter namespaces to only keep valid ones
			$validNs = MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces();
			$namespaces = array_filter( $namespaces, static function ( $ns ) use( $validNs ) {
				return $ns < 0 || isset( $validNs[$ns] );
			} );
		} else {
			$namespaces = [];
		}
		$this->namespaces = $namespaces;
	}

	/**
	 * Set whether the searcher should try to build a suggestion.
	 *
	 * @param bool $showSuggestion
	 */
	public function setShowSuggestion( $showSuggestion ) {
		$this->showSuggestion = $showSuggestion;
	}

	/**
	 * Get the valid sort directions.
	 *
	 * @return string[] Only DEFAULT_SORT in the base implementation
	 */
	public function getValidSorts() {
		return [ self::DEFAULT_SORT ];
	}

	/**
	 * Set the sort direction of the search results.
	 *
	 * @param string $sort Must be one of the values returned by getValidSorts()
	 * @throws InvalidArgumentException When $sort is not a valid sort
	 */
	public function setSort( $sort ) {
		$validSorts = $this->getValidSorts();
		// Strict comparison: a loose in_array() would let non-string values such
		// as true through, since true == 'relevance'.
		if ( !in_array( $sort, $validSorts, true ) ) {
			throw new InvalidArgumentException( "Invalid sort: $sort. " .
				"Must be one of: " . implode( ', ', $validSorts ) );
		}
		$this->sort = $sort;
	}

	/**
	 * Get the sort direction of the search results.
	 *
	 * @return string
	 */
	public function getSort() {
		return $this->sort;
	}

	/**
	 * Hook point for engines that parse prefixes (such as "all:" or namespace
	 * names) out of the query. The base implementation returns the query
	 * unchanged.
	 *
	 * @param string $query
	 * @return string
	 */
	public function replacePrefixes( $query ) {
		return $query;
	}

	/**
	 * Parse some common prefixes: all (search everything) or namespace names.
	 *
	 * @param string $query
	 * @param bool $withAllKeyword Whether the localized 'searchall' keyword and
	 *  the generic "all:" keyword are recognised
	 * @param bool $withPrefixSearchExtractNamespaceHook Whether the
	 *  PrefixSearchExtractNamespace hook is consulted when the text before the
	 *  colon is not a namespace name
	 * @return false|array False when no prefix was recognised; otherwise a
	 *  two-element array [ remaining query, extracted namespaces ]. The second
	 *  element is null when an "all" keyword matched.
	 */
	public static function parseNamespacePrefixes(
		$query,
		$withAllKeyword = true,
		$withPrefixSearchExtractNamespaceHook = false
	) {
		if ( !str_contains( $query, ':' ) ) {
			// No colon, so there cannot be a prefix: nothing to do
			return false;
		}
		$parsed = $query;
		$extractedNamespace = null;

		$allQuery = false;
		if ( $withAllKeyword ) {
			$allkeywords = [ wfMessage( 'searchall' )->inContentLanguage()->text() . ':' ];
			// force all: so that we have a common syntax for all the wikis
			if ( !in_array( 'all:', $allkeywords, true ) ) {
				$allkeywords[] = 'all:';
			}

			foreach ( $allkeywords as $kw ) {
				if ( str_starts_with( $query, $kw ) ) {
					$parsed = substr( $query, strlen( $kw ) );
					$allQuery = true;
					break;
				}
			}
		}

		// The query is known to contain a colon at this point, so only the
		// "all" keyword outcome decides whether to look for a namespace.
		if ( !$allQuery ) {
			$colonPos = strpos( $query, ':' );
			$prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
			$index = MediaWikiServices::getInstance()->getContentLanguage()->getNsIndex( $prefix );
			if ( $index !== false ) {
				$extractedNamespace = [ $index ];
				$parsed = substr( $query, $colonPos + 1 );
			} elseif ( $withPrefixSearchExtractNamespaceHook ) {
				$hookNamespaces = [ NS_MAIN ];
				$hookQuery = $query;
				Hooks::runner()->onPrefixSearchExtractNamespace( $hookNamespaces, $hookQuery );
				if ( $hookQuery !== $query ) {
					// The hook signals success by rewriting the query
					$parsed = $hookQuery;
					$extractedNamespace = $hookNamespaces;
				} else {
					return false;
				}
			} else {
				return false;
			}
		}

		return [ $parsed, $extractedNamespace ];
	}

	/**
	 * Find snippet highlight settings for all users.
	 *
	 * @return int[] Two-element array [ context lines, context chars ]
	 */
	public static function userHighlightPrefs() {
		// Hard-coded for every user rather than read from preferences.
		$contextlines = 2;
		$contextchars = 75;
		return [ $contextlines, $contextchars ];
	}

	/**
	 * Create or update the search index record for the given page.
	 *
	 * The base implementation ignores all arguments and does nothing.
	 *
	 * @param mixed $id
	 * @param mixed $title
	 * @param mixed $text
	 */
	public function update( $id, $title, $text ) {
		// no-op
	}

	/**
	 * Update a search index record's title only.
	 *
	 * The base implementation ignores all arguments and does nothing.
	 *
	 * @param mixed $id
	 * @param mixed $title
	 */
	public function updateTitle( $id, $title ) {
		// no-op
	}

	/**
	 * Remove a page from the search index.
	 *
	 * The base implementation ignores all arguments and does nothing.
	 *
	 * @param mixed $id
	 * @param mixed $title
	 */
	public function delete( $id, $title ) {
		// no-op
	}

	/**
	 * Get the raw text for updating the index from a content object.
	 *
	 * @param Title $t Not used by this base implementation
	 * @param Content|null $c
	 * @return string The content's text for the search index, or '' when no
	 *  content is given
	 */
	public function getTextFromContent( Title $t, ?Content $c = null ) {
		return $c ? $c->getTextForSearchIndex() : '';
	}

	/**
	 * Whether the engine has already fully processed the text in
	 * getTextFromContent(). The base implementation always answers false.
	 *
	 * @return bool
	 */
	public function textAlreadyUpdatedForIndex() {
		return false;
	}

	/**
	 * Makes search simple string if it was namespaced.
	 *
	 * When a namespace prefix is recognised, the engine's namespaces are set to
	 * the extracted ones as a side effect.
	 *
	 * @param string $search
	 * @return string The query without its namespace prefix, or $search
	 *  unchanged when there was none
	 */
	protected function normalizeNamespaces( $search ) {
		$queryAndNs = self::parseNamespacePrefixes( $search, false, true );
		if ( $queryAndNs !== false ) {
			$this->setNamespaces( $queryAndNs[1] );
			return $queryAndNs[0];
		}
		return $search;
	}

	/**
	 * Perform an overfetch of completion search results.
	 *
	 * The limit is raised by one for the duration of the backend call so that
	 * callers can detect whether more results exist.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	protected function completionSearchBackendOverfetch( $search ) {
		$this->limit++;
		try {
			return $this->completionSearchBackend( $search );
		} finally {
			$this->limit--;
		}
	}

	/**
	 * Perform a completion search.
	 *
	 * Unless the Special namespace is being searched, the PrefixSearchBackend
	 * hook gets the first chance to produce results; otherwise the simple
	 * prefix search is used.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet
	 */
	protected function completionSearchBackend( $search ) {
		$results = [];

		$search = trim( $search );

		if ( !in_array( NS_SPECIAL, $this->namespaces ) && // We do not run hook on Special: search
			!$this->getHookRunner()->onPrefixSearchBackend(
				$this->namespaces, $search, $this->limit, $results, $this->offset )
		) {
			// False means hook worked.
			// FIXME: Yes, the API is weird. That's why it is going to be deprecated.

			return SearchSuggestionSet::fromStrings( $results );
		} else {
			// Hook did not do the job, use default simple search
			$results = $this->simplePrefixSearch( $search );
			return SearchSuggestionSet::fromTitles( $results );
		}
	}

	/**
	 * Perform a completion search.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet Empty set when $search is blank
	 */
	public function completionSearch( $search ) {
		if ( trim( $search ) === '' ) {
			return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
		}
		$search = $this->normalizeNamespaces( $search );
		$suggestions = $this->completionSearchBackendOverfetch( $search );
		return $this->processCompletionResults( $search, $suggestions );
	}

	/**
	 * Perform a completion search with variants.
	 *
	 * When the primary search leaves room below the (over-fetched) limit, the
	 * language variants of the query are searched as well and their results
	 * appended.
	 *
	 * @param string $search
	 * @return SearchSuggestionSet Empty set when $search is blank
	 */
	public function completionSearchWithVariants( $search ) {
		if ( trim( $search ) === '' ) {
			return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
		}
		$search = $this->normalizeNamespaces( $search );

		$results = $this->completionSearchBackendOverfetch( $search );
		$fallbackLimit = 1 + $this->limit - $results->getSize();
		if ( $fallbackLimit > 0 ) {
			$services = MediaWikiServices::getInstance();
			$fallbackSearches = $services->getLanguageConverterFactory()
				->getLanguageConverter( $services->getContentLanguage() )
				->autoConvertToAllVariants( $search );
			$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

			// The fallback searches run with a reduced limit. Remember the
			// caller's limit/offset and put them back afterwards, otherwise the
			// final shrink below would cut the results down to the fallback
			// limit and the engine would stay misconfigured for later calls.
			$originalLimit = $this->limit;
			$originalOffset = $this->offset;
			try {
				foreach ( $fallbackSearches as $fbs ) {
					$this->setLimitOffset( $fallbackLimit );
					$fallbackSearchResult = $this->completionSearch( $fbs );
					$results->appendAll( $fallbackSearchResult );
					$fallbackLimit -= $fallbackSearchResult->getSize();
					if ( $fallbackLimit <= 0 ) {
						break;
					}
				}
			} finally {
				$this->limit = $originalLimit;
				$this->offset = $originalOffset;
			}
		}
		return $this->processCompletionResults( $search, $results );
	}

	/**
	 * Extract titles from completion results.
	 *
	 * @param SearchSuggestionSet $completionResults
	 * @return array One suggested title per suggestion
	 */
	public function extractTitles( SearchSuggestionSet $completionResults ) {
		return $completionResults->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} );
	}

	/**
	 * Process completion search results.
	 *
	 * Shrinks the over-fetched set to the limit, drops suggestions whose title
	 * is not known, and makes sure an exact title match ends up first.
	 *
	 * @param string $search
	 * @param SearchSuggestionSet $suggestions Modified in place
	 * @return SearchSuggestionSet The same $suggestions instance
	 */
	protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
		// We over-fetched to determine pagination. Shrink back down if we have extra results
		// and mark if pagination is possible
		$suggestions->shrink( $this->limit );

		$search = trim( $search );
		// preload the titles with LinkBatch
		$linkBatchFactory = MediaWikiServices::getInstance()->getLinkBatchFactory();
		$lb = $linkBatchFactory->newLinkBatch( $suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle();
		} ) );
		$lb->setCaller( __METHOD__ );
		$lb->execute();

		$diff = $suggestions->filter( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle()->isKnown();
		} );
		if ( $diff > 0 ) {
			MediaWikiServices::getInstance()->getStatsdDataFactory()
				->updateCount( 'search.completion.missing', $diff );
		}

		$results = $suggestions->map( static function ( SearchSuggestion $sugg ) {
			return $sugg->getSuggestedTitle()->getPrefixedText();
		} );

		if ( $this->offset === 0 ) {
			// Rescore results with an exact title match
			// NOTE: in some cases like cross-namespace redirects
			// (frequently used as shortcuts e.g. WP:WP on huwiki) some
			// backends like Cirrus will return no results. We should still
			// try an exact title match to workaround this limitation
			$rescorer = new SearchExactMatchRescorer();
			$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
		} else {
			// No need to rescore if offset is not 0
			// The exact match must have been returned at position 0
			// if it existed.
			$rescoredResults = $results;
		}

		if ( count( $rescoredResults ) > 0 ) {
			// Strict search: with loose comparison numeric-looking titles such
			// as "1e3" and "1000" would be considered the same entry.
			$found = array_search( $rescoredResults[0], $results, true );
			if ( $found === false ) {
				// If the first result is not in the previous array it
				// means that we found a new exact match
				$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
				$suggestions->prepend( $exactMatch );
				$suggestions->shrink( $this->limit );
			} elseif ( $found > 0 ) {
				// The exact match exists but is not first: move it to the top
				$suggestions->rescore( $found );
			}
		}

		return $suggestions;
	}

	/**
	 * Simple prefix search for subpages.
	 *
	 * @param string $search
	 * @return array Empty when $search is blank, otherwise the result of
	 *  simplePrefixSearch()
	 */
	public function defaultPrefixSearch( $search ) {
		if ( trim( $search ) === '' ) {
			return [];
		}

		$search = $this->normalizeNamespaces( $search );
		return $this->simplePrefixSearch( $search );
	}

	/**
	 * Call out to simple search backend.
	 *
	 * Uses the database prefix search with the engine's current namespaces,
	 * limit and offset.
	 *
	 * @param string $search
	 * @return array Result of TitlePrefixSearch::defaultSearchBackend()
	 */
	protected function simplePrefixSearch( $search ) {
		// Use default database prefix search
		$backend = new TitlePrefixSearch;
		return $backend->defaultSearchBackend( $this->namespaces, $search, $this->limit, $this->offset );
	}

	/**
	 * Get a list of supported profiles.
	 *
	 * The base implementation supports none and returns null.
	 *
	 * @param string $profileType For example one of the *_PROFILE_TYPE constants
	 * @param User|null $user Not used by this base implementation
	 * @return null
	 */
	public function getProfiles( $profileType, ?User $user = null ) {
		return null;
	}

	/**
	 * Create a search field definition.
	 *
	 * The base implementation ignores its arguments and returns a
	 * NullIndexField, meaning the engine does not implement the field.
	 *
	 * @param mixed $name
	 * @param mixed $type
	 * @return NullIndexField
	 */
	public function makeSearchFieldMapping( $name, $type ) {
		return new NullIndexField();
	}

	/**
	 * Get fields for search index.
	 *
	 * Collects the fields of every distinct content handler, merging
	 * definitions that share a name, then lets extensions add more through the
	 * SearchIndexFields hook.
	 *
	 * @return array Field definitions keyed by field name
	 * @throws InvalidArgumentException When two definitions of the same field
	 *  cannot be merged
	 */
	public function getSearchIndexFields() {
		$contentHandlerFactory = MediaWikiServices::getInstance()->getContentHandlerFactory();
		$models = $contentHandlerFactory->getContentModels();
		$fields = [];
		$seenHandlers = new SplObjectStorage();
		foreach ( $models as $model ) {
			try {
				$handler = $contentHandlerFactory->getContentHandler( $model );
			} catch ( MWUnknownContentModelException $e ) {
				// If we can find no handler, ignore it
				continue;
			}
			// Several models can have the same handler, so avoid processing it repeatedly
			if ( $seenHandlers->contains( $handler ) ) {
				// We already did this one
				continue;
			}
			$seenHandlers->attach( $handler );
			$handlerFields = $handler->getFieldsForSearchIndex( $this );
			foreach ( $handlerFields as $fieldName => $fieldData ) {
				if ( empty( $fields[$fieldName] ) ) {
					$fields[$fieldName] = $fieldData;
				} else {
					// TODO: do we allow some clashes with the same type or reject all of them?
					$mergeDef = $fields[$fieldName]->merge( $fieldData );
					if ( !$mergeDef ) {
						throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
					}
					$fields[$fieldName] = $mergeDef;
				}
			}
		}
		// Hook to allow extensions to produce search mapping fields
		$this->getHookRunner()->onSearchIndexFields( $fields, $this );
		return $fields;
	}

	/**
	 * Augment search results with extra data.
	 *
	 * Augmentors are collected through the SearchResultsAugment hook. Row
	 * augmentors are wrapped in PerRowAugmentor so everything can be handled as
	 * a set augmentor; non-empty output is stored on the result set under the
	 * augmentor's name.
	 *
	 * @param ISearchResultSet $resultSet Modified in place
	 * @throws InvalidArgumentException When the same name is registered both as
	 *  a row and as a set augmentor
	 */
	public function augmentSearchResults( ISearchResultSet $resultSet ) {
		$setAugmentors = [];
		$rowAugmentors = [];
		$this->getHookRunner()->onSearchResultsAugment( $setAugmentors, $rowAugmentors );
		if ( !$setAugmentors && !$rowAugmentors ) {
			// We're done here
			return;
		}

		// Convert row augmentors to set augmentor
		foreach ( $rowAugmentors as $name => $row ) {
			if ( isset( $setAugmentors[$name] ) ) {
				throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
			}
			$setAugmentors[$name] = new PerRowAugmentor( $row );
		}

		foreach ( $setAugmentors as $name => $augmentor ) {
			$data = $augmentor->augmentAll( $resultSet );
			if ( $data ) {
				$resultSet->setAugmentedData( $name, $data );
			}
		}
	}

	/**
	 * Inject the hook container; also rebuilds the hook runner on top of it.
	 *
	 * @param HookContainer $hookContainer
	 */
	public function setHookContainer( HookContainer $hookContainer ) {
		$this->hookContainer = $hookContainer;
		$this->hookRunner = new HookRunner( $hookContainer );
	}

	/**
	 * Get a HookContainer, for running extension hooks or for hook metadata.
	 *
	 * @return HookContainer
	 */
	protected function getHookContainer(): HookContainer {
		if ( !$this->hookContainer ) {
			// This shouldn't be hit in core, but it is needed for CirrusSearch
			// which commonly creates a CirrusSearch object without cirrus being
			// configured in $wgSearchType/$wgSearchTypeAlternatives.
			$this->hookContainer = MediaWikiServices::getInstance()->getHookContainer();
		}
		return $this->hookContainer;
	}

	/**
	 * Get a HookRunner for running core hooks.
	 *
	 * Created on first use from getHookContainer().
	 *
	 * @return HookRunner
	 */
	protected function getHookRunner(): HookRunner {
		if ( !$this->hookRunner ) {
			$this->hookRunner = new HookRunner( $this->getHookContainer() );
		}
		return $this->hookRunner;
	}

}
const NS_MAIN
Definition: Defines.php:64
const NS_SPECIAL
Definition: Defines.php:53
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
if(!defined('MW_SETUP_CALLBACK'))
Definition: WebStart.php:88
static runner()
Get a HookRunner instance for calling hooks using the new interfaces.
Definition: Hooks.php:172
Exception thrown when an unregistered content model is requested.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:568
Service locator for MediaWiki core services.
Service implementation of near match title search.
Represents a title within MediaWiki.
Definition: Title.php:82
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook...
Contain a class for special pages.
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getNearMatcher(Config $config)
Get service class for finding near matches.
getHookRunner()
Get a HookRunner for running core hooks.
searchTitle( $term)
Perform a title-only search query and return a result set.
supports( $feature)
bool $showSuggestion
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
update( $id, $title, $text)
Create or update the search index record for the given page.
setNamespaces( $namespaces)
Set which namespaces the search should include.
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
array $features
Feature values.
replacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names and set the list of namespaces...
string[] $searchTerms
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
defaultPrefixSearch( $search)
Simple prefix search for subpages.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
searchArchiveTitle( $term)
Perform a title search in the article archive.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
setFeatureData( $feature, $data)
Way to pass custom data for engines.
completionSearchBackend( $search)
Perform a completion search.
const DEFAULT_SORT
getTextFromContent(Title $t, Content $c=null)
Get the raw text for updating the index from a content object. Nicer search backends could possibly do...
getProfiles( $profileType, User $user=null)
Get a list of supported profiles.
int[]|null $namespaces
getSort()
Get the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
getSearchIndexFields()
Get fields for search index.
getValidSorts()
Get the valid sort directions.
static userHighlightPrefs()
Find snippet highlight settings for all users.
updateTitle( $id, $title)
Update a search index record's title only.
string $prefix
completionSearchWithVariants( $search)
Perform a completion search with variants.
doSearchText( $term)
Perform a full text search query and return a result set.
normalizeNamespaces( $search)
Makes search simple string if it was namespaced.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
getHookContainer()
Get a HookContainer, for running extension hooks or for hook metadata.
completionSearch( $search)
Perform a completion search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
simplePrefixSearch( $search)
Call out to simple search backend.
setSort( $sort)
Set the sort direction of the search results.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features.
setHookContainer(HookContainer $hookContainer)
searchText( $term)
Perform a full text search query and return a result set.
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
A set of search suggestions.
filter( $callback)
Filter the suggestions array.
rescore( $key)
Move the suggestion at index $key to the first position.
shrink( $limit)
Remove any extra elements in the suggestions set.
static fromStrings(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a string array.
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestion based on a title array.
map( $callback)
Call array_map on the suggestions array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
A search suggestion.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
static fromTitle( $score, Title $title)
Create suggestion from Title.
static newGood( $value=null)
Factory function for good results.
Definition: StatusValue.php:85
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition: Status.php:46
Performs prefix search, returning Title objects.
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition: User.php:71
Interface for configuration instances.
Definition: Config.php:30
Base interface for representing page content.
Definition: Content.php:37
A set of SearchEngine results.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...