MediaWiki  master
SearchEngine.php
Go to the documentation of this file.
1 <?php
29 
34 abstract class SearchEngine {
/** Initial value of the private $sort setting; also the only entry returned by getValidSorts(). */
35  const DEFAULT_SORT = 'relevance';
36 
/** @var string */
38  public $prefix = '';
39 
/** @var int[]|null Namespaces the search should include; starts as the main namespace only. */
41  public $namespaces = [ NS_MAIN ];
42 
/** @var int Maximum number of results; written by setLimitOffset(). */
44  protected $limit = 10;
45 
/** @var int Number of results to skip; written by setLimitOffset(). */
47  protected $offset = 0;
48 
/** @var string[] */
53  protected $searchTerms = [];
54 
/** @var bool Whether the searcher should try to build a suggestion. */
56  protected $showSuggestion = true;
// Current sort; changed through setSort() and read through getSort().
57  private $sort = self::DEFAULT_SORT;
58 
/** @var array Feature values, keyed by feature name (see setFeatureData()/getFeatureData()). */
60  protected $features = [];
61 
/** Profile type for completionSearch. */
63  const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';
64 
/** Profile type for query independent ranking features. */
66  const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';
67 
/** Integer flag for legalSearchChars: includes all chars allowed in a search query. */
69  const CHARS_ALL = 1;
70 
/** Integer flag for legalSearchChars: includes all chars allowed in a search term. */
72  const CHARS_NO_SYNTAX = 2;
73 
/**
 * Perform a full text search query and return a result set.
 *
 * The query itself is delegated to doSearchText(); the call goes through
 * maybePaginate() so engines that do not paginate on their own are
 * over-fetched by one result and shrunk back to the limit.
 *
 * @param string $term Search term
 * @return mixed Whatever doSearchText() yields (null in this base class)
 */
public function searchText( $term ) {
	$runQuery = function () use ( $term ) {
		return $this->doSearchText( $term );
	};

	return $this->maybePaginate( $runQuery );
}
89 
/**
 * Perform a full text search query and return a result set.
 *
 * This base implementation runs no query at all.
 *
 * @param string $term Search term
 * @return null Always null here
 */
protected function doSearchText( $term ) {
	// No full text backend at this level.
	return null;
}
100 
/**
 * Perform a title search in the article archive.
 *
 * Thin public wrapper: the work is done by doSearchArchiveTitle(), and
 * no over-fetch/pagination handling is applied.
 *
 * @param string $term Search term
 * @return Status Result of doSearchArchiveTitle()
 */
public function searchArchiveTitle( $term ) {
	$status = $this->doSearchArchiveTitle( $term );

	return $status;
}
118 
/**
 * Perform a title search in the article archive.
 *
 * This base implementation finds nothing and reports success.
 *
 * @param string $term Search term
 * @return Status Good status wrapping an empty array
 */
protected function doSearchArchiveTitle( $term ) {
	$noMatches = [];

	return Status::newGood( $noMatches );
}
129 
/**
 * Perform a title-only search query and return a result set.
 *
 * Delegates to doSearchTitle() through maybePaginate(), which handles
 * the over-fetch needed to detect whether a next page exists.
 *
 * @param string $term Search term
 * @return mixed Whatever doSearchTitle() yields (null in this base class)
 */
public function searchTitle( $term ) {
	$runQuery = function () use ( $term ) {
		return $this->doSearchTitle( $term );
	};

	return $this->maybePaginate( $runQuery );
}
146 
/**
 * Perform a title-only search query and return a result set.
 *
 * This base implementation runs no query at all.
 *
 * @param string $term Search term
 * @return null Always null here
 */
protected function doSearchTitle( $term ) {
	// No title search backend at this level.
	return null;
}
157 
/**
 * Performs an overfetch and shrink operation to determine if the next
 * page is available, for engines that are not a PaginatingSearchEngine.
 *
 * @param Closure $fn Callback that runs the actual search
 * @return mixed The untouched return value of $fn
 */
private function maybePaginate( Closure $fn ) {
	// Engines that paginate on their own are called as-is.
	if ( $this instanceof PaginatingSearchEngine ) {
		return $fn();
	}

	// Ask for one extra result; the limit is restored even if $fn throws.
	++$this->limit;
	try {
		$outcome = $fn();
	} finally {
		--$this->limit;
	}

	// The callback may hand back the result set directly or wrapped in a Status.
	if ( $outcome instanceof ISearchResultSet ) {
		$set = $outcome;
	} else {
		$set = $outcome instanceof Status ? $outcome->getValue() : null;
	}

	if ( $set instanceof ISearchResultSet ) {
		$set->shrink( $this->limit );
	}

	return $outcome;
}
191 
/**
 * Report whether this engine supports a named feature.
 *
 * Only 'search-update' is reported as supported here; every other
 * value, including 'title-suffix-filter', yields false.
 *
 * @param string $feature Feature name
 * @return bool
 */
public function supports( $feature ) {
	// Loose comparison on purpose: it matches how a switch statement compares cases.
	return $feature == 'search-update';
}
206 
/**
 * Way to pass custom data for engines.
 *
 * Stores $data under $feature, replacing any value stored earlier.
 *
 * @param string $feature Key in the feature map
 * @param mixed $data Value to remember
 */
public function setFeatureData( $feature, $data ) {
	$this->features[ $feature ] = $data;
}
216 
/**
 * Way to retrieve custom data set by setFeatureData or by the engine itself.
 *
 * @param string $feature Key in the feature map
 * @return mixed Stored value, or null when the key is unset or holds null
 */
public function getFeatureData( $feature ) {
	if ( isset( $this->features[$feature] ) ) {
		return $this->features[$feature];
	}

	return null;
}
227 
/**
 * Normalize text for searching by running it through the content
 * language's word segmentation.
 *
 * @param string $string Text to normalize
 * @return string Result of Language::segmentByWord()
 */
public function normalizeText( $string ) {
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	// Some languages such as Chinese require word segmentation
	return $contentLanguage->segmentByWord( $string );
}
240 
/**
 * Get service class for finding near matches.
 *
 * @param Config $config Configuration handed to the matcher
 * @return SearchNearMatcher Matcher bound to the content language
 */
public function getNearMatcher( Config $config ) {
	$contentLanguage = MediaWikiServices::getInstance()->getContentLanguage();

	return new SearchNearMatcher( $config, $contentLanguage );
}
250 
/**
 * Get near matcher for default SearchEngine.
 *
 * Builds a fresh search engine from the service container and asks it
 * for a near matcher configured with the main config.
 *
 * @return SearchNearMatcher
 */
protected static function defaultNearMatcher() {
	$services = MediaWikiServices::getInstance();
	$mainConfig = $services->getMainConfig();
	$engine = $services->newSearchEngine();

	return $engine->getNearMatcher( $mainConfig );
}
260 
/**
 * Get chars legal for search.
 *
 * The base implementation ignores $type and always returns the same
 * character-class fragment.
 *
 * @param int $type One of the CHARS_* flags
 * @return string Fragment suitable for use inside a regex character class
 */
public function legalSearchChars( $type = self::CHARS_ALL ) {
	$legalChars = "A-Za-z_'.0-9\\x80-\\xFF\\-";

	return $legalChars;
}
270 
/**
 * Set the maximum number of results to return and how many to skip
 * before returning the first.
 *
 * Both values are coerced to integers before being stored.
 *
 * @param int $limit Maximum number of results
 * @param int $offset Number of results to skip
 */
public function setLimitOffset( $limit, $offset = 0 ) {
	$this->limit = (int)$limit;
	$this->offset = (int)$offset;
}
282 
/**
 * Set which namespaces the search should include.
 *
 * A non-empty list is reduced to negative namespace ids plus those the
 * search engine config reports as searchable; any empty/falsy input
 * results in an empty list.
 *
 * @param int[]|null $namespaces Namespace ids
 */
public function setNamespaces( $namespaces ) {
	if ( $namespaces ) {
		// Filter namespaces to only keep valid ones
		$validNs = MediaWikiServices::getInstance()->getSearchEngineConfig()->searchableNamespaces();
		$namespaces = array_filter( $namespaces, function ( $ns ) use( $validNs ) {
			return $ns < 0 || isset( $validNs[$ns] );
		} );
	} else {
		$namespaces = [];
	}
	$this->namespaces = $namespaces;
}
301 
/**
 * Set whether the searcher should try to build a suggestion.
 *
 * @param bool $showSuggestion New value of the flag
 */
public function setShowSuggestion( $showSuggestion ) {
	$this->showSuggestion = $showSuggestion;
}
312 
/**
 * Get the valid sort directions.
 *
 * The base engine knows only the default sort.
 *
 * @return string[] Sort names accepted by setSort()
 */
public function getValidSorts() {
	$sorts = [];
	$sorts[] = self::DEFAULT_SORT;

	return $sorts;
}
323 
/**
 * Set the sort direction of the search results.
 *
 * @param string $sort One of the values returned by getValidSorts()
 * @throws InvalidArgumentException When $sort is not a valid sort
 */
public function setSort( $sort ) {
	// Fetch once: the list is needed for both the check and the message.
	$validSorts = $this->getValidSorts();

	// Strict comparison: with loose matching, values such as integer 0 or
	// boolean true compare equal to a non-numeric sort name on PHP 7 and
	// would be stored as the sort.
	if ( !in_array( $sort, $validSorts, true ) ) {
		throw new InvalidArgumentException( "Invalid sort: $sort. " .
			"Must be one of: " . implode( ', ', $validSorts ) );
	}
	$this->sort = $sort;
}
339 
/**
 * Get the sort direction of the search results.
 *
 * @return string Value last accepted by setSort(), or the default sort
 */
public function getSort() {
	$currentSort = $this->sort;

	return $currentSort;
}
349 
/**
 * Hook point for parsing common prefixes out of a query.
 *
 * This base implementation performs no parsing and hands the query
 * back unchanged.
 *
 * @param string $query Search query
 * @return string The same query
 */
public function replacePrefixes( $query ) {
	// Nothing is stripped at this level.
	return $query;
}
362 
/**
 * Parse some common prefixes: all (search everything) or namespace names.
 *
 * @param string $query Search query
 * @param bool $withAllKeyword Whether the localized "all" keyword and the
 *  literal 'all:' are recognized
 * @param bool $withPrefixSearchExtractNamespaceHook Whether the
 *  PrefixSearchExtractNamespace hook is consulted when the prefix is not
 *  a namespace name
 * @return false|array False when nothing was extracted; otherwise
 *  [ remaining query, namespaces ] where namespaces is null for an
 *  "all" query and an array of namespace ids otherwise
 */
public static function parseNamespacePrefixes(
	$query,
	$withAllKeyword = true,
	$withPrefixSearchExtractNamespaceHook = false
) {
	$colonPos = strpos( $query, ':' );
	if ( $colonPos === false ) {
		// No colon, so there cannot be any prefix.
		return false;
	}

	if ( $withAllKeyword ) {
		$keywords = [ wfMessage( 'searchall' )->inContentLanguage()->text() . ":" ];
		// force all: so that we have a common syntax for all the wikis
		if ( !in_array( 'all:', $keywords ) ) {
			$keywords[] = 'all:';
		}

		foreach ( $keywords as $keyword ) {
			$keywordLength = strlen( $keyword );
			if ( strncmp( $query, $keyword, $keywordLength ) == 0 ) {
				// "all" query: no namespace restriction is reported.
				return [ substr( $query, $keywordLength ), null ];
			}
		}
	}

	// Everything before the first colon is a namespace candidate.
	$prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
	$index = MediaWikiServices::getInstance()->getContentLanguage()->getNsIndex( $prefix );
	if ( $index !== false ) {
		return [ substr( $query, strlen( $prefix ) + 1 ), [ $index ] ];
	}

	if ( !$withPrefixSearchExtractNamespaceHook ) {
		return false;
	}

	// Let hook handlers extract a namespace; an unchanged query means they did not.
	$hookNamespaces = [ NS_MAIN ];
	$hookQuery = $query;
	Hooks::run( 'PrefixSearchExtractNamespace', [ &$hookNamespaces, &$hookQuery ] );
	if ( $hookQuery === $query ) {
		return false;
	}

	return [ $hookQuery, $hookNamespaces ];
}
432 
/**
 * Find snippet highlight settings for all users.
 *
 * @return int[] Pair of [ context lines, context characters ]
 */
public static function userHighlightPrefs() {
	// NOTE(review): the two assignments were absent from this listing, so the
	// method returned undefined variables. 2 and 75 are believed to be the
	// established defaults - confirm against the highlighter's constants.
	$contextlines = 2;
	$contextchars = 75;
	return [ $contextlines, $contextchars ];
}
445 
/**
 * Create or update the search index record for the given page.
 *
 * Does nothing in this base class.
 *
 * @param int $id Page id
 * @param string $title Page title
 * @param string $text Text to index
 */
public function update( $id, $title, $text ) {
	// Intentionally empty.
}
458 
/**
 * Update a search index record's title only.
 *
 * Does nothing in this base class.
 *
 * @param int $id Page id
 * @param string $title Page title
 */
public function updateTitle( $id, $title ) {
	// Intentionally empty.
}
470 
/**
 * Removal counterpart of update().
 *
 * Does nothing in this base class.
 *
 * @param int $id Page id
 * @param string $title Page title
 */
public function delete( $id, $title ) {
	// Intentionally empty.
}
482 
/**
 * Get the raw text for updating the index from a content object.
 *
 * @param Title $t Page the content belongs to (unused here)
 * @param Content|null $c Content to extract text from
 * @return string Text for the search index, or '' when there is no content
 */
public function getTextFromContent( Title $t, Content $c = null ) {
	if ( !$c ) {
		return '';
	}

	return $c->getTextForSearchIndex();
}
497 
/**
 * Whether the text returned by getTextFromContent() has already been
 * fully processed for the index by the engine itself.
 *
 * @return bool Always false in this base class
 */
public function textAlreadyUpdatedForIndex() {
	$alreadyProcessed = false;

	return $alreadyProcessed;
}
509 
/**
 * Makes search simple string if it was namespaced.
 *
 * When a namespace prefix is recognized, the extracted namespaces are
 * applied through setNamespaces() as a side effect.
 *
 * @param string $search Search string, possibly namespace-prefixed
 * @return string Search string with the recognized prefix removed
 */
protected function normalizeNamespaces( $search ) {
	// The "all" keyword is not honoured here, but the extraction hook is.
	$parsed = self::parseNamespacePrefixes( $search, false, true );
	if ( $parsed === false ) {
		return $search;
	}

	list( $strippedSearch, $extractedNamespaces ) = $parsed;
	$this->setNamespaces( $extractedNamespaces );

	return $strippedSearch;
}
524 
/**
 * Perform an overfetch of completion search results.
 *
 * Runs completionSearchBackend() with the limit temporarily raised by
 * one; the limit is put back even if the backend throws.
 *
 * @param string $search Search string
 * @return SearchSuggestionSet Result of completionSearchBackend()
 */
protected function completionSearchBackendOverfetch( $search ) {
	++$this->limit;
	try {
		$suggestions = $this->completionSearchBackend( $search );
	} finally {
		--$this->limit;
	}

	return $suggestions;
}
540 
/**
 * Perform a completion search.
 *
 * Gives the PrefixSearchBackend hook the first chance to answer, unless
 * the special namespace is being searched, and falls back to
 * simplePrefixSearch() otherwise.
 *
 * @param string $search Search string
 * @return SearchSuggestionSet
 */
protected function completionSearchBackend( $search ) {
	$search = trim( $search );

	// We do not run hook on Special: search
	if ( !in_array( NS_SPECIAL, $this->namespaces ) ) {
		$hookResults = [];
		$hookPassed = Hooks::run( 'PrefixSearchBackend',
			[ $this->namespaces, $search, $this->limit, &$hookResults, $this->offset ]
		);
		if ( !$hookPassed ) {
			// False means hook worked.
			// FIXME: Yes, the API is weird. That's why it is going to be deprecated.
			return SearchSuggestionSet::fromStrings( $hookResults );
		}
	}

	// Hook did not do the job, use default simple search
	$titles = $this->simplePrefixSearch( $search );

	return SearchSuggestionSet::fromTitles( $titles );
}
567 
/**
 * Perform a completion search.
 *
 * @param string $search Search string
 * @return SearchSuggestionSet Empty set for blank input; otherwise the
 *  over-fetched backend results after processCompletionResults()
 */
public function completionSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		// Return empty result
		return SearchSuggestionSet::emptySuggestionSet();
	}

	$search = $this->normalizeNamespaces( $search );

	return $this->processCompletionResults(
		$search,
		$this->completionSearchBackendOverfetch( $search )
	);
}
581 
/**
 * Perform a completion search with variants.
 *
 * Runs the normal completion search first. If it did not fill the
 * requested page (plus the one over-fetched row), the language variants
 * of the search string are tried in turn until enough results have been
 * collected.
 *
 * @param string $search Search string
 * @return SearchSuggestionSet
 */
public function completionSearchWithVariants( $search ) {
	if ( trim( $search ) === '' ) {
		return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
	}
	$search = $this->normalizeNamespaces( $search );

	$results = $this->completionSearchBackendOverfetch( $search );
	$fallbackLimit = 1 + $this->limit - $results->getSize();
	if ( $fallbackLimit > 0 ) {
		$fallbackSearches = MediaWikiServices::getInstance()->getContentLanguage()->
			autoConvertToAllVariants( $search );
		$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

		// The fallback searches run with a reduced limit and offset 0. Remember
		// the caller's settings so they are not lost: otherwise the final
		// processCompletionResults() would shrink to the fallback limit and the
		// engine would stay reconfigured after this call.
		$origLimit = $this->limit;
		$origOffset = $this->offset;

		foreach ( $fallbackSearches as $fbs ) {
			try {
				$this->setLimitOffset( $fallbackLimit );
				$fallbackSearchResult = $this->completionSearch( $fbs );
				$results->appendAll( $fallbackSearchResult );
				$fallbackLimit -= $fallbackSearchResult->getSize();
			} finally {
				// Restore even when a fallback search throws.
				$this->setLimitOffset( $origLimit, $origOffset );
			}
			if ( $fallbackLimit <= 0 ) {
				break;
			}
		}
	}
	return $this->processCompletionResults( $search, $results );
}
612 
/**
 * Extract titles from completion results.
 *
 * @param SearchSuggestionSet $completionResults Suggestions to convert
 * @return array One getSuggestedTitle() value per suggestion
 */
public function extractTitles( SearchSuggestionSet $completionResults ) {
	$toTitle = function ( SearchSuggestion $suggestion ) {
		return $suggestion->getSuggestedTitle();
	};

	return $completionResults->map( $toTitle );
}
623 
/**
 * Process completion search results.
 *
 * Shrinks the over-fetched set to the limit, drops suggestions whose
 * title is not known, and, for the first page only, makes sure an
 * exact title match ends up in first position.
 *
 * @param string $search Search string
 * @param SearchSuggestionSet $suggestions Over-fetched suggestions; modified in place
 * @return SearchSuggestionSet The same, processed, set
 */
protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
	// We over-fetched to determine pagination. Shrink back down if we have extra results
	// and mark if pagination is possible
	$suggestions->shrink( $this->limit );

	$search = trim( $search );
	// preload the titles with LinkBatch
	$lb = new LinkBatch( $suggestions->map( function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle();
	} ) );
	$lb->setCaller( __METHOD__ );
	$lb->execute();

	// NOTE(review): $diff is treated as a count of dropped suggestions -
	// confirm against SearchSuggestionSet::filter().
	$diff = $suggestions->filter( function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->isKnown();
	} );
	if ( $diff > 0 ) {
		MediaWikiServices::getInstance()->getStatsdDataFactory()
			->updateCount( 'search.completion.missing', $diff );
	}

	$results = $suggestions->map( function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->getPrefixedText();
	} );

	if ( $this->offset === 0 ) {
		// Rescore results with an exact title match
		// NOTE: in some cases like cross-namespace redirects
		// (frequently used as shortcuts e.g. WP:WP on huwiki) some
		// backends like Cirrus will return no results. We should still
		// try an exact title match to workaround this limitation
		$rescorer = new SearchExactMatchRescorer();
		$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
	} else {
		// No need to rescore if offset is not 0
		// The exact match must have been returned at position 0
		// if it existed.
		$rescoredResults = $results;
	}

	if ( count( $rescoredResults ) > 0 ) {
		// Strict search: both sides are title strings, and loose comparison
		// would treat numeric-looking titles such as "1e3" and "1000" as equal.
		$found = array_search( $rescoredResults[0], $results, true );
		if ( $found === false ) {
			// If the first result is not in the previous array it
			// means that we found a new exact match
			$exactTitle = Title::newFromText( $rescoredResults[0] );
			// Skip text that cannot be turned into a title instead of
			// failing on the Title type hint of fromTitle().
			if ( $exactTitle !== null ) {
				$suggestions->prepend( SearchSuggestion::fromTitle( 0, $exactTitle ) );
				$suggestions->shrink( $this->limit );
			}
		} elseif ( $found > 0 ) {
			// if the first result is not the same we need to rescore
			$suggestions->rescore( $found );
		}
	}

	return $suggestions;
}
689 
/**
 * Simple prefix search for subpages.
 *
 * @param string $search Search string
 * @return array Empty array for blank input; otherwise the result of
 *  simplePrefixSearch() on the namespace-normalized string
 */
public function defaultPrefixSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return [];
	}

	$normalized = $this->normalizeNamespaces( $search );

	return $this->simplePrefixSearch( $normalized );
}
703 
/**
 * Call out to simple search backend.
 *
 * @param string $search Search string
 * @return array Result of TitlePrefixSearch::defaultSearchBackend() for the
 *  current namespaces, limit and offset
 */
protected function simplePrefixSearch( $search ) {
	// Use default database prefix search
	$prefixSearch = new TitlePrefixSearch();

	return $prefixSearch->defaultSearchBackend(
		$this->namespaces,
		$search,
		$this->limit,
		$this->offset
	);
}
715 
/**
 * Get a list of supported profiles.
 *
 * The base engine defines no profiles.
 *
 * @param string $profileType Profile type, e.g. one of the *_PROFILE_TYPE constants
 * @param User|null $user User requesting the list
 * @return null Always null here
 */
public function getProfiles( $profileType, User $user = null ) {
	// No profiles at this level.
	return null;
}
735 
/**
 * Create a search field definition.
 *
 * The base engine implements no fields, so both arguments are ignored
 * and a null field is produced.
 *
 * @param string $name Field name
 * @param mixed $type Field type
 * @return NullIndexField
 */
public function makeSearchFieldMapping( $name, $type ) {
	$field = new NullIndexField();

	return $field;
}
747 
/**
 * Get fields for search index.
 *
 * Collects the index fields declared by every distinct content handler,
 * merging definitions that share a name, then lets the
 * SearchIndexFields hook add or change fields.
 *
 * @return array Field definitions keyed by field name
 * @throws InvalidArgumentException When two definitions of the same
 *  field cannot be merged
 */
public function getSearchIndexFields() {
	// NOTE(review): this assignment was absent from the listing, leaving
	// $models undefined in the loop below - confirm the source of the list.
	$models = ContentHandler::getContentModels();
	$fields = [];
	$seenHandlers = new SplObjectStorage();
	foreach ( $models as $model ) {
		try {
			$handler = ContentHandler::getForModelID( $model );
		}
		catch ( MWUnknownContentModelException $e ) {
			// If we can find no handler, ignore it
			continue;
		}
		// Several models can have the same handler, so avoid processing it repeatedly
		if ( $seenHandlers->contains( $handler ) ) {
			// We already did this one
			continue;
		}
		$seenHandlers->attach( $handler );
		$handlerFields = $handler->getFieldsForSearchIndex( $this );
		foreach ( $handlerFields as $fieldName => $fieldData ) {
			if ( empty( $fields[$fieldName] ) ) {
				$fields[$fieldName] = $fieldData;
			} else {
				// TODO: do we allow some clashes with the same type or reject all of them?
				$mergeDef = $fields[$fieldName]->merge( $fieldData );
				if ( !$mergeDef ) {
					throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
				}
				$fields[$fieldName] = $mergeDef;
			}
		}
	}
	// Hook to allow extensions to produce search mapping fields
	Hooks::run( 'SearchIndexFields', [ &$fields, $this ] );
	return $fields;
}
789 
/**
 * Augment search results with extra data.
 *
 * Augmentors are collected through the SearchResultsAugment hook. Row
 * augmentors are wrapped so that they can be run like set augmentors.
 *
 * @param ISearchResultSet $resultSet Result set to augment in place
 * @throws InvalidArgumentException When the same name is registered as
 *  both a row and a set augmentor
 */
public function augmentSearchResults( ISearchResultSet $resultSet ) {
	$setAugmentors = [];
	$rowAugmentors = [];
	Hooks::run( "SearchResultsAugment", [ &$setAugmentors, &$rowAugmentors ] );

	if ( !( $setAugmentors || $rowAugmentors ) ) {
		// Nobody registered anything.
		return;
	}

	// Wrap every row augmentor so it can be treated as a set augmentor.
	foreach ( $rowAugmentors as $augmentorName => $rowAugmentor ) {
		if ( isset( $setAugmentors[$augmentorName] ) ) {
			throw new InvalidArgumentException( "Both row and set augmentors are defined for $augmentorName" );
		}
		$setAugmentors[$augmentorName] = new PerRowAugmentor( $rowAugmentor );
	}

	// Run them all; empty results are not attached.
	foreach ( $setAugmentors as $augmentorName => $setAugmentor ) {
		$augmented = $setAugmentor->augmentAll( $resultSet );
		if ( $augmented ) {
			$resultSet->setAugmentedData( $augmentorName, $augmented );
		}
	}
}
823 }
getSort()
Get the sort direction of the search results.
string $prefix
updateTitle( $id, $title)
Update a search index record's title only.
searchTitle( $term)
Perform a title-only search query and return a result set.
Search suggestion.
const NS_MAIN
Definition: Defines.php:60
replacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names and set the list of namespaces...
normalizeNamespaces( $search)
Makes search simple string if it was namespaced.
static parseNamespacePrefixes( $query, $withAllKeyword=true, $withPrefixSearchExtractNamespaceHook=false)
Parse some common prefixes: all (search everything) or namespace names.
static getContentModels()
completionSearchBackendOverfetch( $search)
Perform an overfetch of completion search results.
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
doSearchText( $term)
Perform a full text search query and return a result set.
const NS_SPECIAL
Definition: Defines.php:49
simplePrefixSearch( $search)
Call out to simple search backend.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
Null index field - means search engine does not implement this field.
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
static fromStrings(array $titles, $hasMoreResults=false)
Builds a new set of suggestions based on a string array.
The User object encapsulates all of the user-specific settings (user_id, name, rights, email address, options, last login time).
Definition: User.php:51
Class representing a list of titles. The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:34
update( $id, $title, $text)
Create or update the search index record for the given page.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Interface for configuration instances.
Definition: Config.php:28
static getForModelID( $modelId)
Returns the ContentHandler singleton for the given model ID.
setNamespaces( $namespaces)
Set which namespaces the search should include.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
searchArchiveTitle( $term)
Perform a title search in the article archive.
getTextFromContent(Title $t, Content $c=null)
Get the raw text for updating the index from a content object. Nicer search backends could possibly do...
static newGood( $value=null)
Factory function for good results.
Definition: StatusValue.php:81
A utility class to rescore search results by looking for an exact match in the db and add the page f...
legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first...
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
getProfiles( $profileType, User $user=null)
Get a list of supported profiles.
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
getValidSorts()
Get the valid sort directions.
filter( $callback)
Filter the suggestions array.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
setSort( $sort)
Set the sort direction of the search results.
makeSearchFieldMapping( $name, $type)
Create a search field definition.
const DEFAULT_SORT
const COMPLETION_PROFILE_TYPE
Profile type for completionSearch.
doSearchArchiveTitle( $term)
Perform a title search in the article archive.
bool $showSuggestion
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const CHARS_ALL
Integer flag for legalSearchChars: includes all chars allowed in a search query.
augmentSearchResults(ISearchResultSet $resultSet)
Augment search results with extra data.
setFeatureData( $feature, $data)
Way to pass custom data for engines.
Performs prefix search, returning Title objects.
rescore( $key)
Move the suggestion at index $key to the first position.
const FT_QUERY_INDEP_PROFILE_TYPE
Profile type for query independent ranking features.
int[]|null $namespaces
static fromTitle( $score, Title $title)
Create suggestion from Title.
string[] $searchTerms
Implementation of near match title search.
completionSearchBackend( $search)
Perform a completion search.
completionSearchWithVariants( $search)
Perform a completion search with variants.
Marker class for search engines that can handle their own pagination, by reporting in their ISearchRe...
static fromTitles(array $titles, $hasMoreResults=false)
Builds a new set of suggestions based on a title array.
array $features
Feature values.
defaultPrefixSearch( $search)
Simple prefix search for subpages.
getSearchIndexFields()
Get fields for search index.
maybePaginate(Closure $fn)
Performs an overfetch and shrink operation to determine if the next page is available for search engi...
Search suggestion sets.
Perform augmentation of each row and return composite result, indexed by ID.
completionSearch( $search)
Perform a completion search.
doSearchTitle( $term)
Perform a title-only search query and return a result set.
wfMessage( $key,... $params)
This is the function for getting translated interface messages.
supports( $feature)
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook...
getNearMatcher(Config $config)
Get service class for finding near matches.
searchText( $term)
Perform a full text search query and return a result set.
const CHARS_NO_SYNTAX
Integer flag for legalSearchChars: includes all chars allowed in a search term.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
map( $callback)
Call array_map on the suggestions array.
static userHighlightPrefs()
Find snippet highlight settings for all users.
static run( $event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:200
shrink( $limit)
Remove any extra elements in the suggestions set.
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:319