MediaWiki  1.27.4
SearchEngine.php
Go to the documentation of this file.
1 <?php
29 
34 abstract class SearchEngine {
/** @var string Defaults to the empty string; not read by the methods visible in this listing */
36  public $prefix = '';
37 
/** @var int[]|null Namespace indexes the search is restricted to; replacePrefixes() may set this to null */
39  public $namespaces = [ NS_MAIN ];
40 
/** @var int Maximum number of results to return (see setLimitOffset()) */
42  protected $limit = 10;
43 
/** @var int Number of results to skip before the first returned one (see setLimitOffset()) */
45  protected $offset = 0;
46 
/** @var array|string Search terms; not touched by the methods visible in this listing */
48  protected $searchTerms = [];
49 
/** @var bool Whether the searcher should try to build a suggestion (see setShowSuggestion()) */
51  protected $showSuggestion = true;
/** @var string Current sort direction; written by setSort() and read by getSort() */
52  private $sort = 'relevance';
53 
/** @var array Feature values, keyed by feature name (see setFeatureData()) */
55  protected $features = [];
56 
/** Integer flag for legalSearchChars(): all chars allowed in a search query */
58  const CHARS_ALL = 1;
59 
/** Integer flag for legalSearchChars(): all chars allowed in a search term */
61  const CHARS_NO_SYNTAX = 2;
62 
/**
 * Perform a full text search query and return a result set.
 *
 * This base implementation performs no search and always returns null;
 * presumably concrete engines override it.
 *
 * @param string $term Raw search query
 * @return null Always null in this base implementation
 */
public function searchText( $term ) {
	return null;
}
74 
/**
 * Perform a title-only search query and return a result set.
 *
 * This base implementation performs no search and always returns null;
 * presumably concrete engines override it.
 *
 * @param string $term Raw search query
 * @return null Always null in this base implementation
 */
public function searchTitle( $term ) {
	return null;
}
86 
/**
 * Report whether this engine supports the named feature.
 *
 * The base implementation only reports 'search-update' as supported;
 * every other feature name (including 'title-suffix-filter') yields false.
 *
 * @param string $feature Feature name
 * @return bool
 */
public function supports( $feature ) {
	// Loose comparison on purpose: it mirrors the matching rules of a
	// switch statement.
	return $feature == 'search-update';
}
101 
/**
 * Way to pass custom data for engines.
 *
 * Stores $data in the feature map under the key $feature, replacing any
 * value previously stored for that feature.
 *
 * @param string $feature Feature name, used as the array key
 * @param mixed $data Value to associate with the feature
 */
public function setFeatureData( $feature, $data ) {
	$featureName = $feature;
	$this->features[$featureName] = $data;
}
112 
/**
 * Normalize text to be used for searching or updating the search index.
 *
 * The base implementation only asks the content language to segment the
 * string by word; derived classes may perform further conversions.
 *
 * @param string $string Text to normalize
 * @return string Result of the content language's segmentByWord()
 */
public function normalizeText( $string ) {
	// The content language lives in a global; without this declaration the
	// name below would refer to an undefined local variable.
	global $wgContLang;

	// Some languages such as Chinese require word segmentation
	return $wgContLang->segmentByWord( $string );
}
127 
/**
 * Transform the search term in cases when parts of the query came as
 * different GET params (when supported).
 *
 * The base implementation applies no transformation.
 *
 * @param string $term Search term as received
 * @return string The same term, unchanged
 */
public function transformSearchTerm( $term ) {
	$unchanged = $term;
	return $unchanged;
}
138 
/**
 * Get the service class for finding near matches.
 *
 * @param Config $config Configuration handed to the matcher
 * @return SearchNearMatcher New matcher bound to the content language
 */
public function getNearMatcher( Config $config ) {
	// The content language lives in a global; without this declaration the
	// name below would refer to an undefined local variable.
	global $wgContLang;

	return new SearchNearMatcher( $config, $wgContLang );
}
148 
/**
 * Get the near matcher for the default SearchEngine.
 *
 * Builds a search engine through the service container and asks it for a
 * matcher configured with the main config.
 *
 * @return SearchNearMatcher
 */
protected static function defaultNearMatcher() {
	$mainConfig = MediaWikiServices::getInstance()->getMainConfig();
	$defaultEngine = MediaWikiServices::getInstance()->newSearchEngine();

	return $defaultEngine->getNearMatcher( $mainConfig );
}
157 
/**
 * If an exact title match can be found, or a very slightly close match,
 * return the title.
 *
 * Delegates to the near matcher of the default search engine.
 *
 * @param string $searchterm Term to look up
 * @return mixed Whatever the near matcher's getNearMatch() returns
 */
public static function getNearMatch( $searchterm ) {
	$matcher = static::defaultNearMatcher();

	return $matcher->getNearMatch( $searchterm );
}
168 
/**
 * Do a near match (see SearchEngine::getNearMatch) and wrap it into a
 * result set.
 *
 * Delegates to the near matcher of the default search engine.
 *
 * @param string $searchterm Term to look up
 * @return mixed Whatever the near matcher's getNearMatchResultSet() returns
 */
public static function getNearMatchResultSet( $searchterm ) {
	$matcher = static::defaultNearMatcher();

	return $matcher->getNearMatchResultSet( $searchterm );
}
179 
/**
 * Get the characters that are legal in a search.
 *
 * The base implementation ignores $type and returns the same
 * character-class fragment for every flag value.
 *
 * @param int $type One of the CHARS_* constants (unused here)
 * @return string Character-class fragment listing the legal characters
 */
public static function legalSearchChars( $type = self::CHARS_ALL ) {
	$legalChars = "A-Za-z_'.0-9\\x80-\\xFF\\-";

	return $legalChars;
}
190 
/**
 * Set the maximum number of results to return and how many to skip
 * before returning the first.
 *
 * Both values are coerced to integers before being stored.
 *
 * @param int $limit Maximum number of results
 * @param int $offset Number of results to skip; defaults to 0
 */
public function setLimitOffset( $limit, $offset = 0 ) {
	$this->limit = intval( $limit );
	$this->offset = intval( $offset );
}
202 
/**
 * Set which namespaces the search should include.
 *
 * Negative namespace indexes are always kept; non-negative ones are kept
 * only when they are keys of searchableNamespaces(). An empty or falsy
 * argument results in an empty namespace list.
 *
 * @param int[]|null $namespaces Namespace indexes to search in
 */
public function setNamespaces( $namespaces ) {
	if ( $namespaces ) {
		// Filter namespaces to only keep valid ones
		$validNs = $this->searchableNamespaces();
		$namespaces = array_filter( $namespaces, function( $ns ) use( $validNs ) {
			return $ns < 0 || isset( $validNs[$ns] );
		} );
	} else {
		$namespaces = [];
	}
	$this->namespaces = $namespaces;
}
221 
/**
 * Set whether the searcher should try to build a suggestion.
 *
 * @param bool $showSuggestion True to let the engine build a suggestion
 */
public function setShowSuggestion( $showSuggestion ) {
	$this->showSuggestion = $showSuggestion;
}
232 
/**
 * Get the valid sort directions.
 *
 * The base implementation only knows 'relevance'.
 *
 * @return string[] Sort names accepted by setSort()
 */
public function getValidSorts() {
	$validSorts = [ 'relevance' ];

	return $validSorts;
}
243 
/**
 * Set the sort direction of the search results.
 *
 * @param string $sort One of the values returned by getValidSorts()
 * @throws InvalidArgumentException When $sort is not a valid sort
 */
public function setSort( $sort ) {
	$validSorts = $this->getValidSorts();
	// Strict comparison: with loose matching a non-string such as integer 0
	// can compare equal to a sort name and slip through validation.
	if ( !in_array( $sort, $validSorts, true ) ) {
		throw new InvalidArgumentException( "Invalid sort: $sort. " .
			"Must be one of: " . implode( ', ', $validSorts ) );
	}
	$this->sort = $sort;
}
259 
/**
 * Get the sort direction of the search results.
 *
 * @return string The value last stored by setSort(), 'relevance' by default
 */
public function getSort() {
	$currentSort = $this->sort;

	return $currentSort;
}
269 
/**
 * Parse some common prefixes: all (search everything) or namespace names.
 *
 * When the query starts with the localized "searchall" keyword followed
 * by a colon, the namespace restriction is cleared (set to null). When it
 * starts with a namespace name recognised by the content language, the
 * search is restricted to that namespace. In both cases the prefix is
 * stripped from the returned query, unless nothing but whitespace would
 * remain, in which case the original query is returned.
 *
 * @param string $query Raw query, possibly carrying a prefix
 * @return string Query with a recognised prefix removed
 */
public function replacePrefixes( $query ) {
	// The content language lives in a global; without this declaration the
	// name below would refer to an undefined local variable.
	global $wgContLang;

	$colonPos = strpos( $query, ':' );
	if ( $colonPos === false ) { // nothing to do
		return $query;
	}

	$parsed = $query;
	$allkeyword = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
	if ( strncmp( $query, $allkeyword, strlen( $allkeyword ) ) == 0 ) {
		$this->namespaces = null;
		$parsed = substr( $query, strlen( $allkeyword ) );
	} else {
		// A colon is known to be present here, so no second strpos() check
		// is needed.
		$prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
		$index = $wgContLang->getNsIndex( $prefix );
		if ( $index !== false ) {
			$this->namespaces = [ $index ];
			$parsed = substr( $query, strlen( $prefix ) + 1 );
		}
	}
	if ( trim( $parsed ) == '' ) {
		$parsed = $query; // prefix was the whole query
	}

	return $parsed;
}
303 
/**
 * Find snippet highlight settings for all users.
 *
 * Both values are hardcoded rather than read from any preference.
 *
 * @return int[] Two-element list: context lines (2), context chars (75)
 */
public static function userHighlightPrefs() {
	return [ 2, 75 ];
}
313 
/**
 * Create or update the search index record for the given page.
 *
 * The base implementation does nothing.
 *
 * @param int $id Page identifier
 * @param string $title Page title
 * @param string $text Page text
 */
public function update( $id, $title, $text ) {
	// no-op
}
326 
/**
 * Update a search index record's title only.
 *
 * The base implementation does nothing.
 *
 * @param int $id Page identifier
 * @param string $title New page title
 */
public function updateTitle( $id, $title ) {
	// no-op
}
338 
/**
 * Hook for removing a page from the search index.
 *
 * The base implementation does nothing.
 *
 * @param int $id Page identifier
 * @param string $title Page title
 */
public function delete( $id, $title ) {
	// no-op
}
350 
/**
 * Get the OpenSearch suggestion template.
 *
 * @deprecated since 1.25; emits a deprecation warning and forwards to
 *  ApiOpenSearch::getOpenSearchTemplate()
 * @return mixed Whatever ApiOpenSearch returns for the JSON suggestions type
 */
public static function getOpenSearchTemplate() {
	wfDeprecated( __METHOD__, '1.25' );

	$suggestionsType = 'application/x-suggestions+json';
	return ApiOpenSearch::getOpenSearchTemplate( $suggestionsType );
}
361 
/**
 * Get the raw text for updating the index from a content object.
 *
 * @param Title $t Title the content belongs to (unused here)
 * @param Content|null $c Content to extract the text from
 * @return string Text for the search index, or '' when no content is given
 */
public function getTextFromContent( Title $t, Content $c = null ) {
	if ( !$c ) {
		return '';
	}

	return $c->getTextForSearchIndex();
}
375 
/**
 * Tell whether getTextFromContent() already yields text that is fully
 * processed for the index.
 *
 * The base implementation always answers false.
 *
 * @return bool
 */
public function textAlreadyUpdatedForIndex() {
	$alreadyUpdated = false;

	return $alreadyUpdated;
}
386 
/**
 * Makes search a simple string if it was namespaced.
 *
 * Parses $search as a title. A local title outside the main namespace
 * restricts the search to that namespace; otherwise the currently
 * configured namespaces are kept and the PrefixSearchExtractNamespace
 * hook may adjust both the namespaces and the search string. The
 * resulting namespaces are stored through setNamespaces(), with NS_MEDIA
 * replaced by NS_FILE.
 *
 * @param string $search Search string, possibly with a namespace prefix
 * @return string Search string without the namespace prefix
 */
protected function normalizeNamespaces( $search ) {
	$namespaces = $this->namespaces;
	$parsedTitle = Title::newFromText( $search );

	if ( $parsedTitle && !$parsedTitle->isExternal() ) {
		$namespaces = [ $parsedTitle->getNamespace() ];
		$search = $parsedTitle->getText();
		if ( $namespaces[0] == NS_MAIN ) {
			// no explicit prefix, use default namespaces
			$namespaces = $this->namespaces;
			Hooks::run( 'PrefixSearchExtractNamespace', [ &$namespaces, &$search ] );
		}
	} else {
		// Probe with a dummy page name appended: if that parses to a local
		// title named exactly 'Dummy' outside the main namespace, the
		// search string was only a namespace prefix.
		$probe = Title::newFromText( $search . 'Dummy' );
		$isBarePrefix = $probe
			&& $probe->getText() == 'Dummy'
			&& $probe->getNamespace() != NS_MAIN
			&& !$probe->isExternal();

		if ( $isBarePrefix ) {
			$namespaces = [ $probe->getNamespace() ];
			$search = '';
		} else {
			Hooks::run( 'PrefixSearchExtractNamespace', [ &$namespaces, &$search ] );
		}
	}

	// Searches in the Media namespace are redirected to the File namespace.
	$namespaces = array_map( function( $index ) {
		if ( $index == NS_MEDIA ) {
			return NS_FILE;
		}
		return $index;
	}, $namespaces );

	$this->setNamespaces( $namespaces );
	return $search;
}
424 
/**
 * Perform a completion search.
 *
 * Unless the Special namespace is being searched, the PrefixSearchBackend
 * hook gets the first chance to produce results; the simple prefix search
 * is used when the hook is skipped or does not handle the request.
 *
 * @param string $search Search string; surrounding whitespace is trimmed
 * @return SearchSuggestionSet
 */
protected function completionSearchBackend( $search ) {
	$search = trim( $search );
	$results = [];

	// We do not run hook on Special: search
	if ( !in_array( NS_SPECIAL, $this->namespaces ) ) {
		$hookPassed = Hooks::run(
			'PrefixSearchBackend',
			[ $this->namespaces, $search, $this->limit, &$results, $this->offset ]
		);
		if ( !$hookPassed ) {
			// False means hook worked.
			// FIXME: Yes, the API is weird. That's why it is going to be deprecated.
			return SearchSuggestionSet::fromStrings( $results );
		}
	}

	// Hook did not do the job, use default simple search
	$results = $this->simplePrefixSearch( $search );
	return SearchSuggestionSet::fromTitles( $results );
}
451 
/**
 * Perform a completion search.
 *
 * @param string $search Search string, possibly with a namespace prefix
 * @return SearchSuggestionSet Empty set when $search is blank
 */
public function completionSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		// Return empty result
		return SearchSuggestionSet::emptySuggestionSet();
	}

	$normalized = $this->normalizeNamespaces( $search );
	$suggestions = $this->completionSearchBackend( $normalized );

	return $this->processCompletionResults( $normalized, $suggestions );
}
464 
/**
 * Perform a completion search with variants.
 *
 * Runs the normal completion search first. When it returns fewer results
 * than the limit, the language variants of the search string are searched
 * as well until the remaining slots are filled.
 *
 * NOTE(review): setLimitOffset() below overwrites the configured limit
 * and resets the offset to 0, and neither is restored afterwards, so the
 * final processCompletionResults() call sees the reduced limit. Confirm
 * this is intended.
 *
 * @param string $search Search string, possibly with a namespace prefix
 * @return SearchSuggestionSet Empty set when $search is blank
 */
public function completionSearchWithVariants( $search ) {
	if ( trim( $search ) === '' ) {
		return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
	}
	$search = $this->normalizeNamespaces( $search );

	$results = $this->completionSearchBackend( $search );
	$fallbackLimit = $this->limit - $results->getSize();
	if ( $fallbackLimit > 0 ) {
		// The content language lives in a global; without this declaration
		// the name below would refer to an undefined local variable.
		global $wgContLang;

		$fallbackSearches = $wgContLang->autoConvertToAllVariants( $search );
		$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

		foreach ( $fallbackSearches as $fbs ) {
			$this->setLimitOffset( $fallbackLimit );
			$fallbackSearchResult = $this->completionSearch( $fbs );
			$results->appendAll( $fallbackSearchResult );
			// Ask the set for its size, as done for $results above; count()
			// on the set object does not give the number of suggestions.
			$fallbackLimit -= $fallbackSearchResult->getSize();
			if ( $fallbackLimit <= 0 ) {
				break;
			}
		}
	}
	return $this->processCompletionResults( $search, $results );
}
496 
/**
 * Extract titles from completion results.
 *
 * @param SearchSuggestionSet $completionResults Suggestions to convert
 * @return array Result of mapping each suggestion to its suggested title
 */
public function extractTitles( SearchSuggestionSet $completionResults ) {
	$toTitle = function( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle();
	};

	return $completionResults->map( $toTitle );
}
507 
/**
 * Process completion search results.
 *
 * Preloads the suggested titles with a LinkBatch, then rescoring makes
 * sure an exact title match ends up first: a newly found exact match is
 * prepended (and the set shrunk back to the limit), while an exact match
 * already in the set is moved to the first position.
 *
 * @param string $search Search string; surrounding whitespace is trimmed
 * @param SearchSuggestionSet $suggestions Suggestions to process; modified
 *  in place
 * @return SearchSuggestionSet The same $suggestions object
 */
protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
	$search = trim( $search );
	// preload the titles with LinkBatch
	$titles = $suggestions->map( function( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle();
	} );
	$lb = new LinkBatch( $titles );
	$lb->setCaller( __METHOD__ );
	$lb->execute();

	$results = $suggestions->map( function( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->getPrefixedText();
	} );

	// Rescore results with an exact title match
	// NOTE: in some cases like cross-namespace redirects
	// (frequently used as shortcuts e.g. WP:WP on huwiki) some
	// backends like Cirrus will return no results. We should still
	// try an exact title match to workaround this limitation
	$rescorer = new SearchExactMatchRescorer();
	$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );

	if ( count( $rescoredResults ) > 0 ) {
		// Strict search: with loose comparison two different numeric-looking
		// titles (e.g. "1e3" and "1000") would be treated as the same entry.
		$found = array_search( $rescoredResults[0], $results, true );
		if ( $found === false ) {
			// If the first result is not in the previous array it
			// means that we found a new exact match
			$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
			$suggestions->prepend( $exactMatch );
			$suggestions->shrink( $this->limit );
		} elseif ( $found > 0 ) {
			// if the first result is not the same we need to rescore
			$suggestions->rescore( $found );
		}
	}

	return $suggestions;
}
554 
/**
 * Simple prefix search for subpages.
 *
 * @param string $search Search string, possibly with a namespace prefix
 * @return array Results of simplePrefixSearch(), or [] when $search is blank
 */
public function defaultPrefixSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return [];
	}

	$normalized = $this->normalizeNamespaces( $search );

	return $this->simplePrefixSearch( $normalized );
}
568 
/**
 * Call out to the simple search backend.
 *
 * Uses the default database prefix search with the currently configured
 * namespaces, limit and offset.
 *
 * @param string $search Search string, already stripped of its namespace
 * @return array Whatever TitlePrefixSearch::defaultSearchBackend() returns
 */
protected function simplePrefixSearch( $search ) {
	$prefixSearch = new TitlePrefixSearch();

	return $prefixSearch->defaultSearchBackend(
		$this->namespaces,
		$search,
		$this->limit,
		$this->offset
	);
}
580 
/**
 * Make a list of searchable namespaces and their canonical names.
 *
 * Thin static wrapper around the search engine config service.
 *
 * @return array Whatever the config service's searchableNamespaces() returns
 */
public static function searchableNamespaces() {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

	return $searchConfig->searchableNamespaces();
}
589 
/**
 * Extract default namespaces to search from the given user's settings.
 *
 * Thin static wrapper around the search engine config service.
 *
 * @param mixed $user User whose settings are consulted
 * @return array Whatever the config service's userNamespaces() returns
 */
public static function userNamespaces( $user ) {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

	return $searchConfig->userNamespaces( $user );
}
600 
/**
 * An array of namespace indexes to be searched by default.
 *
 * Thin static wrapper around the search engine config service.
 *
 * @return array Whatever the config service's defaultNamespaces() returns
 */
public static function defaultNamespaces() {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

	return $searchConfig->defaultNamespaces();
}
609 
/**
 * Get a list of namespace names useful for showing in tooltips and
 * preferences.
 *
 * Thin static wrapper around the search engine config service.
 *
 * @param array $namespaces Namespace indexes to convert
 * @return array Whatever the config service's namespacesAsText() returns
 */
public static function namespacesAsText( $namespaces ) {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

	return $searchConfig->namespacesAsText( $namespaces );
}
620 
/**
 * Load up the appropriate search engine class for the currently active
 * database backend, and return a configured instance.
 *
 * Thin static wrapper around the search engine factory service.
 *
 * @param string|null $type Engine type handed to the factory; null is
 *  passed through unchanged
 * @return SearchEngine Whatever the factory's create() returns
 */
public static function create( $type = null ) {
	$engineFactory = MediaWikiServices::getInstance()->getSearchEngineFactory();

	return $engineFactory->create( $type );
}
631 
/**
 * Return the search engines we support.
 *
 * Thin static wrapper around the search engine config service.
 *
 * @return array Whatever the config service's getSearchTypes() returns
 */
public static function getSearchTypes() {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

	return $searchConfig->getSearchTypes();
}
641 
642 }
643 
651  // no-op
652 }
Dummy class to be used when non-supported Database engine is present.
getSort()
Get the sort direction of the search results.
replacePrefixes($query)
Parse some common prefixes: all (search everything) or namespace names.
string $prefix
static getNearMatchResultSet($searchterm)
Do a near match (see SearchEngine::getNearMatch) and wrap it into a SearchResultSet.
external whereas SearchGetNearMatch runs after $term
Definition: hooks.txt:2562
transformSearchTerm($term)
Transform search term in cases when parts of the query came as different GET params (when supported)...
static searchableNamespaces()
Make a list of searchable namespaces and their canonical names.
searchText($term)
Perform a full text search query and return a result set.
null for the local wiki Added should default to null in handler for backwards compatibility add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1422
Search suggestion.
completionSearchBackend($search)
Perform a completion search.
static defaultNamespaces()
An array of namespaces indexes to be searched by default.
static namespacesAsText($namespaces)
Get a list of namespace names useful for showing in tooltips and preferences.
const NS_MAIN
Definition: Defines.php:70
to move a page</td >< td > &*You are moving the page across namespaces
static legalSearchChars($type=self::CHARS_ALL)
Get chars legal for search NOTE: usage as static is deprecated and preserved only as BC measure...
static userNamespaces($user)
Extract default namespaces to search from the given user's settings, returning a list of index number...
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
update($id, $title, $text)
Create or update the search index record for the given page.
setShowSuggestion($showSuggestion)
Set whether the searcher should try to build a suggestion.
defaultPrefixSearch($search)
Simple prefix search for subpages.
const NS_SPECIAL
Definition: Defines.php:59
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency MediaWikiServices
Definition: injection.txt:23
static newFromText($text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition: Title.php:277
static fromStrings(array $titles)
Builds a new set of suggestions based on a string array.
Represents a title within MediaWiki.
Definition: Title.php:34
when a variable name is used in a it is silently declared as a new local masking the global
Definition: design.txt:93
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
simplePrefixSearch($search)
Call out to simple search backend.
supports($feature)
completionSearchWithVariants($search)
Perform a completion search with variants.
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition: LinkBatch.php:31
normalizeNamespaces($search)
Makes search simple string if it was namespaced.
static getOpenSearchTemplate()
Get OpenSearch suggestion template.
setLimitOffset($limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first...
normalizeText($string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
map($callback)
Call array_map on the suggestions array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
defaultSearchBackend($namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook...
getTextFromContent(Title $t, Content $c=null)
Get the raw text for updating the index from a content object Nicer search backends could possibly do...
array string $searchTerms
const NS_MEDIA
Definition: Defines.php:58
searchTitle($term)
Perform a title-only search query and return a result set.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
Base interface for content objects.
Definition: Content.php:34
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock()-offset Set to overwrite offset parameter in $wgRequest set to ''to unsetoffset-wrap String Wrap the message in html(usually something like"&lt
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
static getNearMatch($searchterm)
If an exact title match can be found, or a very slightly close match, return the title.
getValidSorts()
Get the valid sort directions.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
wfDeprecated($function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
bool $showSuggestion
namespace and then decline to actually register it file or subcat img or subcat $title
Definition: hooks.txt:916
static run($event, array $args=[], $deprecatedVersion=null)
Call hook functions defined in Hooks::register and $wgHooks.
Definition: Hooks.php:131
const NS_FILE
Definition: Defines.php:76
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const CHARS_ALL
int flag for legalSearchChars: includes all chars allowed in a search query
static getSearchTypes()
Return the search engines we support.
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
Performs prefix search, returning Title objects.
updateTitle($id, $title)
Update a search index record's title only.
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account $user
Definition: hooks.txt:246
shrink($limit)
Remove any extra elements in the suggestions set.
setSort($sort)
Set the sort direction of the search results.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
int[] null $namespaces
setFeatureData($feature, $data)
Way to pass custom data for engines.
Implementation of near match title search.
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition: linkcache.txt:17
array $features
Feature values.
setNamespaces($namespaces)
Set which namespaces the search should include.
completionSearch($search)
Perform a completion search.
rescore($key)
Move the suggestion at index $key to the first position.
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition: design.txt:56
Search suggestion sets.
static create($type=null)
Load up the appropriate search engine class for the currently active database backend, and return a configured instance.
getNearMatcher(Config $config)
Get service class for finding near matches.
static fromTitle($score, Title $title)
Create suggestion from Title.
const CHARS_NO_SYNTAX
int flag for legalSearchChars: includes all chars allowed in a search term
static fromTitles(array $titles)
Builds a new set of suggestions based on a title array.
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition: hooks.txt:2342
processCompletionResults($search, SearchSuggestionSet $suggestions)
Process completion search results.
static userHighlightPrefs()
Find snippet highlight settings for all users.
static getOpenSearchTemplate($type)
Fetch the template for a type.