MediaWiki REL1_27
SearchEngine.php
Go to the documentation of this file.
1<?php
29
34abstract class SearchEngine {
// String prefix, empty by default. NOTE(review): not read or written by any
// method visible in this listing — confirm its purpose against callers.
36 public $prefix = '';
37
// Namespace indexes the search is restricted to; defaults to the main
// namespace. Written by setNamespaces() and replacePrefixes().
39 public $namespaces = [ NS_MAIN ];
40
// Maximum number of results to return; set through setLimitOffset().
42 protected $limit = 10;
43
// Number of results to skip before the first returned one; set through
// setLimitOffset().
45 protected $offset = 0;
46
// Search terms. NOTE(review): unused in the visible methods; presumably
// filled in by concrete engines — confirm.
48 protected $searchTerms = [];
49
// Whether the searcher should try to build a suggestion; set through
// setShowSuggestion().
51 protected $showSuggestion = true;
// Current sort direction; only changed through setSort(), which validates
// it against getValidSorts().
52 private $sort = 'relevance';
53
// Custom feature data keyed by feature name; filled by setFeatureData().
55 protected $features = [];
56
// Flag for legalSearchChars(): all characters allowed in a search query.
58 const CHARS_ALL = 1;
59
// Flag for legalSearchChars(): all characters allowed in a search term.
61 const CHARS_NO_SYNTAX = 2;
62
/**
 * Perform a full text search query and return a result set.
 *
 * The base implementation performs no search and always yields null;
 * concrete engines are expected to override it.
 *
 * @param string $term Raw search term
 * @return null
 */
function searchText( $term ) {
	// Full text search is not available in the base engine.
	return null;
}
74
/**
 * Perform a title-only search query and return a result set.
 *
 * The base implementation performs no search and always yields null;
 * concrete engines are expected to override it.
 *
 * @param string $term Raw search term
 * @return null
 */
function searchTitle( $term ) {
	// Title search is not available in the base engine.
	return null;
}
86
/**
 * Report whether this engine supports an optional feature.
 *
 * The base engine only advertises 'search-update'. Every other feature
 * name, including 'title-suffix-filter', is reported as unsupported.
 *
 * @param string $feature Feature name
 * @return bool
 */
public function supports( $feature ) {
	// Loose comparison is deliberate: it keeps the matching rules of the
	// switch statement this guard replaces.
	if ( $feature == 'search-update' ) {
		return true;
	}

	return false;
}
101
/**
 * Way to pass custom data for engines.
 *
 * Stores the value under the feature name, replacing any value stored
 * for that feature before.
 *
 * @param string $feature Feature name, used as the storage key
 * @param mixed $data Value to remember for that feature
 */
public function setFeatureData( $feature, $data ) {
	$this->features[ $feature ] = $data;
}
112
/**
 * Prepare text for the search index.
 *
 * When overridden in a derived class this performs database-specific
 * conversions. The base implementation only lets the content language
 * segment the text into words.
 *
 * @param string $string Text to normalize
 * @return string Result of the content language's word segmentation
 */
public function normalizeText( $string ) {
	// Must be declared: otherwise $wgContLang is an undefined local variable
	// and the method call below fails.
	global $wgContLang;

	// Some languages such as Chinese require word segmentation
	return $wgContLang->segmentByWord( $string );
}
127
/**
 * Transform the search term in cases where parts of the query arrived
 * as separate request parameters.
 *
 * The base implementation has nothing to merge and hands the term back
 * untouched.
 *
 * @param string $term Search term as entered
 * @return string The same term, unchanged
 */
public function transformSearchTerm( $term ) {
	// Nothing to transform in the base engine.
	return $term;
}
138
/**
 * Get the service class for finding near matches.
 *
 * @param Config $config Configuration handed to the matcher
 * @return SearchNearMatcher Matcher bound to $config and the content language
 */
public function getNearMatcher( Config $config ) {
	// Must be declared: otherwise the matcher would be built with an
	// undefined local variable instead of the content language.
	global $wgContLang;

	return new SearchNearMatcher( $config, $wgContLang );
}
148
/**
 * Get the near matcher of the default SearchEngine.
 *
 * @return SearchNearMatcher Whatever getNearMatcher() of a freshly created
 *  engine returns for the main configuration
 */
protected static function defaultNearMatcher() {
	$services = MediaWikiServices::getInstance();

	// Fetch the configuration first, then build the engine that receives it.
	$mainConfig = $services->getMainConfig();
	$engine = $services->newSearchEngine();

	return $engine->getNearMatcher( $mainConfig );
}
157
/**
 * If an exact title match can be found, or a very slightly close match,
 * return the title.
 *
 * Delegates to the default near matcher.
 *
 * @param string $searchterm Term to look up
 * @return mixed Whatever the matcher's getNearMatch() returns
 */
public static function getNearMatch( $searchterm ) {
	$matcher = static::defaultNearMatcher();

	return $matcher->getNearMatch( $searchterm );
}
168
/**
 * Do a near match (see getNearMatch()) and wrap it into a result set.
 *
 * Delegates to the default near matcher.
 *
 * @param string $searchterm Term to look up
 * @return mixed Whatever the matcher's getNearMatchResultSet() returns
 */
public static function getNearMatchResultSet( $searchterm ) {
	$matcher = static::defaultNearMatcher();

	return $matcher->getNearMatchResultSet( $searchterm );
}
179
/**
 * Get the characters that are legal in a search.
 *
 * The returned text is a character-class fragment (ranges plus an escaped
 * hyphen). The base implementation returns the same set whatever $type is.
 *
 * @param int $type One of the CHARS_* flags; ignored here
 * @return string
 */
public static function legalSearchChars( $type = self::CHARS_ALL ) {
	$legalChars = "A-Za-z_'.0-9\\x80-\\xFF\\-";

	return $legalChars;
}
190
/**
 * Set the maximum number of results to return and how many to skip
 * before returning the first.
 *
 * Both values are converted to integers before being stored.
 *
 * @param int $limit Maximum number of results
 * @param int $offset Number of results to skip, 0 when omitted
 */
function setLimitOffset( $limit, $offset = 0 ) {
	$this->limit = (int)$limit;
	$this->offset = (int)$offset;
}
202
/**
 * Set which namespaces the search should include.
 *
 * Negative indexes are always kept; any other index is kept only when it
 * is a key of searchableNamespaces(). An empty or null argument results
 * in an empty namespace list.
 *
 * @param int[]|null $namespaces Namespace indexes
 */
function setNamespaces( $namespaces ) {
	if ( $namespaces ) {
		// Filter namespaces to only keep valid ones
		$validNs = $this->searchableNamespaces();
		$namespaces = array_filter( $namespaces, function( $ns ) use( $validNs ) {
			return $ns < 0 || isset( $validNs[$ns] );
		} );
	} else {
		$namespaces = [];
	}
	$this->namespaces = $namespaces;
}
221
/**
 * Set whether the searcher should try to build a suggestion.
 *
 * The value is stored as given, without conversion.
 *
 * @param bool $showSuggestion Flag to remember
 */
function setShowSuggestion( $showSuggestion ) {
	// Stored verbatim; no validation is applied.
	$this->showSuggestion = $showSuggestion;
}
232
/**
 * Get the valid sort directions.
 *
 * The base engine knows a single one, 'relevance'.
 *
 * @return string[]
 */
public function getValidSorts() {
	$sorts = [ 'relevance' ];

	return $sorts;
}
243
/**
 * Set the sort direction of the search results.
 *
 * @param string $sort Sort direction; must be one of getValidSorts()
 * @throws InvalidArgumentException When $sort is not a valid sort direction
 */
public function setSort( $sort ) {
	$validSorts = $this->getValidSorts();

	// Strict membership test: a loose one would let values such as 0 or
	// true pass, because they compare equal to any non-numeric string.
	if ( !in_array( $sort, $validSorts, true ) ) {
		throw new InvalidArgumentException( "Invalid sort: $sort. " .
			"Must be one of: " . implode( ', ', $validSorts ) );
	}
	$this->sort = $sort;
}
259
/**
 * Get the sort direction of the search results.
 *
 * @return string The sort direction currently stored on this engine
 */
public function getSort() {
	$currentSort = $this->sort;

	return $currentSort;
}
269
/**
 * Parse some common prefixes: all (search everything) or namespace names.
 *
 * When the query starts with the localized "all" keyword, the namespace
 * restriction is dropped (set to null). When it starts with a namespace
 * name known to the content language, the search is restricted to that
 * namespace. In both cases the prefix is stripped from the returned query,
 * unless nothing but whitespace would remain.
 *
 * @param string $query Raw query
 * @return string Query without the recognised prefix
 */
function replacePrefixes( $query ) {
	global $wgContLang;

	$colonPos = strpos( $query, ':' );
	if ( $colonPos === false ) { // nothing to do
		return $query;
	}

	$parsed = $query;
	$allkeyword = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
	if ( strncmp( $query, $allkeyword, strlen( $allkeyword ) ) == 0 ) {
		$this->namespaces = null;
		$parsed = substr( $query, strlen( $allkeyword ) );
	} else {
		// A colon is known to be present here, so no second check is needed.
		$prefix = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
		$index = $wgContLang->getNsIndex( $prefix );
		if ( $index !== false ) {
			$this->namespaces = [ $index ];
			$parsed = substr( $query, strlen( $prefix ) + 1 );
		}
	}
	if ( trim( $parsed ) == '' ) {
		$parsed = $query; // prefix was the whole query
	}

	return $parsed;
}
303
/**
 * Find snippet highlight settings for all users.
 *
 * The values are hardcoded: 2 context lines and 75 context characters.
 *
 * @return int[] Two elements: context lines, then context characters
 */
public static function userHighlightPrefs() {
	return [
		2, // context lines
		75, // context characters
	];
}
313
/**
 * Create or update the search index record for the given page.
 *
 * Does nothing in the base engine.
 *
 * @param int $id Page identifier
 * @param string $title Page title
 * @param string $text Page text
 */
function update( $id, $title, $text ) {
	// Intentionally empty.
}
326
/**
 * Update a search index record's title only.
 *
 * Does nothing in the base engine.
 *
 * @param int $id Page identifier
 * @param string $title Page title
 */
function updateTitle( $id, $title ) {
	// Intentionally empty.
}
338
/**
 * Hook for removing a page from the search index.
 *
 * Does nothing in the base engine.
 *
 * @param int $id Page identifier
 * @param string $title Page title
 */
function delete( $id, $title ) {
	// Intentionally empty.
}
350
/**
 * Get the OpenSearch suggestion template.
 *
 * Deprecated since 1.25: emits a deprecation warning and forwards to
 * ApiOpenSearch for the JSON suggestions type.
 *
 * @return mixed Whatever ApiOpenSearch::getOpenSearchTemplate() returns
 */
public static function getOpenSearchTemplate() {
	wfDeprecated( __METHOD__, '1.25' );

	$template = ApiOpenSearch::getOpenSearchTemplate( 'application/x-suggestions+json' );

	return $template;
}
361
/**
 * Get the raw text for updating the index from a content object.
 *
 * @param Title $t Title the content belongs to; not used here
 * @param Content|null $c Content to extract text from
 * @return string Text for the search index, or '' when no content is given
 */
public function getTextFromContent( Title $t, Content $c = null ) {
	if ( $c === null ) {
		// No content, nothing to index.
		return '';
	}

	return $c->getTextForSearchIndex();
}
375
/**
 * Tell whether getTextFromContent() already returns text that is fully
 * processed for the index.
 *
 * The base engine does no such processing and therefore answers false.
 *
 * @return bool
 */
public function textAlreadyUpdatedForIndex() {
	// The base engine hands out unprocessed text.
	return false;
}
386
/**
 * Make the search a simple string if it was namespaced.
 *
 * Works out which namespaces the search text addresses, stores them via
 * setNamespaces() (with NS_MEDIA mapped to NS_FILE) and returns the search
 * text without its namespace part.
 *
 * @param string $search Search text, possibly carrying a namespace prefix
 * @return string Search text without the namespace prefix
 */
protected function normalizeNamespaces( $search ) {
	$targetNamespaces = $this->namespaces;
	$parsedTitle = Title::newFromText( $search );

	if ( $parsedTitle && !$parsedTitle->isExternal() ) {
		// The text is a valid local title: split off its namespace.
		$titleNamespace = $parsedTitle->getNamespace();
		$search = $parsedTitle->getText();
		if ( $titleNamespace == NS_MAIN ) {
			// no explicit prefix, keep the default namespaces and let
			// extensions extract one
			Hooks::run( 'PrefixSearchExtractNamespace', [ &$targetNamespaces, &$search ] );
		} else {
			$targetNamespaces = [ $titleNamespace ];
		}
	} else {
		// The text may be a bare namespace prefix such as "Talk:"; appending
		// a dummy page name turns that into a parseable title.
		$probe = Title::newFromText( $search . 'Dummy' );
		$isBarePrefix = $probe
			&& $probe->getText() == 'Dummy'
			&& $probe->getNamespace() != NS_MAIN
			&& !$probe->isExternal();

		if ( $isBarePrefix ) {
			$targetNamespaces = [ $probe->getNamespace() ];
			$search = '';
		} else {
			Hooks::run( 'PrefixSearchExtractNamespace', [ &$targetNamespaces, &$search ] );
		}
	}

	// Searches in NS_MEDIA are carried out in NS_FILE.
	$mediaToFile = function( $namespace ) {
		if ( $namespace == NS_MEDIA ) {
			return NS_FILE;
		}
		return $namespace;
	};

	$this->setNamespaces( array_map( $mediaToFile, $targetNamespaces ) );

	return $search;
}
424
/**
 * Perform a completion search.
 *
 * Unless the special namespace is being searched, the PrefixSearchBackend
 * hook gets the first chance to produce results. When the hook is skipped
 * or declines, the simple prefix search is used instead.
 *
 * @param string $search Search text
 * @return SearchSuggestionSet
 */
protected function completionSearchBackend( $search ) {
	$search = trim( $search );

	// We do not run the hook on Special: search
	if ( !in_array( NS_SPECIAL, $this->namespaces ) ) {
		$hookResults = [];
		$hookDeclined = Hooks::run(
			'PrefixSearchBackend',
			[ $this->namespaces, $search, $this->limit, &$hookResults, $this->offset ]
		);

		if ( !$hookDeclined ) {
			// False means hook worked.
			// FIXME: Yes, the API is weird. That's why it is going to be deprecated.
			return SearchSuggestionSet::fromStrings( $hookResults );
		}
	}

	// Hook did not do the job, use default simple search
	$titles = $this->simplePrefixSearch( $search );

	return SearchSuggestionSet::fromTitles( $titles );
}
451
/**
 * Perform a completion search.
 *
 * @param string $search Search text
 * @return SearchSuggestionSet Empty set for a blank search, otherwise the
 *  post-processed backend results
 */
public function completionSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		// Nothing to complete.
		return SearchSuggestionSet::emptySuggestionSet();
	}

	$normalized = $this->normalizeNamespaces( $search );
	$backendResults = $this->completionSearchBackend( $normalized );

	return $this->processCompletionResults( $normalized, $backendResults );
}
464
/**
 * Perform a completion search with variants.
 *
 * Runs the normal completion search first. While fewer results than the
 * limit were found, the language variants of the search text are searched
 * too and their results appended.
 *
 * @param string $search Search text
 * @return SearchSuggestionSet
 */
public function completionSearchWithVariants( $search ) {
	// Must be declared: otherwise $wgContLang is an undefined local variable.
	global $wgContLang;

	if ( trim( $search ) === '' ) {
		return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
	}
	$search = $this->normalizeNamespaces( $search );

	$results = $this->completionSearchBackend( $search );
	$fallbackLimit = $this->limit - $results->getSize();
	if ( $fallbackLimit > 0 ) {
		$fallbackSearches = $wgContLang->autoConvertToAllVariants( $search );
		$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

		// The fallback searches run with a reduced limit and no offset.
		// Remember the caller's values so they can be put back afterwards.
		$origLimit = $this->limit;
		$origOffset = $this->offset;

		foreach ( $fallbackSearches as $fbs ) {
			$this->setLimitOffset( $fallbackLimit );
			$fallbackSearchResult = $this->completionSearch( $fbs );
			$results->appendAll( $fallbackSearchResult );
			// Ask the set for its size, as done for $results above; count()
			// on the set object does not report the number of suggestions.
			$fallbackLimit -= $fallbackSearchResult->getSize();
			if ( $fallbackLimit <= 0 ) {
				break;
			}
		}

		$this->setLimitOffset( $origLimit, $origOffset );
	}
	return $this->processCompletionResults( $search, $results );
}
496
/**
 * Extract titles from completion results.
 *
 * @param SearchSuggestionSet $completionResults Suggestions to convert
 * @return array Result of mapping every suggestion to its suggested title
 */
public function extractTitles( SearchSuggestionSet $completionResults ) {
	$toTitle = function( SearchSuggestion $suggestion ) {
		return $suggestion->getSuggestedTitle();
	};

	return $completionResults->map( $toTitle );
}
507
/**
 * Process completion search results.
 *
 * Preloads the suggested titles with a LinkBatch, then makes sure an exact
 * title match ends up first: it is either moved to the top or, when the
 * backend did not return it at all, prepended.
 *
 * @param string $search Search text
 * @param SearchSuggestionSet $suggestions Backend results; modified in place
 * @return SearchSuggestionSet The same set, after rescoring
 */
protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
	$search = trim( $search );
	// preload the titles with LinkBatch
	$titles = $suggestions->map( function( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle();
	} );
	$lb = new LinkBatch( $titles );
	$lb->setCaller( __METHOD__ );
	$lb->execute();

	$results = $suggestions->map( function( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->getPrefixedText();
	} );

	// Rescore results with an exact title match
	// NOTE: in some cases like cross-namespace redirects
	// (frequently used as shortcuts e.g. WP:WP on huwiki) some
	// backends like Cirrus will return no results. We should still
	// try an exact title match to workaround this limitation
	$rescorer = new SearchExactMatchRescorer();
	$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );

	if ( count( $rescoredResults ) > 0 ) {
		// Strict search: titles are compared as text. A loose search treats
		// numeric-looking titles such as "1e3" and "1000" as equal and would
		// then mistake a new exact match for an existing result.
		$found = array_search( $rescoredResults[0], $results, true );
		if ( $found === false ) {
			// If the first result is not in the previous array it
			// means that we found a new exact match
			$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
			$suggestions->prepend( $exactMatch );
			$suggestions->shrink( $this->limit );
		} else {
			// if the first result is not the same we need to rescore
			if ( $found > 0 ) {
				$suggestions->rescore( $found );
			}
		}
	}

	return $suggestions;
}
554
/**
 * Simple prefix search for subpages.
 *
 * @param string $search Search text
 * @return array Empty for a blank search, otherwise the result of the
 *  simple prefix search on the normalized text
 */
public function defaultPrefixSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return [];
	}

	$normalized = $this->normalizeNamespaces( $search );

	return $this->simplePrefixSearch( $normalized );
}
568
/**
 * Call out to the simple search backend.
 *
 * Uses the database prefix search with this engine's namespaces, limit
 * and offset.
 *
 * @param string $search Search text
 * @return array Whatever TitlePrefixSearch::defaultSearchBackend() returns
 */
protected function simplePrefixSearch( $search ) {
	$prefixSearch = new TitlePrefixSearch;

	return $prefixSearch->defaultSearchBackend(
		$this->namespaces,
		$search,
		$this->limit,
		$this->offset
	);
}
580
/**
 * Make a list of searchable namespaces and their canonical names.
 *
 * Forwards to the search engine configuration service.
 *
 * @return array
 */
public static function searchableNamespaces() {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

	return $searchConfig->searchableNamespaces();
}
589
/**
 * Extract the default namespaces to search from the given user's settings.
 *
 * Forwards to the search engine configuration service.
 *
 * @param mixed $user User whose settings are consulted
 * @return array
 */
public static function userNamespaces( $user ) {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

	return $searchConfig->userNamespaces( $user );
}
600
/**
 * Get the namespace indexes to be searched by default.
 *
 * Forwards to the search engine configuration service.
 *
 * @return array
 */
public static function defaultNamespaces() {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

	return $searchConfig->defaultNamespaces();
}
609
/**
 * Get a list of namespace names useful for showing in tooltips and
 * preferences.
 *
 * Forwards to the search engine configuration service.
 *
 * @param array $namespaces Namespace indexes
 * @return array
 */
public static function namespacesAsText( $namespaces ) {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

	return $searchConfig->namespacesAsText( $namespaces );
}
620
/**
 * Load up the appropriate search engine class.
 *
 * Forwards to the search engine factory service.
 *
 * @param string|null $type Engine type handed to the factory; null when omitted
 * @return SearchEngine
 */
public static function create( $type = null ) {
	$factory = MediaWikiServices::getInstance()->getSearchEngineFactory();

	return $factory->create( $type );
}
631
/**
 * Return the search engines we support.
 *
 * Forwards to the search engine configuration service.
 *
 * @return array
 */
public static function getSearchTypes() {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();

	return $searchConfig->getSearchTypes();
}
641
642}
643
651 // no-op
652}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
to move a page</td >< td > &*You are moving the page across namespaces
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
static getOpenSearchTemplate( $type)
Fetch the template for a type.
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:31
MediaWikiServices is the service locator for the application scope of MediaWiki.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook... This is case-sensitive (First character may be autom...
Dummy class to be used when non-supported Database engine is present.
Contain a class for special pages.
static searchableNamespaces()
Make a list of searchable namespaces and their canonical names.
static userNamespaces( $user)
Extract default namespaces to search from the given user's settings, returning a list of index number...
getNearMatcher(Config $config)
Get service class for finding near matches.
searchTitle( $term)
Perform a title-only search query and return a result set.
supports( $feature)
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
static namespacesAsText( $namespaces)
Get a list of namespace names useful for showing in tooltips and preferences.
update( $id, $title, $text)
Create or update the search index record for the given page.
setNamespaces( $namespaces)
Set which namespaces the search should include.
array $features
Feature values.
replacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names.
static defaultNamespaces()
An array of namespaces indexes to be searched by default.
array string $searchTerms
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
defaultPrefixSearch( $search)
Simple prefix search for subpages.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
setFeatureData( $feature, $data)
Way to pass custom data for engines.
completionSearchBackend( $search)
Perform a completion search.
getTextFromContent(Title $t, Content $c=null)
Get the raw text for updating the index from a content object. Nicer search backends could possibly do...
static create( $type=null)
Load up the appropriate search engine class for the currently active database backend,...
transformSearchTerm( $term)
Transform search term in cases when parts of the query came as different GET params (when supported),...
static getNearMatch( $searchterm)
If an exact title match can be found, or a very slightly close match, return the title.
getSort()
Get the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
getValidSorts()
Get the valid sort directions.
static userHighlightPrefs()
Find snippet highlight settings for all users.
updateTitle( $id, $title)
Update a search index record's title only.
completionSearchWithVariants( $search)
Perform a completion search with variants.
normalizeNamespaces( $search)
Makes search simple string if it was namespaced.
const CHARS_ALL
@const int flag for legalSearchChars: includes all chars allowed in a search query
static getOpenSearchTemplate()
Get OpenSearch suggestion template.
static getSearchTypes()
Return the search engines we support.
completionSearch( $search)
Perform a completion search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const CHARS_NO_SYNTAX
@const int flag for legalSearchChars: includes all chars allowed in a search term
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
static getNearMatchResultSet( $searchterm)
Do a near match (see SearchEngine::getNearMatch) and wrap it into a SearchResultSet.
simplePrefixSearch( $search)
Call out to simple search backend.
setSort( $sort)
Set the sort direction of the search results.
searchText( $term)
Perform a full text search query and return a result set.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
static legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search NOTE: usage as static is deprecated and preserved only as BC measure.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
Implementation of near match title search.
Search suggestion sets.
rescore( $key)
Move the suggestion at index $key to the first position.
shrink( $limit)
Remove any extra elements in the suggestions set.
static fromStrings(array $titles)
Builds a new set of suggestion based on a string array.
map( $callback)
Call array_map on the suggestions array.
static fromTitles(array $titles)
Builds a new set of suggestion based on a title array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
Search suggestion.
getSuggestedTitle()
Title object in the case this suggestion is based on a title.
static fromTitle( $score, Title $title)
Create suggestion from Title.
Performs prefix search, returning Title objects.
Represents a title within MediaWiki.
Definition Title.php:34
static newFromText( $text, $defaultNamespace=NS_MAIN)
Create a new Title from text, such as what one would find in a link.
Definition Title.php:277
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition design.txt:57
when a variable name is used in a it is silently declared as a new local masking the global
Definition design.txt:95
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
const NS_FILE
Definition Defines.php:76
const NS_MAIN
Definition Defines.php:70
const NS_SPECIAL
Definition Defines.php:59
const NS_MEDIA
Definition Defines.php:58
external whereas SearchGetNearMatch runs after $term
Definition hooks.txt:2564
the array() calling protocol came about after MediaWiki 1.4rc1.
please add to it if you re going to add events to the MediaWiki code where normally authentication against an external auth plugin would be creating a local account $user
Definition hooks.txt:249
namespace and then decline to actually register it & $namespaces
Definition hooks.txt:915
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped noclasses just before the function returns a value If you return an< a > element with HTML attributes $attribs and contents $html will be returned If you return $ret will be returned and may include noclasses after processing after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
do that in ParserLimitReportFormat instead use this to modify the parameters of the image and a DIV can begin in one section and end in another Make sure your code can handle that case gracefully See the EditSectionClearerLink extension for an example zero but section is usually empty its values are the globals values before the output is cached one of or reset my talk my contributions etc etc otherwise the built in rate limiting checks are if enabled allows for interception of redirect as a string mapping parameter names to values & $type
Definition hooks.txt:2413
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:944
this hook is for auditing only RecentChangesLinked and Watchlist RecentChangesLinked and Watchlist e g Watchlist removed from all revisions and log entries to which it was applied This gives extensions a chance to take it off their books as the deletion has already been partly carried out by this point or something similar the user will be unable to create the tag set and then return false from the hook function Ensure you consume the ChangeTagAfterDelete hook to carry out custom deletion actions as context called by AbstractContent::getParserOutput May be used to override the normal model specific rendering of page content as context as context the output can only depend on parameters provided to this hook not on global state indicating whether full HTML should be generated If generation of HTML may be but other information should still be present in the ParserOutput object to manipulate or replace but no entry for that model exists in $wgContentHandlers if desired whether it is OK to use $contentModel on $title Handler functions that modify $ok should generally return false to prevent further hooks from further modifying $ok inclusive $limit
Definition hooks.txt:1081
null for the local wiki Added should default to null in handler for backwards compatibility add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition hooks.txt:1458
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
Interface for configuration instances.
Definition Config.php:28
Base interface for content objects.
Definition Content.php:34
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition linkcache.txt:17
$sort