MediaWiki REL1_31
SearchEngine.php
Go to the documentation of this file.
1<?php
29
34abstract class SearchEngine {
/** @var string NOTE(review): nothing in this class reads or writes this value — confirm intended use. */
36 public $prefix = '';
37
/** @var int[] Namespace indexes to search; initially only the main namespace. */
39 public $namespaces = [ NS_MAIN ];
40
/** @var int Maximum number of results to return (see setLimitOffset()). */
42 protected $limit = 10;
43
/** @var int Number of results to skip before the first returned one (see setLimitOffset()). */
45 protected $offset = 0;
46
/** @var array NOTE(review): not referenced by any method of this class — presumably used by subclasses. */
48 protected $searchTerms = [];
49
/** @var bool Whether the engine should try to build a suggestion (see setShowSuggestion()). */
51 protected $showSuggestion = true;
/** @var string Current sort direction; written by setSort(), read by getSort(). */
52 private $sort = 'relevance';
53
/** @var array Feature values keyed by feature name (see setFeatureData() / getFeatureData()). */
55 protected $features = [];
56
/** Profile type for completionSearch. */
58 const COMPLETION_PROFILE_TYPE = 'completionSearchProfile';
59
/** Profile type for query independent ranking features. */
61 const FT_QUERY_INDEP_PROFILE_TYPE = 'fulltextQueryIndepProfile';
62
/** Flag for legalSearchChars(): includes all chars allowed in a search query. */
64 const CHARS_ALL = 1;
65
/** Flag for legalSearchChars(): includes all chars allowed in a search term. */
67 const CHARS_NO_SYNTAX = 2;
68
/**
 * Perform a full text search query and return a result set.
 *
 * The base class implements no full text search: it ignores the term and
 * always returns null.
 *
 * @param string $term Search term
 * @return null
 */
function searchText( $term ) {
	return null;
}
80
/**
 * Perform a title search in the article archive.
 *
 * The base class has no archive search, so it reports success with an
 * empty list of matches.
 *
 * @param string $term Search term
 * @return Status Good status wrapping an empty array
 */
function searchArchiveTitle( $term ) {
	return Status::newGood( [] );
}
95
/**
 * Perform a title-only search query and return a result set.
 *
 * The base class implements no title search: it ignores the term and
 * always returns null.
 *
 * @param string $term Search term
 * @return null
 */
function searchTitle( $term ) {
	return null;
}
107
/**
 * Tell whether this engine supports the named feature.
 *
 * The base class only claims 'search-update'; every other feature name,
 * including 'title-suffix-filter', is reported as unsupported.
 *
 * @param string $feature Feature name
 * @return bool
 */
public function supports( $feature ) {
	// Loose comparison on purpose: this mirrors the matching rules of a switch statement.
	if ( $feature == 'search-update' ) {
		return true;
	}
	return false;
}
122
/**
 * Way to pass custom data for engines.
 *
 * Stores $data under $feature, replacing any value stored earlier
 * under the same name.
 *
 * @param string $feature Feature name, used as the storage key
 * @param mixed $data Value to remember for that feature
 */
public function setFeatureData( $feature, $data ) {
	$this->features[$feature] = $data;
}
132
/**
 * Way to retrieve custom data set by setFeatureData or by the engine itself.
 *
 * @param string $feature Feature name
 * @return mixed|null The stored value, or null when nothing (or null) is stored
 */
public function getFeatureData( $feature ) {
	return isset( $this->features[$feature] ) ? $this->features[$feature] : null;
}
146
/**
 * Prepare text for search indexing or querying.
 *
 * The base implementation only delegates to the content language's
 * segmentByWord(), since some languages such as Chinese require word
 * segmentation. Derived classes may override this to add their own
 * conversions.
 *
 * @param string $string Text to normalize
 * @return string Whatever $wgContLang->segmentByWord() returns for the text
 */
public function normalizeText( $string ) {
	global $wgContLang;

	$segmented = $wgContLang->segmentByWord( $string );
	return $segmented;
}
161
/**
 * Transform the search term before it is used.
 *
 * Meant for cases where parts of the query came as different GET params.
 * The base class applies no transformation and hands the term back as is.
 *
 * @param string $term Search term
 * @return string The unchanged term
 */
public function transformSearchTerm( $term ) {
	return $term;
}
172
/**
 * Get the service object used for finding near matches.
 *
 * @param Config $config Configuration handed to the matcher
 * @return SearchNearMatcher New matcher bound to $config and the content language
 */
public function getNearMatcher( Config $config ) {
	global $wgContLang;

	$matcher = new SearchNearMatcher( $config, $wgContLang );
	return $matcher;
}
182
/**
 * Get the near matcher of the default SearchEngine.
 *
 * Builds a new search engine through the service container and asks it
 * for a near matcher configured with the main config.
 *
 * @return SearchNearMatcher
 */
protected static function defaultNearMatcher() {
	$services = MediaWikiServices::getInstance();
	$mainConfig = $services->getMainConfig();
	$engine = $services->newSearchEngine();

	return $engine->getNearMatcher( $mainConfig );
}
191
/**
 * If an exact title match can be found, or a very slightly close match,
 * return the title.
 *
 * Thin static wrapper around the default near matcher.
 *
 * @param string $searchterm
 * @return mixed Result of the near matcher's getNearMatch()
 */
public static function getNearMatch( $searchterm ) {
	$matcher = static::defaultNearMatcher();
	return $matcher->getNearMatch( $searchterm );
}
202
/**
 * Do a near match (see SearchEngine::getNearMatch) and wrap it into a
 * result set.
 *
 * Thin static wrapper around the default near matcher.
 *
 * @param string $searchterm
 * @return mixed Result of the near matcher's getNearMatchResultSet()
 */
public static function getNearMatchResultSet( $searchterm ) {
	$matcher = static::defaultNearMatcher();
	return $matcher->getNearMatchResultSet( $searchterm );
}
213
/**
 * Get chars legal for search.
 *
 * NOTE: usage as static is deprecated and preserved only as BC measure.
 * The base implementation returns the same character-class fragment for
 * every value of $type.
 *
 * @param int $type One of the CHARS_* flags; not consulted here
 * @return string Character ranges suitable for use inside a regex character class
 */
public static function legalSearchChars( $type = self::CHARS_ALL ) {
	$legalChars = "A-Za-z_'.0-9\\x80-\\xFF\\-";
	return $legalChars;
}
224
/**
 * Set the maximum number of results to return and how many to skip
 * before returning the first.
 *
 * Both values are coerced to integers before being stored.
 *
 * @param int $limit Maximum number of results
 * @param int $offset Number of results to skip
 */
function setLimitOffset( $limit, $offset = 0 ) {
	$this->limit = (int)$limit;
	$this->offset = (int)$offset;
}
236
/**
 * Set which namespaces the search should include.
 *
 * Entries that are neither negative nor a key of searchableNamespaces()
 * are dropped. Any empty/falsy argument results in an empty namespace list.
 *
 * @param int[]|null $namespaces Namespace indexes to search
 */
function setNamespaces( $namespaces ) {
	if ( $namespaces ) {
		// Filter namespaces to only keep valid ones
		$validNs = $this->searchableNamespaces();
		// array_filter keeps the original keys of the surviving entries
		$namespaces = array_filter( $namespaces, function ( $ns ) use( $validNs ) {
			return $ns < 0 || isset( $validNs[$ns] );
		} );
	} else {
		$namespaces = [];
	}
	$this->namespaces = $namespaces;
}
255
/**
 * Set whether the searcher should try to build a suggestion.
 *
 * The value is stored as given, without any conversion.
 *
 * @param bool $showSuggestion
 */
function setShowSuggestion( $showSuggestion ) {
	$this->showSuggestion = $showSuggestion;
}
266
/**
 * Get the valid sort directions.
 *
 * The base class only knows 'relevance'; setSort() validates against
 * this list.
 *
 * @return string[]
 */
public function getValidSorts() {
	$validSorts = [ 'relevance' ];
	return $validSorts;
}
277
/**
 * Set the sort direction of the search results.
 *
 * The value must be one of the entries returned by getValidSorts().
 * The membership test is strict so that values such as 0 or true, which
 * compare loosely equal to arbitrary strings on PHP 7, are rejected
 * instead of being silently stored as the sort.
 *
 * @param string $sort Sort direction
 * @throws InvalidArgumentException When $sort is not a valid sort
 */
public function setSort( $sort ) {
	$validSorts = $this->getValidSorts();
	if ( !in_array( $sort, $validSorts, true ) ) {
		throw new InvalidArgumentException( "Invalid sort: $sort. " .
			"Must be one of: " . implode( ', ', $validSorts ) );
	}
	$this->sort = $sort;
}
293
/**
 * Get the sort direction of the search results.
 *
 * @return string The value last accepted by setSort(), 'relevance' by default
 */
public function getSort() {
	return $this->sort;
}
303
/**
 * Parse some common prefixes: all (search everything) or namespace names,
 * and set the list of namespaces of this engine accordingly.
 *
 * When parseNamespacePrefixes() finds no prefix the query is returned
 * untouched and the current namespaces are kept.
 *
 * @param string $query Raw query
 * @return string Query with the recognised prefix removed
 */
function replacePrefixes( $query ) {
	$queryAndNs = self::parseNamespacePrefixes( $query );
	if ( $queryAndNs === false ) {
		return $query;
	}
	// Element 1 is the extracted namespace list (null for the "all" keyword)
	$this->namespaces = $queryAndNs[1];
	return $queryAndNs[0];
}
320
/**
 * Parse some common prefixes: all (search everything) or namespace names.
 *
 * @param string $query Raw query
 * @return false|array False when the query has no colon or its prefix is
 *  not a namespace name; otherwise a two element array:
 *   - the query without its prefix (the whole query if nothing but
 *     whitespace would remain),
 *   - null for the "all" keyword, or a one element list holding the
 *     namespace index.
 */
public static function parseNamespacePrefixes( $query ) {
	global $wgContLang;

	$colonPos = strpos( $query, ':' );
	if ( $colonPos === false ) {
		// Without a colon there is no prefix to parse
		return false;
	}

	$allKeyword = wfMessage( 'searchall' )->inContentLanguage()->text() . ":";
	$allKeywordLength = strlen( $allKeyword );

	if ( strncmp( $query, $allKeyword, $allKeywordLength ) == 0 ) {
		// "all:" prefix: no namespace restriction
		$namespaces = null;
		$remainder = substr( $query, $allKeywordLength );
	} else {
		// TODO: should we unify with PrefixSearch::extractNamespace ?
		$nsName = str_replace( ' ', '_', substr( $query, 0, $colonPos ) );
		$nsIndex = $wgContLang->getNsIndex( $nsName );
		if ( $nsIndex === false ) {
			return false;
		}
		$namespaces = [ $nsIndex ];
		$remainder = substr( $query, strlen( $nsName ) + 1 );
	}

	if ( trim( $remainder ) == '' ) {
		// The prefix was the whole query, so keep searching for it verbatim
		$remainder = $query;
	}

	return [ $remainder, $namespaces ];
}
361
/**
 * Find snippet highlight settings for all users.
 *
 * Both values are hardcoded and identical for every user.
 *
 * @return int[] Two element list: context lines (2), context chars (75)
 */
public static function userHighlightPrefs() {
	return [ 2, 75 ];
}
371
/**
 * Create or update the search index record for the given page.
 *
 * The base class keeps no index, so this does nothing.
 *
 * @param int $id Page id
 * @param string $title Page title
 * @param string $text Page text
 */
function update( $id, $title, $text ) {
	// Intentionally empty.
}
384
/**
 * Update a search index record's title only.
 *
 * The base class keeps no index, so this does nothing.
 *
 * @param int $id Page id
 * @param string $title Page title
 */
function updateTitle( $id, $title ) {
	// Intentionally empty.
}
396
/**
 * Hook for removing a page from the search index.
 *
 * The base class keeps no index, so this does nothing.
 *
 * @param int $id Page id (presumably of the page being removed — confirm with callers)
 * @param string $title Page title
 */
function delete( $id, $title ) {
	// Intentionally empty.
}
408
/**
 * Get the raw text for updating the index from a content object.
 *
 * @param Title $t Title the content belongs to; not consulted by the base class
 * @param Content|null $c Content to index
 * @return string The content's search index text, or '' when no content is given
 */
public function getTextFromContent( Title $t, Content $c = null ) {
	if ( !$c ) {
		return '';
	}
	return $c->getTextForSearchIndex();
}
422
/**
 * Tell whether getTextFromContent() already returns text that is fully
 * processed for the index.
 *
 * The base class answers false.
 *
 * @return bool
 */
public function textAlreadyUpdatedForIndex() {
	return false;
}
433
/**
 * Makes search simple string if it was namespaced.
 *
 * Detects a namespace prefix in $search, stores the resulting namespace
 * list through setNamespaces() (with NS_MEDIA mapped to NS_FILE) and
 * returns the search string without the prefix.
 *
 * @param string $search Raw search string
 * @return string Search string with any recognised namespace prefix removed
 */
protected function normalizeNamespaces( $search ) {
	$namespaces = $this->namespaces;
	$parsed = Title::newFromText( $search );

	if ( $parsed && !$parsed->isExternal() ) {
		// The search parses as a local title
		$titleNs = $parsed->getNamespace();
		$namespaces = [ $titleNs ];
		if ( $titleNs !== NS_MAIN ) {
			// Drop everything up to and including the first colon
			$search = substr( $search, strpos( $search, ':' ) + 1 );
		}
		if ( $titleNs == NS_MAIN ) {
			// no explicit prefix, use default namespaces
			$namespaces = $this->namespaces;
			Hooks::run( 'PrefixSearchExtractNamespace', [ &$namespaces, &$search ] );
		}
	} else {
		// Maybe the search is a bare "Namespace:" prefix: appending a page
		// name makes it parseable as a title in that namespace.
		$probe = Title::newFromText( $search . 'Dummy' );
		$isBareNamespace = $probe
			&& $probe->getText() == 'Dummy'
			&& $probe->getNamespace() != NS_MAIN
			&& !$probe->isExternal();

		if ( $isBareNamespace ) {
			$namespaces = [ $probe->getNamespace() ];
			$search = '';
		} else {
			Hooks::run( 'PrefixSearchExtractNamespace', [ &$namespaces, &$search ] );
		}
	}

	// Media: is searched through the File: namespace
	$mediaToFile = function ( $nsIndex ) {
		return $nsIndex == NS_MEDIA ? NS_FILE : $nsIndex;
	};
	$this->setNamespaces( array_map( $mediaToFile, $namespaces ) );

	return $search;
}
473
/**
 * Perform a completion search.
 *
 * Unless the special namespace is being searched, the PrefixSearchBackend
 * hook gets the first chance to produce results; the simple prefix search
 * is the fallback.
 *
 * @param string $search Search string, trimmed before use
 * @return SearchSuggestionSet
 */
protected function completionSearchBackend( $search ) {
	$search = trim( $search );
	$hookResults = [];

	// We do not run hook on Special: search
	if ( !in_array( NS_SPECIAL, $this->namespaces ) ) {
		$hookPassed = Hooks::run( 'PrefixSearchBackend',
			[ $this->namespaces, $search, $this->limit, &$hookResults, $this->offset ]
		);
		if ( !$hookPassed ) {
			// False means hook worked.
			// FIXME: Yes, the API is weird. That's why it is going to be deprecated.
			return SearchSuggestionSet::fromStrings( $hookResults );
		}
	}

	// Hook did not do the job, use default simple search
	$titles = $this->simplePrefixSearch( $search );
	return SearchSuggestionSet::fromTitles( $titles );
}
500
/**
 * Perform a completion search.
 *
 * @param string $search Raw search string
 * @return SearchSuggestionSet Empty set for a blank search; otherwise the
 *  backend results after processCompletionResults()
 */
public function completionSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return SearchSuggestionSet::emptySuggestionSet();
	}

	$search = $this->normalizeNamespaces( $search );
	$backendResults = $this->completionSearchBackend( $search );

	return $this->processCompletionResults( $search, $backendResults );
}
513
/**
 * Perform a completion search with variants.
 *
 * When the search itself yields fewer suggestions than the limit, the
 * language variants of the search string are tried until the limit is
 * reached.
 *
 * @param string $search Raw search string
 * @return SearchSuggestionSet
 */
public function completionSearchWithVariants( $search ) {
	if ( trim( $search ) === '' ) {
		return SearchSuggestionSet::emptySuggestionSet(); // Return empty result
	}
	$search = $this->normalizeNamespaces( $search );

	$results = $this->completionSearchBackend( $search );
	$fallbackLimit = $this->limit - $results->getSize();
	if ( $fallbackLimit > 0 ) {
		global $wgContLang;

		$fallbackSearches = $wgContLang->autoConvertToAllVariants( $search );
		$fallbackSearches = array_diff( array_unique( $fallbackSearches ), [ $search ] );

		// The fallback searches temporarily narrow the limit and reset the
		// offset; remember the caller's values so they can be put back.
		$origLimit = $this->limit;
		$origOffset = $this->offset;

		foreach ( $fallbackSearches as $fbs ) {
			try {
				$this->setLimitOffset( $fallbackLimit );
				$fallbackSearchResult = $this->completionSearch( $fbs );
				$results->appendAll( $fallbackSearchResult );
				$fallbackLimit -= $fallbackSearchResult->getSize();
			} finally {
				// Without this the final processCompletionResults() below (and
				// any later use of this engine) would see the reduced limit
				// and an offset of 0.
				$this->setLimitOffset( $origLimit, $origOffset );
			}
			if ( $fallbackLimit <= 0 ) {
				break;
			}
		}
	}
	return $this->processCompletionResults( $search, $results );
}
545
/**
 * Extract titles from completion results.
 *
 * @param SearchSuggestionSet $completionResults
 * @return array Suggested title of every suggestion, as produced by the set's map()
 */
public function extractTitles( SearchSuggestionSet $completionResults ) {
	$toTitle = function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle();
	};
	return $completionResults->map( $toTitle );
}
556
/**
 * Process completion search results.
 *
 * Preloads the suggested titles with a LinkBatch and, for the first page
 * of results, makes sure an exact title match ends up in first position.
 *
 * @param string $search Search string, trimmed before use
 * @param SearchSuggestionSet $suggestions Suggestions to post-process; modified in place
 * @return SearchSuggestionSet The same $suggestions object
 */
protected function processCompletionResults( $search, SearchSuggestionSet $suggestions ) {
	$search = trim( $search );
	// preload the titles with LinkBatch
	$titles = $suggestions->map( function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle();
	} );
	$lb = new LinkBatch( $titles );
	$lb->setCaller( __METHOD__ );
	$lb->execute();

	$results = $suggestions->map( function ( SearchSuggestion $sugg ) {
		return $sugg->getSuggestedTitle()->getPrefixedText();
	} );

	if ( $this->offset === 0 ) {
		// Rescore results with an exact title match
		// NOTE: in some cases like cross-namespace redirects
		// (frequently used as shortcuts e.g. WP:WP on huwiki) some
		// backends like Cirrus will return no results. We should still
		// try an exact title match to workaround this limitation
		$rescorer = new SearchExactMatchRescorer();
		$rescoredResults = $rescorer->rescore( $search, $this->namespaces, $results, $this->limit );
	} else {
		// No need to rescore if offset is not 0
		// The exact match must have been returned at position 0
		// if it existed.
		$rescoredResults = $results;
	}

	if ( count( $rescoredResults ) > 0 ) {
		// Strict search: titles are strings, and a loose comparison would
		// treat distinct numeric-looking titles (e.g. "1e3" and "1000") as
		// the same page, hiding a genuinely new exact match.
		$found = array_search( $rescoredResults[0], $results, true );
		if ( $found === false ) {
			// If the first result is not in the previous array it
			// means that we found a new exact match
			$exactMatch = SearchSuggestion::fromTitle( 0, Title::newFromText( $rescoredResults[0] ) );
			$suggestions->prepend( $exactMatch );
			$suggestions->shrink( $this->limit );
		} elseif ( $found > 0 ) {
			// the first result is not the same, so move it to the front
			$suggestions->rescore( $found );
		}
	}

	return $suggestions;
}
611
/**
 * Simple prefix search for subpages.
 *
 * @param string $search Raw search string
 * @return array Empty for a blank search; otherwise the simplePrefixSearch()
 *  results for the namespace-normalized search string
 */
public function defaultPrefixSearch( $search ) {
	$isBlank = trim( $search ) === '';
	if ( $isBlank ) {
		return [];
	}

	$normalized = $this->normalizeNamespaces( $search );
	return $this->simplePrefixSearch( $normalized );
}
625
/**
 * Call out to simple search backend.
 *
 * Runs the default database prefix search with this engine's current
 * namespaces, limit and offset.
 *
 * @param string $search Search string
 * @return array Results of TitlePrefixSearch::defaultSearchBackend()
 */
protected function simplePrefixSearch( $search ) {
	$prefixSearch = new TitlePrefixSearch();
	return $prefixSearch->defaultSearchBackend(
		$this->namespaces,
		$search,
		$this->limit,
		$this->offset
	);
}
637
/**
 * Make a list of searchable namespaces and their canonical names.
 *
 * Static shortcut to the SearchEngineConfig service.
 *
 * @return array
 */
public static function searchableNamespaces() {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();
	return $searchConfig->searchableNamespaces();
}
646
/**
 * Extract default namespaces to search from the given user's settings.
 *
 * Static shortcut to the SearchEngineConfig service.
 *
 * @param User $user
 * @return array
 */
public static function userNamespaces( $user ) {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();
	return $searchConfig->userNamespaces( $user );
}
657
/**
 * An array of namespaces indexes to be searched by default.
 *
 * Static shortcut to the SearchEngineConfig service.
 *
 * @return array
 */
public static function defaultNamespaces() {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();
	return $searchConfig->defaultNamespaces();
}
666
/**
 * Get a list of namespace names useful for showing in tooltips and
 * preferences.
 *
 * Static shortcut to the SearchEngineConfig service.
 *
 * @param array $namespaces
 * @return array
 */
public static function namespacesAsText( $namespaces ) {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();
	return $searchConfig->namespacesAsText( $namespaces );
}
677
/**
 * Load up the appropriate search engine class.
 *
 * Static shortcut to the SearchEngineFactory service.
 *
 * @param string|null $type Passed through to the factory's create()
 * @return SearchEngine
 */
public static function create( $type = null ) {
	$factory = MediaWikiServices::getInstance()->getSearchEngineFactory();
	return $factory->create( $type );
}
688
/**
 * Return the search engines we support.
 *
 * Static shortcut to the SearchEngineConfig service.
 *
 * @return array
 */
public static function getSearchTypes() {
	$searchConfig = MediaWikiServices::getInstance()->getSearchEngineConfig();
	return $searchConfig->getSearchTypes();
}
698
/**
 * Get a list of supported profiles.
 *
 * The base class defines no profiles and always returns null, whatever
 * the arguments are.
 *
 * @param string $profileType Kind of profile requested
 * @param User|null $user
 * @return null
 */
public function getProfiles( $profileType, User $user = null ) {
	return null;
}
717
/**
 * Create a search field definition.
 *
 * The base class implements no fields: it returns a null index field
 * whatever name and type are requested.
 *
 * @param string $name Field name
 * @param mixed $type Field type
 * @return NullIndexField
 */
public function makeSearchFieldMapping( $name, $type ) {
	$field = new NullIndexField();
	return $field;
}
729
/**
 * Get fields for search index.
 *
 * Collects the field definitions of every content handler, merging
 * definitions that share a name, then lets extensions adjust the list
 * through the SearchIndexFields hook.
 *
 * @return array Field definitions keyed by field name
 * @throws InvalidArgumentException When two definitions of the same field cannot be merged
 */
public function getSearchIndexFields() {
	$models = ContentHandler::getContentModels();
	$fields = [];
	$seenHandlers = new SplObjectStorage();
	foreach ( $models as $model ) {
		try {
			$handler = ContentHandler::getForModelID( $model );
		} catch ( MWUnknownContentModelException $e ) {
			// If we can find no handler, ignore it
			continue;
		}
		// Several models can have the same handler, so avoid processing it repeatedly
		if ( $seenHandlers->contains( $handler ) ) {
			// We already did this one
			continue;
		}
		$seenHandlers->attach( $handler );
		$handlerFields = $handler->getFieldsForSearchIndex( $this );
		foreach ( $handlerFields as $fieldName => $fieldData ) {
			if ( empty( $fields[$fieldName] ) ) {
				$fields[$fieldName] = $fieldData;
			} else {
				// TODO: do we allow some clashes with the same type or reject all of them?
				$mergeDef = $fields[$fieldName]->merge( $fieldData );
				if ( !$mergeDef ) {
					throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
				}
				$fields[$fieldName] = $mergeDef;
			}
		}
	}
	// Hook to allow extensions to produce search mapping fields
	Hooks::run( 'SearchIndexFields', [ &$fields, $this ] );
	return $fields;
}
771
/**
 * Augment search results with extra data.
 *
 * Augmentors are collected through the SearchResultsAugment hook. Row
 * augmentors are wrapped into set augmentors, then every augmentor runs
 * over the whole result set and any non-empty data it returns is stored
 * on the set under the augmentor's name.
 *
 * @param SearchResultSet $resultSet Result set to augment; modified in place
 * @throws InvalidArgumentException When a name has both a row and a set augmentor
 */
public function augmentSearchResults( SearchResultSet $resultSet ) {
	$setAugmentors = [];
	$rowAugmentors = [];
	Hooks::run( "SearchResultsAugment", [ &$setAugmentors, &$rowAugmentors ] );

	if ( !( $setAugmentors || $rowAugmentors ) ) {
		// Nobody registered an augmentor
		return;
	}

	// Wrap each row augmentor so that it can be run like a set augmentor
	foreach ( $rowAugmentors as $name => $rowAugmentor ) {
		if ( isset( $setAugmentors[$name] ) ) {
			throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" );
		}
		$setAugmentors[$name] = new PerRowAugmentor( $rowAugmentor );
	}

	foreach ( $setAugmentors as $name => $augmentor ) {
		$augmented = $augmentor->augmentAll( $resultSet );
		if ( !$augmented ) {
			continue;
		}
		$resultSet->setAugmentedData( $name, $augmented );
	}
}
802}
803
811 // no-op
812}
to move a page</td >< td > &*You are moving the page across namespaces
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Class representing a list of titles The execute() method checks them all for existence and adds them ...
Definition LinkBatch.php:34
Exception thrown when an unregistered content model is requested.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Null index field - means search engine does not implement this field.
Perform augmentation of each row and return composite result, indexed by ID.
defaultSearchBackend( $namespaces, $search, $limit, $offset)
Unless overridden by PrefixSearchBackend hook... This is case-sensitive (First character may be autom...
Dummy class to be used when non-supported Database engine is present.
Contain a class for special pages.
static searchableNamespaces()
Make a list of searchable namespaces and their canonical names.
static userNamespaces( $user)
Extract default namespaces to search from the given user's settings, returning a list of index number...
makeSearchFieldMapping( $name, $type)
Create a search field definition.
getNearMatcher(Config $config)
Get service class for finding near matches.
searchTitle( $term)
Perform a title-only search query and return a result set.
supports( $feature)
processCompletionResults( $search, SearchSuggestionSet $suggestions)
Process completion search results.
static namespacesAsText( $namespaces)
Get a list of namespace names useful for showing in tooltips and preferences.
getFeatureData( $feature)
Way to retrieve custom data set by setFeatureData or by the engine itself.
update( $id, $title, $text)
Create or update the search index record for the given page.
setNamespaces( $namespaces)
Set which namespaces the search should include.
augmentSearchResults(SearchResultSet $resultSet)
Augment search results with extra data.
array $features
Feature values.
static parseNamespacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names.
replacePrefixes( $query)
Parse some common prefixes: all (search everything) or namespace names and set the list of namespaces...
static defaultNamespaces()
An array of namespaces indexes to be searched by default.
array string $searchTerms
textAlreadyUpdatedForIndex()
If an implementation of SearchEngine handles all of its own text processing in getTextFromContent() a...
defaultPrefixSearch( $search)
Simple prefix search for subpages.
searchArchiveTitle( $term)
Perform a title search in the article archive.
normalizeText( $string)
When overridden in derived class, performs database-specific conversions on text to be used for searc...
setFeatureData( $feature, $data)
Way to pass custom data for engines.
completionSearchBackend( $search)
Perform a completion search.
getTextFromContent(Title $t, Content $c=null)
Get the raw text for updating the index from a content object Nicer search backends could possibly do...
static create( $type=null)
Load up the appropriate search engine class for the currently active database backend,...
getProfiles( $profileType, User $user=null)
Get a list of supported profiles.
transformSearchTerm( $term)
Transform search term in cases when parts of the query came as different GET params (when supported),...
static getNearMatch( $searchterm)
If an exact title match can be found, or a very slightly close match, return the title.
getSort()
Get the sort direction of the search results.
static defaultNearMatcher()
Get near matcher for default SearchEngine.
getSearchIndexFields()
Get fields for search index.
getValidSorts()
Get the valid sort directions.
static userHighlightPrefs()
Find snippet highlight settings for all users.
updateTitle( $id, $title)
Update a search index record's title only.
completionSearchWithVariants( $search)
Perform a completion search with variants.
normalizeNamespaces( $search)
Makes search simple string if it was namespaced.
const CHARS_ALL
@const int flag for legalSearchChars: includes all chars allowed in a search query
static getSearchTypes()
Return the search engines we support.
completionSearch( $search)
Perform a completion search.
setLimitOffset( $limit, $offset=0)
Set the maximum number of results to return and how many to skip before returning the first.
const CHARS_NO_SYNTAX
@const int flag for legalSearchChars: includes all chars allowed in a search term
setShowSuggestion( $showSuggestion)
Set whether the searcher should try to build a suggestion.
static getNearMatchResultSet( $searchterm)
Do a near match (see SearchEngine::getNearMatch) and wrap it into a SearchResultSet.
simplePrefixSearch( $search)
Call out to simple search backend.
setSort( $sort)
Set the sort direction of the search results.
const FT_QUERY_INDEP_PROFILE_TYPE
@const string profile type for query independent ranking features
searchText( $term)
Perform a full text search query and return a result set.
extractTitles(SearchSuggestionSet $completionResults)
Extract titles from completion results.
const COMPLETION_PROFILE_TYPE
@const string profile type for completionSearch
static legalSearchChars( $type=self::CHARS_ALL)
Get chars legal for search NOTE: usage as static is deprecated and preserved only as BC measure.
A utility class to rescore search results by looking for an exact match in the db and add the page f...
Implementation of near match title search.
setAugmentedData( $name, $data)
Sets augmented data for result set.
Search suggestion sets.
rescore( $key)
Move the suggestion at index $key to the first position.
shrink( $limit)
Remove any extra elements in the suggestions set.
static fromStrings(array $titles)
Builds a new set of suggestions based on a string array.
map( $callback)
Call array_map on the suggestions array.
static fromTitles(array $titles)
Builds a new set of suggestions based on a title array.
prepend(SearchSuggestion $suggestion)
Add a new suggestion at the top.
Search suggestion.
static fromTitle( $score, Title $title)
Create suggestion from Title.
Performs prefix search, returning Title objects.
Represents a title within MediaWiki.
Definition Title.php:39
The User object encapsulates all of the user-specific settings (user_id, name, rights,...
Definition User.php:53
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the local content language as $wgContLang
Definition design.txt:57
namespace and then decline to actually register it & $namespaces
Definition hooks.txt:934
the value to return A Title object or null for latest all implement SearchIndexField must implement ResultSetAugmentor & $rowAugmentors
Definition hooks.txt:2882
For QUnit the mediawiki tests qunit testrunner dependency will be added to any module whereas SearchGetNearMatch runs after $term
Definition hooks.txt:2845
namespace and then decline to actually register it file or subcat img or subcat $title
Definition hooks.txt:964
either a unescaped string or a HtmlArmor object after in associative array form externallinks including delete and has completed for all link tables whether this was an auto creation default is conds Array Extra conditions for the No matching items in log is displayed if loglist is empty msgKey Array If you want a nice box with a set this to the key of the message First element is the message additional optional elements are parameters for the key that are processed with wfMessage() -> params() ->parseAsBlock() - offset Set to overwrite offset parameter in $wgRequest set to '' to unset offset - wrap String Wrap the message in html(usually something like "&lt;div ...>$1&lt;/div>"). - flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException':Called before an exception(or PHP error) is logged. This is meant for integration with external error aggregation services
Allows to change the fields on the form that will be generated $name
Definition hooks.txt:302
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that probably a stub it is not rendered in wiki pages or galleries in category pages allow injecting custom HTML after the section Any uses of the hook need to handle escaping see BaseTemplate::getToolbox and BaseTemplate::makeListItem for details on the format of individual items inside of this array or by returning and letting standard HTTP rendering take place modifiable or by returning false and taking over the output modifiable modifiable after all normalizations have been except for the $wgMaxImageArea check set to true or false to override the $wgMaxImageArea check result gives extension the possibility to transform it themselves $handler
Definition hooks.txt:903
null for the local wiki Added should default to null in handler for backwards compatibility add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition hooks.txt:1620
returning false will NOT prevent logging $e
Definition hooks.txt:2176
const NS_FILE
Definition Defines.php:80
const NS_MAIN
Definition Defines.php:74
const NS_SPECIAL
Definition Defines.php:63
const NS_MEDIA
Definition Defines.php:62
Interface for configuration instances.
Definition Config.php:28
Base interface for content objects.
Definition Content.php:34
linkcache txt The LinkCache class maintains a list of article titles and the information about whether or not the article exists in the database This is used to mark up links when displaying a page If the same link appears more than once on any page then it only has to be looked up once In most cases link lookups are done in batches with the LinkBatch class or the equivalent in so the link cache is mostly useful for short snippets of parsed and for links in the navigation areas of the skin The link cache was formerly used to track links used in a document for the purposes of updating the link tables This application is now deprecated To create a you can use the following $titles
Definition linkcache.txt:17
$sort