MediaWiki  1.23.5
SearchMySQL.php
Go to the documentation of this file.
1 <?php
31 class SearchMySQL extends SearchDatabase {
/**
 * When true, parseQuery() prefixes every term that has no explicit
 * boolean modifier with '+', making the term mandatory.
 * @var bool
 */
public $strictMatching = true;

/**
 * Cache for the value looked up by minSearchLength(); stays null until
 * the first lookup has been performed.
 * @var int|null
 */
public static $mMinSearchLength;

/**
 * Parse the user's query and transform it into an SQL fragment which will
 * become part of a WHERE clause.
 *
 * As a side effect, $this->searchTerms is reset and refilled with one
 * highlighting regex per parsed term.
 *
 * @param string $filteredText Search text to parse
 * @param bool $fulltext Passed to getIndexField() to choose the indexed column
 * @return string " MATCH(field) AGAINST('...' IN BOOLEAN MODE) " fragment
 */
function parseQuery( $filteredText, $fulltext ) {
	// The content language object is a global; without this declaration
	// $wgContLang would be an undefined local inside the method.
	global $wgContLang;

	$lc = SearchEngine::legalSearchChars(); // Minus format chars
	$searchon = '';
	$this->searchTerms = array();

	# @todo FIXME: This doesn't handle parenthetical expressions.
	$m = array();
	if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/',
		$filteredText, $m, PREG_SET_ORDER ) ) {
		foreach ( $m as $bits ) {
			// A quoted phrase matches the second alternative, so the trailing
			// capture groups are absent from $bits. Pad them with '' instead
			// of silencing the resulting notices with the @ operator.
			list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) = array_pad( $bits, 5, '' );

			if ( $nonQuoted != '' ) {
				$term = $nonQuoted;
				$quote = '';
			} else {
				$term = str_replace( '"', '', $term );
				$quote = '"';
			}

			if ( $searchon !== '' ) {
				$searchon .= ' ';
			}
			if ( $this->strictMatching && ( $modifier == '' ) ) {
				// If we leave this out, boolean op defaults to OR which is rarely helpful.
				$modifier = '+';
			}

			// Some languages such as Serbian store the input form in the search index,
			// so we may need to search for matches in multiple writing system variants.
			$convertedVariants = $wgContLang->autoConvertToAllVariants( $term );
			if ( is_array( $convertedVariants ) ) {
				$variants = array_unique( array_values( $convertedVariants ) );
			} else {
				$variants = array( $term );
			}

			// The low-level search index does some processing on input to work
			// around problems with minimum lengths and encoding in MySQL's
			// fulltext engine.
			// For Chinese this also inserts spaces between adjacent Han characters.
			$strippedVariants = array_map(
				array( $wgContLang, 'normalizeForSearch' ),
				$variants );

			// Some languages such as Chinese force all variants to a canonical
			// form when stripping to the low-level search index, so to be sure
			// let's check our variants list for unique items after stripping.
			$strippedVariants = array_unique( $strippedVariants );

			// Several variants of one term are grouped in parentheses so the
			// modifier applies to the group as a whole.
			$hasAlternatives = count( $strippedVariants ) > 1;

			$searchon .= $modifier;
			if ( $hasAlternatives ) {
				$searchon .= '(';
			}
			foreach ( $strippedVariants as $stripped ) {
				$stripped = $this->normalizeText( $stripped );
				if ( $nonQuoted && strpos( $stripped, ' ' ) !== false ) {
					// Hack for Chinese: we need to toss in quotes for
					// multiple-character phrases since normalizeForSearch()
					// added spaces between them to make word breaks.
					$stripped = '"' . trim( $stripped ) . '"';
				}
				$searchon .= "$quote$stripped$quote$wildcard ";
			}
			if ( $hasAlternatives ) {
				$searchon .= ')';
			}

			// Match individual terms or quoted phrase in result highlighting...
			// Note that variants will be introduced in a later stage for highlighting!
			$regexp = $this->regexTerm( $term, $wildcard );
			$this->searchTerms[] = $regexp;
		}
		wfDebug( __METHOD__ . ": Would search with '$searchon'\n" );
		wfDebug( __METHOD__ . ': Match with /' . implode( '|', $this->searchTerms ) . "/\n" );
	} else {
		wfDebug( __METHOD__ . ": Can't understand search query '{$filteredText}'\n" );
	}

	// The boolean-mode expression is embedded in a quoted SQL literal,
	// so it is escaped through the database layer first.
	$searchon = $this->db->strencode( $searchon );
	$field = $this->getIndexField( $fulltext );
	return " MATCH($field) AGAINST('$searchon' IN BOOLEAN MODE) ";
}
128 
/**
 * Build the regex fragment used to highlight one search term in results.
 *
 * @param string $string Literal term; it is escaped with preg_quote()
 * @param string $wildcard Non-empty when the term ended in a wildcard
 * @return string Regex fragment without delimiters
 */
function regexTerm( $string, $wildcard ) {
	// The content language object is a global; without this declaration
	// $wgContLang would be an undefined local inside the method.
	global $wgContLang;

	$regex = preg_quote( $string, '/' );

	if ( !$wgContLang->hasWordBreaks() ) {
		// For Chinese, words may legitimately abut other words in the text literal.
		// Don't add \b boundary checks... note this could cause false positives
		// for latin chars.
		return $regex;
	}

	if ( $wildcard ) {
		// Don't cut off the final bit!
		return '\b' . $regex;
	}

	return '\b' . $regex . '\b';
}
147 
/**
 * Characters accepted in a search term: the parent's set with the
 * double quote and the asterisk prepended.
 *
 * @return string
 */
public static function legalSearchChars() {
	$inherited = parent::legalSearchChars();

	return '"*' . $inherited;
}
151 
/**
 * Perform a full text search query and return a result set.
 *
 * @param string $term Raw search term
 * @return MySQLSearchResultSet|null Whatever searchInternal() yields
 */
function searchText( $term ) {
	$againstText = true;

	return $this->searchInternal( $term, $againstText );
}
161 
/**
 * Perform a title-only search query and return a result set.
 *
 * @param string $term Raw search term
 * @return MySQLSearchResultSet|null Whatever searchInternal() yields
 */
function searchTitle( $term ) {
	$againstText = false;

	return $this->searchInternal( $term, $againstText );
}
171 
/**
 * Shared implementation behind searchText() and searchTitle().
 *
 * @param string $term Raw search term
 * @param bool $fulltext True to search page text, false to search titles
 * @return MySQLSearchResultSet|null Null when the term is blank
 */
protected function searchInternal( $term, $fulltext ) {
	global $wgCountTotalSearchHits;

	// This seems out of place, why is this called with empty term?
	if ( trim( $term ) === '' ) {
		return null;
	}

	$cleaned = $this->filter( $term );

	// Main query: the page of matching rows.
	$pageQuery = $this->getQuery( $cleaned, $fulltext );
	$rows = $this->db->select(
		$pageQuery['tables'],
		$pageQuery['fields'],
		$pageQuery['conds'],
		__METHOD__,
		$pageQuery['options'],
		$pageQuery['joins']
	);

	// Optional second query: overall hit count, only when configured.
	$hitCount = null;
	if ( $wgCountTotalSearchHits ) {
		$countQuery = $this->getCountQuery( $cleaned, $fulltext );
		$countResult = $this->db->select(
			$countQuery['tables'],
			$countQuery['fields'],
			$countQuery['conds'],
			__METHOD__,
			$countQuery['options'],
			$countQuery['joins']
		);

		$countRow = $countResult->fetchObject();
		if ( $countRow ) {
			$hitCount = intval( $countRow->c );
		}
		$countResult->free();
	}

	return new MySQLSearchResultSet( $rows, $this->searchTerms, $hitCount );
}
204 
/**
 * Report whether this engine supports an optional feature.
 *
 * @param string $feature Feature name
 * @return bool True for 'title-suffix-filter'; otherwise the parent's answer
 */
public function supports( $feature ) {
	// Loose comparison on purpose: it keeps the matching rules of a
	// switch statement.
	if ( $feature == 'title-suffix-filter' ) {
		return true;
	}

	return parent::supports( $feature );
}
213 
/**
 * Add special conditions.
 *
 * Only the 'title-suffix-filter' feature is handled: a truthy value adds
 * a LIKE condition on page_title built from anyString() followed by
 * that value.
 *
 * @param array &$query Query description, modified in place
 */
protected function queryFeatures( &$query ) {
	foreach ( $this->features as $name => $setting ) {
		if ( $name !== 'title-suffix-filter' || !$setting ) {
			continue;
		}

		$suffixLike = $this->db->buildLike( $this->db->anyString(), $setting );
		$query['conds'][] = 'page_title' . $suffixLike;
	}
}
226 
/**
 * Add namespace conditions.
 *
 * Does nothing unless $this->namespaces is an array. An empty array is
 * replaced (on the object itself) by a list holding only '0'.
 *
 * @param array &$query Query description, modified in place
 */
function queryNamespaces( &$query ) {
	if ( !is_array( $this->namespaces ) ) {
		return;
	}

	if ( $this->namespaces === array() ) {
		$this->namespaces[] = '0';
	}

	$query['conds']['page_namespace'] = $this->namespaces;
}
240 
/**
 * Add limit options.
 *
 * @param array &$query Query description; its LIMIT and OFFSET options
 *   are set from $this->limit and $this->offset
 */
protected function limitResult( &$query ) {
	$paging = array(
		'LIMIT' => $this->limit,
		'OFFSET' => $this->offset,
	);

	foreach ( $paging as $option => $amount ) {
		$query['options'][$option] = $amount;
	}
}
250 
/**
 * Construct the SQL query to do the search.
 *
 * @param string $filteredTerm Search term handed on to queryMain()
 * @param bool $fulltext True to search page text, false to search titles
 * @return array Map with the keys tables, fields, conds, options and joins
 */
function getQuery( $filteredTerm, $fulltext ) {
	// Start from an empty description with every section present.
	$query = array();
	foreach ( array( 'tables', 'fields', 'conds', 'options', 'joins' ) as $section ) {
		$query[$section] = array();
	}

	// Each step fills in its share of the description by reference.
	$this->queryMain( $query, $filteredTerm, $fulltext );
	$this->queryFeatures( $query );
	$this->queryNamespaces( $query );
	$this->limitResult( $query );

	return $query;
}
275 
/**
 * Picks which field to index on, depending on what type of query.
 *
 * @param bool $fulltext Truthy for a text search, falsy for a title search
 * @return string 'si_text' or 'si_title'
 */
function getIndexField( $fulltext ) {
	if ( $fulltext ) {
		return 'si_text';
	}

	return 'si_title';
}
284 
/**
 * Get the base part of the search query.
 *
 * Appends the page/searchindex tables, the selected page fields, the
 * join condition and the fulltext match condition to $query.
 *
 * @param array &$query Query description, modified in place
 * @param string $filteredTerm Search term handed on to parseQuery()
 * @param bool $fulltext True to search page text, false to search titles
 */
function queryMain( &$query, $filteredTerm, $fulltext ) {
	// Parsed first, before $query is touched.
	$matchCond = $this->parseQuery( $filteredTerm, $fulltext );

	foreach ( array( 'page', 'searchindex' ) as $table ) {
		$query['tables'][] = $table;
	}

	foreach ( array( 'page_id', 'page_namespace', 'page_title' ) as $column ) {
		$query['fields'][] = $column;
	}

	foreach ( array( 'page_id=si_page', $matchCond ) as $cond ) {
		$query['conds'][] = $cond;
	}
}
303 
/**
 * Build the query description that counts all hits for a search.
 *
 * It uses the same match, feature and namespace conditions as the main
 * search query, but selects COUNT(*) and sets no LIMIT/OFFSET.
 *
 * @param string $filteredTerm Search term handed on to parseQuery()
 * @param bool $fulltext True to search page text, false to search titles
 * @return array Map with the keys tables, fields, conds, options and joins
 */
function getCountQuery( $filteredTerm, $fulltext ) {
	$matchCond = $this->parseQuery( $filteredTerm, $fulltext );

	$query = array(
		'tables' => array( 'page', 'searchindex' ),
		'fields' => array( 'COUNT(*) as c' ),
		'conds' => array( 'page_id=si_page' ),
		'options' => array(),
		'joins' => array(),
	);
	$query['conds'][] = $matchCond;

	$this->queryFeatures( $query );
	$this->queryNamespaces( $query );

	return $query;
}
324 
/**
 * Create or update the search index record for the given page.
 * Title and text are passed through normalizeText() before being stored.
 *
 * @param int $id Page id, stored as si_page
 * @param string $title Page title
 * @param string $text Page text
 */
function update( $id, $title, $text ) {
	$dbw = wfGetDB( DB_MASTER );

	$row = array(
		'si_page' => $id,
		'si_title' => $this->normalizeText( $title ),
		'si_text' => $this->normalizeText( $text )
	);
	$uniqueKeys = array( 'si_page' );

	$dbw->replace( 'searchindex', $uniqueKeys, $row, __METHOD__ );
}
343 
/**
 * Update a search index record's title only.
 *
 * @param int $id Page id identifying the si_page row
 * @param string $title New title; normalized before being stored
 */
function updateTitle( $id, $title ) {
	$dbw = wfGetDB( DB_MASTER );

	$newValues = array( 'si_title' => $this->normalizeText( $title ) );
	$where = array( 'si_page' => $id );
	$options = array( $dbw->lowPriorityOption() );

	$dbw->update( 'searchindex', $newValues, $where, __METHOD__, $options );
}
360 
/**
 * Remove the search index row of a page.
 *
 * @param int $id Page id identifying the si_page row
 * @param string $title Not used by this implementation
 */
function delete( $id, $title ) {
	$where = array( 'si_page' => $id );

	$dbw = wfGetDB( DB_MASTER );
	$dbw->delete( 'searchindex', $where, __METHOD__ );
}
373 
/**
 * Converts some characters for MySQL's indexing to grok it correctly,
 * and pads short words to overcome the server's minimum word length.
 *
 * @param string $string Text to normalize
 * @return string Normalized text
 */
function normalizeText( $string ) {
	// The content language object is a global; without this declaration
	// $wgContLang would be an undefined local inside the method.
	global $wgContLang;

	wfProfileIn( __METHOD__ );

	$out = parent::normalizeText( $string );

	// MySQL fulltext index doesn't grok utf-8, so we
	// need to fold cases and convert to hex
	$out = preg_replace_callback(
		"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'stripForSearchCallback' ),
		$wgContLang->lc( $out ) );

	// And to add insult to injury, the default indexing
	// ignores short words... Pad them so we can pass them
	// through without reconfiguring the server...
	$minLength = $this->minSearchLength();
	if ( $minLength > 1 ) {
		// Words of up to $n characters are below the minimum and get the pad.
		$n = $minLength - 1;
		$out = preg_replace(
			"/\b(\w{1,$n})\b/",
			"$1u800",
			$out );
	}

	// Periods within things like hostnames and IP addresses
	// are also important -- we want a search for "example.com"
	// or "192.168.1.1" to work sanely.
	//
	// MySQL's search seems to ignore them, so you'd match on
	// "example.wikipedia.com" and "192.168.83.1" as well.
	$out = preg_replace(
		"/(\w)\.(\w|\*)/u",
		"$1u82e$2",
		$out );

	wfProfileOut( __METHOD__ );

	return $out;
}
420 
/**
 * Armor a case-folded UTF-8 string to get through MySQL's fulltext
 * search: the matched byte sequence is replaced by 'u8' followed by its
 * hexadecimal form.
 *
 * @param array $matches preg_replace_callback() match; index 1 is used
 * @return string
 */
protected function stripForSearchCallback( $matches ) {
	$rawBytes = $matches[1];
	$hex = bin2hex( $rawBytes );

	return 'u8' . $hex;
}
430 
/**
 * Check MySQL server's ft_min_word_len setting so we know if we need to
 * pad short words. The value is looked up once and then cached in
 * self::$mMinSearchLength.
 *
 * @return int The server setting, or 0 when it could not be read
 */
protected function minSearchLength() {
	if ( is_null( self::$mMinSearchLength ) ) {
		$sql = "SHOW GLOBAL VARIABLES LIKE 'ft\\_min\\_word\\_len'";

		$dbr = wfGetDB( DB_SLAVE );
		$result = $dbr->query( $sql );
		$row = $result->fetchObject();
		$result->free();

		if ( $row && $row->Variable_name == 'ft_min_word_len' ) {
			self::$mMinSearchLength = intval( $row->Value );
		} else {
			self::$mMinSearchLength = 0;
		}
	}

	// Hand the cached value back; without this the caller would always
	// receive null and never pad short words.
	return self::$mMinSearchLength;
}
454 }
455 
/**
 * Result set that additionally carries the total hit count of a search.
 *
 * NOTE(review): the class declaration line was missing here, which left
 * the methods below outside of any class. The parent is presumed to be
 * SqlSearchResultSet -- confirm against the original file.
 */
class MySQLSearchResultSet extends SqlSearchResultSet {
	/**
	 * @param mixed $resultSet Database result, handed to the parent constructor
	 * @param array $terms Highlighting terms, handed to the parent constructor
	 * @param int|null $totalHits Total number of hits, or null when not counted
	 */
	function __construct( $resultSet, $terms, $totalHits = null ) {
		parent::__construct( $resultSet, $terms );
		$this->mTotalHits = $totalHits;
	}

	/**
	 * Some search modes return a total hit count for the query in the
	 * entire article database.
	 *
	 * @return int|null The value given to the constructor
	 */
	function getTotalHits() {
		return $this->mTotalHits;
	}
}
$result
The index of the header message $result[1]=The index of the body text message $result[2 through n]=Parameters passed to body text message. Please note the header message cannot receive/use parameters. 'ImportHandleLogItemXMLTag':When parsing a XML tag in a log item. $reader:XMLReader object $logInfo:Array of information Return false to stop further processing of the tag 'ImportHandlePageXMLTag':When parsing a XML tag in a page. $reader:XMLReader object $pageInfo:Array of information Return false to stop further processing of the tag 'ImportHandleRevisionXMLTag':When parsing a XML tag in a page revision. $reader:XMLReader object $pageInfo:Array of page information $revisionInfo:Array of revision information Return false to stop further processing of the tag 'ImportHandleToplevelXMLTag':When parsing a top level XML tag. $reader:XMLReader object Return false to stop further processing of the tag 'ImportHandleUploadXMLTag':When parsing a XML tag in a file upload. $reader:XMLReader object $revisionInfo:Array of information Return false to stop further processing of the tag 'InfoAction':When building information to display on the action=info page. $context:IContextSource object & $pageInfo:Array of information 'InitializeArticleMaybeRedirect':MediaWiki check to see if title is a redirect. $title:Title object for the current page $request:WebRequest $ignoreRedirect:boolean to skip redirect check $target:Title/string of redirect target $article:Article object 'InterwikiLoadPrefix':When resolving if a given prefix is an interwiki or not. Return true without providing an interwiki to continue interwiki search. $prefix:interwiki prefix we are looking for. & $iwData:output array describing the interwiki with keys iw_url, iw_local, iw_trans and optionally iw_api and iw_wikiid. 'InternalParseBeforeSanitize':during Parser 's internalParse method just before the parser removes unwanted/dangerous HTML tags and after nowiki/noinclude/includeonly/onlyinclude and other processings. 
Ideal for syntax-extensions after template/parser function execution which respect nowiki and HTML-comments. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InternalParseBeforeLinks':during Parser 's internalParse method before links but after nowiki/noinclude/includeonly/onlyinclude and other processings. & $parser:Parser object & $text:string containing partially parsed text & $stripState:Parser 's internal StripState object 'InvalidateEmailComplete':Called after a user 's email has been invalidated successfully. $user:user(object) whose email is being invalidated 'IRCLineURL':When constructing the URL to use in an IRC notification. Callee may modify $url and $query, URL will be constructed as $url . $query & $url:URL to index.php & $query:Query string $rc:RecentChange object that triggered url generation 'IsFileCacheable':Override the result of Article::isFileCacheable()(if true) $article:article(object) being checked 'IsTrustedProxy':Override the result of wfIsTrustedProxy() $ip:IP being check $result:Change this value to override the result of wfIsTrustedProxy() 'IsUploadAllowedFromUrl':Override the result of UploadFromUrl::isAllowedUrl() $url:URL used to upload from & $allowed:Boolean indicating if uploading is allowed for given URL 'isValidEmailAddr':Override the result of User::isValidEmailAddr(), for instance to return false if the domain name doesn 't match your organization. 
$addr:The e-mail address entered by the user & $result:Set this and return false to override the internal checks 'isValidPassword':Override the result of User::isValidPassword() $password:The password entered by the user & $result:Set this and return false to override the internal checks $user:User the password is being validated for 'Language::getMessagesFileName':$code:The language code or the language we 're looking for a messages file for & $file:The messages file path, you can override this to change the location. 'LanguageGetNamespaces':Provide custom ordering for namespaces or remove namespaces. Do not use this hook to add namespaces. Use CanonicalNamespaces for that. & $namespaces:Array of namespaces indexed by their numbers 'LanguageGetMagic':DEPRECATED, use $magicWords in a file listed in $wgExtensionMessagesFiles instead. Use this to define synonyms of magic words depending of the language $magicExtensions:associative array of magic words synonyms $lang:language code(string) 'LanguageGetSpecialPageAliases':DEPRECATED, use $specialPageAliases in a file listed in $wgExtensionMessagesFiles instead. Use to define aliases of special pages names depending of the language $specialPageAliases:associative array of magic words synonyms $lang:language code(string) 'LanguageGetTranslatedLanguageNames':Provide translated language names. & $names:array of language code=> language name $code language of the preferred translations 'LanguageLinks':Manipulate a page 's language links. This is called in various places to allow extensions to define the effective language links for a page. $title:The page 's Title. & $links:Associative array mapping language codes to prefixed links of the form "language:title". & $linkFlags:Associative array mapping prefixed links to arrays of flags. Currently unused, but planned to provide support for marking individual language links in the UI, e.g. for featured articles. 
'LinkBegin':Used when generating internal and interwiki links in Linker::link(), before processing starts. Return false to skip default processing and return $ret. See documentation for Linker::link() for details on the expected meanings of parameters. $skin:the Skin object $target:the Title that the link is pointing to & $html:the contents that the< a > tag should have(raw HTML) $result
Definition: hooks.txt:1528
DB_MASTER
const DB_MASTER
Definition: Defines.php:56
php
skin txt MediaWiki includes four core it has been set as the default in MediaWiki since the replacing Monobook it had been been the default skin since before being replaced by Vector largely rewritten in while keeping its appearance Several legacy skins were removed in the as the burden of supporting them became too heavy to bear Those in etc for skin dependent CSS etc for skin dependent JavaScript These can also be customised on a per user by etc This feature has led to a wide variety of user styles becoming that gallery is a good place to ending in php
Definition: skin.txt:62
SearchMySQL\limitResult
limitResult(&$query)
Add limit options.
Definition: SearchMySQL.php:246
SearchMySQL\queryNamespaces
queryNamespaces(&$query)
Add namespace conditions.
Definition: SearchMySQL.php:232
SearchMySQL
Search engine hook for MySQL 4+.
Definition: SearchMySQL.php:31
wfGetDB
& wfGetDB( $db, $groups=array(), $wiki=false)
Get a Database object.
Definition: GlobalFunctions.php:3659
SearchMySQL\searchTitle
searchTitle( $term)
Perform a title-only search query and return a result set.
Definition: SearchMySQL.php:168
SearchMySQL\minSearchLength
minSearchLength()
Check MySQL server's ft_min_word_len setting so we know if we need to pad short words....
Definition: SearchMySQL.php:437
SearchMySQL\getIndexField
getIndexField( $fulltext)
Picks which field to index on, depending on what type of query.
Definition: SearchMySQL.php:281
wfProfileIn
wfProfileIn( $functionname)
Begin profiling of a function.
Definition: Profiler.php:33
$n
$n
Definition: RandomTest.php:76
SearchMySQL\normalizeText
normalizeText( $string)
Converts some characters for MySQL's indexing to grok it correctly, and pads short words to overcome ...
Definition: SearchMySQL.php:379
SearchMySQL\queryMain
queryMain(&$query, $filteredTerm, $fulltext)
Get the base part of the search query.
Definition: SearchMySQL.php:293
$wgContLang
this class mediates it Skin Encapsulates a look and feel for the wiki All of the functions that render HTML and make choices about how to render it are here and are called from various other places when and is meant to be subclassed with other skins that may override some of its functions The User object contains a reference to a and so rather than having a global skin object we just rely on the global User and get the skin with $wgUser and also has some character encoding functions and other locale stuff The current user interface language is instantiated as and the content language as $wgContLang
Definition: design.txt:56
SearchMySQL\queryFeatures
queryFeatures(&$query)
Add special conditions.
Definition: SearchMySQL.php:219
$dbr
$dbr
Definition: testCompression.php:48
SearchDatabase
Base search engine base class for database-backed searches.
Definition: SearchDatabase.php:29
MySQLSearchResultSet\getTotalHits
getTotalHits()
Some search modes return a total hit count for the query in the entire article database.
Definition: SearchMySQL.php:465
namespaces
to move a page</td >< td > &*You are moving the page across namespaces
Definition: All_system_messages.txt:2677
SearchMySQL\stripForSearchCallback
stripForSearchCallback( $matches)
Armor a case-folded UTF-8 string to get through MySQL's fulltext search without being mucked up by fu...
Definition: SearchMySQL.php:427
SearchMySQL\updateTitle
updateTitle( $id, $title)
Update a search index record's title only.
Definition: SearchMySQL.php:351
$total
$total
Definition: Utf8Test.php:92
$out
$out
Definition: UtfNormalGenerate.php:167
SearchMySQL\regexTerm
regexTerm( $string, $wildcard)
Definition: SearchMySQL.php:129
SearchMySQL\getQuery
getQuery( $filteredTerm, $fulltext)
Construct the SQL query to do the search.
Definition: SearchMySQL.php:259
wfProfileOut
wfProfileOut( $functionname='missing')
Stop profiling of a function.
Definition: Profiler.php:46
SearchEngine\$offset
$offset
Definition: SearchEngine.php:34
array
the array() calling protocol came about after MediaWiki 1.4rc1.
List of Api Query prop modules.
SearchMySQL\getCountQuery
getCountQuery( $filteredTerm, $fulltext)
Definition: SearchMySQL.php:308
SearchEngine\filter
filter( $text)
Return a 'cleaned up' search string.
Definition: SearchEngine.php:435
global
when a variable name is used in a function, it is silently declared as a new local, masking the global
Definition: design.txt:93
$regexp
$regexp
Definition: mwdoc-filter.php:19
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
wfDebug
wfDebug( $text, $dest='all')
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:933
$title
presenting them properly to the user as errors is done by the caller $title
Definition: hooks.txt:1324
SqlSearchResultSet
This class is used for different SQL-based search engines shipped with MediaWiki.
Definition: SearchResultSet.php:140
SearchEngine\$limit
$limit
Definition: SearchEngine.php:33
$matches
if(!defined( 'MEDIAWIKI')) if(!isset( $wgVersion)) $matches
Definition: NoLocalSettings.php:33
$value
$value
Definition: styleTest.css.php:45
SearchEngine\legalSearchChars
static legalSearchChars()
Definition: SearchEngine.php:256
SearchMySQL\supports
supports( $feature)
Definition: SearchMySQL.php:205
SearchEngine\$namespaces
$namespaces
Definition: SearchEngine.php:37
SearchMySQL\parseQuery
parseQuery( $filteredText, $fulltext)
Parse the user's query and transform it into an SQL fragment which will become part of a WHERE clause...
Definition: SearchMySQL.php:44
SearchMySQL\update
update( $id, $title, $text)
Create or update the search index record for the given page.
Definition: SearchMySQL.php:333
DB_SLAVE
const DB_SLAVE
Definition: Defines.php:55
SearchMySQL\$strictMatching
$strictMatching
Definition: SearchMySQL.php:32
$term
the value to return A Title object or null whereas SearchGetNearMatch runs after $term
Definition: hooks.txt:2125
SearchMySQL\$mMinSearchLength
static $mMinSearchLength
Definition: SearchMySQL.php:33
as
This document is intended to provide useful advice for parties seeking to redistribute MediaWiki to end users It s targeted particularly at maintainers for Linux since it s been observed that distribution packages of MediaWiki often break We ve consistently had to recommend that users seeking support use official tarballs instead of their distribution s and this often solves whatever problem the user is having It would be nice if this could such as
Definition: distributors.txt:9
MySQLSearchResultSet
Definition: SearchMySQL.php:459
SearchMySQL\searchText
searchText( $term)
Perform a full text search query and return a result set.
Definition: SearchMySQL.php:158
$query
return true to allow those checks to and false if checking is done use this to change the tables headers temp or archived zone change it to an object instance and return false override the list derivative used the name of the old file when set the default code will be skipped add a value to it if you want to add a cookie that have to vary cache options can modify $query
Definition: hooks.txt:1105
MySQLSearchResultSet\__construct
__construct( $resultSet, $terms, $totalHits=null)
Definition: SearchMySQL.php:460
SearchMySQL\legalSearchChars
static legalSearchChars()
Definition: SearchMySQL.php:148
SearchMySQL\searchInternal
searchInternal( $term, $fulltext)
Definition: SearchMySQL.php:172