MediaWiki  master
SearchPostgres.php
Go to the documentation of this file.
1 <?php
28 
/**
 * Perform a full text search query via tsearch2 against the 'titlevector'
 * column and return a result set.
 *
 * @param string $term Raw search term
 * @return SqlSearchResultSet
 */
protected function doSearchTitleInDB( $term ) {
	$q = $this->searchQuery( $term, 'titlevector', 'page_title' );

	// Error reporting is lowered to E_ERROR while the query runs. The previous
	// level is restored in `finally` so that an exception thrown while connecting
	// or querying cannot leave it lowered for the rest of the request.
	$olderror = error_reporting( E_ERROR );
	try {
		$dbr = $this->lb->getConnectionRef( DB_REPLICA );
		$resultSet = $dbr->query( $q, 'SearchPostgres', true );
	} finally {
		error_reporting( $olderror );
	}

	return new SqlSearchResultSet( $resultSet, $this->searchTerms );
}
50 
/**
 * Perform a full text search query via tsearch2 against the 'textvector'
 * column and return a result set.
 *
 * @param string $term Raw search term
 * @return SqlSearchResultSet
 */
protected function doSearchTextInDB( $term ) {
	$q = $this->searchQuery( $term, 'textvector', 'old_text' );

	// Error reporting is lowered to E_ERROR while the query runs. The previous
	// level is restored in `finally` so that an exception thrown while connecting
	// or querying cannot leave it lowered for the rest of the request.
	$olderror = error_reporting( E_ERROR );
	try {
		$dbr = $this->lb->getConnectionRef( DB_REPLICA );
		$resultSet = $dbr->query( $q, 'SearchPostgres', true );
	} finally {
		error_reporting( $olderror );
	}

	return new SqlSearchResultSet( $resultSet, $this->searchTerms );
}
59 
/**
 * Transform the user's search string into an expression for to_tsquery().
 *
 * Words are joined with '&' by default; the words "and", "or" and "not"
 * (any letter case), a bare "|", and a leading "-" or "!" on a word are
 * translated into the '&', '|' and '!' operators.
 *
 * @param string $term Raw search term
 * @return string The rewritten expression after passing through addQuotes()
 */
private function parseQuery( $term ) {
	wfDebug( "parseQuery received: $term \n" );

	// Input normalisation, applied strictly in this order.
	$inputRewrites = [
		// No backslashes allowed
		'/\\\/' => '',
		// Collapse parens into nearby words
		'/\s*\(\s*/' => ' (',
		'/\s*\)\s*/' => ') ',
		// Treat colons as word separators
		'/:/' => ' ',
	];
	foreach ( $inputRewrites as $pattern => $replacement ) {
		$term = preg_replace( $pattern, $replacement, $term );
	}

	// Words that stand for an operator rather than a search word.
	$operatorFor = [
		'and' => ' & ',
		'or' => ' | ',
		'|' => ' | ',
		'not' => ' & !',
	];

	$searchstring = '';
	$tokens = [];
	if ( preg_match_all( '/([-!]?)(\S+)\s*/', $term, $tokens, PREG_SET_ORDER ) ) {
		foreach ( $tokens as $token ) {
			$negation = $token[1];
			$word = $token[2];

			// A leading '-' or '!' negates whatever follows.
			if ( $negation !== '' ) {
				$searchstring .= ' & !';
			}

			// Anything that is not an operator word is AND-ed onto the expression.
			$searchstring .= $operatorFor[strtolower( $word )] ?? " & $word";
		}
	}

	// Clean-up passes over the assembled expression, applied strictly in this order.
	$outputRewrites = [
		// Strip out leading junk
		'/^[\s\&\|]+/' => '',
		// Remove any doubled-up operators
		'/([\!\&\|]) +(?:[\&\|] +)+/' => "$1 ",
		// Remove any non-spaced operators (e.g. "Zounds!")
		'/([^ ])[\!\&\|]/' => "$1",
		// Remove any trailing whitespace or operators
		'/[\s\!\&\|]+$/' => '',
		// Remove unnecessary quotes around everything
		'/^[\'"](.*)[\'"]$/' => "$1",
	];
	foreach ( $outputRewrites as $pattern => $replacement ) {
		$searchstring = preg_replace( $pattern, $replacement, $searchstring );
	}

	// Quote the whole thing
	$searchstring = $this->lb->getConnectionRef( DB_REPLICA )->addQuotes( $searchstring );

	wfDebug( "parseQuery returned: $searchstring \n" );

	return $searchstring;
}
123 
/**
 * Construct the full SQL query to do the search.
 *
 * Also resets $this->searchTerms and refills it with the quoted lexemes found
 * in the text returned by to_tsquery() for the parsed term.
 *
 * @param string $term Raw search term
 * @param string $fulltext Name of the column that is matched and ranked against
 * @param string $colname Not referenced by this method
 * @return string SQL query text
 */
private function searchQuery( $term, $fulltext, $colname ) {
	// SQL fragment for the given term
	$searchstring = $this->parseQuery( $term );

	// A separate query is needed here so gin does not complain about empty searches
	$probeSql = "SELECT to_tsquery($searchstring)";
	$dbr = $this->lb->getConnectionRef( DB_REPLICA );
	$res = $dbr->query( $probeSql );
	if ( !$res ) {
		// TODO: Better output (example to catch: one 'two)
		die( "Sorry, that was not a valid search string. Please go back and try again" );
	}
	$top = $res->fetchRow()[0];

	$this->searchTerms = [];
	$slotRoleStore = MediaWikiServices::getInstance()->getSlotRoleStore();

	// Both cases share the same join and differ only in the score expression
	// and in the final match condition.
	if ( $top === "" ) {
		// e.g. if only stopwords are used XXX return something better
		$scoreSql = "0 AS score ";
		$matchSql = "AND 1=0";
	} else {
		$lexemes = [];
		if ( preg_match_all( "/'([^']+)'/", $top, $lexemes, PREG_SET_ORDER ) ) {
			foreach ( $lexemes as $lexeme ) {
				$this->searchTerms[$lexeme[1]] = $lexeme[1];
			}
		}

		$scoreSql = "ts_rank($fulltext, to_tsquery($searchstring), 5) AS score ";
		$matchSql = "AND $fulltext @@ to_tsquery($searchstring)";
	}

	$query = "SELECT page_id, page_namespace, page_title, " .
		$scoreSql .
		"FROM page p, revision r, slots s, content c, pagecontent pc " .
		"WHERE p.page_latest = r.rev_id " .
		"AND s.slot_revision_id = r.rev_id " .
		"AND s.slot_role_id = " . $slotRoleStore->getId( SlotRecord::MAIN ) . " " .
		"AND c.content_id = s.slot_content_id " .
		"AND pc.old_id = substring( c.content_address from '^tt:([0-9]+)$' )::int " .
		$matchSql;

	// Namespaces: null means search all, an empty list falls back to namespace 0
	if ( $this->namespaces !== null ) {
		if ( count( $this->namespaces ) < 1 ) {
			$query .= ' AND page_namespace = 0';
		} else {
			$namespaceList = $dbr->makeList( $this->namespaces );
			$query .= " AND page_namespace IN ($namespaceList)";
		}
	}

	$query .= " ORDER BY score DESC, page_id DESC";
	$query .= $dbr->limitResult( '', $this->limit, $this->offset );

	wfDebug( "searchQuery returned: $query \n" );

	return $query;
}
192 
193  # # Most of the work of these two functions is done automatically via triggers
194 
/**
 * Clear the text vector of a page's older revisions so they are not indexed.
 *
 * For the given page, the main-slot content addresses of the form "tt:<id>"
 * are collected across its revisions; every matching pagecontent row except
 * the one with the highest ID gets its textvector set to NULL.
 *
 * @param int $pageid Page ID
 * @param string $title Not referenced by this method
 * @param string $text Not referenced by this method
 * @return bool Always true
 */
public function update( $pageid, $title, $text ) {
	// The ID is embedded directly in the SQL text below, so force it to an
	// integer to guarantee it cannot change the structure of the statement.
	$pageid = (int)$pageid;

	$slotRoleStore = MediaWikiServices::getInstance()->getSlotRoleStore();

	// We don't want to index older revisions
	$sql = "UPDATE pagecontent SET textvector = NULL " .
		"WHERE textvector IS NOT NULL " .
		"AND old_id IN " .
		"(SELECT DISTINCT substring( c.content_address from '^tt:([0-9]+)$' )::int AS old_rev_text_id " .
		" FROM content c, slots s, revision r " .
		" WHERE r.rev_page = $pageid " .
		" AND s.slot_revision_id = r.rev_id " .
		" AND s.slot_role_id = " . $slotRoleStore->getId( SlotRecord::MAIN ) . " " .
		" AND c.content_id = s.slot_content_id " .
		" ORDER BY old_rev_text_id DESC OFFSET 1)";

	$dbw = $this->lb->getConnectionRef( DB_MASTER );
	// Pass the caller name so the statement is attributed to this method.
	$dbw->query( $sql, __METHOD__ );

	return true;
}
214 
/**
 * No-op: this method performs no work and simply reports success.
 *
 * @param int $id Not referenced by this method
 * @param string $title Not referenced by this method
 * @return bool Always true
 */
public function updateTitle( $id, $title ) {
	return true;
}
218 }
updateTitle( $id, $title)
doSearchTitleInDB( $term)
Perform a full text search query via tsearch2 and return a result set.
doSearchTextInDB( $term)
searchQuery( $term, $fulltext, $colname)
Construct the full SQL query to do the search.
const DB_MASTER
Definition: defines.php:26
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
This class is used for different SQL-based search engines shipped with MediaWiki. ...
update( $pageid, $title, $text)
int [] null $namespaces
parseQuery( $term)
Transform the user's search string into a better form for tsearch2. Returns an SQL fragment consisting...
Base search engine base class for database-backed searches.
const DB_REPLICA
Definition: defines.php:25
Search engine hook base class for Postgres.