MediaWiki  master
SearchPostgres.php
Go to the documentation of this file.
1 <?php
29 
/**
 * Perform a full text search query against page titles and return a result set.
 *
 * The SQL is built by searchQuery() against the 'titlevector' column and run
 * on a replica connection with database errors silenced.
 *
 * @param string $term Raw search term
 * @return SqlSearchResultSet
 */
protected function doSearchTitleInDB( $term ) {
	$q = $this->searchQuery( $term, 'titlevector', 'page_title' );

	// Temporarily lower PHP's error reporting while the query runs.
	$olderror = error_reporting( E_ERROR );
	try {
		$dbr = $this->lb->getConnectionRef( DB_REPLICA );
		$resultSet = $dbr->query( $q, 'SearchPostgres', IDatabase::QUERY_SILENCE_ERRORS );
	} finally {
		// Always restore the previous level, even if connecting or querying throws;
		// otherwise the lowered level would leak into the rest of the request.
		error_reporting( $olderror );
	}

	return new SqlSearchResultSet( $resultSet, $this->searchTerms );
}
51 
/**
 * Perform a full text search query against page text and return a result set.
 *
 * The SQL is built by searchQuery() against the 'textvector' column and run
 * on a replica connection with database errors silenced.
 *
 * @param string $term Raw search term
 * @return SqlSearchResultSet
 */
protected function doSearchTextInDB( $term ) {
	$q = $this->searchQuery( $term, 'textvector', 'old_text' );

	// Temporarily lower PHP's error reporting while the query runs.
	$olderror = error_reporting( E_ERROR );
	try {
		$dbr = $this->lb->getConnectionRef( DB_REPLICA );
		$resultSet = $dbr->query( $q, 'SearchPostgres', IDatabase::QUERY_SILENCE_ERRORS );
	} finally {
		// Always restore the previous level, even if connecting or querying throws;
		// otherwise the lowered level would leak into the rest of the request.
		error_reporting( $olderror );
	}

	return new SqlSearchResultSet( $resultSet, $this->searchTerms );
}
60 
/**
 * Transform the user's search string into a form suitable for to_tsquery().
 *
 * The term is split on whitespace; the words "and", "or" and "not" (any case),
 * a bare "|", and a leading "-" or "!" on a word are turned into the tsquery
 * operators "&", "|" and "!". All remaining words are joined with "&". The
 * result is then tidied up and quoted for use as an SQL string literal.
 *
 * @param string $term Raw search term
 * @return string Quoted SQL string literal (as produced by addQuotes())
 */
private function parseQuery( $term ) {
	wfDebug( "parseQuery received: $term" );

	# # No backslashes allowed
	$term = preg_replace( '/\\\/', '', $term );

	# # Collapse parens into nearby words:
	# # (the parentheses must be escaped, otherwise the patterns do not compile
	# # and preg_replace() returns null, discarding the whole term)
	$term = preg_replace( '/\s*\(\s*/', ' (', $term );
	$term = preg_replace( '/\s*\)\s*/', ') ', $term );

	# # Treat colons as word separators:
	$term = preg_replace( '/:/', ' ', $term );

	$searchstring = '';
	$m = [];
	# # Each match: [1] is an optional leading "-" or "!", [2] is the word itself
	if ( preg_match_all( '/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) {
		foreach ( $m as $terms ) {
			if ( strlen( $terms[1] ) ) {
				# # A leading "-" or "!" negates the word that follows
				$searchstring .= ' & !';
			}
			if ( strtolower( $terms[2] ) === 'and' ) {
				$searchstring .= ' & ';
			} elseif ( strtolower( $terms[2] ) === 'or' || $terms[2] === '|' ) {
				$searchstring .= ' | ';
			} elseif ( strtolower( $terms[2] ) === 'not' ) {
				$searchstring .= ' & !';
			} else {
				$searchstring .= " & $terms[2]";
			}
		}
	}

	# # Strip out leading junk
	$searchstring = preg_replace( '/^[\s\&\|]+/', '', $searchstring );

	# # Remove any doubled-up operators
	$searchstring = preg_replace( '/([\!\&\|]) +(?:[\&\|] +)+/', "$1 ", $searchstring );

	# # Remove any non-spaced operators (e.g. "Zounds!")
	$searchstring = preg_replace( '/([^ ])[\!\&\|]/', "$1", $searchstring );

	# # Remove any trailing whitespace or operators
	$searchstring = preg_replace( '/[\s\!\&\|]+$/', '', $searchstring );

	# # Remove unnecessary quotes around everything
	$searchstring = preg_replace( '/^[\'"](.*)[\'"]$/', "$1", $searchstring );

	# # Quote the whole thing
	$dbr = $this->lb->getConnectionRef( DB_REPLICA );
	$searchstring = $dbr->addQuotes( $searchstring );

	wfDebug( "parseQuery returned: $searchstring" );

	return $searchstring;
}
124 
/**
 * Construct the full SQL query to do the search.
 *
 * Side effect: resets $this->searchTerms and refills it with the quoted
 * lexemes that to_tsquery() reports for the parsed term.
 *
 * @param string $term Raw search term
 * @param string $fulltext Name of the tsvector column to match and rank against
 * @param string $colname Not used by this implementation
 * @return string Complete SELECT statement, including ORDER BY and limit/offset
 */
private function searchQuery( $term, $fulltext, $colname ) {
	// Quoted tsquery argument derived from the user's input
	$tsQueryArg = $this->parseQuery( $term );

	// Run to_tsquery() on its own first so gin does not complain about empty searches
	$dbr = $this->lb->getConnectionRef( DB_REPLICA );
	$probe = $dbr->query( "SELECT to_tsquery($tsQueryArg)", __METHOD__ );
	if ( !$probe ) {
		// TODO: Better output (example to catch: one 'two)
		die( "Sorry, that was not a valid search string. Please go back and try again" );
	}
	$normalized = $probe->fetchRow()[0];

	$this->searchTerms = [];
	$mainRoleId = MediaWikiServices::getInstance()->getSlotRoleStore()->getId( SlotRecord::MAIN );

	// Join from the page to the text row of its latest revision's main slot;
	// shared by both query variants below.
	$latestTextJoin = "FROM page p, revision r, slots s, content c, pagecontent pc " .
		"WHERE p.page_latest = r.rev_id " .
		"AND s.slot_revision_id = r.rev_id " .
		"AND s.slot_role_id = " . $mainRoleId . " " .
		"AND c.content_id = s.slot_content_id " .
		"AND pc.old_id = substring( c.content_address from '^tt:([0-9]+)$' )::int ";

	if ( $normalized === "" ) {
		// e.g. if only stopwords are used: build a query that matches nothing
		// XXX return something better
		$query = "SELECT page_id, page_namespace, page_title, 0 AS score " .
			$latestTextJoin .
			"AND 1=0";
	} else {
		// Remember every single-quoted lexeme of the normalised tsquery
		$found = [];
		if ( preg_match_all( "/'([^']+)'/", $normalized, $found ) ) {
			foreach ( $found[1] as $lexeme ) {
				$this->searchTerms[$lexeme] = $lexeme;
			}
		}

		$query = "SELECT page_id, page_namespace, page_title, " .
			"ts_rank($fulltext, to_tsquery($tsQueryArg), 5) AS score " .
			$latestTextJoin .
			"AND $fulltext @@ to_tsquery($tsQueryArg)";
	}

	// Namespaces: null means search all, an empty list defaults to namespace 0
	if ( $this->namespaces !== null ) {
		$query .= count( $this->namespaces ) < 1
			? ' AND page_namespace = 0'
			: ' AND page_namespace IN (' . $dbr->makeList( $this->namespaces ) . ')';
	}

	$query .= " ORDER BY score DESC, page_id DESC" .
		$dbr->limitResult( '', $this->limit, $this->offset );

	wfDebug( "searchQuery returned: $query" );

	return $query;
}
193 
194  # # Most of the work of these two functions is done automatically via triggers
195 
/**
 * Create or update the search index record for the given page.
 *
 * This method only clears the textvector of older text rows belonging to the
 * page, so that old revisions are not indexed: every main-slot text row of
 * the page except the one with the highest text ID has its textvector set
 * to NULL.
 *
 * @param int $pageid Page ID
 * @param string $title Not used by this implementation
 * @param string $text Not used by this implementation
 * @return bool Always true
 */
public function update( $pageid, $title, $text ) {
	// The ID is interpolated into raw SQL below, so force it to an integer first.
	$pageid = (int)$pageid;

	# # We don't want to index older revisions
	$slotRoleStore = MediaWikiServices::getInstance()->getSlotRoleStore();
	$sql = "UPDATE pagecontent SET textvector = NULL " .
		"WHERE textvector IS NOT NULL " .
		"AND old_id IN " .
		"(SELECT DISTINCT substring( c.content_address from '^tt:([0-9]+)$' )::int AS old_rev_text_id " .
		" FROM content c, slots s, revision r " .
		" WHERE r.rev_page = $pageid " .
		" AND s.slot_revision_id = r.rev_id " .
		" AND s.slot_role_id = " . $slotRoleStore->getId( SlotRecord::MAIN ) . " " .
		" AND c.content_id = s.slot_content_id " .
		// OFFSET 1 skips the newest text row, which stays indexed
		" ORDER BY old_rev_text_id DESC OFFSET 1)";

	$dbw = $this->lb->getConnectionRef( DB_MASTER );
	$dbw->query( $sql, __METHOD__ );

	return true;
}
215 
/**
 * Update a search index record's title only.
 *
 * This implementation performs no work of its own and always reports success.
 *
 * @param int $id Not used by this implementation
 * @param string $title Not used by this implementation
 * @return bool Always true
 */
public function updateTitle( $id, $title ) {
	// Nothing to do here.
	return true;
}
219 }
SearchPostgres\update
update( $pageid, $title, $text)
Create or update the search index record for the given page.
Definition: SearchPostgres.php:196
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:152
SearchPostgres\parseQuery
parseQuery( $term)
Transform the user's search string into a better form for tsearch2. Returns an SQL fragment consisting...
Definition: SearchPostgres.php:69
SearchEngine\$namespaces
int[]|null $namespaces
Definition: SearchEngine.php:44
$res
$res
Definition: testCompression.php:57
SearchPostgres
Search engine hook base class for Postgres.
Definition: SearchPostgres.php:34
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relational database handles.
Definition: IDatabase.php:38
SearchPostgres\searchQuery
searchQuery( $term, $fulltext, $colname)
Construct the full SQL query to do the search.
Definition: SearchPostgres.php:132
$dbr
$dbr
Definition: testCompression.php:54
SearchDatabase
Base search engine class for database-backed searches. Stable for subclassing.
Definition: SearchDatabase.php:33
SearchPostgres\doSearchTitleInDB
doSearchTitleInDB( $term)
Perform a full text search query via tsearch2 and return a result set.
Definition: SearchPostgres.php:43
$title
$title
Definition: testCompression.php:38
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
DB_MASTER
const DB_MASTER
Definition: defines.php:26
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:909
SqlSearchResultSet
This class is used for different SQL-based search engines shipped with MediaWiki.
Definition: SqlSearchResultSet.php:9
SearchPostgres\doSearchTextInDB
doSearchTextInDB( $term)
Perform a full text search query and return a result set.
Definition: SearchPostgres.php:52
SearchPostgres\updateTitle
updateTitle( $id, $title)
Update a search index record's title only.
Definition: SearchPostgres.php:216
Revision\SlotRecord
Value object representing a content slot associated with a page revision.
Definition: SlotRecord.php:39