Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 100 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
SearchPostgres | |
0.00% |
0 / 100 |
|
0.00% |
0 / 6 |
380 | |
0.00% |
0 / 1 |
doSearchTitleInDB | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
doSearchTextInDB | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
parseQuery | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
72 | |||
searchQuery | |
0.00% |
0 / 43 |
|
0.00% |
0 / 1 |
56 | |||
update | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
updateTitle | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * PostgreSQL search engine |
4 | * |
5 | * Copyright © 2006-2007 Greg Sabino Mullane <greg@turnstep.com> |
6 | * https://www.mediawiki.org/ |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | * http://www.gnu.org/copyleft/gpl.html |
22 | * |
23 | * @file |
24 | * @ingroup Search |
25 | */ |
26 | use MediaWiki\MediaWikiServices; |
27 | use MediaWiki\Revision\SlotRecord; |
28 | use Wikimedia\Rdbms\IDatabase; |
29 | |
/**
 * Search engine implementation for PostgreSQL, built on its full text
 * search support (tsearch2).
 *
 * Search queries are issued against the replica connection obtained from
 * $this->dbProvider; the index maintenance in update() uses the primary.
 *
 * @ingroup Search
 */
class SearchPostgres extends SearchDatabase {
	/**
	 * Perform a full text search query via tsearch2 against the
	 * "titlevector" column and return a result set.
	 *
	 * @param string $term Raw search term
	 * @return SqlSearchResultSet
	 */
	protected function doSearchTitleInDB( $term ) {
		return $this->doSearchInDB( $term, 'titlevector' );
	}

	/**
	 * Perform a full text search query via tsearch2 against the
	 * "textvector" column and return a result set.
	 *
	 * @param string $term Raw search term
	 * @return SqlSearchResultSet
	 */
	protected function doSearchTextInDB( $term ) {
		return $this->doSearchInDB( $term, 'textvector' );
	}

	/**
	 * Shared implementation of the title and text searches: build the SQL
	 * for the given vector column, run it on the replica with query errors
	 * silenced, and wrap the outcome in a result set.
	 *
	 * @param string $term Raw search term
	 * @param string $fulltext Name of the tsvector column to match against
	 * @return SqlSearchResultSet
	 */
	private function doSearchInDB( $term, $fulltext ) {
		$q = $this->searchQuery( $term, $fulltext );

		// Only fatal errors are reported while the query runs. The previous
		// level is restored in "finally" so that it cannot leak out of this
		// method if anything in between throws.
		$olderror = error_reporting( E_ERROR );
		try {
			$dbr = $this->dbProvider->getReplicaDatabase();
			// The real type is still IDatabase, but IReplicaDatabase is used for safety.
			'@phan-var IDatabase $dbr';
			// phpcs:ignore MediaWiki.Usage.DbrQueryUsage.DbrQueryFound
			$resultSet = $dbr->query( $q, 'SearchPostgres', IDatabase::QUERY_SILENCE_ERRORS );
		} finally {
			error_reporting( $olderror );
		}

		// searchTerms was populated by searchQuery() above.
		return new SqlSearchResultSet( $resultSet, $this->searchTerms );
	}

	/**
	 * Transform the user's search string into a better form for tsearch2.
	 * Returns an SQL fragment consisting of quoted text to search for.
	 *
	 * @param string $term
	 *
	 * @return string Quoted SQL literal, ready to be embedded in to_tsquery()
	 */
	private function parseQuery( $term ) {
		wfDebug( "parseQuery received: $term" );

		// No backslashes allowed
		$term = str_replace( '\\', '', $term );

		// Collapse parens into nearby words:
		$term = preg_replace( '/\s*\(\s*/', ' (', $term );
		$term = preg_replace( '/\s*\)\s*/', ') ', $term );

		// Treat colons as word separators:
		$term = str_replace( ':', ' ', $term );

		$searchstring = '';
		$m = [];
		if ( preg_match_all( '/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) {
			foreach ( $m as $terms ) {
				// A leading "-" or "!" negates the word that follows
				if ( $terms[1] !== '' ) {
					$searchstring .= ' & !';
				}

				// Map the boolean keywords onto tsquery operators; anything
				// else is a search word that is ANDed onto the expression.
				$word = strtolower( $terms[2] );
				if ( $word === 'and' ) {
					$searchstring .= ' & ';
				} elseif ( $word === 'or' || $terms[2] === '|' ) {
					$searchstring .= ' | ';
				} elseif ( $word === 'not' ) {
					$searchstring .= ' & !';
				} else {
					$searchstring .= " & $terms[2]";
				}
			}
		}

		// Strip out leading junk
		$searchstring = preg_replace( '/^[\s\&\|]+/', '', $searchstring );

		// Remove any doubled-up operators
		$searchstring = preg_replace( '/([\!\&\|]) +(?:[\&\|] +)+/', '$1 ', $searchstring );

		// Remove any non-spaced operators (e.g. "Zounds!")
		$searchstring = preg_replace( '/([^ ])[\!\&\|]/', '$1', $searchstring );

		// Remove any trailing whitespace or operators
		$searchstring = preg_replace( '/[\s\!\&\|]+$/', '', $searchstring );

		// Remove unnecessary quotes around everything
		$searchstring = preg_replace( '/^[\'"](.*)[\'"]$/', '$1', $searchstring );

		// Quote the whole thing
		$dbr = $this->dbProvider->getReplicaDatabase();
		$searchstring = $dbr->addQuotes( $searchstring );

		wfDebug( "parseQuery returned: $searchstring" );

		return $searchstring;
	}

	/**
	 * Construct the full SQL query to do the search.
	 *
	 * As a side effect, $this->searchTerms is reset and filled with the
	 * quoted lexemes found in the normalised tsquery.
	 *
	 * @param string $term Raw search term
	 * @param string $fulltext Name of the tsvector column to match against
	 * @return string
	 */
	private function searchQuery( $term, $fulltext ) {
		// Get the SQL fragment for the given term
		$searchstring = $this->parseQuery( $term );

		// We need a separate query here so gin does not complain about empty searches
		$sql = "SELECT to_tsquery($searchstring)";
		$dbr = $this->dbProvider->getReplicaDatabase();
		// The real type is still IDatabase, but IReplicaDatabase is used for safety.
		'@phan-var IDatabase $dbr';
		// phpcs:ignore MediaWiki.Usage.DbrQueryUsage.DbrQueryFound
		$res = $dbr->query( $sql, __METHOD__ );
		if ( !$res ) {
			// TODO: Better output (example to catch: one 'two)
			// NOTE(review): terminating the whole request from inside a search
			// backend is harsh; kept as-is so existing behaviour is unchanged.
			// query() is called without QUERY_SILENCE_ERRORS here, so a failed
			// query presumably surfaces as an exception before reaching this
			// check - confirm.
			die( "Sorry, that was not a valid search string. Please go back and try again" );
		}

		// Guard against a missing row instead of indexing into false.
		$row = $res->fetchRow();
		$top = is_array( $row ) ? (string)$row[0] : '';

		$this->searchTerms = [];
		$slotRoleStore = MediaWikiServices::getInstance()->getSlotRoleStore();
		$slotRoleId = $dbr->addQuotes( $slotRoleStore->acquireId( SlotRecord::MAIN ) );

		// Join from each page to the text row of the main slot of its latest
		// revision. Shared by both query variants below.
		$joins = "FROM page p, revision r, slots s, content c, \"text\" pc " .
			"WHERE p.page_latest = r.rev_id " .
			"AND s.slot_revision_id = r.rev_id " .
			"AND s.slot_role_id = " . $slotRoleId . " " .
			"AND c.content_id = s.slot_content_id " .
			"AND pc.old_id = substring( c.content_address from '^tt:([0-9]+)$' )::int ";

		if ( $top === '' ) { // e.g. if only stopwords are used XXX return something better
			// Same result shape as a real search, but "1=0" guarantees no rows
			$query = "SELECT page_id, page_namespace, page_title, 0 AS score " .
				$joins .
				"AND 1=0";
		} else {
			// Remember each quoted lexeme of the normalised tsquery
			$m = [];
			if ( preg_match_all( "/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) {
				foreach ( $m as $terms ) {
					$this->searchTerms[$terms[1]] = $terms[1];
				}
			}

			$query = "SELECT page_id, page_namespace, page_title, " .
				"ts_rank($fulltext, to_tsquery($searchstring), 5) AS score " .
				$joins .
				"AND $fulltext @@ to_tsquery($searchstring)";
		}

		// Namespaces - defaults to main
		if ( $this->namespaces !== null ) { // null -> search all
			if ( count( $this->namespaces ) < 1 ) {
				$query .= ' AND page_namespace = ' . NS_MAIN;
			} else {
				$namespaces = $dbr->makeList( $this->namespaces );
				$query .= " AND page_namespace IN ($namespaces)";
			}
		}

		$query .= " ORDER BY score DESC, page_id DESC";

		$query .= $dbr->limitResult( '', $this->limit, $this->offset );

		wfDebug( "searchQuery returned: $query" );

		return $query;
	}

	// Most of the work of these two functions is done automatically via triggers

	/**
	 * Clear the stored text vector of every main-slot text row belonging to
	 * the page except the one with the highest text id, so that older
	 * revisions are not indexed.
	 *
	 * @param int $pageid Page ID whose older text rows should be de-indexed
	 * @param string $title Unused here
	 * @param string $text Unused here
	 * @return bool Always true
	 */
	public function update( $pageid, $title, $text ) {
		// We don't want to index older revisions
		$slotRoleStore = MediaWikiServices::getInstance()->getSlotRoleStore();
		$dbw = $this->dbProvider->getPrimaryDatabase();

		// The page ID is embedded directly in the SQL text, so force it to an
		// integer rather than trusting the caller to pass one.
		$pageIdSql = (int)$pageid;

		// The subquery lists the page's text ids newest first; OFFSET 1 skips
		// the newest so that only the older ones are cleared.
		$sql = "UPDATE \"text\" SET textvector = NULL " .
			"WHERE textvector IS NOT NULL " .
			"AND old_id IN " .
			"(SELECT DISTINCT substring( c.content_address from '^tt:([0-9]+)$' )::int AS old_rev_text_id " .
			" FROM content c, slots s, revision r " .
			" WHERE r.rev_page = $pageIdSql " .
			" AND s.slot_revision_id = r.rev_id " .
			" AND s.slot_role_id = " .
			$dbw->addQuotes( $slotRoleStore->acquireId( SlotRecord::MAIN ) ) . " " .
			" AND c.content_id = s.slot_content_id " .
			" ORDER BY old_rev_text_id DESC OFFSET 1)";

		$dbw->query( $sql, __METHOD__ );

		return true;
	}

	/**
	 * No-op in this class; it performs no work and reports success.
	 *
	 * @param int $id Unused here
	 * @param string $title Unused here
	 * @return bool Always true
	 */
	public function updateTitle( $id, $title ) {
		return true;
	}
}