Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
92.31% |
144 / 156 |
|
75.00% |
9 / 12 |
CRAP | |
0.00% |
0 / 1 |
CognateStore | |
92.31% |
144 / 156 |
|
75.00% |
9 / 12 |
26.31 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
insertPage | |
96.97% |
32 / 33 |
|
0.00% |
0 / 1 |
5 | |||
deletePage | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
2 | |||
selectLinkDetailsForPage | |
100.00% |
25 / 25 |
|
100.00% |
1 / 1 |
2 | |||
selectSitesForPage | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
insertPages | |
96.15% |
25 / 26 |
|
0.00% |
0 / 1 |
5 | |||
buildRows | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
1 | |||
insertSites | |
100.00% |
16 / 16 |
|
100.00% |
1 / 1 |
3 | |||
deletePagesForSite | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
12 | |||
getStringHash | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getNormalizedStringHash | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
throwReadOnlyException | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace Cognate; |
4 | |
5 | use MediaWiki\Linker\LinkTarget; |
6 | use MediaWiki\Title\TitleValue; |
7 | use RuntimeException; |
8 | use Wikimedia\Rdbms\DBReadOnlyError; |
9 | use Wikimedia\Rdbms\IConnectionProvider; |
10 | |
11 | /** |
12 | * Database access for the Cognate tables. |
13 | * |
14 | * This class should generally not be accessed directly but instead via CognateRepo which contains |
15 | * extra business logic such as logging, stats and cache purges. |
16 | * |
17 | * @license GPL-2.0-or-later |
18 | * @author Gabriel Birke < gabriel.birke@wikimedia.de > |
19 | * @author Addshore |
20 | */ |
21 | class CognateStore { |
22 | |
23 | /** |
24 | * @var IConnectionProvider |
25 | */ |
26 | private $connectionProvider; |
27 | |
28 | /** |
29 | * @var StringNormalizer |
30 | */ |
31 | private $stringNormalizer; |
32 | |
33 | /** |
34 | * @var StringHasher |
35 | */ |
36 | private $stringHasher; |
37 | |
38 | /** |
39 | * @var bool |
40 | */ |
41 | private $readOnly; |
42 | |
43 | public const PAGES_TABLE_NAME = 'cognate_pages'; |
44 | public const SITES_TABLE_NAME = 'cognate_sites'; |
45 | public const TITLES_TABLE_NAME = 'cognate_titles'; |
46 | |
/**
 * Stores the collaborators this class uses to read from and write to the Cognate tables.
 *
 * @param IConnectionProvider $connectionProvider Provides the replica and primary connections
 * @param StringNormalizer $stringNormalizer Normalizes titles before they are hashed
 * @param StringHasher $stringHasher Produces the hashes used as keys in the tables
 * @param bool $readOnly Is Cognate in readonly mode?
 */
public function __construct(
	IConnectionProvider $connectionProvider,
	StringNormalizer $stringNormalizer,
	StringHasher $stringHasher,
	$readOnly
) {
	$this->readOnly = $readOnly;
	$this->stringHasher = $stringHasher;
	$this->stringNormalizer = $stringNormalizer;
	$this->connectionProvider = $connectionProvider;
}
64 | |
/**
 * Records a single page in the pages table and, when the title is not yet known, also
 * records the title in the titles table.
 *
 * The existence check for the title is made against a replica connection; the writes go to
 * the primary connection.
 *
 * @param string $dbName The dbName for the site
 * @param LinkTarget $linkTarget
 *
 * @return bool|int Number of insert queries run on success (1 when the title row already
 *  existed, 2 when it had to be added), false if a different raw title is already stored
 *  under the same raw-key hash.
 * @throws DBReadOnlyError
 */
public function insertPage( $dbName, LinkTarget $linkTarget ) {
	if ( $this->readOnly ) {
		$this->throwReadOnlyException();
	}

	$replica = $this->connectionProvider->getReplicaDatabase( CognateServices::VIRTUAL_DOMAIN );

	[ $pageRows, $titleRows ] = $this->buildRows( $linkTarget, $dbName );

	// Look up whatever raw title is currently stored under this title's hash.
	$dbKey = $linkTarget->getDBkey();
	$existingTitle = $replica->newSelectQueryBuilder()
		->select( 'cgti_raw' )
		->from( self::TITLES_TABLE_NAME )
		->where( [ 'cgti_raw_key' => $this->getStringHash( $dbKey ) ] )
		->caller( __METHOD__ )
		->fetchRow();

	$titleIsKnown = (bool)$existingTitle;
	if ( $titleIsKnown && $existingTitle->cgti_raw !== $dbKey ) {
		// Same hash, different title: refuse to insert.
		return false;
	}

	$primary = $this->connectionProvider->getPrimaryDatabase( CognateServices::VIRTUAL_DOMAIN );
	$queriesRun = 0;

	if ( !$titleIsKnown ) {
		$primary->newInsertQueryBuilder()
			->insertInto( self::TITLES_TABLE_NAME )
			->ignore()
			->rows( $titleRows )
			->caller( __METHOD__ )
			->execute();
		$queriesRun++;
	}

	$primary->newInsertQueryBuilder()
		->insertInto( self::PAGES_TABLE_NAME )
		->ignore()
		->rows( $pageRows )
		->caller( __METHOD__ )
		->execute();
	$queriesRun++;

	return $queriesRun;
}
121 | |
/**
 * Removes the row for the given page of the given site from the pages table.
 *
 * Note: the titles table is left untouched, so any title entry that was added for this
 * page stays in place.
 *
 * @param string $dbName The dbName for the site
 * @param LinkTarget $linkTarget
 *
 * @return bool Always true once the delete query has been issued
 * @throws DBReadOnlyError
 */
public function deletePage( $dbName, LinkTarget $linkTarget ) {
	if ( $this->readOnly ) {
		$this->throwReadOnlyException();
	}

	$siteKey = $this->getStringHash( $dbName );
	$titleKey = $this->getStringHash( $linkTarget->getDBkey() );
	$namespaceId = $linkTarget->getNamespace();

	$this->connectionProvider
		->getPrimaryDatabase( CognateServices::VIRTUAL_DOMAIN )
		->newDeleteQueryBuilder()
		->deleteFrom( self::PAGES_TABLE_NAME )
		->where( [
			'cgpa_site' => $siteKey,
			'cgpa_title' => $titleKey,
			'cgpa_namespace' => $namespaceId,
		] )
		->caller( __METHOD__ )
		->execute();

	return true;
}
150 | |
/**
 * Finds the pages on other sites that share the given page's normalized title and
 * namespace, and returns what is needed to link to them.
 *
 * Rows belonging to the site identified by $dbName are excluded from the result.
 *
 * @param string $dbName The dbName of the site being linked from
 * @param LinkTarget $linkTarget of the page the links should be retrieved for
 *
 * @return array[] details used to create interwiki links. Each array will look like:
 *         [ 'interwiki' => 'en', 'namespaceID' => 0, 'title' => 'Berlin' ]
 */
public function selectLinkDetailsForPage( $dbName, LinkTarget $linkTarget ) {
	$replica = $this->connectionProvider->getReplicaDatabase( CognateServices::VIRTUAL_DOMAIN );

	$fields = [
		'cgsi_interwiki',
		'cgpa_namespace',
		'cgti_raw',
	];
	$conditions = [
		$replica->expr( 'cgsi_dbname', '!=', $dbName ),
		'cgti_normalized_key' => $this->getNormalizedStringHash( $linkTarget->getDBkey() ),
		'cgpa_namespace' => $linkTarget->getNamespace(),
	];

	$rows = $replica->newSelectQueryBuilder()
		->select( $fields )
		->from( self::TITLES_TABLE_NAME )
		->join( self::PAGES_TABLE_NAME, null, 'cgti_raw_key = cgpa_title' )
		->join( self::SITES_TABLE_NAME, null, 'cgpa_site = cgsi_key' )
		->where( $conditions )
		->caller( __METHOD__ )
		->fetchResultSet();

	$details = [];
	foreach ( $rows as $match ) {
		$details[] = [
			'interwiki' => $match->cgsi_interwiki,
			// The namespace is cast so callers always receive an integer ID.
			'namespaceID' => (int)$match->cgpa_namespace,
			'title' => $match->cgti_raw,
		];
	}

	return $details;
}
188 | |
/**
 * Lists the dbnames of all sites that have a page whose normalized title and namespace
 * match the given link target.
 *
 * @param LinkTarget $linkTarget
 *
 * @return string[] array of dbnames
 */
public function selectSitesForPage( LinkTarget $linkTarget ) {
	$replica = $this->connectionProvider->getReplicaDatabase( CognateServices::VIRTUAL_DOMAIN );

	$conditions = [
		'cgti_normalized_key' => $this->getNormalizedStringHash( $linkTarget->getDBkey() ),
		'cgpa_namespace' => $linkTarget->getNamespace(),
	];

	$query = $replica->newSelectQueryBuilder()
		->select( 'cgsi_dbname' )
		->from( self::TITLES_TABLE_NAME )
		->join( self::PAGES_TABLE_NAME, null, 'cgti_raw_key = cgpa_title' )
		->join( self::SITES_TABLE_NAME, null, 'cgpa_site = cgsi_key' )
		->where( $conditions )
		->caller( __METHOD__ );

	return $query->fetchFieldValues();
}
208 | |
/**
 * Bulk variant of page insertion: writes all given pages to the pages table and their
 * titles to the titles table, using one insert query per table.
 *
 * Only usable from maintenance scripts or PHPUnit tests; any other caller gets an
 * exception. An empty input array is a no-op.
 *
 * @note Errors during insertion are totally ignored by this method. If there were duplicate
 * keys in the DB then you will not find out about them here.
 *
 * @param array[] $pageDetailsArray where each element contains the keys 'site', 'namespace',
 * and 'title', e.g. [ [ 'site' => 'enwiktionary', 'namespace' => 0, 'title' => 'Berlin' ] ].
 *
 * @throws RuntimeException if called outside of a maintenance or test scope
 */
public function insertPages( array $pageDetailsArray ) {
	$inAllowedScope = defined( 'RUN_MAINTENANCE_IF_MAIN' ) || defined( 'MW_PHPUNIT_TEST' );
	if ( !$inAllowedScope ) {
		throw new RuntimeException( __METHOD__ . ' can only be used for maintenance or tests.' );
	}

	if ( !$pageDetailsArray ) {
		return;
	}

	// buildRows() appends to both arrays by reference on every iteration.
	$pageRows = [];
	$titleRows = [];
	foreach ( $pageDetailsArray as $details ) {
		$target = new TitleValue( $details['namespace'], $details['title'] );
		$this->buildRows( $target, $details['site'], $pageRows, $titleRows );
	}

	$primary = $this->connectionProvider->getPrimaryDatabase( CognateServices::VIRTUAL_DOMAIN );

	$primary->newInsertQueryBuilder()
		->insertInto( self::TITLES_TABLE_NAME )
		->ignore()
		->rows( $titleRows )
		->caller( __METHOD__ )
		->execute();

	$primary->newInsertQueryBuilder()
		->insertInto( self::PAGES_TABLE_NAME )
		->ignore()
		->rows( $pageRows )
		->caller( __METHOD__ )
		->execute();
}
256 | |
/**
 * Builds one pages-table row and one titles-table row for the given page and appends them
 * to the supplied arrays.
 *
 * The arrays are taken by reference so repeated calls can accumulate rows; when they are
 * omitted, fresh arrays are used and the result is only available via the return value.
 *
 * @param LinkTarget $linkTarget
 * @param string $site
 * @param array[] &$pagesToInsert
 * @param array[] &$titlesToInsert
 *
 * @return array[] 0 => $pagesToInsert, 1 => $titleToInsert
 */
private function buildRows(
	LinkTarget $linkTarget,
	$site,
	array &$pagesToInsert = [],
	array &$titlesToInsert = []
) {
	$pageRow = [
		'cgpa_site' => $this->getStringHash( $site ),
		'cgpa_namespace' => $linkTarget->getNamespace(),
		'cgpa_title' => $this->getStringHash( $linkTarget->getDBkey() ),
	];
	$pagesToInsert[] = $pageRow;

	$titleRow = [
		'cgti_raw' => $linkTarget->getDBkey(),
		'cgti_raw_key' => $this->getStringHash( $linkTarget->getDBkey() ),
		'cgti_normalized_key' => $this->getNormalizedStringHash( $linkTarget->getDBkey() ),
	];
	$titlesToInsert[] = $titleRow;

	return [ $pagesToInsert, $titlesToInsert ];
}
284 | |
/**
 * Adds sites to the sites table. Each site is stored under the hash of its dbname,
 * together with the dbname itself and its interwiki prefix.
 *
 * An empty input array is a no-op. The insert is issued with the query builder's ignore()
 * option, so conflicting rows are presumably skipped rather than reported.
 *
 * @param string[] $sites keys of site dbname => values of interwiki prefix
 *     e.g. 'enwiktionary' => 'en'
 *
 * @throws RuntimeException NOTE(review): nothing in this method throws directly; presumably
 *     this refers to errors raised by the database layer - confirm.
 */
public function insertSites( array $sites ) {
	if ( !$sites ) {
		return;
	}

	$toInsert = [];
	foreach ( $sites as $dbname => $interwikiPrefix ) {
		$toInsert[] = [
			'cgsi_key' => $this->getStringHash( $dbname ),
			'cgsi_dbname' => $dbname,
			'cgsi_interwiki' => $interwikiPrefix,
		];
	}

	$dbw = $this->connectionProvider->getPrimaryDatabase( CognateServices::VIRTUAL_DOMAIN );
	$dbw->newInsertQueryBuilder()
		// Use the class constant, like every other query on this table, instead of a literal.
		->insertInto( self::SITES_TABLE_NAME )
		->ignore()
		->rows( $toInsert )
		->caller( __METHOD__ )
		->execute();
}
313 | |
/**
 * Delete all entries from the pages table for the given site.
 *
 * Only usable from maintenance scripts or PHPUnit tests; any other caller gets an
 * exception. Entries in the titles table are not touched by this method.
 *
 * @param string $dbName The dbname of the site to delete pages for.
 *
 * @throws RuntimeException if not run in a maintenance or test scope
 */
public function deletePagesForSite( $dbName ) {
	if ( !defined( 'RUN_MAINTENANCE_IF_MAIN' ) && !defined( 'MW_PHPUNIT_TEST' ) ) {
		throw new RuntimeException( __METHOD__ . ' can only be used for maintenance or tests.' );
	}

	$dbw = $this->connectionProvider->getPrimaryDatabase( CognateServices::VIRTUAL_DOMAIN );
	$dbw->newDeleteQueryBuilder()
		// Use the class constant, as deletePage() does, instead of a literal table name.
		->deleteFrom( self::PAGES_TABLE_NAME )
		->where( [
			'cgpa_site' => $this->getStringHash( $dbName ),
		] )
		->caller( __METHOD__ )
		->execute();
}
335 | |
/**
 * Hashes a string exactly as given, by delegating to the injected StringHasher.
 *
 * @param string $string
 *
 * @return int
 */
private function getStringHash( $string ) {
	$hash = $this->stringHasher->hash( $string );

	return $hash;
}
344 | |
/**
 * Hashes the normalized form of a string: the string is first run through the injected
 * StringNormalizer and the result is then passed to the injected StringHasher.
 *
 * @param string $string
 *
 * @return int
 */
private function getNormalizedStringHash( $string ) {
	$normalized = $this->stringNormalizer->normalize( $string );

	return $this->stringHasher->hash( $normalized );
}
355 | |
/**
 * Raises the error used by write methods when Cognate is in read-only mode.
 *
 * @throws DBReadOnlyError always
 * @return never
 */
private function throwReadOnlyException() {
	$error = new DBReadOnlyError( null, 'Cognate is in Read Only mode' );

	throw $error;
}
363 | |
364 | } |