Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 171 |
|
0.00% |
0 / 17 |
CRAP | |
0.00% |
0 / 1 |
ExternalStoreDB | |
0.00% |
0 / 171 |
|
0.00% |
0 / 17 |
1892 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
fetchFromURL | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
12 | |||
batchFetchFromURLs | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
42 | |||
store | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
isReadOnly | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
getLoadBalancer | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getReplica | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
getPrimary | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
2 | |||
getDomainId | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
12 | |||
getTable | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
2 | |||
initializeTable | |
0.00% |
0 / 26 |
|
0.00% |
0 / 1 |
12 | |||
fetchBlob | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
56 | |||
batchFetchBlobs | |
0.00% |
0 / 32 |
|
0.00% |
0 / 1 |
12 | |||
mergeBatchResult | |
0.00% |
0 / 7 |
|
0.00% |
0 / 1 |
20 | |||
parseURL | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
getClusterForUrl | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getDomainIdForCluster | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | |
21 | use Wikimedia\Rdbms\DatabaseDomain; |
22 | use Wikimedia\Rdbms\DBUnexpectedError; |
23 | use Wikimedia\Rdbms\ILoadBalancer; |
24 | use Wikimedia\Rdbms\LBFactory; |
25 | use Wikimedia\Rdbms\Query; |
26 | use Wikimedia\Rdbms\ServerInfo; |
27 | use Wikimedia\ScopedCallback; |
28 | |
/**
 * External storage in a SQL database.
 *
 * In this system, each store "location" maps to a database "cluster".
 * The clusters must be defined in the normal LBFactory configuration.
 *
 * @see ExternalStoreAccess
 * @ingroup ExternalStorage
 */
class ExternalStoreDB extends ExternalStoreMedium {
	/** @var LBFactory */
	private $lbFactory;

	/**
	 * @see ExternalStoreMedium::__construct()
	 * @param array $params Additional parameters include:
	 *   - lbFactory: an LBFactory instance
	 * @throws InvalidArgumentException If 'lbFactory' is missing or is not an LBFactory
	 */
	public function __construct( array $params ) {
		parent::__construct( $params );
		if ( !( ( $params['lbFactory'] ?? null ) instanceof LBFactory ) ) {
			throw new InvalidArgumentException( "LBFactory required in 'lbFactory' field." );
		}
		$this->lbFactory = $params['lbFactory'];
	}

	/**
	 * Fetch data from given external store URL.
	 *
	 * The provided URL is in the form of `DB://cluster/id` or `DB://cluster/id/itemid`
	 * for concatenated storage if ConcatenatedGzipHistoryBlob was used.
	 *
	 * @param string $url
	 * @return string|false False if missing (or if a multi-item blob could not be unserialized)
	 * @throws InvalidArgumentException If the URL has no cluster or blob ID part
	 * @see ExternalStoreMedium::fetchFromURL()
	 */
	public function fetchFromURL( $url ) {
		[ $cluster, $id, $itemID ] = $this->parseURL( $url );
		$ret = $this->fetchBlob( $cluster, $id, $itemID );

		if ( $itemID === false || $ret === false ) {
			// Whole-blob URL (raw text) or nothing found
			return $ret;
		}

		if ( !is_object( $ret ) ) {
			// Unserialization did not produce an object; calling getItem() on it
			// would be a fatal error, so report the item as missing instead.
			$this->logger->warning( __METHOD__ . ": could not unserialize blob for $url" );

			return false;
		}

		return $ret->getItem( $itemID );
	}

	/**
	 * Fetch multiple URLs from given external store.
	 *
	 * The provided URLs are in the form of `DB://cluster/id`, or `DB://cluster/id/itemid`
	 * for concatenated storage if ConcatenatedGzipHistoryBlob was used.
	 *
	 * Duplicate URLs, and a mix of whole-blob and item URLs for the same blob ID,
	 * are handled: each distinct URL is resolved independently.
	 *
	 * @param array $urls An array of external store URLs
	 * @return array A map from url to stored content. Failed results
	 *     are not represented.
	 * @throws InvalidArgumentException If a URL has no cluster or blob ID part
	 */
	public function batchFetchFromURLs( array $urls ) {
		// Map of cluster => blob ID => URL => item ID (false for whole-blob URLs).
		// Keying the innermost level by URL de-duplicates repeated URLs and avoids
		// using the item ID as an array key, where false and "0" would both be
		// cast to the integer key 0 and collide.
		$batched = [];
		foreach ( $urls as $url ) {
			[ $cluster, $id, $itemID ] = $this->parseURL( $url );
			$batched[$cluster][$id][$url] = $itemID;
		}

		$ret = [];
		foreach ( $batched as $cluster => $batchByCluster ) {
			$res = $this->batchFetchBlobs( $cluster, $batchByCluster );
			foreach ( $res as $id => $blobText ) {
				// Unserialize at most once per blob, and only if an item URL needs it
				$blob = null;
				$triedUnserialize = false;
				foreach ( $batchByCluster[$id] as $url => $itemID ) {
					if ( $itemID === false ) {
						// Single result stored per blob: hand back the raw text
						$ret[$url] = $blobText;
						continue;
					}

					if ( !$triedUnserialize ) {
						$blob = HistoryBlobUtils::unserialize( $blobText );
						$triedUnserialize = true;
					}

					if ( is_object( $blob ) ) {
						$ret[$url] = $blob->getItem( $itemID );
					} else {
						// Failed results are not represented in the return value
						$this->logger->warning(
							__METHOD__ . ": could not unserialize blob for $url"
						);
					}
				}
			}
		}

		return $ret;
	}

	/**
	 * @inheritDoc
	 */
	public function store( $location, $data ) {
		$blobsTable = $this->getTable( $location );

		$dbw = $this->getPrimary( $location );
		$dbw->newInsertQueryBuilder()
			->insertInto( $blobsTable )
			->row( [ 'blob_text' => $data ] )
			->caller( __METHOD__ )->execute();

		$id = $dbw->insertId();
		if ( !$id ) {
			throw new ExternalStoreException( __METHOD__ . ': no insert ID' );
		}

		return "DB://$location/$id";
	}

	/**
	 * @inheritDoc
	 */
	public function isReadOnly( $location ) {
		if ( parent::isReadOnly( $location ) ) {
			return true;
		}

		// The load balancer reports false when it has no read-only reason
		return ( $this->getLoadBalancer( $location )->getReadOnlyReason() !== false );
	}

	/**
	 * Get a LoadBalancer for the specified cluster
	 *
	 * @param string $cluster Cluster name
	 * @return ILoadBalancer
	 */
	private function getLoadBalancer( $cluster ) {
		return $this->lbFactory->getExternalLB( $cluster );
	}

	/**
	 * Get a replica DB connection for the specified cluster
	 *
	 * @since 1.34
	 * @param string $cluster Cluster name
	 * @return \Wikimedia\Rdbms\IReadableDatabase
	 */
	public function getReplica( $cluster ) {
		$lb = $this->getLoadBalancer( $cluster );

		return $lb->getConnection(
			DB_REPLICA,
			[],
			// The domain is always derived from the writer's server configuration
			$this->getDomainId( $lb->getServerInfo( ServerInfo::WRITER_INDEX ) ),
			$lb::CONN_TRX_AUTOCOMMIT
		);
	}

	/**
	 * Get a primary database connection for the specified cluster
	 *
	 * @param string $cluster Cluster name
	 * @return \Wikimedia\Rdbms\IMaintainableDatabase
	 * @since 1.37
	 */
	public function getPrimary( $cluster ) {
		$lb = $this->getLoadBalancer( $cluster );

		return $lb->getMaintenanceConnectionRef(
			DB_PRIMARY,
			[],
			$this->getDomainId( $lb->getServerInfo( ServerInfo::WRITER_INDEX ) ),
			$lb::CONN_TRX_AUTOCOMMIT
		);
	}

	/**
	 * Work out which database domain to connect to for a cluster.
	 *
	 * @param array $server Primary DB server configuration array for LoadBalancer
	 * @return string|false Database domain ID or false
	 */
	private function getDomainId( array $server ) {
		if ( $this->isDbDomainExplicit ) {
			return $this->dbDomain; // explicit foreign domain
		}

		if ( isset( $server['dbname'] ) ) {
			// T200471: for b/c, treat any "dbname" field as forcing which database to use.
			// MediaWiki/LoadBalancer previously did not enforce any concept of a local DB
			// domain, but rather assumed that the LB server configuration matched $wgDBname.
			// This check is useful when the external storage DB for this cluster does not use
			// the same name as the corresponding "main" DB(s) for wikis.
			$domain = new DatabaseDomain(
				$server['dbname'],
				$server['schema'] ?? null,
				$server['tablePrefix'] ?? ''
			);

			return $domain->getId();
		}

		return false; // local LB domain
	}

	/**
	 * Get the configured blobs table name for this database
	 *
	 * Typically, a suffix like "_clusterX" can be used to facilitate clean merging of
	 * read-only storage clusters by simply cloning tables to the new cluster servers.
	 *
	 * @param string $cluster Cluster name
	 * @return string Unqualified table name (e.g. "blobs_cluster32" or default "blobs")
	 * @internal Only for use within ExternalStoreDB and its core maintenance scripts
	 */
	public function getTable( string $cluster ) {
		$lb = $this->getLoadBalancer( $cluster );
		$info = $lb->getServerInfo( ServerInfo::WRITER_INDEX );

		return $info['blobs table'] ?? 'blobs';
	}

	/**
	 * Create the appropriate blobs table on this cluster
	 *
	 * The schema is read from maintenance/storage/blobs.sql and the placeholder
	 * table name in it is replaced with this cluster's configured table name.
	 *
	 * @since 1.34
	 * @param string $cluster
	 * @throws DBUnexpectedError If the cluster's RDBMS type is not supported
	 * @throws RuntimeException If the schema file cannot be read
	 */
	public function initializeTable( $cluster ) {
		global $IP;

		static $supportedTypes = [ 'mysql', 'sqlite' ];

		$dbw = $this->getPrimary( $cluster );
		if ( !in_array( $dbw->getType(), $supportedTypes, true ) ) {
			throw new DBUnexpectedError( $dbw, "RDBMS type '{$dbw->getType()}' not supported." );
		}

		$sqlFilePath = "$IP/maintenance/storage/blobs.sql";
		$sql = file_get_contents( $sqlFilePath );
		if ( $sql === false ) {
			throw new RuntimeException( "Failed to read '$sqlFilePath'." );
		}

		$blobsTable = $this->getTable( $cluster );
		$encTable = $dbw->tableName( $blobsTable );
		// Both placeholder spellings of the table name are substituted
		$sqlWithReplacedVars = str_replace(
			[ '/*$wgDBprefix*/blobs', '/*_*/blobs' ],
			[ $encTable, $encTable ],
			$sql
		);

		$dbw->query(
			new Query(
				$sqlWithReplacedVars,
				$dbw::QUERY_CHANGE_SCHEMA,
				'CREATE',
				$blobsTable,
				$sqlWithReplacedVars
			),
			__METHOD__
		);
	}

	/**
	 * Fetch a blob item out of the database; a cache of the last-loaded
	 * blob will be kept so that multiple loads out of a multi-item blob
	 * can avoid redundant database access and decompression.
	 *
	 * The replica is queried first; if it has no matching row, the primary is tried.
	 *
	 * @param string $cluster
	 * @param string $id
	 * @param string|false $itemID Item within a multi-item blob, or false for a whole-blob URL
	 * @return mixed The raw blob text when $itemID is false; otherwise the result of
	 *   HistoryBlobUtils::unserialize() on that text (the caller extracts the item).
	 *   False if the blob is missing.
	 */
	private function fetchBlob( $cluster, $id, $itemID ) {
		/**
		 * One-step cache variable to hold base blobs; operations that
		 * pull multiple revisions may often pull multiple times from
		 * the same blob. By keeping the last-used one open, we avoid
		 * redundant unserialization and decompression overhead.
		 *
		 * Only the most recent entry is kept. Note that a false (missing)
		 * result is cached too, since isset() is true for false values.
		 */
		static $externalBlobCache = [];

		// The item ID itself is deliberately not part of the key: all items of a
		// multi-item blob share the same unserialized object. The trailing slash
		// keeps that object apart from the raw text cached for a whole-blob URL.
		$cacheID = ( $itemID === false ) ? "$cluster/$id" : "$cluster/$id/";
		$cacheID = "$cacheID@{$this->dbDomain}";

		if ( isset( $externalBlobCache[$cacheID] ) ) {
			$this->logger->debug( __METHOD__ . ": cache hit on $cacheID" );

			return $externalBlobCache[$cacheID];
		}

		$this->logger->debug( __METHOD__ . ": cache miss on $cacheID" );

		$blobsTable = $this->getTable( $cluster );

		$dbr = $this->getReplica( $cluster );
		$ret = $dbr->newSelectQueryBuilder()
			->select( 'blob_text' )
			->from( $blobsTable )
			->where( [ 'blob_id' => $id ] )
			->caller( __METHOD__ )->fetchField();

		if ( $ret === false ) {
			// Try the primary DB
			$this->logger->warning( __METHOD__ . ": primary DB fallback on $cacheID" );
			$trxProfiler = $this->lbFactory->getTransactionProfiler();
			// Silence the replicas-only expectation while the primary is queried
			$scope = $trxProfiler->silenceForScope( $trxProfiler::EXPECTATION_REPLICAS_ONLY );
			$dbw = $this->getPrimary( $cluster );
			$ret = $dbw->newSelectQueryBuilder()
				->select( 'blob_text' )
				->from( $blobsTable )
				->where( [ 'blob_id' => $id ] )
				->caller( __METHOD__ )->fetchField();
			ScopedCallback::consume( $scope );
			if ( $ret === false ) {
				$this->logger->warning( __METHOD__ . ": primary DB failed to find $cacheID" );
			}
		}
		if ( $itemID !== false && $ret !== false ) {
			// Unserialise object; caller extracts item
			$ret = HistoryBlobUtils::unserialize( $ret );
		}

		$externalBlobCache = [ $cacheID => $ret ];

		return $ret;
	}

	/**
	 * Fetch multiple blobs out of the database
	 *
	 * The replica is queried first; any IDs it did not return are retried on the primary.
	 *
	 * @param string $cluster A cluster name valid for use with LBFactory
	 * @param array $ids A map whose keys are the blob_id's to look for (values are not inspected)
	 * @return array A map from the blob_id's requested to their raw blob text.
	 *   Unlocated ids are not represented
	 */
	private function batchFetchBlobs( $cluster, array $ids ) {
		$blobsTable = $this->getTable( $cluster );

		$dbr = $this->getReplica( $cluster );
		$res = $dbr->newSelectQueryBuilder()
			->select( [ 'blob_id', 'blob_text' ] )
			->from( $blobsTable )
			->where( [ 'blob_id' => array_keys( $ids ) ] )
			->caller( __METHOD__ )
			->fetchResultSet();

		$ret = [];
		// Removes every located ID from $ids, leaving only the ones still missing
		$this->mergeBatchResult( $ret, $ids, $res );
		if ( $ids ) {
			// Try the primary
			$this->logger->info(
				__METHOD__ . ": primary fallback on '$cluster' for: " .
				implode( ',', array_keys( $ids ) )
			);
			$trxProfiler = $this->lbFactory->getTransactionProfiler();
			$scope = $trxProfiler->silenceForScope( $trxProfiler::EXPECTATION_REPLICAS_ONLY );
			$dbw = $this->getPrimary( $cluster );
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'blob_id', 'blob_text' ] )
				->from( $blobsTable )
				->where( [ 'blob_id' => array_keys( $ids ) ] )
				->caller( __METHOD__ )
				->fetchResultSet();
			ScopedCallback::consume( $scope );
			$this->mergeBatchResult( $ret, $ids, $res );
		}
		if ( $ids ) {
			$this->logger->error(
				__METHOD__ . ": primary on '$cluster' failed locating items: " .
				implode( ',', array_keys( $ids ) )
			);
		}

		return $ret;
	}

	/**
	 * Helper function for self::batchFetchBlobs for merging primary/replica DB results
	 *
	 * The raw blob text is stored as-is; deciding whether a blob needs to be
	 * unserialized is left to the caller that knows which items were requested.
	 *
	 * @param array &$ret Current self::batchFetchBlobs return value
	 * @param array &$ids Map keyed by the blob_id's still being looked for
	 * @param mixed $res DB result from Database::select
	 */
	private function mergeBatchResult( array &$ret, array &$ids, $res ) {
		foreach ( $res as $row ) {
			$id = $row->blob_id;
			unset( $ids[$id] ); // to track if everything is found
			$ret[$id] = $row->blob_text;
		}
	}

	/**
	 * Split an external store URL into its components.
	 *
	 * @param string $url URL of the form `DB://cluster/id` or `DB://cluster/id/itemid`
	 * @return array [ cluster, blob ID, item ID or false if the URL has no item part ]
	 * @throws InvalidArgumentException If the URL has no cluster or blob ID part
	 */
	protected function parseURL( $url ) {
		$path = explode( '/', $url );

		// "DB://cluster/id" splits into [ 'DB:', '', cluster, id ]; anything shorter
		// has no usable cluster/ID and must not be passed on as null values.
		if ( !isset( $path[2], $path[3] ) ) {
			throw new InvalidArgumentException(
				__METHOD__ . ": malformed external store URL '$url'."
			);
		}

		return [
			$path[2], // cluster
			$path[3], // id
			$path[4] ?? false // itemID
		];
	}

	/**
	 * Get the cluster part of a URL
	 *
	 * @internal for installer
	 * @param string $url
	 * @return string|null Null if the URL has no cluster part
	 */
	public function getClusterForUrl( $url ) {
		$parts = explode( '/', $url );
		return $parts[2] ?? null;
	}

	/**
	 * Get the domain ID for a given cluster, which is false for the local wiki ID
	 *
	 * @internal for installer
	 * @param string $cluster
	 * @return string|false
	 */
	public function getDomainIdForCluster( $cluster ) {
		$lb = $this->getLoadBalancer( $cluster );
		return $this->getDomainId( $lb->getServerInfo( ServerInfo::WRITER_INDEX ) );
	}
}