MediaWiki  master
ExternalStoreDB.php
Go to the documentation of this file.
1 <?php
28 use Wikimedia\ScopedCallback;
29 
/** @var LBFactory Factory used to obtain the per-cluster load balancers; taken from $params['lbFactory'] in the constructor */
41  private $lbFactory;
42 
/**
 * Build the store and remember the injected load balancer factory.
 *
 * @param array $params Options forwarded to the parent constructor; must also contain:
 *   - lbFactory: an LBFactory instance
 * @throws InvalidArgumentException If 'lbFactory' is missing or is not an LBFactory
 */
public function __construct( array $params ) {
	parent::__construct( $params );

	// A missing key and a wrongly-typed value are rejected the same way
	$factory = $params['lbFactory'] ?? null;
	if ( !( $factory instanceof LBFactory ) ) {
		throw new InvalidArgumentException( "LBFactory required in 'lbFactory' field." );
	}

	$this->lbFactory = $factory;
}
55 
/**
 * Fetch data from given external store URL.
 *
 * The URL is split by parseURL() into cluster, blob ID and an optional item ID.
 * Without an item ID the value from fetchBlob() is returned as is; with one, the
 * requested item is extracted from the fetched blob object.
 *
 * @param string $url URL of the form DB://cluster/id or DB://cluster/id/itemid
 * @return mixed The blob text or the selected item, or false if the blob was not found
 */
public function fetchFromURL( $url ) {
	list( $cluster, $id, $itemID ) = $this->parseURL( $url );
	$blob = $this->fetchBlob( $cluster, $id, $itemID );

	if ( $blob === false || $itemID === false ) {
		// Either the lookup failed or the caller asked for the whole blob
		return $blob;
	}

	return $blob->getItem( $itemID );
}
76 
/**
 * Fetch multiple URLs from given external store.
 *
 * URLs are grouped per cluster and blob ID so that each cluster is queried through
 * a single batchFetchBlobs() call. Blobs that could not be located are simply absent
 * from the result, as are items whose containing blob could not be unserialized.
 *
 * @param string[] $urls URLs of the form DB://cluster/id or DB://cluster/id/itemid
 * @return array Map of (URL => blob text or extracted item) for the URLs that resolved
 */
public function batchFetchFromURLs( array $urls ) {
	$batched = $inverseUrlMap = [];
	// The result is keyed by URL, so repeated URLs add nothing. Worse, a repeated
	// whole-blob URL would queue [ false, false ] for its blob ID, which makes the
	// merge step mistake the plain text for a serialized multi-item blob.
	foreach ( array_unique( $urls ) as $url ) {
		list( $cluster, $id, $itemID ) = $this->parseURL( $url );
		$batched[$cluster][$id][] = $itemID;
		// false $itemID gets cast to int, but should be ok
		// since we do === from the $itemID in $batched
		$inverseUrlMap[$cluster][$id][$itemID] = $url;
	}

	$ret = [];
	foreach ( $batched as $cluster => $batchByCluster ) {
		$res = $this->batchFetchBlobs( $cluster, $batchByCluster );
		foreach ( $res as $id => $blob ) {
			foreach ( $batchByCluster[$id] as $itemID ) {
				$url = $inverseUrlMap[$cluster][$id][$itemID];
				if ( $itemID === false ) {
					$ret[$url] = $blob;
				} elseif ( is_object( $blob ) ) {
					$ret[$url] = $blob->getItem( $itemID );
				} else {
					// unserialize() yields false for a corrupt blob; calling getItem()
					// on that would be a fatal error, so treat the URL as not found.
					$this->logger->warning( __METHOD__ . ": unusable blob for $url" );
				}
			}
		}
	}

	return $ret;
}
114 
/**
 * Insert a data item into a given location.
 *
 * The data is written as a new row (column blob_text) of the cluster's blobs table
 * on the primary DB, and the generated row ID becomes part of the returned URL.
 *
 * @param string $location The location (cluster) name
 * @param string $data The data item
 * @return string The URL of the stored item, of the form DB://location/id
 * @throws MWException If the database reports no insert ID
 */
public function store( $location, $data ) {
	$primary = $this->getPrimary( $location );
	$table = $this->getTable( $primary, $location );

	$primary->insert( $table, [ 'blob_text' => $data ], __METHOD__ );

	$blobId = $primary->insertId();
	if ( $blobId ) {
		return "DB://$location/$blobId";
	}

	throw new MWException( __METHOD__ . ': no insert ID' );
}
132 
/**
 * Check if a given location is read-only.
 *
 * A location is read-only when the parent implementation says so, or when the
 * cluster's load balancer reports a read-only reason for the relevant DB domain.
 *
 * @param string $location The location (cluster) name
 * @return bool Whether this location is read-only
 */
public function isReadOnly( $location ) {
	if ( parent::isReadOnly( $location ) ) {
		return true;
	}

	$lb = $this->getLoadBalancer( $location );
	$writerInfo = $lb->getServerInfo( $lb->getWriterIndex() );
	$reason = $lb->getReadOnlyReason( $this->getDomainId( $writerInfo ) );

	// getReadOnlyReason() yields false when there is no reason, i.e. writes are allowed
	return $reason !== false;
}
146 
/**
 * Get a LoadBalancer for the specified cluster.
 *
 * @param string $cluster Cluster name
 * @return mixed Whatever the injected factory's getExternalLB() returns for this cluster
 */
private function getLoadBalancer( $cluster ) {
	$factory = $this->lbFactory;

	return $factory->getExternalLB( $cluster );
}
156 
/**
 * Get a replica DB connection for the specified cluster.
 *
 * The connection is requested in autocommit mode (CONN_TRX_AUTOCOMMIT) for the
 * domain derived from the writer server's configuration.
 *
 * @param string $cluster Cluster name
 * @return mixed Connection handle as returned by the load balancer's getConnectionRef()
 */
public function getReplica( $cluster ) {
	$lb = $this->getLoadBalancer( $cluster );
	$writerInfo = $lb->getServerInfo( $lb->getWriterIndex() );
	$domain = $this->getDomainId( $writerInfo );

	return $lb->getConnectionRef( DB_REPLICA, [], $domain, $lb::CONN_TRX_AUTOCOMMIT );
}
174 
/**
 * Get a replica DB connection for the specified cluster.
 *
 * @deprecated since 1.34, use getReplica() instead; calling this emits a deprecation warning
 * @param string $cluster Cluster name
 * @return mixed Same value as getReplica()
 */
public function getSlave( $cluster ) {
	wfDeprecated( __METHOD__, '1.34' );

	$replica = $this->getReplica( $cluster );

	return $replica;
}
186 
/**
 * Get a primary database connection for the specified cluster.
 *
 * The connection is requested in autocommit mode (CONN_TRX_AUTOCOMMIT) for the
 * domain derived from the writer server's configuration.
 *
 * @param string $cluster Cluster name
 * @return mixed Connection handle as returned by the load balancer's
 *   getMaintenanceConnectionRef()
 */
public function getPrimary( $cluster ) {
	$lb = $this->getLoadBalancer( $cluster );
	$writerInfo = $lb->getServerInfo( $lb->getWriterIndex() );
	$domain = $this->getDomainId( $writerInfo );

	return $lb->getMaintenanceConnectionRef( DB_PRIMARY, [], $domain, $lb::CONN_TRX_AUTOCOMMIT );
}
204 
/**
 * Get a primary database connection for the specified cluster.
 *
 * @deprecated since 1.37, use getPrimary() instead; calling this emits a deprecation warning
 * @param string $cluster Cluster name
 * @return mixed Same value as getPrimary()
 */
public function getMaster( $cluster ) {
	wfDeprecated( __METHOD__, '1.37' );

	$primary = $this->getPrimary( $cluster );

	return $primary;
}
214 
/**
 * Work out which DB domain to request connections for.
 *
 * @param array $server Server configuration of the cluster's writer; the keys
 *   'dbname', 'schema' and 'tablePrefix' are consulted
 * @return string|bool The explicitly configured domain, a domain ID built from the
 *   server configuration, or false to use the load balancer's local domain
 */
private function getDomainId( array $server ) {
	if ( $this->isDbDomainExplicit ) {
		return $this->dbDomain; // explicit foreign domain
	}

	if ( !isset( $server['dbname'] ) ) {
		return false; // local LB domain
	}

	// T200471: for b/c, treat any "dbname" field as forcing which database to use.
	// MediaWiki/LoadBalancer previously did not enforce any concept of a local DB
	// domain, but rather assumed that the LB server configuration matched $wgDBname.
	// This check is useful when the external storage DB for this cluster does not use
	// the same name as the corresponding "main" DB(s) for wikis.
	$schema = $server['schema'] ?? null;
	$prefix = $server['tablePrefix'] ?? '';

	return ( new DatabaseDomain( $server['dbname'], $schema, $prefix ) )->getId();
}
241 
/**
 * Get the 'blobs' table name for this database.
 *
 * Lookup order: the 'blobs table' entry of the cluster's writer server info (only
 * when a cluster is given), then the connection's 'blobs table' LB info, then the
 * literal 'blobs'.
 *
 * @param mixed $db Connection handle offering getLBInfo()
 * @param string|null $cluster Cluster name, or null to skip the server info lookup
 * @return string Table name
 */
public function getTable( $db, $cluster = null ) {
	$table = null;

	if ( $cluster !== null ) {
		$lb = $this->getLoadBalancer( $cluster );
		$serverInfo = $lb->getServerInfo( $lb->getWriterIndex() );
		$table = $serverInfo['blobs table'] ?? null;
	}

	if ( $table === null ) {
		$table = $db->getLBInfo( 'blobs table' ) ?? 'blobs'; // b/c
	}

	return $table;
}
260 
/**
 * Create the appropriate blobs table on this cluster.
 *
 * Reads the schema from maintenance/storage/blobs.sql under $IP, substitutes the
 * cluster's quoted table name for the placeholder table names and runs the result
 * on the primary DB.
 *
 * @param string $cluster Cluster name
 * @throws DBUnexpectedError If the primary DB is neither 'mysql' nor 'sqlite'
 * @throws RuntimeException If the schema file cannot be read
 */
public function initializeTable( $cluster ) {
	global $IP;

	static $supportedTypes = [ 'mysql', 'sqlite' ];

	$dbw = $this->getPrimary( $cluster );
	$dbType = $dbw->getType();
	if ( !in_array( $dbType, $supportedTypes, true ) ) {
		throw new DBUnexpectedError( $dbw, "RDBMS type '$dbType' not supported." );
	}

	$sqlFilePath = "$IP/maintenance/storage/blobs.sql";
	$template = file_get_contents( $sqlFilePath );
	if ( $template === false ) {
		throw new RuntimeException( "Failed to read '$sqlFilePath'." );
	}

	// e.g. "blobs_cluster23" before quoting
	$encTable = $dbw->tableName( $this->getTable( $dbw, $cluster ) );

	// Both placeholder spellings used by the schema file map to the same table
	$placeholders = [ '/*$wgDBprefix*/blobs', '/*_*/blobs' ];
	$statement = str_replace( $placeholders, [ $encTable, $encTable ], $template );

	$dbw->query( $statement, __METHOD__, $dbw::QUERY_IGNORE_DBO_TRX );
}
296 
/**
 * Fetch a blob item out of the database; a cache of the last-loaded blob is kept
 * so that repeated requests for the same blob do not hit the database again.
 *
 * The replica is queried first and the primary DB is used as a fallback when the
 * row is not found there. When an item ID is given, the blob text is unserialized
 * and the caller is expected to extract the item from the resulting object.
 *
 * @param string $cluster Cluster name
 * @param string $id Blob ID (blob_id column)
 * @param string|bool $itemID Item ID within the blob, or false for a plain-text blob
 * @return mixed Blob text, the unserialized blob, or false if the row was not found
 */
private function fetchBlob( $cluster, $id, $itemID ) {
	// Holds exactly one entry (the last lookup); being static, it is shared by all
	// instances, hence the DB domain suffix in the key.
	static $externalBlobCache = [];

	$wantsItem = ( $itemID !== false );
	// The trailing slash keeps the unserialized form apart from the raw text form
	$cacheID = ( $wantsItem ? "$cluster/$id/" : "$cluster/$id" ) . "@{$this->dbDomain}";

	if ( isset( $externalBlobCache[$cacheID] ) ) {
		$this->logger->debug( __METHOD__ . ": cache hit on $cacheID" );

		return $externalBlobCache[$cacheID];
	}

	$this->logger->debug( __METHOD__ . ": cache miss on $cacheID" );

	$conds = [ 'blob_id' => $id ];

	$dbr = $this->getReplica( $cluster );
	$ret = $dbr->selectField( $this->getTable( $dbr, $cluster ), 'blob_text', $conds, __METHOD__ );

	if ( $ret === false ) {
		// Try the primary DB
		$this->logger->warning( __METHOD__ . ": primary DB fallback on $cacheID" );
		// Silence the transaction profiler for the duration of the fallback query
		$scope = $this->lbFactory->getTransactionProfiler()->silenceForScope();
		$dbw = $this->getPrimary( $cluster );
		$ret = $dbw->selectField( $this->getTable( $dbw, $cluster ), 'blob_text', $conds, __METHOD__ );
		ScopedCallback::consume( $scope );

		if ( $ret === false ) {
			$this->logger->warning( __METHOD__ . ": primary DB failed to find $cacheID" );
		}
	}

	if ( $wantsItem && $ret !== false ) {
		// Unserialise object; caller extracts item
		$ret = unserialize( $ret );
	}

	// Replace (not extend) the cache so that it never grows beyond one blob
	$externalBlobCache = [ $cacheID => $ret ];

	return $ret;
}
358 
/**
 * Fetch multiple blob items out of the database.
 *
 * The replica is queried first; any blob IDs it did not return are retried on the
 * primary DB. IDs that neither server returned are logged as errors and left out
 * of the result.
 *
 * @param string $cluster Cluster name
 * @param array $ids Map of (blob ID => list of requested item IDs, false meaning
 *   the whole blob)
 * @return array Map of (blob ID => blob text or unserialized blob)
 */
private function batchFetchBlobs( $cluster, array $ids ) {
	if ( !$ids ) {
		// Nothing to look up; also avoids issuing a query with an empty ID list
		return [];
	}

	$dbr = $this->getReplica( $cluster );
	$res = $dbr->select(
		$this->getTable( $dbr, $cluster ),
		[ 'blob_id', 'blob_text' ],
		[ 'blob_id' => array_keys( $ids ) ],
		__METHOD__
	);

	$ret = [];
	if ( $res !== false ) {
		// Removes every located ID from $ids
		$this->mergeBatchResult( $ret, $ids, $res );
	}
	if ( $ids ) {
		// Try the primary
		$this->logger->info(
			__METHOD__ . ": primary fallback on '$cluster' for: " .
			implode( ',', array_keys( $ids ) )
		);
		$scope = $this->lbFactory->getTransactionProfiler()->silenceForScope();
		$dbw = $this->getPrimary( $cluster );
		$res = $dbw->select(
			// Resolve the table name against the primary handle that runs the query;
			// this previously passed the replica handle by mistake.
			$this->getTable( $dbw, $cluster ),
			[ 'blob_id', 'blob_text' ],
			[ 'blob_id' => array_keys( $ids ) ],
			__METHOD__
		);
		ScopedCallback::consume( $scope );
		if ( $res === false ) {
			$this->logger->error( __METHOD__ . ": primary failed on '$cluster'" );
		} else {
			$this->mergeBatchResult( $ret, $ids, $res );
		}
	}
	if ( $ids ) {
		$this->logger->error(
			__METHOD__ . ": primary on '$cluster' failed locating items: " .
			implode( ',', array_keys( $ids ) )
		);
	}

	return $ret;
}
409 
/**
 * Helper function for self::batchFetchBlobs for merging primary/replica DB results.
 *
 * Every row whose blob ID is still pending in $ids is moved into $ret and removed
 * from $ids, so that whatever remains in $ids afterwards is what was not found.
 *
 * @param array &$ret Map of (blob ID => blob text or unserialized blob); added to
 * @param array &$ids Map of (blob ID => list of requested item IDs, false meaning
 *   the whole blob); located IDs are removed
 * @param iterable $res Rows with blob_id and blob_text fields
 */
private function mergeBatchResult( array &$ret, array &$ids, $res ) {
	foreach ( $res as $row ) {
		$id = $row->blob_id;
		if ( !isset( $ids[$id] ) ) {
			// Not (or no longer) pending: there are no requested items to resolve
			// this row against, and count( null ) below would be an error.
			continue;
		}

		$itemIDs = $ids[$id];
		unset( $ids[$id] ); // to track if everything is found

		// The text is stored plainly only if nothing but the whole blob was requested.
		// Checking every entry (rather than "exactly one entry") keeps a repeated
		// whole-blob request from being mistaken for a multi-item blob.
		$wholeBlobOnly = ( $itemIDs !== [] );
		foreach ( $itemIDs as $itemID ) {
			if ( $itemID !== false ) {
				$wholeBlobOnly = false;
				break;
			}
		}

		if ( $wholeBlobOnly ) {
			// single result stored per blob
			$ret[$id] = $row->blob_text;
		} else {
			// multi result stored per blob
			$ret[$id] = unserialize( $row->blob_text );
		}
	}
}
430 
/**
 * Split an external store URL into its components.
 *
 * The URL is cut at every '/', so for DB://cluster/id/itemid the cluster, blob ID
 * and item ID are the third, fourth and fifth segments.
 *
 * @param string $url URL of the form DB://cluster/id or DB://cluster/id/itemid
 * @return array List of [ cluster, blob ID, item ID or false when absent ]
 */
protected function parseURL( $url ) {
	$segments = explode( '/', $url );

	$cluster = $segments[2];
	$id = $segments[3];
	$itemID = isset( $segments[4] ) ? $segments[4] : false;

	return [ $cluster, $id, $itemID ];
}
444 }
ExternalStoreDB\batchFetchBlobs
batchFetchBlobs( $cluster, array $ids)
Fetch multiple blob items out of the database.
Definition: ExternalStoreDB.php:367
ExternalStoreMedium
Base class for external storage.
Definition: ExternalStoreMedium.php:36
ExternalStoreDB\__construct
__construct(array $params)
Definition: ExternalStoreDB.php:48
ExternalStoreMedium\$params
array $params
Usage context options for this instance.
Definition: ExternalStoreMedium.php:38
HistoryBlob
Base class for general text storage via the "object" flag in old_flags, or two-part external storage ...
Definition: HistoryBlob.php:28
ExternalStoreDB\getSlave
getSlave( $cluster)
Get a replica DB connection for the specified cluster.
Definition: ExternalStoreDB.php:182
ExternalStoreDB\parseURL
parseURL( $url)
Definition: ExternalStoreDB.php:435
ExternalStoreDB
External storage in a SQL database.
Definition: ExternalStoreDB.php:39
ExternalStoreDB\store
store( $location, $data)
Insert a data item into a given location. Parameters: the location name; the data item. Returns string|bool: the URL of the s...
Definition: ExternalStoreDB.php:118
ExternalStoreDB\batchFetchFromURLs
batchFetchFromURLs(array $urls)
Fetch multiple URLs from given external store.
Definition: ExternalStoreDB.php:87
ExternalStoreDB\$lbFactory
LBFactory $lbFactory
Definition: ExternalStoreDB.php:41
ExternalStoreMedium\$dbDomain
string $dbDomain
Default database domain to store content under.
Definition: ExternalStoreMedium.php:40
$res
$res
Definition: testCompression.php:57
ExternalStoreDB\fetchBlob
fetchBlob( $cluster, $id, $itemID)
Fetch a blob item out of the database; a cache of the last-loaded blob will be kept so that multiple ...
Definition: ExternalStoreDB.php:306
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
ExternalStoreDB\getDomainId
getDomainId(array $server)
Definition: ExternalStoreDB.php:219
$dbr
$dbr
Definition: testCompression.php:54
Wikimedia\Rdbms\MaintainableDBConnRef
Helper class to handle automatically marking connections as reusable (via RAII pattern) as well handl...
Definition: MaintainableDBConnRef.php:13
ExternalStoreDB\getLoadBalancer
getLoadBalancer( $cluster)
Get a LoadBalancer for the specified cluster.
Definition: ExternalStoreDB.php:153
MWException
MediaWiki exception.
Definition: MWException.php:29
wfDeprecated
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
Definition: GlobalFunctions.php:997
$blob
$blob
Definition: testCompression.php:70
ExternalStoreDB\isReadOnly
isReadOnly( $location)
Check if a given location is read-only. Parameter: the location name. Returns bool: whether this location is read-only. 1....
Definition: ExternalStoreDB.php:136
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
ExternalStoreDB\fetchFromURL
fetchFromURL( $url)
Fetch data from given external store URL.
Definition: ExternalStoreDB.php:66
DB_PRIMARY
const DB_PRIMARY
Definition: defines.php:27
ExternalStoreDB\getReplica
getReplica( $cluster)
Get a replica DB connection for the specified cluster.
Definition: ExternalStoreDB.php:164
Wikimedia\Rdbms\DBUnexpectedError
Definition: DBUnexpectedError.php:29
Wikimedia\Rdbms\DBConnRef
Helper class used for automatically marking an IDatabase connection as reusable (once it no longer ma...
Definition: DBConnRef.php:30
unserialize
unserialize( $serialized)
Definition: ApiMessageTrait.php:146
ExternalStoreDB\getPrimary
getPrimary( $cluster)
Get a primary database connection for the specified cluster.
Definition: ExternalStoreDB.php:194
Wikimedia\Rdbms\LBFactory
An interface for generating database load balancers.
Definition: LBFactory.php:42
$path
$path
Definition: NoLocalSettings.php:25
ExternalStoreDB\initializeTable
initializeTable( $cluster)
Create the appropriate blobs table on this cluster.
Definition: ExternalStoreDB.php:268
Wikimedia\Rdbms\DatabaseDomain
Class to handle database/schema/prefix specifications for IDatabase.
Definition: DatabaseDomain.php:40
$IP
$IP
Definition: WebStart.php:49
ExternalStoreDB\mergeBatchResult
mergeBatchResult(array &$ret, array &$ids, $res)
Helper function for self::batchFetchBlobs for merging primary/replica DB results.
Definition: ExternalStoreDB.php:416
ExternalStoreDB\getTable
getTable( $db, $cluster=null)
Get the 'blobs' table name for this database.
Definition: ExternalStoreDB.php:249
ExternalStoreDB\getMaster
getMaster( $cluster)
Definition: ExternalStoreDB.php:210
Wikimedia\Rdbms\ILoadBalancer
Database cluster connection, tracking, load balancing, and transaction manager interface.
Definition: ILoadBalancer.php:81