MediaWiki  master
ExternalStoreDB.php
Go to the documentation of this file.
1 <?php
27 use Wikimedia\ScopedCallback;
28 
/** @var LBFactory Factory the constructor validates and stores; used to obtain the per-cluster external load balancers */
40  private $lbFactory;
41 
/**
 * Initialise the store and remember the load balancer factory.
 *
 * @param array $params Options forwarded to the parent constructor; must hold an
 *   LBFactory instance under the 'lbFactory' key
 * @throws InvalidArgumentException If 'lbFactory' is missing or is not an LBFactory
 */
public function __construct( array $params ) {
	parent::__construct( $params );

	// A missing key and a null value are both rejected by the instanceof check.
	$factory = $params['lbFactory'] ?? null;
	if ( !( $factory instanceof LBFactory ) ) {
		throw new InvalidArgumentException( "LBFactory required in 'lbFactory' field." );
	}

	$this->lbFactory = $factory;
}
54 
/**
 * Fetch the data behind a single external store URL.
 *
 * @param string $url URL that parseURL() splits into cluster, blob ID and optional item ID
 * @return mixed False if the blob could not be loaded; the value from fetchBlob() when
 *   the URL names no item; otherwise the result of getItem() on the fetched blob
 */
public function fetchFromURL( $url ) {
	[ $cluster, $id, $itemID ] = $this->parseURL( $url );
	$blob = $this->fetchBlob( $cluster, $id, $itemID );

	// Nothing to extract: either the whole blob was requested or the lookup failed.
	if ( $itemID === false || $blob === false ) {
		return $blob;
	}

	return $blob->getItem( $itemID );
}
75 
/**
 * Fetch the data behind several external store URLs, grouping the lookups by cluster.
 *
 * @param string[] $urls URLs that parseURL() can split into cluster, blob ID and
 *   optional item ID
 * @return array Map of URL => fetched data. URLs whose blob was not returned by
 *   batchFetchBlobs() are absent; an item URL whose blob is false (for example
 *   because it could not be unserialized) maps to false, as in fetchFromURL().
 */
public function batchFetchFromURLs( array $urls ) {
	$batched = $inverseUrlMap = [];
	// Drop repeated URLs first. The result is keyed by URL, so duplicates add nothing,
	// and a repeated item-less URL would otherwise list `false` twice for one blob,
	// making mergeBatchResult() treat the raw text as a serialized multi-item blob.
	foreach ( array_unique( $urls ) as $url ) {
		[ $cluster, $id, $itemID ] = $this->parseURL( $url );
		$batched[$cluster][$id][] = $itemID;
		// false $itemID gets cast to int, but should be ok
		// since we do === from the $itemID in $batched
		$inverseUrlMap[$cluster][$id][$itemID] = $url;
	}

	$ret = [];
	foreach ( $batched as $cluster => $batchByCluster ) {
		$res = $this->batchFetchBlobs( $cluster, $batchByCluster );
		foreach ( $res as $id => $blob ) {
			foreach ( $batchByCluster[$id] as $itemID ) {
				$url = $inverseUrlMap[$cluster][$id][$itemID];
				if ( $itemID === false || $blob === false ) {
					// Whole-blob request, or a blob with nothing to extract from:
					// hand the value back as is instead of calling getItem() on false.
					$ret[$url] = $blob;
				} else {
					$ret[$url] = $blob->getItem( $itemID );
				}
			}
		}
	}

	return $ret;
}
113 
/**
 * Insert a data item into the blobs table of the given location.
 *
 * @param string $location Cluster name, used both to pick the primary connection
 *   and as the host part of the returned URL
 * @param string $data Value written to the 'blob_text' column
 * @return string URL of the form "DB://<location>/<insert id>"
 * @throws MWException If the connection reports no insert ID after the insert
 */
public function store( $location, $data ) {
	$primary = $this->getPrimary( $location );
	$table = $this->getTable( $primary, $location );
	$primary->insert( $table, [ 'blob_text' => $data ], __METHOD__ );

	$blobId = $primary->insertId();
	if ( $blobId ) {
		return "DB://$location/$blobId";
	}

	throw new MWException( __METHOD__ . ': no insert ID' );
}
131 
/**
 * Check whether a location is read-only.
 *
 * @param string $location Cluster name
 * @return bool True if the parent class reports the location read-only, or if the
 *   cluster's load balancer gives a read-only reason other than false
 */
public function isReadOnly( $location ) {
	// The load balancer is only consulted when the parent check says "writable".
	return parent::isReadOnly( $location )
		|| $this->getLoadBalancer( $location )->getReadOnlyReason() !== false;
}
142 
/**
 * Look up the external load balancer for a cluster via the stored LBFactory.
 *
 * @param string $cluster Cluster name
 * @return mixed Whatever the factory's getExternalLB() returns for that cluster
 */
private function getLoadBalancer( $cluster ) {
	$factory = $this->lbFactory;

	return $factory->getExternalLB( $cluster );
}
152 
/**
 * Get a replica DB connection for the specified cluster.
 *
 * The domain is derived from the writer server's configuration via getDomainId(),
 * and the connection is requested with the CONN_TRX_AUTOCOMMIT flag.
 *
 * @param string $cluster Cluster name
 * @return mixed Connection handle returned by the load balancer's getConnectionRef()
 */
public function getReplica( $cluster ) {
	$lb = $this->getLoadBalancer( $cluster );
	$writerInfo = $lb->getServerInfo( $lb->getWriterIndex() );
	$domain = $this->getDomainId( $writerInfo );

	return $lb->getConnectionRef( DB_REPLICA, [], $domain, $lb::CONN_TRX_AUTOCOMMIT );
}
170 
/**
 * Get a primary database connection for the specified cluster.
 *
 * The domain is derived from the writer server's configuration via getDomainId(),
 * and the connection is requested with the CONN_TRX_AUTOCOMMIT flag.
 *
 * @param string $cluster Cluster name
 * @return mixed Connection handle returned by the load balancer's
 *   getMaintenanceConnectionRef()
 */
public function getPrimary( $cluster ) {
	$lb = $this->getLoadBalancer( $cluster );
	$writerInfo = $lb->getServerInfo( $lb->getWriterIndex() );
	$domain = $this->getDomainId( $writerInfo );

	return $lb->getMaintenanceConnectionRef(
		DB_PRIMARY,
		[],
		$domain,
		$lb::CONN_TRX_AUTOCOMMIT
	);
}
188 
/**
 * Deprecated alias of getPrimary(); logs a deprecation warning before delegating.
 *
 * @deprecated since 1.37 Use getPrimary() instead
 * @param string $cluster Cluster name
 * @return mixed The connection handle returned by getPrimary()
 */
public function getMaster( $cluster ) {
	wfDeprecated( __METHOD__, '1.37' );
	$primary = $this->getPrimary( $cluster );

	return $primary;
}
198 
/**
 * Work out which DB domain to request for a connection.
 *
 * @param array $server Server configuration; the 'dbname', 'schema' and
 *   'tablePrefix' keys are read when present
 * @return string|bool The explicit domain of this store, an ID built from the
 *   server configuration, or false to use the load balancer's local domain
 */
private function getDomainId( array $server ) {
	if ( $this->isDbDomainExplicit ) {
		// explicit foreign domain
		return $this->dbDomain;
	}

	if ( !isset( $server['dbname'] ) ) {
		// local LB domain
		return false;
	}

	// T200471: for b/c, treat any "dbname" field as forcing which database to use.
	// MediaWiki/LoadBalancer previously did not enforce any concept of a local DB
	// domain, but rather assumed that the LB server configuration matched $wgDBname.
	// This check is useful when the external storage DB for this cluster does not use
	// the same name as the corresponding "main" DB(s) for wikis.
	$schema = $server['schema'] ?? null;
	$prefix = $server['tablePrefix'] ?? '';

	return ( new DatabaseDomain( $server['dbname'], $schema, $prefix ) )->getId();
}
225 
/**
 * Get the 'blobs' table name for this database.
 *
 * Lookup order: the 'blobs table' entry of the cluster's writer server info (only
 * when a cluster is given), then the connection's 'blobs table' LB info, then the
 * literal name 'blobs'.
 *
 * @param mixed $db Connection handle exposing getLBInfo()
 * @param string|null $cluster Cluster name, or null to skip the server-info lookup
 * @return string Table name
 */
public function getTable( $db, $cluster = null ) {
	$configured = null;
	if ( $cluster !== null ) {
		$lb = $this->getLoadBalancer( $cluster );
		$writerInfo = $lb->getServerInfo( $lb->getWriterIndex() );
		$configured = $writerInfo['blobs table'] ?? null;
	}

	// b/c: fall back to the per-connection LB info, then to the default name
	return $configured ?? $db->getLBInfo( 'blobs table' ) ?? 'blobs';
}
244 
/**
 * Create the appropriate blobs table on this cluster.
 *
 * Reads the SQL template under $IP/maintenance/storage/blobs.sql, substitutes the
 * cluster's table name for the template placeholders and runs it on the primary.
 *
 * @param string $cluster Cluster name
 * @throws DBUnexpectedError If the primary's type is neither 'mysql' nor 'sqlite'
 * @throws RuntimeException If the SQL template cannot be read
 */
public function initializeTable( $cluster ) {
	global $IP;

	$dbw = $this->getPrimary( $cluster );
	$dbType = $dbw->getType();
	if ( $dbType !== 'mysql' && $dbType !== 'sqlite' ) {
		throw new DBUnexpectedError( $dbw, "RDBMS type '{$dbw->getType()}' not supported." );
	}

	$sqlFilePath = "$IP/maintenance/storage/blobs.sql";
	$template = file_get_contents( $sqlFilePath );
	if ( $template === false ) {
		throw new RuntimeException( "Failed to read '$sqlFilePath'." );
	}

	// e.g. "blobs_cluster23"
	$rawTable = $this->getTable( $dbw, $cluster );
	$encTable = $dbw->tableName( $rawTable );

	// Both placeholder spellings in the template map to the same encoded name.
	$placeholders = [ '/*$wgDBprefix*/blobs', '/*_*/blobs' ];
	$statement = str_replace( $placeholders, $encTable, $template );

	$dbw->query( $statement, __METHOD__, $dbw::QUERY_IGNORE_DBO_TRX );
}
280 
/**
 * Fetch one blob, trying the replica first and the primary second.
 *
 * The most recent result is kept in a function-static, single-entry cache: every
 * miss replaces the whole cache array with just the new entry. The cache key ends
 * in a slash when an item ID was given, so raw and unserialized forms of the same
 * blob never share an entry.
 *
 * @param string $cluster Cluster name
 * @param string $id Value matched against the 'blob_id' column
 * @param string|bool $itemID Item ID, or false when the whole blob is wanted
 * @return mixed False if neither server has the row; the raw 'blob_text' when
 *   $itemID is false; otherwise the result of unserialize() on that text (the
 *   caller extracts the item)
 */
private function fetchBlob( $cluster, $id, $itemID ) {
	static $externalBlobCache = [];

	$wantsItem = ( $itemID !== false );
	$cacheID = ( $wantsItem ? "$cluster/$id/" : "$cluster/$id" ) . "@{$this->dbDomain}";

	if ( isset( $externalBlobCache[$cacheID] ) ) {
		$this->logger->debug( __METHOD__ . ": cache hit on $cacheID" );

		return $externalBlobCache[$cacheID];
	}

	$this->logger->debug( __METHOD__ . ": cache miss on $cacheID" );

	$conds = [ 'blob_id' => $id ];

	$dbr = $this->getReplica( $cluster );
	$blob = $dbr->selectField( $this->getTable( $dbr, $cluster ), 'blob_text', $conds, __METHOD__ );

	if ( $blob === false ) {
		// Try the primary DB
		$this->logger->warning( __METHOD__ . ": primary DB fallback on $cacheID" );
		$trxProfiler = $this->lbFactory->getTransactionProfiler();
		// Silence the replicas-only expectation for the duration of the primary read
		$scope = $trxProfiler->silenceForScope( $trxProfiler::EXPECTATION_REPLICAS_ONLY );
		$dbw = $this->getPrimary( $cluster );
		$blob = $dbw->selectField( $this->getTable( $dbw, $cluster ), 'blob_text', $conds, __METHOD__ );
		ScopedCallback::consume( $scope );

		if ( $blob === false ) {
			$this->logger->warning( __METHOD__ . ": primary DB failed to find $cacheID" );
		}
	}

	if ( $wantsItem && $blob !== false ) {
		// Unserialise object; caller extracts item
		$blob = unserialize( $blob );
	}

	// Overwrite, rather than extend, the cache so it only ever holds one entry
	$externalBlobCache = [ $cacheID => $blob ];

	return $blob;
}
343 
/**
 * Fetch several blobs from one cluster, trying the replica first and the primary
 * for whatever the replica did not return.
 *
 * @param string $cluster Cluster name
 * @param array $ids Map of blob ID => list of item IDs wanted from that blob
 *   (false meaning the whole blob); only the keys are used in the queries
 * @return array Map of blob ID => raw 'blob_text' or unserialized blob, as decided
 *   by mergeBatchResult(); IDs found on neither server are absent
 */
private function batchFetchBlobs( $cluster, array $ids ) {
	$dbr = $this->getReplica( $cluster );
	$res = $dbr->newSelectQueryBuilder()
		->select( [ 'blob_id', 'blob_text' ] )
		->from( $this->getTable( $dbr, $cluster ) )
		->where( [ 'blob_id' => array_keys( $ids ) ] )
		->caller( __METHOD__ )
		->fetchResultSet();

	$ret = [];
	if ( $res !== false ) {
		// Removes every found ID from $ids, leaving only the ones still missing
		$this->mergeBatchResult( $ret, $ids, $res );
	}
	if ( $ids ) {
		// Try the primary
		$this->logger->info(
			__METHOD__ . ": primary fallback on '$cluster' for: " .
			implode( ',', array_keys( $ids ) )
		);
		$trxProfiler = $this->lbFactory->getTransactionProfiler();
		// Silence the replicas-only expectation for the duration of the primary read
		$scope = $trxProfiler->silenceForScope( $trxProfiler::EXPECTATION_REPLICAS_ONLY );
		$dbw = $this->getPrimary( $cluster );
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'blob_id', 'blob_text' ] )
			// Resolve the table name against the primary handle being queried,
			// matching what fetchBlob() does for its primary fallback.
			->from( $this->getTable( $dbw, $cluster ) )
			->where( [ 'blob_id' => array_keys( $ids ) ] )
			->caller( __METHOD__ )
			->fetchResultSet();
		ScopedCallback::consume( $scope );
		if ( $res === false ) {
			$this->logger->error( __METHOD__ . ": primary failed on '$cluster'" );
		} else {
			$this->mergeBatchResult( $ret, $ids, $res );
		}
	}
	if ( $ids ) {
		$this->logger->error(
			__METHOD__ . ": primary on '$cluster' failed locating items: " .
			implode( ',', array_keys( $ids ) )
		);
	}

	return $ret;
}
396 
/**
 * Fold a result set into the batch output and tick off the IDs it covered.
 *
 * @param array &$ret Output map of blob ID => blob; gains one entry per row
 * @param array &$ids Map of blob ID => list of wanted item IDs; each ID present in
 *   the result set is removed, so what remains afterwards was not found
 * @param iterable $res Rows exposing 'blob_id' and 'blob_text' properties
 */
private function mergeBatchResult( array &$ret, array &$ids, $res ) {
	foreach ( $res as $row ) {
		$blobId = $row->blob_id;
		$wanted = $ids[$blobId];
		// to track if everything is found
		unset( $ids[$blobId] );

		// Exactly one request, and it asked for the whole blob (item ID false)
		$wholeBlobOnly = ( array_values( $wanted ) === [ false ] );
		if ( !$wholeBlobOnly ) {
			// multi result stored per blob
			$ret[$blobId] = unserialize( $row->blob_text );
			continue;
		}

		// single result stored per blob
		$ret[$blobId] = $row->blob_text;
	}
}
417 
/**
 * Split an external store URL on '/' and pick out its parts.
 *
 * For "DB://cluster/id/item" the pieces at index 2, 3 and 4 are the cluster, the
 * blob ID and the item ID; indexes 0 and 1 hold the scheme and the empty string
 * between the two slashes.
 *
 * @param string $url External store URL
 * @return array Three-element list: cluster, ID, and item ID (false when the URL
 *   has no fifth segment)
 */
protected function parseURL( $url ) {
	$segments = explode( '/', $url );

	$cluster = $segments[2];
	$id = $segments[3];
	$itemID = $segments[4] ?? false;

	return [ $cluster, $id, $itemID ];
}
431 }
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
if(!defined( 'MEDIAWIKI')) if(ini_get( 'mbstring.func_overload')) if(!defined( 'MW_ENTRY_POINT')) global $IP
Environment checks.
Definition: Setup.php:91
External storage in a SQL database.
getPrimary( $cluster)
Get a primary database connection for the specified cluster.
__construct(array $params)
getReplica( $cluster)
Get a replica DB connection for the specified cluster.
getMaster( $cluster)
initializeTable( $cluster)
Create the appropriate blobs table on this cluster.
fetchFromURL( $url)
Fetch data from given external store URL.
getTable( $db, $cluster=null)
Get the 'blobs' table name for this database.
store( $location, $data)
Insert a data item into a given location. Parameters: the location name; the data item. Returns (string|bool): the URL of the stored data item (the rest of this description is truncated in the listing).
batchFetchFromURLs(array $urls)
Fetch multiple URLs from given external store.
isReadOnly( $location)
Check if a given location is read-only. Parameter: the location name. Returns (bool): whether this location is read-only. Since 1.… (version number truncated in the listing).
Base class for external storage.
array $params
Usage context options for this instance.
string $dbDomain
Default database domain to store content under.
MediaWiki exception.
Definition: MWException.php:29
Helper class used for automatically marking an IDatabase connection as reusable (once it no longer ma...
Definition: DBConnRef.php:29
Class to handle database/schema/prefix specifications for IDatabase.
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:40
Create and track the database connections and transactions for a given database cluster.
const DB_REPLICA
Definition: defines.php:26
const DB_PRIMARY
Definition: defines.php:28