MediaWiki  master
ExternalStoreDB.php
Go to the documentation of this file.
1 <?php
27 use Wikimedia\ScopedCallback;
28 
/** @var LBFactory Factory supplied via $params['lbFactory']; used to look up per-cluster external load balancers */
40  private $lbFactory;
41 
/**
 * Build the store and remember the load balancer factory it will use.
 *
 * @param array $params Options handed to the parent constructor; must also carry an
 *   LBFactory instance under the 'lbFactory' key.
 * @throws InvalidArgumentException If 'lbFactory' is missing or is not an LBFactory.
 */
public function __construct( array $params ) {
	parent::__construct( $params );

	// A missing or null entry fails the instanceof test just like a wrong type does.
	$factory = $params['lbFactory'] ?? null;
	if ( !( $factory instanceof LBFactory ) ) {
		throw new InvalidArgumentException( "LBFactory required in 'lbFactory' field." );
	}

	$this->lbFactory = $factory;
}
54 
/**
 * Fetch the data behind a single external store URL.
 *
 * @param string $url URL in the form understood by parseURL()
 * @return mixed The stored blob text, or the requested item when the URL names one;
 *   false when the blob could not be found.
 */
public function fetchFromURL( $url ) {
	[ $cluster, $id, $itemID ] = $this->parseURL( $url );
	$blob = $this->fetchBlob( $cluster, $id, $itemID );

	// Nothing to extract when the lookup failed or the URL addresses the whole blob.
	if ( $blob === false || $itemID === false ) {
		return $blob;
	}

	return $blob->getItem( $itemID );
}
75 
/**
 * Fetch the data behind several external store URLs, issuing one batch per cluster.
 *
 * @param array $urls List of URLs in the form understood by parseURL()
 * @return array Map of URL => fetched data. URLs whose blob was not returned by
 *   batchFetchBlobs() have no entry.
 */
public function batchFetchFromURLs( array $urls ) {
	$requestsByCluster = [];
	$urlLookup = [];
	foreach ( $urls as $url ) {
		[ $cluster, $id, $itemID ] = $this->parseURL( $url );
		$requestsByCluster[$cluster][$id][] = $itemID;
		// A false $itemID is cast to an integer array key here. That is acceptable
		// because the strict comparison below uses the uncast value kept in
		// $requestsByCluster.
		$urlLookup[$cluster][$id][$itemID] = $url;
	}

	$results = [];
	foreach ( $requestsByCluster as $cluster => $itemsByBlobId ) {
		$blobs = $this->batchFetchBlobs( $cluster, $itemsByBlobId );
		foreach ( $blobs as $id => $blob ) {
			foreach ( $itemsByBlobId[$id] as $itemID ) {
				$url = $urlLookup[$cluster][$id][$itemID];
				// Whole-blob URLs get the blob itself; item URLs get the named item.
				$results[$url] = ( $itemID === false ) ? $blob : $blob->getItem( $itemID );
			}
		}
	}

	return $results;
}
113 
/**
 * Insert a data item into the blobs table of the given location.
 *
 * @param string $location Cluster name
 * @param string $data Data item to store in the blob_text column
 * @return string URL of the stored item, in the form "DB://<location>/<id>"
 * @throws MWException If the database reports no insert ID.
 */
public function store( $location, $data ) {
	$primary = $this->getPrimary( $location );
	$table = $this->getTable( $primary, $location );
	$primary->insert( $table, [ 'blob_text' => $data ], __METHOD__ );

	$blobId = $primary->insertId();
	if ( $blobId ) {
		return "DB://$location/$blobId";
	}

	throw new MWException( __METHOD__ . ': no insert ID' );
}
131 
/**
 * Check whether a location is read-only.
 *
 * A location is read-only when the parent class says so, or when the cluster's load
 * balancer reports a read-only reason.
 *
 * @param string $location Cluster name
 * @return bool
 */
public function isReadOnly( $location ) {
	return parent::isReadOnly( $location )
		|| $this->getLoadBalancer( $location )->getReadOnlyReason() !== false;
}
142 
/**
 * Look up the external load balancer for a cluster.
 *
 * @param string $cluster Cluster name
 * @return mixed Whatever LBFactory::getExternalLB() returns for this cluster
 */
private function getLoadBalancer( $cluster ) {
	$externalLb = $this->lbFactory->getExternalLB( $cluster );

	return $externalLb;
}
152 
/**
 * Get a replica DB connection for the specified cluster.
 *
 * The database domain is derived from the configuration of the cluster's writer
 * server, and the connection is requested with the CONN_TRX_AUTOCOMMIT flag.
 *
 * @param string $cluster Cluster name
 * @return mixed Connection handle returned by the load balancer's getConnectionRef()
 */
public function getReplica( $cluster ) {
	$lb = $this->getLoadBalancer( $cluster );
	$writerInfo = $lb->getServerInfo( $lb->getWriterIndex() );
	$domain = $this->getDomainId( $writerInfo );

	return $lb->getConnectionRef( DB_REPLICA, [], $domain, $lb::CONN_TRX_AUTOCOMMIT );
}
170 
/**
 * Get a primary database connection for the specified cluster.
 *
 * The database domain is derived from the configuration of the cluster's writer
 * server, and the connection is requested with the CONN_TRX_AUTOCOMMIT flag.
 *
 * @param string $cluster Cluster name
 * @return mixed Connection handle returned by the load balancer's
 *   getMaintenanceConnectionRef()
 */
public function getPrimary( $cluster ) {
	$lb = $this->getLoadBalancer( $cluster );
	$writerInfo = $lb->getServerInfo( $lb->getWriterIndex() );
	$domain = $this->getDomainId( $writerInfo );

	return $lb->getMaintenanceConnectionRef(
		DB_PRIMARY,
		[],
		$domain,
		$lb::CONN_TRX_AUTOCOMMIT
	);
}
188 
/**
 * Work out which database domain to use for a cluster's connections.
 *
 * @param array $server Server configuration of the cluster's writer
 * @return string|bool The explicit foreign domain, a domain ID built from the server
 *   configuration, or false to use the load balancer's local domain.
 */
private function getDomainId( array $server ) {
	// An explicitly configured foreign domain always wins.
	if ( $this->isDbDomainExplicit ) {
		return $this->dbDomain;
	}

	// Without a "dbname" entry, fall back to the local LB domain.
	if ( !isset( $server['dbname'] ) ) {
		return false;
	}

	// T200471: for backwards compatibility, any "dbname" field forces which database
	// is used. MediaWiki/LoadBalancer used to have no enforced notion of a local DB
	// domain and simply assumed the LB server configuration matched $wgDBname. This
	// matters when the external storage DB of a cluster is not named like the
	// corresponding "main" DB(s) of the wikis.
	$schema = $server['schema'] ?? null;
	$prefix = $server['tablePrefix'] ?? '';

	return ( new DatabaseDomain( $server['dbname'], $schema, $prefix ) )->getId();
}
215 
/**
 * Get the name of the blobs table for a database connection.
 *
 * Lookup order: the 'blobs table' entry of the cluster's writer server configuration
 * (only when a cluster is given), then the connection's 'blobs table' LB info, then
 * the literal default 'blobs'.
 *
 * @param mixed $db Connection handle exposing getLBInfo()
 * @param string|null $cluster Cluster name, or null to skip the cluster lookup
 * @return string Table name
 */
public function getTable( $db, $cluster = null ) {
	$clusterTable = null;
	if ( $cluster !== null ) {
		$lb = $this->getLoadBalancer( $cluster );
		$writerInfo = $lb->getServerInfo( $lb->getWriterIndex() );
		$clusterTable = $writerInfo['blobs table'] ?? null;
	}

	// The connection-level setting is kept for backwards compatibility.
	return $clusterTable ?? $db->getLBInfo( 'blobs table' ) ?? 'blobs';
}
234 
/**
 * Create the appropriate blobs table on a cluster.
 *
 * Reads the table definition from maintenance/storage/blobs.sql, substitutes the
 * cluster's table name for the placeholder names and runs the result on the
 * primary database.
 *
 * @param string $cluster Cluster name
 * @throws DBUnexpectedError If the primary DB is neither 'mysql' nor 'sqlite'.
 * @throws RuntimeException If the SQL file cannot be read.
 */
public function initializeTable( $cluster ) {
	global $IP;

	$dbw = $this->getPrimary( $cluster );
	if ( !in_array( $dbw->getType(), [ 'mysql', 'sqlite' ], true ) ) {
		throw new DBUnexpectedError( $dbw, "RDBMS type '{$dbw->getType()}' not supported." );
	}

	$schemaFile = "$IP/maintenance/storage/blobs.sql";
	$schemaSql = file_get_contents( $schemaFile );
	if ( $schemaSql === false ) {
		throw new RuntimeException( "Failed to read '$schemaFile'." );
	}

	// The raw name may be cluster specific, e.g. "blobs_cluster23".
	$encTable = $dbw->tableName( $this->getTable( $dbw, $cluster ) );

	// Both placeholder spellings are replaced by the same encoded table name.
	$createSql = str_replace(
		[ '/*$wgDBprefix*/blobs', '/*_*/blobs' ],
		$encTable,
		$schemaSql
	);

	$dbw->query( $createSql, __METHOD__, $dbw::QUERY_IGNORE_DBO_TRX );
}
270 
/**
 * Fetch one blob from a cluster, trying the replica first and the primary second.
 *
 * @param string $cluster Cluster name
 * @param string $id Value matched against the blob_id column
 * @param string|bool $itemID Item ID within the blob, or false for the whole blob
 * @return mixed The raw blob_text when $itemID is false, the result of
 *   HistoryBlobUtils::unserialize() otherwise, or false when no row was found.
 */
private function fetchBlob( $cluster, $id, $itemID ) {
	// The array is replaced wholesale after every lookup that reaches the database,
	// so it only ever holds the most recent result.
	static $externalBlobCache = [];

	$wantsItem = ( $itemID !== false );

	// The trailing slash keeps raw-text entries apart from unserialized ones.
	$cacheID = "$cluster/$id" . ( $wantsItem ? '/' : '' );
	$cacheID = "$cacheID@{$this->dbDomain}";

	if ( isset( $externalBlobCache[$cacheID] ) ) {
		$this->logger->debug( __METHOD__ . ": cache hit on $cacheID" );

		return $externalBlobCache[$cacheID];
	}

	$this->logger->debug( __METHOD__ . ": cache miss on $cacheID" );

	$conds = [ 'blob_id' => $id ];

	$dbr = $this->getReplica( $cluster );
	$ret = $dbr->selectField( $this->getTable( $dbr, $cluster ), 'blob_text', $conds, __METHOD__ );

	if ( $ret === false ) {
		// Not on the replica: retry on the primary DB with the replicas-only
		// expectation silenced for the duration of the query.
		$this->logger->warning( __METHOD__ . ": primary DB fallback on $cacheID" );
		$trxProfiler = $this->lbFactory->getTransactionProfiler();
		$scope = $trxProfiler->silenceForScope( $trxProfiler::EXPECTATION_REPLICAS_ONLY );
		$dbw = $this->getPrimary( $cluster );
		$ret = $dbw->selectField( $this->getTable( $dbw, $cluster ), 'blob_text', $conds, __METHOD__ );
		ScopedCallback::consume( $scope );

		if ( $ret === false ) {
			$this->logger->warning( __METHOD__ . ": primary DB failed to find $cacheID" );
		}
	}

	if ( $wantsItem && $ret !== false ) {
		// Hand back the unserialized object; the caller picks the item out of it.
		$ret = HistoryBlobUtils::unserialize( $ret );
	}

	$externalBlobCache = [ $cacheID => $ret ];

	return $ret;
}
333 
/**
 * Fetch several blobs from one cluster, trying the replica first and the primary
 * for whatever the replica did not return.
 *
 * @param string $cluster Cluster name
 * @param array $ids Map of blob ID => list of item IDs requested from that blob
 *   (false standing for "the whole blob"), as consumed by mergeBatchResult()
 * @return array Map of blob ID => blob text or unserialized blob object. Blobs found
 *   on neither server have no entry.
 */
private function batchFetchBlobs( $cluster, array $ids ) {
	$dbr = $this->getReplica( $cluster );
	$res = $dbr->newSelectQueryBuilder()
		->select( [ 'blob_id', 'blob_text' ] )
		->from( $this->getTable( $dbr, $cluster ) )
		->where( [ 'blob_id' => array_keys( $ids ) ] )
		->caller( __METHOD__ )
		->fetchResultSet();

	$ret = [];
	if ( $res !== false ) {
		// Removes every blob ID it finds from $ids.
		$this->mergeBatchResult( $ret, $ids, $res );
	}

	if ( $ids ) {
		// Some blobs were not on the replica: try the primary, with the
		// replicas-only expectation silenced for the duration of the query.
		$missingIds = array_keys( $ids );
		$this->logger->info(
			__METHOD__ . ": primary fallback on '$cluster' for: " .
			implode( ',', $missingIds )
		);
		$trxProfiler = $this->lbFactory->getTransactionProfiler();
		$scope = $trxProfiler->silenceForScope( $trxProfiler::EXPECTATION_REPLICAS_ONLY );
		$dbw = $this->getPrimary( $cluster );
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'blob_id', 'blob_text' ] )
			// Resolve the table name against the connection that runs the query,
			// matching what fetchBlob() does for its primary fallback.
			->from( $this->getTable( $dbw, $cluster ) )
			->where( [ 'blob_id' => $missingIds ] )
			->caller( __METHOD__ )
			->fetchResultSet();
		ScopedCallback::consume( $scope );

		if ( $res === false ) {
			$this->logger->error( __METHOD__ . ": primary failed on '$cluster'" );
		} else {
			$this->mergeBatchResult( $ret, $ids, $res );
		}
	}

	if ( $ids ) {
		// Still unresolved after asking the primary.
		$this->logger->error(
			__METHOD__ . ": primary on '$cluster' failed locating items: " .
			implode( ',', array_keys( $ids ) )
		);
	}

	return $ret;
}
386 
/**
 * Fold the rows of a batch query into the result map.
 *
 * @param array &$ret Map of blob ID => blob text or unserialized blob; filled in place
 * @param array &$ids Map of blob ID => requested item IDs; every blob ID present in
 *   $res is removed, so what is left afterwards is what was not found
 * @param iterable $res Rows exposing blob_id and blob_text properties
 */
private function mergeBatchResult( array &$ret, array &$ids, $res ) {
	foreach ( $res as $row ) {
		$blobId = $row->blob_id;
		$requestedItems = $ids[$blobId];
		// Drop the ID so the caller can tell which blobs are still missing.
		unset( $ids[$blobId] );

		// Exactly one request, and it is for the whole blob: return the raw text.
		// Anything else means items were requested, so the blob must be unserialized.
		$wholeBlobOnly = ( array_values( $requestedItems ) === [ false ] );
		$ret[$blobId] = $wholeBlobOnly
			? $row->blob_text
			: HistoryBlobUtils::unserialize( $row->blob_text );
	}
}
407 
/**
 * Split an external store URL into its parts.
 *
 * The URL is cut on "/" and the third, fourth and optional fifth segments are used,
 * which matches URLs of the form "DB://<cluster>/<id>[/<itemID>]".
 *
 * @param string $url
 * @return array [ cluster, id, itemID ]; itemID is false when the URL has none
 */
protected function parseURL( $url ) {
	$segments = explode( '/', $url );

	$cluster = $segments[2];
	$id = $segments[3];
	$itemID = $segments[4] ?? false;

	return [ $cluster, $id, $itemID ];
}
421 }
if(!defined( 'MEDIAWIKI')) if(ini_get( 'mbstring.func_overload')) if(!defined( 'MW_ENTRY_POINT')) global $IP
Environment checks.
Definition: Setup.php:93
External storage in a SQL database.
getPrimary( $cluster)
Get a primary database connection for the specified cluster.
__construct(array $params)
getReplica( $cluster)
Get a replica DB connection for the specified cluster.
initializeTable( $cluster)
Create the appropriate blobs table on this cluster.
fetchFromURL( $url)
Fetch data from given external store URL.
getTable( $db, $cluster=null)
Get the 'blobs' table name for this database.
store( $location, $data)
Insert a data item into a given location. Takes the location name and the data item; returns string|bool, the URL of the stored data item.
batchFetchFromURLs(array $urls)
Fetch multiple URLs from given external store.
isReadOnly( $location)
Check if a given location is read-only. Takes the location name; returns bool, whether this location is read-only. 1....
Base class for external storage.
array $params
Usage context options for this instance.
string $dbDomain
Default database domain to store content under.
static unserialize(string $str, bool $allowDouble=false)
Unserialize a HistoryBlob.
MediaWiki exception.
Definition: MWException.php:32
Helper class used for automatically marking an IDatabase connection as reusable (once it no longer ma...
Definition: DBConnRef.php:29
Class to handle database/schema/prefix specifications for IDatabase.
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:36
This class is a delegate to ILBFactory for a given database cluster.
const DB_REPLICA
Definition: defines.php:26
const DB_PRIMARY
Definition: defines.php:28