MediaWiki  1.31.0
SqlBlobStore.php
Go to the documentation of this file.
1 <?php
27 namespace MediaWiki\Storage;
28 
33 use InvalidArgumentException;
37 use Wikimedia\Assert\Assert;
41 
51 
// Note: the name has been taken unchanged from the Revision class.
const TEXT_CACHE_GROUP = 'revisiontext:10';

/**
 * @var LoadBalancer Source of the database connections used by this store.
 */
private $dbLoadBalancer;

/**
 * @var WANObjectCache Cache consulted by getBlob() and expandBlob().
 */
private $cache;

/**
 * @var bool|string Wiki ID, passed to the load balancer when acquiring connections.
 */
private $wikiId;

/**
 * @var int Cache lifetime for blobs, in seconds.
 */
private $cacheExpiry = 604800; // 7 days

/**
 * @var bool Whether compressData() should deflate blobs.
 */
private $compressBlobs = false;

/**
 * @var bool|string Legacy encoding name, or false when none is configured.
 */
private $legacyEncoding = false;

/**
 * @var Language|null Language object whose iconv() performs the legacy-encoding conversion.
 *  Declared explicitly so that setLegacyEncoding() does not create a dynamic property.
 */
private $legacyEncodingConversionLang = null;

/**
 * @var bool Whether storeBlob() writes the data to the external store.
 */
private $useExternalStore = false;
94 
/**
 * @param LoadBalancer $dbLoadBalancer Load balancer used to acquire database connections.
 * @param WANObjectCache $cache Cache used for blob lookups.
 * @param bool|string $wikiId Wiki ID handed to the load balancer for every connection;
 *  false is treated as the local wiki by expandBlob().
 */
public function __construct(
	LoadBalancer $dbLoadBalancer,
	WANObjectCache $cache,
	$wikiId = false
) {
	$this->dbLoadBalancer = $dbLoadBalancer;
	$this->cache = $cache;
	$this->wikiId = $wikiId;
}
109 
/**
 * @return int The configured blob cache lifetime, in seconds.
 */
public function getCacheExpiry() {
	return $this->cacheExpiry;
}
116 
/**
 * Sets the blob cache lifetime.
 *
 * @param int $cacheExpiry Lifetime in seconds; anything but an integer is rejected
 *  by the parameter assertion.
 */
public function setCacheExpiry( $cacheExpiry ) {
	Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );
	$this->cacheExpiry = $cacheExpiry;
}
125 
/**
 * @return bool Whether compressData() will try to deflate blobs.
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
132 
/**
 * Enables or disables compression of blobs in compressData().
 *
 * @param bool $compressBlobs
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
139 
/**
 * @return bool|string The legacy encoding name, or false if none has been configured.
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
147 
/**
 * @return Language|null The language object stored by setLegacyEncoding(), or null if that
 *  method has not been called.
 */
public function getLegacyEncodingConversionLang() {
	return $this->legacyEncodingConversionLang;
}
155 
/**
 * Configures the legacy encoding that decompressData() converts to UTF-8 for blobs
 * not flagged as UTF-8.
 *
 * @param string $legacyEncoding Name of the legacy encoding.
 * @param Language $language Language object whose iconv() performs the conversion.
 */
public function setLegacyEncoding( $legacyEncoding, Language $language ) {
	Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );

	$this->legacyEncodingConversionLang = $language;
	$this->legacyEncoding = $legacyEncoding;
}
167 
/**
 * @return bool Whether storeBlob() writes data to the external store.
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
174 
/**
 * Controls whether storeBlob() writes data to the external store.
 *
 * @param bool $useExternalStore Must be a boolean; other types are rejected by the
 *  parameter assertion.
 */
public function setUseExternalStore( $useExternalStore ) {
	Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );

	$this->useExternalStore = $useExternalStore;
}
183 
/**
 * @return LoadBalancer The load balancer injected through the constructor.
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
190 
/**
 * Obtains a connection to this store's wiki from the load balancer.
 *
 * @param int $index A database index, such as DB_MASTER.
 * @return IDatabase
 */
private function getDBConnection( $index ) {
	return $this->getDBLoadBalancer()->getConnection( $index, [], $this->wikiId );
}
200 
/**
 * Stores a blob in the text table, optionally via the external store, and returns
 * its address.
 *
 * @param string $data The data to store.
 * @param array $hints Not used by this implementation.
 * @throws BlobAccessException If external storage fails or an MWException occurs.
 * @return string An address of the form "tt:<text id>", usable with getBlob().
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() takes $data by reference and may replace it with the deflated form.
		$flagList = $this->compressData( $data );

		# Write to external storage if required
		if ( $this->useExternalStore ) {
			// After this call, $data holds the external store URL rather than the payload.
			$data = ExternalStore::insertToDefault( $data );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}

			$separator = $flagList ? ',' : '';
			$flagList .= $separator . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$dbw = $this->getDBConnection( DB_MASTER );

		$row = [
			'old_id' => $dbw->nextSequenceValue( 'text_old_id_seq' ),
			'old_text' => $data,
			'old_flags' => $flagList,
		];
		$dbw->insert( 'text', $row, __METHOD__ );

		return 'tt:' . $dbw->insertId();
	} catch ( MWException $e ) {
		// Re-throw as the storage layer's exception type, keeping the original as the cause.
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
252 
/**
 * Retrieves a blob by address, going through the cache first.
 *
 * @param string $blobAddress Address as returned by storeBlob().
 * @param int $queryFlags Bitfield interpreted by DBAccessObjectUtils::getDBOptions().
 * @throws BlobAccessException If the blob could not be loaded.
 * @return string The blob data.
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// TODO: change key, since this is not necessarily revision text!
	$cacheKey = $this->cache->makeKey( 'revisiontext', 'textid', $blobAddress );
	$cacheTtl = $this->getCacheTTL();

	// Invoked by the cache on a miss; loads the blob from the database.
	$loadFromDb = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) {
		list( $dbIndex ) = DBAccessObjectUtils::getDBOptions( $queryFlags );
		$setOpts += Database::getCacheSetOptions( $this->getDBConnection( $dbIndex ) );

		return $this->fetchBlob( $blobAddress, $queryFlags );
	};

	$cacheOptions = [
		'pcGroup' => self::TEXT_CACHE_GROUP,
		'pcTTL' => IExpiringStore::TTL_PROC_LONG
	];

	// No negative caching; negative hits on text rows may be due to corrupted replica DBs
	$blob = $this->cache->getWithSetCallback( $cacheKey, $cacheTtl, $loadFromDb, $cacheOptions );

	if ( $blob === false ) {
		throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress );
	}

	return $blob;
}
288 
/**
 * Loads a blob straight from the text table, bypassing the cache in getBlob().
 *
 * @param string $blobAddress Address using the "tt" schema.
 * @param int $queryFlags Bitfield interpreted by DBAccessObjectUtils::getDBOptions().
 * @throws BlobAccessException If the schema is unknown or the ID is malformed.
 * @return string|bool The expanded blob, or false if the row is missing or unusable.
 */
private function fetchBlob( $blobAddress, $queryFlags ) {
	list( $schema, $id, ) = self::splitBlobAddress( $blobAddress );

	//TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
	if ( $schema !== 'tt' ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Unknown blob address schema: $schema" );
	}

	$textId = intval( $id );

	// Reject zero and anything that does not round-trip through intval() unchanged.
	if ( !$textId || $id !== (string)$textId ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Bad blob address: $blobAddress" );
	}

	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$extraFlags = DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST )
		? self::READ_LATEST_IMMUTABLE
		: 0;
	$queryFlags |= $extraFlags;

	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );

	$fields = [ 'old_text', 'old_flags' ];
	$conds = [ 'old_id' => $textId ];

	// Text data is immutable; check replica DBs first.
	$row = $this->getDBConnection( $index )
		->selectRow( 'text', $fields, $conds, __METHOD__, $options );

	// Fallback to DB_MASTER in some cases if the row was not found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( !$row && $fallbackIndex !== null ) {
		$row = $this->getDBConnection( $fallbackIndex )
			->selectRow( 'text', $fields, $conds, __METHOD__, $fallbackOptions );
	}

	if ( !$row ) {
		wfWarn( __METHOD__ . ": No text row with ID $textId." );
		return false;
	}

	$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );

	if ( $blob === false ) {
		wfWarn( __METHOD__ . ": Bad data in text row $textId." );
	}

	return $blob;
}
358 
/**
 * Expands a raw blob according to its flags: resolves external store URLs and
 * hands the data to decompressData().
 *
 * @param string $raw Raw blob data, or an external store URL if flagged 'external'.
 * @param string|string[] $flags Blob flags, as a comma-separated string or an array.
 * @param string|null $cacheKey Key component for caching externally stored data; the cache
 *  is only used when this is truthy and the store's wiki ID is false.
 * @return string|bool The expanded data, or false on failure.
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	if ( !in_array( 'external', $flags ) ) {
		// Data lives directly in the text table.
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$parts = explode( '://', $url, 2 );
	if ( count( $parts ) == 1 || $parts[1] == '' ) {
		return false;
	}

	// Fetches the external data and expands it; shared by the cached and uncached paths.
	$fetchExternal = function () use ( $url, $flags ) {
		// No negative caching per BlobStore::getBlob()
		$blob = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] );

		return $this->decompressData( $blob, $flags );
	};

	if ( !$cacheKey || $this->wikiId !== false ) {
		return $fetchExternal();
	}

	// Make use of the wiki-local revision text cache.
	// The cached value should be decompressed, so handle that and return here.
	// NOTE: we rely on $this->cache being the right cache for $this->wikiId!
	return $this->cache->getWithSetCallback(
		// TODO: change key, since this is not necessarily revision text!
		$this->cache->makeKey( 'revisiontext', 'textid', $cacheKey ),
		$this->getCacheTTL(),
		$fetchExternal,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
414 
/**
 * Compresses the blob in place when compression is enabled and zlib is available,
 * and reports the flags describing the stored form.
 *
 * @param string &$blob Data to compress; replaced by the deflated data on success.
 * @return string Comma-separated flags: always 'utf-8', plus 'gzip' if the blob was deflated.
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$blobFlags = [ 'utf-8' ];

	if ( $this->compressBlobs ) {
		if ( !function_exists( 'gzdeflate' ) ) {
			wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" );
		} else {
			$deflated = gzdeflate( $blob );

			if ( $deflated !== false ) {
				$blob = $deflated;
				$blobFlags[] = 'gzip';
			} else {
				// Keep the uncompressed data and omit the 'gzip' flag.
				wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
			}
		}
	}

	return implode( ',', $blobFlags );
}
455 
/**
 * Reverses the transformations described by the blob flags: inflates gzip data, unwraps
 * serialized blob objects, and converts legacy encodings to UTF-8.
 *
 * @param string|bool $blob Blob as stored, or false if fetching it failed.
 * @param string[] $blobFlags Flags describing the stored form, e.g. 'gzip', 'object',
 *  'utf-8', 'error'.
 * @return string|bool The decoded blob, or false if the data is unavailable or invalid.
 */
public function decompressData( $blob, array $blobFlags ) {
	if ( $blob === false ) {
		// Text failed to be fetched; nothing to do
		return false;
	}

	// Flags are compared strictly so that non-string entries cannot match by type juggling.
	if ( in_array( 'error', $blobFlags, true ) ) {
		// Error row, return false
		return false;
	}

	if ( in_array( 'gzip', $blobFlags, true ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfLogWarning( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags, true ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() can instantiate arbitrary classes; this is only safe
		// while the stored blob data is trusted.
		$obj = unserialize( $blob );
		// Also reject objects without a usable getText(), e.g. __PHP_Incomplete_Class,
		// instead of dying with a fatal "undefined method" error.
		if ( !is_object( $obj ) || !is_callable( [ $obj, 'getText' ] ) ) {
			// Invalid object
			return false;
		}
		$blob = $obj->getText();
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	if ( $blob !== false && $this->legacyEncoding && $this->legacyEncodingConversionLang
		&& !in_array( 'utf-8', $blobFlags, true ) && !in_array( 'utf8', $blobFlags, true )
	) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# ("utf8" checked for compatibility with some broken
		# conversion scripts 2008-12-30)
		$blob = $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob );
	}

	return $blob;
}
517 
/**
 * Get the text cache TTL.
 *
 * @return int The configured cache expiry, or WANObjectCache::TTL_UNCACHEABLE when the cache
 *  is emulated on top of SQL or the configured expiry is falsy.
 */
private function getCacheTTL() {
	if ( $this->cache->getQoS( WANObjectCache::ATTR_EMULATION )
		=== WANObjectCache::QOS_EMULATION_SQL
	) {
		// Do not cache RDBMs blobs in...the RDBMs store
		$ttl = WANObjectCache::TTL_UNCACHEABLE;
	} else {
		$ttl = $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE;
	}

	return $ttl;
}
537 
/**
 * Extracts the text table ID from a blob address.
 *
 * @param string $address Blob address.
 * @throws InvalidArgumentException If the address is malformed, or a "tt" address carries
 *  an ID that is not a canonical non-zero integer.
 * @return int|null The text ID, or null if the address does not use the "tt" schema.
 */
public function getTextIdFromAddress( $address ) {
	list( $schema, $id, ) = self::splitBlobAddress( $address );

	if ( $schema === 'tt' ) {
		$textId = intval( $id );

		// Accept only non-zero IDs that round-trip through intval() unchanged.
		if ( $textId && $id === (string)$textId ) {
			return $textId;
		}

		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return null;
}
572 
/**
 * Splits a blob address of the form "<schema>:<id>[?<query>]" into its parts.
 *
 * @param string $address Blob address.
 * @throws InvalidArgumentException If the address does not match the expected form.
 * @return array [ $schema, $id, $parameters ]: the lower-cased schema, the ID as a string,
 *  and the query part decoded by wfCgiToArray() (an empty array if there is none).
 */
private static function splitBlobAddress( $address ) {
	// The D modifier makes "$" match only at the very end of the string; without it an
	// address with a trailing newline (e.g. "tt:123\n") would pass validation.
	if ( !preg_match( '/^(\w+):(\w+)(\?(.*))?$/D', $address, $m ) ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	$schema = strtolower( $m[1] );
	$id = $m[2];
	$parameters = isset( $m[4] ) ? wfCgiToArray( $m[4] ) : [];

	return [ $schema, $id, $parameters ];
}
592 
/**
 * Checks whether the blob metadata or the backing blob data store is read-only.
 *
 * @return bool True if the external store is in use and its default stores are read-only,
 *  or if the load balancer reports a read-only reason.
 */
public function isReadOnly() {
	$externalIsReadOnly = $this->useExternalStore && ExternalStore::defaultStoresAreReadOnly();

	// Short-circuits, so the load balancer is only consulted when the external check fails.
	return $externalIsReadOnly || $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
600 }
ExternalStore\insertToDefault
static insertToDefault( $data, array $params=[])
Like insert() above, but does more of the work for us.
Definition: ExternalStore.php:165
MediaWiki\Storage\SqlBlobStore\getBlob
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
Definition: SqlBlobStore.php:265
Wikimedia\Rdbms\Database
Relational database abstraction object.
Definition: Database.php:48
MediaWiki\Storage\SqlBlobStore\getDBLoadBalancer
getDBLoadBalancer()
Definition: SqlBlobStore.php:187
MediaWiki\Storage\BlobAccessException
Exception representing a failure to access a data blob.
Definition: BlobAccessException.php:32
WANObjectCache\TTL_UNCACHEABLE
const TTL_UNCACHEABLE
Idiom for getWithSetCallback() callbacks to avoid calling set()
Definition: WANObjectCache.php:148
MediaWiki\Storage\SqlBlobStore\TEXT_CACHE_GROUP
const TEXT_CACHE_GROUP
Definition: SqlBlobStore.php:53
ExternalStore\defaultStoresAreReadOnly
static defaultStoresAreReadOnly()
Definition: ExternalStore.php:226
MediaWiki\Storage\SqlBlobStore\setLegacyEncoding
setLegacyEncoding( $legacyEncoding, Language $language)
Definition: SqlBlobStore.php:161
MediaWiki\Storage\SqlBlobStore\getDBConnection
getDBConnection( $index)
Definition: SqlBlobStore.php:196
captcha-old.count
count
Definition: captcha-old.py:249
MediaWiki\Storage\SqlBlobStore
Service for storing and loading Content objects.
Definition: SqlBlobStore.php:50
IExpiringStore\QOS_EMULATION_SQL
const QOS_EMULATION_SQL
Definition: IExpiringStore.php:50
MediaWiki\Storage\SqlBlobStore\$compressBlobs
bool $compressBlobs
Definition: SqlBlobStore.php:78
MediaWiki\Storage\SqlBlobStore\fetchBlob
fetchBlob( $blobAddress, $queryFlags)
MCR migration note: this corresponds to Revision::fetchText.
Definition: SqlBlobStore.php:298
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
ExternalStore\fetchFromURL
static fetchFromURL( $url, array $params=[])
Fetch data from given URL.
Definition: ExternalStore.php:70
unserialize
unserialize( $serialized)
Definition: ApiMessage.php:192
MediaWiki\Storage\SqlBlobStore\getTextIdFromAddress
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
Definition: SqlBlobStore.php:557
MediaWiki\Storage\SqlBlobStore\splitBlobAddress
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
Definition: SqlBlobStore.php:581
MediaWiki\Storage\SqlBlobStore\expandBlob
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
Definition: SqlBlobStore.php:377
wfLogWarning
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
Definition: GlobalFunctions.php:1138
DBAccessObjectUtils\getDBOptions
static getDBOptions( $bitfield)
Get an appropriate DB index, options, and fallback DB index for a query.
Definition: DBAccessObjectUtils.php:52
IDBAccessObject
Interface for database access objects.
Definition: IDBAccessObject.php:55
cache
you have access to all of the normal MediaWiki so you can get a DB use the cache
Definition: maintenance.txt:52
MediaWiki\Storage\SqlBlobStore\$legacyEncodingConversionLang
Language null $legacyEncodingConversionLang
Definition: SqlBlobStore.php:88
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
IExpiringStore\ATTR_EMULATION
const ATTR_EMULATION
Definition: IExpiringStore.php:49
MediaWiki\Storage\SqlBlobStore\getCompressBlobs
getCompressBlobs()
Definition: SqlBlobStore.php:129
MediaWiki\Storage\SqlBlobStore\getLegacyEncoding
getLegacyEncoding()
Definition: SqlBlobStore.php:144
IExpiringStore
Generic base class for storage interfaces.
Definition: IExpiringStore.php:31
MWException
MediaWiki exception.
Definition: MWException.php:26
MediaWiki\Storage\SqlBlobStore\getLegacyEncodingConversionLang
getLegacyEncodingConversionLang()
Definition: SqlBlobStore.php:152
MediaWiki\Storage\SqlBlobStore\getCacheTTL
getCacheTTL()
Get the text cache TTL.
Definition: SqlBlobStore.php:525
Wikimedia\Rdbms\Database\getCacheSetOptions
static getCacheSetOptions(IDatabase $db1, IDatabase $db2=null)
Merge the result of getSessionLagStatus() for several DBs using the most pessimistic values to estima...
Definition: Database.php:4069
MediaWiki\Storage\SqlBlobStore\isReadOnly
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
Definition: SqlBlobStore.php:593
$blob
$blob
Definition: testCompression.php:65
MediaWiki\Storage\SqlBlobStore\__construct
__construct(LoadBalancer $dbLoadBalancer, WANObjectCache $cache, $wikiId=false)
Definition: SqlBlobStore.php:100
ExternalStore
Constructor class for key/value blob data kept in external repositories.
Definition: ExternalStore.php:48
MediaWiki\Storage\SqlBlobStore\$dbLoadBalancer
LoadBalancer $dbLoadBalancer
Definition: SqlBlobStore.php:58
MediaWiki\Storage\SqlBlobStore\$useExternalStore
boolean $useExternalStore
Definition: SqlBlobStore.php:93
wfCgiToArray
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
Definition: GlobalFunctions.php:422
DB_MASTER
const DB_MASTER
Definition: defines.php:26
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:982
DBAccessObjectUtils
Helper class for DAO classes.
Definition: DBAccessObjectUtils.php:29
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
Wikimedia\Rdbms\LoadBalancer
Database connection, tracking, load balancing, and transaction manager for a cluster.
Definition: LoadBalancer.php:40
$e
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2163
DBAccessObjectUtils\hasFlags
static hasFlags( $bitfield, $flags)
Definition: DBAccessObjectUtils.php:35
MediaWiki\Storage\SqlBlobStore\storeBlob
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
Definition: SqlBlobStore.php:211
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:87
MediaWiki\Storage\SqlBlobStore\getUseExternalStore
getUseExternalStore()
Definition: SqlBlobStore.php:171
MediaWiki\Storage
Definition: BlobAccessException.php:23
MediaWiki\Storage\SqlBlobStore\getCacheExpiry
getCacheExpiry()
Definition: SqlBlobStore.php:113
MediaWiki\Storage\SqlBlobStore\$cacheExpiry
int $cacheExpiry
Definition: SqlBlobStore.php:73
MediaWiki\Storage\BlobStore
Service for loading and storing data blobs.
Definition: BlobStore.php:33
MediaWiki\Storage\SqlBlobStore\decompressData
decompressData( $blob, array $blobFlags)
Re-converts revision text according to its flags.
Definition: SqlBlobStore.php:471
$options
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1987
MediaWiki\Storage\SqlBlobStore\$wikiId
bool string $wikiId
Wiki ID.
Definition: SqlBlobStore.php:68
MediaWiki\Storage\SqlBlobStore\setUseExternalStore
setUseExternalStore( $useExternalStore)
Definition: SqlBlobStore.php:178
MediaWiki\Storage\SqlBlobStore\setCompressBlobs
setCompressBlobs( $compressBlobs)
Definition: SqlBlobStore.php:136
wfWarn
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
Definition: GlobalFunctions.php:1125
MediaWiki\Storage\SqlBlobStore\$cache
WANObjectCache $cache
Definition: SqlBlobStore.php:63
MediaWiki\Storage\SqlBlobStore\compressData
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
Definition: SqlBlobStore.php:431
MediaWiki\Storage\SqlBlobStore\$legacyEncoding
bool string $legacyEncoding
Definition: SqlBlobStore.php:83
MediaWiki\Storage\SqlBlobStore\setCacheExpiry
setCacheExpiry( $cacheExpiry)
Definition: SqlBlobStore.php:120
Language
Internationalisation code.
Definition: Language.php:35
IExpiringStore\TTL_PROC_LONG
const TTL_PROC_LONG
Definition: IExpiringStore.php:43
array
the array() calling protocol came about after MediaWiki 1.4rc1.