MediaWiki  1.32.0
SqlBlobStore.php
Go to the documentation of this file.
1 <?php
27 namespace MediaWiki\Storage;
28 
33 use InvalidArgumentException;
37 use Wikimedia\Assert\Assert;
41 
51 
// Note: the name has been taken unchanged from the Revision class.
const TEXT_CACHE_GROUP = 'revisiontext:10';

/**
 * @var LoadBalancer
 */
private $dbLoadBalancer;

/**
 * @var WANObjectCache
 */
private $cache;

/**
 * @var bool|string Wiki ID
 */
private $wikiId;

/**
 * @var int Cache expiry in seconds
 */
private $cacheExpiry = 604800; // 7 days

/**
 * @var bool
 */
private $compressBlobs = false;

/**
 * @var bool|string Legacy encoding name, or false if none is configured
 */
private $legacyEncoding = false;

/**
 * Language object used for the legacy encoding conversion in decompressData().
 * Assigned by setLegacyEncoding(); declared explicitly so that it is not created
 * as a dynamic property on first assignment.
 *
 * @var Language|null
 */
private $legacyEncodingConversionLang = null;

/**
 * @var bool
 */
private $useExternalStore = false;
94 
105  public function __construct(
108  $wikiId = false
109  ) {
110  $this->dbLoadBalancer = $dbLoadBalancer;
111  $this->cache = $cache;
112  $this->wikiId = $wikiId;
113  }
114 
/**
 * Get the currently configured cache expiry.
 *
 * @return int The value of the cacheExpiry property (604800, i.e. 7 days, unless changed)
 */
public function getCacheExpiry() {
	return $this->cacheExpiry;
}
121 
/**
 * Set the cache expiry that getCacheExpiry() reports.
 *
 * @param int $cacheExpiry New expiry; checked against the 'integer' type via
 *  Assert::parameterType() before being stored.
 */
public function setCacheExpiry( $cacheExpiry ) {
	// Validate first so a value of the wrong type is never stored.
	Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );
	$this->cacheExpiry = $cacheExpiry;
}
130 
/**
 * Get the blob compression setting consulted by compressData().
 *
 * @return bool The value of the compressBlobs property (false unless changed)
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
137 
/**
 * Set the blob compression setting consulted by compressData().
 *
 * The value is stored as given; no type check is applied here.
 *
 * @param bool $compressBlobs
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
144 
/**
 * Get the legacy encoding that decompressData() converts from.
 *
 * @return bool|string The value of the legacyEncoding property (false unless set)
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
152 
159  }
160 
/**
 * Configure the legacy encoding and the Language object used to convert from it.
 *
 * Both values are read by decompressData() when a blob is not flagged as UTF-8.
 *
 * @param string $legacyEncoding Checked against the 'string' type via Assert::parameterType()
 * @param Language $language Object whose iconv() method performs the conversion
 */
public function setLegacyEncoding( $legacyEncoding, Language $language ) {
	// Validate before touching any state.
	Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );

	$this->legacyEncodingConversionLang = $language;
	$this->legacyEncoding = $legacyEncoding;
}
172 
176  public function getUseExternalStore() {
178  }
179 
184  Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );
185 
186  $this->useExternalStore = $useExternalStore;
187  }
188 
/**
 * Get the load balancer held in the dbLoadBalancer property.
 *
 * @return LoadBalancer
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
195 
/**
 * Obtain a database connection for this store's wiki from the load balancer.
 *
 * @param int $index Connection index passed through to getConnection(), e.g. DB_MASTER
 * @return mixed Whatever LoadBalancer::getConnection() returns for this index and wiki
 */
private function getDBConnection( $index ) {
	// No query groups are requested (empty array); the wiki is taken from $this->wikiId.
	return $this->getDBLoadBalancer()->getConnection( $index, [], $this->wikiId );
}
205 
/**
 * Store a blob of data in the text table and return an address for it.
 *
 * The data is first passed through compressData(). When the external store is
 * enabled, the payload is written there and the text row only holds what
 * ExternalStore::insertToDefault() returned, with 'external' appended to the flags.
 *
 * @param string $data The data to store
 * @param array $hints Not used by this implementation
 * @return string An address of the form produced by makeAddressFromTextId()
 * @throws BlobAccessException If the external store write fails, or wrapping any
 *  MWException caught while storing
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() takes $data by reference and may replace it with deflated data.
		$flagString = $this->compressData( $data );

		# Write to external storage if required
		if ( $this->useExternalStore ) {
			// Store and get the URL
			$data = ExternalStore::insertToDefault( $data );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}

			// Append 'external', comma-separated when other flags are already present.
			$flagString = ( $flagString ? $flagString . ',' : $flagString ) . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$dbw = $this->getDBConnection( DB_MASTER );

		$textRow = [
			'old_id' => $dbw->nextSequenceValue( 'text_old_id_seq' ),
			'old_text' => $data,
			'old_flags' => $flagString,
		];
		$dbw->insert( 'text', $textRow, __METHOD__ );

		return self::makeAddressFromTextId( $dbw->insertId() );
	} catch ( MWException $e ) {
		// Re-throw as the exception type used by this class, keeping the original as "previous".
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
257 
/**
 * Retrieve a blob, given an address.
 *
 * The lookup goes through the cache; on a miss the blob is loaded with fetchBlob().
 *
 * @param string $blobAddress Checked against the 'string' type via Assert::parameterType()
 * @param int $queryFlags Bitfield passed on to DBAccessObjectUtils::getDBOptions()
 * @return string The blob data
 * @throws BlobAccessException If the lookup yields false
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// Invoked by the cache on a miss; loads the blob from the database.
	$loadFromDatabase = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) {
		list( $index ) = DBAccessObjectUtils::getDBOptions( $queryFlags );
		$setOpts += Database::getCacheSetOptions( $this->getDBConnection( $index ) );

		return $this->fetchBlob( $blobAddress, $queryFlags );
	};

	$processCacheOptions = [
		'pcGroup' => self::TEXT_CACHE_GROUP,
		'pcTTL' => IExpiringStore::TTL_PROC_LONG,
	];

	// No negative caching; negative hits on text rows may be due to corrupted replica DBs
	$blob = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loadFromDatabase,
		$processCacheOptions
	);

	if ( $blob !== false ) {
		return $blob;
	}

	throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress );
}
292 
/**
 * Load a blob from the text table, bypassing the cache.
 *
 * Only addresses with the 'tt' schema are supported; their ID is used as old_id.
 *
 * @param string $blobAddress
 * @param int $queryFlags Bitfield passed on to DBAccessObjectUtils::getDBOptions()
 * @return string|bool The expanded blob, or false if no row was found or the row
 *  could not be expanded
 * @throws BlobAccessException For an unknown schema or a malformed text ID
 */
private function fetchBlob( $blobAddress, $queryFlags ) {
	list( $schema, $id, ) = self::splitBlobAddress( $blobAddress );

	//TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
	if ( $schema !== 'tt' ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Unknown blob address schema: $schema" );
	}

	// The ID must be a non-zero integer in canonical decimal form.
	$textId = intval( $id );
	if ( !$textId || $id !== (string)$textId ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Bad blob address: $blobAddress" );
	}

	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$immutableBits = DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST )
		? self::READ_LATEST_IMMUTABLE
		: 0;
	$queryFlags |= $immutableBits;

	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );

	// Captured here because __METHOD__ inside the closure would name the closure instead.
	$fname = __METHOD__;
	$selectTextRow = function ( $dbIndex, $selectOptions ) use ( $textId, $fname ) {
		return $this->getDBConnection( $dbIndex )->selectRow(
			'text',
			[ 'old_text', 'old_flags' ],
			[ 'old_id' => $textId ],
			$fname,
			$selectOptions
		);
	};

	// Text data is immutable; check replica DBs first.
	$row = $selectTextRow( $index, $options );

	// Fallback to DB_MASTER in some cases if the row was not found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( !$row && $fallbackIndex !== null ) {
		$row = $selectTextRow( $fallbackIndex, $fallbackOptions );
	}

	if ( !$row ) {
		wfWarn( __METHOD__ . ": No text row with ID $textId." );
		return false;
	}

	// The blob address doubles as the cache key for externally stored content.
	$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
	if ( $blob === false ) {
		wfLogWarning( __METHOD__ . ": Bad data in text row $textId." );
	}

	// Either the expanded data, or false when expansion failed.
	return $blob;
}
362 
/**
 * Get a cache key for a given blob address.
 *
 * The key is a global key built from the fixed components 'BlobStore' and 'address',
 * the domain ID the load balancer resolves for this store's wiki, and the address.
 *
 * @param string $blobAddress
 * @return string Whatever the cache's makeGlobalKey() returns for these components
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->wikiId );

	return $this->cache->makeGlobalKey( 'BlobStore', 'address', $domainId, $blobAddress );
}
381 
/**
 * Expand a raw data blob according to the flags given.
 *
 * Without the 'external' flag the raw data is passed straight to decompressData().
 * With it, $raw is treated as an external store URL: the content is fetched from
 * there and then decompressed, going through the cache when $cacheKey is given.
 *
 * @param string $raw The raw blob data, or an external store URL
 * @param string|string[] $flags Blob flags, either as an array or comma-separated
 * @param string|null $cacheKey Used with getCacheKey() to cache external content;
 *  if empty, external content is fetched without caching
 * @return string|bool The expanded data, or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	if ( !in_array( 'external', $flags ) ) {
		// Data is stored inline in the text table.
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$urlParts = explode( '://', $url, 2 );
	if ( count( $urlParts ) == 1 || $urlParts[1] == '' ) {
		// No "://" separator, or nothing after it.
		return false;
	}

	$fetchAndDecompress = function () use ( $url, $flags ) {
		// No negative caching per BlobStore::getBlob()
		$blob = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] );

		return $blob === false ? false : $this->decompressData( $blob, $flags );
	};

	if ( !$cacheKey ) {
		return $fetchAndDecompress();
	}

	// The cached value should be decompressed, so handle that and return here.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $cacheKey ),
		$this->getCacheTTL(),
		$fetchAndDecompress,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
435 
/**
 * Optionally compress a blob in place and report the flags describing it.
 *
 * The 'utf-8' flag is always included. If compression is enabled and gzdeflate()
 * exists and succeeds, $blob is replaced by the deflated data and 'gzip' is added.
 *
 * @param string &$blob The data; modified in place when compression succeeds
 * @return string Comma-separated list of flags
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$blobFlags = [ 'utf-8' ];

	if ( !$this->compressBlobs ) {
		return implode( ',', $blobFlags );
	}

	if ( !function_exists( 'gzdeflate' ) ) {
		wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" );
		return implode( ',', $blobFlags );
	}

	$deflated = gzdeflate( $blob );
	if ( $deflated !== false ) {
		$blob = $deflated;
		$blobFlags[] = 'gzip';
	} else {
		// Leave $blob untouched and store it uncompressed.
		wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
	}

	return implode( ',', $blobFlags );
}
476 
/**
 * Re-converts blob data according to its flags.
 *
 * Handled flags, in order: 'error' (always fails), 'gzip' (inflate), 'object'
 * (unserialize and call getText() on the result). Finally, data not flagged
 * 'utf-8' or 'utf8' is converted from the legacy encoding if one is configured.
 *
 * @param string $blob Blob data as stored; checked against the 'string' type
 * @param array $blobFlags List of flag strings
 * @return string|bool The decoded data, or false on failure
 */
public function decompressData( $blob, array $blobFlags ) {
	// Revision::decompressRevisionText accepted false here, so defend against that
	Assert::parameterType( 'string', $blob, '$blob' );

	if ( in_array( 'error', $blobFlags, true ) ) {
		// Error row, return false
		return false;
	}

	if ( in_array( 'gzip', $blobFlags, true ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags, true ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() can instantiate any loadable class named in the
		// payload. This is only safe as long as stored blob data is trusted; consider
		// restricting the allowed classes.
		$obj = unserialize( $blob );
		if ( !is_object( $obj ) || !is_callable( [ $obj, 'getText' ] ) ) {
			// Invalid object, or an object that cannot provide text (e.g. an
			// incomplete class); calling getText() on it would be a fatal error.
			return false;
		}
		$blob = $obj->getText();
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	if ( $blob !== false && $this->legacyEncoding && $this->legacyEncodingConversionLang
		&& !in_array( 'utf-8', $blobFlags, true ) && !in_array( 'utf8', $blobFlags, true )
	) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# ("utf8" checked for compatibility with some broken
		# conversion scripts 2008-12-30)
		$blob = $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob );
	}

	return $blob;
}
536 
544  private function getCacheTTL() {
545  if ( $this->cache->getQoS( WANObjectCache::ATTR_EMULATION )
547  ) {
548  // Do not cache RDBMs blobs in...the RDBMs store
550  } else {
551  $ttl = $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE;
552  }
553 
554  return $ttl;
555  }
556 
/**
 * Extract the text table ID from a blob address.
 *
 * @param string $address
 * @return int|null The ID for addresses with the 'tt' schema, null for any other schema
 * @throws InvalidArgumentException If the address cannot be split, or if the ID part
 *  is not a non-zero integer in canonical decimal form
 */
public function getTextIdFromAddress( $address ) {
	list( $schema, $id, ) = self::splitBlobAddress( $address );

	if ( $schema === 'tt' ) {
		$textId = intval( $id );

		// Accept only IDs that survive an int round trip unchanged and are not zero.
		if ( $textId && $id === (string)$textId ) {
			return $textId;
		}

		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return null;
}
592 
/**
 * Build the blob address for a given text ID.
 *
 * @param int $id
 * @return string The ID prefixed with the 'tt:' schema
 */
public static function makeAddressFromTextId( $id ) {
	return "tt:{$id}";
}
608 
/**
 * Splits a blob address into three parts: the schema, the ID, and parameters/flags.
 *
 * The expected form is "schema:id", optionally followed by "?" and a query string.
 *
 * @param string $address
 * @return array Three elements: the lower-cased schema (string), the ID (string),
 *  and the parameters (array; empty if the address has no query part)
 * @throws InvalidArgumentException If the address does not have the expected form
 */
private static function splitBlobAddress( $address ) {
	// The D modifier anchors "$" at the very end of the string. Without it, "$" also
	// matches just before a trailing newline, so "tt:123\n" would be accepted.
	if ( !preg_match( '/^(\w+):(\w+)(\?(.*))?$/D', $address, $m ) ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	$schema = strtolower( $m[1] );
	$id = $m[2];
	// Group 4 is only set when the address has a "?..." part.
	$parameters = isset( $m[4] ) ? wfCgiToArray( $m[4] ) : [];

	return [ $schema, $id, $parameters ];
}
628 
/**
 * Check if the blob metadata or backing blob data store is read-only.
 *
 * @return bool True if the external store is in use and its default stores are
 *  read-only, or if the load balancer reports a read-only reason
 */
public function isReadOnly() {
	// The external store is only consulted when it is actually in use.
	$externalStoreIsReadOnly = $this->useExternalStore
		&& ExternalStore::defaultStoresAreReadOnly();
	if ( $externalStoreIsReadOnly ) {
		return true;
	}

	$readOnlyReason = $this->getDBLoadBalancer()->getReadOnlyReason();

	return $readOnlyReason !== false;
}
636 }
ExternalStore\insertToDefault
static insertToDefault( $data, array $params=[])
Like insert() above, but does more of the work for us.
Definition: ExternalStore.php:165
MediaWiki\Storage\SqlBlobStore\getBlob
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
Definition: SqlBlobStore.php:270
Wikimedia\Rdbms\Database
Relational database abstraction object.
Definition: Database.php:48
MediaWiki\Storage\SqlBlobStore\getDBLoadBalancer
getDBLoadBalancer()
Definition: SqlBlobStore.php:192
MediaWiki\Storage\BlobAccessException
Exception representing a failure to access a data blob.
Definition: BlobAccessException.php:32
MediaWiki\Storage\SqlBlobStore\getCacheKey
getCacheKey( $blobAddress)
Get a cache key for a given Blob address.
Definition: SqlBlobStore.php:373
WANObjectCache\TTL_UNCACHEABLE
const TTL_UNCACHEABLE
Idiom for getWithSetCallback() callbacks to avoid calling set()
Definition: WANObjectCache.php:184
false
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:187
MediaWiki\Storage\SqlBlobStore\TEXT_CACHE_GROUP
const TEXT_CACHE_GROUP
Definition: SqlBlobStore.php:53
ExternalStore\defaultStoresAreReadOnly
static defaultStoresAreReadOnly()
Definition: ExternalStore.php:226
MediaWiki\Storage\SqlBlobStore\setLegacyEncoding
setLegacyEncoding( $legacyEncoding, Language $language)
Definition: SqlBlobStore.php:166
MediaWiki\Storage\SqlBlobStore\getDBConnection
getDBConnection( $index)
Definition: SqlBlobStore.php:201
captcha-old.count
count
Definition: captcha-old.py:249
MediaWiki\Storage\SqlBlobStore
Service for storing and loading Content objects.
Definition: SqlBlobStore.php:50
IExpiringStore\QOS_EMULATION_SQL
const QOS_EMULATION_SQL
Definition: IExpiringStore.php:50
MediaWiki\Storage\SqlBlobStore\$compressBlobs
bool $compressBlobs
Definition: SqlBlobStore.php:78
MediaWiki\Storage\SqlBlobStore\fetchBlob
fetchBlob( $blobAddress, $queryFlags)
MCR migration note: this corresponds to Revision::fetchText.
Definition: SqlBlobStore.php:302
ExternalStore\fetchFromURL
static fetchFromURL( $url, array $params=[])
Fetch data from given URL.
Definition: ExternalStore.php:70
MediaWiki\Storage\SqlBlobStore\getTextIdFromAddress
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
Definition: SqlBlobStore.php:577
MediaWiki\Storage\SqlBlobStore\splitBlobAddress
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
Definition: SqlBlobStore.php:617
MediaWiki\Storage\SqlBlobStore\expandBlob
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
Definition: SqlBlobStore.php:401
wfLogWarning
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
Definition: GlobalFunctions.php:1145
DBAccessObjectUtils\getDBOptions
static getDBOptions( $bitfield)
Get an appropriate DB index, options, and fallback DB index for a query.
Definition: DBAccessObjectUtils.php:52
IDBAccessObject
Interface for database access objects.
Definition: IDBAccessObject.php:55
cache
you have access to all of the normal MediaWiki so you can get a DB use the cache
Definition: maintenance.txt:52
MediaWiki\Storage\SqlBlobStore\$legacyEncodingConversionLang
Language null $legacyEncodingConversionLang
Definition: SqlBlobStore.php:88
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
IExpiringStore\ATTR_EMULATION
const ATTR_EMULATION
Definition: IExpiringStore.php:49
MediaWiki\Storage\SqlBlobStore\getCompressBlobs
getCompressBlobs()
Definition: SqlBlobStore.php:134
MediaWiki\Storage\SqlBlobStore\getLegacyEncoding
getLegacyEncoding()
Definition: SqlBlobStore.php:149
IExpiringStore
Generic base class for storage interfaces.
Definition: IExpiringStore.php:31
MWException
MediaWiki exception.
Definition: MWException.php:26
MediaWiki\Storage\SqlBlobStore\getLegacyEncodingConversionLang
getLegacyEncodingConversionLang()
Definition: SqlBlobStore.php:157
MediaWiki\Storage\SqlBlobStore\getCacheTTL
getCacheTTL()
Get the text cache TTL.
Definition: SqlBlobStore.php:544
Wikimedia\Rdbms\Database\getCacheSetOptions
static getCacheSetOptions(IDatabase $db1, IDatabase $db2=null)
Merge the result of getSessionLagStatus() for several DBs using the most pessimistic values to estima...
Definition: Database.php:4227
MediaWiki\Storage\SqlBlobStore\isReadOnly
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
Definition: SqlBlobStore.php:629
$blob
$blob
Definition: testCompression.php:65
MediaWiki\Storage\SqlBlobStore\__construct
__construct(LoadBalancer $dbLoadBalancer, WANObjectCache $cache, $wikiId=false)
Definition: SqlBlobStore.php:105
ExternalStore
Constructor class for key/value blob data kept in external repositories.
Definition: ExternalStore.php:48
MediaWiki\Storage\SqlBlobStore\$dbLoadBalancer
LoadBalancer $dbLoadBalancer
Definition: SqlBlobStore.php:58
MediaWiki\Storage\SqlBlobStore\$useExternalStore
boolean $useExternalStore
Definition: SqlBlobStore.php:93
MediaWiki\Storage\SqlBlobStore\makeAddressFromTextId
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID.
Definition: SqlBlobStore.php:605
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
wfCgiToArray
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
Definition: GlobalFunctions.php:413
DB_MASTER
const DB_MASTER
Definition: defines.php:26
array
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:988
DBAccessObjectUtils
Helper class for DAO classes.
Definition: DBAccessObjectUtils.php:29
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
Wikimedia\Rdbms\LoadBalancer
Database connection, tracking, load balancing, and transaction manager for a cluster.
Definition: LoadBalancer.php:41
$e
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2213
DBAccessObjectUtils\hasFlags
static hasFlags( $bitfield, $flags)
Definition: DBAccessObjectUtils.php:35
MediaWiki\Storage\SqlBlobStore\storeBlob
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
Definition: SqlBlobStore.php:216
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:118
MediaWiki\Storage\SqlBlobStore\getUseExternalStore
getUseExternalStore()
Definition: SqlBlobStore.php:176
MediaWiki\Storage
Definition: BlobAccessException.php:23
MediaWiki\Storage\SqlBlobStore\getCacheExpiry
getCacheExpiry()
Definition: SqlBlobStore.php:118
MediaWiki\Storage\SqlBlobStore\$cacheExpiry
int $cacheExpiry
Definition: SqlBlobStore.php:73
MediaWiki\Storage\BlobStore
Service for loading and storing data blobs.
Definition: BlobStore.php:33
unserialize
unserialize( $serialized)
Definition: ApiMessageTrait.php:139
MediaWiki\Storage\SqlBlobStore\decompressData
decompressData( $blob, array $blobFlags)
Re-converts revision text according to its flags.
Definition: SqlBlobStore.php:492
$options
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:2036
MediaWiki\Storage\SqlBlobStore\$wikiId
bool string $wikiId
Wiki ID.
Definition: SqlBlobStore.php:68
MediaWiki\Storage\SqlBlobStore\setUseExternalStore
setUseExternalStore( $useExternalStore)
Definition: SqlBlobStore.php:183
MediaWiki\Storage\SqlBlobStore\setCompressBlobs
setCompressBlobs( $compressBlobs)
Definition: SqlBlobStore.php:141
wfWarn
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
Definition: GlobalFunctions.php:1132
MediaWiki\Storage\SqlBlobStore\$cache
WANObjectCache $cache
Definition: SqlBlobStore.php:63
MediaWiki\Storage\SqlBlobStore\compressData
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
Definition: SqlBlobStore.php:452
MediaWiki\Storage\SqlBlobStore\$legacyEncoding
bool string $legacyEncoding
Definition: SqlBlobStore.php:83
MediaWiki\Storage\SqlBlobStore\setCacheExpiry
setCacheExpiry( $cacheExpiry)
Definition: SqlBlobStore.php:125
Language
Internationalisation code.
Definition: Language.php:35
IExpiringStore\TTL_PROC_LONG
const TTL_PROC_LONG
Definition: IExpiringStore.php:43