MediaWiki  1.33.0
SqlBlobStore.php
Go to the documentation of this file.
1 <?php
27 namespace MediaWiki\Storage;
28 
use DBAccessObjectUtils;
use ExternalStore;
use IExpiringStore;
use InvalidArgumentException;
use Language;
use MWException;
use WANObjectCache;
use Wikimedia\Assert\Assert;
use Wikimedia\Rdbms\LoadBalancer;
40 
50 
// Note: the name has been taken unchanged from the Revision class.
const TEXT_CACHE_GROUP = 'revisiontext:10';

/**
 * @var LoadBalancer
 */
private $dbLoadBalancer;

/**
 * @var WANObjectCache
 */
private $cache;

/**
 * @var bool|string Wiki ID
 */
private $wikiId;

/**
 * @var int Cache TTL for blobs, in seconds
 */
private $cacheExpiry = 604800; // 7 days

/**
 * @var bool Whether compressData() should try to gzip new blobs
 */
private $compressBlobs = false;

/**
 * @var bool|string Source encoding for legacy rows, or false for none
 */
private $legacyEncoding = false;

/**
 * Declared explicitly so that setLegacyEncoding() and decompressData()
 * do not rely on a dynamically created property.
 *
 * @var Language|null
 */
private $legacyEncodingConversionLang = null;

/**
 * @var bool Whether storeBlob() writes the payload to the external store
 */
private $useExternalStore = false;
93 
/**
 * @param LoadBalancer $dbLoadBalancer Load balancer used to obtain database connections
 * @param WANObjectCache $cache Cache used for loaded blobs
 * @param bool|string $wikiId Wiki ID handed to the load balancer when acquiring
 *        connections and resolving the domain for cache keys
 */
public function __construct(
	LoadBalancer $dbLoadBalancer,
	WANObjectCache $cache,
	$wikiId = false
) {
	$this->dbLoadBalancer = $dbLoadBalancer;
	$this->cache = $cache;
	$this->wikiId = $wikiId;
}
113 
/**
 * @return int The configured blob cache expiry (defaults to 604800, i.e. 7 days)
 */
public function getCacheExpiry() {
	return $this->cacheExpiry;
}
120 
/**
 * Set the cache expiry used when computing the blob cache TTL.
 *
 * @param int $cacheExpiry New expiry; must pass the 'integer' parameter-type assertion
 */
public function setCacheExpiry( $cacheExpiry ) {
	// Validate before touching state so a bad value never gets stored.
	Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );
	$this->cacheExpiry = $cacheExpiry;
}
129 
/**
 * @return bool Whether compressData() will attempt to compress new blobs
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
136 
/**
 * Enable or disable compression of newly stored blobs.
 *
 * The value is stored as given; no type assertion is performed here.
 *
 * @param bool $compressBlobs
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
143 
/**
 * @return bool|string The legacy encoding assumed for blobs not flagged as UTF-8,
 *         or false if none has been configured
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
151 
/**
 * @return Language|null The language object used by decompressData() for legacy
 *         encoding conversion, or null if setLegacyEncoding() was never called
 */
public function getLegacyEncodingConversionLang() {
	return $this->legacyEncodingConversionLang;
}
159 
/**
 * Configure the legacy encoding and the language object used to convert from it.
 *
 * decompressData() only applies the conversion when both values are set and the
 * blob is not flagged as UTF-8.
 *
 * @param string $legacyEncoding Source encoding name; must pass the 'string' assertion
 * @param Language $language Object whose iconv() method performs the conversion
 */
public function setLegacyEncoding( $legacyEncoding, Language $language ) {
	Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );

	$this->legacyEncodingConversionLang = $language;
	$this->legacyEncoding = $legacyEncoding;
}
171 
/**
 * @return bool Whether storeBlob() writes blob data to the external store
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
178 
/**
 * Enable or disable use of the external store when storing blobs.
 *
 * @param bool $useExternalStore Must pass the 'boolean' parameter-type assertion
 */
public function setUseExternalStore( $useExternalStore ) {
	Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );

	$this->useExternalStore = $useExternalStore;
}
187 
/**
 * @return LoadBalancer The load balancer supplied to the constructor
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
194 
/**
 * Obtain a database connection for this store's wiki.
 *
 * @param int $index Connection index as understood by the load balancer (e.g. DB_MASTER)
 * @return mixed Whatever LoadBalancer::getConnection() returns for that index
 */
private function getDBConnection( $index ) {
	// No connection groups are requested; the wiki ID selects the target database.
	return $this->getDBLoadBalancer()->getConnection( $index, [], $this->wikiId );
}
204 
/**
 * Stores an arbitrary blob of data and returns an address that can be used with
 * getBlob() to retrieve it.
 *
 * The data is passed through compressData() first. When the external store is
 * enabled, the payload is written there and only the resulting URL is kept in the
 * text table, flagged as 'external'.
 *
 * @param string $data The blob to store
 * @param array $hints Not consulted by this implementation
 *
 * @throws BlobAccessException If the external store write fails, or an MWException
 *         is raised while storing
 * @return string Blob address of the form produced by makeAddressFromTextId()
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		$flags = $this->compressData( $data );

		# Write to external storage if required
		if ( $this->useExternalStore ) {
			// Store and get the URL. Without this call the raw text itself would be
			// written to the text table while being flagged as an external URL.
			$data = ExternalStore::insertToDefault( $data );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}
			if ( $flags ) {
				$flags .= ',';
			}
			$flags .= 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$dbw = $this->getDBConnection( DB_MASTER );

		$old_id = $dbw->nextSequenceValue( 'text_old_id_seq' );
		$dbw->insert(
			'text',
			[
				'old_id' => $old_id,
				'old_text' => $data,
				'old_flags' => $flags,
			],
			__METHOD__
		);

		$textId = $dbw->insertId();

		return self::makeAddressFromTextId( $textId );
	} catch ( MWException $e ) {
		// Re-wrap so callers only need to handle BlobAccessException.
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
256 
/**
 * Retrieve a blob, given an address.
 *
 * The lookup goes through the cache; on a miss the blob is loaded via fetchBlob().
 *
 * @param string $blobAddress Address as returned by storeBlob()
 * @param int $queryFlags Query flags forwarded to fetchBlob()
 *
 * @throws BlobAccessException If the blob could not be loaded
 * @return string The blob contents
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	$loader = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) {
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		return $this->fetchBlob( $blobAddress, $queryFlags );
	};
	$cacheOptions = [
		'pcGroup' => self::TEXT_CACHE_GROUP,
		'pcTTL' => IExpiringStore::TTL_PROC_LONG
	];

	// No negative caching; negative hits on text rows may be due to corrupted replica DBs
	$blob = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loader,
		$cacheOptions
	);

	if ( $blob !== false ) {
		return $blob;
	}

	throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress );
}
289 
/**
 * Load a blob from the text table, bypassing the cache.
 *
 * MCR migration note: this corresponds to Revision::fetchText.
 *
 * @param string $blobAddress Address using the 'tt' schema
 * @param int $queryFlags Bitfield of read flags; READ_LATEST is upgraded to
 *        READ_LATEST_IMMUTABLE
 *
 * @throws BlobAccessException On an unknown schema or a malformed text ID
 * @return string|false The expanded blob, or false if the row is missing or
 *         its data could not be expanded
 */
private function fetchBlob( $blobAddress, $queryFlags ) {
	list( $schema, $id, ) = self::splitBlobAddress( $blobAddress );

	//TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
	if ( $schema !== 'tt' ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Unknown blob address schema: $schema" );
	}

	$textId = intval( $id );

	// The ID must be a non-zero integer in canonical form.
	if ( !$textId || $id !== (string)$textId ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Bad blob address: $blobAddress" );
	}

	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	if ( DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST ) ) {
		$queryFlags |= self::READ_LATEST_IMMUTABLE;
	}

	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );

	$fields = [ 'old_text', 'old_flags' ];
	$conds = [ 'old_id' => $textId ];

	// Text data is immutable; check replica DBs first.
	$row = $this->getDBConnection( $index )
		->selectRow( 'text', $fields, $conds, __METHOD__, $options );

	// Fallback to DB_MASTER in some cases if the row was not found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( !$row && $fallbackIndex !== null ) {
		$row = $this->getDBConnection( $fallbackIndex )
			->selectRow( 'text', $fields, $conds, __METHOD__, $fallbackOptions );
	}

	if ( !$row ) {
		wfWarn( __METHOD__ . ": No text row with ID $textId." );
		return false;
	}

	$expanded = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );

	if ( $expanded !== false ) {
		return $expanded;
	}

	wfLogWarning( __METHOD__ . ": Bad data in text row $textId." );
	return false;
}
359 
/**
 * Get a cache key for a given Blob address.
 *
 * The key is global and includes the resolved domain ID of this store's wiki,
 * so the same address on different wikis maps to different keys.
 *
 * @param string $blobAddress
 * @return string
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->wikiId );

	return $this->cache->makeGlobalKey( 'BlobStore', 'address', $domainId, $blobAddress );
}
378 
/**
 * Expand a raw data blob according to the flags given.
 *
 * Blobs flagged 'external' hold only a URL; the payload is fetched from the external
 * store (through the cache when $cacheKey is given) and then decompressed. All other
 * blobs are handed straight to decompressData().
 *
 * @param string $raw The raw blob contents, as stored in the text table
 * @param array|string $flags Blob flags, as a list or a comma separated string
 * @param string|null $cacheKey Value passed to getCacheKey() for caching external
 *        blobs; falsy to skip the cache
 *
 * @return false|string The expanded blob, or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	$flagList = is_string( $flags ) ? explode( ',', $flags ) : $flags;

	if ( !in_array( 'external', $flagList ) ) {
		return $this->decompressData( $raw, $flagList );
	}

	// Use external methods for external objects, text in table is URL-only then
	$urlParts = explode( '://', $raw, 2 );
	if ( !isset( $urlParts[1] ) || $urlParts[1] === '' ) {
		return false;
	}

	$fetchAndDecode = function () use ( $raw, $flagList ) {
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		$fetched = ExternalStore::fetchFromURL( $raw, [ 'wiki' => $this->wikiId ] );

		return $fetched === false ? false : $this->decompressData( $fetched, $flagList );
	};

	if ( !$cacheKey ) {
		return $fetchAndDecode();
	}

	// The cached value should be decompressed, so handle that and return here.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $cacheKey ),
		$this->getCacheTTL(),
		$fetchAndDecode,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
432 
/**
 * Compress the given blob in place if compression is enabled, and report the flags
 * that describe how it is now encoded.
 *
 * The 'utf-8' flag is always included. 'gzip' is added only when gzdeflate() is
 * available and succeeds; otherwise the blob is left untouched.
 *
 * @param string &$blob Blob data; replaced by the deflated data on success
 * @return string Comma separated list of flags
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$flagList = [ 'utf-8' ];

	if ( $this->compressBlobs && !function_exists( 'gzdeflate' ) ) {
		wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" );
	} elseif ( $this->compressBlobs ) {
		$packed = gzdeflate( $blob );

		if ( $packed !== false ) {
			$blob = $packed;
			$flagList[] = 'gzip';
		} else {
			wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
		}
	}

	return implode( ',', $flagList );
}
473 
/**
 * Re-converts revision text according to its flags.
 *
 * Handled flags, in order: 'error' (always fails), 'gzip' (inflate), 'object'
 * (unserialize and call getText()), and the absence of 'utf-8'/'utf8' (legacy
 * encoding conversion, when configured).
 *
 * @param string $blob Blob in compressed/encoded form
 * @param array $blobFlags List of flag strings
 *
 * @return string|bool Decompressed text, or false on failure
 */
public function decompressData( $blob, array $blobFlags ) {
	// Revision::decompressRevisionText accepted false here, so defend against that
	Assert::parameterType( 'string', $blob, '$blob' );

	// Flags are compared strictly: a loose in_array() would treat an integer 0 in
	// the list as matching every flag name on PHP versions before 8.
	if ( in_array( 'error', $blobFlags, true ) ) {
		// Error row, return false
		return false;
	}

	if ( in_array( 'gzip', $blobFlags, true ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags, true ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() can instantiate arbitrary classes; this assumes
		// the stored data is trusted. Consider restricting 'allowed_classes'.
		$obj = unserialize( $blob );
		if ( !is_object( $obj ) || !method_exists( $obj, 'getText' ) ) {
			// Invalid object, or one that cannot provide text (for example an
			// incomplete class); fail softly instead of raising a fatal error.
			return false;
		}
		$blob = $obj->getText();
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	if ( $blob !== false && $this->legacyEncoding && $this->legacyEncodingConversionLang
		&& !in_array( 'utf-8', $blobFlags, true ) && !in_array( 'utf8', $blobFlags, true )
	) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# ("utf8" checked for compatibility with some broken
		# conversion scripts 2008-12-30)
		$blob = $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob );
	}

	return $blob;
}
533 
/**
 * Get the text cache TTL.
 *
 * @return int The configured cache expiry, or TTL_UNCACHEABLE when caching is
 *         pointless (SQL-emulated cache) or the expiry is zero/unset
 */
private function getCacheTTL() {
	if ( $this->cache->getQoS( WANObjectCache::ATTR_EMULATION )
		<= WANObjectCache::QOS_EMULATION_SQL
	) {
		// Do not cache RDBMs blobs in...the RDBMs store
		$ttl = WANObjectCache::TTL_UNCACHEABLE;
	} else {
		$ttl = $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE;
	}

	return $ttl;
}
553 
/**
 * Returns an ID corresponding to the old_id field in the text table, corresponding
 * to the given address.
 *
 * @param string $address Blob address
 *
 * @throws InvalidArgumentException If the address is malformed, or uses the 'tt'
 *         schema with an ID that is not a canonical non-zero integer
 * @return int|null The text ID, or null if the address does not use the 'tt' schema
 */
public function getTextIdFromAddress( $address ) {
	$parts = self::splitBlobAddress( $address );
	$schema = $parts[0];
	$id = $parts[1];

	if ( $schema !== 'tt' ) {
		return null;
	}

	$textId = intval( $id );
	$isCanonical = $textId && $id === (string)$textId;

	if ( !$isCanonical ) {
		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return $textId;
}
589 
/**
 * Returns an address referring to content stored in the text table row with the
 * given ID.
 *
 * @param int $id Text table row ID (old_id)
 * @return string Address of the form "tt:<id>"
 */
public static function makeAddressFromTextId( $id ) {
	return "tt:{$id}";
}
605 
/**
 * Splits a blob address into three parts: the schema, the ID, and parameters/flags.
 *
 * The expected shape is "<schema>:<id>" optionally followed by "?<query string>".
 *
 * @param string $address Blob address
 *
 * @throws InvalidArgumentException If the address does not have the expected shape
 * @return array [ $schema, $id, $parameters ], with the schema lower-cased and the
 *         parameters given as an associative array (empty when there is no query part)
 */
public static function splitBlobAddress( $address ) {
	// The D modifier anchors "$" at the very end of the subject. Without it, an
	// address with a trailing newline would be accepted as valid.
	if ( !preg_match( '/^(\w+):(\w+)(\?(.*))?$/D', $address, $m ) ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	$schema = strtolower( $m[1] );
	$id = $m[2];
	$parameters = isset( $m[4] ) ? wfCgiToArray( $m[4] ) : [];

	return [ $schema, $id, $parameters ];
}
627 
/**
 * Check if the blob metadata or backing blob data store is read-only.
 *
 * @return bool True if the external store is in use and its default stores are
 *         read-only, or if the load balancer reports a read-only reason
 */
public function isReadOnly() {
	// Short-circuits: the external store is only consulted when it is enabled, and
	// the load balancer only when the external store did not already decide.
	$externalStoreLocked = $this->useExternalStore
		&& ExternalStore::defaultStoresAreReadOnly();

	return $externalStoreLocked
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
635 }
ExternalStore\insertToDefault
static insertToDefault( $data, array $params=[])
Like insert() above, but does more of the work for us.
Definition: ExternalStore.php:165
MediaWiki\Storage\SqlBlobStore\getBlob
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
Definition: SqlBlobStore.php:269
MediaWiki\Storage\SqlBlobStore\getDBLoadBalancer
getDBLoadBalancer()
Definition: SqlBlobStore.php:191
MediaWiki\Storage\BlobAccessException
Exception representing a failure to access a data blob.
Definition: BlobAccessException.php:32
MediaWiki\Storage\SqlBlobStore\getCacheKey
getCacheKey( $blobAddress)
Get a cache key for a given Blob address.
Definition: SqlBlobStore.php:370
WANObjectCache\TTL_UNCACHEABLE
const TTL_UNCACHEABLE
Idiom for getWithSetCallback() callbacks to avoid calling set()
Definition: WANObjectCache.php:177
false
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:187
MediaWiki\Storage\SqlBlobStore\TEXT_CACHE_GROUP
const TEXT_CACHE_GROUP
Definition: SqlBlobStore.php:52
ExternalStore\defaultStoresAreReadOnly
static defaultStoresAreReadOnly()
Definition: ExternalStore.php:226
MediaWiki\Storage\SqlBlobStore\setLegacyEncoding
setLegacyEncoding( $legacyEncoding, Language $language)
Definition: SqlBlobStore.php:165
MediaWiki\Storage\SqlBlobStore\getDBConnection
getDBConnection( $index)
Definition: SqlBlobStore.php:200
captcha-old.count
count
Definition: captcha-old.py:249
MediaWiki\Storage\SqlBlobStore
Service for storing and loading Content objects.
Definition: SqlBlobStore.php:49
IExpiringStore\QOS_EMULATION_SQL
const QOS_EMULATION_SQL
Definition: IExpiringStore.php:50
MediaWiki\Storage\SqlBlobStore\$compressBlobs
bool $compressBlobs
Definition: SqlBlobStore.php:77
MediaWiki\Storage\SqlBlobStore\fetchBlob
fetchBlob( $blobAddress, $queryFlags)
MCR migration note: this corresponds to Revision::fetchText.
Definition: SqlBlobStore.php:299
ExternalStore\fetchFromURL
static fetchFromURL( $url, array $params=[])
Fetch data from given URL.
Definition: ExternalStore.php:70
MediaWiki\Storage\SqlBlobStore\getTextIdFromAddress
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
Definition: SqlBlobStore.php:574
MediaWiki\Storage\SqlBlobStore\splitBlobAddress
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
Definition: SqlBlobStore.php:616
MediaWiki\Storage\SqlBlobStore\expandBlob
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
Definition: SqlBlobStore.php:398
wfLogWarning
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
Definition: GlobalFunctions.php:1105
DBAccessObjectUtils\getDBOptions
static getDBOptions( $bitfield)
Get an appropriate DB index, options, and fallback DB index for a query.
Definition: DBAccessObjectUtils.php:52
IDBAccessObject
Interface for database access objects.
Definition: IDBAccessObject.php:55
cache
you have access to all of the normal MediaWiki so you can get a DB use the cache
Definition: maintenance.txt:52
MediaWiki\Storage\SqlBlobStore\$legacyEncodingConversionLang
Language null $legacyEncodingConversionLang
Definition: SqlBlobStore.php:87
php
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
Wikimedia\Rdbms\IDatabase
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:38
IExpiringStore\ATTR_EMULATION
const ATTR_EMULATION
Definition: IExpiringStore.php:48
MediaWiki\Storage\SqlBlobStore\getCompressBlobs
getCompressBlobs()
Definition: SqlBlobStore.php:133
MediaWiki\Storage\SqlBlobStore\getLegacyEncoding
getLegacyEncoding()
Definition: SqlBlobStore.php:148
$data
$data
Utility to generate mapping file used in mw.Title (phpCharToUpper.json)
Definition: generatePhpCharToUpperMappings.php:13
IExpiringStore
Generic interface for lightweight expiring object stores.
Definition: IExpiringStore.php:31
MWException
MediaWiki exception.
Definition: MWException.php:26
MediaWiki\Storage\SqlBlobStore\getLegacyEncodingConversionLang
getLegacyEncodingConversionLang()
Definition: SqlBlobStore.php:156
MediaWiki\Storage\SqlBlobStore\getCacheTTL
getCacheTTL()
Get the text cache TTL.
Definition: SqlBlobStore.php:541
MediaWiki\Storage\SqlBlobStore\isReadOnly
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
Definition: SqlBlobStore.php:628
$blob
$blob
Definition: testCompression.php:65
MediaWiki\Storage\SqlBlobStore\__construct
__construct(LoadBalancer $dbLoadBalancer, WANObjectCache $cache, $wikiId=false)
Definition: SqlBlobStore.php:104
ExternalStore
Constructor class for key/value blob data kept in external repositories.
Definition: ExternalStore.php:48
MediaWiki\Storage\SqlBlobStore\$dbLoadBalancer
LoadBalancer $dbLoadBalancer
Definition: SqlBlobStore.php:57
MediaWiki\Storage\SqlBlobStore\$useExternalStore
boolean $useExternalStore
Definition: SqlBlobStore.php:92
MediaWiki\Storage\SqlBlobStore\makeAddressFromTextId
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID.
Definition: SqlBlobStore.php:602
use
as see the revision history and available at free of to any person obtaining a copy of this software and associated documentation to deal in the Software without including without limitation the rights to use
Definition: MIT-LICENSE.txt:10
wfCgiToArray
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
Definition: GlobalFunctions.php:416
DB_MASTER
const DB_MASTER
Definition: defines.php:26
array
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
wfDebug
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
Definition: GlobalFunctions.php:949
DBAccessObjectUtils
Helper class for DAO classes.
Definition: DBAccessObjectUtils.php:29
list
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
Wikimedia\Rdbms\LoadBalancer
Database connection, tracking, load balancing, and transaction manager for a cluster.
Definition: LoadBalancer.php:41
$e
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2162
DBAccessObjectUtils\hasFlags
static hasFlags( $bitfield, $flags)
Definition: DBAccessObjectUtils.php:35
MediaWiki\Storage\SqlBlobStore\storeBlob
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
Definition: SqlBlobStore.php:215
WANObjectCache
Multi-datacenter aware caching interface.
Definition: WANObjectCache.php:116
MediaWiki\Storage\SqlBlobStore\getUseExternalStore
getUseExternalStore()
Definition: SqlBlobStore.php:175
MediaWiki\Storage
Definition: BlobAccessException.php:23
MediaWiki\Storage\SqlBlobStore\getCacheExpiry
getCacheExpiry()
Definition: SqlBlobStore.php:117
MediaWiki\Storage\SqlBlobStore\$cacheExpiry
int $cacheExpiry
Definition: SqlBlobStore.php:72
MediaWiki\Storage\BlobStore
Service for loading and storing data blobs.
Definition: BlobStore.php:33
unserialize
unserialize( $serialized)
Definition: ApiMessageTrait.php:142
MediaWiki\Storage\SqlBlobStore\decompressData
decompressData( $blob, array $blobFlags)
Re-converts revision text according to its flags.
Definition: SqlBlobStore.php:489
$options
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1985
MediaWiki\Storage\SqlBlobStore\$wikiId
bool string $wikiId
Wiki ID.
Definition: SqlBlobStore.php:67
MediaWiki\Storage\SqlBlobStore\setUseExternalStore
setUseExternalStore( $useExternalStore)
Definition: SqlBlobStore.php:182
MediaWiki\Storage\SqlBlobStore\setCompressBlobs
setCompressBlobs( $compressBlobs)
Definition: SqlBlobStore.php:140
wfWarn
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
Definition: GlobalFunctions.php:1092
MediaWiki\Storage\SqlBlobStore\$cache
WANObjectCache $cache
Definition: SqlBlobStore.php:62
MediaWiki\Storage\SqlBlobStore\compressData
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
Definition: SqlBlobStore.php:449
MediaWiki\Storage\SqlBlobStore\$legacyEncoding
bool string $legacyEncoding
Definition: SqlBlobStore.php:82
MediaWiki\Storage\SqlBlobStore\setCacheExpiry
setCacheExpiry( $cacheExpiry)
Definition: SqlBlobStore.php:124
Language
Internationalisation code.
Definition: Language.php:36
IExpiringStore\TTL_PROC_LONG
const TTL_PROC_LONG
Definition: IExpiringStore.php:43