MediaWiki  master
SqlBlobStore.php
Go to the documentation of this file.
1 <?php
27 namespace MediaWiki\Storage;
28 
40 
50 
// Note: the name has been taken unchanged from the Revision class.
// Passed as the 'pcGroup' option when reading blobs through the cache.
const TEXT_CACHE_GROUP = 'revisiontext:10';

/**
 * @var ILoadBalancer Source of database connections; set by the constructor.
 */
private $dbLoadBalancer;

/**
 * @var ExternalStoreAccess Used to insert into and fetch from external storage.
 */
private $extStoreAccess;

/**
 * @var WANObjectCache Cache consulted before blobs are loaded from storage.
 */
private $cache;

/**
 * @var string|bool DB domain ID of a wiki or false for the local one.
 */
private $dbDomain;

/**
 * @var int Expiry applied to cached blobs, in seconds.
 */
private $cacheExpiry = 604800; // 7 days

/**
 * @var bool Whether compressData() should try to gzdeflate new blobs.
 */
private $compressBlobs = false;

/**
 * @var bool|string Name of the legacy encoding, or false when none is configured.
 */
private $legacyEncoding = false;

/**
 * Language object whose iconv() method performs the legacy-encoding conversion.
 * Declared explicitly so that setLegacyEncoding() does not create it dynamically.
 *
 * @var Language|null
 */
private $legacyEncodingConversionLang = null;

/**
 * @var bool Whether storeBlob() writes blob data to external storage.
 */
private $useExternalStore = false;
98 
/**
 * Stores the collaborating services on the instance; no other work is done here.
 *
 * @param ILoadBalancer $dbLoadBalancer A load balancer for acquiring database connections
 * @param ExternalStoreAccess $extStoreAccess Access layer for external storage
 * @param WANObjectCache $cache A cache manager for caching blobs
 * @param string|bool $dbDomain DB domain ID of a wiki or false for the local one
 */
public function __construct(
	ILoadBalancer $dbLoadBalancer,
	ExternalStoreAccess $extStoreAccess,
	WANObjectCache $cache,
	$dbDomain = false
) {
	$this->dbLoadBalancer = $dbLoadBalancer;
	$this->extStoreAccess = $extStoreAccess;
	$this->cache = $cache;
	$this->dbDomain = $dbDomain;
}
121 
/**
 * Reports the expiry currently configured for cached blobs.
 *
 * @return int Expiry in seconds (the default, 604800, is 7 days)
 */
public function getCacheExpiry() {
	return $this->cacheExpiry;
}
128 
/**
 * Replaces the expiry used for cached blobs.
 *
 * The value is checked with Assert::parameterType() and must be an integer.
 *
 * @param int $cacheExpiry New expiry, in seconds
 */
public function setCacheExpiry( $cacheExpiry ) {
	// Reject anything that is not an integer before it reaches the property.
	Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );
	$this->cacheExpiry = $cacheExpiry;
}
137 
/**
 * Tells whether compressData() will attempt to compress new blobs.
 *
 * @return bool The current value of the compression switch
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
144 
/**
 * Switches compression of newly stored blobs on or off.
 *
 * The value is stored as given; no type check is performed here.
 *
 * @param bool $compressBlobs Whether compressData() should try to compress
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
151 
/**
 * Returns the configured legacy encoding.
 *
 * @return bool|string The encoding name, or false if none has been set
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
159 
166  }
167 
/**
 * Configures the legacy encoding together with the Language object that
 * decompressData() uses (via its iconv() method) to convert such text to UTF-8.
 *
 * @param string $legacyEncoding Name of the legacy encoding; must be a string
 * @param Language $language Object providing the iconv() conversion
 */
public function setLegacyEncoding( $legacyEncoding, Language $language ) {
	Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );

	// Both values are always updated together.
	$this->legacyEncodingConversionLang = $language;
	$this->legacyEncoding = $legacyEncoding;
}
179 
/**
 * Tells whether storeBlob() writes blob data to external storage.
 *
 * @return bool The current value of the external-store switch
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
186 
/**
 * Switches use of external storage in storeBlob() on or off.
 *
 * The value is checked with Assert::parameterType() and must be a boolean.
 *
 * @param bool $useExternalStore Whether new blobs should go to external storage
 */
public function setUseExternalStore( $useExternalStore ) {
	Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );

	$this->useExternalStore = $useExternalStore;
}
195 
/**
 * Accessor for the load balancer handed to the constructor.
 *
 * @return ILoadBalancer
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
202 
/**
 * Obtains a connection for this store's DB domain from the load balancer.
 *
 * @param int $index Connection index forwarded to getConnection(),
 *  e.g. DB_MASTER as used by storeBlob()
 * @return mixed Whatever the load balancer's getConnection() returns
 */
private function getDBConnection( $index ) {
	// No query groups are requested, hence the empty array.
	return $this->getDBLoadBalancer()->getConnection( $index, [], $this->dbDomain );
}
212 
/**
 * Stores an arbitrary blob of data and returns an address that can be used
 * with getBlob() to retrieve it.
 *
 * The data is first passed through compressData(). When external storage is
 * enabled, the data goes to the external store and only the value returned by
 * its insert() call is written to the 'text' table, flagged as 'external'.
 *
 * @param string $data The blob to store
 * @param array $hints Not consulted by this implementation
 * @return string Address built by makeAddressFromTextId() from the new row ID
 * @throws BlobAccessException If the external store insert fails, or wrapping
 *  any MWException raised while storing
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() takes $data by reference and may replace it.
		$flagList = $this->compressData( $data );

		# Write to external storage if required
		if ( $this->useExternalStore ) {
			// Store and get the URL; from here on the URL is what goes into the text row
			$data = $this->extStoreAccess->insert( $data, [ 'domain' => $this->dbDomain ] );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}
			$flagList .= ( $flagList ? ',' : '' ) . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$dbw = $this->getDBConnection( DB_MASTER );

		$textRow = [
			'old_id' => $dbw->nextSequenceValue( 'text_old_id_seq' ),
			'old_text' => $data,
			'old_flags' => $flagList,
		];
		$dbw->insert( 'text', $textRow, __METHOD__ );

		return self::makeAddressFromTextId( $dbw->insertId() );
	} catch ( MWException $e ) {
		// Re-throw as the exception type callers of the blob store expect.
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
264 
/**
 * Retrieve a blob, given an address.
 *
 * The lookup goes through the cache; on a miss the blob is loaded with
 * fetchBlob().
 *
 * @param string $blobAddress Address of the blob; must be a string
 * @param int $queryFlags Flags forwarded to fetchBlob()
 * @return string The blob data
 * @throws BlobAccessException If the lookup yields false
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	$loadFromStorage = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) {
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		return $this->fetchBlob( $blobAddress, $queryFlags );
	};
	$cacheOptions = [
		'pcGroup' => self::TEXT_CACHE_GROUP,
		'pcTTL' => IExpiringStore::TTL_PROC_LONG,
	];

	// No negative caching; negative hits on text rows may be due to corrupted replica DBs
	$blob = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loadFromStorage,
		$cacheOptions
	);

	if ( $blob !== false ) {
		return $blob;
	}

	throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress );
}
297 
/**
 * Loads a blob straight from the 'text' table, bypassing the cache used by getBlob().
 *
 * MCR migration note: this corresponds to Revision::fetchText.
 *
 * @param string $blobAddress Address to resolve; only the 'tt' schema is handled
 * @param int $queryFlags Bitfield interpreted by DBAccessObjectUtils::getDBOptions()
 * @return string|bool The expanded blob, or false if the row is missing or
 *  its content could not be expanded
 * @throws BlobAccessException For an unknown schema or a malformed text ID
 */
private function fetchBlob( $blobAddress, $queryFlags ) {
	list( $schema, $id, ) = self::splitBlobAddress( $blobAddress );

	//TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
	if ( $schema !== 'tt' ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Unknown blob address schema: $schema" );
	}

	// The ID must be a non-zero integer in canonical decimal form.
	$textId = intval( $id );
	if ( !$textId || $id !== (string)$textId ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Bad blob address: $blobAddress" );
	}

	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	if ( DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST ) ) {
		$queryFlags |= self::READ_LATEST_IMMUTABLE;
	}

	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );

	$fields = [ 'old_text', 'old_flags' ];
	$conds = [ 'old_id' => $textId ];

	// Text data is immutable; check replica DBs first.
	$row = $this->getDBConnection( $index )
		->selectRow( 'text', $fields, $conds, __METHOD__, $options );

	// Fallback to DB_MASTER in some cases if the row was not found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( !$row && $fallbackIndex !== null ) {
		$row = $this->getDBConnection( $fallbackIndex )
			->selectRow( 'text', $fields, $conds, __METHOD__, $fallbackOptions );
	}

	if ( !$row ) {
		wfWarn( __METHOD__ . ": No text row with ID $textId." );
		return false;
	}

	$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
	if ( $blob !== false ) {
		return $blob;
	}

	wfLogWarning( __METHOD__ . ": Bad data in text row $textId." );
	return false;
}
367 
/**
 * Get a cache key for a given Blob address.
 *
 * The key is global and combines the resolved DB domain ID with the address.
 *
 * @param string $blobAddress Address of the blob
 * @return string Key produced by the cache's makeGlobalKey()
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->dbDomain );

	return $this->cache->makeGlobalKey( 'BlobStore', 'address', $domainId, $blobAddress );
}
386 
/**
 * Expand a raw data blob according to the flags given.
 *
 * Blobs flagged 'external' hold only a URL; the real content is fetched from
 * the external store and then run through decompressData(). All other blobs
 * are handed to decompressData() directly.
 *
 * @param string $raw Raw blob data, or a URL for 'external' blobs
 * @param string|string[] $flags Flags as an array or a comma-separated string
 * @param string|null $cacheKey When set, external content is read through the
 *  cache under a key derived from this value via getCacheKey()
 * @return string|bool The expanded blob, or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	if ( !in_array( 'external', $flags ) ) {
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$parts = explode( '://', $url, 2 );
	if ( count( $parts ) < 2 || $parts[1] === '' ) {
		// Not of the form "<scheme>://<something>"
		return false;
	}

	$fetchExternal = function () use ( $url, $flags ) {
		$blob = $this->extStoreAccess
			->fetchFromURL( $url, [ 'domain' => $this->dbDomain ] );

		return $blob === false ? false : $this->decompressData( $blob, $flags );
	};

	if ( !$cacheKey ) {
		return $fetchExternal();
	}

	// The cached value should be decompressed, so the callback handles that too.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $cacheKey ),
		$this->getCacheTTL(),
		$fetchExternal,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
441 
/**
 * Prepares a blob for storage and reports the flags describing its encoding.
 *
 * The blob is always flagged 'utf-8'. If compression is enabled and
 * gzdeflate() is available and succeeds, the blob is replaced in place by its
 * deflated form and 'gzip' is added to the flags.
 *
 * @param string &$blob The data to store; may be overwritten with compressed data
 * @return string Comma-separated list of flags
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$blobFlags = [ 'utf-8' ];

	if ( $this->compressBlobs ) {
		if ( !function_exists( 'gzdeflate' ) ) {
			wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" );
		} else {
			$deflated = gzdeflate( $blob );

			if ( $deflated !== false ) {
				$blob = $deflated;
				$blobFlags[] = 'gzip';
			} else {
				// Keep the uncompressed blob and leave the flags untouched.
				wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
			}
		}
	}

	return implode( ',', $blobFlags );
}
482 
/**
 * Re-converts revision text according to its flags.
 *
 * Handles, in this order: 'error' rows, 'gzip' inflation, 'object'
 * unserialization, and conversion from the legacy encoding for blobs that are
 * not flagged 'utf-8' or 'utf8'.
 *
 * @param string $blob Blob in compressed/encoded form; must be a string
 * @param array $blobFlags List of flags describing the blob
 * @return string|bool The decompressed, decoded text, or false on failure
 */
public function decompressData( $blob, array $blobFlags ) {
	// Revision::decompressRevisionText accepted false here, so defend against that
	Assert::parameterType( 'string', $blob, '$blob' );

	if ( in_array( 'error', $blobFlags ) ) {
		// Error row, return false
		return false;
	}

	if ( in_array( 'gzip', $blobFlags ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() can instantiate arbitrary classes from stored data;
		// consider restricting it with the 'allowed_classes' option once the set of
		// legitimate blob classes is confirmed.
		$obj = unserialize( $blob );
		if ( !is_object( $obj ) || !is_callable( [ $obj, 'getText' ] ) ) {
			// Invalid object, or one that cannot provide text (e.g. an incomplete class);
			// report failure instead of triggering a fatal error on getText().
			return false;
		}
		$blob = $obj->getText();
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	if ( $blob !== false && $this->legacyEncoding && $this->legacyEncodingConversionLang
		&& !in_array( 'utf-8', $blobFlags ) && !in_array( 'utf8', $blobFlags )
	) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# ("utf8" checked for compatibility with some broken
		# conversion scripts 2008-12-30)
		$blob = $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob );
	}

	return $blob;
}
542 
550  private function getCacheTTL() {
551  if ( $this->cache->getQoS( WANObjectCache::ATTR_EMULATION )
553  ) {
554  // Do not cache RDBMs blobs in...the RDBMs store
556  } else {
557  $ttl = $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE;
558  }
559 
560  return $ttl;
561  }
562 
/**
 * Returns an ID corresponding to the old_id field in the text table,
 * corresponding to the given address.
 *
 * @param string $address Blob address to inspect
 * @return int|null The text ID, or null if the address does not use the 'tt' schema
 * @throws InvalidArgumentException If the address cannot be split, or its ID
 *  part is not a non-zero integer in canonical decimal form
 */
public function getTextIdFromAddress( $address ) {
	list( $schema, $rawId, ) = self::splitBlobAddress( $address );

	if ( $schema === 'tt' ) {
		$textId = intval( $rawId );

		// Accept only IDs that round-trip unchanged through intval().
		if ( $textId && $rawId === (string)$textId ) {
			return $textId;
		}

		throw new InvalidArgumentException( "Malformed text_id: $rawId" );
	}

	return null;
}
598 
/**
 * Returns an address referring to content stored in the text table row with
 * the given ID.
 *
 * @param int $id Row ID in the text table
 * @return string The ID prefixed with the 'tt:' schema
 */
public static function makeAddressFromTextId( $id ) {
	return "tt:{$id}";
}
614 
/**
 * Splits a blob address into three parts: the schema, the ID, and
 * parameters/flags.
 *
 * The expected form is "<schema>:<id>" with an optional "?<query>" suffix.
 * The schema is lower-cased; the query, if present, is parsed with
 * wfCgiToArray().
 *
 * @param string $address Blob address to split
 * @return array [ $schema, $id, $parameters ]
 * @throws InvalidArgumentException If the address does not have the expected form
 */
public static function splitBlobAddress( $address ) {
	// The D modifier makes "$" match only at the very end of the string, so an
	// address with a trailing newline is rejected rather than silently accepted.
	if ( !preg_match( '/^(\w+):(\w+)(\?(.*))?$/D', $address, $m ) ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	$schema = strtolower( $m[1] );
	$id = $m[2];
	// Group 4 is only present in $m when the address carries a query part.
	$parameters = isset( $m[4] ) ? wfCgiToArray( $m[4] ) : [];

	return [ $schema, $id, $parameters ];
}
636 
/**
 * Check if the blob metadata or backing blob data store is read-only.
 *
 * @return bool True if external storage is in use and read-only, or if the
 *  load balancer reports a read-only reason
 */
public function isReadOnly() {
	// The external store is only consulted when it is actually in use.
	$externalReadOnly = $this->useExternalStore && $this->extStoreAccess->isReadOnly();

	return $externalReadOnly
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
644 }
Service for storing and loading Content objects.
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user, for example updating the view counts or updating the linked-to tables after a save. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list.
Definition: deferred.txt:11
string bool $dbDomain
DB domain ID of a wiki or false for the local one.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:187
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2146
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID...
ExternalStoreAccess $extStoreAccess
const TTL_UNCACHEABLE
Idiom for getWithSetCallback() meaning "do not store the callback result".
setLegacyEncoding( $legacyEncoding, Language $language)
setCompressBlobs( $compressBlobs)
__construct(ILoadBalancer $dbLoadBalancer, ExternalStoreAccess $extStoreAccess, WANObjectCache $cache, $dbDomain=false)
getCacheTTL()
Get the text cache TTL.
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags. ...
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
Exception representing a failure to access a data blob.
const DB_MASTER
Definition: defines.php:26
static getDBOptions( $bitfield)
Get an appropriate DB index, options, and fallback DB index for a query.
Language null $legacyEncodingConversionLang
setUseExternalStore( $useExternalStore)
you have access to all of the normal MediaWiki so you can get a DB use the cache
Definition: maintenance.txt:52
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
fetchBlob( $blobAddress, $queryFlags)
MCR migration note: this corresponds to Revision::fetchText.
getCacheKey( $blobAddress)
Get a cache key for a given Blob address.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1971
unserialize( $serialized)
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:767
static hasFlags( $bitfield, $flags)
decompressData( $blob, array $blobFlags)
Re-converts revision text according to its flags.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
Database cluster connection, tracking, load balancing, and transaction manager interface.
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
Service for loading and storing data blobs.
Definition: BlobStore.php:33
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.