MediaWiki  master
SqlBlobStore.php
Go to the documentation of this file.
1 <?php
27 namespace MediaWiki\Storage;
28 
40 
50 
// Note: the name has been taken unchanged from the Revision class.
// Used as the process-cache group ('pcGroup') when blobs are cached.
const TEXT_CACHE_GROUP = 'revisiontext:10';

/**
 * @var LoadBalancer Source of the database connections used by this store.
 */
private $dbLoadBalancer;

/**
 * @var WANObjectCache Cache consulted before blobs are loaded.
 */
private $cache;

/**
 * @var bool|string Wiki ID; handed to the load balancer when requesting connections.
 */
private $wikiId;

/**
 * @var int Cache expiry (default: 7 days).
 */
private $cacheExpiry = 604800; // 7 days

/**
 * @var bool Whether compressData() should try to deflate blobs.
 */
private $compressBlobs = false;

/**
 * @var bool|string Name of the legacy encoding, or false if none is configured.
 */
private $legacyEncoding = false;

/**
 * @var Language|null Language object used by decompressData() to convert
 *  blobs from the legacy encoding to UTF-8. Assigned by setLegacyEncoding().
 */
private $legacyEncodingConversionLang = null;

/**
 * @var bool Whether storeBlob() writes the data to the external store.
 */
private $useExternalStore = false;
93 
/**
 * Set up the blob store with its collaborators.
 *
 * @param LoadBalancer $dbLoadBalancer A load balancer for acquiring database connections
 * @param WANObjectCache $cache A cache manager for caching blobs
 * @param bool|string $wikiId The ID of the target wiki database. Use false for the local wiki.
 *  NOTE(review): "false means local wiki" follows the usual LoadBalancer convention - confirm.
 */
public function __construct(
	LoadBalancer $dbLoadBalancer,
	WANObjectCache $cache,
	$wikiId = false
) {
	$this->dbLoadBalancer = $dbLoadBalancer;
	$this->cache = $cache;
	$this->wikiId = $wikiId;
}
113 
/**
 * Report the currently configured cache expiry.
 *
 * @return int The value last passed to setCacheExpiry(), or the built-in default.
 */
public function getCacheExpiry() {
	$expiry = $this->cacheExpiry;

	return $expiry;
}
120 
/**
 * Configure the cache expiry used when deriving the blob cache TTL.
 *
 * @param int $cacheExpiry New expiry; anything but an integer is rejected
 *  by the parameter assertion before the value is stored.
 */
public function setCacheExpiry( $cacheExpiry ) {
	// Validate first so that a bad value never reaches the property.
	Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );
	$this->cacheExpiry = $cacheExpiry;
}
129 
/**
 * Report whether blob compression is switched on.
 *
 * @return bool The flag consulted by compressData().
 */
public function getCompressBlobs() {
	$enabled = $this->compressBlobs;

	return $enabled;
}
136 
/**
 * Switch blob compression on or off.
 *
 * The value is stored as given; no type check is applied here.
 *
 * @param bool $compressBlobs Whether compressData() should try to deflate blobs.
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
143 
/**
 * Report the configured legacy encoding.
 *
 * @return bool|string The encoding name, or false when none has been set.
 */
public function getLegacyEncoding() {
	$encoding = $this->legacyEncoding;

	return $encoding;
}
151 
158  }
159 
/**
 * Configure the legacy encoding together with the Language object used
 * to convert from it.
 *
 * decompressData() uses both values to convert blobs that are not
 * flagged as UTF-8.
 *
 * @param string $legacyEncoding Name of the legacy encoding; must be a string.
 * @param Language $language Language object whose iconv() performs the conversion.
 */
public function setLegacyEncoding( $legacyEncoding, Language $language ) {
	// Reject non-string encodings before touching any state.
	Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );

	$this->legacyEncoding = $legacyEncoding;
	$this->legacyEncodingConversionLang = $language;
}
171 
/**
 * Report whether new blobs are written to the external store.
 *
 * @return bool The flag consulted by storeBlob() and isReadOnly().
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
178 
/**
 * Switch use of the external store on or off.
 *
 * @param bool $useExternalStore Whether storeBlob() should write data to the
 *  external store; anything but a boolean is rejected by the parameter assertion.
 */
public function setUseExternalStore( $useExternalStore ) {
	Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );

	$this->useExternalStore = $useExternalStore;
}
187 
/**
 * Accessor for the load balancer injected through the constructor.
 *
 * @return LoadBalancer
 */
private function getDBLoadBalancer() {
	$loadBalancer = $this->dbLoadBalancer;

	return $loadBalancer;
}
194 
/**
 * Obtain a database connection for this store's wiki.
 *
 * @param int $index Server index passed through to the load balancer
 *  (storeBlob() passes DB_MASTER).
 * @return mixed Whatever LoadBalancer::getConnection() returns for the
 *  given index, no groups, and this store's wiki ID.
 */
private function getDBConnection( $index ) {
	return $this->getDBLoadBalancer()->getConnection( $index, [], $this->wikiId );
}
204 
/**
 * Store a blob in the text table and return its address.
 *
 * The data is first passed through compressData(). If the external store
 * is enabled, the data goes there and the text row only holds the value
 * returned by the external store, flagged as 'external'.
 *
 * @param string $data The data to store
 * @param array $hints Not used by this implementation
 * @return string Address of the form produced by makeAddressFromTextId()
 * @throws BlobAccessException If external storage fails, or if an MWException
 *  is raised while storing
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() may replace $data with its deflated form.
		$flagString = $this->compressData( $data );

		# Write to external storage if required
		if ( $this->useExternalStore ) {
			// Store and get the URL
			$data = ExternalStore::insertToDefault( $data );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}
			$flagString .= ( $flagString ? ',' : '' ) . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$master = $this->getDBConnection( DB_MASTER );

		$textRow = [
			'old_id' => $master->nextSequenceValue( 'text_old_id_seq' ),
			'old_text' => $data,
			'old_flags' => $flagString,
		];
		$master->insert( 'text', $textRow, __METHOD__ );

		// The address is derived from the ID reported by the connection after the insert.
		return self::makeAddressFromTextId( $master->insertId() );
	} catch ( MWException $e ) {
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
256 
/**
 * Retrieve a blob, given an address.
 *
 * The lookup goes through the cache; on a miss the blob is loaded via
 * fetchBlob().
 *
 * @param string $blobAddress Address as returned by storeBlob()
 * @param int $queryFlags Query flags passed on to fetchBlob()
 * @return string The blob's data
 * @throws BlobAccessException If no blob could be loaded for the address
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	$key = $this->getCacheKey( $blobAddress );
	$expiry = $this->getCacheTTL();

	// Ignore $setOpts; blobs are immutable and negatives are not cached
	$loader = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) {
		return $this->fetchBlob( $blobAddress, $queryFlags );
	};

	$cacheOptions = [
		'pcGroup' => self::TEXT_CACHE_GROUP,
		'pcTTL' => IExpiringStore::TTL_PROC_LONG,
	];

	// No negative caching; negative hits on text rows may be due to corrupted replica DBs
	$blob = $this->cache->getWithSetCallback( $key, $expiry, $loader, $cacheOptions );

	if ( $blob !== false ) {
		return $blob;
	}

	throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress );
}
289 
/**
 * Load a blob from the text table, bypassing the cache lookup in getBlob().
 *
 * MCR migration note: this corresponds to Revision::fetchText.
 *
 * @param string $blobAddress Address using the 'tt' schema
 * @param int $queryFlags Query flags; READ_LATEST additionally enables
 *  READ_LATEST_IMMUTABLE
 * @return string|bool The expanded blob, or false if the row is missing or
 *  its data could not be expanded
 * @throws BlobAccessException If the schema is unknown or the ID is malformed
 */
private function fetchBlob( $blobAddress, $queryFlags ) {
	list( $schema, $id, ) = self::splitBlobAddress( $blobAddress );

	//TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
	if ( $schema !== 'tt' ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Unknown blob address schema: $schema" );
	}

	// The ID must be a non-zero integer in canonical string form.
	$textId = intval( $id );
	if ( !$textId || $id !== (string)$textId ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Bad blob address: $blobAddress" );
	}

	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	if ( DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST ) ) {
		$queryFlags |= self::READ_LATEST_IMMUTABLE;
	}

	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );

	$fields = [ 'old_text', 'old_flags' ];
	$conds = [ 'old_id' => $textId ];

	// Text data is immutable; check replica DBs first.
	$row = $this->getDBConnection( $index )
		->selectRow( 'text', $fields, $conds, __METHOD__, $options );

	// Fallback to DB_MASTER in some cases if the row was not found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( !$row && $fallbackIndex !== null ) {
		$row = $this->getDBConnection( $fallbackIndex )
			->selectRow( 'text', $fields, $conds, __METHOD__, $fallbackOptions );
	}

	if ( !$row ) {
		wfWarn( __METHOD__ . ": No text row with ID $textId." );
		return false;
	}

	$expanded = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
	if ( $expanded === false ) {
		wfLogWarning( __METHOD__ . ": Bad data in text row $textId." );
	}

	// $expanded is false here exactly when expansion failed.
	return $expanded;
}
359 
/**
 * Get a cache key for a given Blob address.
 *
 * The key is a global key built from the fixed components 'BlobStore' and
 * 'address', the domain ID the load balancer resolves for this store's
 * wiki ID, and the address itself.
 *
 * @param string $blobAddress
 * @return string
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->wikiId );

	return $this->cache->makeGlobalKey( 'BlobStore', 'address', $domainId, $blobAddress );
}
378 
/**
 * Expand a raw data blob according to the flags given.
 *
 * For blobs flagged 'external', $raw is treated as a URL whose content is
 * fetched from the external store and then decompressed. All other blobs
 * are decompressed directly.
 *
 * @param string $raw The raw blob data, or an external store URL
 * @param string|string[] $flags Blob flags, as a comma separated string or as a list
 * @param string|null $cacheKey If given, external blobs are loaded through the
 *  cache using the key derived from this value
 * @return string|bool The expanded data, or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	if ( !in_array( 'external', $flags ) ) {
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$urlParts = explode( '://', $url, 2 );
	if ( count( $urlParts ) == 1 || $urlParts[1] == '' ) {
		return false;
	}

	// Shared loader for the cached and the uncached path.
	$loadExternal = function () use ( $url, $flags ) {
		$fetched = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] );
		if ( $fetched === false ) {
			return false;
		}

		return $this->decompressData( $fetched, $flags );
	};

	if ( !$cacheKey ) {
		return $loadExternal();
	}

	// The cached value should be decompressed, so handle that and return here.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $cacheKey ),
		$this->getCacheTTL(),
		$loadExternal,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
432 
/**
 * Prepare a blob for storage, compressing it if compression is enabled.
 *
 * The blob is always flagged 'utf-8'. When compression is enabled and
 * gzdeflate() is available and succeeds, $blob is replaced by its deflated
 * form and 'gzip' is added to the flags.
 *
 * @param string &$blob The data to store; modified in place when compressed
 * @return string Comma separated list of flags describing the stored form
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$flagList = [ 'utf-8' ];

	if ( $this->compressBlobs ) {
		if ( !function_exists( 'gzdeflate' ) ) {
			wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" );
		} else {
			$packed = gzdeflate( $blob );

			if ( $packed !== false ) {
				$blob = $packed;
				$flagList[] = 'gzip';
			} else {
				// Keep the uncompressed data and only log the failure.
				wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
			}
		}
	}

	return implode( ',', $flagList );
}
473 
/**
 * Re-converts revision text according to its flags.
 *
 * Handles, in this order: the 'error' flag, 'gzip' inflation, 'object'
 * unserialization, and conversion from the legacy encoding for blobs not
 * flagged as UTF-8.
 *
 * @param string $blob Blob in its stored form
 * @param array $blobFlags List of flags describing the stored form
 * @return string|bool The decoded data, or false for error rows, failed
 *  inflation, or invalid serialized objects
 */
public function decompressData( $blob, array $blobFlags ) {
	// Revision::decompressRevisionText accepted false here, so defend against that
	Assert::parameterType( 'string', $blob, '$blob' );

	$hasFlag = function ( $flag ) use ( $blobFlags ) {
		return in_array( $flag, $blobFlags );
	};

	if ( $hasFlag( 'error' ) ) {
		// Error row, return false
		return false;
	}

	if ( $hasFlag( 'gzip' ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( $hasFlag( 'object' ) ) {
		# Generic compressed storage
		$stored = unserialize( $blob );
		if ( !is_object( $stored ) ) {
			// Invalid object
			return false;
		}
		$blob = $stored->getText();
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	# ("utf8" checked for compatibility with some broken
	# conversion scripts 2008-12-30)
	$needsConversion = $blob !== false
		&& $this->legacyEncoding
		&& $this->legacyEncodingConversionLang
		&& !( $hasFlag( 'utf-8' ) || $hasFlag( 'utf8' ) );

	if ( $needsConversion ) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		$blob = $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob );
	}

	return $blob;
}
533 
/**
 * Get the text cache TTL.
 *
 * Returns WANObjectCache::TTL_UNCACHEABLE when the cache is emulated by the
 * RDBMS, or when the configured expiry is zero or otherwise empty; returns
 * the configured cache expiry in all other cases.
 *
 * @return int
 */
private function getCacheTTL() {
	// NOTE(review): the right-hand side of this comparison and the assignment in the
	// first branch were missing from the listing; restored from the upstream
	// WANObjectCache API (QOS_EMULATION_SQL) - confirm against the canonical file.
	if ( $this->cache->getQoS( WANObjectCache::ATTR_EMULATION )
		<= WANObjectCache::QOS_EMULATION_SQL
	) {
		// Do not cache RDBMs blobs in...the RDBMs store
		$ttl = WANObjectCache::TTL_UNCACHEABLE;
	} else {
		$ttl = $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE;
	}

	return $ttl;
}
553 
/**
 * Returns an ID corresponding to the old_id field in the text table,
 * corresponding to the given $address.
 *
 * @param string $address Blob address
 * @return int|null The text ID, or null if the address does not use the 'tt' schema
 * @throws InvalidArgumentException If the address cannot be split, or if the
 *  ID part is not a non-zero integer in canonical string form
 */
public function getTextIdFromAddress( $address ) {
	list( $schema, $id, ) = self::splitBlobAddress( $address );

	if ( $schema !== 'tt' ) {
		return null;
	}

	$textId = intval( $id );
	$isCanonical = $textId && $id === (string)$textId;

	if ( $isCanonical ) {
		return $textId;
	}

	throw new InvalidArgumentException( "Malformed text_id: $id" );
}
589 
/**
 * Returns an address referring to content stored in the text table row
 * with the given ID.
 *
 * @param int $id Text table row ID
 * @return string The ID prefixed with the 'tt:' schema
 */
public static function makeAddressFromTextId( $id ) {
	return "tt:$id";
}
605 
/**
 * Splits a blob address into three parts: the schema, the ID, and
 * parameters/flags.
 *
 * An address has the form schema:id, optionally followed by a query string
 * introduced by '?'. The schema is lower-cased; the query string, if any,
 * is decoded with wfCgiToArray().
 *
 * @param string $address Blob address
 * @return array Three elements: schema, ID, and the parameter array
 * @throws InvalidArgumentException If the address does not have the expected form
 */
public static function splitBlobAddress( $address ) {
	$matches = [];

	if ( preg_match( '/^(\w+):(\w+)(\?(.*))?$/', $address, $matches ) !== 1 ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	// Group 4 is only present when the address carries a query string.
	if ( isset( $matches[4] ) ) {
		$parameters = wfCgiToArray( $matches[4] );
	} else {
		$parameters = [];
	}

	return [ strtolower( $matches[1] ), $matches[2], $parameters ];
}
627 
/**
 * Check if the blob metadata or backing blob data store is read-only.
 *
 * @return bool True if the external store is in use and its default stores
 *  are read-only, or if the load balancer reports a read-only reason
 */
public function isReadOnly() {
	// Short-circuiting keeps the load balancer check from running when the
	// external store already makes this store read-only.
	return ( $this->useExternalStore && ExternalStore::defaultStoresAreReadOnly() )
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
635 }
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
Service for storing and loading Content objects.
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition: deferred.txt:11
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
processing should stop and the error should be shown to the user * false
Definition: hooks.txt:187
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
div flags Integer display flags(NO_ACTION_LINK, NO_EXTRA_USER_LINKS) 'LogException' returning false will NOT prevent logging $e
Definition: hooks.txt:2159
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID...
const TTL_UNCACHEABLE
Idiom for getWithSetCallback() callbacks to avoid calling set()
setLegacyEncoding( $legacyEncoding, Language $language)
setCompressBlobs( $compressBlobs)
getCacheTTL()
Get the text cache TTL.
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags. ...
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
static insertToDefault( $data, array $params=[])
Like insert() above, but does more of the work for us.
Exception representing a failure to access a data blob.
const DB_MASTER
Definition: defines.php:26
static getDBOptions( $bitfield)
Get an appropriate DB index, options, and fallback DB index for a query.
Language null $legacyEncodingConversionLang
setUseExternalStore( $useExternalStore)
you have access to all of the normal MediaWiki so you can get a DB use the cache
Definition: maintenance.txt:52
static defaultStoresAreReadOnly()
bool string $wikiId
Wiki ID.
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
fetchBlob( $blobAddress, $queryFlags)
MCR migration note: this corresponds to Revision::fetchText.
getCacheKey( $blobAddress)
Get a cache key for a given Blob address.
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition: hooks.txt:1982
unserialize( $serialized)
this hook is for auditing only or null if authentication failed before getting that far or null if we can t even determine that When $user is not null
Definition: hooks.txt:780
static hasFlags( $bitfield, $flags)
decompressData( $blob, array $blobFlags)
Re-converts revision text according to its flags.
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition: injection.txt:35
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
Database connection, tracking, load balancing, and transaction manager for a cluster.
Service for loading and storing data blobs.
Definition: BlobStore.php:33
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
__construct(LoadBalancer $dbLoadBalancer, WANObjectCache $cache, $wikiId=false)
static fetchFromURL( $url, array $params=[])
Fetch data from given URL.