MediaWiki  master
SqlBlobStore.php
Go to the documentation of this file.
1 <?php
26 namespace MediaWiki\Storage;
27 
28 use AppendIterator;
31 use IDBAccessObject;
32 use InvalidArgumentException;
33 use MWException;
34 use StatusValue;
35 use WANObjectCache;
36 use Wikimedia\Assert\Assert;
37 use Wikimedia\AtEase\AtEase;
41 
51 
52  // Note: the name has been taken unchanged from the old Revision class.
 // Used as the 'pcGroup' cache option in getCacheOptions().
53  public const TEXT_CACHE_GROUP = 'revisiontext:10';
54 
 /** @var ILoadBalancer Supplied to the constructor; provides the connections used for the text table */
58  private $dbLoadBalancer;
59 
 /** @var ExternalStoreAccess Supplied to the constructor; used to insert and fetch externally stored blobs */
63  private $extStoreAccess;
64 
 /** @var WANObjectCache Supplied to the constructor; holds expanded blobs */
68  private $cache;
69 
 /** @var string|bool Database domain passed to the load balancer and external store; false by default */
73  private $dbDomain;
74 
 /** @var int Cache lifetime for blobs, in seconds; a falsy value makes getCacheTTL() report "uncacheable" */
78  private $cacheExpiry = 604800; // 7 days
79 
 /** @var bool When true, compressData() attempts to gzdeflate new blobs */
83  private $compressBlobs = false;
84 
 /** @var string|bool Source charset handed to iconv() for blobs without a utf-8 flag; false disables conversion */
88  private $legacyEncoding = false;
89 
 /** @var bool When true, storeBlob() writes the payload through $extStoreAccess and stores only the returned URL */
93  private $useExternalStore = false;
94 
/**
 * Wire up the collaborators this store relies on.
 *
 * @param ILoadBalancer $dbLoadBalancer Load balancer used to obtain database connections
 * @param ExternalStoreAccess $extStoreAccess Access point for externally stored blobs
 * @param WANObjectCache $cache Cache used for expanded blobs
 * @param string|bool $dbDomain Database domain handed to the load balancer; defaults to false
 */
public function __construct(
	ILoadBalancer $dbLoadBalancer,
	ExternalStoreAccess $extStoreAccess,
	WANObjectCache $cache,
	$dbDomain = false
) {
	$this->dbDomain = $dbDomain;
	$this->cache = $cache;
	$this->extStoreAccess = $extStoreAccess;
	$this->dbLoadBalancer = $dbLoadBalancer;
}
117 
/**
 * @return int Configured cache lifetime for blobs, in seconds
 */
public function getCacheExpiry() {
	$expiry = $this->cacheExpiry;

	return $expiry;
}
124 
/**
 * Change how long blobs are kept in the cache.
 *
 * @param int $cacheExpiry Lifetime in seconds; a falsy value makes getCacheTTL()
 *  report the blob as uncacheable
 */
public function setCacheExpiry( int $cacheExpiry ) {
	$this->cacheExpiry = $cacheExpiry;
}
131 
/**
 * @return bool Whether compressData() will try to compress new blobs
 */
public function getCompressBlobs() {
	$enabled = $this->compressBlobs;

	return $enabled;
}
138 
/**
 * Switch compression of newly stored blobs on or off.
 *
 * @param bool $compressBlobs Truthy to have compressData() gzdeflate the data
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
145 
/**
 * @return string|bool The legacy charset assumed for blobs that lack a utf-8 flag,
 *  or false when none has been configured
 */
public function getLegacyEncoding() {
	$encoding = $this->legacyEncoding;

	return $encoding;
}
153 
/**
 * Set the legacy encoding to assume for blobs that do not carry the utf-8 flag.
 * decompressData() passes this value to iconv() as the source charset.
 *
 * @param string $legacyEncoding Charset name understood by iconv()
 */
public function setLegacyEncoding( string $legacyEncoding ) {
	$this->legacyEncoding = $legacyEncoding;
}
165 
/**
 * @return bool Whether storeBlob() writes payloads to external storage
 */
public function getUseExternalStore() {
	$enabled = $this->useExternalStore;

	return $enabled;
}
172 
/**
 * Choose whether new blobs are written to external storage.
 *
 * @param bool $useExternalStore True to have storeBlob() insert the payload through
 *  the external store and keep only the returned URL in the text table
 */
public function setUseExternalStore( bool $useExternalStore ) {
	$this->useExternalStore = $useExternalStore;
}
179 
/**
 * @return ILoadBalancer The load balancer passed to the constructor
 */
private function getDBLoadBalancer() {
	$loadBalancer = $this->dbLoadBalancer;

	return $loadBalancer;
}
186 
/**
 * Obtain a connection handle for this store's database domain.
 *
 * @param int $index Server index understood by the load balancer (e.g. DB_PRIMARY)
 * @return mixed Whatever ILoadBalancer::getConnectionRef() yields for that index
 */
private function getDBConnection( $index ) {
	return $this->getDBLoadBalancer()
		->getConnectionRef( $index, [], $this->dbDomain );
}
196 
/**
 * Persist a blob in the text table and return the address it can be read back from.
 *
 * When external storage is enabled, the payload is first written there; the text
 * row then only holds the URL returned by the external store and carries the
 * additional 'external' flag.
 *
 * @param string $data Raw blob content
 * @param array $hints Not consulted by this implementation
 * @return string Blob address as built by makeAddressFromTextId()
 * @throws BlobAccessException If the external store yields no URL, or an MWException
 *  is raised while storing
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() may rewrite $data in place; it reports the flags describing it.
		$flags = $this->compressData( $data );

		# Write to external storage if required
		if ( $this->useExternalStore ) {
			// From here on the "data" kept in the text row is the external URL
			$data = $this->extStoreAccess->insert( $data, [ 'domain' => $this->dbDomain ] );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}
			$flags = ( $flags ? $flags . ',' : $flags ) . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$row = [ 'old_text' => $data, 'old_flags' => $flags ];

		$dbw = $this->getDBConnection( DB_PRIMARY );
		$dbw->insert( 'text', $row, __METHOD__ );

		return self::makeAddressFromTextId( $dbw->insertId() );
	} catch ( MWException $e ) {
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
243 
/**
 * Retrieve a single blob, going through the cache.
 *
 * @param string $blobAddress Address of the blob to load
 * @param int $queryFlags Bitfield forwarded to fetchBlobs()
 * @return string The expanded blob
 * @throws BlobAccessException If fetchBlobs() reported an error for this address
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// Written by the loader below so that a failed fetch can be reported after the lookup
	$failure = null;

	$loader = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags, &$failure ) {
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		[ $blobs, $problems ] = $this->fetchBlobs( [ $blobAddress ], $queryFlags );
		// No negative caching; negative hits on text rows may be due to corrupted replica DBs
		$failure = $problems[$blobAddress] ?? null;
		return $blobs[$blobAddress];
	};

	$blob = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loader,
		$this->getCacheOptions()
	);

	if ( $failure ) {
		throw new BlobAccessException( $failure );
	}

	Assert::postcondition( is_string( $blob ), 'Blob must not be null' );
	return $blob;
}
280 
/**
 * Batched counterpart of getBlob().
 *
 * @param string[] $blobAddresses Addresses of the blobs to load
 * @param int $queryFlags Bitfield forwarded to fetchBlobs()
 * @return StatusValue Good status whose value maps each address to its blob, or to null
 *  where loading failed; every failure is also attached as an 'internalerror' warning
 */
public function getBlobBatch( $blobAddresses, $queryFlags = 0 ) {
	// FIXME: All caching has temporarily been removed in I94c6f9ba7b9caeeb due to T235188.
	//        Caching behavior should be restored by reverting I94c6f9ba7b9caeeb as soon as
	//        the root cause of T235188 has been resolved.

	[ $fetched, $errors ] = $this->fetchBlobs( $blobAddresses, $queryFlags );

	// fetchBlobs() signals failure with false; callers of this method get null instead.
	$blobsByAddress = [];
	foreach ( $fetched as $address => $blob ) {
		$blobsByAddress[$address] = ( $blob === false ) ? null : $blob;
	}

	$status = StatusValue::newGood( $blobsByAddress );
	foreach ( $errors as $message ) {
		$status->warning( 'internalerror', $message );
	}

	return $status;
}
311 
/**
 * Resolve a list of blob addresses against the text table.
 *
 * Addresses with the 'bad' schema resolve to an empty string without an error.
 * Addresses with the 'tt' schema are looked up by text ID, first on the connection
 * selected by $queryFlags and then, for rows still missing, on the fallback
 * connection if the flags provide one. Any other schema is reported as an error.
 *
 * @param string[] $blobAddresses Addresses to resolve
 * @param int $queryFlags Bitfield of READ_* flags, interpreted by DBAccessObjectUtils
 * @return array Two-element list [ $result, $errors ]: $result maps each requested
 *  address to its expanded blob, or false on failure; $errors maps each failed address
 *  to an error message
 * @throws BlobAccessException If an address cannot be split into schema and ID
 */
private function fetchBlobs( $blobAddresses, $queryFlags ) {
	$textIdToBlobAddress = [];
	$result = [];
	$errors = [];
	foreach ( $blobAddresses as $blobAddress ) {
		try {
			[ $schema, $id ] = self::splitBlobAddress( $blobAddress );
		} catch ( InvalidArgumentException $ex ) {
			throw new BlobAccessException(
				$ex->getMessage() . '. Use findBadBlobs.php to remedy.',
				0,
				$ex
			);
		}

		// TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
		if ( $schema === 'bad' ) {
			// Database row was marked as "known bad", no need to trigger an error.
			wfDebug(
				__METHOD__
				. ": loading known-bad content ($blobAddress), returning empty string"
			);
			$result[$blobAddress] = '';
			continue;
		} elseif ( $schema === 'tt' ) {
			$textId = intval( $id );

			if ( $textId < 1 || $id !== (string)$textId ) {
				$errors[$blobAddress] = "Bad blob address: $blobAddress."
					. ' Use findBadBlobs.php to remedy.';
				$result[$blobAddress] = false;
				// Do not look this ID up: intval() truncates malformed input, so an
				// address such as "tt:12abc" would otherwise load row 12 and hand back
				// its content under the bad address.
				continue;
			}

			$textIdToBlobAddress[$textId] = $blobAddress;
		} else {
			$errors[$blobAddress] = "Unknown blob address schema: $schema."
				. ' Use findBadBlobs.php to remedy.';
			$result[$blobAddress] = false;
		}
	}

	$textIds = array_keys( $textIdToBlobAddress );
	if ( !$textIds ) {
		// Nothing needs a database lookup
		return [ $result, $errors ];
	}
	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST )
		? self::READ_LATEST_IMMUTABLE
		: 0;
	[ $index, $options, $fallbackIndex, $fallbackOptions ] =
		DBAccessObjectUtils::getDBOptions( $queryFlags );
	// Text data is immutable; check replica DBs first.
	$dbConnection = $this->getDBConnection( $index );
	$rows = $dbConnection->select(
		'text',
		[ 'old_id', 'old_text', 'old_flags' ],
		[ 'old_id' => $textIds ],
		__METHOD__,
		$options
	);
	$numRows = 0;
	if ( $rows instanceof IResultWrapper ) {
		$numRows = $rows->numRows();
	}

	// Fallback to DB_PRIMARY in some cases if not all the rows were found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( $numRows !== count( $textIds ) && $fallbackIndex !== null ) {
		$fetchedTextIds = [];
		foreach ( $rows as $row ) {
			$fetchedTextIds[] = $row->old_id;
		}
		$missingTextIds = array_diff( $textIds, $fetchedTextIds );
		$dbConnection = $this->getDBConnection( $fallbackIndex );
		$rowsFromFallback = $dbConnection->select(
			'text',
			[ 'old_id', 'old_text', 'old_flags' ],
			[ 'old_id' => $missingTextIds ],
			__METHOD__,
			$fallbackOptions
		);
		// Process the rows of both queries in a single pass below
		$appendIterator = new AppendIterator();
		$appendIterator->append( $rows );
		$appendIterator->append( $rowsFromFallback );
		$rows = $appendIterator;
	}

	foreach ( $rows as $row ) {
		$blobAddress = $textIdToBlobAddress[$row->old_id];
		$blob = false;
		if ( $row->old_text !== null ) {
			$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
		}
		if ( $blob === false ) {
			$errors[$blobAddress] = "Bad data in text row {$row->old_id}."
				. ' Use findBadBlobs.php to remedy.';
		}
		$result[$blobAddress] = $blob;
	}

	// If we're still missing some of the rows, set errors for missing blobs.
	if ( count( $result ) !== count( $blobAddresses ) ) {
		foreach ( $blobAddresses as $blobAddress ) {
			if ( !isset( $result[$blobAddress ] ) ) {
				$errors[$blobAddress] = "Unable to fetch blob at $blobAddress."
					. ' Use findBadBlobs.php to remedy.';
				$result[$blobAddress] = false;
			}
		}
	}
	return [ $result, $errors ];
}
435 
/**
 * Build the cache key under which the blob at the given address is stored.
 *
 * @param string $blobAddress
 * @return string Global key made of a fixed prefix, the resolved domain ID and the address
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->dbDomain );

	return $this->cache->makeGlobalKey( 'SqlBlobStore-blob', $domainId, $blobAddress );
}
453 
/**
 * Options passed to WANObjectCache::getWithSetCallback() for every blob lookup.
 *
 * @return array
 */
private function getCacheOptions() {
	$options = [];
	$options['pcGroup'] = self::TEXT_CACHE_GROUP;
	$options['pcTTL'] = WANObjectCache::TTL_PROC_LONG;
	$options['segmentable'] = true;

	return $options;
}
466 
/**
 * Expand a raw data blob according to the flags given.
 *
 * Data flagged 'external' is treated as a URL and fetched through the external store;
 * when a blob address is supplied, that fetch goes through the cache. All data is then
 * decoded by decompressData().
 *
 * @param string $raw Raw data, or an external store URL if 'external' is among the flags
 * @param string|string[] $flags Flags as a comma-separated string or as a list
 * @param string|null $blobAddress Address used to derive the cache key; omit to skip the cache
 * @return string|bool The expanded blob, or false on failure
 */
public function expandBlob( $raw, $flags, $blobAddress = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	// Data kept inline in the table only needs decoding
	if ( !in_array( 'external', $flags ) ) {
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$parts = explode( '://', $url, 2 );
	if ( count( $parts ) == 1 || $parts[1] == '' ) {
		return false;
	}

	// Shared by the cached and the uncached path
	$fetchAndDecode = function () use ( $url, $flags ) {
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		$blob = $this->extStoreAccess
			->fetchFromURL( $url, [ 'domain' => $this->dbDomain ] );

		return $blob === false ? false : $this->decompressData( $blob, $flags );
	};

	if ( !$blobAddress ) {
		return $fetchAndDecode();
	}

	// The cached value should be decompressed, so handle that and return here.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$fetchAndDecode,
		$this->getCacheOptions()
	);
}
521 
/**
 * Prepare a blob for storage and report the flags that describe it.
 *
 * The 'utf-8' flag is always set. When compression is enabled and zlib is available,
 * $blob is replaced in place by its deflated form and 'gzip' is added.
 *
 * @param string &$blob Data to store; may be modified in place
 * @return string Comma-separated list of flags
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$blobFlags = [ 'utf-8' ];

	if ( $this->compressBlobs ) {
		if ( !function_exists( 'gzdeflate' ) ) {
			wfDebug( __METHOD__ . " -- no zlib support, not compressing" );
		} else {
			$deflated = gzdeflate( $blob );

			if ( $deflated !== false ) {
				$blob = $deflated;
				$blobFlags[] = 'gzip';
			} else {
				wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
			}
		}
	}

	return implode( ',', $blobFlags );
}
562 
/**
 * Re-convert blob data according to its flags.
 *
 * Recognised flags: 'error' (always yields false), 'gzip' (data is inflated),
 * 'object' (data is a serialized object whose getText() supplies the content) and
 * 'utf-8' / 'utf8' (data needs no legacy charset conversion).
 *
 * @param string $blob Blob in stored form
 * @param array $blobFlags List of flags describing $blob
 * @return string|bool The decoded data, or false if it could not be decoded
 */
public function decompressData( string $blob, array $blobFlags ) {
	if ( in_array( 'error', $blobFlags ) ) {
		// Error row, return false
		return false;
	}

	if ( in_array( 'gzip', $blobFlags ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() instantiates whatever class the stored payload
		// names. If stored rows can ever be attacker-influenced, restrict this with the
		// 'allowed_classes' option once the set of legitimate blob classes is confirmed.
		$obj = unserialize( $blob );
		if ( !is_object( $obj ) || !is_callable( [ $obj, 'getText' ] ) ) {
			// Not an object, or an object (e.g. of an unknown class) that cannot
			// provide text; treat as invalid instead of failing fatally.
			return false;
		}
		$blob = $obj->getText();
	}

	// Needed to support old revisions from before MW 1.5.
	if ( $blob !== false && $this->legacyEncoding
		&& !in_array( 'utf-8', $blobFlags ) && !in_array( 'utf8', $blobFlags )
	) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# ("utf8" checked for compatibility with some broken
		# conversion scripts 2008-12-30)
		# Even with //IGNORE iconv can whine about illegal characters in
		# *input* string. We just ignore those too.
		# REF: https://bugs.php.net/bug.php?id=37166
		# REF: https://phabricator.wikimedia.org/T18885
		AtEase::suppressWarnings();
		try {
			$blob = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $blob );
		} finally {
			// Always re-enable warnings, even if the conversion throws
			AtEase::restoreWarnings();
		}
	}

	return $blob;
}
625 
/**
 * Work out the TTL to use when caching blobs.
 *
 * @return int The configured expiry, or TTL_UNCACHEABLE when the cache backend is at
 *  least as durable as an RDBMS or no expiry is configured
 */
private function getCacheTTL() {
	$cache = $this->cache;
	$durability = $cache->getQoS( $cache::ATTR_DURABILITY );

	if ( $durability >= $cache::QOS_DURABILITY_RDBMS ) {
		// Do not cache RDBMs blobs in...the RDBMs store
		return $cache::TTL_UNCACHEABLE;
	}

	return $this->cacheExpiry ?: $cache::TTL_UNCACHEABLE;
}
645 
/**
 * Return the text table ID encoded in a blob address.
 *
 * @param string $address Blob address
 * @return int|null The positive text ID, or null if the address does not use the 'tt' schema
 * @throws InvalidArgumentException If the address cannot be parsed, or its ID part is
 *  not a canonical positive integer
 */
public function getTextIdFromAddress( $address ) {
	[ $schema, $id, ] = self::splitBlobAddress( $address );

	if ( $schema !== 'tt' ) {
		return null;
	}

	$textId = intval( $id );

	// Same validity rule as fetchBlobs(): the ID must be at least 1 and must round-trip
	// through intval() unchanged. This rejects zero, negative values and trailing garbage.
	if ( $textId < 1 || $id !== (string)$textId ) {
		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return $textId;
}
681 
/**
 * Build the address that refers to the text table row with the given ID.
 *
 * @param int $id Text table row ID
 * @return string Address of the form "tt:<id>"
 */
public static function makeAddressFromTextId( $id ) {
	return "tt:{$id}";
}
698 
/**
 * Split a blob address into its schema, ID and parameters.
 *
 * @param string $address Address of the form "<schema>:<id>" with an optional "?<query>" suffix
 * @return array Three-element list [ $schema, $id, $parameters ]; the schema is
 *  lower-cased and the parameters are the query suffix parsed by wfCgiToArray()
 * @throws InvalidArgumentException If the address does not have the expected form
 */
public static function splitBlobAddress( $address ) {
	$matches = [];
	$matched = preg_match( '/^([-+.\w]+):([^\s?]+)(\?([^\s]*))?$/', $address, $matches );

	if ( !$matched ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	return [
		strtolower( $matches[1] ),
		$matches[2],
		wfCgiToArray( $matches[4] ?? '' ),
	];
}
720 
/**
 * Check whether blobs can currently not be written.
 *
 * @return bool True if external storage is in use and read-only, or if the load
 *  balancer reports a read-only reason
 */
public function isReadOnly() {
	$externalStoreLocked = $this->useExternalStore && $this->extStoreAccess->isReadOnly();
	if ( $externalStoreLocked ) {
		return true;
	}

	$reason = $this->getDBLoadBalancer()->getReadOnlyReason();

	return $reason !== false;
}
728 }
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
Helper class for DAO classes.
static getDBOptions( $bitfield)
Get an appropriate DB index, options, and fallback DB index for a query.
static hasFlags( $bitfield, $flags)
This is the main interface for fetching or inserting objects with ExternalStore.
MediaWiki exception.
Definition: MWException.php:29
Exception representing a failure to access a data blob.
Service for storing and loading Content objects representing revision data blobs.
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID.
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
__construct(ILoadBalancer $dbLoadBalancer, ExternalStoreAccess $extStoreAccess, WANObjectCache $cache, $dbDomain=false)
decompressData(string $blob, array $blobFlags)
Re-converts revision text according to its flags.
setCacheExpiry(int $cacheExpiry)
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
setLegacyEncoding(string $legacyEncoding)
Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
getBlobBatch( $blobAddresses, $queryFlags=0)
A batched version of BlobStore::getBlob.
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
setUseExternalStore(bool $useExternalStore)
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
setCompressBlobs( $compressBlobs)
expandBlob( $raw, $flags, $blobAddress=null)
Expand a raw data blob according to the flags given.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Definition: StatusValue.php:46
static newGood( $value=null)
Factory function for good results.
Definition: StatusValue.php:85
Multi-datacenter aware caching interface.
Interface for database access objects.
Service for loading and storing data blobs.
Definition: BlobStore.php:33
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:40
Create and track the database connections and transactions for a given database cluster.
Result wrapper for grabbing data queried from an IDatabase object.
$cache
Definition: mcc.php:33
const DB_PRIMARY
Definition: defines.php:28
return true
Definition: router.php:90