MediaWiki  master
SqlBlobStore.php
Go to the documentation of this file.
1 <?php
28 namespace MediaWiki\Storage;
29 
30 use AppendIterator;
33 use IDBAccessObject;
34 use IExpiringStore;
35 use InvalidArgumentException;
36 use MWException;
37 use StatusValue;
38 use WANObjectCache;
39 use Wikimedia\Assert\Assert;
40 use Wikimedia\AtEase\AtEase;
44 
54 
55  // Note: the name has been taken unchanged from the old Revision class.
56  public const TEXT_CACHE_GROUP = 'revisiontext:10';
57 
61  private $dbLoadBalancer;
62 
66  private $extStoreAccess;
67 
71  private $cache;
72 
76  private $dbDomain;
77 
81  private $cacheExpiry = 604800; // 7 days
82 
86  private $compressBlobs = false;
87 
91  private $legacyEncoding = false;
92 
96  private $useExternalStore = false;
97 
/**
 * Wire up the services this blob store depends on.
 *
 * @param ILoadBalancer $dbLoadBalancer Load balancer used to obtain database connections
 * @param ExternalStoreAccess $extStoreAccess Access layer for external blob storage
 * @param WANObjectCache $cache Cache used for loaded blobs
 * @param string|bool $dbDomain DB domain ID of a wiki, or false for the local one
 */
public function __construct(
	ILoadBalancer $dbLoadBalancer,
	ExternalStoreAccess $extStoreAccess,
	WANObjectCache $cache,
	$dbDomain = false
) {
	$this->dbLoadBalancer = $dbLoadBalancer;
	$this->extStoreAccess = $extStoreAccess;
	$this->cache = $cache;
	$this->dbDomain = $dbDomain;
}
120 
/**
 * Get the configured lifetime of cached blobs.
 *
 * @return int Cache expiry in seconds (defaults to 604800, i.e. 7 days)
 */
public function getCacheExpiry() {
	return $this->cacheExpiry;
}
127 
/**
 * Set the lifetime of cached blobs.
 *
 * @param int $cacheExpiry Cache expiry in seconds; a falsy value (0) makes
 *  getCacheTTL() fall back to "uncacheable"
 */
public function setCacheExpiry( int $cacheExpiry ) {
	$this->cacheExpiry = $cacheExpiry;
}
134 
/**
 * Tell whether compressData() will try to compress newly stored blobs.
 *
 * @return bool
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
141 
/**
 * Enable or disable compression of newly stored blobs.
 *
 * @param bool $compressBlobs Value later tested for truthiness by compressData()
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
148 
/**
 * Get the legacy encoding assumed for blobs that lack the utf-8 flag.
 *
 * @return string|false The encoding name, or false if none has been set
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
156 
/**
 * Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
 *
 * decompressData() passes this value to iconv() as the source encoding.
 *
 * @param string $legacyEncoding Encoding name
 */
public function setLegacyEncoding( string $legacyEncoding ) {
	$this->legacyEncoding = $legacyEncoding;
}
168 
/**
 * Tell whether newly stored blobs are written to external storage.
 *
 * @return bool
 */
public function getUseExternalStore() {
	// The getter must actually hand back the flag; an empty body would yield null.
	return $this->useExternalStore;
}
175 
/**
 * Choose whether storeBlob() writes blob data to external storage.
 *
 * @param bool $useExternalStore
 */
public function setUseExternalStore( bool $useExternalStore ) {
	$this->useExternalStore = $useExternalStore;
}
182 
/**
 * Accessor for the load balancer injected through the constructor.
 *
 * @return ILoadBalancer
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
189 
/**
 * Obtain a connection handle for this store's DB domain.
 *
 * @param int $index Connection index to request from the load balancer
 *  (callers pass DB_PRIMARY or an index computed by DBAccessObjectUtils)
 * @return mixed Whatever getConnectionRef() returns; used as a database handle by callers
 */
private function getDBConnection( $index ) {
	return $this->getDBLoadBalancer()->getConnectionRef( $index, [], $this->dbDomain );
}
199 
/**
 * Stores an arbitrary blob of data and returns an address that can be used with
 * getBlob() to retrieve it.
 *
 * The data is first passed through compressData(). If external storage is enabled,
 * the payload goes there and only the returned URL is written to the text table,
 * with the 'external' flag appended.
 *
 * @param string $data Raw blob data
 * @param array $hints Not used by this implementation
 * @return string Blob address of the form "tt:<text id>"
 * @throws BlobAccessException If external storage fails or an MWException is raised
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() may replace $data with its compressed form
		$flags = $this->compressData( $data );

		if ( $this->useExternalStore ) {
			// Hand the payload to external storage; what comes back is its URL
			$data = $this->extStoreAccess->insert( $data, [ 'domain' => $this->dbDomain ] );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}
			$flags = ( $flags ? $flags . ',' : $flags ) . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$dbw = $this->getDBConnection( DB_PRIMARY );
		$row = [ 'old_text' => $data, 'old_flags' => $flags ];
		$dbw->insert( 'text', $row, __METHOD__ );

		return self::makeAddressFromTextId( $dbw->insertId() );
	} catch ( MWException $e ) {
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
246 
/**
 * Retrieve a blob, given an address.
 *
 * The lookup goes through the cache; on a miss the blob is loaded via fetchBlobs().
 *
 * @param string $blobAddress Address as returned by storeBlob()
 * @param int $queryFlags Bitfield passed on to fetchBlobs()
 * @return string The blob data
 * @throws BlobAccessException If fetchBlobs() reported an error for this address
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// Filled in by the loader below so a failure can be reported after the cache call
	$error = null;
	$loader = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags, &$error ) {
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		[ $blobs, $failures ] = $this->fetchBlobs( [ $blobAddress ], $queryFlags );
		// No negative caching; negative hits on text rows may be due to corrupted replica DBs
		$error = $failures[$blobAddress] ?? null;
		return $blobs[$blobAddress];
	};
	$cacheOptions = [ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => IExpiringStore::TTL_PROC_LONG ];

	$blob = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loader,
		$cacheOptions
	);

	if ( $error ) {
		throw new BlobAccessException( $error );
	}

	Assert::postcondition( is_string( $blob ), 'Blob must not be null' );
	return $blob;
}
283 
/**
 * A batched version of getBlob().
 *
 * @param string[] $blobAddresses Addresses to load
 * @param int $queryFlags Bitfield passed on to fetchBlobs()
 * @return StatusValue Good status whose value maps each address to its blob, or to
 *  null where loading failed; each failure is added as an 'internalerror' warning
 */
public function getBlobBatch( $blobAddresses, $queryFlags = 0 ) {
	// FIXME: All caching has temporarily been removed in I94c6f9ba7b9caeeb due to T235188.
	// Caching behavior should be restored by reverting I94c6f9ba7b9caeeb as soon as
	// the root cause of T235188 has been resolved.

	[ $fetched, $errors ] = $this->fetchBlobs( $blobAddresses, $queryFlags );

	// fetchBlobs() marks failures with false; callers of this method get null instead
	$blobsByAddress = [];
	foreach ( $fetched as $address => $blob ) {
		$blobsByAddress[$address] = ( $blob === false ) ? null : $blob;
	}

	$status = StatusValue::newGood( $blobsByAddress );
	foreach ( $errors as $message ) {
		$status->warning( 'internalerror', $message );
	}
	return $status;
}
314 
/**
 * MCR migration note: this corresponded to Revision::fetchText.
 *
 * Loads the blobs for the given addresses from the text table, expanding each row
 * through expandBlob().
 *
 * @param string[] $blobAddresses Addresses to load
 * @param int $queryFlags Bitfield; READ_LATEST additionally enables READ_LATEST_IMMUTABLE
 * @return array[] Two-element list [ $result, $errors ]: $result maps each address to
 *  its blob string, or to false on failure ('bad' addresses map to an empty string);
 *  $errors maps each failed address to an error message
 * @throws BlobAccessException If an address cannot be parsed by splitBlobAddress()
 */
private function fetchBlobs( $blobAddresses, $queryFlags ) {
	$textIdToBlobAddress = [];
	$result = [];
	$errors = [];
	foreach ( $blobAddresses as $blobAddress ) {
		try {
			list( $schema, $id ) = self::splitBlobAddress( $blobAddress );
		} catch ( InvalidArgumentException $ex ) {
			throw new BlobAccessException(
				$ex->getMessage() . '. Use findBadBlobs.php to remedy.',
				0,
				$ex
			);
		}

		// TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
		if ( $schema === 'bad' ) {
			// Database row was marked as "known bad", no need to trigger an error.
			wfDebug(
				__METHOD__
				. ": loading known-bad content ($blobAddress), returning empty string"
			);
			$result[$blobAddress] = '';
			continue;
		}

		if ( $schema !== 'tt' ) {
			$errors[$blobAddress] = "Unknown blob address schema: $schema."
				. ' Use findBadBlobs.php to remedy.';
			$result[$blobAddress] = false;
			continue;
		}

		$textId = intval( $id );
		if ( $textId < 1 || $id !== (string)$textId ) {
			$errors[$blobAddress] = "Bad blob address: $blobAddress."
				. ' Use findBadBlobs.php to remedy.';
			$result[$blobAddress] = false;
			// Do not query for a malformed ID: intval() may have truncated it to the ID
			// of an unrelated row, which would overwrite this failure with that row's
			// blob or hijack the mapping of a valid address in the same batch.
			continue;
		}

		$textIdToBlobAddress[$textId] = $blobAddress;
	}

	$textIds = array_keys( $textIdToBlobAddress );
	if ( !$textIds ) {
		return [ $result, $errors ];
	}
	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST )
		? self::READ_LATEST_IMMUTABLE
		: 0;
	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );
	// Text data is immutable; check replica DBs first.
	$dbConnection = $this->getDBConnection( $index );
	$rows = $dbConnection->select(
		'text',
		[ 'old_id', 'old_text', 'old_flags' ],
		[ 'old_id' => $textIds ],
		__METHOD__,
		$options
	);
	$numRows = 0;
	if ( $rows instanceof IResultWrapper ) {
		$numRows = $rows->numRows();
	}

	// Fallback to DB_PRIMARY in some cases if not all the rows were found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( $numRows !== count( $textIds ) && $fallbackIndex !== null ) {
		$fetchedTextIds = [];
		foreach ( $rows as $row ) {
			$fetchedTextIds[] = $row->old_id;
		}
		$missingTextIds = array_diff( $textIds, $fetchedTextIds );
		$dbConnection = $this->getDBConnection( $fallbackIndex );
		$rowsFromFallback = $dbConnection->select(
			'text',
			[ 'old_id', 'old_text', 'old_flags' ],
			[ 'old_id' => $missingTextIds ],
			__METHOD__,
			$fallbackOptions
		);
		// Iterate over the rows of both queries as one sequence
		$appendIterator = new AppendIterator();
		$appendIterator->append( $rows );
		$appendIterator->append( $rowsFromFallback );
		$rows = $appendIterator;
	}

	foreach ( $rows as $row ) {
		$blobAddress = $textIdToBlobAddress[$row->old_id];
		$blob = false;
		if ( $row->old_text !== null ) {
			$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
		}
		if ( $blob === false ) {
			$errors[$blobAddress] = "Bad data in text row {$row->old_id}."
				. ' Use findBadBlobs.php to remedy.';
		}
		$result[$blobAddress] = $blob;
	}

	// If we're still missing some of the rows, set errors for missing blobs.
	if ( count( $result ) !== count( $blobAddresses ) ) {
		foreach ( $blobAddresses as $blobAddress ) {
			if ( !isset( $result[$blobAddress ] ) ) {
				$errors[$blobAddress] = "Unable to fetch blob at $blobAddress."
					. ' Use findBadBlobs.php to remedy.';
				$result[$blobAddress] = false;
			}
		}
	}
	return [ $result, $errors ];
}
438 
/**
 * Get a cache key for a given blob address.
 *
 * The key is global and combines a fixed prefix, the resolved DB domain ID
 * and the address itself.
 *
 * @param string $blobAddress
 * @return string
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->dbDomain );

	return $this->cache->makeGlobalKey( 'SqlBlobStore-blob', $domainId, $blobAddress );
}
456 
/**
 * Expand a raw data blob according to the flags given.
 *
 * With the 'external' flag, $raw is treated as an external store URL and the
 * payload is fetched from there; otherwise $raw itself is the payload. In both
 * cases the payload is passed through decompressData().
 *
 * @param string $raw Raw blob data, or an external store URL
 * @param string|string[] $flags Blob flags, as a comma separated string or an array
 * @param string|null $cacheKey If given, external blobs are loaded through the cache
 *  under a key derived from this value via getCacheKey()
 * @return string|false The expanded blob, or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	if ( !in_array( 'external', $flags ) ) {
		// Inline blob: the table holds the payload itself
		return $this->decompressData( $raw, $flags );
	}

	// External blob: the text in the table is URL-only
	$url = $raw;
	$parts = explode( '://', $url, 2 );
	if ( count( $parts ) == 1 || $parts[1] == '' ) {
		return false;
	}

	// Shared loader for the cached and the uncached path.
	// Ignore $setOpts; blobs are immutable and negatives are not cached
	$loadExternal = function () use ( $url, $flags ) {
		$blob = $this->extStoreAccess
			->fetchFromURL( $url, [ 'domain' => $this->dbDomain ] );

		return $blob === false ? false : $this->decompressData( $blob, $flags );
	};

	if ( !$cacheKey ) {
		return $loadExternal();
	}

	// The cached value should be decompressed, so handle that and return here.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $cacheKey ),
		$this->getCacheTTL(),
		$loadExternal,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
511 
/**
 * Compress the blob in place if compression is enabled, and report the flags
 * describing how it was stored.
 *
 * @param string &$blob Blob data; replaced by its deflated form when compression succeeds
 * @return string Comma separated flags: always 'utf-8', plus 'gzip' if the blob was deflated
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$blobFlags = [ 'utf-8' ];

	if ( $this->compressBlobs ) {
		if ( !function_exists( 'gzdeflate' ) ) {
			wfDebug( __METHOD__ . " -- no zlib support, not compressing" );
		} else {
			$deflated = gzdeflate( $blob );

			if ( $deflated !== false ) {
				$blob = $deflated;
				$blobFlags[] = 'gzip';
			} else {
				// Leave the blob uncompressed and unflagged
				wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
			}
		}
	}

	return implode( ',', $blobFlags );
}
552 
/**
 * Re-converts revision text according to its flags.
 *
 * Steps are applied in this order: reject 'error' rows, inflate 'gzip' data,
 * unwrap 'object' data, then convert from the legacy encoding if one is
 * configured and the blob is not flagged as UTF-8.
 *
 * @param string $blob Blob in compressed/encoded form
 * @param string[] $blobFlags Flags describing how the blob was stored
 * @return string|false The decoded blob, or false on error
 */
public function decompressData( string $blob, array $blobFlags ) {
	// Error row, nothing to decode
	if ( in_array( 'error', $blobFlags ) ) {
		return false;
	}

	if ( in_array( 'gzip', $blobFlags ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() runs on stored data here; confirm this data is trusted.
		$obj = unserialize( $blob );
		if ( !is_object( $obj ) ) {
			// Invalid object
			return false;
		}
		$blob = $obj->getText();
	}

	// "utf8" is checked for compatibility with some broken conversion scripts (2008-12-30)
	$markedUtf8 = in_array( 'utf-8', $blobFlags ) || in_array( 'utf8', $blobFlags );

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	if ( $blob !== false && $this->legacyEncoding && !$markedUtf8 ) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# Even with //IGNORE iconv can whine about illegal characters in
		# *input* string. We just ignore those too.
		# REF: https://bugs.php.net/bug.php?id=37166
		# REF: https://phabricator.wikimedia.org/T18885
		AtEase::suppressWarnings();
		$blob = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $blob );
		AtEase::restoreWarnings();
	}

	return $blob;
}
615 
/**
 * Get the text cache TTL.
 *
 * @return int The configured cache expiry, or the cache's TTL_UNCACHEABLE value when
 *  the expiry is falsy or the cache reports RDBMS-level durability
 */
private function getCacheTTL() {
	// Local alias, needed for the $cache::CONSTANT lookups below
	$cache = $this->cache;

	if ( $cache->getQoS( $cache::ATTR_DURABILITY ) >= $cache::QOS_DURABILITY_RDBMS ) {
		// Do not cache RDBMs blobs in...the RDBMs store
		$ttl = $cache::TTL_UNCACHEABLE;
	} else {
		$ttl = $this->cacheExpiry ?: $cache::TTL_UNCACHEABLE;
	}

	return $ttl;
}
635 
/**
 * Returns an ID corresponding to the old_id field in the text table, for the
 * given blob address.
 *
 * @param string $address Blob address
 * @return int|null The text ID, or null if the address does not use the 'tt' schema
 * @throws InvalidArgumentException If the address cannot be parsed, or the ID part
 *  of a 'tt' address is zero or not a canonical integer
 */
public function getTextIdFromAddress( $address ) {
	[ $schema, $id ] = self::splitBlobAddress( $address );

	if ( $schema !== 'tt' ) {
		return null;
	}

	$textId = intval( $id );

	// The ID must be non-zero and must round-trip exactly through intval()
	$isCanonical = ( $id === (string)$textId );
	if ( $textId === 0 || !$isCanonical ) {
		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return $textId;
}
671 
/**
 * Returns an address referring to content stored in the text table row with the given ID.
 *
 * @param int $id Value of the old_id field
 * @return string Address of the form "tt:<id>"
 */
public static function makeAddressFromTextId( $id ) {
	$schemaPrefix = 'tt:';

	return $schemaPrefix . $id;
}
688 
/**
 * Splits a blob address into three parts: the schema, the ID, and parameters/flags.
 *
 * @param string $address Address of the form "<schema>:<id>" with an optional "?<query>" suffix
 * @return array Three-element list [ $schema, $id, $parameters ]; the schema is
 *  lower-cased and the parameters come from wfCgiToArray() applied to the query part
 * @throws InvalidArgumentException If the address does not match the expected format
 */
public static function splitBlobAddress( $address ) {
	$matches = [];
	$pattern = '/^([-+.\w]+):([^\s?]+)(\?([^\s]*))?$/';

	if ( preg_match( $pattern, $address, $matches ) ) {
		// The query group is absent from $matches when the address has no "?" part
		$query = $matches[4] ?? '';

		return [ strtolower( $matches[1] ), $matches[2], wfCgiToArray( $query ) ];
	}

	throw new InvalidArgumentException( "Bad blob address: $address" );
}
710 
/**
 * Check if the blob metadata or backing blob data store is read-only.
 *
 * @return bool True if external storage is in use and read-only, or if the
 *  load balancer reports a read-only reason
 */
public function isReadOnly() {
	// The external store only matters when blobs are actually written to it
	$extStoreReadOnly = $this->useExternalStore && $this->extStoreAccess->isReadOnly();

	return $extStoreReadOnly
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
718 }
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
Helper class for DAO classes.
static getDBOptions( $bitfield)
Get an appropriate DB index, options, and fallback DB index for a query.
static hasFlags( $bitfield, $flags)
This is the main interface for fetching or inserting objects with ExternalStore.
MediaWiki exception.
Definition: MWException.php:29
Exception representing a failure to access a data blob.
Service for storing and loading Content objects.
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID.
ExternalStoreAccess $extStoreAccess
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
getCacheTTL()
Get the text cache TTL.
string bool $dbDomain
DB domain ID of a wiki or false for the local one.
__construct(ILoadBalancer $dbLoadBalancer, ExternalStoreAccess $extStoreAccess, WANObjectCache $cache, $dbDomain=false)
decompressData(string $blob, array $blobFlags)
Re-converts revision text according to its flags.
setCacheExpiry(int $cacheExpiry)
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
setLegacyEncoding(string $legacyEncoding)
Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
getCacheKey( $blobAddress)
Get a cache key for a given Blob address.
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
getBlobBatch( $blobAddresses, $queryFlags=0)
A batched version of BlobStore::getBlob.
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
setUseExternalStore(bool $useExternalStore)
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
setCompressBlobs( $compressBlobs)
fetchBlobs( $blobAddresses, $queryFlags)
MCR migration note: this corresponded to Revision::fetchText.
Generic operation result class. Has a warning/error list, a boolean status and an arbitrary value.
Definition: StatusValue.php:43
static newGood( $value=null)
Factory function for good results.
Definition: StatusValue.php:82
Multi-datacenter aware caching interface.
Interface for database access objects.
Generic interface providing TTL constants for lightweight expiring object stores.
Service for loading and storing data blobs.
Definition: BlobStore.php:35
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:39
Database cluster connection, tracking, load balancing, and transaction manager interface.
Result wrapper for grabbing data queried from an IDatabase object.
const DB_PRIMARY
Definition: defines.php:28