MediaWiki master
SqlBlobStore.php
Go to the documentation of this file.
1<?php
12namespace MediaWiki\Storage;
13
14use AppendIterator;
16use InvalidArgumentException;
19use StatusValue;
20use Wikimedia\Assert\Assert;
27
36class SqlBlobStore implements BlobStore {
37
/**
 * Process-cache group passed as 'pcGroup' in the blob cache options.
 * Note: the name has been taken unchanged from the old Revision class.
 */
public const TEXT_CACHE_GROUP = 'revisiontext:10';

/** Default cache lifetime for blobs, in seconds (7 days). */
public const DEFAULT_TTL = 7 * 24 * 3600;

/** @var int Cache lifetime in seconds; a falsy value makes getCacheTTL() report "uncacheable" */
private $cacheExpiry = self::DEFAULT_TTL;

/** @var bool Whether compressData() should try to gzdeflate new blobs */
private $compressBlobs = false;

/** @var string|false Source encoding decompressData() converts from for blobs lacking a utf-8 flag */
private $legacyEncoding = false;

/** @var bool Whether storeBlob() writes to external storage instead of the text table */
private $useExternalStore = false;
63
/**
 * @param ILoadBalancer $dbLoadBalancer Load balancer used to obtain database connections
 * @param ExternalStoreAccess $extStoreAccess Access object for external storage
 * @param WANObjectCache $cache Cache used for expanded blob contents
 * @param bool|string $dbDomain Database domain handed to getConnection() and to the
 *  external store as 'domain'; defaults to false
 */
public function __construct(
	private readonly ILoadBalancer $dbLoadBalancer,
	private readonly ExternalStoreAccess $extStoreAccess,
	private readonly WANObjectCache $cache,
	private readonly bool|string $dbDomain = false,
) {
}
82
/**
 * @return int The configured cache lifetime for blobs, in seconds
 */
public function getCacheExpiry() {
	return $this->cacheExpiry;
}
89
/**
 * @param int $cacheExpiry Cache lifetime for blobs, in seconds
 */
public function setCacheExpiry( int $cacheExpiry ) {
	$this->cacheExpiry = $cacheExpiry;
}
96
/**
 * @return bool Whether new blobs are compressed by compressData()
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
103
/**
 * @param bool $compressBlobs Whether compressData() should attempt gzdeflate compression
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
110
/**
 * @return string|false The legacy encoding assumed for blobs without a utf-8 flag,
 *  or false if none has been set
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
118
/**
 * Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
 *
 * @param string $legacyEncoding Encoding name, used as the iconv() source charset
 */
public function setLegacyEncoding( string $legacyEncoding ) {
	$this->legacyEncoding = $legacyEncoding;
}
130
/**
 * @return bool Whether storeBlob() writes new blobs to external storage
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
137
/**
 * @param bool $useExternalStore Whether storeBlob() should write to external storage
 */
public function setUseExternalStore( bool $useExternalStore ) {
	$this->useExternalStore = $useExternalStore;
}
144
/**
 * @return ILoadBalancer The load balancer injected through the constructor
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
151
/**
 * Obtain a connection for this store's database domain.
 *
 * @param int $index Server index; callers in this class pass DB_PRIMARY or DB_REPLICA
 * @return mixed Whatever the load balancer's getConnection() returns (a database handle)
 */
private function getDBConnection( $index ) {
	return $this->getDBLoadBalancer()->getConnection( $index, [], $this->dbDomain );
}
161
/**
 * Store a blob and return the address under which getBlob() can retrieve it.
 *
 * The data is first passed through compressData(). It is then written either to
 * external storage (address "es:<url>[?flags=<flags>]") or to a new row of the
 * text table (address "tt:<id>"), depending on the useExternalStore setting.
 *
 * @param string $data Raw blob data
 * @param array $hints Not used by this implementation
 * @throws BlobAccessException If writing to external storage fails
 * @return string The blob address
 */
public function storeBlob( $data, $hints = [] ) {
	// compressData() may rewrite $data in place; it returns the flags describing the result.
	$flags = $this->compressData( $data );

	if ( !$this->useExternalStore ) {
		$dbw = $this->getDBConnection( DB_PRIMARY );
		$dbw->newInsertQueryBuilder()
			->insertInto( 'text' )
			->row( [ 'old_text' => $data, 'old_flags' => $flags ] )
			->caller( __METHOD__ )
			->execute();

		return self::makeAddressFromTextId( $dbw->insertId() );
	}

	# Write to external storage; insert() hands back the URL of the stored object
	try {
		$url = $this->extStoreAccess->insert( $data, [ 'domain' => $this->dbDomain ] );
	} catch ( ExternalStoreException $e ) {
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
	if ( !$url ) {
		throw new BlobAccessException( "Failed to store text to external storage" );
	}

	$address = 'es:' . $url;
	return $flags ? $address . '?flags=' . $flags : $address;
}
204
/**
 * Retrieve a blob, given an address.
 *
 * The lookup goes through the WAN cache; on a miss the blob is loaded with fetchBlobs().
 * Failed loads are marked uncacheable and reported by exception.
 *
 * @param string $blobAddress
 * @param int $queryFlags Bitfield of IDBAccessObject::READ_* constants
 * @throws BadBlobException If the load failed with a 'badrevision' error
 * @throws BlobAccessException For any other load failure
 * @return string The blob content
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// Filled in by the cache callback when the load fails; stays null on a cache hit.
	$failure = null;
	$loadBlob = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags, &$failure ) {
		// $setOpts is left alone: blobs are immutable and negatives are not cached
		[ $blobs, $errors ] = $this->fetchBlobs( [ $blobAddress ], $queryFlags );
		$failure = $errors[$blobAddress] ?? null;
		if ( $failure ) {
			// No negative caching; a miss on a text row may be due to a corrupted replica DB
			$ttl = WANObjectCache::TTL_UNCACHEABLE;
		}
		return $blobs[$blobAddress];
	};

	$blob = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loadBlob,
		$this->getCacheOptions()
	);

	if ( $failure ) {
		[ $type, $message ] = $failure;
		throw $type === 'badrevision'
			? new BadBlobException( $message )
			: new BlobAccessException( $message );
	}

	Assert::postcondition( is_string( $blob ), 'Blob must not be null' );
	return $blob;
}
248
/**
 * A batched version of getBlob().
 *
 * @param string[] $blobAddresses
 * @param int $queryFlags Bitfield of IDBAccessObject::READ_* constants
 * @return StatusValue A good status whose value maps each blob address to its content,
 *  or to null where loading failed; every failure is attached as a warning.
 */
public function getBlobBatch( $blobAddresses, $queryFlags = 0 ) {
	// FIXME: All caching has temporarily been removed in I94c6f9ba7b9caeeb due to T235188.
	//        Caching behavior should be restored by reverting I94c6f9ba7b9caeeb as soon as
	//        the root cause of T235188 has been resolved.

	[ $fetched, $errors ] = $this->fetchBlobs( $blobAddresses, $queryFlags );

	// fetchBlobs() signals failure with false; this method's contract uses null instead.
	$blobsByAddress = [];
	foreach ( $fetched as $address => $blob ) {
		$blobsByAddress[$address] = ( $blob === false ) ? null : $blob;
	}

	$status = StatusValue::newGood( $blobsByAddress );
	foreach ( $errors as $error ) {
		// @phan-suppress-next-line PhanParamTooFewUnpack
		$status->warning( ...$error );
	}
	return $status;
}
278
/**
 * Load the blobs for a list of addresses.
 *
 * Each address is dispatched on its schema:
 *  - "es": expanded through expandBlob() with the "external" flag appended
 *  - "bad": yields an empty string together with a 'badrevision' error
 *  - "tt": collected and loaded from the text table in one query, followed by a second
 *    query on the fallback connection (if any) for rows the first query did not return
 *  - anything else: reported as an 'internalerror'
 *
 * A malformed "tt" address is reported as an error and is NOT looked up. Previously it
 * fell through to the query using its intval()'d ID, which could return the content of
 * an unrelated row and displace a valid address with the same numeric ID.
 *
 * @param string[] $blobAddresses
 * @param int $queryFlags Bitfield of IDBAccessObject::READ_* constants
 * @throws BlobAccessException If an address cannot be parsed by splitBlobAddress()
 * @throws BadBlobException If expandBlob() encounters the "error" flag
 * @return array[] Two-element list [ $result, $errors ]. $result maps each blob address to
 *  its content (string) or to false on failure; $errors maps each failed address to an
 *  [ error type, message ] pair.
 */
private function fetchBlobs( $blobAddresses, $queryFlags ) {
	$textIdToBlobAddress = [];
	$result = [];
	$errors = [];
	foreach ( $blobAddresses as $blobAddress ) {
		try {
			[ $schema, $id, $params ] = self::splitBlobAddress( $blobAddress );
		} catch ( InvalidArgumentException $ex ) {
			throw new BlobAccessException(
				$ex->getMessage() . '. Use findBadBlobs.php to remedy.',
				0,
				$ex
			);
		}

		if ( $schema === 'es' ) {
			if ( $params && isset( $params['flags'] ) ) {
				$blob = $this->expandBlob( $id, $params['flags'] . ',external', $blobAddress );
			} else {
				$blob = $this->expandBlob( $id, 'external', $blobAddress );
			}

			if ( $blob === false ) {
				$errors[$blobAddress] = [
					'internalerror',
					"Bad data in external store address $id. Use findBadBlobs.php to remedy."
				];
			}
			$result[$blobAddress] = $blob;
		} elseif ( $schema === 'bad' ) {
			// Database row was marked as "known bad"
			wfDebug(
				__METHOD__
				. ": loading known-bad content ($blobAddress), returning empty string"
			);
			$result[$blobAddress] = '';
			$errors[$blobAddress] = [
				'badrevision',
				'The content of this revision is missing or corrupted (bad schema)'
			];
		} elseif ( $schema === 'tt' ) {
			$textId = intval( $id );

			if ( $textId < 1 || $id !== (string)$textId ) {
				$errors[$blobAddress] = [
					'internalerror',
					"Bad blob address: $blobAddress. Use findBadBlobs.php to remedy."
				];
				$result[$blobAddress] = false;
				// Do not queue a lookup for a malformed ID: intval() may have mapped it
				// onto the ID of a different, valid row (e.g. "12abc" or "007").
				continue;
			}

			$textIdToBlobAddress[$textId] = $blobAddress;
		} else {
			$errors[$blobAddress] = [
				'internalerror',
				"Unknown blob address schema: $schema. Use findBadBlobs.php to remedy."
			];
			$result[$blobAddress] = false;
		}
	}

	$textIds = array_keys( $textIdToBlobAddress );
	if ( !$textIds ) {
		return [ $result, $errors ];
	}
	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, IDBAccessObject::READ_LATEST )
		? IDBAccessObject::READ_LATEST_IMMUTABLE
		: 0;
	[ $index, $options, $fallbackIndex, $fallbackOptions ] =
		self::getDBOptions( $queryFlags );
	// Text data is immutable; check replica DBs first.
	$dbConnection = $this->getDBConnection( $index );
	$rows = $dbConnection->newSelectQueryBuilder()
		->select( [ 'old_id', 'old_text', 'old_flags' ] )
		->from( 'text' )
		->where( [ 'old_id' => $textIds ] )
		->options( $options )
		->caller( __METHOD__ )->fetchResultSet();
	$numRows = $rows->numRows();

	// Fallback to DB_PRIMARY in some cases if not all the rows were found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( $numRows !== count( $textIds ) && $fallbackIndex !== null ) {
		$fetchedTextIds = [];
		foreach ( $rows as $row ) {
			$fetchedTextIds[] = $row->old_id;
		}
		$missingTextIds = array_diff( $textIds, $fetchedTextIds );
		$dbConnection = $this->getDBConnection( $fallbackIndex );
		$rowsFromFallback = $dbConnection->newSelectQueryBuilder()
			->select( [ 'old_id', 'old_text', 'old_flags' ] )
			->from( 'text' )
			->where( [ 'old_id' => $missingTextIds ] )
			->options( $fallbackOptions )
			->caller( __METHOD__ )->fetchResultSet();
		// Chain both result sets so the loop below handles them uniformly.
		$appendIterator = new AppendIterator();
		$appendIterator->append( $rows );
		$appendIterator->append( $rowsFromFallback );
		$rows = $appendIterator;
	}

	foreach ( $rows as $row ) {
		$blobAddress = $textIdToBlobAddress[$row->old_id];
		$blob = false;
		if ( $row->old_text !== null ) {
			$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
		}
		if ( $blob === false ) {
			$errors[$blobAddress] = [
				'internalerror',
				"Bad data in text row {$row->old_id}. Use findBadBlobs.php to remedy."
			];
		}
		$result[$blobAddress] = $blob;
	}

	// If we're still missing some of the rows, set errors for missing blobs.
	if ( count( $result ) !== count( $blobAddresses ) ) {
		foreach ( $blobAddresses as $blobAddress ) {
			if ( !isset( $result[$blobAddress ] ) ) {
				$errors[$blobAddress] = [
					'internalerror',
					"Unable to fetch blob at $blobAddress. Use findBadBlobs.php to remedy."
				];
				$result[$blobAddress] = false;
			}
		}
	}
	return [ $result, $errors ];
}
425
/**
 * Translate a READ_* bitfield into connection indexes and SELECT options.
 *
 * @param int $bitfield Bitfield of IDBAccessObject::READ_* constants
 * @return array List of [ $index, $options, $fallbackIndex, $fallbackOptions ]:
 *  the DB_* index to query first, the options for that query, the DB_* index to retry
 *  on (null if there is no fallback) and the options for the retry.
 */
private static function getDBOptions( int $bitfield ): array {
	if ( DBAccessObjectUtils::hasFlags( $bitfield, IDBAccessObject::READ_LATEST_IMMUTABLE ) ) {
		// Overrides READ_LATEST if set: try a replica first, then the primary
		[ $index, $fallbackIndex ] = [ DB_REPLICA, DB_PRIMARY ];
	} elseif ( DBAccessObjectUtils::hasFlags( $bitfield, IDBAccessObject::READ_LATEST ) ) {
		[ $index, $fallbackIndex ] = [ DB_PRIMARY, null ];
	} else {
		[ $index, $fallbackIndex ] = [ DB_REPLICA, null ];
	}

	if ( DBAccessObjectUtils::hasFlags( $bitfield, IDBAccessObject::READ_EXCLUSIVE ) ) {
		$lockingOptions = [ 'FOR UPDATE' ];
	} elseif ( DBAccessObjectUtils::hasFlags( $bitfield, IDBAccessObject::READ_LOCKING ) ) {
		$lockingOptions = [ 'LOCK IN SHARE MODE' ];
	} else {
		$lockingOptions = [];
	}

	if ( $fallbackIndex === null ) {
		// No fallback: the locking options apply to the only query
		return [ $index, $lockingOptions, $fallbackIndex, [] ];
	}

	// Locks on DB_REPLICA make no sense, so they go to the fallback query only
	return [ $index, [], $fallbackIndex, $lockingOptions ];
}
455
/**
 * Build the global cache key for a blob address.
 *
 * @param string $blobAddress
 * @return string Key made from a fixed prefix, the resolved database domain ID and the address
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->dbDomain );

	return $this->cache->makeGlobalKey( 'SqlBlobStore-blob', $domainId, $blobAddress );
}
473
/**
 * @return array Options passed to WANObjectCache::getWithSetCallback() for blob lookups
 */
private function getCacheOptions() {
	$options = [];
	$options['pcGroup'] = self::TEXT_CACHE_GROUP;
	$options['pcTTL'] = WANObjectCache::TTL_PROC_LONG;
	$options['segmentable'] = true;

	return $options;
}
486
/**
 * Expand a raw data blob according to the flags given.
 *
 * With the "external" flag, $raw is treated as a URL and the content is fetched from
 * external storage. If $blobAddress is given, that fetch goes through the cache.
 * Otherwise $raw itself is passed to decompressData().
 *
 * @param string $raw The raw blob data, or an external store URL
 * @param string|string[] $flags Blob flags, as a comma-separated string or a list
 * @param string|null $blobAddress Address used for caching and for diagnostics
 * @throws BadBlobException If the flags contain "error"
 * @return false|string The expanded blob, or false on failure
 */
public function expandBlob( $raw, $flags, $blobAddress = null ) {
	if ( is_string( $flags ) ) {
		$flags = self::explodeFlags( $flags );
	}
	if ( in_array( 'error', $flags ) ) {
		throw new BadBlobException(
			"The content of this revision is missing or corrupted (error flag)"
		);
	}

	if ( !in_array( 'external', $flags ) ) {
		return $this->decompressData( $raw, $flags, $blobAddress );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$urlParts = explode( '://', $url, 2 );
	if ( !isset( $urlParts[1] ) || $urlParts[1] === '' ) {
		// No "://" separator, or nothing after it
		return false;
	}

	$fetchAndDecompress = function () use ( $url, $flags, $blobAddress ) {
		$fetched = $this->extStoreAccess
			->fetchFromURL( $url, [ 'domain' => $this->dbDomain ] );

		return $fetched === false
			? false
			: $this->decompressData( $fetched, $flags, $blobAddress );
	};

	if ( !$blobAddress ) {
		return $fetchAndDecompress();
	}

	// The cached value should be decompressed, so the callback handles that.
	// $setOpts is ignored; blobs are immutable and negatives are not cached.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$fetchAndDecompress,
		$this->getCacheOptions()
	);
}
547
/**
 * If blob compression is enabled, compress the data in place.
 *
 * @param string &$blob Blob data; replaced by its gzdeflate()d form when compression succeeds
 * @return string Comma-separated list of flags describing the stored form of the blob
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$blobFlags = [ 'utf-8' ];

	if ( $this->compressBlobs ) {
		if ( !function_exists( 'gzdeflate' ) ) {
			wfDebug( __METHOD__ . " -- no zlib support, not compressing" );
		} else {
			$deflated = gzdeflate( $blob );
			if ( $deflated !== false ) {
				$blob = $deflated;
				$blobFlags[] = 'gzip';
			} else {
				// Keep the uncompressed data rather than failing the write
				wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
			}
		}
	}

	return implode( ',', $blobFlags );
}
588
/**
 * Re-converts revision text according to its flags.
 *
 * Handles, in order: the "error" flag, "gzip" inflation, "object" unserialization and,
 * if a legacy encoding is configured, conversion to UTF-8 for blobs not flagged as UTF-8.
 *
 * @param string $blob Blob in compressed/encoded form
 * @param string[] $blobFlags Flags describing how the blob was stored
 * @param string|null $blobAddress Only used in the warning emitted when inflation fails
 * @return string|false The decoded blob, or false on failure
 */
public function decompressData( string $blob, array $blobFlags, ?string $blobAddress = null ) {
	$hasFlag = static fn ( string $flag ): bool => in_array( $flag, $blobFlags );

	if ( $hasFlag( 'error' ) ) {
		// Error row, return false
		return false;
	}

	// Deal with optional compression of archived pages.
	// This can be done periodically via maintenance/compressOld.php, and
	// as pages are saved if $wgCompressRevisions is set.
	if ( $hasFlag( 'gzip' ) ) {
		// Silence native warning in favour of more detailed warning (T380347)
		// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
		$blob = @gzinflate( $blob );
		if ( $blob === false ) {
			$where = $blobAddress ? ' (at blob address ' . $blobAddress . ')' : '';
			wfWarn( __METHOD__ . ': gzinflate() failed' . $where );
			return false;
		}
	}

	if ( $hasFlag( 'object' ) ) {
		# Generic compressed storage
		$historyBlob = HistoryBlobUtils::unserialize( $blob );
		if ( !$historyBlob ) {
			// Invalid object
			return false;
		}
		$blob = $historyBlob->getText();
	}

	// Needed to support old revisions from before MW 1.5.
	$needsLegacyDecoding = $blob !== false
		&& $this->legacyEncoding
		&& !$hasFlag( 'utf-8' )
		&& !$hasFlag( 'utf8' );
	if ( $needsLegacyDecoding ) {
		// - Old revisions kept around in a legacy encoding?
		//   Upconvert on demand.
		// - "utf8" checked for compatibility with some broken
		//   conversion scripts 2008-12-30.
		// - Even with "//IGNORE" iconv can whine about illegal characters in
		//   *input* string. We just ignore those too.
		//   Ref https://bugs.php.net/bug.php?id=37166
		//   Ref https://phabricator.wikimedia.org/T18885
		//
		// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
		$blob = @iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $blob );
	}

	return $blob;
}
654
/**
 * Get the TTL to use when caching blobs.
 *
 * @return int The configured cache expiry, or TTL_UNCACHEABLE when the cache is at least
 *  as durable as an RDBMS or when no expiry is configured
 */
private function getCacheTTL() {
	$cache = $this->cache;

	// Do not cache RDBMs blobs in...the RDBMs store
	$isRdbmsBacked =
		$cache->getQoS( BagOStuff::ATTR_DURABILITY ) >= BagOStuff::QOS_DURABILITY_RDBMS;

	if ( $isRdbmsBacked || !$this->cacheExpiry ) {
		return $cache::TTL_UNCACHEABLE;
	}

	return $this->cacheExpiry;
}
674
/**
 * Returns an ID corresponding to the old_id field in the text table, corresponding
 * to the given $address.
 *
 * Only addresses with the "tt" schema refer to text rows. The ID must be a positive
 * integer in canonical decimal form. This is the same rule fetchBlobs() applies, so an
 * address rejected there (such as "tt:-5") is rejected here too, not returned as an ID.
 *
 * @param string $address The blob address
 * @throws InvalidArgumentException If $address is malformed, or has the "tt" schema but
 *  does not carry a valid text ID
 * @return int|null The text ID, or null if the address does not use the "tt" schema
 */
public function getTextIdFromAddress( $address ) {
	[ $schema, $id, ] = self::splitBlobAddress( $address );

	if ( $schema !== 'tt' ) {
		return null;
	}

	$textId = intval( $id );

	// The string round-trip rejects forms like "007", "12abc" or "+3".
	if ( $textId < 1 || $id !== (string)$textId ) {
		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return $textId;
}
710
/**
 * Returns an address referring to content stored in the text table row with the given ID.
 *
 * @param int $id Text table row ID
 * @return string Address of the form "tt:<id>"
 */
public static function makeAddressFromTextId( $id ) {
	return "tt:{$id}";
}
727
/**
 * Split a comma-separated old_flags value into its constituent parts.
 *
 * @param string $flagsString
 * @return string[] The individual flags; an empty list for an empty string
 */
public static function explodeFlags( string $flagsString ) {
	if ( $flagsString === '' ) {
		// explode() would yield [ '' ] here, which is not an empty flag list
		return [];
	}

	return explode( ',', $flagsString );
}
737
/**
 * Splits a blob address into three parts: the schema, the ID, and parameters/flags.
 *
 * The expected form is "<schema>:<id>[?<query>]", without any whitespace. The pattern
 * uses the D modifier so that "$" only matches at the very end of the string; without
 * it, an address followed by a trailing newline would be accepted.
 *
 * @param string $address The blob address
 * @throws InvalidArgumentException If $address does not have the expected form
 * @return array List of [ $schema, $id, $parameters ]: the lower-cased schema, the ID
 *  string, and the query part decoded by wfCgiToArray()
 */
public static function splitBlobAddress( $address ) {
	if ( !preg_match( '/^([-+.\w]+):([^\s?]+)(\?([^\s]*))?$/D', $address, $m ) ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	$schema = strtolower( $m[1] );
	$id = $m[2];
	// $m[4] is absent when the address has no "?..." part
	$parameters = wfCgiToArray( $m[4] ?? '' );

	return [ $schema, $id, $parameters ];
}
758
/**
 * Check if the blob metadata or backing blob data store is read-only.
 *
 * @return bool True if external storage is in use and read-only, or if the load
 *  balancer reports a read-only reason
 */
public function isReadOnly() {
	$externalStoreReadOnly = $this->useExternalStore && $this->extStoreAccess->isReadOnly();

	return $externalStoreReadOnly
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
767}
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
const DB_REPLICA
Definition defines.php:26
const DB_PRIMARY
Definition defines.php:28
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:69
This is the main interface for fetching or inserting objects with ExternalStore.
Exception thrown when a blob has the "bad" content address schema, or has "error" in its old_flags,...
Exception representing a failure to access a data blob.
Service for storing and loading Content objects representing revision data blobs.
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID.
decompressData(string $blob, array $blobFlags, ?string $blobAddress=null)
Re-converts revision text according to its flags.
__construct(private readonly ILoadBalancer $dbLoadBalancer, private readonly ExternalStoreAccess $extStoreAccess, private readonly WANObjectCache $cache, private readonly bool|string $dbDomain=false,)
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
setCacheExpiry(int $cacheExpiry)
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
setLegacyEncoding(string $legacyEncoding)
Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
getBlobBatch( $blobAddresses, $queryFlags=0)
A batched version of BlobStore::getBlob.
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
setUseExternalStore(bool $useExternalStore)
isReadOnly()
Check if the blob metadata or backing blob data store is read-only. Returns bool.
expandBlob( $raw, $flags, $blobAddress=null)
Expand a raw data blob according to the flags given.
static explodeFlags(string $flagsString)
Split a comma-separated old_flags value into its constituent parts.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Abstract class for any ephemeral data store.
Definition BagOStuff.php:73
Multi-datacenter aware caching interface.
Service for loading and storing data blobs.
Definition BlobStore.php:19
Interface for database access objects.
Interface to a relational database.
Definition IDatabase.php:31
This class is a delegate to ILBFactory for a given database cluster.
getCacheKey()
Get the cache key used to store status.