MediaWiki REL1_34
SqlBlobStore.php
Go to the documentation of this file.
1<?php
27namespace MediaWiki\Storage;
28
29use AppendIterator;
33use InvalidArgumentException;
34use MWException;
35use StatusValue;
38use Wikimedia\Assert\Assert;
39use Wikimedia\AtEase\AtEase;
42
52
// Note: the name has been taken unchanged from the Revision class.
const TEXT_CACHE_GROUP = 'revisiontext:10';

/**
 * Load balancer used to obtain connections to the database holding the text table.
 * NOTE(review): declaration restored; the constructor assigns this property — confirm type.
 *
 * @var ILoadBalancer
 */
private $dbLoadBalancer;

/**
 * Access layer used to insert into and fetch from external storage.
 * NOTE(review): declaration restored; the constructor assigns this property — confirm type.
 *
 * @var ExternalStoreAccess
 */
private $extStoreAccess;

/**
 * Cache used for expanded blobs.
 *
 * @var WANObjectCache
 */
private $cache;

/**
 * DB domain ID of a wiki, or false for the local one.
 *
 * @var string|bool
 */
private $dbDomain;

/**
 * TTL handed to the blob cache; a falsy value makes blobs uncacheable (see getCacheTTL()).
 *
 * @var int
 */
private $cacheExpiry = 604800; // 7 days

/**
 * Whether compressData() should try to gzdeflate new blobs.
 *
 * @var bool
 */
private $compressBlobs = false;

/**
 * Source encoding assumed by decompressData() for blobs lacking a utf-8 flag;
 * false disables the conversion.
 *
 * @var string|bool
 */
private $legacyEncoding = false;

/**
 * Whether storeBlob() writes the data to external storage and keeps only the URL in the text table.
 *
 * @var bool
 */
private $useExternalStore = false;
95
/**
 * NOTE(review): the first three parameters were missing from the signature although the body
 * assigns them; they are restored here without type declarations because the matching imports
 * are not visible. Expected types are given below — confirm against the class's callers.
 *
 * @param ILoadBalancer $dbLoadBalancer Load balancer used to acquire database connections
 * @param ExternalStoreAccess $extStoreAccess Access layer for external storage
 * @param WANObjectCache $cache Cache used for expanded blobs
 * @param bool|string $dbDomain DB domain ID of a wiki, or false for the local one
 */
public function __construct(
	$dbLoadBalancer,
	$extStoreAccess,
	$cache,
	$dbDomain = false
) {
	$this->dbLoadBalancer = $dbLoadBalancer;
	$this->extStoreAccess = $extStoreAccess;
	$this->cache = $cache;
	$this->dbDomain = $dbDomain;
}
118
/**
 * Get the configured cache TTL for blobs.
 *
 * @return int The value last passed to setCacheExpiry(), or the built-in default
 */
public function getCacheExpiry() {
	$expiry = $this->cacheExpiry;

	return $expiry;
}
125
/**
 * Set the cache TTL used for blobs.
 *
 * @param int $cacheExpiry New TTL; must be an integer, which is enforced by an
 *        Assert::parameterType() check before the value is stored
 */
public function setCacheExpiry( $cacheExpiry ) {
	// Validate first so that an invalid value never replaces the current setting.
	Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );
	$this->cacheExpiry = $cacheExpiry;
}
134
/**
 * Tell whether new blobs are compressed by compressData().
 *
 * @return bool The current value of the compression setting (false by default)
 */
public function getCompressBlobs() {
	// The body was empty, so the getter always yielded null; return the backing property.
	return $this->compressBlobs;
}
141
/**
 * Enable or disable compression of new blobs.
 *
 * The value is stored as given; compressData() only tests it for truthiness.
 *
 * @param bool $compressBlobs Whether compressData() should try to gzdeflate the data
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
148
/**
 * Get the legacy encoding assumed for blobs that are not flagged as utf-8.
 *
 * @return string|bool The encoding name, or false if no legacy encoding is configured
 */
public function getLegacyEncoding() {
	// The body was empty, so the getter always yielded null; return the backing property.
	return $this->legacyEncoding;
}
156
162 wfDeprecated( __METHOD__ );
163 return null;
164 }
165
/**
 * Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
 *
 * NOTE(review): the function header was missing above this body; it is restored with
 * public visibility, which is presumed for a configuration setter — confirm.
 *
 * @param string $legacyEncoding Encoding name; later handed to iconv() by decompressData()
 */
public function setLegacyEncoding( $legacyEncoding ) {
	Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );

	$this->legacyEncoding = $legacyEncoding;
}
179
/**
 * Tell whether storeBlob() writes new blobs to external storage.
 *
 * @return bool The current value of the external-store setting (false by default)
 */
public function getUseExternalStore() {
	// The body was empty, so the getter always yielded null; return the backing property.
	return $this->useExternalStore;
}
186
/**
 * Enable or disable writing new blobs to external storage.
 *
 * NOTE(review): the function header was missing above this body; it is restored with
 * public visibility, which is presumed for a configuration setter — confirm.
 *
 * @param bool $useExternalStore Whether storeBlob() should insert into the external store
 */
public function setUseExternalStore( $useExternalStore ) {
	Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );

	$this->useExternalStore = $useExternalStore;
}
195
/**
 * Get the load balancer injected through the constructor.
 *
 * @return ILoadBalancer
 */
private function getDBLoadBalancer() {
	// The body was empty, which made getDBConnection() and isReadOnly() call a method on null.
	return $this->dbLoadBalancer;
}
202
/**
 * Get a connection handle for this store's DB domain.
 *
 * @param int $index Connection index passed through to the load balancer
 *        (DB_MASTER is used by storeBlob(); fetchBlobs() passes whatever the query flags select)
 * @return mixed Whatever the load balancer's getConnectionRef() returns
 */
private function getDBConnection( $index ) {
	return $this->getDBLoadBalancer()->getConnectionRef( $index, [], $this->dbDomain );
}
212
/**
 * Store a blob of data and return an address that getBlob() accepts.
 *
 * The data is first passed through compressData(). If the external store is enabled, the
 * (possibly compressed) data is inserted there and only the resulting URL, flagged as
 * 'external', is written to the text table.
 *
 * @param string $data The data to store
 * @param array $hints Not used by this implementation
 * @return string A 'tt:' address built from the ID of the inserted text row
 * @throws BlobAccessException If the external store insert fails, or if an MWException
 *         is raised while storing
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		$flagString = $this->compressData( $data );

		# Write to external storage if required
		if ( $this->useExternalStore ) {
			// Store and get the URL
			$url = $this->extStoreAccess->insert( $data, [ 'domain' => $this->dbDomain ] );
			if ( !$url ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}
			$data = $url;
			$flagString = ( $flagString ? $flagString . ',' : $flagString ) . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$dbw = $this->getDBConnection( DB_MASTER );

		$textRow = [
			'old_id' => $dbw->nextSequenceValue( 'text_old_id_seq' ),
			'old_text' => $data,
			'old_flags' => $flagString,
		];
		$dbw->insert( 'text', $textRow, __METHOD__ );

		return self::makeAddressFromTextId( $dbw->insertId() );
	} catch ( MWException $e ) {
		// Re-wrap so that callers only have to deal with the blob store's own exception type.
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
264
/**
 * Retrieve a blob, given an address.
 *
 * The lookup goes through the cache; on a miss the blob is loaded via fetchBlobs().
 *
 * @param string $blobAddress The blob address, as returned by storeBlob()
 * @param int $queryFlags Query flags passed through to fetchBlobs()
 * @return string The blob data
 * @throws BlobAccessException If fetchBlobs() reported an error for this address
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// Filled in by the loader below when the fetch fails.
	$failure = null;
	$loader = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags, &$failure ) {
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		list( $blobs, $problems ) = $this->fetchBlobs( [ $blobAddress ], $queryFlags );
		// No negative caching; negative hits on text rows may be due to corrupted replica DBs
		$failure = $problems[$blobAddress] ?? null;
		return $blobs[$blobAddress];
	};

	$blob = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loader,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => IExpiringStore::TTL_PROC_LONG ]
	);

	if ( $failure ) {
		throw new BlobAccessException( $failure );
	}

	Assert::postcondition( is_string( $blob ), 'Blob must not be null' );
	return $blob;
}
301
/**
 * A batched version of getBlob().
 *
 * @param string[] $blobAddresses Blob addresses, as returned by storeBlob()
 * @param int $queryFlags Query flags passed through to fetchBlobs()
 * @return StatusValue A good status whose value maps each blob address to its data, or to
 *         null where the blob could not be loaded. Every error reported by fetchBlobs()
 *         is attached as an 'internalerror' warning.
 */
public function getBlobBatch( $blobAddresses, $queryFlags = 0 ) {
	// Stays null unless the loader below actually has to fetch something.
	$fetchErrors = null;

	$addressByCacheKey = $this->cache->makeMultiKeys(
		$blobAddresses,
		function ( $address ) {
			return $this->getCacheKey( $address );
		}
	);

	$loader = function ( array $missingAddresses, array &$ttls, array &$setOpts )
		use ( $queryFlags, &$fetchErrors )
	{
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		list( $fetched, $fetchErrors ) = $this->fetchBlobs( $missingAddresses, $queryFlags );
		return $fetched;
	};

	$blobsByCacheKey = $this->cache->getMultiWithUnionSetCallback(
		$addressByCacheKey,
		$this->getCacheTTL(),
		$loader,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => IExpiringStore::TTL_PROC_LONG ]
	);

	// The cache returns values keyed by the internal keys produced by makeMultiKeys();
	// translate them back to the addresses the caller passed in.
	$blobsByAddress = [];
	foreach ( $blobsByCacheKey as $cacheKey => $blob ) {
		$address = $addressByCacheKey[ $cacheKey ];
		if ( $blob !== false ) {
			$blobsByAddress[ $address ] = $blob;
		} else {
			$blobsByAddress[ $address ] = null;
		}
	}

	$status = StatusValue::newGood( $blobsByAddress );
	foreach ( $fetchErrors ?: [] as $message ) {
		$status->warning( 'internalerror', $message );
	}

	return $status;
}
349
/**
 * Load the given blobs from the text table, bypassing the cache.
 *
 * Only addresses using the 'tt' schema are supported. Addresses with another schema or with
 * a malformed text ID are reported as errors and are never looked up in the database.
 *
 * @param string[] $blobAddresses Blob addresses to load
 * @param int $queryFlags Query flags; when READ_LATEST is set, READ_LATEST_IMMUTABLE is added
 * @return array Two-element list [ $result, $errors ]: $result maps each blob address to the
 *         expanded blob or false on failure; $errors maps each failed address to a message
 * @throws InvalidArgumentException If an address cannot be parsed by splitBlobAddress()
 */
private function fetchBlobs( $blobAddresses, $queryFlags ) {
	$textIdToBlobAddress = [];
	$result = [];
	$errors = [];
	foreach ( $blobAddresses as $blobAddress ) {
		list( $schema, $id ) = self::splitBlobAddress( $blobAddress );
		//TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
		if ( $schema !== 'tt' ) {
			$errors[$blobAddress] = "Unknown blob address schema: $schema";
			$result[$blobAddress] = false;
			continue;
		}

		$textId = intval( $id );
		if ( !$textId || $id !== (string)$textId ) {
			// Malformed ID such as "tt:0", "tt:007" or "tt:12abc". Skip it entirely: if the
			// truncated ID were still queried, a matching row would overwrite the failure
			// recorded here while the error message stayed in place.
			$errors[$blobAddress] = "Bad blob address: $blobAddress";
			$result[$blobAddress] = false;
			continue;
		}

		// Only well-formed IDs are registered for the database lookup.
		$textIdToBlobAddress[$textId] = $blobAddress;
	}

	$textIds = array_keys( $textIdToBlobAddress );
	if ( !$textIds ) {
		return [ $result, $errors ];
	}
	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST )
		? self::READ_LATEST_IMMUTABLE
		: 0;
	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );
	// Text data is immutable; check replica DBs first.
	$dbConnection = $this->getDBConnection( $index );
	$rows = $dbConnection->select(
		'text',
		[ 'old_id', 'old_text', 'old_flags' ],
		[ 'old_id' => $textIds ],
		__METHOD__,
		$options
	);

	// Fallback to DB_MASTER in some cases if not all the rows were found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( $dbConnection->numRows( $rows ) !== count( $textIds ) && $fallbackIndex !== null ) {
		$fetchedTextIds = [];
		foreach ( $rows as $row ) {
			$fetchedTextIds[] = $row->old_id;
		}
		$missingTextIds = array_diff( $textIds, $fetchedTextIds );
		$dbConnection = $this->getDBConnection( $fallbackIndex );
		$rowsFromFallback = $dbConnection->select(
			'text',
			[ 'old_id', 'old_text', 'old_flags' ],
			[ 'old_id' => $missingTextIds ],
			__METHOD__,
			$fallbackOptions
		);
		// Chain both result sets so the expansion loop below sees every row exactly once.
		$appendIterator = new AppendIterator();
		$appendIterator->append( $rows );
		$appendIterator->append( $rowsFromFallback );
		$rows = $appendIterator;
	}

	foreach ( $rows as $row ) {
		$blobAddress = $textIdToBlobAddress[$row->old_id];
		$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
		if ( $blob === false ) {
			$errors[$blobAddress] = "Bad data in text row {$row->old_id}.";
		}
		$result[$blobAddress] = $blob;
	}

	// If we're still missing some of the rows, set errors for missing blobs.
	if ( count( $result ) !== count( $blobAddresses ) ) {
		foreach ( $blobAddresses as $blobAddress ) {
			if ( !isset( $result[$blobAddress ] ) ) {
				$errors[$blobAddress] = "Unable to fetch blob at $blobAddress";
				$result[$blobAddress] = false;
			}
		}
	}
	return [ $result, $errors ];
}
445
/**
 * Get a cache key for a given blob address.
 *
 * The key is global and combines a fixed 'SqlBlobStore-blob' prefix, the domain ID the load
 * balancer resolves for this store's DB domain, and the address itself.
 *
 * @param string $blobAddress
 * @return string
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->dbDomain );

	return $this->cache->makeGlobalKey( 'SqlBlobStore-blob', $domainId, $blobAddress );
}
463
/**
 * Expand a raw data blob according to the flags given.
 *
 * Blobs flagged 'external' hold a URL instead of the data; the data is fetched from the
 * external store and then decompressed. All other blobs are decompressed directly.
 *
 * @param string $raw The raw blob contents, or the external store URL for 'external' blobs
 * @param string|string[] $flags Blob flags, as a comma separated string or as an array
 * @param string|null $cacheKey Blob address to cache the expanded external blob under;
 *        if not given, an external blob is fetched without consulting the cache
 * @return string|bool The expanded blob, or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	if ( !in_array( 'external', $flags ) ) {
		// Plain row: the text table holds the data itself.
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$parts = explode( '://', $url, 2 );
	if ( !isset( $parts[1] ) || $parts[1] === '' ) {
		return false;
	}

	// Shared by the cached and the uncached path below.
	$fetchExternal = function () use ( $url, $flags ) {
		$blob = $this->extStoreAccess->fetchFromURL( $url, [ 'domain' => $this->dbDomain ] );
		if ( $blob === false ) {
			return false;
		}
		return $this->decompressData( $blob, $flags );
	};

	if ( !$cacheKey ) {
		return $fetchExternal();
	}

	// The cached value should be decompressed, so the callback handles that as well.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $cacheKey ),
		$this->getCacheTTL(),
		$fetchExternal,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
518
/**
 * Compress the given blob in place if compression is enabled, and report the flags that
 * describe the stored representation.
 *
 * @param string &$blob The data; replaced by its deflated form when compression succeeds
 * @return string Comma separated flags: always 'utf-8', plus 'gzip' if the data was deflated
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$flagList = [ 'utf-8' ];

	if ( $this->compressBlobs ) {
		if ( !function_exists( 'gzdeflate' ) ) {
			wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" );
		} else {
			$packed = gzdeflate( $blob );

			if ( $packed !== false ) {
				$blob = $packed;
				$flagList[] = 'gzip';
			} else {
				// Keep the uncompressed data; it is simply stored without the gzip flag.
				wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
			}
		}
	}

	return implode( ',', $flagList );
}
559
/**
 * Re-convert blob data according to its flags.
 *
 * Handles, in this order: 'error' rows, 'gzip' inflation, 'object' unwrapping via
 * unserialize() and getText(), and conversion from the legacy encoding for blobs that
 * carry neither the 'utf-8' nor the 'utf8' flag.
 *
 * @param string $blob Blob in compressed/encoded form
 * @param array $blobFlags Flags that describe how $blob is stored
 * @return string|bool The decompressed text, or false on failure
 */
public function decompressData( $blob, array $blobFlags ) {
	// Revision::decompressRevisionText accepted false here, so defend against that
	Assert::parameterType( 'string', $blob, '$blob' );

	if ( in_array( 'error', $blobFlags ) ) {
		// Error row, return false
		return false;
	}

	if ( in_array( 'gzip', $blobFlags ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() can instantiate arbitrary classes. This is only safe
		// as long as the stored blob data is trusted; consider restricting allowed classes.
		$obj = unserialize( $blob );
		if ( !is_object( $obj ) || !method_exists( $obj, 'getText' ) ) {
			// Invalid object, or an object that cannot provide text (for example an
			// incomplete class); calling getText() on it would be a fatal error.
			return false;
		}
		$blob = $obj->getText();
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	if ( $blob !== false && $this->legacyEncoding
		&& !in_array( 'utf-8', $blobFlags ) && !in_array( 'utf8', $blobFlags )
	) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# ("utf8" checked for compatibility with some broken
		# conversion scripts 2008-12-30)
		# Even with //IGNORE iconv can whine about illegal characters in
		# *input* string. We just ignore those too.
		# REF: https://bugs.php.net/bug.php?id=37166
		# REF: https://phabricator.wikimedia.org/T18885
		AtEase::suppressWarnings();
		$blob = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $blob );
		AtEase::restoreWarnings();
	}

	return $blob;
}
625
/**
 * Get the text cache TTL.
 *
 * @return int TTL_UNCACHEABLE when the cache's emulation QoS is at or below
 *         QOS_EMULATION_SQL or when no cache expiry is configured; the configured
 *         cache expiry otherwise
 */
private function getCacheTTL() {
	$emulationLevel = $this->cache->getQoS( WANObjectCache::ATTR_EMULATION );
	if ( $emulationLevel <= WANObjectCache::QOS_EMULATION_SQL ) {
		// Do not cache RDBMs blobs in...the RDBMs store
		return WANObjectCache::TTL_UNCACHEABLE;
	}

	return $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE;
}
645
/**
 * Get the text table ID (old_id) that a blob address refers to.
 *
 * @param string $address Blob address
 * @return int|null The text ID, or null if the address does not use the 'tt' schema
 * @throws InvalidArgumentException If the address cannot be parsed, or if the ID part of a
 *         'tt' address is not a non-zero integer in canonical form
 */
public function getTextIdFromAddress( $address ) {
	list( $schema, $id, ) = self::splitBlobAddress( $address );

	if ( $schema === 'tt' ) {
		$textId = intval( $id );

		// The round trip through intval() rejects "0", leading zeros and trailing garbage.
		if ( $textId && $id === (string)$textId ) {
			return $textId;
		}

		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return null;
}
681
/**
 * Build a blob address referring to the text table row with the given ID.
 *
 * @param int $id Text table ID (old_id)
 * @return string The ID prefixed with the 'tt:' schema
 */
public static function makeAddressFromTextId( $id ) {
	return "tt:$id";
}
697
/**
 * Split a blob address into three parts: the schema, the ID, and parameters/flags.
 *
 * @param string $address Blob address of the form "schema:id" or "schema:id?query"
 * @return array Three-element list: the lower-cased schema, the ID as a string, and the
 *         parameters decoded from the query part by wfCgiToArray() (empty array if absent)
 * @throws InvalidArgumentException If the address does not match the expected form
 */
public static function splitBlobAddress( $address ) {
	$matches = [];
	if ( preg_match( '/^(\w+):(\w+)(\?(.*))?$/', $address, $matches ) !== 1 ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	if ( isset( $matches[4] ) ) {
		$parameters = wfCgiToArray( $matches[4] );
	} else {
		$parameters = [];
	}

	return [ strtolower( $matches[1] ), $matches[2], $parameters ];
}
719
/**
 * Check if the blob metadata or backing blob data store is read-only.
 *
 * @return bool True if the external store is in use and read-only, or if the load
 *         balancer reports a read-only reason
 */
public function isReadOnly() {
	// The external store is only consulted when it is actually used for writes.
	$externalReadOnly = $this->useExternalStore && $this->extStoreAccess->isReadOnly();
	if ( $externalReadOnly ) {
		return true;
	}

	$reason = $this->getDBLoadBalancer()->getReadOnlyReason();

	return $reason !== false;
}
727}
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Throws a warning that $function is deprecated.
Helper class for DAO classes.
Key/value blob storage for a collection of storage medium types (e.g.
MediaWiki exception.
Exception representing a failure to access a data blob.
Service for storing and loading Content objects.
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID.
ExternalStoreAccess $extStoreAccess
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
getCacheTTL()
Get the text cache TTL.
string|bool $dbDomain
DB domain ID of a wiki or false for the local one.
__construct(ILoadBalancer $dbLoadBalancer, ExternalStoreAccess $extStoreAccess, WANObjectCache $cache, $dbDomain=false)
setLegacyEncoding( $legacyEncoding)
Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
setUseExternalStore( $useExternalStore)
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
getCacheKey( $blobAddress)
Get a cache key for a given Blob address.
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
getBlobBatch( $blobAddresses, $queryFlags=0)
A batched version of BlobStore::getBlob.
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
fetchBlobs( $blobAddresses, $queryFlags)
MCR migration note: this corresponds to Revision::fetchText.
decompressData( $blob, array $blobFlags)
Re-converts revision text according to its flags.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Multi-datacenter aware caching interface.
Interface for database access objects.
Generic interface for lightweight expiring object stores.
Service for loading and storing data blobs.
Definition BlobStore.php:35
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:38
Database cluster connection, tracking, load balancing, and transaction manager interface.
const DB_MASTER
Definition defines.php:26