MediaWiki REL1_37
SqlBlobStore.php
Go to the documentation of this file.
1<?php
28namespace MediaWiki\Storage;
29
30use AppendIterator;
35use InvalidArgumentException;
36use MWException;
37use StatusValue;
39use Wikimedia\Assert\Assert;
40use Wikimedia\AtEase\AtEase;
43
53
54 // Note: the name has been taken unchanged from the old Revision class.
55 public const TEXT_CACHE_GROUP = 'revisiontext:10';
56
61
66
70 private $cache;
71
75 private $dbDomain;
76
80 private $cacheExpiry = 604800; // 7 days
81
85 private $compressBlobs = false;
86
90 private $legacyEncoding = false;
91
95 private $useExternalStore = false;
96
/**
 * Wire up the services this blob store depends on.
 *
 * The type names are written fully qualified so that the signature does not
 * depend on which `use` lines are present at the top of the file.
 *
 * @param \Wikimedia\Rdbms\ILoadBalancer $dbLoadBalancer Source of database connections
 *  (see getDBConnection())
 * @param \ExternalStoreAccess $extStoreAccess Used by storeBlob()/expandBlob() for blobs
 *  flagged as 'external'
 * @param \WANObjectCache $cache Cache used by getBlob() and expandBlob()
 * @param bool|string $dbDomain DB domain ID of a wiki or false for the local one
 */
public function __construct(
	\Wikimedia\Rdbms\ILoadBalancer $dbLoadBalancer,
	\ExternalStoreAccess $extStoreAccess,
	\WANObjectCache $cache,
	$dbDomain = false
) {
	$this->dbLoadBalancer = $dbLoadBalancer;
	$this->extStoreAccess = $extStoreAccess;
	$this->cache = $cache;
	$this->dbDomain = $dbDomain;
}
119
/**
 * Get the expiry configured for cached blobs.
 *
 * @return int The current value of $this->cacheExpiry; the property defaults to
 *  604800, annotated as "7 days" (so presumably a number of seconds)
 */
123 public function getCacheExpiry() {
124 return $this->cacheExpiry;
125 }
126
/**
 * Set the expiry used when caching blobs.
 *
 * getCacheTTL() falls back to the cache's TTL_UNCACHEABLE when this value is
 * falsy, so passing 0 effectively turns caching off.
 *
 * @param int $cacheExpiry New expiry value
 */
130 public function setCacheExpiry( int $cacheExpiry ) {
131 $this->cacheExpiry = $cacheExpiry;
132 }
133
/**
 * Tell whether compressData() will try to compress blobs.
 *
 * @return bool The current value of $this->compressBlobs (false by default)
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
140
/**
 * Enable or disable blob compression.
 *
 * When the stored value is truthy, compressData() deflates the blob with
 * gzdeflate() (if that function exists) and adds the 'gzip' flag.
 *
 * @param bool $compressBlobs
 */
144 public function setCompressBlobs( $compressBlobs ) {
145 $this->compressBlobs = $compressBlobs;
146 }
147
/**
 * Get the legacy encoding assumed for blobs that lack the utf-8 flag.
 *
 * @return false|string The value last passed to setLegacyEncoding(), or false
 *  (the property default) when no legacy encoding is configured
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
155
161 wfDeprecated( __METHOD__ );
162 return null;
163 }
164
/**
 * Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
 *
 * decompressData() passes this value to iconv() as the source character set
 * when a blob carries neither the 'utf-8' nor the 'utf8' flag.
 *
 * @param string $legacyEncoding Character set name understood by iconv()
 */
173 public function setLegacyEncoding( string $legacyEncoding ) {
174 $this->legacyEncoding = $legacyEncoding;
175 }
176
/**
 * Tell whether storeBlob() writes blob data to external storage.
 *
 * @return bool The current value of $this->useExternalStore (false by default)
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
183
/**
 * Enable or disable the use of external storage for new blobs.
 *
 * When enabled, storeBlob() inserts the data through $this->extStoreAccess and
 * marks the text row with the 'external' flag; isReadOnly() then also reports
 * the read-only state of the external store.
 *
 * @param bool $useExternalStore
 */
187 public function setUseExternalStore( bool $useExternalStore ) {
188 $this->useExternalStore = $useExternalStore;
189 }
190
/**
 * Get the load balancer that was injected through the constructor.
 *
 * getDBConnection() and isReadOnly() call methods on the returned object, so
 * this must never return null.
 *
 * @return \Wikimedia\Rdbms\ILoadBalancer
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
197
/**
 * Obtain a connection reference for this store's DB domain.
 *
 * @param int $index Connection index to request from the load balancer
 *  (callers in this class pass DB_PRIMARY or an index from getDBOptions())
 * @return mixed Whatever the load balancer's getConnectionRef() returns
 */
private function getDBConnection( $index ) {
	return $this->getDBLoadBalancer()->getConnectionRef( $index, [], $this->dbDomain );
}
207
/**
 * Stores an arbitrary blob of data and returns an address that can be used
 * with getBlob() to retrieve it.
 *
 * The data is first run through compressData(). If external storage is
 * enabled, the (possibly compressed) data goes to the external store and only
 * the value returned by the external store is written to the text table,
 * flagged as 'external'.
 *
 * @param string $data Raw blob data
 * @param array $hints Not used by this implementation
 * @throws BlobAccessException If the external store rejects the data, or if an
 *  MWException is raised while storing
 * @return string A "tt:<id>" address built by makeAddressFromTextId()
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() takes $data by reference and may replace it with the deflated form
		$flags = $this->compressData( $data );

		if ( $this->useExternalStore ) {
			// Hand the payload to the external store; from here on $data is what it returned
			$data = $this->extStoreAccess->insert( $data, [ 'domain' => $this->dbDomain ] );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}
			$separator = $flags ? ',' : '';
			$flags .= $separator . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$dbw = $this->getDBConnection( DB_PRIMARY );

		$textRow = [
			'old_id' => $dbw->nextSequenceValue( 'text_old_id_seq' ),
			'old_text' => $data,
			'old_flags' => $flags,
		];
		$dbw->insert( 'text', $textRow, __METHOD__ );

		return self::makeAddressFromTextId( $dbw->insertId() );
	} catch ( MWException $e ) {
		// Re-throw as the exception type callers of the blob store expect
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
259
/**
 * Retrieve a blob, given an address.
 *
 * The lookup goes through the cache; on a miss the blob is loaded with
 * fetchBlobs(). A failed load yields false from the callback and records the
 * error message, which is then turned into an exception.
 *
 * @param string $blobAddress Address as produced by storeBlob()
 * @param int $queryFlags Flags forwarded to fetchBlobs()
 * @throws BlobAccessException If fetchBlobs() reported an error for the address
 * @return string The blob data
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	$error = null;
	$cacheKey = $this->getCacheKey( $blobAddress );
	$cacheTTL = $this->getCacheTTL();

	// $setOpts is deliberately ignored: blobs are immutable and negatives are not cached
	$loadBlob = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags, &$error ) {
		[ $blobs, $failures ] = $this->fetchBlobs( [ $blobAddress ], $queryFlags );
		// No negative caching; negative hits on text rows may be due to corrupted replica DBs
		$error = $failures[$blobAddress] ?? null;
		return $blobs[$blobAddress];
	};

	$blob = $this->cache->getWithSetCallback(
		$cacheKey,
		$cacheTTL,
		$loadBlob,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => IExpiringStore::TTL_PROC_LONG ]
	);

	if ( $error ) {
		throw new BlobAccessException( $error );
	}

	Assert::postcondition( is_string( $blob ), 'Blob must not be null' );
	return $blob;
}
296
/**
 * A batched version of getBlob().
 *
 * @param string[] $blobAddresses Addresses to load
 * @param int $queryFlags Flags forwarded to fetchBlobs()
 * @return StatusValue Good status whose value maps each address to its blob,
 *  or to null where loading failed; every failure is attached as an
 *  'internalerror' warning
 */
public function getBlobBatch( $blobAddresses, $queryFlags = 0 ) {
	// FIXME: All caching has temporarily been removed in I94c6f9ba7b9caeeb due to T235188.
	//        Caching behavior should be restored by reverting I94c6f9ba7b9caeeb as soon as
	//        the root cause of T235188 has been resolved.

	[ $fetched, $errors ] = $this->fetchBlobs( $blobAddresses, $queryFlags );

	// fetchBlobs() signals failure with false; the batch API reports it as null
	$blobsByAddress = [];
	foreach ( $fetched as $address => $blob ) {
		$blobsByAddress[$address] = ( $blob === false ) ? null : $blob;
	}

	$status = StatusValue::newGood( $blobsByAddress );
	foreach ( $errors as $message ) {
		$status->warning( 'internalerror', $message );
	}

	return $status;
}
327
/**
 * MCR migration note: this corresponded to Revision::fetchText.
 *
 * Loads the blobs for a list of addresses from the text table and expands
 * them with expandBlob().
 *
 * @param string[] $blobAddresses Addresses to load
 * @param int $queryFlags Bit field of READ_* flags; READ_LATEST additionally
 *  enables READ_LATEST_IMMUTABLE
 * @throws BlobAccessException If an address cannot be parsed by splitBlobAddress()
 * @return array Two-element list [ $result, $errors ]: $result maps each address
 *  to its blob string or to false on failure, $errors maps failed addresses to
 *  an error message
 */
private function fetchBlobs( $blobAddresses, $queryFlags ) {
	$textIdToBlobAddress = [];
	$result = [];
	$errors = [];
	foreach ( $blobAddresses as $blobAddress ) {
		try {
			list( $schema, $id ) = self::splitBlobAddress( $blobAddress );
		} catch ( InvalidArgumentException $ex ) {
			throw new BlobAccessException(
				$ex->getMessage() . '. Use findBadBlobs.php to remedy.',
				0,
				$ex
			);
		}

		// TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
		if ( $schema === 'bad' ) {
			// Database row was marked as "known bad", no need to trigger an error.
			wfDebug(
				__METHOD__
				. ": loading known-bad content ($blobAddress), returning empty string"
			);
			$result[$blobAddress] = '';
			continue;
		} elseif ( $schema === 'tt' ) {
			$textId = intval( $id );

			if ( $textId < 1 || $id !== (string)$textId ) {
				$errors[$blobAddress] = "Bad blob address: $blobAddress."
					. ' Use findBadBlobs.php to remedy.';
				$result[$blobAddress] = false;
				// Do not query for a malformed ID: intval() maps e.g. "12abc" or "012"
				// to 12, so the row for text ID 12 would otherwise overwrite the false
				// result above and could displace a valid "tt:12" in the same batch.
				continue;
			}

			$textIdToBlobAddress[$textId] = $blobAddress;
		} else {
			$errors[$blobAddress] = "Unknown blob address schema: $schema."
				. ' Use findBadBlobs.php to remedy.';
			$result[$blobAddress] = false;
		}
	}

	$textIds = array_keys( $textIdToBlobAddress );
	if ( !$textIds ) {
		// Nothing to look up in the text table
		return [ $result, $errors ];
	}
	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST )
		? self::READ_LATEST_IMMUTABLE
		: 0;
	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );
	// Text data is immutable; check replica DBs first.
	$dbConnection = $this->getDBConnection( $index );
	$rows = $dbConnection->select(
		'text',
		[ 'old_id', 'old_text', 'old_flags' ],
		[ 'old_id' => $textIds ],
		__METHOD__,
		$options
	);

	// Fallback to DB_PRIMARY in some cases if not all the rows were found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( $dbConnection->numRows( $rows ) !== count( $textIds ) && $fallbackIndex !== null ) {
		$fetchedTextIds = [];
		foreach ( $rows as $row ) {
			$fetchedTextIds[] = $row->old_id;
		}
		$missingTextIds = array_diff( $textIds, $fetchedTextIds );
		$dbConnection = $this->getDBConnection( $fallbackIndex );
		$rowsFromFallback = $dbConnection->select(
			'text',
			[ 'old_id', 'old_text', 'old_flags' ],
			[ 'old_id' => $missingTextIds ],
			__METHOD__,
			$fallbackOptions
		);
		// Iterate over both result sets as if they were one
		$appendIterator = new AppendIterator();
		$appendIterator->append( $rows );
		$appendIterator->append( $rowsFromFallback );
		$rows = $appendIterator;
	}

	foreach ( $rows as $row ) {
		$blobAddress = $textIdToBlobAddress[$row->old_id];
		$blob = false;
		if ( $row->old_text !== null ) {
			$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
		}
		if ( $blob === false ) {
			$errors[$blobAddress] = "Bad data in text row {$row->old_id}."
				. ' Use findBadBlobs.php to remedy.';
		}
		$result[$blobAddress] = $blob;
	}

	// If we're still missing some of the rows, set errors for missing blobs.
	if ( count( $result ) !== count( $blobAddresses ) ) {
		foreach ( $blobAddresses as $blobAddress ) {
			if ( !isset( $result[$blobAddress ] ) ) {
				$errors[$blobAddress] = "Unable to fetch blob at $blobAddress."
					. ' Use findBadBlobs.php to remedy.';
				$result[$blobAddress] = false;
			}
		}
	}
	return [ $result, $errors ];
}
447
/**
 * Get a cache key for a given blob address.
 *
 * The key is global and combines the fixed prefix 'SqlBlobStore-blob', the
 * resolved ID of this store's DB domain, and the address itself.
 *
 * @param string $blobAddress
 * @return string
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->dbDomain );

	return $this->cache->makeGlobalKey( 'SqlBlobStore-blob', $domainId, $blobAddress );
}
465
/**
 * Expand a raw data blob according to the flags given.
 *
 * Blobs flagged 'external' hold only a URL; the real data is fetched from the
 * external store (through the cache when $cacheKey is given) and then run
 * through decompressData(). All other blobs go straight to decompressData().
 *
 * @param string $raw Raw blob data, or a URL for external blobs
 * @param array|string $flags Blob flags, as an array or a comma-separated string
 * @param string|null $cacheKey Address to derive the cache key from; a falsy
 *  value bypasses the cache
 * @return false|string The expanded blob, or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	$flagList = is_string( $flags ) ? explode( ',', $flags ) : $flags;

	if ( !in_array( 'external', $flagList ) ) {
		// Data is stored inline in the text table
		return $this->decompressData( $raw, $flagList );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$urlParts = explode( '://', $url, 2 );
	if ( count( $urlParts ) == 1 || $urlParts[1] == '' ) {
		return false;
	}

	// Shared loader for the cached and the uncached path.
	// Any arguments the cache passes to it are ignored; blobs are immutable and
	// negatives are not cached.
	$loadExternal = function () use ( $url, $flagList ) {
		$blob = $this->extStoreAccess
			->fetchFromURL( $url, [ 'domain' => $this->dbDomain ] );
		if ( $blob === false ) {
			return false;
		}
		return $this->decompressData( $blob, $flagList );
	};

	if ( !$cacheKey ) {
		return $loadExternal();
	}

	// The cached value should be decompressed, so the loader handles that.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $cacheKey ),
		$this->getCacheTTL(),
		$loadExternal,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
520
/**
 * Compress the blob in place if compression is enabled, and report the flags
 * that describe the resulting data.
 *
 * @param string &$blob Blob data; replaced by its deflated form when
 *  compression is enabled, available and successful
 * @return string Comma-separated flags: always 'utf-8', plus 'gzip' if the
 *  blob was deflated
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$flagNames = [ 'utf-8' ];

	if ( !$this->compressBlobs ) {
		return implode( ',', $flagNames );
	}

	if ( !function_exists( 'gzdeflate' ) ) {
		wfDebug( __METHOD__ . " -- no zlib support, not compressing" );
		return implode( ',', $flagNames );
	}

	$deflated = gzdeflate( $blob );
	if ( $deflated === false ) {
		// Keep the uncompressed data and just report the failure
		wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
	} else {
		$blob = $deflated;
		$flagNames[] = 'gzip';
	}

	return implode( ',', $flagNames );
}
561
/**
 * Re-converts revision text according to its flags.
 *
 * Steps, each applied only if the matching flag is present: reject 'error'
 * rows, inflate 'gzip' data, unwrap 'object' data via the unserialized
 * object's getText(), and finally convert from the legacy encoding when one is
 * configured and the blob is not flagged as UTF-8.
 *
 * @param string $blob Blob in compressed/encoded form
 * @param array $blobFlags List of flag strings, e.g. [ 'utf-8', 'gzip' ]
 * @return string|false Decompressed text, or false on failure
 */
public function decompressData( string $blob, array $blobFlags ) {
	// Flags are plain strings, so compare strictly
	if ( in_array( 'error', $blobFlags, true ) ) {
		// Error row, return false
		return false;
	}

	if ( in_array( 'gzip', $blobFlags, true ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags, true ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() instantiates whatever class the stored data names.
		// This relies on the storage backend being trusted; confirm, and consider
		// restricting 'allowed_classes' if the set of blob classes is known.
		$obj = unserialize( $blob );
		// Also reject objects without getText(), such as __PHP_Incomplete_Class for an
		// unknown class, instead of dying with a fatal error on the call below.
		if ( !is_object( $obj ) || !method_exists( $obj, 'getText' ) ) {
			// Invalid object
			return false;
		}
		$blob = $obj->getText();
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	if ( $blob !== false && $this->legacyEncoding
		&& !in_array( 'utf-8', $blobFlags, true ) && !in_array( 'utf8', $blobFlags, true )
	) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# ("utf8" checked for compatibility with some broken
		#  conversion scripts 2008-12-30)
		# Even with //IGNORE iconv can whine about illegal characters in
		# *input* string. We just ignore those too.
		# REF: https://bugs.php.net/bug.php?id=37166
		# REF: https://phabricator.wikimedia.org/T18885
		AtEase::suppressWarnings();
		$blob = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $blob );
		AtEase::restoreWarnings();
	}

	return $blob;
}
624
/**
 * Get the text cache TTL.
 *
 * @return int TTL_UNCACHEABLE when the cache reports RDBMS-level durability or
 *  when no cache expiry is configured; otherwise the configured cache expiry
 */
private function getCacheTTL() {
	$cache = $this->cache;

	if ( $cache->getQoS( $cache::ATTR_DURABILITY ) >= $cache::QOS_DURABILITY_RDBMS ) {
		// Do not cache RDBMs blobs in...the RDBMs store
		return $cache::TTL_UNCACHEABLE;
	}

	// A falsy expiry (e.g. 0) means "do not cache"
	return $this->cacheExpiry ?: $cache::TTL_UNCACHEABLE;
}
644
/**
 * Returns an ID corresponding to the old_id field in the text table,
 * corresponding to the given $address.
 *
 * Only addresses with the 'tt' schema refer to text rows. The ID must be a
 * positive integer in canonical decimal form, the same rule fetchBlobs()
 * applies before querying the text table.
 *
 * @param string $address Blob address
 * @throws InvalidArgumentException If the address cannot be parsed, or if a
 *  'tt' address carries a malformed or non-positive ID
 * @return int|null The text ID, or null if the address uses another schema
 */
public function getTextIdFromAddress( $address ) {
	list( $schema, $id, ) = self::splitBlobAddress( $address );

	if ( $schema !== 'tt' ) {
		return null;
	}

	$textId = intval( $id );

	// "< 1" rather than "!$textId": also rejects negative IDs such as "tt:-5",
	// which survive the round-trip check below but can never match a text row.
	if ( $textId < 1 || $id !== (string)$textId ) {
		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return $textId;
}
680
/**
 * Returns an address referring to content stored in the text table row with
 * the given ID.
 *
 * @param int $id Text table row ID (old_id)
 * @return string The ID prefixed with the 'tt:' schema
 */
public static function makeAddressFromTextId( $id ) {
	return "tt:$id";
}
697
/**
 * Splits a blob address into three parts: the schema, the ID, and
 * parameters/flags.
 *
 * The expected form is "<schema>:<id>" with an optional "?<query>" suffix. The
 * schema is lower-cased; the query part, if present, is decoded with
 * wfCgiToArray().
 *
 * @param string $address Blob address
 * @throws InvalidArgumentException If the address does not have the expected form
 * @return array Three-element list [ $schema, $id, $parameters ]
 */
public static function splitBlobAddress( $address ) {
	$matched = preg_match( '/^([-+.\w]+):([^\s?]+)(\?([^\s]*))?$/', $address, $m );
	if ( !$matched ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	return [
		strtolower( $m[1] ),
		$m[2],
		isset( $m[4] ) ? wfCgiToArray( $m[4] ) : [],
	];
}
719
/**
 * Check if the blob metadata or backing blob data store is read-only.
 *
 * @return bool True if external storage is in use and read-only, or if the
 *  load balancer reports a read-only reason
 */
public function isReadOnly() {
	// The external store is only consulted when it is actually in use
	$externalStoreReadOnly = $this->useExternalStore && $this->extStoreAccess->isReadOnly();

	return $externalStoreReadOnly
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
727}
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
Helper class for DAO classes.
Key/value blob storage for a collection of storage medium types (e.g.
MediaWiki exception.
Exception representing a failure to access a data blob.
Service for storing and loading Content objects.
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID.
ExternalStoreAccess $extStoreAccess
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $address.
getCacheTTL()
Get the text cache TTL.
string|bool $dbDomain
DB domain ID of a wiki or false for the local one.
__construct(ILoadBalancer $dbLoadBalancer, ExternalStoreAccess $extStoreAccess, WANObjectCache $cache, $dbDomain=false)
decompressData(string $blob, array $blobFlags)
Re-converts revision text according to its flags.
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
setLegacyEncoding(string $legacyEncoding)
Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
getCacheKey( $blobAddress)
Get a cache key for a given Blob address.
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
getBlobBatch( $blobAddresses, $queryFlags=0)
A batched version of BlobStore::getBlob.
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve the same blob of data.
setUseExternalStore(bool $useExternalStore)
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
fetchBlobs( $blobAddresses, $queryFlags)
MCR migration note: this corresponded to Revision::fetchText.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Multi-datacenter aware caching interface.
Interface for database access objects.
Generic interface providing TTL constants for lightweight expiring object stores.
Service for loading and storing data blobs.
Definition BlobStore.php:35
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:38
Database cluster connection, tracking, load balancing, and transaction manager interface.
const DB_PRIMARY
Definition defines.php:27