MediaWiki REL1_35
SqlBlobStore.php
Go to the documentation of this file.
1<?php
27namespace MediaWiki\Storage;
28
29use AppendIterator;
34use InvalidArgumentException;
35use MWException;
36use StatusValue;
38use Wikimedia\Assert\Assert;
39use Wikimedia\AtEase\AtEase;
42
52
// Note: the name has been taken unchanged from the Revision class.
public const TEXT_CACHE_GROUP = 'revisiontext:10';

/** @var ILoadBalancer Load balancer used for all access to the text table. */
private $dbLoadBalancer;

/** @var ExternalStoreAccess Access layer for external storage; written to when $useExternalStore is set. */
private $extStoreAccess;

/** @var WANObjectCache Cache holding expanded blobs. */
private $cache;

/** @var string|bool DB domain ID of a wiki, or false for the local one. */
private $dbDomain;

/** @var int Lifetime of cached blobs, in seconds. */
private $cacheExpiry = 604800; // 7 days

/** @var bool Whether compressData() deflates new blobs. */
private $compressBlobs = false;

/** @var string|bool Encoding assumed for blobs lacking the utf-8 flag, or false for none. */
private $legacyEncoding = false;

/** @var bool Whether storeBlob() writes blob data to external storage. */
private $useExternalStore = false;
95
/**
 * @param ILoadBalancer $dbLoadBalancer Load balancer used to obtain database connections
 * @param ExternalStoreAccess $extStoreAccess Access layer for external storage
 * @param WANObjectCache $cache Cache used for expanded blobs
 * @param string|bool $dbDomain DB domain ID of a wiki, or false for the local one
 */
public function __construct(
	$dbLoadBalancer,
	$extStoreAccess,
	$cache,
	$dbDomain = false
) {
	$this->dbLoadBalancer = $dbLoadBalancer;
	$this->extStoreAccess = $extStoreAccess;
	$this->cache = $cache;
	$this->dbDomain = $dbDomain;
}
118
/**
 * Report the configured lifetime of cached blobs.
 *
 * @return int Cache lifetime in seconds
 */
public function getCacheExpiry() {
	$expiry = $this->cacheExpiry;

	return $expiry;
}
125
/**
 * Change the lifetime of cached blobs.
 *
 * @param int $cacheExpiry Cache lifetime in seconds; anything but an integer is rejected
 */
public function setCacheExpiry( $cacheExpiry ) {
	// Validate before touching state so a bad value never reaches the property.
	Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );
	$this->cacheExpiry = $cacheExpiry;
}
134
/**
 * Report whether compressData() deflates new blobs.
 *
 * @return bool
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
141
/**
 * Switch deflating of new blobs in compressData() on or off.
 *
 * @param bool $compressBlobs
 */
public function setCompressBlobs( $compressBlobs ) {
	// Stored as given; no type check is applied here.
	$this->compressBlobs = $compressBlobs;
}
148
/**
 * Report the encoding assumed for blobs that do not carry the utf-8 flag.
 *
 * @return string|bool The legacy encoding, or false if none is set
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
156
162 wfDeprecated( __METHOD__ );
163 return null;
164 }
165
/**
 * Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
 *
 * @param string $legacyEncoding The legacy encoding; anything but a string is rejected
 */
public function setLegacyEncoding( $legacyEncoding ) {
	Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );

	$this->legacyEncoding = $legacyEncoding;
}
179
/**
 * Report whether storeBlob() writes blob data to external storage.
 *
 * @return bool
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
186
/**
 * Switch writing of new blob data to external storage on or off.
 *
 * @param bool $useExternalStore Anything but a boolean is rejected
 */
public function setUseExternalStore( $useExternalStore ) {
	Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );

	$this->useExternalStore = $useExternalStore;
}
195
/**
 * @return ILoadBalancer The load balancer handed to the constructor
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
202
/**
 * Obtain a connection reference for this store's DB domain.
 *
 * @param int $index Server index passed through to the load balancer (e.g. DB_MASTER)
 * @return mixed Whatever the load balancer's getConnectionRef() returns
 */
private function getDBConnection( $index ) {
	// No server groups are requested; only the index and the domain select the connection.
	return $this->getDBLoadBalancer()->getConnectionRef( $index, [], $this->dbDomain );
}
212
/**
 * Store a blob in the text table (and, if enabled, in external storage) and return
 * the address under which it can be retrieved again.
 *
 * @param string $data The blob content
 * @param array $hints Not consulted by this implementation
 * @throws BlobAccessException If external storage rejects the data, or an MWException
 *   is raised while storing
 * @return string An address of the form produced by makeAddressFromTextId()
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() takes $data by reference and may swap in the deflated form.
		$flagList = $this->compressData( $data );

		if ( $this->useExternalStore ) {
			// After this call $data no longer holds the content but the URL it was stored at.
			$data = $this->extStoreAccess->insert( $data, [ 'domain' => $this->dbDomain ] );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}
			$flagList .= ( $flagList ? ',' : '' ) . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$dbw = $this->getDBConnection( DB_MASTER );

		$row = [
			'old_id' => $dbw->nextSequenceValue( 'text_old_id_seq' ),
			'old_text' => $data,
			'old_flags' => $flagList,
		];
		$dbw->insert( 'text', $row, __METHOD__ );

		return self::makeAddressFromTextId( $dbw->insertId() );
	} catch ( MWException $e ) {
		// Re-wrap so callers only have to deal with one exception type.
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
264
/**
 * Retrieve a single blob by address, going through the blob cache.
 *
 * @param string $blobAddress
 * @param int $queryFlags Flags forwarded to fetchBlobs()
 * @throws BlobAccessException If fetchBlobs() reported an error for the address
 * @return string The expanded blob content
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// Filled in by the loader below; stays null if the cache answers or the fetch succeeds.
	$error = null;

	$loader = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags, &$error ) {
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		[ $blobs, $failures ] = $this->fetchBlobs( [ $blobAddress ], $queryFlags );
		// No negative caching; negative hits on text rows may be due to corrupted replica DBs
		$error = $failures[$blobAddress] ?? null;

		return $blobs[$blobAddress];
	};

	$cacheOptions = [
		'pcGroup' => self::TEXT_CACHE_GROUP,
		'pcTTL' => IExpiringStore::TTL_PROC_LONG,
	];
	$blob = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loader,
		$cacheOptions
	);

	if ( $error ) {
		throw new BlobAccessException( $error );
	}

	Assert::postcondition( is_string( $blob ), 'Blob must not be null' );

	return $blob;
}
301
/**
 * Batched counterpart of getBlob(): fetch several blobs in one go.
 *
 * @param string[] $blobAddresses
 * @param int $queryFlags Flags forwarded to fetchBlobs()
 * @return StatusValue A good status whose value maps each address to its blob content, or
 *   to null where the blob could not be loaded; each failure adds an 'internalerror' warning.
 */
public function getBlobBatch( $blobAddresses, $queryFlags = 0 ) {
	// FIXME: All caching has temporarily been removed in I94c6f9ba7b9caeeb due to T235188.
	// Caching behavior should be restored by reverting I94c6f9ba7b9caeeb as soon as
	// the root cause of T235188 has been resolved.

	[ $fetched, $errors ] = $this->fetchBlobs( $blobAddresses, $queryFlags );

	// fetchBlobs() signals failure with false; this method reports it as null instead.
	$blobsByAddress = [];
	foreach ( $fetched as $address => $blob ) {
		$blobsByAddress[$address] = ( $blob === false ) ? null : $blob;
	}

	$status = StatusValue::newGood( $blobsByAddress );
	foreach ( $errors as $error ) {
		$status->warning( 'internalerror', $error );
	}

	return $status;
}
332
/**
 * Load several blobs from the text table and expand them.
 *
 * MCR migration note: this corresponds to Revision::fetchText.
 *
 * @param string[] $blobAddresses
 * @param int $queryFlags Bitfield of READ_* flags
 * @throws BlobAccessException If an address cannot be split into schema and ID
 * @return array [ $result, $errors ]: $result maps every requested address to its content
 *   or to false on failure; $errors maps each failed address to an error message.
 */
private function fetchBlobs( $blobAddresses, $queryFlags ) {
	$textIdToBlobAddress = [];
	$result = [];
	$errors = [];
	foreach ( $blobAddresses as $blobAddress ) {
		try {
			list( $schema, $id ) = self::splitBlobAddress( $blobAddress );
		} catch ( InvalidArgumentException $ex ) {
			throw new BlobAccessException( $ex->getMessage(), 0, $ex );
		}

		// TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
		if ( $schema === 'bad' ) {
			// Database row was marked as "known bad", no need to trigger an error.
			wfDebug(
				__METHOD__
				. ": loading known-bad content ($blobAddress), returning empty string"
			);
			$result[$blobAddress] = '';
			continue;
		} elseif ( $schema === 'tt' ) {
			$textId = intval( $id );

			if ( $textId < 1 || $id !== (string)$textId ) {
				$errors[$blobAddress] = "Bad blob address: $blobAddress";
				$result[$blobAddress] = false;
				// Do not queue the malformed ID for the query below: intval() may have
				// turned it into the ID of an unrelated row (e.g. "5x" becomes 5), whose
				// content would then replace the failure recorded here.
				continue;
			}

			$textIdToBlobAddress[$textId] = $blobAddress;
		} else {
			$errors[$blobAddress] = "Unknown blob address schema: $schema";
			$result[$blobAddress] = false;
			continue;
		}
	}

	$textIds = array_keys( $textIdToBlobAddress );
	if ( !$textIds ) {
		// Nothing left that needs a database lookup.
		return [ $result, $errors ];
	}
	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST )
		? self::READ_LATEST_IMMUTABLE
		: 0;
	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );
	// Text data is immutable; check replica DBs first.
	$dbConnection = $this->getDBConnection( $index );
	$rows = $dbConnection->select(
		'text',
		[ 'old_id', 'old_text', 'old_flags' ],
		[ 'old_id' => $textIds ],
		__METHOD__,
		$options
	);

	// Fallback to DB_MASTER in some cases if not all the rows were found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( $dbConnection->numRows( $rows ) !== count( $textIds ) && $fallbackIndex !== null ) {
		$fetchedTextIds = [];
		foreach ( $rows as $row ) {
			$fetchedTextIds[] = $row->old_id;
		}
		$missingTextIds = array_diff( $textIds, $fetchedTextIds );
		$dbConnection = $this->getDBConnection( $fallbackIndex );
		$rowsFromFallback = $dbConnection->select(
			'text',
			[ 'old_id', 'old_text', 'old_flags' ],
			[ 'old_id' => $missingTextIds ],
			__METHOD__,
			$fallbackOptions
		);
		// Chain both result sets so the loop below sees them as one sequence of rows.
		$appendIterator = new AppendIterator();
		$appendIterator->append( $rows );
		$appendIterator->append( $rowsFromFallback );
		$rows = $appendIterator;
	}

	foreach ( $rows as $row ) {
		$blobAddress = $textIdToBlobAddress[$row->old_id];
		$blob = false;
		if ( $row->old_text !== null ) {
			$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
		}
		if ( $blob === false ) {
			$errors[$blobAddress] = "Bad data in text row {$row->old_id}.";
		}
		$result[$blobAddress] = $blob;
	}

	// If we're still missing some of the rows, set errors for missing blobs.
	if ( count( $result ) !== count( $blobAddresses ) ) {
		foreach ( $blobAddresses as $blobAddress ) {
			if ( !isset( $result[$blobAddress ] ) ) {
				$errors[$blobAddress] = "Unable to fetch blob at $blobAddress";
				$result[$blobAddress] = false;
			}
		}
	}
	return [ $result, $errors ];
}
445
/**
 * Build the cache key under which the blob at the given address is kept.
 *
 * @param string $blobAddress
 * @return string A global cache key combining the resolved DB domain and the address
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->dbDomain );

	return $this->cache->makeGlobalKey( 'SqlBlobStore-blob', $domainId, $blobAddress );
}
463
/**
 * Expand a raw data blob according to the flags given.
 *
 * Blobs flagged 'external' hold only a URL; the content is fetched from external
 * storage (through the cache when a cache key is supplied) before being decompressed.
 *
 * @param string $raw The raw blob data, or the external-store URL for 'external' blobs
 * @param array|string $flags Blob flags, as an array or a comma-separated list
 * @param string|null $cacheKey Blob address to cache an external blob under, if any
 * @return string|bool The expanded content, or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	// Inline blobs need no fetch at all.
	if ( !in_array( 'external', $flags ) ) {
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$urlParts = explode( '://', $url, 2 );
	if ( count( $urlParts ) == 1 || $urlParts[1] == '' ) {
		return false;
	}

	$fetchExpanded = function () use ( $url, $flags ) {
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		$blob = $this->extStoreAccess
			->fetchFromURL( $url, [ 'domain' => $this->dbDomain ] );

		return $blob === false ? false : $this->decompressData( $blob, $flags );
	};

	if ( !$cacheKey ) {
		return $fetchExpanded();
	}

	// The cached value should be decompressed, so the loader handles that itself.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $cacheKey ),
		$this->getCacheTTL(),
		$fetchExpanded,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
518
/**
 * Prepare a blob for storage: deflate it in place when compression is enabled and
 * report the flags describing what was done.
 *
 * @param string &$blob The blob content; replaced by its deflated form on success
 * @return string Comma-separated flags: always 'utf-8', plus 'gzip' if the blob was deflated
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$blobFlags = [ 'utf-8' ];

	if ( !$this->compressBlobs ) {
		return implode( ',', $blobFlags );
	}

	if ( !function_exists( 'gzdeflate' ) ) {
		wfDebug( __METHOD__ . " -- no zlib support, not compressing" );

		return implode( ',', $blobFlags );
	}

	$deflated = gzdeflate( $blob );
	if ( $deflated === false ) {
		// Keep the uncompressed blob rather than failing the whole store operation.
		wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
	} else {
		$blob = $deflated;
		$blobFlags[] = 'gzip';
	}

	return implode( ',', $blobFlags );
}
559
/**
 * Re-converts revision text according to its flags.
 *
 * Handles, in this order: the 'error' flag, 'gzip' inflation, 'object' unwrapping and
 * legacy-encoding conversion for blobs without a utf-8 flag.
 *
 * @param string $blob Blob in compressed/encoded form
 * @param array $blobFlags List of flag strings describing how $blob was stored
 * @return string|bool The decompressed content, or false on any failure
 */
public function decompressData( $blob, array $blobFlags ) {
	// Revision::decompressRevisionText accepted false here, so defend against that
	Assert::parameterType( 'string', $blob, '$blob' );

	// Flags are compared strictly throughout: on PHP 7 a loose comparison would let an
	// integer 0 in the list match every flag name.
	if ( in_array( 'error', $blobFlags, true ) ) {
		// Error row, return false
		return false;
	}

	if ( in_array( 'gzip', $blobFlags, true ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags, true ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() instantiates whatever class the stored data names.
		// Consider restricting it with the allowed_classes option once the set of
		// legitimate blob classes has been confirmed.
		$obj = unserialize( $blob );
		if ( !is_object( $obj ) || !method_exists( $obj, 'getText' ) ) {
			// Invalid object, or an object of a class that cannot hand back text.
			// Calling getText() on the latter would be a fatal error.
			return false;
		}
		$blob = $obj->getText();
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	if ( $blob !== false && $this->legacyEncoding
		&& !in_array( 'utf-8', $blobFlags, true ) && !in_array( 'utf8', $blobFlags, true )
	) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# ("utf8" checked for compatibility with some broken
		# conversion scripts 2008-12-30)
		# Even with //IGNORE iconv can whine about illegal characters in
		# *input* string. We just ignore those too.
		# REF: https://bugs.php.net/bug.php?id=37166
		# REF: https://phabricator.wikimedia.org/T18885
		AtEase::suppressWarnings();
		$blob = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $blob );
		AtEase::restoreWarnings();
	}

	return $blob;
}
625
/**
 * Get the text cache TTL.
 *
 * @return int The configured expiry, or WANObjectCache::TTL_UNCACHEABLE when the cache
 *   is emulated by the RDBMS or no expiry is configured
 */
private function getCacheTTL() {
	$emulation = $this->cache->getQoS( WANObjectCache::ATTR_EMULATION );

	if ( $emulation <= WANObjectCache::QOS_EMULATION_SQL ) {
		// Do not cache RDBMs blobs in...the RDBMs store
		return WANObjectCache::TTL_UNCACHEABLE;
	}

	// A falsy expiry (e.g. 0) means caching is switched off.
	return $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE;
}
645
/**
 * Returns the ID of the text table row (old_id) that the given address refers to.
 *
 * @param string $address A blob address
 * @throws InvalidArgumentException If the address cannot be split, or it uses the 'tt'
 *   schema with an ID that is not a positive integer in canonical decimal form
 * @return int|null The text row ID, or null if the address does not use the 'tt' schema
 */
public function getTextIdFromAddress( $address ) {
	list( $schema, $id, ) = self::splitBlobAddress( $address );

	if ( $schema !== 'tt' ) {
		return null;
	}

	$textId = intval( $id );

	// Reject zero and negative IDs as well as non-canonical spellings such as "007" or
	// "5x"; fetchBlobs() applies the same "< 1" rule when loading by address.
	if ( $textId < 1 || $id !== (string)$textId ) {
		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return $textId;
}
681
/**
 * Returns an address referring to content stored in the text table row with the given ID.
 *
 * @param int $id The old_id of the text table row
 * @return string The ID prefixed with the 'tt' schema, e.g. "tt:123"
 */
public static function makeAddressFromTextId( $id ) {
	return "tt:$id";
}
698
/**
 * Splits a blob address into three parts: the schema, the ID, and parameters/flags.
 *
 * @param string $address Address of the form "schema:id" or "schema:id?parameters"
 * @throws InvalidArgumentException If the address does not have that form
 * @return array [ $schema, $id, $parameters ]: the lower-cased schema, the ID as a
 *   string, and the parameters as decoded by wfCgiToArray() (an empty array if none)
 */
public static function splitBlobAddress( $address ) {
	// The D modifier anchors "$" at the very end of the string; without it an address
	// followed by a trailing newline would be accepted.
	if ( !preg_match( '/^([-+.\w]+):([^\s?]+)(\?([^\s]*))?$/D', $address, $m ) ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	$schema = strtolower( $m[1] );
	$id = $m[2];
	// Group 4 is only present when the address carries a "?..." part.
	$parameters = isset( $m[4] ) ? wfCgiToArray( $m[4] ) : [];

	return [ $schema, $id, $parameters ];
}
720
/**
 * Check if the blob metadata or backing blob data store is read-only.
 *
 * @return bool True if external storage is in use and read-only, or if the load
 *   balancer reports a read-only reason
 */
public function isReadOnly() {
	// External storage only matters when this store actually writes to it.
	$externalReadOnly = $this->useExternalStore && $this->extStoreAccess->isReadOnly();

	return $externalReadOnly
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
728}
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that $function is deprecated.
Helper class for DAO classes.
Key/value blob storage for a collection of storage medium types (e.g.
MediaWiki exception.
Exception representing a failure to access a data blob.
Service for storing and loading Content objects.
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID.
ExternalStoreAccess $extStoreAccess
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
getCacheTTL()
Get the text cache TTL.
string|bool $dbDomain
DB domain ID of a wiki or false for the local one.
__construct(ILoadBalancer $dbLoadBalancer, ExternalStoreAccess $extStoreAccess, WANObjectCache $cache, $dbDomain=false)
setLegacyEncoding( $legacyEncoding)
Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
setUseExternalStore( $useExternalStore)
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
getCacheKey( $blobAddress)
Get a cache key for a given Blob address.
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
getBlobBatch( $blobAddresses, $queryFlags=0)
A batched version of BlobStore::getBlob.
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
fetchBlobs( $blobAddresses, $queryFlags)
MCR migration note: this corresponds to Revision::fetchText.
decompressData( $blob, array $blobFlags)
Re-converts revision text according to its flags.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
Multi-datacenter aware caching interface.
Interface for database access objects.
Generic interface providing TTL constants for lightweight expiring object stores.
Service for loading and storing data blobs.
Definition BlobStore.php:35
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:38
Database cluster connection, tracking, load balancing, and transaction manager interface.
const DB_MASTER
Definition defines.php:29