MediaWiki REL1_39
SqlBlobStore.php
Go to the documentation of this file.
1<?php
28namespace MediaWiki\Storage;
29
30use AppendIterator;
35use InvalidArgumentException;
36use MWException;
37use StatusValue;
39use Wikimedia\Assert\Assert;
40use Wikimedia\AtEase\AtEase;
44
54
55 // Note: the name has been taken unchanged from the old Revision class.
// Used as the 'pcGroup' option for the getWithSetCallback() calls made by this class.
56 public const TEXT_CACHE_GROUP = 'revisiontext:10';
57
/** @var ILoadBalancer Load balancer handed to the constructor; source of all DB connections */
61 private $dbLoadBalancer;
62
/** @var ExternalStoreAccess External store accessor handed to the constructor */
66 private $extStoreAccess;
67
/** @var WANObjectCache Cache handed to the constructor; holds expanded blobs */
71 private $cache;
72
// The constructor defaults this to false; presumably a DB domain ID string otherwise — TODO confirm.
/** @var string|bool DB domain passed to getConnectionRef() and to external store calls */
76 private $dbDomain;
77
/** @var int Cache TTL in seconds; a falsy value makes getCacheTTL() return TTL_UNCACHEABLE */
81 private $cacheExpiry = 604800; // 7 days
82
/** @var bool Whether compressData() should attempt to gzdeflate new blobs */
86 private $compressBlobs = false;
87
/** @var string|false Source encoding for iconv() on blobs lacking a utf-8 flag; false disables it */
91 private $legacyEncoding = false;
92
/** @var bool Whether storeBlob() writes the data to the external store */
96 private $useExternalStore = false;
97
/**
 * Wire up the collaborators this blob store depends on.
 *
 * @param ILoadBalancer $dbLoadBalancer Load balancer used to obtain database connections
 * @param ExternalStoreAccess $extStoreAccess Accessor used for externally stored blobs
 * @param WANObjectCache $cache Cache used for expanded blobs
 * @param string|bool $dbDomain Passed through to the load balancer and the external
 *  store; defaults to false
 */
public function __construct(
	ILoadBalancer $dbLoadBalancer,
	ExternalStoreAccess $extStoreAccess,
	WANObjectCache $cache,
	$dbDomain = false
) {
	// Plain assignments; nothing is validated or connected at construction time.
	$this->dbDomain = $dbDomain;
	$this->cache = $cache;
	$this->extStoreAccess = $extStoreAccess;
	$this->dbLoadBalancer = $dbLoadBalancer;
}
120
/**
 * Get the configured cache expiry for blobs.
 *
 * @return int Expiry in seconds (the class default is 604800, i.e. 7 days)
 */
public function getCacheExpiry() {
	$expiry = $this->cacheExpiry;

	return $expiry;
}
127
/**
 * Set the cache expiry for blobs.
 *
 * A falsy value (0) makes getCacheTTL() fall back to TTL_UNCACHEABLE.
 *
 * @param int $cacheExpiry Expiry in seconds
 */
public function setCacheExpiry( int $cacheExpiry ) {
	$this->cacheExpiry = $cacheExpiry;
}
134
/**
 * Tell whether newly stored blobs are compressed.
 *
 * @return bool The value consulted by compressData(); false by default
 */
public function getCompressBlobs() {
	$enabled = $this->compressBlobs;

	return $enabled;
}
141
/**
 * Enable or disable compression of newly stored blobs.
 *
 * @param bool $compressBlobs Stored as given (not type-checked); compressData()
 *  tests it for truthiness
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
148
/**
 * Get the legacy encoding assumed for blobs that are not flagged as UTF-8.
 *
 * @return string|false The encoding name, or false (the default) if none is set
 */
public function getLegacyEncoding() {
	$encoding = $this->legacyEncoding;

	return $encoding;
}
156
/**
 * Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
 *
 * decompressData() passes this value to iconv() as the source encoding.
 *
 * @param string $legacyEncoding Encoding name
 */
public function setLegacyEncoding( string $legacyEncoding ) {
	$this->legacyEncoding = $legacyEncoding;
}
168
/**
 * Tell whether new blobs are written to the external store.
 *
 * @return bool False by default
 */
public function getUseExternalStore() {
	$enabled = $this->useExternalStore;

	return $enabled;
}
175
/**
 * Choose whether storeBlob() writes blob data to the external store.
 *
 * @param bool $useExternalStore True to store data externally and keep only the
 *  returned URL in the text table
 */
public function setUseExternalStore( bool $useExternalStore ) {
	$this->useExternalStore = $useExternalStore;
}
182
/**
 * Accessor for the load balancer injected through the constructor.
 *
 * @return ILoadBalancer
 */
private function getDBLoadBalancer() {
	$loadBalancer = $this->dbLoadBalancer;

	return $loadBalancer;
}
189
/**
 * Obtain a connection reference for this store's DB domain.
 *
 * @param int $index Connection index handed to the load balancer (callers in this
 *  class pass DB_PRIMARY or an index obtained from DBAccessObjectUtils)
 * @return mixed Whatever ILoadBalancer::getConnectionRef() returns for that index
 */
private function getDBConnection( $index ) {
	// No query groups are requested, hence the empty array.
	return $this->getDBLoadBalancer()->getConnectionRef( $index, [], $this->dbDomain );
}
199
/**
 * Stores an arbitrary blob of data and returns an address that can be used with
 * getBlob() to retrieve it.
 *
 * The data is first run through compressData(). When the external store is enabled,
 * the (possibly compressed) data is written there and only the returned URL is kept
 * in the text table, marked with the 'external' flag.
 *
 * @param string $data The blob to store
 * @param array $hints Accepted for interface compatibility; not used by this method
 * @return string Address of the form produced by makeAddressFromTextId()
 * @throws BlobAccessException If the external store insert fails, or an MWException
 *  is raised while storing
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() may replace $data with its deflated form.
		$flags = $this->compressData( $data );

		# Write to external storage if required
		if ( $this->useExternalStore ) {
			// Store and get the URL
			$data = $this->extStoreAccess->insert( $data, [ 'domain' => $this->dbDomain ] );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}

			// Append to the comma-separated flag list, adding a separator only if needed.
			$separator = $flags ? ',' : '';
			$flags .= $separator . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$row = [ 'old_text' => $data, 'old_flags' => $flags ];
		$primaryDb = $this->getDBConnection( DB_PRIMARY );
		$primaryDb->insert( 'text', $row, __METHOD__ );

		return self::makeAddressFromTextId( $primaryDb->insertId() );
	} catch ( MWException $e ) {
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
246
/**
 * Retrieve a blob, given an address.
 *
 * The lookup goes through the cache; on a miss the blob is loaded with fetchBlobs().
 *
 * @param string $blobAddress Address as accepted by splitBlobAddress()
 * @param int $queryFlags Query flags, passed through to fetchBlobs()
 * @return string The blob data
 * @throws BlobAccessException If fetchBlobs() reported an error for this address
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	$cacheOptions = [ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => IExpiringStore::TTL_PROC_LONG ];

	// Filled in by the loader below so a failure can be reported after the cache call.
	$fetchError = null;
	$loadBlob = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags, &$fetchError ) {
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		[ $blobs, $errorsByAddress ] = $this->fetchBlobs( [ $blobAddress ], $queryFlags );
		// No negative caching; negative hits on text rows may be due to corrupted replica DBs
		$fetchError = $errorsByAddress[$blobAddress] ?? null;
		return $blobs[$blobAddress];
	};

	$blob = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loadBlob,
		$cacheOptions
	);

	if ( $fetchError ) {
		throw new BlobAccessException( $fetchError );
	}

	Assert::postcondition( is_string( $blob ), 'Blob must not be null' );
	return $blob;
}
283
/**
 * A batched version of getBlob().
 *
 * @param string[] $blobAddresses Addresses to load
 * @param int $queryFlags Query flags, passed through to fetchBlobs()
 * @return StatusValue Good status whose value maps each address to its blob, or to
 *  null where loading failed; every failure adds an 'internalerror' warning
 */
public function getBlobBatch( $blobAddresses, $queryFlags = 0 ) {
	// FIXME: All caching has temporarily been removed in I94c6f9ba7b9caeeb due to T235188.
	//        Caching behavior should be restored by reverting I94c6f9ba7b9caeeb as soon as
	//        the root cause of T235188 has been resolved.

	[ $fetched, $errors ] = $this->fetchBlobs( $blobAddresses, $queryFlags );

	// fetchBlobs() marks failures with false; this method reports them as null.
	$blobsByAddress = [];
	foreach ( $fetched as $address => $blob ) {
		$blobsByAddress[$address] = ( $blob === false ) ? null : $blob;
	}

	$status = StatusValue::newGood( $blobsByAddress );
	foreach ( $errors as $message ) {
		$status->warning( 'internalerror', $message );
	}

	return $status;
}
314
/**
 * Load and expand the blobs for a list of addresses, without any caching.
 *
 * Addresses with the 'bad' schema yield an empty string without an error. Addresses
 * with the 'tt' schema are looked up in the text table. Any other schema, a malformed
 * 'tt' ID, a missing row, or a row that cannot be expanded yields false plus an error
 * message for that address.
 *
 * @param string[] $blobAddresses Addresses to load
 * @param int $queryFlags Query flags, interpreted via DBAccessObjectUtils
 * @return array [ $result, $errors ]: $result maps each address to a string or false;
 *  $errors maps failed addresses to an error message
 * @throws BlobAccessException If an address cannot be parsed by splitBlobAddress()
 */
private function fetchBlobs( $blobAddresses, $queryFlags ) {
	$textIdToBlobAddress = [];
	$result = [];
	$errors = [];
	foreach ( $blobAddresses as $blobAddress ) {
		try {
			list( $schema, $id ) = self::splitBlobAddress( $blobAddress );
		} catch ( InvalidArgumentException $ex ) {
			throw new BlobAccessException(
				$ex->getMessage() . '. Use findBadBlobs.php to remedy.',
				0,
				$ex
			);
		}

		// TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
		if ( $schema === 'bad' ) {
			// Database row was marked as "known bad", no need to trigger an error.
			wfDebug(
				__METHOD__
				. ": loading known-bad content ($blobAddress), returning empty string"
			);
			$result[$blobAddress] = '';
			continue;
		} elseif ( $schema === 'tt' ) {
			$textId = intval( $id );

			if ( $textId < 1 || $id !== (string)$textId ) {
				$errors[$blobAddress] = "Bad blob address: $blobAddress."
					. ' Use findBadBlobs.php to remedy.';
				$result[$blobAddress] = false;
				// Do not query for a malformed ID: intval() may have truncated it to the
				// ID of a different, valid row (e.g. "12abc" becomes 12), which would both
				// attach that row's text to this address and displace a genuine "tt:12"
				// entry from the lookup map.
				continue;
			}

			$textIdToBlobAddress[$textId] = $blobAddress;
		} else {
			$errors[$blobAddress] = "Unknown blob address schema: $schema."
				. ' Use findBadBlobs.php to remedy.';
			$result[$blobAddress] = false;
		}
	}

	$textIds = array_keys( $textIdToBlobAddress );
	if ( !$textIds ) {
		// Nothing to look up in the text table.
		return [ $result, $errors ];
	}
	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST )
		? self::READ_LATEST_IMMUTABLE
		: 0;
	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );
	// Text data is immutable; check replica DBs first.
	$dbConnection = $this->getDBConnection( $index );
	$rows = $dbConnection->select(
		'text',
		[ 'old_id', 'old_text', 'old_flags' ],
		[ 'old_id' => $textIds ],
		__METHOD__,
		$options
	);
	$numRows = 0;
	if ( $rows instanceof IResultWrapper ) {
		$numRows = $rows->numRows();
	}

	// Fallback to DB_PRIMARY in some cases if not all the rows were found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( $numRows !== count( $textIds ) && $fallbackIndex !== null ) {
		$fetchedTextIds = [];
		foreach ( $rows as $row ) {
			$fetchedTextIds[] = $row->old_id;
		}
		// Only the rows the first query did not return are requested again.
		$missingTextIds = array_diff( $textIds, $fetchedTextIds );
		$dbConnection = $this->getDBConnection( $fallbackIndex );
		$rowsFromFallback = $dbConnection->select(
			'text',
			[ 'old_id', 'old_text', 'old_flags' ],
			[ 'old_id' => $missingTextIds ],
			__METHOD__,
			$fallbackOptions
		);
		// Chain both result sets so the loop below sees them as one sequence.
		$appendIterator = new AppendIterator();
		$appendIterator->append( $rows );
		$appendIterator->append( $rowsFromFallback );
		$rows = $appendIterator;
	}

	foreach ( $rows as $row ) {
		$blobAddress = $textIdToBlobAddress[$row->old_id];
		$blob = false;
		if ( $row->old_text !== null ) {
			$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
		}
		if ( $blob === false ) {
			$errors[$blobAddress] = "Bad data in text row {$row->old_id}."
				. ' Use findBadBlobs.php to remedy.';
		}
		$result[$blobAddress] = $blob;
	}

	// If we're still missing some of the rows, set errors for missing blobs.
	if ( count( $result ) !== count( $blobAddresses ) ) {
		foreach ( $blobAddresses as $blobAddress ) {
			if ( !isset( $result[$blobAddress ] ) ) {
				$errors[$blobAddress] = "Unable to fetch blob at $blobAddress."
					. ' Use findBadBlobs.php to remedy.';
				$result[$blobAddress] = false;
			}
		}
	}
	return [ $result, $errors ];
}
438
/**
 * Build the global cache key under which the blob for an address is cached.
 *
 * @param string $blobAddress
 * @return string Key made of the 'SqlBlobStore-blob' prefix, the resolved DB domain
 *  ID and the blob address
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->dbDomain );

	return $this->cache->makeGlobalKey( 'SqlBlobStore-blob', $domainId, $blobAddress );
}
456
/**
 * Expand a raw data blob according to the flags given.
 *
 * Blobs flagged 'external' hold only a URL; their content is fetched from the
 * external store (through the cache when $cacheKey is given) and then decompressed.
 * All other blobs are decompressed directly.
 *
 * @param string $raw The raw blob data, or the external store URL
 * @param string|string[] $flags Blob flags, as a comma-separated string or a list
 * @param string|null $cacheKey Blob address used to build the cache key; a falsy
 *  value skips the cache for external blobs
 * @return string|false The expanded data, or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	if ( !in_array( 'external', $flags ) ) {
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$urlParts = explode( '://', $url, 2 );
	if ( count( $urlParts ) == 1 || $urlParts[1] == '' ) {
		// No scheme separator, or nothing after it.
		return false;
	}

	$fetchExternal = function () use ( $url, $flags ) {
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		$fetched = $this->extStoreAccess
			->fetchFromURL( $url, [ 'domain' => $this->dbDomain ] );

		return $fetched === false ? false : $this->decompressData( $fetched, $flags );
	};

	if ( !$cacheKey ) {
		return $fetchExternal();
	}

	// The cached value should be decompressed, so handle that and return here.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $cacheKey ),
		$this->getCacheTTL(),
		$fetchExternal,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
511
/**
 * Compress a blob in place if compression is enabled, and report its flags.
 *
 * The 'utf-8' flag is always set. 'gzip' is added only when compression is enabled,
 * gzdeflate() is available, and deflating succeeded.
 *
 * @param string &$blob Data to compress; replaced by the deflated data on success
 * @return string Comma-separated list of flags describing the resulting blob
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$flagList = [ 'utf-8' ];

	if ( $this->compressBlobs ) {
		if ( !function_exists( 'gzdeflate' ) ) {
			wfDebug( __METHOD__ . " -- no zlib support, not compressing" );
		} else {
			$compressed = gzdeflate( $blob );

			if ( $compressed !== false ) {
				$blob = $compressed;
				$flagList[] = 'gzip';
			} else {
				// Leave $blob untouched and store it uncompressed.
				wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
			}
		}
	}

	return implode( ',', $flagList );
}
552
/**
 * Re-converts revision text according to its flags.
 *
 * Steps are applied in order: 'error' rows are rejected, 'gzip' data is inflated,
 * 'object' data is unserialized and its getText() result used, and finally data not
 * flagged as UTF-8 is converted from the legacy encoding, if one is configured.
 *
 * @param string $blob Blob in compressed/encoded form
 * @param array $blobFlags List of flag strings
 * @return string|false Decompressed text, or false on failure
 */
public function decompressData( string $blob, array $blobFlags ) {
	if ( in_array( 'error', $blobFlags ) ) {
		// Error row, return false
		return false;
	}

	if ( in_array( 'gzip', $blobFlags ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$inflated = gzinflate( $blob );

		if ( $inflated === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
		$blob = $inflated;
	}

	if ( in_array( 'object', $blobFlags ) ) {
		# Generic compressed storage
		$storedObject = unserialize( $blob );
		if ( !is_object( $storedObject ) ) {
			// Invalid object
			return false;
		}
		$blob = $storedObject->getText();
	}

	$flaggedUtf8 = in_array( 'utf-8', $blobFlags ) || in_array( 'utf8', $blobFlags );
	if ( $blob === false || !$this->legacyEncoding || $flaggedUtf8 ) {
		// Nothing to convert.
		return $blob;
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	# Old revisions kept around in a legacy encoding?
	# Upconvert on demand.
	# ("utf8" checked for compatibility with some broken
	# conversion scripts 2008-12-30)
	# Even with //IGNORE iconv can whine about illegal characters in
	# *input* string. We just ignore those too.
	# REF: https://bugs.php.net/bug.php?id=37166
	# REF: https://phabricator.wikimedia.org/T18885
	AtEase::suppressWarnings();
	$blob = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $blob );
	AtEase::restoreWarnings();

	return $blob;
}
615
/**
 * Work out the TTL to use when caching blobs.
 *
 * @return int TTL_UNCACHEABLE when the cache reports RDBMS-level durability or no
 *  expiry is configured; the configured cache expiry otherwise
 */
private function getCacheTTL() {
	$wanCache = $this->cache;

	if ( $wanCache->getQoS( $wanCache::ATTR_DURABILITY ) >= $wanCache::QOS_DURABILITY_RDBMS ) {
		// Do not cache RDBMs blobs in...the RDBMs store
		return $wanCache::TTL_UNCACHEABLE;
	}

	return $this->cacheExpiry ?: $wanCache::TTL_UNCACHEABLE;
}
635
/**
 * Returns an ID corresponding to the old_id field in the text table for the given
 * address.
 *
 * @param string $address Blob address, as accepted by splitBlobAddress()
 * @return int|null The text row ID, or null if the address does not use the 'tt' schema
 * @throws InvalidArgumentException If the address cannot be parsed, or its ID part is
 *  not a canonical positive integer
 */
public function getTextIdFromAddress( $address ) {
	list( $schema, $id, ) = self::splitBlobAddress( $address );

	if ( $schema !== 'tt' ) {
		return null;
	}

	$textId = intval( $id );

	// Reject zero and negative IDs as well as non-canonical spellings such as "012" or
	// "12abc"; this is the same rule fetchBlobs() applies, so an address that cannot
	// be loaded is never reported here as having a valid text ID.
	if ( $textId < 1 || $id !== (string)$textId ) {
		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return $textId;
}
671
/**
 * Returns an address referring to content stored in the text table row with the
 * given ID.
 *
 * @param int $id Value of the row's old_id field; not validated here
 * @return string Address using the 'tt' schema, e.g. "tt:17"
 */
public static function makeAddressFromTextId( $id ) {
	return "tt:$id";
}
688
/**
 * Splits a blob address into three parts: the schema, the ID, and parameters/flags.
 *
 * The expected shape is "schema:id" with an optional "?query" suffix; the schema is
 * lower-cased and the query part is parsed with wfCgiToArray().
 *
 * @param string $address
 * @return array [ $schema, $id, $parameters ]
 * @throws InvalidArgumentException If the address does not match the expected shape
 */
public static function splitBlobAddress( $address ) {
	$matches = [];
	$isWellFormed = preg_match( '/^([-+.\w]+):([^\s?]+)(\?([^\s]*))?$/', $address, $matches );
	if ( !$isWellFormed ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	return [
		strtolower( $matches[1] ),
		$matches[2],
		// The query group is absent from the match when the address has no "?" part.
		wfCgiToArray( $matches[4] ?? '' ),
	];
}
710
/**
 * Check if the blob metadata or backing blob data store is read-only.
 *
 * @return bool True if the external store is in use and read-only, or if the load
 *  balancer reports a read-only reason
 */
public function isReadOnly() {
	// The external store only matters when new blobs are actually written to it.
	$externalStoreReadOnly = $this->useExternalStore && $this->extStoreAccess->isReadOnly();

	return $externalStoreReadOnly
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
718}
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
Helper class for DAO classes.
static getDBOptions( $bitfield)
Get an appropriate DB index, options, and fallback DB index for a query.
static hasFlags( $bitfield, $flags)
This is the main interface for fetching or inserting objects with ExternalStore.
MediaWiki exception.
Exception representing a failure to access a data blob.
Service for storing and loading Content objects.
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID.
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
__construct(ILoadBalancer $dbLoadBalancer, ExternalStoreAccess $extStoreAccess, WANObjectCache $cache, $dbDomain=false)
decompressData(string $blob, array $blobFlags)
Re-converts revision text according to its flags.
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
setLegacyEncoding(string $legacyEncoding)
Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
getBlobBatch( $blobAddresses, $queryFlags=0)
A batched version of BlobStore::getBlob.
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
setUseExternalStore(bool $useExternalStore)
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Multi-datacenter aware caching interface.
Interface for database access objects.
Generic interface providing TTL constants for lightweight expiring object stores.
Service for loading and storing data blobs.
Definition BlobStore.php:35
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:39
Create and track the database connections and transactions for a given database cluster.
Result wrapper for grabbing data queried from an IDatabase object.
$cache
Definition mcc.php:33
const DB_PRIMARY
Definition defines.php:28