MediaWiki REL1_38
SqlBlobStore.php
Go to the documentation of this file.
1<?php
28namespace MediaWiki\Storage;
29
30use AppendIterator;
35use InvalidArgumentException;
36use MWException;
37use StatusValue;
39use Wikimedia\Assert\Assert;
40use Wikimedia\AtEase\AtEase;
44
54
// Note: the name has been taken unchanged from the old Revision class.
public const TEXT_CACHE_GROUP = 'revisiontext:10';

/**
 * @var \Wikimedia\Rdbms\ILoadBalancer Load balancer used to obtain connections
 *  to the database holding the text table. Assigned by the constructor.
 */
private $dbLoadBalancer;

/**
 * @var \ExternalStoreAccess Accessor used when blobs are written to or read
 *  from external storage. Assigned by the constructor.
 */
private $extStoreAccess;

/**
 * @var \WANObjectCache Cache used for expanded blobs.
 */
private $cache;

/**
 * @var string|bool DB domain ID of a wiki or false for the local one.
 */
private $dbDomain;

/**
 * @var int Cache expiry for blobs, in seconds. A falsy value disables caching
 *  (see getCacheTTL()).
 */
private $cacheExpiry = 604800; // 7 days

/**
 * @var bool Whether compressData() should deflate blobs.
 */
private $compressBlobs = false;

/**
 * @var string|bool Legacy encoding assumed for blobs lacking the utf-8 flag,
 *  or false to apply no legacy conversion.
 */
private $legacyEncoding = false;

/**
 * @var bool Whether storeBlob() writes blob data to external storage.
 */
private $useExternalStore = false;
97
/**
 * Type names are written fully qualified so that this signature does not
 * depend on which `use` statements are present in the file.
 *
 * @param \Wikimedia\Rdbms\ILoadBalancer $dbLoadBalancer Load balancer for acquiring
 *  database connections
 * @param \ExternalStoreAccess $extStoreAccess Access layer for external storage
 * @param \WANObjectCache $cache Cache used for blob contents
 * @param bool|string $dbDomain DB domain ID of a wiki or false for the local one
 */
public function __construct(
	\Wikimedia\Rdbms\ILoadBalancer $dbLoadBalancer,
	\ExternalStoreAccess $extStoreAccess,
	\WANObjectCache $cache,
	$dbDomain = false
) {
	$this->dbLoadBalancer = $dbLoadBalancer;
	$this->extStoreAccess = $extStoreAccess;
	$this->cache = $cache;
	$this->dbDomain = $dbDomain;
}
120
/**
 * Report the configured expiry used when caching blobs.
 *
 * @return int Expiry in seconds, as last set via setCacheExpiry()
 *  (the default is 7 days)
 */
public function getCacheExpiry() {
	$expiry = $this->cacheExpiry;

	return $expiry;
}
127
/**
 * Change the expiry used when caching blobs.
 *
 * @param int $cacheExpiry Expiry in seconds; a falsy value (0) makes
 *  getCacheTTL() treat blobs as uncacheable
 */
public function setCacheExpiry( int $cacheExpiry ) {
	$this->cacheExpiry = $cacheExpiry;
}
134
/**
 * Report whether blobs are compressed before being stored.
 *
 * @return bool The value last passed to setCompressBlobs() (false by default)
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
141
/**
 * Enable or disable compression of blobs in compressData().
 *
 * @param bool $compressBlobs Truthy to have compressData() deflate blobs
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
148
/**
 * Report the legacy encoding assumed for blobs that lack the utf-8 flag.
 *
 * @return string|false The encoding name, or false if no legacy encoding
 *  conversion is configured
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
156
162 wfDeprecated( __METHOD__ );
163 return null;
164 }
165
/**
 * Set the legacy encoding to assume for blobs that do not have the utf-8
 * flag set. decompressData() converts such blobs from this encoding to UTF-8.
 *
 * @param string $legacyEncoding Source encoding name, as passed to iconv()
 */
public function setLegacyEncoding( string $legacyEncoding ) {
	$this->legacyEncoding = $legacyEncoding;
}
177
/**
 * Report whether new blobs are written to external storage.
 *
 * @return bool The value last passed to setUseExternalStore() (false by default)
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
184
/**
 * Choose whether storeBlob() writes blob data to external storage.
 *
 * @param bool $useExternalStore True to route new blobs through the
 *  external store and keep only the returned URL in the text table
 */
public function setUseExternalStore( bool $useExternalStore ) {
	$this->useExternalStore = $useExternalStore;
}
191
/**
 * Get the load balancer injected through the constructor.
 *
 * getDBConnection() and isReadOnly() call methods on the returned object,
 * so this must never return null.
 *
 * @return \Wikimedia\Rdbms\ILoadBalancer
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
198
/**
 * Obtain a connection handle for this store's DB domain.
 *
 * @param int $index Connection index forwarded to the load balancer
 *  (callers in this class pass DB_PRIMARY or an index from
 *  DBAccessObjectUtils::getDBOptions())
 * @return mixed Whatever the load balancer's getConnectionRef() returns
 */
private function getDBConnection( $index ) {
	return $this->getDBLoadBalancer()->getConnectionRef( $index, [], $this->dbDomain );
}
208
/**
 * Stores an arbitrary blob of data and returns an address that can be used
 * with getBlob() to retrieve it.
 *
 * The data is passed through compressData() first. If external storage is
 * enabled, the payload is written there and only the resulting URL is kept
 * in the text table, flagged as 'external'.
 *
 * @param string $data Raw blob data
 * @param array $hints Not used by this implementation
 * @return string Blob address of the form produced by makeAddressFromTextId()
 * @throws BlobAccessException If the external store insert fails, or an
 *  MWException is raised while storing
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() takes $data by reference and may replace it with deflated bytes.
		$flagList = $this->compressData( $data );

		# Write to external storage if required
		if ( $this->useExternalStore ) {
			// Store and get the URL
			$storedUrl = $this->extStoreAccess->insert( $data, [ 'domain' => $this->dbDomain ] );
			if ( !$storedUrl ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}
			// From here on, the text row holds the URL instead of the payload.
			$data = $storedUrl;
			$flagList = $flagList ? $flagList . ',external' : 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$dbw = $this->getDBConnection( DB_PRIMARY );

		$row = [
			'old_id' => $dbw->nextSequenceValue( 'text_old_id_seq' ),
			'old_text' => $data,
			'old_flags' => $flagList,
		];
		$dbw->insert( 'text', $row, __METHOD__ );

		return self::makeAddressFromTextId( $dbw->insertId() );
	} catch ( MWException $e ) {
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
260
/**
 * Retrieve a blob, given an address.
 *
 * The lookup goes through the cache; on a miss the blob is loaded with
 * fetchBlobs(). A failed fetch yields false from the callback and the
 * recorded error is raised afterwards.
 *
 * @param string $blobAddress Blob address as returned by storeBlob()
 * @param int $queryFlags Bitfield forwarded to fetchBlobs()
 * @return string The blob contents
 * @throws BlobAccessException If fetchBlobs() reported an error for the address
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// Filled in by the callback below; stays null on a cache hit.
	$error = null;

	$loader = function ( $oldValue, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags, &$error ) {
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		[ $blobs, $failures ] = $this->fetchBlobs( [ $blobAddress ], $queryFlags );
		// No negative caching; negative hits on text rows may be due to corrupted replica DBs
		$error = $failures[$blobAddress] ?? null;

		return $blobs[$blobAddress];
	};

	$cacheOptions = [
		'pcGroup' => self::TEXT_CACHE_GROUP,
		'pcTTL' => IExpiringStore::TTL_PROC_LONG,
	];

	$blob = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loader,
		$cacheOptions
	);

	if ( $error ) {
		throw new BlobAccessException( $error );
	}

	Assert::postcondition( is_string( $blob ), 'Blob must not be null' );

	return $blob;
}
297
/**
 * A batched version of getBlob().
 *
 * @param string[] $blobAddresses Blob addresses to load
 * @param int $queryFlags Bitfield forwarded to fetchBlobs()
 * @return StatusValue Good status whose value maps each address to its blob
 *  contents, or to null where the blob could not be loaded; one
 *  'internalerror' warning is attached per failed address
 */
public function getBlobBatch( $blobAddresses, $queryFlags = 0 ) {
	// FIXME: All caching has temporarily been removed in I94c6f9ba7b9caeeb due to T235188.
	//        Caching behavior should be restored by reverting I94c6f9ba7b9caeeb as soon as
	//        the root cause of T235188 has been resolved.

	[ $fetched, $failures ] = $this->fetchBlobs( $blobAddresses, $queryFlags );

	// fetchBlobs() signals failure with false; callers of this method expect null.
	$blobsByAddress = [];
	foreach ( $fetched as $address => $contents ) {
		$blobsByAddress[$address] = ( $contents === false ) ? null : $contents;
	}

	$status = StatusValue::newGood( $blobsByAddress );
	foreach ( $failures as $message ) {
		$status->warning( 'internalerror', $message );
	}

	return $status;
}
328
/**
 * Load the contents of the given blob addresses from the text table.
 *
 * MCR migration note: this corresponded to Revision::fetchText.
 *
 * Addresses with the 'bad' schema yield an empty string without an error.
 * Addresses with an unknown schema, or a 'tt' address whose id is not a
 * canonical positive integer, yield false plus an error message and are
 * never looked up in the database.
 *
 * @param string[] $blobAddresses Blob addresses to load
 * @param int $queryFlags Bitfield of READ_* flags, passed to DBAccessObjectUtils
 * @return array[] Two-element list [ $result, $errors ]: $result maps each
 *  address to its expanded contents (string) or false on failure; $errors
 *  maps failed addresses to a message string
 * @throws BlobAccessException If an address cannot be parsed at all
 */
private function fetchBlobs( $blobAddresses, $queryFlags ) {
	$textIdToBlobAddress = [];
	$result = [];
	$errors = [];
	foreach ( $blobAddresses as $blobAddress ) {
		try {
			list( $schema, $id ) = self::splitBlobAddress( $blobAddress );
		} catch ( InvalidArgumentException $ex ) {
			throw new BlobAccessException(
				$ex->getMessage() . '. Use findBadBlobs.php to remedy.',
				0,
				$ex
			);
		}

		// TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
		if ( $schema === 'bad' ) {
			// Database row was marked as "known bad", no need to trigger an error.
			wfDebug(
				__METHOD__
				. ": loading known-bad content ($blobAddress), returning empty string"
			);
			$result[$blobAddress] = '';
			continue;
		}

		if ( $schema !== 'tt' ) {
			$errors[$blobAddress] = "Unknown blob address schema: $schema."
				. ' Use findBadBlobs.php to remedy.';
			$result[$blobAddress] = false;
			continue;
		}

		$textId = intval( $id );

		if ( $textId < 1 || $id !== (string)$textId ) {
			$errors[$blobAddress] = "Bad blob address: $blobAddress."
				. ' Use findBadBlobs.php to remedy.';
			$result[$blobAddress] = false;
			// Do not register the malformed id for lookup: intval() truncates
			// inputs such as "5x" to 5, which would otherwise fetch an unrelated
			// row and could shadow a valid address for the same id.
			continue;
		}

		$textIdToBlobAddress[$textId] = $blobAddress;
	}

	$textIds = array_keys( $textIdToBlobAddress );
	if ( !$textIds ) {
		return [ $result, $errors ];
	}
	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST )
		? self::READ_LATEST_IMMUTABLE
		: 0;
	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );
	// Text data is immutable; check replica DBs first.
	$dbConnection = $this->getDBConnection( $index );
	$rows = $dbConnection->select(
		'text',
		[ 'old_id', 'old_text', 'old_flags' ],
		[ 'old_id' => $textIds ],
		__METHOD__,
		$options
	);
	$numRows = 0;
	if ( $rows instanceof IResultWrapper ) {
		$numRows = $rows->numRows();
	}

	// Fallback to DB_PRIMARY in some cases if not all the rows were found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( $numRows !== count( $textIds ) && $fallbackIndex !== null ) {
		$fetchedTextIds = [];
		foreach ( $rows as $row ) {
			$fetchedTextIds[] = $row->old_id;
		}
		$missingTextIds = array_diff( $textIds, $fetchedTextIds );
		$dbConnection = $this->getDBConnection( $fallbackIndex );
		$rowsFromFallback = $dbConnection->select(
			'text',
			[ 'old_id', 'old_text', 'old_flags' ],
			[ 'old_id' => $missingTextIds ],
			__METHOD__,
			$fallbackOptions
		);
		// Iterate over the rows from both queries as one sequence.
		$appendIterator = new AppendIterator();
		$appendIterator->append( $rows );
		$appendIterator->append( $rowsFromFallback );
		$rows = $appendIterator;
	}

	foreach ( $rows as $row ) {
		$blobAddress = $textIdToBlobAddress[$row->old_id];
		$blob = false;
		if ( $row->old_text !== null ) {
			$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
		}
		if ( $blob === false ) {
			$errors[$blobAddress] = "Bad data in text row {$row->old_id}."
				. ' Use findBadBlobs.php to remedy.';
		}
		$result[$blobAddress] = $blob;
	}

	// If we're still missing some of the rows, set errors for missing blobs.
	if ( count( $result ) !== count( $blobAddresses ) ) {
		foreach ( $blobAddresses as $blobAddress ) {
			if ( !isset( $result[$blobAddress ] ) ) {
				$errors[$blobAddress] = "Unable to fetch blob at $blobAddress."
					. ' Use findBadBlobs.php to remedy.';
				$result[$blobAddress] = false;
			}
		}
	}
	return [ $result, $errors ];
}
452
/**
 * Get a cache key for a given Blob address.
 *
 * The key is global (built with makeGlobalKey()) and includes the resolved
 * DB domain ID, so blobs of different wikis do not collide.
 *
 * @param string $blobAddress
 * @return string
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->dbDomain );

	return $this->cache->makeGlobalKey( 'SqlBlobStore-blob', $domainId, $blobAddress );
}
470
/**
 * Expand a raw data blob according to the flags given.
 *
 * If the flags include 'external', $raw is treated as an external store URL:
 * the payload is fetched from there and then decompressed. When $cacheKey is
 * given, the fetched and decompressed external payload is served through the
 * cache. Non-external blobs are passed to decompressData() directly.
 *
 * @param string $raw Raw blob data, or an external store URL
 * @param string|string[] $flags Blob flags, as a comma-separated string or a list
 * @param string|null $cacheKey Blob address used to build the cache key; a
 *  falsy value bypasses the cache for external blobs
 * @return false|string The expanded blob, or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	if ( !in_array( 'external', $flags ) ) {
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$urlParts = explode( '://', $raw, 2 );
	if ( count( $urlParts ) == 1 || $urlParts[1] == '' ) {
		// No "://" separator, or nothing after it.
		return false;
	}

	$loadExternal = function () use ( $raw, $flags ) {
		// Ignore $setOpts; blobs are immutable and negatives are not cached
		$fetched = $this->extStoreAccess
			->fetchFromURL( $raw, [ 'domain' => $this->dbDomain ] );
		if ( $fetched === false ) {
			return false;
		}

		return $this->decompressData( $fetched, $flags );
	};

	if ( !$cacheKey ) {
		return $loadExternal();
	}

	// The cached value should be decompressed, so handle that and return here.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $cacheKey ),
		$this->getCacheTTL(),
		$loadExternal,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
525
/**
 * Compress the given blob in place if compression is enabled, and report the
 * flags describing its stored form.
 *
 * The 'utf-8' flag is always included. 'gzip' is added only when compression
 * is enabled, gzdeflate() is available, and it succeeded; otherwise $blob is
 * left untouched.
 *
 * @param string &$blob Blob data; replaced by its deflated form on success
 * @return string Comma-separated list of flags
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$flagSet = [ 'utf-8' ];

	if ( $this->compressBlobs ) {
		if ( !function_exists( 'gzdeflate' ) ) {
			wfDebug( __METHOD__ . " -- no zlib support, not compressing" );
		} else {
			$packed = gzdeflate( $blob );

			if ( $packed !== false ) {
				$blob = $packed;
				$flagSet[] = 'gzip';
			} else {
				wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
			}
		}
	}

	return implode( ',', $flagSet );
}
566
/**
 * Re-converts revision text according to its flags.
 *
 * Handles, in order: the 'error' flag (always fails), 'gzip' inflation,
 * 'object' unserialization, and conversion from the legacy encoding for
 * blobs that carry neither the 'utf-8' nor the 'utf8' flag.
 *
 * @param string $blob Blob in compressed/encoded form
 * @param array $blobFlags List of flag names
 * @return string|false Decoded text, or false on error
 */
public function decompressData( string $blob, array $blobFlags ) {
	$hasFlag = static function ( $name ) use ( $blobFlags ) {
		return in_array( $name, $blobFlags );
	};

	if ( $hasFlag( 'error' ) ) {
		// Error row, return false
		return false;
	}

	if ( $hasFlag( 'gzip' ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$inflated = gzinflate( $blob );
		if ( $inflated === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
		$blob = $inflated;
	}

	if ( $hasFlag( 'object' ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() runs on data read from storage; confirm
		// that this data can never be attacker-controlled.
		$obj = unserialize( $blob );
		if ( !is_object( $obj ) ) {
			// Invalid object
			return false;
		}
		$blob = $obj->getText();
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	$needsLegacyConversion = $blob !== false
		&& $this->legacyEncoding
		&& !$hasFlag( 'utf-8' )
		&& !$hasFlag( 'utf8' );

	if ( $needsLegacyConversion ) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# ("utf8" checked for compatibility with some broken
		#  conversion scripts 2008-12-30)
		# Even with //IGNORE iconv can whine about illegal characters in
		# *input* string. We just ignore those too.
		# REF: https://bugs.php.net/bug.php?id=37166
		# REF: https://phabricator.wikimedia.org/T18885
		AtEase::suppressWarnings();
		$blob = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $blob );
		AtEase::restoreWarnings();
	}

	return $blob;
}
629
/**
 * Get the text cache TTL.
 *
 * @return int The configured cache expiry, or the cache's TTL_UNCACHEABLE
 *  value when the expiry is falsy or the cache reports a durability of at
 *  least QOS_DURABILITY_RDBMS
 */
private function getCacheTTL() {
	$cache = $this->cache;

	if ( $cache->getQoS( $cache::ATTR_DURABILITY ) >= $cache::QOS_DURABILITY_RDBMS ) {
		// Do not cache RDBMs blobs in...the RDBMs store
		$ttl = $cache::TTL_UNCACHEABLE;
	} else {
		$ttl = $this->cacheExpiry ?: $cache::TTL_UNCACHEABLE;
	}

	return $ttl;
}
649
/**
 * Returns an ID corresponding to the old_id field in the text table,
 * corresponding to the given $address.
 *
 * @param string $address Blob address
 * @return int|null The text row id, or null if the address does not use
 *  the 'tt' schema
 * @throws InvalidArgumentException If the address cannot be parsed, or the
 *  id of a 'tt' address is not a canonical positive integer
 */
public function getTextIdFromAddress( $address ) {
	list( $schema, $id, ) = self::splitBlobAddress( $address );

	if ( $schema !== 'tt' ) {
		return null;
	}

	$textId = intval( $id );

	// Reject zero, negative ids, and any spelling that does not round-trip
	// (e.g. "007", "5x"), using the same validity rule as fetchBlobs().
	if ( $textId < 1 || $id !== (string)$textId ) {
		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return $textId;
}
685
/**
 * Returns an address referring to content stored in the text table row with
 * the given ID.
 *
 * @param int $id Text row id
 * @return string Address consisting of the 'tt:' prefix followed by the id
 */
public static function makeAddressFromTextId( $id ) {
	$schemaPrefix = 'tt:';

	return $schemaPrefix . $id;
}
702
/**
 * Splits a blob address into three parts: the schema, the ID, and
 * parameters/flags.
 *
 * @param string $address Blob address of the form "schema:id" with an
 *  optional "?query" suffix
 * @return array Three-element list [ $schema, $id, $parameters ]; the schema
 *  is lower-cased and the parameters are the query part decoded by
 *  wfCgiToArray()
 * @throws InvalidArgumentException If the address does not match that form
 */
public static function splitBlobAddress( $address ) {
	$matched = preg_match( '/^([-+.\w]+):([^\s?]+)(\?([^\s]*))?$/', $address, $m );
	if ( !$matched ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	// Group 4 is absent when the address has no "?query" suffix.
	$query = $m[4] ?? '';

	return [ strtolower( $m[1] ), $m[2], wfCgiToArray( $query ) ];
}
724
/**
 * Check if the blob metadata or backing blob data store is read-only.
 *
 * @return bool True if external storage is in use and read-only, or if the
 *  load balancer reports a read-only reason
 */
public function isReadOnly() {
	$externalStoreReadOnly = $this->useExternalStore && $this->extStoreAccess->isReadOnly();

	return $externalStoreReadOnly
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
732}
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
wfDeprecated( $function, $version=false, $component=false, $callerOffset=2)
Logs a warning that a deprecated feature was used.
Helper class for DAO classes.
This is the main interface for fetching or inserting objects with ExternalStore.
MediaWiki exception.
Exception representing a failure to access a data blob.
Service for storing and loading Content objects.
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID.
ExternalStoreAccess $extStoreAccess
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
getCacheTTL()
Get the text cache TTL.
string|bool $dbDomain
DB domain ID of a wiki or false for the local one.
__construct(ILoadBalancer $dbLoadBalancer, ExternalStoreAccess $extStoreAccess, WANObjectCache $cache, $dbDomain=false)
decompressData(string $blob, array $blobFlags)
Re-converts revision text according to its flags.
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
setLegacyEncoding(string $legacyEncoding)
Set the legacy encoding to assume for blobs that do not have the utf-8 flag set.
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
getCacheKey( $blobAddress)
Get a cache key for a given Blob address.
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
getBlobBatch( $blobAddresses, $queryFlags=0)
A batched version of BlobStore::getBlob.
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
setUseExternalStore(bool $useExternalStore)
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
fetchBlobs( $blobAddresses, $queryFlags)
MCR migration note: this corresponded to Revision::fetchText.
Generic operation result class Has warning/error list, boolean status and arbitrary value.
Multi-datacenter aware caching interface.
Interface for database access objects.
Generic interface providing TTL constants for lightweight expiring object stores.
Service for loading and storing data blobs.
Definition BlobStore.php:35
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:38
Database cluster connection, tracking, load balancing, and transaction manager interface.
Result wrapper for grabbing data queried from an IDatabase object.
const DB_PRIMARY
Definition defines.php:27