34 use InvalidArgumentException;
37 use Wikimedia\Assert\Assert;
38 use Wikimedia\AtEase\AtEase;
59 private $dbLoadBalancer;
64 private $extStoreAccess;
79 private $cacheExpiry = 604800;
84 private $compressBlobs =
false;
89 private $legacyEncoding =
false;
94 private $useExternalStore =
false;
113 $this->dbLoadBalancer = $dbLoadBalancer;
114 $this->extStoreAccess = $extStoreAccess;
115 $this->cache = $cache;
116 $this->dbDomain = $dbDomain;
123 return $this->cacheExpiry;
130 $this->cacheExpiry = $cacheExpiry;
137 return $this->compressBlobs;
144 $this->compressBlobs = $compressBlobs;
152 return $this->legacyEncoding;
164 $this->legacyEncoding = $legacyEncoding;
171 return $this->useExternalStore;
178 $this->useExternalStore = $useExternalStore;
184 private function getDBLoadBalancer() {
185 return $this->dbLoadBalancer;
193 private function getDBConnection( $index ) {
194 $lb = $this->getDBLoadBalancer();
195 return $lb->getConnectionRef( $index, [], $this->dbDomain );
211 # Write to external storage if required
212 if ( $this->useExternalStore ) {
215 $data = $this->extStoreAccess->insert( $data, [
'domain' => $this->dbDomain ] );
225 $flags .=
'external';
234 $dbw->newInsertQueryBuilder()
235 ->insertInto(
'text' )
236 ->row( [
'old_text' => $data,
'old_flags' => $flags ] )
237 ->caller( __METHOD__ )->execute();
239 $textId = $dbw->insertId();
256 public function getBlob( $blobAddress, $queryFlags = 0 ) {
257 Assert::parameterType(
'string', $blobAddress,
'$blobAddress' );
260 $blob = $this->cache->getWithSetCallback(
261 $this->getCacheKey( $blobAddress ),
262 $this->getCacheTTL(),
263 function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags, &$error ) {
265 [ $result, $errors ] = $this->fetchBlobs( [ $blobAddress ], $queryFlags );
267 $error = $errors[$blobAddress] ??
null;
269 $ttl = WANObjectCache::TTL_UNCACHEABLE;
271 return $result[$blobAddress];
273 $this->getCacheOptions()
277 if ( $error[0] ===
'badrevision' ) {
284 Assert::postcondition( is_string( $blob ),
'Blob must not be null' );
304 [ $blobsByAddress, $errors ] = $this->fetchBlobs( $blobAddresses, $queryFlags );
306 $blobsByAddress = array_map(
static function ( $blob ) {
307 return $blob ===
false ? null : $blob;
308 }, $blobsByAddress );
311 foreach ( $errors as $error ) {
313 $result->warning( ...$error );
332 private function fetchBlobs( $blobAddresses, $queryFlags ) {
333 $textIdToBlobAddress = [];
336 foreach ( $blobAddresses as $blobAddress ) {
339 }
catch ( InvalidArgumentException $ex ) {
340 throw new BlobAccessException(
341 $ex->getMessage() .
'. Use findBadBlobs.php to remedy.',
348 if ( $schema ===
'bad' ) {
352 .
": loading known-bad content ($blobAddress), returning empty string"
354 $result[$blobAddress] =
'';
355 $errors[$blobAddress] = [
357 'The content of this revision is missing or corrupted (bad schema)'
359 } elseif ( $schema ===
'tt' ) {
360 $textId = intval( $id );
362 if ( $textId < 1 || $id !== (
string)$textId ) {
363 $errors[$blobAddress] = [
365 "Bad blob address: $blobAddress. Use findBadBlobs.php to remedy."
367 $result[$blobAddress] =
false;
370 $textIdToBlobAddress[$textId] = $blobAddress;
372 $errors[$blobAddress] = [
374 "Unknown blob address schema: $schema. Use findBadBlobs.php to remedy."
376 $result[$blobAddress] =
false;
380 $textIds = array_keys( $textIdToBlobAddress );
382 return [ $result, $errors ];
387 ? self::READ_LATEST_IMMUTABLE
389 [ $index, $options, $fallbackIndex, $fallbackOptions ] =
392 $dbConnection = $this->getDBConnection( $index );
393 $rows = $dbConnection->newSelectQueryBuilder()
394 ->select( [
'old_id',
'old_text',
'old_flags' ] )
396 ->where( [
'old_id' => $textIds ] )
397 ->options( $options )
398 ->caller( __METHOD__ )->fetchResultSet();
400 if ( $rows instanceof IResultWrapper ) {
401 $numRows = $rows->numRows();
406 if ( $numRows !== count( $textIds ) && $fallbackIndex !==
null ) {
407 $fetchedTextIds = [];
408 foreach ( $rows as $row ) {
409 $fetchedTextIds[] = $row->old_id;
411 $missingTextIds = array_diff( $textIds, $fetchedTextIds );
412 $dbConnection = $this->getDBConnection( $fallbackIndex );
413 $rowsFromFallback = $dbConnection->newSelectQueryBuilder()
414 ->select( [
'old_id',
'old_text',
'old_flags' ] )
416 ->where( [
'old_id' => $missingTextIds ] )
417 ->options( $fallbackOptions )
418 ->caller( __METHOD__ )->fetchResultSet();
419 $appendIterator =
new AppendIterator();
420 $appendIterator->append( $rows );
421 $appendIterator->append( $rowsFromFallback );
422 $rows = $appendIterator;
425 foreach ( $rows as $row ) {
426 $blobAddress = $textIdToBlobAddress[$row->old_id];
428 if ( $row->old_text !==
null ) {
429 $blob = $this->
expandBlob( $row->old_text, $row->old_flags, $blobAddress );
431 if ( $blob ===
false ) {
432 $errors[$blobAddress] = [
434 "Bad data in text row {$row->old_id}. Use findBadBlobs.php to remedy."
437 $result[$blobAddress] = $blob;
441 if ( count( $result ) !== count( $blobAddresses ) ) {
442 foreach ( $blobAddresses as $blobAddress ) {
443 if ( !isset( $result[$blobAddress ] ) ) {
444 $errors[$blobAddress] = [
446 "Unable to fetch blob at $blobAddress. Use findBadBlobs.php to remedy."
448 $result[$blobAddress] =
false;
452 return [ $result, $errors ];
465 private function getCacheKey( $blobAddress ) {
466 return $this->cache->makeGlobalKey(
468 $this->dbLoadBalancer->resolveDomainID( $this->dbDomain ),
478 private function getCacheOptions() {
481 'pcTTL' => WANObjectCache::TTL_PROC_LONG,
482 'segmentable' =>
true
506 public function expandBlob( $raw, $flags, $blobAddress =
null ) {
507 if ( is_string( $flags ) ) {
510 if ( in_array(
'error', $flags ) ) {
512 "The content of this revision is missing or corrupted (error flag)"
517 if ( in_array(
'external', $flags ) ) {
519 $parts = explode(
'://', $url, 2 );
520 if ( count( $parts ) == 1 || $parts[1] ==
'' ) {
524 if ( $blobAddress ) {
526 return $this->cache->getWithSetCallback(
527 $this->getCacheKey( $blobAddress ),
528 $this->getCacheTTL(),
529 function () use ( $url, $flags ) {
531 $blob = $this->extStoreAccess
532 ->fetchFromURL( $url, [
'domain' => $this->dbDomain ] );
534 return $blob ===
false ? false : $this->
decompressData( $blob, $flags );
536 $this->getCacheOptions()
539 $blob = $this->extStoreAccess->fetchFromURL( $url, [
'domain' => $this->dbDomain ] );
540 return $blob ===
false ? false : $this->
decompressData( $blob, $flags );
569 $blobFlags[] =
'utf-8';
571 if ( $this->compressBlobs ) {
572 if ( function_exists(
'gzdeflate' ) ) {
573 $deflated = gzdeflate( $blob );
575 if ( $deflated ===
false ) {
579 $blobFlags[] =
'gzip';
582 wfDebug( __METHOD__ .
" -- no zlib support, not compressing" );
585 return implode(
',', $blobFlags );
604 if ( in_array(
'error', $blobFlags ) ) {
609 if ( in_array(
'gzip', $blobFlags ) ) {
610 # Deal with optional compression of archived pages.
611 # This can be done periodically via maintenance/compressOld.php, and
612 # as pages are saved if $wgCompressRevisions is set.
613 $blob = gzinflate( $blob );
615 if ( $blob ===
false ) {
616 wfWarn( __METHOD__ .
': gzinflate() failed' );
621 if ( in_array(
'object', $blobFlags ) ) {
622 # Generic compressed storage
628 $blob = $obj->getText();
632 if ( $blob !==
false && $this->legacyEncoding
633 && !in_array(
'utf-8', $blobFlags ) && !in_array(
'utf8', $blobFlags )
635 # Old revisions kept around in a legacy encoding?
636 # Upconvert on demand.
637 # ("utf8" checked for compatibility with some broken
638 # conversion scripts 2008-12-30)
640 # *input* string. We just ignore those too.
643 AtEase::suppressWarnings();
644 $blob = iconv( $this->legacyEncoding,
'UTF-8//IGNORE', $blob );
645 AtEase::restoreWarnings();
658 private function getCacheTTL() {
659 $cache = $this->cache;
661 if ( $cache->
getQoS( $cache::ATTR_DURABILITY ) >= $cache::QOS_DURABILITY_RDBMS ) {
663 $ttl = $cache::TTL_UNCACHEABLE;
665 $ttl = $this->cacheExpiry ?: $cache::TTL_UNCACHEABLE;
694 if ( $schema !==
'tt' ) {
698 $textId = intval( $id );
700 if ( !$textId || $id !== (
string)$textId ) {
701 throw new InvalidArgumentException(
"Malformed text_id: $id" );
731 return $flagsString ===
'' ? [] : explode(
',', $flagsString );
745 if ( !preg_match(
'/^([-+.\w]+):([^\s?]+)(\?([^\s]*))?$/', $address, $m ) ) {
746 throw new InvalidArgumentException(
"Bad blob address: $address" );
749 $schema = strtolower( $m[1] );
753 return [ $schema, $id, $parameters ];
757 if ( $this->useExternalStore && $this->extStoreAccess->isReadOnly() ) {
761 return ( $this->getDBLoadBalancer()->getReadOnlyReason() !==
false );
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or as a PHP error, depending on $wgDevelopmentWarnings.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns the same string in array form.
Helper class for DAO classes.
static getDBOptions( $bitfield)
Get an appropriate DB index, options, and fallback DB index for a query.
static hasFlags( $bitfield, $flags)
This is the main interface for fetching or inserting objects with ExternalStore.
static unserialize(string $str, bool $allowDouble=false)
Unserialize a HistoryBlob.
Generic operation result class. Has warning/error list, boolean status and arbitrary value.
static newGood( $value=null)
Factory function for good results.
Multi-datacenter aware caching interface.
Interface for database access objects.