MediaWiki REL1_32
SqlBlobStore.php
Go to the documentation of this file.
1<?php
27namespace MediaWiki\Storage;
28
use DBAccessObjectUtils;
use ExternalStore;
use IDBAccessObject;
use IExpiringStore;
use InvalidArgumentException;
use Language;
use MWException;
use WANObjectCache;
use Wikimedia\Assert\Assert;
use Wikimedia\Rdbms\Database;
use Wikimedia\Rdbms\IDatabase;
use Wikimedia\Rdbms\LoadBalancer;
41
51
// Note: the name has been taken unchanged from the Revision class.
const TEXT_CACHE_GROUP = 'revisiontext:10';

/**
 * @var LoadBalancer Set by the constructor; used to obtain connections,
 *  resolve the domain ID for cache keys and check the read-only state.
 */
private $dbLoadBalancer;

/**
 * @var WANObjectCache Cache consulted by getBlob() and expandBlob().
 */
private $cache;

/**
 * @var bool|string Wiki ID handed to the load balancer; false by default.
 */
private $wikiId;

/**
 * @var int Cache TTL for blobs, in seconds.
 */
private $cacheExpiry = 604800; // 7 days

/**
 * @var bool Whether compressData() should deflate blobs.
 */
private $compressBlobs = false;

/**
 * @var bool|string Legacy encoding name, or false when none is configured.
 */
private $legacyEncoding = false;

/**
 * @var Language|null Language object whose iconv() is used by decompressData()
 *  for legacy encoding conversion; set via setLegacyEncoding().
 */
private $legacyEncodingConversionLang = null;

/**
 * @var bool Whether storeBlob() writes the data to the external store.
 */
private $useExternalStore = false;
94
/**
 * @param LoadBalancer $dbLoadBalancer Load balancer used to obtain database connections
 * @param WANObjectCache $cache Cache used for blob lookups
 * @param bool|string $wikiId Wiki ID passed to the load balancer when requesting
 *  connections and resolving the domain ID; defaults to false.
 */
public function __construct(
	LoadBalancer $dbLoadBalancer,
	WANObjectCache $cache,
	$wikiId = false
) {
	$this->dbLoadBalancer = $dbLoadBalancer;
	$this->cache = $cache;
	$this->wikiId = $wikiId;
}
114
/**
 * Get the configured blob cache expiry.
 *
 * @return int Time for which blobs can be cached, in seconds
 */
public function getCacheExpiry() {
	$expiry = $this->cacheExpiry;

	return $expiry;
}
121
/**
 * Set the blob cache expiry.
 *
 * @param int $cacheExpiry Time for which blobs can be cached, in seconds.
 *  A falsy value makes getCacheTTL() treat blobs as uncacheable.
 */
public function setCacheExpiry( $cacheExpiry ) {
	// Reject anything that is not an integer before touching the state.
	Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );
	$this->cacheExpiry = $cacheExpiry;
}
130
/**
 * Tell whether compressData() will deflate blobs before they are stored.
 *
 * @return bool
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
137
/**
 * Enable or disable compression of blobs in compressData().
 *
 * @param bool $compressBlobs Stored as given; no type check is performed here.
 */
public function setCompressBlobs( $compressBlobs ) {
	$enabled = $compressBlobs;
	$this->compressBlobs = $enabled;
}
144
/**
 * Get the legacy encoding assumed for blobs that are not flagged as UTF-8.
 *
 * @return bool|string The encoding name, or false if none is configured
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
152
160
/**
 * Configure the legacy encoding that decompressData() converts from when a
 * blob carries neither the "utf-8" nor the "utf8" flag.
 *
 * @param string $legacyEncoding Name of the source encoding
 * @param Language $language Language object whose iconv() performs the conversion
 */
public function setLegacyEncoding( $legacyEncoding, Language $language ) {
	// Only string encoding names are accepted.
	Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );

	$this->legacyEncodingConversionLang = $language;
	$this->legacyEncoding = $legacyEncoding;
}
172
/**
 * Tell whether storeBlob() writes blob data to the external store.
 *
 * @return bool
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
179
/**
 * Enable or disable use of the external store in storeBlob().
 *
 * @param bool $useExternalStore
 */
public function setUseExternalStore( $useExternalStore ) {
	Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );

	$this->useExternalStore = $useExternalStore;
}
188
/**
 * Get the load balancer that was handed to the constructor.
 *
 * @return LoadBalancer
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
195
/**
 * Obtain a database connection for this store's wiki from the load balancer.
 *
 * @param int $index Connection index to request, e.g. DB_MASTER
 *
 * @return IDatabase
 */
private function getDBConnection( $index ) {
	return $this->getDBLoadBalancer()->getConnection( $index, [], $this->wikiId );
}
205
/**
 * Store a blob in the text table (and optionally the external store) and
 * return the address under which getBlob() can find it again.
 *
 * @param string $data Raw blob data
 * @param array $hints Not used by this implementation
 *
 * @throws BlobAccessException If the external store rejects the data, or if an
 *  MWException is raised while storing
 * @return string Blob address of the form "tt:<text id>"
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() may replace $data with its deflated form.
		$flags = $this->compressData( $data );

		if ( $this->useExternalStore ) {
			// Write to external storage; from here on $data is the URL of the stored
			// object, and that URL is what ends up in the text table.
			$data = ExternalStore::insertToDefault( $data );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}
			$flags .= ( $flags ? ',' : '' ) . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$dbw = $this->getDBConnection( DB_MASTER );

		$row = [
			'old_id' => $dbw->nextSequenceValue( 'text_old_id_seq' ),
			'old_text' => $data,
			'old_flags' => $flags,
		];
		$dbw->insert( 'text', $row, __METHOD__ );

		return self::makeAddressFromTextId( $dbw->insertId() );
	} catch ( MWException $e ) {
		// Re-wrap so callers only have to deal with BlobAccessException.
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
257
/**
 * Retrieve a blob, given an address. The lookup goes through the cache and
 * falls back to fetchBlob() on a miss.
 *
 * @param string $blobAddress Address as returned by storeBlob()
 * @param int $queryFlags Query flags, translated by DBAccessObjectUtils::getDBOptions()
 *
 * @throws BlobAccessException If the blob could not be loaded
 * @return string
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// Regeneration callback, run when the cache has no usable value.
	$loadBlob = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) {
		list( $index ) = DBAccessObjectUtils::getDBOptions( $queryFlags );
		$setOpts += Database::getCacheSetOptions( $this->getDBConnection( $index ) );

		return $this->fetchBlob( $blobAddress, $queryFlags );
	};
	$cacheOptions = [
		'pcGroup' => self::TEXT_CACHE_GROUP,
		'pcTTL' => IExpiringStore::TTL_PROC_LONG
	];

	// No negative caching; negative hits on text rows may be due to corrupted replica DBs
	$blob = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loadBlob,
		$cacheOptions
	);

	if ( $blob !== false ) {
		return $blob;
	}

	throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress );
}
292
/**
 * Load a blob from the text table, bypassing the cache.
 *
 * MCR migration note: this corresponds to Revision::fetchText.
 *
 * @param string $blobAddress Blob address; only the "tt" schema is supported
 * @param int $queryFlags Query flags, translated by DBAccessObjectUtils::getDBOptions()
 *
 * @throws BlobAccessException On an unknown schema or a malformed text ID
 * @return string|false The expanded blob, or false if the row is missing or
 *  its content could not be expanded
 */
private function fetchBlob( $blobAddress, $queryFlags ) {
	list( $schema, $id, ) = self::splitBlobAddress( $blobAddress );

	//TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
	if ( $schema === 'tt' ) {
		$textId = intval( $id );
	} else {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Unknown blob address schema: $schema" );
	}

	// The ID must round-trip through intval() unchanged and must not be zero.
	if ( !$textId || $id !== (string)$textId ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Bad blob address: $blobAddress" );
	}

	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST )
		? self::READ_LATEST_IMMUTABLE
		: 0;

	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );

	// Text data is immutable; check replica DBs first.
	// The options derived from the query flags must be passed along, exactly as
	// the fallback query below passes $fallbackOptions.
	$row = $this->getDBConnection( $index )->selectRow(
		'text',
		[ 'old_text', 'old_flags' ],
		[ 'old_id' => $textId ],
		__METHOD__,
		$options
	);

	// Fallback to DB_MASTER in some cases if the row was not found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( !$row && $fallbackIndex !== null ) {
		$row = $this->getDBConnection( $fallbackIndex )->selectRow(
			'text',
			[ 'old_text', 'old_flags' ],
			[ 'old_id' => $textId ],
			__METHOD__,
			$fallbackOptions
		);
	}

	if ( !$row ) {
		wfWarn( __METHOD__ . ": No text row with ID $textId." );
		return false;
	}

	// The blob address doubles as the cache key for externally stored content.
	$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );

	if ( $blob === false ) {
		wfLogWarning( __METHOD__ . ": Bad data in text row $textId." );
		return false;
	}

	return $blob;
}
362
/**
 * Get a cache key for a given blob address. The key is global and includes
 * the domain ID resolved from this store's wiki ID.
 *
 * @param string $blobAddress
 *
 * @return string
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->wikiId );

	return $this->cache->makeGlobalKey( 'BlobStore', 'address', $domainId, $blobAddress );
}
381
/**
 * Expand a raw data blob according to the flags given.
 *
 * With the "external" flag, $raw is treated as an external store URL and the
 * content is fetched from there; otherwise $raw is decompressed directly.
 *
 * @param string $raw Raw blob data, or an external store URL
 * @param string|string[] $flags Flags, as an array or a comma-separated string
 * @param string|null $cacheKey Blob address used to build the cache key for
 *  external content; the cache is bypassed when this is empty
 *
 * @return string|false The expanded blob, or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	if ( !in_array( 'external', $flags ) ) {
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$urlParts = explode( '://', $url, 2 );
	if ( !isset( $urlParts[1] ) || $urlParts[1] === '' ) {
		// Not a URL with both a protocol and a path.
		return false;
	}

	$fetchAndDecompress = function () use ( $url, $flags ) {
		// No negative caching per BlobStore::getBlob()
		$blob = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] );

		return $blob === false ? false : $this->decompressData( $blob, $flags );
	};

	if ( !$cacheKey ) {
		return $fetchAndDecompress();
	}

	// The cached value should be decompressed, so handle that and return here.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $cacheKey ),
		$this->getCacheTTL(),
		$fetchAndDecompress,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
435
/**
 * Compress the given blob in place if compression is enabled, and report the
 * flags that describe the resulting data.
 *
 * @param string &$blob Blob data; replaced by its deflated form on success
 *
 * @return string Comma-separated flags: always "utf-8", plus "gzip" if deflated
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$flags = 'utf-8';

	if ( !$this->compressBlobs ) {
		return $flags;
	}

	if ( !function_exists( 'gzdeflate' ) ) {
		wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" );
		return $flags;
	}

	$deflated = gzdeflate( $blob );
	if ( $deflated === false ) {
		// Leave the blob untouched and do not flag it as compressed.
		wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
		return $flags;
	}

	$blob = $deflated;

	return $flags . ',gzip';
}
476
/**
 * Re-converts revision text according to its flags.
 *
 * Handles, in order: the "error" flag, "gzip" inflation, "object" unwrapping
 * via the stored object's getText(), and legacy encoding conversion for blobs
 * that are not flagged as UTF-8.
 *
 * @param string $blob Blob in compressed/encoded form
 * @param array $blobFlags Flags describing the stored form of the blob
 *
 * @return string|bool Decompressed text, or false on failure
 */
public function decompressData( $blob, array $blobFlags ) {
	// Revision::decompressRevisionText accepted false here, so defend against that
	Assert::parameterType( 'string', $blob, '$blob' );

	if ( in_array( 'error', $blobFlags ) ) {
		// Error row, return false
		return false;
	}

	if ( in_array( 'gzip', $blobFlags ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() instantiates whatever class the stored data names.
		// This is only acceptable as long as the blob content is trusted.
		$obj = unserialize( $blob );
		if ( !is_object( $obj ) || !method_exists( $obj, 'getText' ) ) {
			// Invalid object, or an object (such as an incomplete class) that cannot
			// provide text. Report failure rather than raising a fatal error.
			return false;
		}
		$blob = $obj->getText();
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	if ( $blob !== false && $this->legacyEncoding && $this->legacyEncodingConversionLang
		&& !in_array( 'utf-8', $blobFlags ) && !in_array( 'utf8', $blobFlags )
	) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# ("utf8" checked for compatibility with some broken
		# conversion scripts 2008-12-30)
		$blob = $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob );
	}

	return $blob;
}
536
/**
 * Get the text cache TTL.
 *
 * @return int The configured cache expiry, or WANObjectCache::TTL_UNCACHEABLE
 *  when caching is pointless or disabled
 */
private function getCacheTTL() {
	$emulationLevel = $this->cache->getQoS( WANObjectCache::ATTR_EMULATION );

	if ( $emulationLevel <= WANObjectCache::QOS_EMULATION_SQL ) {
		// Do not cache RDBMs blobs in...the RDBMs store
		return WANObjectCache::TTL_UNCACHEABLE;
	}

	if ( $this->cacheExpiry ) {
		return $this->cacheExpiry;
	}

	// A falsy expiry means blobs are not to be cached.
	return WANObjectCache::TTL_UNCACHEABLE;
}
556
/**
 * Returns an ID corresponding to the old_id field in the text table,
 * corresponding to the given address.
 *
 * @param string $address Blob address
 *
 * @throws InvalidArgumentException If the address is malformed, or the "tt"
 *  address carries an ID that is not a non-zero canonical integer
 * @return int|null The text ID, or null if the address does not use the "tt" schema
 */
public function getTextIdFromAddress( $address ) {
	$parts = self::splitBlobAddress( $address );

	if ( $parts[0] !== 'tt' ) {
		return null;
	}

	$id = $parts[1];
	$textId = intval( $id );

	// Accept only IDs that survive the integer round trip unchanged.
	if ( $textId && $id === (string)$textId ) {
		return $textId;
	}

	throw new InvalidArgumentException( "Malformed text_id: $id" );
}
592
/**
 * Returns an address referring to content stored in the text table row with
 * the given ID.
 *
 * @param int $id Value of the old_id field
 *
 * @return string Address of the form "tt:<id>"
 */
public static function makeAddressFromTextId( $id ) {
	return "tt:{$id}";
}
608
/**
 * Splits a blob address into three parts: the schema, the ID, and
 * parameters/flags.
 *
 * @param string $address Address of the form "<schema>:<id>[?<query string>]"
 *
 * @throws InvalidArgumentException If the address does not have that form
 * @return array [ $schema, $id, $parameters ]; the schema is lower-cased and the
 *  parameters are the decoded query string, or an empty array if there is none
 */
private static function splitBlobAddress( $address ) {
	$matches = [];
	if ( preg_match( '/^(\w+):(\w+)(\?(.*))?$/', $address, $matches ) !== 1 ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	$parameters = [];
	if ( isset( $matches[4] ) ) {
		$parameters = wfCgiToArray( $matches[4] );
	}

	return [ strtolower( $matches[1] ), $matches[2], $parameters ];
}
628
/**
 * Check if the blob metadata or backing blob data store is read-only.
 *
 * @return bool True if the external store is in use and its default stores are
 *  read-only, or if the load balancer reports a read-only reason
 */
public function isReadOnly() {
	$externalStoreReadOnly = $this->useExternalStore
		&& ExternalStore::defaultStoresAreReadOnly();

	// The load balancer is only consulted when the external store is writable.
	return $externalStoreReadOnly
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
636}
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
Helper class for DAO classes.
Constructor class for key/value blob data kept in external repositories.
static fetchFromURL( $url, array $params=[])
Fetch data from given URL.
static insertToDefault( $data, array $params=[])
Like insert() above, but does more of the work for us.
static defaultStoresAreReadOnly()
Internationalisation code.
Definition Language.php:35
MediaWiki exception.
Exception representing a failure to access a data blob.
Service for storing and loading Content objects.
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID.
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
getCacheTTL()
Get the text cache TTL.
__construct(LoadBalancer $dbLoadBalancer, WANObjectCache $cache, $wikiId=false)
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
setLegacyEncoding( $legacyEncoding, Language $language)
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
setUseExternalStore( $useExternalStore)
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
getCacheKey( $blobAddress)
Get a cache key for a given Blob address.
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
fetchBlob( $blobAddress, $queryFlags)
MCR migration note: this corresponds to Revision::fetchText.
bool string $wikiId
Wiki ID.
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
Language null $legacyEncodingConversionLang
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
decompressData( $blob, array $blobFlags)
Re-converts revision text according to its flags.
Multi-datacenter aware caching interface.
Relational database abstraction object.
Definition Database.php:48
Database connection, tracking, load balancing, and transaction manager for a cluster.
deferred txt A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user For updating the view updating the linked to tables after a etc PHP does not yet have any way to tell the server to actually return and disconnect while still running these but it might have such a feature in the future We handle these by creating a deferred update object and putting those objects on a global list
Definition deferred.txt:11
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:2050
processing should stop and the error should be shown to the user * false
Definition hooks.txt:187
returning false will NOT prevent logging $e
Definition hooks.txt:2226
Interface for database access objects.
Generic base class for storage interfaces.
Service for loading and storing data blobs.
Definition BlobStore.php:33
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:38
you have access to all of the normal MediaWiki so you can get a DB use the cache
The wiki should then use memcached to cache various data To use multiple just add more items to the array To increase the weight of a make its entry a array("192.168.0.1:11211", 2))
const DB_MASTER
Definition defines.php:26