MediaWiki REL1_33
SqlBlobStore.php
Go to the documentation of this file.
1<?php
27namespace MediaWiki\Storage;
28
use DBAccessObjectUtils;
use ExternalStore;
use IExpiringStore;
use InvalidArgumentException;
use Language;
use MWException;
use WANObjectCache;
use Wikimedia\Assert\Assert;
use Wikimedia\Rdbms\LoadBalancer;
40
50
/**
 * Process-cache group used when caching blobs.
 * Note: the name has been taken unchanged from the Revision class.
 */
const TEXT_CACHE_GROUP = 'revisiontext:10';

/**
 * @var LoadBalancer Source of database connections; set by the constructor.
 */
private $dbLoadBalancer;

/**
 * @var WANObjectCache Cache used for loaded blobs.
 */
private $cache;

/**
 * @var bool|string Wiki ID
 */
private $wikiId;

/**
 * @var int Cache lifetime for blobs
 */
private $cacheExpiry = 604800; // 7 days

/**
 * @var bool Whether compressData() should try to deflate blobs
 */
private $compressBlobs = false;

/**
 * @var bool|string Legacy encoding name, or false if none is configured
 */
private $legacyEncoding = false;

/**
 * @var Language|null Language object used for legacy encoding conversion
 */
private $legacyEncodingConversionLang = null;

/**
 * @var bool Whether storeBlob() writes to the external store
 */
private $useExternalStore = false;
93
/**
 * @param LoadBalancer $dbLoadBalancer Load balancer used to acquire database connections
 * @param WANObjectCache $cache Cache used for loaded blobs
 * @param bool|string $wikiId Wiki ID handed to the load balancer when acquiring
 *  connections and when resolving the domain used in cache keys. Defaults to false.
 */
public function __construct(
	LoadBalancer $dbLoadBalancer,
	WANObjectCache $cache,
	$wikiId = false
) {
	$this->dbLoadBalancer = $dbLoadBalancer;
	$this->cache = $cache;
	$this->wikiId = $wikiId;
}
113
/**
 * Reports the configured cache lifetime for blobs.
 *
 * @return int Expiry as stored by setCacheExpiry() (604800, i.e. 7 days, by default)
 */
public function getCacheExpiry() {
	$expiry = $this->cacheExpiry;

	return $expiry;
}
120
/**
 * Changes the cache lifetime used for blobs.
 *
 * @param int $cacheExpiry New expiry; anything other than an integer is rejected
 *  by the parameter assertion.
 */
public function setCacheExpiry( $cacheExpiry ) {
	// Validate first so a bad value never reaches the property.
	Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );
	$this->cacheExpiry = $cacheExpiry;
}
129
/**
 * Reports whether blobs are compressed when they are stored.
 *
 * @return bool The value set via setCompressBlobs() (false by default)
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
136
/**
 * Turns compression of newly stored blobs on or off.
 *
 * @param bool $compressBlobs When truthy, compressData() attempts to deflate blobs.
 */
public function setCompressBlobs( $compressBlobs ) {
	// No validation is performed here; the value is stored as given.
	$this->compressBlobs = $compressBlobs;
}
143
/**
 * Reports the legacy encoding assumed for blobs that are not flagged as UTF-8.
 *
 * @return bool|string The encoding set via setLegacyEncoding(), or false if none
 *  has been configured.
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
151
159
/**
 * Configures the legacy encoding to assume for blobs that are not flagged as UTF-8,
 * together with the Language object whose iconv() performs the conversion.
 *
 * @param string $legacyEncoding Name of the legacy encoding; must be a string
 * @param Language $language Language object used by decompressData() for conversion
 */
public function setLegacyEncoding( $legacyEncoding, Language $language ) {
	Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );

	// Both values are needed by decompressData(); it skips conversion if either is unset.
	$this->legacyEncodingConversionLang = $language;
	$this->legacyEncoding = $legacyEncoding;
}
171
/**
 * Reports whether new blobs are written to the external store.
 *
 * @return bool The value set via setUseExternalStore() (false by default)
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
178
/**
 * Chooses whether storeBlob() writes blob data to the external store.
 *
 * @param bool $useExternalStore Must be a boolean; other types are rejected by
 *  the parameter assertion.
 */
public function setUseExternalStore( $useExternalStore ) {
	Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );

	$this->useExternalStore = $useExternalStore;
}
187
/**
 * Returns the load balancer that was injected through the constructor.
 *
 * @return LoadBalancer
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
194
/**
 * Obtains a database connection for this store's wiki from the load balancer.
 *
 * @param int $index Server index passed through to the load balancer (e.g. DB_MASTER)
 * @return mixed Whatever LoadBalancer::getConnection() returns for that index
 */
private function getDBConnection( $index ) {
	// No query groups are requested; the wiki ID selects the target database.
	return $this->getDBLoadBalancer()->getConnection( $index, [], $this->wikiId );
}
204
/**
 * Stores an arbitrary blob of data and returns an address that can be used with
 * getBlob() to retrieve it.
 *
 * The data is first passed through compressData(). If the external store is enabled,
 * the (possibly compressed) data is written there and only the resulting URL is kept
 * in the text table, flagged as 'external'.
 *
 * @param string $data Blob content to store
 * @param array $hints Passed on as parameters to ExternalStore::insertToDefault()
 *  when the external store is in use; otherwise unused here.
 *
 * @throws BlobAccessException If the external store write fails, or if an
 *  MWException is raised while storing.
 * @return string Blob address of the form produced by makeAddressFromTextId()
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		$flags = $this->compressData( $data );

		# Write to external storage if required
		if ( $this->useExternalStore ) {
			// Store and get the URL. From here on, $data is the URL, not the content.
			$data = ExternalStore::insertToDefault( $data, $hints );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}
			if ( $flags ) {
				$flags .= ',';
			}
			$flags .= 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$dbw = $this->getDBConnection( DB_MASTER );

		$old_id = $dbw->nextSequenceValue( 'text_old_id_seq' );
		$dbw->insert(
			'text',
			[
				'old_id' => $old_id,
				'old_text' => $data,
				'old_flags' => $flags,
			],
			__METHOD__
		);

		$textId = $dbw->insertId();

		return self::makeAddressFromTextId( $textId );
	} catch ( MWException $e ) {
		// Re-wrap so callers only have to deal with the blob store's own exception type.
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
256
/**
 * Retrieves a blob, given an address, going through the cache first.
 *
 * @param string $blobAddress Address as returned by storeBlob()
 * @param int $queryFlags Query flags forwarded to fetchBlob() on a cache miss
 *
 * @throws BlobAccessException If the blob could not be loaded
 * @return string The blob content
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// Loader run on a cache miss. $setOpts is ignored: blobs are immutable and
	// negatives are not cached.
	$loadFromDb = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) {
		return $this->fetchBlob( $blobAddress, $queryFlags );
	};

	$cacheOptions = [
		'pcGroup' => self::TEXT_CACHE_GROUP,
		'pcTTL' => IExpiringStore::TTL_PROC_LONG
	];

	// No negative caching; negative hits on text rows may be due to corrupted replica DBs
	$result = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loadFromDb,
		$cacheOptions
	);

	if ( $result !== false ) {
		return $result;
	}

	throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress );
}
289
/**
 * Loads a blob from the text table, bypassing the cache.
 *
 * Only the 'tt' address schema (text table row ID) is supported.
 *
 * @param string $blobAddress Address to resolve
 * @param int $queryFlags Query flags, translated by DBAccessObjectUtils::getDBOptions()
 *  into the connection index and select options to use.
 *
 * @throws BlobAccessException If the schema is unknown or the ID is malformed
 * @return string|bool The expanded blob, or false if the row is missing or its
 *  content could not be expanded.
 */
private function fetchBlob( $blobAddress, $queryFlags ) {
	list( $schema, $id, ) = self::splitBlobAddress( $blobAddress );

	//TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
	if ( $schema === 'tt' ) {
		$textId = intval( $id );
	} else {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Unknown blob address schema: $schema" );
	}

	// Reject zero and anything that does not round-trip through intval() (e.g. "012", "1a").
	if ( !$textId || $id !== (string)$textId ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Bad blob address: $blobAddress" );
	}

	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST )
		? self::READ_LATEST_IMMUTABLE
		: 0;

	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );

	// Text data is immutable; check replica DBs first.
	// The select options derived from the query flags must be applied here, just as
	// $fallbackOptions are applied to the fallback query below.
	$row = $this->getDBConnection( $index )->selectRow(
		'text',
		[ 'old_text', 'old_flags' ],
		[ 'old_id' => $textId ],
		__METHOD__,
		$options
	);

	// Fallback to DB_MASTER in some cases if the row was not found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( !$row && $fallbackIndex !== null ) {
		$row = $this->getDBConnection( $fallbackIndex )->selectRow(
			'text',
			[ 'old_text', 'old_flags' ],
			[ 'old_id' => $textId ],
			__METHOD__,
			$fallbackOptions
		);
	}

	if ( !$row ) {
		wfWarn( __METHOD__ . ": No text row with ID $textId." );
		return false;
	}

	// The blob address doubles as the cache key for externally stored content.
	$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );

	if ( $blob === false ) {
		wfLogWarning( __METHOD__ . ": Bad data in text row $textId." );
		return false;
	}

	return $blob;
}
359
/**
 * Builds the cache key for a given blob address.
 *
 * The key is global and includes the domain ID resolved from this store's wiki ID
 * as well as the blob address.
 *
 * @param string $blobAddress
 * @return string Key as produced by the cache's makeGlobalKey()
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->wikiId );

	return $this->cache->makeGlobalKey( 'BlobStore', 'address', $domainId, $blobAddress );
}
378
/**
 * Expands a raw data blob according to the flags given.
 *
 * When the flags contain 'external', $raw is treated as an external store URL and the
 * content is fetched from there; otherwise $raw is the content itself. In both cases
 * the content is passed through decompressData().
 *
 * @param string $raw Raw content of the text row: either blob data or a URL
 * @param array|string $flags Flags as a list, or as a comma-separated string
 * @param string|null $cacheKey If given, externally stored content is loaded through
 *  the cache; the value is handed to getCacheKey() to build the actual key.
 *
 * @return string|bool The expanded content, or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	// Content held directly in the row only needs decompressing.
	if ( !in_array( 'external', $flags ) ) {
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$urlParts = explode( '://', $url, 2 );
	if ( count( $urlParts ) == 1 || $urlParts[1] == '' ) {
		// Not of the form "<something>://<something>"
		return false;
	}

	// Fetches from the external store and decompresses; yields false if the fetch fails.
	$loadExternal = function () use ( $url, $flags ) {
		$blob = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] );

		return $blob === false ? false : $this->decompressData( $blob, $flags );
	};

	if ( !$cacheKey ) {
		return $loadExternal();
	}

	// The cached value should be decompressed, so the loader handles that itself.
	// $setOpts is ignored; blobs are immutable and negatives are not cached.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $cacheKey ),
		$this->getCacheTTL(),
		$loadExternal,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
432
/**
 * Prepares blob data for storage and reports the flags describing it.
 *
 * The blob is always flagged 'utf-8'. If compression is enabled and gzdeflate() is
 * available and succeeds, the blob is replaced in place by its deflated form and
 * 'gzip' is added to the flags.
 *
 * @param string &$blob Blob content; overwritten with the deflated data on success
 * @return string Comma-separated list of flags
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$flagList = [ 'utf-8' ];

	if ( !$this->compressBlobs ) {
		return implode( ',', $flagList );
	}

	if ( !function_exists( 'gzdeflate' ) ) {
		wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" );
		return implode( ',', $flagList );
	}

	$compressed = gzdeflate( $blob );
	if ( $compressed === false ) {
		// Leave the blob untouched and store it uncompressed.
		wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
	} else {
		$blob = $compressed;
		$flagList[] = 'gzip';
	}

	return implode( ',', $flagList );
}
473
/**
 * Re-converts stored blob content according to its flags.
 *
 * Handles, in this order: the 'error' flag, 'gzip' inflation, 'object' unserialization,
 * and legacy encoding conversion for content not flagged as UTF-8.
 *
 * @param string $blob Blob content as stored; must be a string
 * @param array $blobFlags List of flags describing the content
 *
 * @return string|bool The converted content, or false on failure
 */
public function decompressData( $blob, array $blobFlags ) {
	// Revision::decompressRevisionText accepted false here, so defend against that
	Assert::parameterType( 'string', $blob, '$blob' );

	// Error row, return false
	if ( in_array( 'error', $blobFlags ) ) {
		return false;
	}

	if ( in_array( 'gzip', $blobFlags ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() runs on data read from storage; confirm that this
		// data can never be attacker-controlled.
		$storedObject = unserialize( $blob );
		if ( !is_object( $storedObject ) ) {
			// Invalid object
			return false;
		}
		$blob = $storedObject->getText();
	}

	// "utf8" checked for compatibility with some broken conversion scripts 2008-12-30
	$flaggedUtf8 = in_array( 'utf-8', $blobFlags ) || in_array( 'utf8', $blobFlags );

	// Nothing left to do unless there is content, a legacy encoding and a conversion
	// language are configured, and the content is not already flagged as UTF-8.
	if ( $blob === false
		|| !$this->legacyEncoding
		|| !$this->legacyEncodingConversionLang
		|| $flaggedUtf8
	) {
		return $blob;
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration:
	// old revisions kept around in a legacy encoding are upconverted on demand.
	return $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob );
}
533
/**
 * Determines the TTL to use when caching blobs.
 *
 * @return int TTL_UNCACHEABLE if the cache's emulation QoS is at or below the SQL
 *  level or if no expiry is configured; the configured cache expiry otherwise.
 */
private function getCacheTTL() {
	$emulationLevel = $this->cache->getQoS( WANObjectCache::ATTR_EMULATION );

	if ( $emulationLevel <= WANObjectCache::QOS_EMULATION_SQL ) {
		// Do not cache RDBMs blobs in...the RDBMs store
		return WANObjectCache::TTL_UNCACHEABLE;
	}

	// A falsy expiry (e.g. 0) means caching is disabled.
	return $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE;
}
553
/**
 * Extracts the text table row ID (old_id) from a blob address.
 *
 * @param string $address Blob address
 *
 * @throws InvalidArgumentException If the address is unparsable, or uses the 'tt'
 *  schema with an ID that is not a canonical positive integer.
 * @return int|null The row ID, or null if the address does not use the 'tt' schema
 */
public function getTextIdFromAddress( $address ) {
	$addressParts = self::splitBlobAddress( $address );
	$schema = $addressParts[0];
	$id = $addressParts[1];

	if ( $schema !== 'tt' ) {
		return null;
	}

	$textId = intval( $id );

	// Accept only non-zero IDs that round-trip unchanged through intval().
	if ( $textId && $id === (string)$textId ) {
		return $textId;
	}

	throw new InvalidArgumentException( "Malformed text_id: $id" );
}
589
/**
 * Returns an address referring to content stored in the text table row with the given ID.
 *
 * @param int $id Text table row ID
 * @return string Address using the 'tt' schema, e.g. "tt:42"
 */
public static function makeAddressFromTextId( $id ) {
	$schemaPrefix = 'tt:';

	return $schemaPrefix . $id;
}
605
/**
 * Splits a blob address into three parts: the schema, the ID, and parameters/flags.
 *
 * The expected form is "<schema>:<id>", optionally followed by "?<query string>".
 *
 * @param string $address Blob address
 *
 * @throws InvalidArgumentException If the address does not have the expected form
 * @return array [ $schema, $id, $parameters ], with the schema lower-cased and the
 *  parameters decoded by wfCgiToArray() (an empty array if there is no query part).
 */
public static function splitBlobAddress( $address ) {
	$matched = preg_match( '/^(\w+):(\w+)(\?(.*))?$/', $address, $m );
	if ( !$matched ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	return [
		strtolower( $m[1] ),
		$m[2],
		isset( $m[4] ) ? wfCgiToArray( $m[4] ) : [],
	];
}
627
/**
 * Checks whether the blob metadata or the backing blob data store is read-only.
 *
 * @return bool True if the external store is in use and its default stores are
 *  read-only, or if the load balancer reports a read-only reason.
 */
public function isReadOnly() {
	$externalIsReadOnly = $this->useExternalStore
		&& ExternalStore::defaultStoresAreReadOnly();

	// The load balancer is only consulted when the external store did not already decide.
	return $externalIsReadOnly
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
635}
Apache License January AND DISTRIBUTION Definitions License shall mean the terms and conditions for use
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
Helper class for DAO classes.
Constructor class for key/value blob data kept in external repositories.
static fetchFromURL( $url, array $params=[])
Fetch data from given URL.
static insertToDefault( $data, array $params=[])
Like insert() above, but does more of the work for us.
static defaultStoresAreReadOnly()
Internationalisation code.
Definition Language.php:36
MediaWiki exception.
Exception representing a failure to access a data blob.
Service for storing and loading Content objects.
static makeAddressFromTextId( $id)
Returns an address referring to content stored in the text table row with the given ID.
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
getCacheTTL()
Get the text cache TTL.
__construct(LoadBalancer $dbLoadBalancer, WANObjectCache $cache, $wikiId=false)
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
setLegacyEncoding( $legacyEncoding, Language $language)
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
setUseExternalStore( $useExternalStore)
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
getCacheKey( $blobAddress)
Get a cache key for a given Blob address.
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
fetchBlob( $blobAddress, $queryFlags)
MCR migration note: this corresponds to Revision::fetchText.
bool string $wikiId
Wiki ID.
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
Language null $legacyEncodingConversionLang
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
decompressData( $blob, array $blobFlags)
Re-converts revision text according to its flags.
Multi-datacenter aware caching interface.
Database connection, tracking, load balancing, and transaction manager for a cluster.
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user. For example: updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list.
Definition deferred.txt:11
$data
Utility to generate mapping file used in mw.Title (phpCharToUpper.json)
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:1999
processing should stop and the error should be shown to the user * false
Definition hooks.txt:187
returning false will NOT prevent logging $e
Definition hooks.txt:2175
injection txt This is an overview of how MediaWiki makes use of dependency injection The design described here grew from the discussion of RFC T384 The term dependency this means that anything an object needs to operate should be injected from the the object itself should only know narrow no concrete implementation of the logic it relies on The requirement to inject everything typically results in an architecture that based on two main types of and essentially stateless service objects that use other service objects to operate on the value objects As of the beginning MediaWiki is only starting to use the DI approach Much of the code still relies on global state or direct resulting in a highly cyclical dependency which acts as the top level factory for services in MediaWiki which can be used to gain access to default instances of various services MediaWikiServices however also allows new services to be defined and default services to be redefined Services are defined or redefined by providing a callback the instantiator that will return a new instance of the service When it will create an instance of MediaWikiServices and populate it with the services defined in the files listed by thereby bootstrapping the DI framework Per $wgServiceWiringFiles lists includes ServiceWiring php
Definition injection.txt:37
Interface for database access objects.
Generic interface for lightweight expiring object stores.
Service for loading and storing data blobs.
Definition BlobStore.php:33
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:38
you have access to all of the normal MediaWiki so you can get a DB use the cache
The wiki should then use memcached to cache various data. To use multiple servers, just add more items to the array. To increase the weight of a server, make its entry a subarray, e.g. array("192.168.0.1:11211", 2).
const DB_MASTER
Definition defines.php:26