MediaWiki REL1_31
SqlBlobStore.php
Go to the documentation of this file.
1<?php
27namespace MediaWiki\Storage;
28
33use InvalidArgumentException;
34use Language;
35use MWException;
37use Wikimedia\Assert\Assert;
41
51
// Note: the name has been taken unchanged from the Revision class.
const TEXT_CACHE_GROUP = 'revisiontext:10';

/**
 * @var LoadBalancer Load balancer used to obtain database connections;
 *  assigned by the constructor.
 */
private $dbLoadBalancer;

/**
 * @var WANObjectCache Cache used by getBlob() and expandBlob().
 */
private $cache;

/**
 * @var bool|string Wiki ID handed to the load balancer when connecting.
 *  expandBlob() only uses the wiki-local text cache when this is false.
 */
private $wikiId;

/**
 * @var int Cache TTL for blobs, in seconds.
 */
private $cacheExpiry = 604800; // 7 days

/**
 * @var bool Whether compressData() should try to deflate blobs.
 */
private $compressBlobs = false;

/**
 * @var bool|string Legacy encoding name, or false if none is configured.
 */
private $legacyEncoding = false;

/**
 * @var Language|null Language object used by decompressData() to convert
 *  from the legacy encoding; assigned by setLegacyEncoding().
 */
private $legacyEncodingConversionLang = null;

/**
 * @var bool Whether storeBlob() writes the data to the external store.
 */
private $useExternalStore = false;
94
/**
 * @param LoadBalancer $dbLoadBalancer Load balancer used to acquire database connections
 * @param WANObjectCache $cache Cache used for blob data
 * @param bool|string $wikiId Wiki ID passed on to the load balancer when connecting;
 *  defaults to false
 */
public function __construct(
	LoadBalancer $dbLoadBalancer,
	WANObjectCache $cache,
	$wikiId = false
) {
	$this->dbLoadBalancer = $dbLoadBalancer;
	$this->cache = $cache;
	$this->wikiId = $wikiId;
}
109
/**
 * Get the configured blob cache expiry.
 *
 * @return int Time to live, in seconds (the default is 7 days)
 */
public function getCacheExpiry() {
	return $this->cacheExpiry;
}
116
/**
 * Set the blob cache expiry.
 *
 * The value is checked with Assert::parameterType() and must be an integer.
 *
 * @param int $cacheExpiry Time to live, in seconds
 */
public function setCacheExpiry( $cacheExpiry ) {
	Assert::parameterType( 'integer', $cacheExpiry, '$cacheExpiry' );
	$this->cacheExpiry = $cacheExpiry;
}
125
/**
 * Tell whether compressData() will try to compress blobs.
 *
 * @return bool The value last given to setCompressBlobs(), false by default
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
132
/**
 * Enable or disable compression of blobs in compressData().
 *
 * The value is stored as given, without validation.
 *
 * @param bool $compressBlobs
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
139
/**
 * Get the legacy encoding that decompressData() converts from.
 *
 * @return bool|string The encoding name, or false if none has been set
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
147
155
/**
 * Configure the legacy encoding assumed for blobs that are not flagged as UTF-8.
 *
 * decompressData() converts such blobs to UTF-8 through $language->iconv().
 *
 * @param string $legacyEncoding Name of the legacy encoding; must be a string
 * @param Language $language Language object that performs the conversion
 */
public function setLegacyEncoding( $legacyEncoding, Language $language ) {
	Assert::parameterType( 'string', $legacyEncoding, '$legacyEncoding' );

	$this->legacyEncodingConversionLang = $language;
	$this->legacyEncoding = $legacyEncoding;
}
167
/**
 * Tell whether storeBlob() writes data to the external store.
 *
 * @return bool The value last given to setUseExternalStore(), false by default
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
174
/**
 * Enable or disable writing blob data to the external store in storeBlob().
 *
 * The value is checked with Assert::parameterType() and must be a boolean.
 *
 * @param bool $useExternalStore
 */
public function setUseExternalStore( $useExternalStore ) {
	Assert::parameterType( 'boolean', $useExternalStore, '$useExternalStore' );

	$this->useExternalStore = $useExternalStore;
}
183
/**
 * Get the load balancer injected through the constructor.
 *
 * getDBConnection() and isReadOnly() call methods on the returned object,
 * so this must never return null.
 *
 * @return LoadBalancer
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
190
/**
 * Obtain a database connection for this store's wiki from the load balancer.
 *
 * @param int $index Connection index as understood by the load balancer's
 *  getConnection(); storeBlob() passes DB_MASTER
 *
 * @return mixed Whatever getConnection() returns; callers use it as a database handle
 */
private function getDBConnection( $index ) {
	return $this->getDBLoadBalancer()->getConnection( $index, [], $this->wikiId );
}
200
/**
 * Stores an arbitrary blob of data and returns an address that can be used with
 * getBlob() to retrieve it.
 *
 * The data is first passed through compressData(). If the external store is enabled,
 * the data goes there and only the URL it returns is written to the text table.
 *
 * @param string $data The data to store
 * @param array $hints Not used by this implementation
 *
 * @throws BlobAccessException If the external store rejects the data, or wrapping any
 *  MWException raised while storing
 * @return string An address of the form "tt:" followed by the new text row ID
 */
public function storeBlob( $data, $hints = [] ) {
	try {
		// compressData() takes $data by reference and may replace it with deflated data.
		$flagString = $this->compressData( $data );

		# Write to external storage if required
		if ( $this->useExternalStore ) {
			// Store and get the URL; from here on $data is the URL, not the payload.
			$data = ExternalStore::insertToDefault( $data );
			if ( !$data ) {
				throw new BlobAccessException( "Failed to store text to external storage" );
			}

			$separator = $flagString ? ',' : '';
			$flagString .= $separator . 'external';

			// TODO: we could also return an address for the external store directly here.
			// That would mean bypassing the text table entirely when the external store is
			// used. We'll need to assess expected fallout before doing that.
		}

		$master = $this->getDBConnection( DB_MASTER );

		$row = [
			'old_id' => $master->nextSequenceValue( 'text_old_id_seq' ),
			'old_text' => $data,
			'old_flags' => $flagString,
		];
		$master->insert( 'text', $row, __METHOD__ );

		return 'tt:' . $master->insertId();
	} catch ( MWException $e ) {
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
}
252
/**
 * Retrieve a blob, given an address.
 *
 * The lookup goes through the cache; on a miss the blob is loaded with fetchBlob().
 *
 * @param string $blobAddress Address as returned by storeBlob()
 * @param int $queryFlags Query flags, passed to DBAccessObjectUtils::getDBOptions()
 *  and on to fetchBlob()
 *
 * @throws BlobAccessException If the blob could not be loaded
 * @return string The blob data
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// TODO: change key, since this is not necessarily revision text!
	$key = $this->cache->makeKey( 'revisiontext', 'textid', $blobAddress );
	$ttl = $this->getCacheTTL();

	$loader = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags ) {
		list( $index ) = DBAccessObjectUtils::getDBOptions( $queryFlags );
		$setOpts += Database::getCacheSetOptions( $this->getDBConnection( $index ) );

		return $this->fetchBlob( $blobAddress, $queryFlags );
	};

	$cacheOptions = [
		'pcGroup' => self::TEXT_CACHE_GROUP,
		'pcTTL' => IExpiringStore::TTL_PROC_LONG
	];

	// No negative caching; negative hits on text rows may be due to corrupted replica DBs
	$blob = $this->cache->getWithSetCallback( $key, $ttl, $loader, $cacheOptions );

	if ( $blob !== false ) {
		return $blob;
	}

	throw new BlobAccessException( 'Failed to load blob from address ' . $blobAddress );
}
288
/**
 * Loads the text table row addressed by $blobAddress and expands its content.
 *
 * MCR migration note: this corresponds to Revision::fetchText.
 *
 * @param string $blobAddress Blob address; only the "tt" schema is supported
 * @param int $queryFlags Bitfield of READ_* query flags
 *
 * @throws BlobAccessException If the schema is unknown or the ID part is not a
 *  canonical, non-zero integer
 * @throws InvalidArgumentException If splitBlobAddress() cannot parse the address
 * @return string|bool The expanded blob, or false if the row is missing or holds bad data
 */
private function fetchBlob( $blobAddress, $queryFlags ) {
	list( $schema, $id, ) = self::splitBlobAddress( $blobAddress );

	//TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
	if ( $schema === 'tt' ) {
		$textId = intval( $id );
	} else {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Unknown blob address schema: $schema" );
	}

	// Reject zero and any ID that does not round-trip through intval(), e.g. "007" or "12abc".
	if ( !$textId || $id !== (string)$textId ) {
		// XXX: change to better exceptions! That makes migration more difficult, though.
		throw new BlobAccessException( "Bad blob address: $blobAddress" );
	}

	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, self::READ_LATEST )
		? self::READ_LATEST_IMMUTABLE
		: 0;

	list( $index, $options, $fallbackIndex, $fallbackOptions ) =
		DBAccessObjectUtils::getDBOptions( $queryFlags );

	// Text data is immutable; check replica DBs first.
	// The query options derived from $queryFlags must be passed along, just as the
	// fallback query below passes $fallbackOptions.
	$row = $this->getDBConnection( $index )->selectRow(
		'text',
		[ 'old_text', 'old_flags' ],
		[ 'old_id' => $textId ],
		__METHOD__,
		$options
	);

	// Fallback to DB_MASTER in some cases if the row was not found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( !$row && $fallbackIndex !== null ) {
		$row = $this->getDBConnection( $fallbackIndex )->selectRow(
			'text',
			[ 'old_text', 'old_flags' ],
			[ 'old_id' => $textId ],
			__METHOD__,
			$fallbackOptions
		);
	}

	if ( !$row ) {
		wfWarn( __METHOD__ . ": No text row with ID $textId." );
		return false;
	}

	// The blob address doubles as the cache key for externally stored text.
	$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );

	if ( $blob === false ) {
		wfWarn( __METHOD__ . ": Bad data in text row $textId." );
		return false;
	}

	return $blob;
}
358
/**
 * Expand a raw data blob according to the flags given.
 *
 * When the flags include "external", $raw is treated as an external store URL and the
 * content is fetched from there; otherwise $raw itself is decompressed.
 *
 * @param string $raw The raw blob data, or an external store URL
 * @param string|string[] $flags Blob flags, as a comma separated string or as a list
 * @param string|null $cacheKey If given and this store has no wiki ID set, externally
 *  stored content is looked up and kept in the cache under this key
 *
 * @return string|bool The expanded blob, or false on failure
 */
public function expandBlob( $raw, $flags, $cacheKey = null ) {
	if ( is_string( $flags ) ) {
		$flags = explode( ',', $flags );
	}

	if ( !in_array( 'external', $flags ) ) {
		return $this->decompressData( $raw, $flags );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$urlParts = explode( '://', $url, 2 );
	if ( count( $urlParts ) < 2 || $urlParts[1] === '' ) {
		// No scheme separator, or nothing after it.
		return false;
	}

	if ( !$cacheKey || $this->wikiId !== false ) {
		$fetched = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] );
		return $this->decompressData( $fetched, $flags );
	}

	// Make use of the wiki-local revision text cache.
	// The cached value should be decompressed, so handle that and return here.
	// NOTE: we rely on $this->cache being the right cache for $this->wikiId!
	// TODO: change key, since this is not necessarily revision text!
	$key = $this->cache->makeKey( 'revisiontext', 'textid', $cacheKey );
	$ttl = $this->getCacheTTL();

	$loader = function () use ( $url, $flags ) {
		// No negative caching per BlobStore::getBlob()
		$fetched = ExternalStore::fetchFromURL( $url, [ 'wiki' => $this->wikiId ] );

		return $this->decompressData( $fetched, $flags );
	};

	return $this->cache->getWithSetCallback(
		$key,
		$ttl,
		$loader,
		[ 'pcGroup' => self::TEXT_CACHE_GROUP, 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);
}
414
/**
 * Compresses the given blob in place if compression is enabled, and reports the
 * flags describing how the data is now encoded.
 *
 * @param string &$blob The data; replaced with the deflated data when gzdeflate() succeeds
 *
 * @return string Comma separated flags: always "utf-8", plus "gzip" if the blob was deflated
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$flagList = [ 'utf-8' ];

	if ( $this->compressBlobs ) {
		if ( !function_exists( 'gzdeflate' ) ) {
			wfDebug( __METHOD__ . " -- no zlib support, not compressing\n" );
		} else {
			$packed = gzdeflate( $blob );

			if ( $packed !== false ) {
				$blob = $packed;
				$flagList[] = 'gzip';
			} else {
				// Leave $blob uncompressed; the flags will not claim gzip.
				wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
			}
		}
	}

	return implode( ',', $flagList );
}
455
/**
 * Re-converts revision text according to its flags.
 *
 * Flags handled, in this order: "error" (the row is unusable), "gzip" (inflate),
 * "object" (unserialize and call getText()), and the absence of both "utf-8" and
 * "utf8" (convert from the legacy encoding, if one is configured).
 *
 * @param string|bool $blob The raw data, or false if it could not be fetched
 * @param array $blobFlags List of flag strings
 *
 * @return string|bool The decoded data, or false on any failure
 */
public function decompressData( $blob, array $blobFlags ) {
	if ( $blob === false ) {
		// Text failed to be fetched; nothing to do
		return false;
	}

	// Flags are compared strictly so that a non-string entry (e.g. integer 0) cannot
	// spuriously match a flag name under PHP 7 loose comparison.
	if ( in_array( 'error', $blobFlags, true ) ) {
		// Error row, return false
		return false;
	}

	if ( in_array( 'gzip', $blobFlags, true ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfLogWarning( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags, true ) ) {
		# Generic compressed storage
		// NOTE(review): unserialize() instantiates whatever class the stored data names.
		// This is only safe as long as the blob storage is trusted; consider restricting
		// the allowed classes.
		$obj = unserialize( $blob );
		if ( !is_object( $obj ) || !method_exists( $obj, 'getText' ) ) {
			// Invalid object, or an object that cannot provide text. Bail out rather
			// than hit a fatal "call to undefined method" error below.
			return false;
		}
		$blob = $obj->getText();
	}

	// Needed to support old revisions left over from the 1.4 / 1.5 migration.
	if ( $blob !== false && $this->legacyEncoding && $this->legacyEncodingConversionLang
		&& !in_array( 'utf-8', $blobFlags, true ) && !in_array( 'utf8', $blobFlags, true )
	) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# ("utf8" checked for compatibility with some broken
		# conversion scripts 2008-12-30)
		$blob = $this->legacyEncodingConversionLang->iconv( $this->legacyEncoding, 'UTF-8', $blob );
	}

	return $blob;
}
517
/**
 * Get the text cache TTL.
 *
 * @return int The configured cache expiry, or WANObjectCache::TTL_UNCACHEABLE if the
 *  expiry is empty or the cache reports SQL-level (or lower) emulation
 */
private function getCacheTTL() {
	$emulation = $this->cache->getQoS( WANObjectCache::ATTR_EMULATION );

	if ( $emulation <= WANObjectCache::QOS_EMULATION_SQL ) {
		// Do not cache RDBMs blobs in...the RDBMs store
		return WANObjectCache::TTL_UNCACHEABLE;
	}

	return $this->cacheExpiry ?: WANObjectCache::TTL_UNCACHEABLE;
}
537
/**
 * Returns an ID corresponding to the old_id field in the text table, corresponding
 * to the given $address.
 *
 * @param string $address Blob address
 *
 * @throws InvalidArgumentException If the address cannot be parsed, or its ID part is
 *  zero or not a canonical integer
 * @return int|null The text ID, or null if the address does not use the "tt" schema
 */
public function getTextIdFromAddress( $address ) {
	list( $schema, $id, ) = self::splitBlobAddress( $address );

	if ( $schema === 'tt' ) {
		$textId = intval( $id );

		// Accept only non-zero IDs that round-trip through intval() unchanged.
		if ( $textId && $id === (string)$textId ) {
			return $textId;
		}

		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return null;
}
572
/**
 * Splits a blob address into three parts: the schema, the ID, and parameters/flags.
 *
 * The expected form is schema, colon, ID, optionally followed by a question mark and
 * a query string. Schema and ID consist of word characters only.
 *
 * @param string $address
 *
 * @throws InvalidArgumentException If the address does not have the expected form
 * @return array [ lower-cased schema, ID, parameters as returned by wfCgiToArray() ]
 */
private static function splitBlobAddress( $address ) {
	$matches = [];
	if ( !preg_match( '/^(\w+):(\w+)(\?(.*))?$/', $address, $matches ) ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	// Group 4 only exists when the address carries a query string.
	$parameters = [];
	if ( isset( $matches[4] ) ) {
		$parameters = wfCgiToArray( $matches[4] );
	}

	return [ strtolower( $matches[1] ), $matches[2], $parameters ];
}
592
/**
 * Check if the blob metadata or backing blob data store is read-only.
 *
 * @return bool True if the external store is in use and its default stores are
 *  read-only, or if the load balancer reports a read-only reason
 */
public function isReadOnly() {
	$externalIsReadOnly = $this->useExternalStore
		&& ExternalStore::defaultStoresAreReadOnly();

	// The load balancer is only consulted when the external store does not already decide.
	return $externalIsReadOnly
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
600}
unserialize( $serialized)
wfDebug( $text, $dest='all', array $context=[])
Sends a line to the debug log if enabled or, optionally, to a comment in output.
wfWarn( $msg, $callerOffset=1, $level=E_USER_NOTICE)
Send a warning either to the debug log or in a PHP error depending on $wgDevelopmentWarnings.
wfLogWarning( $msg, $callerOffset=1, $level=E_USER_WARNING)
Send a warning as a PHP error and the debug log.
wfCgiToArray( $query)
This is the logical opposite of wfArrayToCgi(): it accepts a query string as its argument and returns...
Helper class for DAO classes.
Constructor class for key/value blob data kept in external repositories.
static fetchFromURL( $url, array $params=[])
Fetch data from given URL.
static insertToDefault( $data, array $params=[])
Like insert() above, but does more of the work for us.
static defaultStoresAreReadOnly()
Internationalisation code.
Definition Language.php:35
MediaWiki exception.
Exception representing a failure to access a data blob.
Service for storing and loading Content objects.
getTextIdFromAddress( $address)
Returns an ID corresponding to the old_id field in the text table, corresponding to the given $addres...
getCacheTTL()
Get the text cache TTL.
__construct(LoadBalancer $dbLoadBalancer, WANObjectCache $cache, $wikiId=false)
getBlob( $blobAddress, $queryFlags=0)
Retrieve a blob, given an address.
setLegacyEncoding( $legacyEncoding, Language $language)
expandBlob( $raw, $flags, $cacheKey=null)
Expand a raw data blob according to the flags given.
setUseExternalStore( $useExternalStore)
compressData(&$blob)
If $wgCompressRevisions is enabled, we will compress data.
static splitBlobAddress( $address)
Splits a blob address into three parts: the schema, the ID, and parameters/flags.
fetchBlob( $blobAddress, $queryFlags)
MCR migration note: this corresponds to Revision::fetchText.
bool string $wikiId
Wiki ID.
storeBlob( $data, $hints=[])
Stores an arbitrary blob of data and returns an address that can be used with getBlob() to retrieve t...
Language null $legacyEncodingConversionLang
isReadOnly()
Check if the blob metadata or backing blob data store is read-only.
decompressData( $blob, array $blobFlags)
Re-converts revision text according to its flags.
Multi-datacenter aware caching interface.
Relational database abstraction object.
Definition Database.php:48
Database connection, tracking, load balancing, and transaction manager for a cluster.
deferred.txt: A few of the database updates required by various functions here can be deferred until after the result page is displayed to the user. For example, updating the view counts, updating the linked-to tables after a save, etc. PHP does not yet have any way to tell the server to actually return and disconnect while still running these updates, but it might have such a feature in the future. We handle these by creating a deferred-update object and putting those objects on a global list.
Definition deferred.txt:11
when a variable name is used in a function
Definition design.txt:94
null means default in associative array with keys and values unescaped Should be merged with default with a value of false meaning to suppress the attribute in associative array with keys and values unescaped & $options
Definition hooks.txt:2001
returning false will NOT prevent logging $e
Definition hooks.txt:2176
Interface for database access objects.
Generic base class for storage interfaces.
Service for loading and storing data blobs.
Definition BlobStore.php:33
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:38
you have access to all of the normal MediaWiki functions, so you can get a DB connection, use the cache, etc.
const DB_MASTER
Definition defines.php:29