Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
79.84% |
202 / 253 |
|
74.07% |
20 / 27 |
CRAP | |
0.00% |
0 / 1 |
SqlBlobStore | |
79.84% |
202 / 253 |
|
74.07% |
20 / 27 |
149.00 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
getCacheExpiry | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setCacheExpiry | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getCompressBlobs | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setCompressBlobs | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLegacyEncoding | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setLegacyEncoding | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getUseExternalStore | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setUseExternalStore | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getDBLoadBalancer | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getDBConnection | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
storeBlob | |
52.94% |
9 / 17 |
|
0.00% |
0 / 1 |
7.61 | |||
getBlob | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
4 | |||
getBlobBatch | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
fetchBlobs | |
70.11% |
61 / 87 |
|
0.00% |
0 / 1 |
26.65 | |||
getDBOptions | |
57.89% |
11 / 19 |
|
0.00% |
0 / 1 |
8.69 | |||
getCacheKey | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getCacheOptions | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
expandBlob | |
88.00% |
22 / 25 |
|
0.00% |
0 / 1 |
9.14 | |||
compressData | |
81.82% |
9 / 11 |
|
0.00% |
0 / 1 |
4.10 | |||
decompressData | |
100.00% |
18 / 18 |
|
100.00% |
1 / 1 |
10 | |||
getCacheTTL | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
getTextIdFromAddress | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
makeAddressFromTextId | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
explodeFlags | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
splitBlobAddress | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
isReadOnly | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * Attribution notice: when this file was created, much of its content was taken |
19 | * from the Revision.php file as present in release 1.30. Refer to the history |
20 | * of that file for original authorship (that file was removed entirely in 1.37, |
21 | * but its history can still be found in prior versions of MediaWiki). |
22 | * |
23 | * @file |
24 | */ |
25 | |
26 | namespace MediaWiki\Storage; |
27 | |
28 | use AppendIterator; |
29 | use DBAccessObjectUtils; |
30 | use ExternalStoreAccess; |
31 | use ExternalStoreException; |
32 | use HistoryBlobUtils; |
33 | use IDBAccessObject; |
34 | use InvalidArgumentException; |
35 | use StatusValue; |
36 | use WANObjectCache; |
37 | use Wikimedia\Assert\Assert; |
38 | use Wikimedia\AtEase\AtEase; |
39 | use Wikimedia\Rdbms\IDatabase; |
40 | use Wikimedia\Rdbms\ILoadBalancer; |
41 | |
42 | /** |
43 | * Service for storing and loading Content objects representing revision data blobs. |
44 | * |
45 | * @since 1.31 |
46 | * |
47 | * @note This was written to act as a drop-in replacement for the corresponding |
48 | * static methods in the old Revision class (which was later removed in 1.37). |
49 | */ |
50 | class SqlBlobStore implements BlobStore { |
51 | |
52 | // Note: the name has been taken unchanged from the old Revision class. |
53 | public const TEXT_CACHE_GROUP = 'revisiontext:10'; |
54 | |
55 | /** @internal */ |
56 | public const DEFAULT_TTL = 7 * 24 * 3600; // 7 days |
57 | |
58 | /** |
59 | * @var ILoadBalancer |
60 | */ |
61 | private $dbLoadBalancer; |
62 | |
63 | /** |
64 | * @var ExternalStoreAccess |
65 | */ |
66 | private $extStoreAccess; |
67 | |
68 | /** |
69 | * @var WANObjectCache |
70 | */ |
71 | private $cache; |
72 | |
73 | /** |
74 | * @var string|bool DB domain ID of a wiki or false for the local one |
75 | */ |
76 | private $dbDomain; |
77 | |
78 | /** |
79 | * @var int |
80 | */ |
81 | private $cacheExpiry = self::DEFAULT_TTL; |
82 | |
83 | /** |
84 | * @var bool |
85 | */ |
86 | private $compressBlobs = false; |
87 | |
88 | /** |
89 | * @var string|false |
90 | */ |
91 | private $legacyEncoding = false; |
92 | |
93 | /** |
94 | * @var bool |
95 | */ |
96 | private $useExternalStore = false; |
97 | |
/**
 * Set up a blob store that reads and writes the text table of one wiki database.
 *
 * @param ILoadBalancer $dbLoadBalancer Load balancer used to obtain database connections
 * @param ExternalStoreAccess $extStoreAccess Access layer for external storage
 * @param WANObjectCache $cache Cache used for blobs. The local wiki's default instance
 *  may be passed even when $dbDomain names a different wiki: cache keys are built with
 *  makeGlobalKey() and include the resolved database domain, so wiki A and wiki B share
 *  the same keys for blobs fetched from wiki C, regardless of their own key space.
 * @param bool|string $dbDomain ID of the target wiki database, or false for the local wiki
 */
public function __construct(
	ILoadBalancer $dbLoadBalancer,
	ExternalStoreAccess $extStoreAccess,
	WANObjectCache $cache,
	$dbDomain = false
) {
	$this->dbDomain = $dbDomain;
	$this->cache = $cache;
	$this->extStoreAccess = $extStoreAccess;
	$this->dbLoadBalancer = $dbLoadBalancer;
}
120 | |
/**
 * Get the configured cache lifetime for blobs.
 *
 * @return int Number of seconds for which blobs may be cached
 */
public function getCacheExpiry() {
	return $this->cacheExpiry;
}
127 | |
/**
 * Set the cache lifetime for blobs.
 *
 * @param int $cacheExpiry Number of seconds for which blobs may be cached. A value of
 *  zero makes getCacheTTL() fall back to WANObjectCache::TTL_UNCACHEABLE.
 */
public function setCacheExpiry( int $cacheExpiry ) {
	$this->cacheExpiry = $cacheExpiry;
}
134 | |
/**
 * Tell whether compression is enabled for newly stored blobs.
 *
 * @return bool True if blobs should be compressed for storage
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
141 | |
/**
 * Enable or disable compression of newly stored blobs.
 *
 * @param bool $compressBlobs True if blobs should be compressed for storage
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
148 | |
/**
 * Get the encoding assumed for blobs that are not flagged as utf8.
 *
 * @return false|string The legacy encoding name, or false when legacy encoding
 *  handling is disabled and utf8 is assumed.
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
156 | |
/**
 * Choose the encoding to assume for blobs whose flags lack the utf-8 marker.
 *
 * @note This method used to take a second parameter, Language $language; it was
 *  removed in 1.34.
 *
 * @param string $legacyEncoding Encoding name to assume for blobs that are not
 *  marked as utf8.
 */
public function setLegacyEncoding( string $legacyEncoding ) {
	$this->legacyEncoding = $legacyEncoding;
}
168 | |
/**
 * Tell whether new blobs are written through the ExternalStore mechanism.
 *
 * @return bool True if the ExternalStore mechanism is used for storing blobs
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
175 | |
/**
 * Enable or disable writing new blobs through the ExternalStore mechanism.
 *
 * @param bool $useExternalStore True to use the ExternalStore mechanism for storing blobs
 */
public function setUseExternalStore( bool $useExternalStore ) {
	$this->useExternalStore = $useExternalStore;
}
182 | |
/**
 * Get the load balancer this store obtains its database connections from.
 *
 * @return ILoadBalancer
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
189 | |
/**
 * Obtain a connection to the database domain this store was configured with.
 *
 * @param int $index A database index, like DB_PRIMARY or DB_REPLICA
 *
 * @return IDatabase
 */
private function getDBConnection( $index ) {
	return $this->getDBLoadBalancer()->getConnectionRef( $index, [], $this->dbDomain );
}
199 | |
/**
 * Store an arbitrary blob of data and return an address that can later be passed to
 * getBlob() to retrieve the same data.
 *
 * The data is first run through compressData(). When the external store is enabled, the
 * payload is written there and only the value returned by the external store is kept in
 * the text table, with the 'external' flag appended to the row's flags.
 *
 * @param string $data
 * @param array $hints An array of hints. Not consulted by this implementation.
 *
 * @throws BlobAccessException If writing to the external store fails
 * @return string an address that can be used with getBlob() to retrieve the data.
 */
public function storeBlob( $data, $hints = [] ) {
	// compressData() may replace $data with its compressed form
	$flags = $this->compressData( $data );

	if ( $this->useExternalStore ) {
		// Write the payload to external storage; what comes back replaces the payload
		try {
			$data = $this->extStoreAccess->insert( $data, [ 'domain' => $this->dbDomain ] );
		} catch ( ExternalStoreException $e ) {
			throw new BlobAccessException( $e->getMessage(), 0, $e );
		}
		if ( !$data ) {
			throw new BlobAccessException( "Failed to store text to external storage" );
		}
		$flags = ( $flags ? "$flags," : $flags ) . 'external';

		// TODO: we could also return an address for the external store directly here.
		// That would mean bypassing the text table entirely when the external store is
		// used. We'll need to assess expected fallout before doing that.
	}

	$dbw = $this->getDBConnection( DB_PRIMARY );

	$insert = $dbw->newInsertQueryBuilder()
		->insertInto( 'text' )
		->row( [ 'old_text' => $data, 'old_flags' => $flags ] )
		->caller( __METHOD__ );
	$insert->execute();

	// The new row's ID is what the blob address is built from
	return self::makeAddressFromTextId( $dbw->insertId() );
}
245 | |
/**
 * Retrieve a blob, given an address.
 * Currently hardcoded to the 'text' table storage engine.
 *
 * The lookup goes through the cache; on a miss the blob is loaded via fetchBlobs().
 * Failed lookups are never cached.
 *
 * MCR migration note: this replaced Revision::loadText
 *
 * @param string $blobAddress
 * @param int $queryFlags
 *
 * @throws BadBlobException If the blob is reported as 'badrevision'
 * @throws BlobAccessException For any other fetch failure
 * @return string
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// Filled in by the loader below, so a failure can be reported after the cache call
	$error = null;
	$loadBlob = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags, &$error ) {
		// $setOpts is left untouched; blobs are immutable and negatives are not cached
		[ $blobs, $failures ] = $this->fetchBlobs( [ $blobAddress ], $queryFlags );
		// No negative caching; negative hits on text rows may be due to corrupted replica DBs
		$error = $failures[$blobAddress] ?? null;
		if ( $error ) {
			$ttl = WANObjectCache::TTL_UNCACHEABLE;
		}
		return $blobs[$blobAddress];
	};

	$blob = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loadBlob,
		$this->getCacheOptions()
	);

	if ( $error && $error[0] === 'badrevision' ) {
		throw new BadBlobException( $error[1] );
	}
	if ( $error ) {
		throw new BlobAccessException( $error[1] );
	}

	Assert::postcondition( is_string( $blob ), 'Blob must not be null' );
	return $blob;
}
289 | |
/**
 * A batched version of BlobStore::getBlob.
 *
 * @param string[] $blobAddresses An array of blob addresses.
 * @param int $queryFlags See IDBAccessObject.
 * @throws BlobAccessException
 * @return StatusValue A status whose value maps each blob address to its binary blob
 *  data, or to null if fetching that blob failed. Each fetch failure is reported as a
 *  warning on the status object.
 * @since 1.34
 */
public function getBlobBatch( $blobAddresses, $queryFlags = 0 ) {
	// FIXME: All caching has temporarily been removed in I94c6f9ba7b9caeeb due to T235188.
	//        Caching behavior should be restored by reverting I94c6f9ba7b9caeeb as soon as
	//        the root cause of T235188 has been resolved.

	[ $fetched, $errors ] = $this->fetchBlobs( $blobAddresses, $queryFlags );

	// fetchBlobs() reports failures as false; callers of this method expect null
	$blobsByAddress = [];
	foreach ( $fetched as $address => $blob ) {
		$blobsByAddress[$address] = ( $blob === false ) ? null : $blob;
	}

	$status = StatusValue::newGood( $blobsByAddress );
	foreach ( $errors as $warningParams ) {
		// @phan-suppress-next-line PhanParamTooFewUnpack
		$status->warning( ...$warningParams );
	}
	return $status;
}
319 | |
/**
 * Load and expand the blobs for a list of blob addresses.
 *
 * MCR migration note: this corresponded to Revision::fetchText
 *
 * Addresses with the 'tt' schema are looked up in the text table with a single query;
 * if rows are missing and the query flags provide a fallback connection, the missing
 * rows are looked up again there. Addresses with the 'bad' schema yield an empty string
 * together with a 'badrevision' error. Malformed 'tt' addresses and unknown schemas are
 * reported as errors and are never sent to the database.
 *
 * @param string[] $blobAddresses
 * @param int $queryFlags
 *
 * @throws BlobAccessException If an address cannot be split into schema and ID
 * @return array [ $result, $errors ] A list with the following elements:
 *   - The result: a map of blob addresses to successfully fetched blobs
 *     or false if fetch failed
 *   - Errors: a map of blob addresses to error information about the blob.
 *     On success, the relevant key will be absent. Each error is a list of
 *     parameters to be passed to StatusValue::warning().
 */
private function fetchBlobs( $blobAddresses, $queryFlags ) {
	$textIdToBlobAddress = [];
	$result = [];
	$errors = [];
	foreach ( $blobAddresses as $blobAddress ) {
		try {
			[ $schema, $id ] = self::splitBlobAddress( $blobAddress );
		} catch ( InvalidArgumentException $ex ) {
			throw new BlobAccessException(
				$ex->getMessage() . '. Use findBadBlobs.php to remedy.',
				0,
				$ex
			);
		}

		// TODO: MCR: also support 'ex' schema with ExternalStore URLs, plus flags encoded in the URL!
		if ( $schema === 'bad' ) {
			// Database row was marked as "known bad"
			wfDebug(
				__METHOD__
				. ": loading known-bad content ($blobAddress), returning empty string"
			);
			$result[$blobAddress] = '';
			$errors[$blobAddress] = [
				'badrevision',
				'The content of this revision is missing or corrupted (bad schema)'
			];
		} elseif ( $schema === 'tt' ) {
			$textId = intval( $id );

			if ( $textId < 1 || $id !== (string)$textId ) {
				$errors[$blobAddress] = [
					'internalerror',
					"Bad blob address: $blobAddress. Use findBadBlobs.php to remedy."
				];
				$result[$blobAddress] = false;
				// Do not query for a malformed ID: the row found for its integer
				// prefix would replace the failure recorded above, and the entry
				// could displace a valid address that maps to the same text ID.
				continue;
			}

			$textIdToBlobAddress[$textId] = $blobAddress;
		} else {
			$errors[$blobAddress] = [
				'internalerror',
				"Unknown blob address schema: $schema. Use findBadBlobs.php to remedy."
			];
			$result[$blobAddress] = false;
		}
	}

	$textIds = array_keys( $textIdToBlobAddress );
	if ( !$textIds ) {
		// Nothing to look up in the text table
		return [ $result, $errors ];
	}
	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, IDBAccessObject::READ_LATEST )
		? IDBAccessObject::READ_LATEST_IMMUTABLE
		: 0;
	[ $index, $options, $fallbackIndex, $fallbackOptions ] =
		self::getDBOptions( $queryFlags );
	// Text data is immutable; check replica DBs first.
	$dbConnection = $this->getDBConnection( $index );
	$rows = $dbConnection->newSelectQueryBuilder()
		->select( [ 'old_id', 'old_text', 'old_flags' ] )
		->from( 'text' )
		->where( [ 'old_id' => $textIds ] )
		->options( $options )
		->caller( __METHOD__ )->fetchResultSet();
	$numRows = $rows->numRows();

	// Fallback to DB_PRIMARY in some cases if not all the rows were found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( $numRows !== count( $textIds ) && $fallbackIndex !== null ) {
		$fetchedTextIds = [];
		foreach ( $rows as $row ) {
			$fetchedTextIds[] = $row->old_id;
		}
		$missingTextIds = array_diff( $textIds, $fetchedTextIds );
		$dbConnection = $this->getDBConnection( $fallbackIndex );
		$rowsFromFallback = $dbConnection->newSelectQueryBuilder()
			->select( [ 'old_id', 'old_text', 'old_flags' ] )
			->from( 'text' )
			->where( [ 'old_id' => $missingTextIds ] )
			->options( $fallbackOptions )
			->caller( __METHOD__ )->fetchResultSet();
		// Process the rows of both queries in a single pass below
		$appendIterator = new AppendIterator();
		$appendIterator->append( $rows );
		$appendIterator->append( $rowsFromFallback );
		$rows = $appendIterator;
	}

	foreach ( $rows as $row ) {
		$blobAddress = $textIdToBlobAddress[$row->old_id];
		$blob = false;
		if ( $row->old_text !== null ) {
			$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
		}
		if ( $blob === false ) {
			$errors[$blobAddress] = [
				'internalerror',
				"Bad data in text row {$row->old_id}. Use findBadBlobs.php to remedy."
			];
		}
		$result[$blobAddress] = $blob;
	}

	// If we're still missing some of the rows, set errors for missing blobs.
	if ( count( $result ) !== count( $blobAddresses ) ) {
		foreach ( $blobAddresses as $blobAddress ) {
			if ( !isset( $result[$blobAddress ] ) ) {
				$errors[$blobAddress] = [
					'internalerror',
					"Unable to fetch blob at $blobAddress. Use findBadBlobs.php to remedy."
				];
				$result[$blobAddress] = false;
			}
		}
	}
	return [ $result, $errors ];
}
453 | |
/**
 * Translate IDBAccessObject query flags into connection indexes and query options.
 *
 * With READ_LATEST_IMMUTABLE the replica is read first and the primary serves as
 * fallback; locking options then apply to the fallback query only. Otherwise there is
 * no fallback, and READ_LATEST selects the primary instead of a replica.
 *
 * @param int $bitfield Bit field of IDBAccessObject::READ_* flags
 * @return array [ $index, $options, $fallbackIndex, $fallbackOptions ], where
 *  $fallbackIndex is null when there is no fallback connection.
 */
private static function getDBOptions( $bitfield ) {
	$lockingOptions = [];
	if ( DBAccessObjectUtils::hasFlags( $bitfield, IDBAccessObject::READ_EXCLUSIVE ) ) {
		$lockingOptions[] = 'FOR UPDATE';
	} elseif ( DBAccessObjectUtils::hasFlags( $bitfield, IDBAccessObject::READ_LOCKING ) ) {
		$lockingOptions[] = 'LOCK IN SHARE MODE';
	}

	if ( DBAccessObjectUtils::hasFlags( $bitfield, IDBAccessObject::READ_LATEST_IMMUTABLE ) ) {
		// Overrides READ_LATEST if set: replica first, primary as fallback.
		// Locks on DB_REPLICA make no sense, so only the fallback query gets them.
		return [ DB_REPLICA, [], DB_PRIMARY, $lockingOptions ];
	}

	$index = DBAccessObjectUtils::hasFlags( $bitfield, IDBAccessObject::READ_LATEST )
		? DB_PRIMARY
		: DB_REPLICA;

	// No fallback connection, hence no fallback options either
	return [ $index, $lockingOptions, null, [] ];
}
483 | |
/**
 * Build the cache key for a given blob address.
 *
 * The key is a global key that contains the resolved database domain, so cached blobs
 * from the same database can be re-used between wikis. For example, wiki A and wiki B
 * will use the same cache keys for blobs fetched from wiki C.
 *
 * @param string $blobAddress
 * @return string
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->dbDomain );

	return $this->cache->makeGlobalKey( 'SqlBlobStore-blob', $domainId, $blobAddress );
}
501 | |
/**
 * Get the options passed to the cache whenever a blob is looked up.
 *
 * @return array<string,mixed>
 */
private function getCacheOptions() {
	$options = [];
	$options['pcGroup'] = self::TEXT_CACHE_GROUP;
	$options['pcTTL'] = WANObjectCache::TTL_PROC_LONG;
	$options['segmentable'] = true;

	return $options;
}
514 | |
/**
 * Expand a raw data blob according to the flags given.
 *
 * MCR migration note: this replaced Revision::getRevisionText
 *
 * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead.
 * @todo make this private, there should be no need to use this method outside this class.
 *
 * @param string $raw The raw blob data, to be processed according to $flags.
 *   May be the blob itself, or the blob compressed, or just the address
 *   of the actual blob, depending on $flags.
 * @param string|string[] $flags Blob flags, such as 'external' or 'gzip'.
 *   Note that not including 'utf-8' in $flags will cause the data to be decoded
 *   according to the legacy encoding specified via setLegacyEncoding.
 * @param string|null $blobAddress A blob address for use in the cache key. If not given,
 *   caching is disabled.
 *
 * @return false|string The expanded blob or false on failure
 * @throws BlobAccessException If the flags contain 'error' (as a BadBlobException)
 */
public function expandBlob( $raw, $flags, $blobAddress = null ) {
	if ( is_string( $flags ) ) {
		$flags = self::explodeFlags( $flags );
	}
	if ( in_array( 'error', $flags ) ) {
		throw new BadBlobException(
			"The content of this revision is missing or corrupted (error flag)"
		);
	}

	if ( !in_array( 'external', $flags ) ) {
		// The data lives in the row itself
		return $this->decompressData( $raw, $flags );
	}

	// External object: the text in the table is only a URL, which must carry
	// something after the "://" separator.
	$url = $raw;
	$parts = explode( '://', $url, 2 );
	if ( count( $parts ) == 1 || $parts[1] == '' ) {
		return false;
	}

	// Loads the external blob and decompresses it; false if the fetch fails
	$fetchAndDecompress = function () use ( $url, $flags ) {
		$blob = $this->extStoreAccess
			->fetchFromURL( $url, [ 'domain' => $this->dbDomain ] );

		return $blob === false ? false : $this->decompressData( $blob, $flags );
	};

	if ( !$blobAddress ) {
		// No address to build a cache key from, so load directly
		return $fetchAndDecompress();
	}

	// The cached value should be decompressed, so the loader takes care of that.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$fetchAndDecompress,
		$this->getCacheOptions()
	);
}
575 | |
/**
 * Prepare a blob for storage, compressing it if compression is enabled on this store.
 * The input string is modified in place.
 * The return value is the flags field: it always contains 'utf-8', plus 'gzip' if the
 * data was compressed.
 *
 * MCR migration note: this replaced Revision::compressRevisionText
 *
 * @note direct use is deprecated!
 * @todo make this private, there should be no need to use this method outside this class.
 *
 * @param string &$blob
 *
 * @return string Comma-separated list of flags
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$blobFlags = [ 'utf-8' ];

	if ( $this->compressBlobs ) {
		if ( !function_exists( 'gzdeflate' ) ) {
			wfDebug( __METHOD__ . " -- no zlib support, not compressing" );
		} else {
			$deflated = gzdeflate( $blob );

			if ( $deflated !== false ) {
				$blob = $deflated;
				$blobFlags[] = 'gzip';
			} else {
				// Keep the uncompressed data and just log the failure
				wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
			}
		}
	}

	return implode( ',', $blobFlags );
}
616 | |
/**
 * Re-converts revision text according to its flags.
 *
 * Handles, in this order: the 'error' flag, 'gzip' compression, 'object' storage, and
 * conversion from the legacy encoding for blobs not flagged as utf-8.
 *
 * MCR migration note: this replaced Revision::decompressRevisionText
 *
 * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead.
 * @todo make this private, there should be no need to use this method outside this class.
 *
 * @param string $blob Blob in compressed/encoded form.
 * @param array $blobFlags Compression flags, such as 'gzip'.
 *   Note that not including 'utf-8' in $blobFlags will cause the data to be decoded
 *   according to the legacy encoding specified via setLegacyEncoding.
 *
 * @return string|bool Decompressed text, or false on failure
 */
public function decompressData( string $blob, array $blobFlags ) {
	$hasFlag = static function ( string $flag ) use ( $blobFlags ): bool {
		return in_array( $flag, $blobFlags );
	};

	if ( $hasFlag( 'error' ) ) {
		// Error row, return false
		return false;
	}

	if ( $hasFlag( 'gzip' ) ) {
		# Deal with optional compression of archived pages.
		# This can be done periodically via maintenance/compressOld.php, and
		# as pages are saved if $wgCompressRevisions is set.
		$blob = gzinflate( $blob );

		if ( $blob === false ) {
			wfWarn( __METHOD__ . ': gzinflate() failed' );
			return false;
		}
	}

	if ( $hasFlag( 'object' ) ) {
		# Generic compressed storage
		$obj = HistoryBlobUtils::unserialize( $blob );
		if ( !$obj ) {
			// Invalid object
			return false;
		}
		$blob = $obj->getText();
	}

	// Needed to support old revisions from before MW 1.5.
	// ("utf8" is checked for compatibility with some broken conversion scripts 2008-12-30)
	$needsLegacyConversion = $blob !== false
		&& $this->legacyEncoding
		&& !$hasFlag( 'utf-8' )
		&& !$hasFlag( 'utf8' );

	if ( $needsLegacyConversion ) {
		# Old revisions kept around in a legacy encoding?
		# Upconvert on demand.
		# Even with //IGNORE iconv can whine about illegal characters in
		# *input* string. We just ignore those too.
		# REF: https://bugs.php.net/bug.php?id=37166
		# REF: https://phabricator.wikimedia.org/T18885
		AtEase::suppressWarnings();
		$blob = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $blob );
		AtEase::restoreWarnings();
	}

	return $blob;
}
679 | |
/**
 * Get the TTL to use when caching blob text.
 *
 * MCR migration note: this replaced Revision::getCacheTTL
 *
 * @return int The configured cache expiry, or TTL_UNCACHEABLE if the expiry is zero
 *  or the cache reports a durability of RDBMS level or above.
 */
private function getCacheTTL() {
	$cache = $this->cache;

	// Do not cache RDBMs blobs in...the RDBMs store
	$isRdbmsBacked = $cache->getQoS( $cache::ATTR_DURABILITY ) >= $cache::QOS_DURABILITY_RDBMS;

	if ( $isRdbmsBacked || !$this->cacheExpiry ) {
		return $cache::TTL_UNCACHEABLE;
	}

	return $this->cacheExpiry;
}
699 | |
/**
 * Returns the ID of the text table row (old_id field) that the given $address refers to.
 *
 * Currently, $address must start with 'tt:' followed by a decimal integer representing
 * the old_id; if $address does not start with 'tt:', null is returned. However,
 * the implementation may change to insert rows into the text table on the fly.
 * This implies that this method cannot be static.
 *
 * @note This method exists for use with the text table based storage schema.
 * It should not be assumed that it will function with all future kinds of content addresses.
 *
 * @deprecated since 1.31, so don't assume that all blob addresses refer to a row in the text
 * table. This method should become private once the relevant refactoring in WikiPage is
 * complete.
 *
 * @param string $address
 *
 * @throws InvalidArgumentException If the address cannot be parsed, or if the ID of a
 *  'tt' address is zero or not a plain decimal integer
 * @return int|null
 */
public function getTextIdFromAddress( $address ) {
	[ $schema, $id ] = self::splitBlobAddress( $address );

	if ( $schema === 'tt' ) {
		$textId = intval( $id );

		// Accept only a non-zero integer that round-trips to the same string
		if ( $textId && $id === (string)$textId ) {
			return $textId;
		}

		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return null;
}
735 | |
/**
 * Build the blob address for content stored in the text table row with the given ID.
 * Such addresses consist of the schema "tt:" followed by the integer value of the row's
 * old_id field.
 *
 * @internal
 * @note This method should not be used by regular application logic. It is public so
 * maintenance scripts can use it for bulk operations on the text table.
 *
 * @param int $id
 *
 * @return string
 */
public static function makeAddressFromTextId( $id ) {
	return "tt:{$id}";
}
752 | |
/**
 * Split a comma-separated old_flags value into its constituent parts.
 *
 * @param string $flagsString
 * @return array List of flags; empty if $flagsString is the empty string
 */
public static function explodeFlags( string $flagsString ) {
	if ( $flagsString === '' ) {
		return [];
	}

	return explode( ',', $flagsString );
}
762 | |
/**
 * Splits a blob address into three parts: the schema, the ID, and parameters/flags.
 *
 * The schema is returned in lower case. Anything after a "?" in the address is parsed
 * as the parameters.
 *
 * @since 1.33
 *
 * @param string $address
 *
 * @throws InvalidArgumentException If $address does not have the form of a blob address
 * @return array [ $schema, $id, $parameters ], with $parameters being an assoc array.
 */
public static function splitBlobAddress( $address ) {
	$matched = preg_match( '/^([-+.\w]+):([^\s?]+)(\?([^\s]*))?$/', $address, $m );
	if ( !$matched ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	return [
		strtolower( $m[1] ),
		$m[2],
		// The query part is optional, so the group may be absent
		wfCgiToArray( $m[4] ?? '' ),
	];
}
784 | |
/**
 * Tell whether this blob store currently cannot be written to.
 *
 * @return bool True if the external store is in use and read-only, or if the
 *  load balancer reports a read-only reason.
 */
public function isReadOnly() {
	$externalStoreReadOnly = $this->useExternalStore && $this->extStoreAccess->isReadOnly();

	return $externalStoreReadOnly
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
792 | } |