Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
80.15% |
210 / 262 |
|
74.07% |
20 / 27 |
CRAP | |
0.00% |
0 / 1 |
SqlBlobStore | |
80.15% |
210 / 262 |
|
74.07% |
20 / 27 |
158.17 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
getCacheExpiry | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setCacheExpiry | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getCompressBlobs | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setCompressBlobs | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getLegacyEncoding | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setLegacyEncoding | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getUseExternalStore | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setUseExternalStore | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getDBLoadBalancer | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getDBConnection | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
storeBlob | |
76.47% |
13 / 17 |
|
0.00% |
0 / 1 |
5.33 | |||
getBlob | |
100.00% |
20 / 20 |
|
100.00% |
1 / 1 |
4 | |||
getBlobBatch | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
fetchBlobs | |
68.04% |
66 / 97 |
|
0.00% |
0 / 1 |
37.80 | |||
getDBOptions | |
57.89% |
11 / 19 |
|
0.00% |
0 / 1 |
8.69 | |||
getCacheKey | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getCacheOptions | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
expandBlob | |
88.00% |
22 / 25 |
|
0.00% |
0 / 1 |
9.14 | |||
compressData | |
81.82% |
9 / 11 |
|
0.00% |
0 / 1 |
4.10 | |||
decompressData | |
100.00% |
17 / 17 |
|
100.00% |
1 / 1 |
11 | |||
getCacheTTL | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
3.07 | |||
getTextIdFromAddress | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
makeAddressFromTextId | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
explodeFlags | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
splitBlobAddress | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
isReadOnly | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * Attribution notice: when this file was created, much of its content was taken |
19 | * from the Revision.php file as present in release 1.30. Refer to the history |
20 | * of that file for original authorship (that file was removed entirely in 1.37, |
21 | * but its history can still be found in prior versions of MediaWiki). |
22 | * |
23 | * @file |
24 | */ |
25 | |
26 | namespace MediaWiki\Storage; |
27 | |
28 | use AppendIterator; |
29 | use ExternalStoreAccess; |
30 | use ExternalStoreException; |
31 | use HistoryBlobUtils; |
32 | use InvalidArgumentException; |
33 | use StatusValue; |
34 | use Wikimedia\Assert\Assert; |
35 | use Wikimedia\ObjectCache\BagOStuff; |
36 | use Wikimedia\ObjectCache\WANObjectCache; |
37 | use Wikimedia\Rdbms\DBAccessObjectUtils; |
38 | use Wikimedia\Rdbms\IDatabase; |
39 | use Wikimedia\Rdbms\IDBAccessObject; |
40 | use Wikimedia\Rdbms\ILoadBalancer; |
41 | |
42 | /** |
43 | * Service for storing and loading Content objects representing revision data blobs. |
44 | * |
45 | * @since 1.31 |
46 | * |
47 | * @note This was written to act as a drop-in replacement for the corresponding |
48 | * static methods in the old Revision class (which was later removed in 1.37). |
49 | */ |
50 | class SqlBlobStore implements BlobStore { |
51 | |
52 | // Note: the name has been taken unchanged from the old Revision class. |
53 | public const TEXT_CACHE_GROUP = 'revisiontext:10'; |
54 | |
55 | /** @internal */ |
56 | public const DEFAULT_TTL = 7 * 24 * 3600; // 7 days |
57 | |
58 | /** |
59 | * @var ILoadBalancer |
60 | */ |
61 | private $dbLoadBalancer; |
62 | |
63 | /** |
64 | * @var ExternalStoreAccess |
65 | */ |
66 | private $extStoreAccess; |
67 | |
68 | /** |
69 | * @var WANObjectCache |
70 | */ |
71 | private $cache; |
72 | |
73 | /** |
74 | * @var string|bool DB domain ID of a wiki or false for the local one |
75 | */ |
76 | private $dbDomain; |
77 | |
78 | /** |
79 | * @var int |
80 | */ |
81 | private $cacheExpiry = self::DEFAULT_TTL; |
82 | |
83 | /** |
84 | * @var bool |
85 | */ |
86 | private $compressBlobs = false; |
87 | |
88 | /** |
89 | * @var string|false |
90 | */ |
91 | private $legacyEncoding = false; |
92 | |
93 | /** |
94 | * @var bool |
95 | */ |
96 | private $useExternalStore = false; |
97 | |
/**
 * Wire up the collaborators this blob store works with.
 *
 * @param ILoadBalancer $dbLoadBalancer Load balancer from which database connections are obtained
 * @param ExternalStoreAccess $extStoreAccess Gateway to external storage
 * @param WANObjectCache $cache Cache in which blobs are kept. The local wiki's default
 *  instance is fine even when $dbDomain points at another wiki: cache keys are built with
 *  makeGlobalKey() and the resolved domain ID, so wiki A and wiki B share the same keys
 *  for blobs fetched from wiki C, whatever their own default key space is.
 * @param bool|string $dbDomain ID of the target wiki database, or false for the local wiki.
 */
public function __construct(
	ILoadBalancer $dbLoadBalancer,
	ExternalStoreAccess $extStoreAccess,
	WANObjectCache $cache,
	$dbDomain = false
) {
	$this->dbDomain = $dbDomain;
	$this->cache = $cache;
	$this->extStoreAccess = $extStoreAccess;
	$this->dbLoadBalancer = $dbLoadBalancer;
}
120 | |
/**
 * Report the configured blob cache lifetime.
 *
 * @return int Number of seconds a blob may stay cached
 */
public function getCacheExpiry() {
	return $this->cacheExpiry;
}
127 | |
/**
 * Configure the blob cache lifetime.
 *
 * @param int $cacheExpiry Number of seconds a blob may stay cached
 */
public function setCacheExpiry( int $cacheExpiry ) {
	$this->cacheExpiry = $cacheExpiry;
}
134 | |
/**
 * Report whether compression is requested for newly stored blobs.
 *
 * @return bool True if blobs should be compressed before being stored
 */
public function getCompressBlobs() {
	return $this->compressBlobs;
}
141 | |
/**
 * Turn compression of newly stored blobs on or off.
 *
 * @param bool $compressBlobs True if blobs should be compressed before being stored
 */
public function setCompressBlobs( $compressBlobs ) {
	$this->compressBlobs = $compressBlobs;
}
148 | |
/**
 * Report the encoding assumed for blobs that carry no utf8 marker.
 *
 * @return false|string Name of the legacy encoding, or false when legacy encoding
 *  handling is switched off and utf8 is assumed.
 */
public function getLegacyEncoding() {
	return $this->legacyEncoding;
}
156 | |
/**
 * Choose the encoding assumed for blobs whose flags lack the utf-8 marker.
 *
 * @note Up to 1.34 this method took a second parameter, Language $language; it was removed.
 *
 * @param string $legacyEncoding Encoding name to assume for blobs not marked as utf8.
 */
public function setLegacyEncoding( string $legacyEncoding ) {
	$this->legacyEncoding = $legacyEncoding;
}
168 | |
/**
 * Report whether new blobs are written through the ExternalStore mechanism.
 *
 * @return bool True if the ExternalStore mechanism is used for storing blobs.
 */
public function getUseExternalStore() {
	return $this->useExternalStore;
}
175 | |
/**
 * Select whether new blobs are written through the ExternalStore mechanism.
 *
 * @param bool $useExternalStore True to use the ExternalStore mechanism for storing blobs.
 */
public function setUseExternalStore( bool $useExternalStore ) {
	$this->useExternalStore = $useExternalStore;
}
182 | |
/**
 * Accessor for the load balancer injected through the constructor.
 *
 * @return ILoadBalancer
 */
private function getDBLoadBalancer() {
	return $this->dbLoadBalancer;
}
189 | |
/**
 * Obtain a connection to the configured DB domain from the load balancer.
 *
 * @param int $index A database index, like DB_PRIMARY or DB_REPLICA
 *
 * @return IDatabase
 */
private function getDBConnection( $index ) {
	return $this->getDBLoadBalancer()->getConnection( $index, [], $this->dbDomain );
}
199 | |
/**
 * Persist an arbitrary chunk of data and hand back an address under which getBlob()
 * will return that same data.
 *
 * Depending on the useExternalStore setting the data goes either to external storage
 * (yielding an "es:" address) or into a new row of the text table (yielding a "tt:"
 * address).
 *
 * @param string $data
 * @param array $hints An array of hints.
 *
 * @throws BlobAccessException
 * @return string an address that can be used with getBlob() to retrieve the data.
 */
public function storeBlob( $data, $hints = [] ) {
	// compressData() takes $data by reference and may replace it with the deflated form.
	$flags = $this->compressData( $data );

	if ( !$this->useExternalStore ) {
		// Plain text-table storage: insert a row and derive the address from its ID.
		$dbw = $this->getDBConnection( DB_PRIMARY );
		$dbw->newInsertQueryBuilder()
			->insertInto( 'text' )
			->row( [ 'old_text' => $data, 'old_flags' => $flags ] )
			->caller( __METHOD__ )
			->execute();

		return self::makeAddressFromTextId( $dbw->insertId() );
	}

	// External storage: insert() hands back the URL of the stored object.
	try {
		$url = $this->extStoreAccess->insert( $data, [ 'domain' => $this->dbDomain ] );
	} catch ( ExternalStoreException $e ) {
		throw new BlobAccessException( $e->getMessage(), 0, $e );
	}
	if ( !$url ) {
		throw new BlobAccessException( "Failed to store text to external storage" );
	}

	$address = 'es:' . $url;
	return $flags ? $address . '?flags=' . $flags : $address;
}
242 | |
/**
 * Load the blob stored at the given address, going through the cache.
 *
 * MCR migration note: this replaced Revision::loadText
 *
 * @param string $blobAddress
 * @param int $queryFlags
 *
 * @throws BlobAccessException
 * @return string
 */
public function getBlob( $blobAddress, $queryFlags = 0 ) {
	Assert::parameterType( 'string', $blobAddress, '$blobAddress' );

	// Filled in by the loader below so that a failure can be reported after the cache call.
	$error = null;
	$loader = function ( $unused, &$ttl, &$setOpts ) use ( $blobAddress, $queryFlags, &$error ) {
		// $setOpts is left alone; blobs are immutable and negatives are not cached
		[ $blobs, $failures ] = $this->fetchBlobs( [ $blobAddress ], $queryFlags );
		$error = $failures[$blobAddress] ?? null;
		if ( $error ) {
			// No negative caching; negative hits on text rows may be due to corrupted replica DBs
			$ttl = WANObjectCache::TTL_UNCACHEABLE;
		}
		return $blobs[$blobAddress];
	};

	$blob = $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$loader,
		$this->getCacheOptions()
	);

	if ( $error ) {
		[ $kind, $message ] = $error;
		throw $kind === 'badrevision'
			? new BadBlobException( $message )
			: new BlobAccessException( $message );
	}

	Assert::postcondition( is_string( $blob ), 'Blob must not be null' );
	return $blob;
}
286 | |
/**
 * Batch counterpart of BlobStore::getBlob.
 *
 * @param string[] $blobAddresses An array of blob addresses.
 * @param int $queryFlags See IDBAccessObject.
 * @throws BlobAccessException
 * @return StatusValue A status whose value maps blobAddress => binary blob data, or null
 *  where fetching that blob failed. Each fetch failure is reported as a warning
 *  on the status object.
 * @since 1.34
 */
public function getBlobBatch( $blobAddresses, $queryFlags = 0 ) {
	// FIXME: All caching has temporarily been removed in I94c6f9ba7b9caeeb due to T235188.
	//        Caching behavior should be restored by reverting I94c6f9ba7b9caeeb as soon as
	//        the root cause of T235188 has been resolved.

	[ $blobsByAddress, $errors ] = $this->fetchBlobs( $blobAddresses, $queryFlags );

	// fetchBlobs() marks failures with false; callers of this method expect null instead.
	foreach ( $blobsByAddress as $address => $blob ) {
		if ( $blob === false ) {
			$blobsByAddress[$address] = null;
		}
	}

	$status = StatusValue::newGood( $blobsByAddress );
	foreach ( $errors as $warningParams ) {
		// @phan-suppress-next-line PhanParamTooFewUnpack
		$status->warning( ...$warningParams );
	}
	return $status;
}
316 | |
/**
 * Fetch the blobs for a list of addresses.
 *
 * Addresses with the 'es' schema are resolved through expandBlob(), 'bad' addresses
 * yield an empty string plus a 'badrevision' error, and 'tt' addresses are looked up
 * in the text table in a single query (with an optional fallback query, see below).
 *
 * MCR migration note: this corresponded to Revision::fetchText
 *
 * @param string[] $blobAddresses
 * @param int $queryFlags
 *
 * @throws BlobAccessException If an address cannot be split into schema and ID
 * @return array [ $result, $errors ] A list with the following elements:
 *   - The result: a map of blob addresses to successfully fetched blobs
 *     or false if fetch failed
 *   - Errors: a map of blob addresses to error information about the blob.
 *     On success, the relevant key will be absent. Each error is a list of
 *     parameters to be passed to StatusValue::warning().
 */
private function fetchBlobs( $blobAddresses, $queryFlags ) {
	$textIdToBlobAddress = [];
	$result = [];
	$errors = [];
	foreach ( $blobAddresses as $blobAddress ) {
		try {
			[ $schema, $id, $params ] = self::splitBlobAddress( $blobAddress );
		} catch ( InvalidArgumentException $ex ) {
			throw new BlobAccessException(
				$ex->getMessage() . '. Use findBadBlobs.php to remedy.',
				0,
				$ex
			);
		}

		if ( $schema === 'es' ) {
			// Flags carried in the address are combined with the implied 'external' flag.
			$flags = isset( $params['flags'] ) ? $params['flags'] . ',external' : 'external';
			$blob = $this->expandBlob( $id, $flags, $blobAddress );

			if ( $blob === false ) {
				$errors[$blobAddress] = [
					'internalerror',
					"Bad data in external store address $id. Use findBadBlobs.php to remedy."
				];
			}
			$result[$blobAddress] = $blob;
		} elseif ( $schema === 'bad' ) {
			// Database row was marked as "known bad"
			wfDebug(
				__METHOD__
				. ": loading known-bad content ($blobAddress), returning empty string"
			);
			$result[$blobAddress] = '';
			$errors[$blobAddress] = [
				'badrevision',
				'The content of this revision is missing or corrupted (bad schema)'
			];
		} elseif ( $schema === 'tt' ) {
			$textId = intval( $id );

			if ( $textId < 1 || $id !== (string)$textId ) {
				$errors[$blobAddress] = [
					'internalerror',
					"Bad blob address: $blobAddress. Use findBadBlobs.php to remedy."
				];
				$result[$blobAddress] = false;
				// A malformed ID must not be queued for the database lookup: otherwise
				// e.g. "tt:012" would load row 12 and overwrite the failure recorded
				// above, and "tt:abc" would trigger a pointless query for old_id = 0.
				continue;
			}

			$textIdToBlobAddress[$textId] = $blobAddress;
		} else {
			$errors[$blobAddress] = [
				'internalerror',
				"Unknown blob address schema: $schema. Use findBadBlobs.php to remedy."
			];
			$result[$blobAddress] = false;
		}
	}

	$textIds = array_keys( $textIdToBlobAddress );
	if ( !$textIds ) {
		// Nothing needs to be read from the text table.
		return [ $result, $errors ];
	}
	// Callers doing updates will pass in READ_LATEST as usual. Since the text/blob tables
	// do not normally get rows changed around, set READ_LATEST_IMMUTABLE in those cases.
	$queryFlags |= DBAccessObjectUtils::hasFlags( $queryFlags, IDBAccessObject::READ_LATEST )
		? IDBAccessObject::READ_LATEST_IMMUTABLE
		: 0;
	[ $index, $options, $fallbackIndex, $fallbackOptions ] =
		self::getDBOptions( $queryFlags );
	// Text data is immutable; check replica DBs first.
	$dbConnection = $this->getDBConnection( $index );
	$rows = $dbConnection->newSelectQueryBuilder()
		->select( [ 'old_id', 'old_text', 'old_flags' ] )
		->from( 'text' )
		->where( [ 'old_id' => $textIds ] )
		->options( $options )
		->caller( __METHOD__ )->fetchResultSet();
	$numRows = $rows->numRows();

	// Fallback to DB_PRIMARY in some cases if not all the rows were found, using the appropriate
	// options, such as FOR UPDATE to avoid missing rows due to REPEATABLE-READ.
	if ( $numRows !== count( $textIds ) && $fallbackIndex !== null ) {
		$fetchedTextIds = [];
		foreach ( $rows as $row ) {
			$fetchedTextIds[] = $row->old_id;
		}
		$missingTextIds = array_diff( $textIds, $fetchedTextIds );
		$dbConnection = $this->getDBConnection( $fallbackIndex );
		$rowsFromFallback = $dbConnection->newSelectQueryBuilder()
			->select( [ 'old_id', 'old_text', 'old_flags' ] )
			->from( 'text' )
			->where( [ 'old_id' => $missingTextIds ] )
			->options( $fallbackOptions )
			->caller( __METHOD__ )->fetchResultSet();
		// Iterate over the rows of both queries as one sequence.
		$appendIterator = new AppendIterator();
		$appendIterator->append( $rows );
		$appendIterator->append( $rowsFromFallback );
		$rows = $appendIterator;
	}

	foreach ( $rows as $row ) {
		$blobAddress = $textIdToBlobAddress[$row->old_id];
		$blob = false;
		if ( $row->old_text !== null ) {
			$blob = $this->expandBlob( $row->old_text, $row->old_flags, $blobAddress );
		}
		if ( $blob === false ) {
			$errors[$blobAddress] = [
				'internalerror',
				"Bad data in text row {$row->old_id}. Use findBadBlobs.php to remedy."
			];
		}
		$result[$blobAddress] = $blob;
	}

	// If we're still missing some of the rows, set errors for missing blobs.
	if ( count( $result ) !== count( $blobAddresses ) ) {
		foreach ( $blobAddresses as $blobAddress ) {
			if ( !isset( $result[$blobAddress] ) ) {
				$errors[$blobAddress] = [
					'internalerror',
					"Unable to fetch blob at $blobAddress. Use findBadBlobs.php to remedy."
				];
				$result[$blobAddress] = false;
			}
		}
	}
	return [ $result, $errors ];
}
463 | |
/**
 * Translate a bitfield of IDBAccessObject::READ_* flags into connection indexes
 * and query options for the primary attempt and the optional fallback attempt.
 *
 * @param int $bitfield Bitfield of IDBAccessObject::READ_* constants
 * @return array [ $index, $options, $fallbackIndex, $fallbackOptions ]; $fallbackIndex
 *  is null when no fallback query should be made.
 */
private static function getDBOptions( int $bitfield ): array {
	$lockingOptions = [];
	if ( DBAccessObjectUtils::hasFlags( $bitfield, IDBAccessObject::READ_EXCLUSIVE ) ) {
		$lockingOptions[] = 'FOR UPDATE';
	} elseif ( DBAccessObjectUtils::hasFlags( $bitfield, IDBAccessObject::READ_LOCKING ) ) {
		$lockingOptions[] = 'LOCK IN SHARE MODE';
	}

	if ( DBAccessObjectUtils::hasFlags( $bitfield, IDBAccessObject::READ_LATEST_IMMUTABLE ) ) {
		// Replica first (overriding READ_LATEST if set), primary as fallback.
		// Locks on DB_REPLICA make no sense, so they only apply to the fallback.
		return [ DB_REPLICA, [], DB_PRIMARY, $lockingOptions ];
	}

	$index = DBAccessObjectUtils::hasFlags( $bitfield, IDBAccessObject::READ_LATEST )
		? DB_PRIMARY
		: DB_REPLICA;

	// Single attempt, no fallback.
	return [ $index, $lockingOptions, null, [] ];
}
493 | |
/**
 * Build the cache key under which the blob at the given address is stored.
 *
 * The key is a global key that includes the resolved DB domain, which lets cached
 * blobs from the same database be shared between wikis: wiki A and wiki B end up
 * with identical keys for blobs fetched from wiki C.
 *
 * @param string $blobAddress
 * @return string
 */
private function getCacheKey( $blobAddress ) {
	$domainId = $this->dbLoadBalancer->resolveDomainID( $this->dbDomain );

	return $this->cache->makeGlobalKey( 'SqlBlobStore-blob', $domainId, $blobAddress );
}
511 | |
/**
 * Options handed to WANObjectCache::getWithSetCallback() whenever a blob is cached.
 *
 * @return array<string,mixed>
 */
private function getCacheOptions() {
	$options = [];
	$options['pcGroup'] = self::TEXT_CACHE_GROUP;
	$options['pcTTL'] = WANObjectCache::TTL_PROC_LONG;
	$options['segmentable'] = true;

	return $options;
}
524 | |
/**
 * Turn raw stored data into the actual blob, as directed by its flags.
 *
 * MCR migration note: this replaced Revision::getRevisionText
 *
 * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead.
 * @todo make this private, there should be no need to use this method outside this class.
 *
 * @param string $raw The raw blob data, to be processed according to $flags.
 *   Depending on $flags this is the blob itself, a compressed form of it, or merely
 *   the address of the actual blob.
 * @param string|string[] $flags Blob flags, such as 'external' or 'gzip'.
 *   Note that not including 'utf-8' in $flags will cause the data to be decoded
 *   according to the legacy encoding specified via setLegacyEncoding.
 * @param string|null $blobAddress A blob address for use in the cache key. If not given,
 *   caching is disabled.
 *
 * @return false|string The expanded blob or false on failure
 * @throws BlobAccessException
 */
public function expandBlob( $raw, $flags, $blobAddress = null ) {
	if ( is_string( $flags ) ) {
		$flags = self::explodeFlags( $flags );
	}
	if ( in_array( 'error', $flags ) ) {
		throw new BadBlobException(
			"The content of this revision is missing or corrupted (error flag)"
		);
	}

	if ( !in_array( 'external', $flags ) ) {
		// The data is stored inline; only decoding is needed.
		return $this->decompressData( $raw, $flags, $blobAddress );
	}

	// Use external methods for external objects, text in table is URL-only then
	$url = $raw;
	$parts = explode( '://', $url, 2 );
	if ( count( $parts ) < 2 || $parts[1] === '' ) {
		return false;
	}

	$fetch = function () use ( $url, $flags, $blobAddress ) {
		$blob = $this->extStoreAccess
			->fetchFromURL( $url, [ 'domain' => $this->dbDomain ] );

		return $blob === false ? false : $this->decompressData( $blob, $flags, $blobAddress );
	};

	if ( !$blobAddress ) {
		// Without an address there is no cache key, so fetch directly.
		return $fetch();
	}

	// The cached value should be decompressed, so $fetch handles that before returning.
	return $this->cache->getWithSetCallback(
		$this->getCacheKey( $blobAddress ),
		$this->getCacheTTL(),
		$fetch,
		$this->getCacheOptions()
	);
}
585 | |
/**
 * Prepare a blob for storage, deflating it when compression has been enabled
 * (see setCompressBlobs()) and zlib is available.
 *
 * The blob is passed by reference and is overwritten with the deflated data when
 * compression succeeds. The return value is the flags field for the stored data:
 * it always contains 'utf-8', plus 'gzip' if the data was compressed.
 *
 * MCR migration note: this replaced Revision::compressRevisionText
 *
 * @note direct use is deprecated!
 * @todo make this private, there should be no need to use this method outside this class.
 *
 * @param string &$blob
 *
 * @return string
 */
public function compressData( &$blob ) {
	// Revisions not marked as UTF-8 will have legacy decoding applied by decompressData().
	// XXX: if $this->legacyEncoding is not set, we could skip this. That would however be
	// risky, since $this->legacyEncoding being set in the future would lead to data corruption.
	$blobFlags = [ 'utf-8' ];

	if ( $this->compressBlobs ) {
		if ( !function_exists( 'gzdeflate' ) ) {
			wfDebug( __METHOD__ . " -- no zlib support, not compressing" );
		} else {
			$deflated = gzdeflate( $blob );

			if ( $deflated !== false ) {
				$blob = $deflated;
				$blobFlags[] = 'gzip';
			} else {
				wfLogWarning( __METHOD__ . ': gzdeflate() failed' );
			}
		}
	}

	return implode( ',', $blobFlags );
}
626 | |
/**
 * Decode stored blob data according to its flags: inflate 'gzip' data, unwrap
 * 'object' data, and convert from the legacy encoding where applicable.
 *
 * MCR migration note: this replaced Revision::decompressRevisionText
 *
 * @note direct use is deprecated, use getBlob() or SlotRecord::getContent() instead.
 * @todo make this private, there should be no need to use this method outside this class.
 *
 * @param string $blob Blob in compressed/encoded form.
 * @param array $blobFlags Compression flags, such as 'gzip'.
 *   Note that not including 'utf-8' in $blobFlags will cause the data to be decoded
 *   according to the legacy encoding specified via setLegacyEncoding.
 * @param string|null $blobAddress Used for log message
 *
 * @return string|false Decompressed text, or false on failure
 */
public function decompressData( string $blob, array $blobFlags, ?string $blobAddress = null ) {
	// Error row, nothing to decode
	if ( in_array( 'error', $blobFlags ) ) {
		return false;
	}

	// Deal with optional compression of archived pages.
	// This can be done periodically via maintenance/compressOld.php, and
	// as pages are saved if $wgCompressRevisions is set.
	if ( in_array( 'gzip', $blobFlags ) ) {
		// Silence native warning in favour of more detailed warning (T380347)
		// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
		$blob = @gzinflate( $blob );
		if ( $blob === false ) {
			$where = $blobAddress ? ' (at blob address ' . $blobAddress . ')' : '';
			wfWarn( __METHOD__ . ': gzinflate() failed' . $where );
			return false;
		}
	}

	if ( in_array( 'object', $blobFlags ) ) {
		# Generic compressed storage
		$obj = HistoryBlobUtils::unserialize( $blob );
		if ( !$obj ) {
			// Invalid object
			return false;
		}
		$blob = $obj->getText();
	}

	// "utf8" is accepted for compatibility with some broken conversion scripts 2008-12-30.
	$markedUtf8 = in_array( 'utf-8', $blobFlags ) || in_array( 'utf8', $blobFlags );
	if ( $blob === false || !$this->legacyEncoding || $markedUtf8 ) {
		return $blob;
	}

	// Needed to support old revisions from before MW 1.5.
	// - Old revisions kept around in a legacy encoding?
	//   Upconvert on demand.
	// - Even with "//IGNORE" iconv can whine about illegal characters in
	//   *input* string. We just ignore those too.
	//   Ref https://bugs.php.net/bug.php?id=37166
	//   Ref https://phabricator.wikimedia.org/T18885
	//
	// phpcs:ignore Generic.PHP.NoSilencedErrors.Discouraged
	return @iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $blob );
}
692 | |
/**
 * Work out the TTL to use when caching blob text.
 *
 * MCR migration note: this replaced Revision::getCacheTTL
 *
 * @return int
 */
private function getCacheTTL() {
	$cache = $this->cache;

	// Do not cache RDBMs blobs in...the RDBMs store
	$isRdbmsBacked =
		$cache->getQoS( BagOStuff::ATTR_DURABILITY ) >= BagOStuff::QOS_DURABILITY_RDBMS;

	// A falsy cache expiry likewise means blobs are not cached.
	if ( $isRdbmsBacked || !$this->cacheExpiry ) {
		return $cache::TTL_UNCACHEABLE;
	}

	return $this->cacheExpiry;
}
712 | |
/**
 * Returns an ID corresponding to the old_id field in the text table, corresponding
 * to the given $address.
 *
 * Currently, $address must start with 'tt:' followed by a positive decimal integer
 * representing the old_id; if $address does not start with 'tt:', null is returned.
 * However, the implementation may change to insert rows into the text table on the fly.
 * This implies that this method cannot be static.
 *
 * @note This method exists for use with the text table based storage schema.
 * It should not be assumed that it will function with all future kinds of content addresses.
 *
 * @deprecated since 1.31, so don't assume that all blob addresses refer to a row in the text
 * table. This method should become private once the relevant refactoring in WikiPage is
 * complete.
 *
 * @param string $address
 *
 * @throws InvalidArgumentException If $address cannot be parsed, or if it uses the 'tt'
 *  schema but its ID part is not a positive integer in canonical decimal form.
 * @return int|null
 */
public function getTextIdFromAddress( $address ) {
	[ $schema, $id, ] = self::splitBlobAddress( $address );

	if ( $schema !== 'tt' ) {
		return null;
	}

	$textId = intval( $id );

	// Same validity rule as fetchBlobs(): the ID must be at least 1 and must round-trip
	// through intval() unchanged, which rejects "0", "-5", "012", "12abc" and the like.
	if ( $textId < 1 || $id !== (string)$textId ) {
		throw new InvalidArgumentException( "Malformed text_id: $id" );
	}

	return $textId;
}
748 | |
/**
 * Build the blob address for content held in the text table row with the given ID.
 * Such addresses consist of the "tt:" schema prefix followed by the integer that is
 * the row's old_id value.
 *
 * @internal
 * @note Regular application logic should not call this. It is public so that
 * maintenance scripts can use it for bulk operations on the text table.
 *
 * @param int $id
 *
 * @return string
 */
public static function makeAddressFromTextId( $id ) {
	return "tt:$id";
}
765 | |
/**
 * Break a comma-separated old_flags value up into the individual flags.
 * An empty string yields an empty list.
 *
 * @param string $flagsString
 * @return array
 */
public static function explodeFlags( string $flagsString ) {
	if ( $flagsString === '' ) {
		return [];
	}

	return explode( ',', $flagsString );
}
775 | |
/**
 * Take a blob address apart into its schema, its ID, and its parameters/flags.
 *
 * The schema is returned in lower case; whatever follows a "?" in the address is
 * parsed into the parameter array.
 *
 * @since 1.33
 *
 * @param string $address
 *
 * @return array [ $schema, $id, $parameters ], with $parameters being an assoc array.
 */
public static function splitBlobAddress( $address ) {
	$matched = preg_match( '/^([-+.\w]+):([^\s?]+)(\?([^\s]*))?$/', $address, $m );
	if ( !$matched ) {
		throw new InvalidArgumentException( "Bad blob address: $address" );
	}

	return [
		strtolower( $m[1] ),
		$m[2],
		wfCgiToArray( $m[4] ?? '' ),
	];
}
796 | |
/**
 * Tell whether blobs can currently not be written.
 *
 * @return bool True if external storage is in use and reports itself read-only, or
 *  if the database load balancer gives a read-only reason.
 */
public function isReadOnly() {
	$externalStoreReadOnly = $this->useExternalStore && $this->extStoreAccess->isReadOnly();

	return $externalStoreReadOnly
		|| $this->getDBLoadBalancer()->getReadOnlyReason() !== false;
}
804 | } |