Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
91.16% |
165 / 181 |
|
78.57% |
11 / 14 |
CRAP | |
0.00% |
0 / 1 |
NameTableStore | |
91.16% |
165 / 181 |
|
78.57% |
11 / 14 |
36.90 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
10 / 10 |
|
100.00% |
1 / 1 |
1 | |||
getDBConnection | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getCacheKey | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
normalizeName | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
acquireId | |
70.37% |
19 / 27 |
|
0.00% |
0 / 1 |
5.65 | |||
reloadMap | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
3 | |||
getId | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
getName | |
77.42% |
24 / 31 |
|
0.00% |
0 / 1 |
8.74 | |||
getMap | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getTableFromCachesOrReplica | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
2 | |||
loadTable | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
2 | |||
store | |
100.00% |
27 / 27 |
|
100.00% |
1 / 1 |
2 | |||
retryStore | |
96.67% |
29 / 30 |
|
0.00% |
0 / 1 |
3 | |||
getFieldsToStore | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
3 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | */ |
20 | |
21 | namespace MediaWiki\Storage; |
22 | |
23 | use Exception; |
24 | use Psr\Log\LoggerInterface; |
25 | use WANObjectCache; |
26 | use Wikimedia\Assert\Assert; |
27 | use Wikimedia\LightweightObjectStore\ExpirationAwareness; |
28 | use Wikimedia\Rdbms\Database; |
29 | use Wikimedia\Rdbms\IDatabase; |
30 | use Wikimedia\Rdbms\ILoadBalancer; |
31 | use Wikimedia\RequestTimeout\TimeoutException; |
32 | |
33 | /** |
34 | * @since 1.31 |
35 | * @author Addshore |
36 | */ |
37 | class NameTableStore { |
38 | |
39 | /** @var ILoadBalancer */ |
40 | private $loadBalancer; |
41 | |
42 | /** @var WANObjectCache */ |
43 | private $cache; |
44 | |
45 | /** @var LoggerInterface */ |
46 | private $logger; |
47 | |
48 | /** @var string[] */ |
49 | private $tableCache = null; |
50 | |
51 | /** @var bool|string */ |
52 | private $domain; |
53 | |
54 | /** @var int */ |
55 | private $cacheTTL; |
56 | |
57 | /** @var string */ |
58 | private $table; |
59 | /** @var string */ |
60 | private $idField; |
61 | /** @var string */ |
62 | private $nameField; |
63 | /** @var null|callable */ |
64 | private $normalizationCallback; |
65 | /** @var null|callable */ |
66 | private $insertCallback; |
67 | |
68 | /** |
69 | * @param ILoadBalancer $dbLoadBalancer A load balancer for acquiring database connections |
70 | * @param WANObjectCache $cache A cache manager for caching data. This can be the local |
71 | * wiki's default instance even if $dbDomain refers to a different wiki, since |
72 | * makeGlobalKey() is used to construct a key that allows cached names from |
73 | * the same database to be re-used between wikis. For example, enwiki and frwiki will |
74 | * use the same cache keys for names from the wikidatawiki database, regardless |
75 | * of the cache's default key space. |
76 | * @param LoggerInterface $logger |
77 | * @param string $table |
78 | * @param string $idField |
79 | * @param string $nameField |
80 | * @param callable|null $normalizationCallback Normalization to be applied to names before being |
81 | * saved or queried. This should be a callback that accepts and returns a single string. |
82 | * @param bool|string $dbDomain Database domain ID. Use false for the local database domain. |
83 | * @param callable|null $insertCallback Callback to change insert fields accordingly. |
84 | * This parameter was introduced in 1.32 |
85 | */ |
86 | public function __construct( |
87 | ILoadBalancer $dbLoadBalancer, |
88 | WANObjectCache $cache, |
89 | LoggerInterface $logger, |
90 | $table, |
91 | $idField, |
92 | $nameField, |
93 | callable $normalizationCallback = null, |
94 | $dbDomain = false, |
95 | callable $insertCallback = null |
96 | ) { |
97 | $this->loadBalancer = $dbLoadBalancer; |
98 | $this->cache = $cache; |
99 | $this->logger = $logger; |
100 | $this->table = $table; |
101 | $this->idField = $idField; |
102 | $this->nameField = $nameField; |
103 | $this->normalizationCallback = $normalizationCallback; |
104 | $this->domain = $dbDomain; |
105 | $this->cacheTTL = ExpirationAwareness::TTL_MONTH; |
106 | $this->insertCallback = $insertCallback; |
107 | } |
108 | |
109 | /** |
110 | * @param int $index A database index, like DB_PRIMARY or DB_REPLICA |
111 | * @param int $flags Database connection flags |
112 | * @return IDatabase |
113 | */ |
114 | private function getDBConnection( $index, $flags = 0 ) { |
115 | return $this->loadBalancer->getConnection( $index, [], $this->domain, $flags ); |
116 | } |
117 | |
118 | /** |
119 | * Gets the cache key for names. |
120 | * |
121 | * The cache key is constructed based on the database domain ID passed to the constructor, and allows |
122 | * sharing of name tables cached for a specific database between wikis. |
123 | * |
124 | * @return string |
125 | */ |
126 | private function getCacheKey() { |
127 | return $this->cache->makeGlobalKey( |
128 | 'NameTableSqlStore', |
129 | $this->table, |
130 | $this->loadBalancer->resolveDomainID( $this->domain ) |
131 | ); |
132 | } |
133 | |
134 | /** |
135 | * @param string $name |
136 | * @return string |
137 | */ |
138 | private function normalizeName( $name ) { |
139 | if ( $this->normalizationCallback === null ) { |
140 | return $name; |
141 | } |
142 | return call_user_func( $this->normalizationCallback, $name ); |
143 | } |
144 | |
145 | /** |
146 | * Acquire the id of the given name. |
147 | * This creates a row in the table if it doesn't already exist. |
148 | * |
149 | * @note If called within an atomic section, there is a chance for the acquired ID |
150 | * to be lost on rollback. A best effort is made to re-insert the mapping |
151 | * in this case, and consistency of the cache with the database table is ensured |
152 | * by re-loading the map after a failed atomic section. However, there is no guarantee |
153 | * that an ID returned by this method is valid outside the transaction in which it |
154 | * was produced. This means that calling code should not retain the return value beyond |
155 | * the scope of a transaction, but rather call acquireId() again after the transaction |
156 | * is complete. In some rare cases, this may produce an ID different from the first call. |
157 | * |
158 | * @param string $name |
159 | * @throws NameTableAccessException |
160 | * @return int |
161 | */ |
162 | public function acquireId( string $name ) { |
163 | $name = $this->normalizeName( $name ); |
164 | |
165 | $table = $this->getTableFromCachesOrReplica(); |
166 | $searchResult = array_search( $name, $table, true ); |
167 | if ( $searchResult === false ) { |
168 | $id = $this->store( $name ); |
169 | if ( $id === null ) { |
170 | // RACE: $name was already in the db, probably just inserted, so load from primary DB. |
171 | // Use CONN_TRX_AUTOCOMMIT to avoid missing inserts due to other threads or REPEATABLE-READs. |
172 | $table = $this->reloadMap( ILoadBalancer::CONN_TRX_AUTOCOMMIT ); |
173 | |
174 | $searchResult = array_search( $name, $table, true ); |
175 | if ( $searchResult === false ) { |
176 | // Insert failed due to IGNORE flag, but DB_PRIMARY didn't give us the data |
177 | $m = "No insert possible but primary DB didn't give us a record for " . |
178 | "'{$name}' in '{$this->table}'"; |
179 | $this->logger->error( $m ); |
180 | throw new NameTableAccessException( $m ); |
181 | } |
182 | } else { |
183 | if ( isset( $table[$id] ) ) { |
184 | // This can happen when a transaction is rolled back and acquireId is called in |
185 | // an onTransactionResolution() callback, which gets executed before retryStore() |
186 | // has a chance to run. The right thing to do in this case is to discard the old |
187 | // value. According to the contract of acquireId, the caller should not have |
188 | // used it outside the transaction, so it should not be persisted anywhere after |
189 | // the rollback. |
190 | $m = "Got ID $id for '$name' from insert" |
191 | . " into '{$this->table}', but ID $id was previously associated with" |
192 | . " the name '{$table[$id]}'. Overriding the old value, which presumably" |
193 | . " has been removed from the database due to a transaction rollback."; |
194 | |
195 | $this->logger->warning( $m ); |
196 | } |
197 | |
198 | $table[$id] = $name; |
199 | $searchResult = $id; |
200 | |
201 | // As store returned an ID we know we inserted so delete from WAN cache |
202 | $dbw = $this->getDBConnection( DB_PRIMARY ); |
203 | $dbw->onTransactionPreCommitOrIdle( function () { |
204 | $this->cache->delete( $this->getCacheKey(), WANObjectCache::HOLDOFF_TTL_NONE ); |
205 | }, __METHOD__ ); |
206 | } |
207 | $this->tableCache = $table; |
208 | } |
209 | |
210 | return $searchResult; |
211 | } |
212 | |
213 | /** |
214 | * Reloads the name table from the primary database, and purges the WAN cache entry. |
215 | * |
216 | * @note This should only be called in situations where the local cache has been detected |
217 | * to be out of sync with the database. There should be no reason to call this method |
218 | * from outside the NameTableStore during normal operation. This method may however be |
219 | * useful in unit tests. |
220 | * |
221 | * @param int $connFlags ILoadBalancer::CONN_XXX flags. Optional. |
222 | * |
223 | * @return string[] The freshly reloaded name map |
224 | */ |
225 | public function reloadMap( $connFlags = 0 ) { |
226 | if ( $connFlags !== 0 && defined( 'MW_PHPUNIT_TEST' ) ) { |
227 | // HACK: We can't use $connFlags while doing PHPUnit tests, because the |
228 | // fake database tables are bound to a single connection. |
229 | $connFlags = 0; |
230 | } |
231 | |
232 | $dbw = $this->getDBConnection( DB_PRIMARY, $connFlags ); |
233 | $this->tableCache = $this->loadTable( $dbw ); |
234 | $dbw->onTransactionPreCommitOrIdle( function () { |
235 | $this->cache->delete( $this->getCacheKey() ); |
236 | }, __METHOD__ ); |
237 | |
238 | return $this->tableCache; |
239 | } |
240 | |
241 | /** |
242 | * Get the id of the given name. |
243 | * If the name doesn't exist this will throw. |
244 | * This should be used in cases where we believe the name already exists or want to check for |
245 | * existence. |
246 | * |
247 | * @param string $name |
248 | * @throws NameTableAccessException The name does not exist |
249 | * @return int Id |
250 | */ |
251 | public function getId( string $name ) { |
252 | $name = $this->normalizeName( $name ); |
253 | |
254 | $table = $this->getTableFromCachesOrReplica(); |
255 | $searchResult = array_search( $name, $table, true ); |
256 | |
257 | if ( $searchResult !== false ) { |
258 | return $searchResult; |
259 | } |
260 | |
261 | throw NameTableAccessException::newFromDetails( $this->table, 'name', $name ); |
262 | } |
263 | |
264 | /** |
265 | * Get the name of the given id. |
266 | * If the id doesn't exist this will throw. |
267 | * This should be used in cases where we believe the id already exists. |
268 | * |
269 | * Note: Calls to this method will result in a primary DB select for nonexistent IDs. |
270 | * |
271 | * @param int $id |
272 | * @throws NameTableAccessException The id does not exist |
273 | * @return string name |
274 | */ |
275 | public function getName( int $id ) { |
276 | $table = $this->getTableFromCachesOrReplica(); |
277 | if ( array_key_exists( $id, $table ) ) { |
278 | return $table[$id]; |
279 | } |
280 | $fname = __METHOD__; |
281 | |
282 | $table = $this->cache->getWithSetCallback( |
283 | $this->getCacheKey(), |
284 | $this->cacheTTL, |
285 | function ( $oldValue, &$ttl, &$setOpts ) use ( $id, $fname ) { |
286 | // Check if cached value is up-to-date enough to have $id |
287 | if ( is_array( $oldValue ) && array_key_exists( $id, $oldValue ) ) { |
288 | // Completely leave the cache key alone |
289 | $ttl = WANObjectCache::TTL_UNCACHEABLE; |
290 | // Use the old value |
291 | return $oldValue; |
292 | } |
293 | // Regenerate from replica DB, and primary DB if needed |
294 | foreach ( [ DB_REPLICA, DB_PRIMARY ] as $source ) { |
295 | // Log a fallback to primary |
296 | if ( $source === DB_PRIMARY ) { |
297 | $this->logger->info( |
298 | $fname . ' falling back to primary select from ' . |
299 | $this->table . ' with id ' . $id |
300 | ); |
301 | } |
302 | $db = $this->getDBConnection( $source ); |
303 | $cacheSetOpts = Database::getCacheSetOptions( $db ); |
304 | $table = $this->loadTable( $db ); |
305 | if ( array_key_exists( $id, $table ) ) { |
306 | break; // found it |
307 | } |
308 | } |
309 | // Use the value from last source checked |
310 | $setOpts += $cacheSetOpts; |
311 | |
312 | return $table; |
313 | }, |
314 | [ 'minAsOf' => INF ] // force callback run |
315 | ); |
316 | |
317 | $this->tableCache = $table; |
318 | |
319 | if ( array_key_exists( $id, $table ) ) { |
320 | return $table[$id]; |
321 | } |
322 | |
323 | throw NameTableAccessException::newFromDetails( $this->table, 'id', $id ); |
324 | } |
325 | |
326 | /** |
327 | * Get the whole table, in no particular order as a map of ids to names. |
328 | * This method could be subject to DB or cache lag. |
329 | * |
330 | * @return string[] keys are the name ids, values are the names themselves |
331 | * Example: [ 1 => 'foo', 3 => 'bar' ] |
332 | */ |
333 | public function getMap() { |
334 | return $this->getTableFromCachesOrReplica(); |
335 | } |
336 | |
337 | /** |
338 | * @return string[] |
339 | */ |
340 | private function getTableFromCachesOrReplica() { |
341 | if ( $this->tableCache !== null ) { |
342 | return $this->tableCache; |
343 | } |
344 | |
345 | $table = $this->cache->getWithSetCallback( |
346 | $this->getCacheKey(), |
347 | $this->cacheTTL, |
348 | function ( $oldValue, &$ttl, &$setOpts ) { |
349 | $dbr = $this->getDBConnection( DB_REPLICA ); |
350 | $setOpts += Database::getCacheSetOptions( $dbr ); |
351 | return $this->loadTable( $dbr ); |
352 | } |
353 | ); |
354 | |
355 | $this->tableCache = $table; |
356 | |
357 | return $table; |
358 | } |
359 | |
360 | /** |
361 | * Gets the table from the db |
362 | * |
363 | * @param IDatabase $db |
364 | * |
365 | * @return string[] |
366 | */ |
367 | private function loadTable( IDatabase $db ) { |
368 | $result = $db->newSelectQueryBuilder() |
369 | ->select( [ |
370 | 'id' => $this->idField, |
371 | 'name' => $this->nameField |
372 | ] ) |
373 | ->from( $this->table ) |
374 | ->orderBy( 'id' ) |
375 | ->caller( __METHOD__ )->fetchResultSet(); |
376 | |
377 | $assocArray = []; |
378 | foreach ( $result as $row ) { |
379 | $assocArray[$row->id] = $row->name; |
380 | } |
381 | |
382 | return $assocArray; |
383 | } |
384 | |
385 | /** |
386 | * Stores the given name in the DB, returning the ID when an insert occurs. |
387 | * |
388 | * @param string $name |
389 | * @return int|null int if we know the ID, null if we don't |
390 | */ |
391 | private function store( string $name ) { |
392 | Assert::parameter( $name !== '', '$name', 'should not be an empty string' ); |
393 | // Note: this is only called internally so normalization of $name has already occurred. |
394 | |
395 | $dbw = $this->getDBConnection( DB_PRIMARY ); |
396 | |
397 | $id = null; |
398 | $dbw->doAtomicSection( |
399 | __METHOD__, |
400 | function ( IDatabase $unused, $fname ) |
401 | use ( $name, &$id, $dbw ) { |
402 | // NOTE: use IDatabase from the parent scope here, not the function parameter. |
403 | // If $dbw is a wrapper around the actual DB, we need to call the wrapper here, |
404 | // not the inner instance. |
405 | $dbw->newInsertQueryBuilder() |
406 | ->insertInto( $this->table ) |
407 | ->ignore() |
408 | ->row( $this->getFieldsToStore( $name ) ) |
409 | ->caller( $fname )->execute(); |
410 | |
411 | if ( $dbw->affectedRows() === 0 ) { |
412 | $this->logger->info( |
413 | 'Tried to insert name into table ' . $this->table . ', but value already existed.' |
414 | ); |
415 | |
416 | return; |
417 | } |
418 | |
419 | $id = $dbw->insertId(); |
420 | |
421 | // Any open transaction may still be rolled back. If that happens, we have to re-try the |
422 | // insertion and restore a consistent state of the cached table. |
423 | $dbw->onAtomicSectionCancel( |
424 | function ( $trigger, IDatabase $unused ) use ( $name, $id, $dbw ) { |
425 | $this->retryStore( $dbw, $name, $id ); |
426 | }, |
427 | $fname ); |
428 | }, |
429 | IDatabase::ATOMIC_CANCELABLE |
430 | ); |
431 | |
432 | return $id; |
433 | } |
434 | |
435 | /** |
436 | * After the initial insertion got rolled back, this can be used to try the insertion again, |
437 | * and ensure a consistent state of the cache. |
438 | * |
439 | * @param IDatabase $dbw |
440 | * @param string $name |
441 | * @param int $id |
442 | */ |
443 | private function retryStore( IDatabase $dbw, $name, $id ) { |
444 | // NOTE: in the closure below, use the IDatabase from the original method call, |
445 | // not the one passed to the closure as a parameter. |
446 | // If $dbw is a wrapper around the actual DB, we need to call the wrapper, |
447 | // not the inner instance. |
448 | |
449 | try { |
450 | $dbw->doAtomicSection( |
451 | __METHOD__, |
452 | function ( IDatabase $unused, $fname ) use ( $name, $id, $dbw ) { |
453 | // Try to insert a row with the ID we originally got. |
454 | // If that fails (because of a key conflict), we will just try to get another ID again later. |
455 | $dbw->newInsertQueryBuilder() |
456 | ->insertInto( $this->table ) |
457 | ->row( $this->getFieldsToStore( $name, $id ) ) |
458 | ->caller( $fname )->execute(); |
459 | |
460 | // Make sure we re-load the map in case this gets rolled back again. |
461 | // We could re-try once more, but that bears the risk of an infinite loop. |
462 | // So let's just give up on the ID. |
463 | $dbw->onAtomicSectionCancel( |
464 | function ( $trigger, IDatabase $unused ) { |
465 | $this->logger->warning( |
466 | 'Re-insertion of name into table ' . $this->table |
467 | . ' was rolled back. Giving up and reloading the cache.' |
468 | ); |
469 | $this->reloadMap( ILoadBalancer::CONN_TRX_AUTOCOMMIT ); |
470 | }, |
471 | $fname |
472 | ); |
473 | |
474 | $this->logger->info( |
475 | 'Re-insert name into table ' . $this->table . ' after failed transaction.' |
476 | ); |
477 | }, |
478 | IDatabase::ATOMIC_CANCELABLE |
479 | ); |
480 | } catch ( TimeoutException $e ) { |
481 | throw $e; |
482 | } catch ( Exception $ex ) { |
483 | $this->logger->error( |
484 | 'Re-insertion of name into table ' . $this->table . ' failed: ' . $ex->getMessage() |
485 | ); |
486 | } finally { |
487 | // NOTE: we reload regardless of whether the above insert succeeded. There are |
488 | // only three possibilities: the insert succeeded, so the new map will have |
489 | // the desired $id/$name mapping. Or the insert failed because another |
490 | // process already inserted that same $id/$name mapping, in which case the |
491 | // new map will also have it. Or another process grabbed the desired ID for |
492 | // another name, or the database refuses to insert the given ID into the |
493 | // auto increment field - in that case, the new map will not have a mapping |
494 | // for $name (or has a different mapping for $name). In that last case, we can |
495 | // only hope that the ID produced within the failed transaction has not been |
496 | // used outside that transaction. |
497 | |
498 | $this->reloadMap( ILoadBalancer::CONN_TRX_AUTOCOMMIT ); |
499 | } |
500 | } |
501 | |
502 | /** |
503 | * @param string $name |
504 | * @param int|null $id |
505 | * @return array |
506 | */ |
507 | private function getFieldsToStore( $name, $id = null ) { |
508 | $fields = []; |
509 | |
510 | $fields[$this->nameField] = $name; |
511 | |
512 | if ( $id !== null ) { |
513 | $fields[$this->idField] = $id; |
514 | } |
515 | |
516 | if ( $this->insertCallback !== null ) { |
517 | $fields = call_user_func( $this->insertCallback, $fields ); |
518 | } |
519 | return $fields; |
520 | } |
521 | |
522 | } |