Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
37.93% |
55 / 145 |
|
20.00% |
4 / 20 |
CRAP | |
0.00% |
0 / 1 |
FeatureIndex | |
37.93% |
55 / 145 |
|
20.00% |
4 / 20 |
920.85 | |
0.00% |
0 / 1 |
getLimit | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
queryOptions | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
limitIndexSize | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
removeFromIndex | n/a |
0 / 0 |
n/a |
0 / 0 |
0 | |||||
__construct | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
getPrimaryKeyColumns | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
canAnswer | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
30 | |||
getSort | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
getOrder | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
12 | |||
cachePurge | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
onAfterInsert | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
onAfterUpdate | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
30 | |||
onAfterRemove | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
onAfterLoad | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
onAfterClear | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
find | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
findMulti | |
80.00% |
24 / 30 |
|
0.00% |
0 / 1 |
9.65 | |||
filterResults | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
found | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
foundMulti | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
110 | |||
getCacheKeys | |
70.00% |
7 / 10 |
|
0.00% |
0 / 1 |
3.24 | |||
backingStoreFindMulti | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
20 | |||
cacheKey | |
83.33% |
10 / 12 |
|
0.00% |
0 / 1 |
5.12 | |||
cachedDbId | |
66.67% |
2 / 3 |
|
0.00% |
0 / 1 |
2.15 |
1 | <?php |
2 | |
3 | namespace Flow\Data\Index; |
4 | |
5 | use Flow\Data\Compactor; |
6 | use Flow\Data\Compactor\FeatureCompactor; |
7 | use Flow\Data\Compactor\ShallowCompactor; |
8 | use Flow\Data\FlowObjectCache; |
9 | use Flow\Data\Index; |
10 | use Flow\Data\ObjectManager; |
11 | use Flow\Data\ObjectMapper; |
12 | use Flow\Data\ObjectStorage; |
13 | use Flow\Exception\DataModelException; |
14 | use Flow\Model\UUID; |
15 | use FormatJson; |
16 | use MediaWiki\WikiMap\WikiMap; |
17 | |
18 | /** |
19 | * Index objects with equal features($indexedColumns) into the same buckets. |
20 | */ |
21 | abstract class FeatureIndex implements Index { |
22 | |
23 | /** |
24 | * @var FlowObjectCache |
25 | */ |
26 | protected $cache; |
27 | |
28 | /** |
29 | * @var ObjectStorage |
30 | */ |
31 | protected $storage; |
32 | |
33 | /** |
34 | * @var ObjectMapper |
35 | */ |
36 | protected $mapper; |
37 | |
38 | /** |
39 | * @var string |
40 | */ |
41 | protected $prefix; |
42 | |
43 | /** |
44 | * @var Compactor |
45 | */ |
46 | protected $rowCompactor; |
47 | |
48 | /** |
49 | * @var string[] |
50 | */ |
51 | protected $indexed; |
52 | |
53 | /** |
54 | * @var string[] The indexed columns in alphabetical order. This is |
55 | * ordered so that cache keys can be generated in a stable manner. |
56 | */ |
57 | protected $indexedOrdered; |
58 | |
59 | /** |
60 | * @var array |
61 | */ |
62 | protected $options; |
63 | |
64 | /**
65 | * @inheritDoc In this class the value caps limit + offset in canAnswer() and is the default slice size in foundMulti().
66 | */
67 | abstract public function getLimit();
68 | |
69 | /**
70 | * @return array The options backingStoreFindMulti() hands to $this->storage->findMulti()
71 | */
72 | abstract public function queryOptions();
73 | |
74 | /**
75 | * @todo This does not need to be abstract; no method of this class calls it.
76 | * @param array $values The current contents of a single feature bucket
77 | * @return array $values trimmed to respect self::getLimit()
78 | */
79 | abstract public function limitIndexSize( array $values );
80 | |
81 | /**
82 | * Called by onAfterInsert(), onAfterUpdate() and onAfterRemove(). @todo Could the cache key be passed in instead of $indexed?
83 | * @param array $indexed The portion of $row that makes up the cache key
84 | * @param array $row A single (compacted) row of data to remove from its related feature bucket
85 | */
86 | abstract protected function removeFromIndex( array $indexed, array $row );
87 | |
/**
 * Store the collaborators and derive the two column lists used by the index.
 *
 * @param FlowObjectCache $cache Cache the feature buckets are read from and written to
 * @param ObjectStorage $storage Backing store queried on cache misses
 * @param ObjectMapper $mapper
 * @param string $prefix Prefix to utilize for all cache keys
 * @param string[] $indexedColumns List of columns to index
 */
public function __construct( FlowObjectCache $cache, ObjectStorage $storage, ObjectMapper $mapper, $prefix, array $indexedColumns ) {
	// Caller-supplied order is what getPrimaryKeyColumns() reports.
	$this->indexed = $indexedColumns;
	$this->rowCompactor = new FeatureCompactor( $indexedColumns );

	// Alphabetical copy: cacheKey() ksort()s the attributes, so comparing
	// against a sorted column list keeps cache key fields in a stable order.
	$alphabetical = $indexedColumns;
	sort( $alphabetical );
	$this->indexedOrdered = $alphabetical;

	$this->prefix = $prefix;
	$this->mapper = $mapper;
	$this->storage = $storage;
	$this->cache = $cache;
}
107 | |
/**
 * Report the indexed columns exactly as they were handed to the constructor
 * (i.e. not the alphabetically sorted copy).
 *
 * @return string[] The list of columns to bucket database rows by
 */
public function getPrimaryKeyColumns() {
	$columns = $this->indexed;

	return $columns;
}
115 | |
/**
 * @inheritDoc
 */
public function canAnswer( array $featureColumns, array $options ) {
	// Column order is irrelevant: compare against the sorted column list.
	sort( $featureColumns );
	if ( $featureColumns !== $this->indexedOrdered ) {
		return false;
	}

	// Without an explicit limit there is no further restriction to check.
	if ( !isset( $options['limit'] ) ) {
		return true;
	}

	// This can probably be moved to TopKIndex if it's not used
	// by anything else.
	// The furthest row requested is limit (+ offset, when given); it has to
	// fall within what the index holds.
	$furthestRow = $options['limit'];
	if ( isset( $options['offset'] ) ) {
		$furthestRow += $options['offset'];
	}

	return !( $furthestRow > $this->getLimit() );
}
138 | |
/**
 * The sort columns configured in the 'sort' option. Rows are ordered by the
 * first column; each following column only breaks ties left by the previous ones.
 *
 * @return string[]|false The columns to sort by, or false if no sorting is defined
 */
public function getSort() {
	if ( isset( $this->options['sort'] ) ) {
		return $this->options['sort'];
	}

	return false;
}
148 | |
/**
 * @inheritDoc
 */
public function getOrder() {
	// Anything other than a (case-insensitive) 'ASC' falls back to 'DESC',
	// including a missing 'order' option.
	$configured = $this->options['order'] ?? null;
	$ascending = $configured !== null && strtoupper( $configured ) === 'ASC';

	return $ascending ? 'ASC' : 'DESC';
}
159 | |
/**
 * Drop from the cache the whole feature bucket that $row belongs to.
 *
 * Only removing the single object would leave an incorrect bucket behind,
 * so the entire bucket is deleted instead.
 *
 * @param object $object
 * @param array $row
 * @throws DataModelException When $row does not yield the indexed columns
 */
public function cachePurge( $object, array $row ) {
	$bucketAttributes = ObjectManager::splitFromRow( $row, $this->indexed );
	if ( $bucketAttributes ) {
		$bucketKey = $this->cacheKey( $bucketAttributes );
		$this->cache->delete( $bucketKey );
		return;
	}

	throw new DataModelException( 'Un-indexable row: ' . FormatJson::encode( $row ), 'process-data' );
}
176 | |
/**
 * @inheritDoc
 */
public function onAfterInsert( $object, array $new, array $metadata ) {
	$bucketAttributes = ObjectManager::splitFromRow( $new, $this->indexed );
	// is un-indexable a bail-worthy occasion? Probably not but makes debugging easier
	if ( !$bucketAttributes ) {
		$message = 'Un-indexable row: ' . FormatJson::encode( $new );
		throw new DataModelException( $message, 'process-data' );
	}

	// The compacted form is built from the row with alphadecimal UUIDs.
	$alphadecimalRow = UUID::convertUUIDs( $new, 'alphadecimal' );
	$compactedRow = $this->rowCompactor->compactRow( $alphadecimalRow );

	// Hand the bucket of the inserted row to removeFromIndex().
	$this->removeFromIndex( $bucketAttributes, $compactedRow );
}
189 | |
/**
 * @inheritDoc
 *
 * Compares the old and new row and calls removeFromIndex() for every feature
 * bucket whose contents are affected by the update:
 *  - same bucket, identical compacted row: nothing to do;
 *  - same bucket, different compacted row: that bucket;
 *  - different buckets: both the bucket the object left and the one it entered.
 *
 * @throws DataModelException When either row lacks the indexed columns
 */
public function onAfterUpdate( $object, array $old, array $new, array $metadata ) {
	$oldIndexed = ObjectManager::splitFromRow( $old, $this->indexed );
	$newIndexed = ObjectManager::splitFromRow( $new, $this->indexed );
	if ( !$oldIndexed ) {
		// Report the offending row itself: $oldIndexed is empty at this point
		// and would make for a useless message.
		throw new DataModelException( 'Un-indexable row: ' . FormatJson::encode( $old ), 'process-data' );
	}
	if ( !$newIndexed ) {
		throw new DataModelException( 'Un-indexable row: ' . FormatJson::encode( $new ), 'process-data' );
	}

	$oldCompacted = $this->rowCompactor->compactRow( UUID::convertUUIDs( $old, 'alphadecimal' ) );
	$newCompacted = $this->rowCompactor->compactRow( UUID::convertUUIDs( $new, 'alphadecimal' ) );

	// Normalise UUIDs to alphadecimal on both sides before comparing the
	// bucket attributes.
	$sameBucket = ObjectManager::arrayEquals(
		UUID::convertUUIDs( $oldIndexed, 'alphadecimal' ),
		UUID::convertUUIDs( $newIndexed, 'alphadecimal' )
	);

	if ( $sameBucket && ObjectManager::arrayEquals( $oldCompacted, $newCompacted ) ) {
		// Nothing changed in the index
		return;
	}

	// Either the object's representation within its bucket changed, or the
	// object left this bucket: in both cases the old bucket is affected.
	$this->removeFromIndex( $oldIndexed, $oldCompacted );

	if ( !$sameBucket ) {
		// object has moved from one feature bucket to another: the destination
		// bucket does not know about it yet, so it has to be handled just like
		// onAfterInsert() handles the bucket of a new row.
		$this->removeFromIndex( $newIndexed, $newCompacted );
	}
}
218 | |
/**
 * @inheritDoc
 */
public function onAfterRemove( $object, array $old, array $metadata ) {
	$bucketAttributes = ObjectManager::splitFromRow( $old, $this->indexed );
	if ( !$bucketAttributes ) {
		$message = 'Unindexable row: ' . FormatJson::encode( $old );
		throw new DataModelException( $message, 'process-data' );
	}

	// The compacted form is built from the row with alphadecimal UUIDs.
	$alphadecimalRow = UUID::convertUUIDs( $old, 'alphadecimal' );
	$compactedRow = $this->rowCompactor->compactRow( $alphadecimalRow );

	$this->removeFromIndex( $bucketAttributes, $compactedRow );
}
230 | |
231 | /**
232 | * @inheritDoc
233 | */
234 | public function onAfterLoad( $object, array $old ) {
235 | // Deliberate no-op: this class does no work when an object is loaded.
236 | }
237 | |
238 | /**
239 | * @inheritDoc
240 | */
241 | public function onAfterClear() {
242 | // Deliberate no-op: this class keeps no state that needs resetting here.
243 | }
244 | |
/**
 * @inheritDoc
 */
public function find( array $attributes, array $options = [] ) {
	// Run the single query as a batch of one and unwrap its first result.
	// reset() yields false when findMulti() returned no entry at all.
	$batch = [ $attributes ];
	$perQuery = $this->findMulti( $batch, $options );

	return reset( $perQuery );
}
252 | |
/**
 * @inheritDoc
 *
 * Resolves as many queries as possible from the cache, fetches the rest
 * from the backing store (caching them in compacted form), and returns the
 * combined results keyed like $queries. Queries without any result are
 * absent from the returned array.
 */
public function findMulti( array $queries, array $options = [] ) {
	if ( !$queries ) {
		return [];
	}

	// get cache keys for all queries
	$cacheKeys = $this->getCacheKeys( $queries );

	// retrieve from cache (only query duplicate queries once)
	// $fromCache will be an array containing compacted results as value and
	// cache keys as key
	$fromCache = $this->cache->getMulti( array_unique( $cacheKeys ) );

	// figure out what queries were resolved in cache
	// $keysFromCache will be an array where values are cache keys and keys
	// are the same index as their corresponding $queries
	// (intersect with $cacheKeys to guarantee order)
	$keysFromCache = array_intersect( $cacheKeys, array_keys( $fromCache ) );

	// filter out all queries that have been resolved from cache and fetch
	// them from storage
	// $fromStorage will be an array containing (expanded) results as value
	// and indexes matching $query as key
	$storageQueries = array_diff_key( $queries, $keysFromCache );
	$fromStorage = [];
	if ( $storageQueries ) {
		$fromStorage = $this->backingStoreFindMulti( $storageQueries );
		foreach ( $fromStorage as $idx => $resultFromStorage ) {
			$key = $this->cacheKey( $storageQueries[$idx] );
			// Cache values are read back as compacted rows (they go through
			// expandCacheResult() below), so compact every row before
			// storing it instead of caching the expanded storage rows.
			// array_map() with a single array preserves the row keys.
			$compactedRows = array_map( [ $this->rowCompactor, 'compactRow' ], $resultFromStorage );
			$this->cache->set( $key, $compactedRows );
		}
	}

	$results = $fromStorage;

	// $queries may have had duplicates that we've ignored to minimize
	// cache requests - now re-duplicate values from cache & match the
	// results against their respective original keys in $queries
	foreach ( $keysFromCache as $index => $cacheKey ) {
		$results[$index] = $fromCache[$cacheKey];
	}

	// now that we have all data, both from cache & backing storage, filter
	// out all data we don't need
	$results = $this->filterResults( $results, $options );

	// if we have no data from cache, there's nothing left - quit early
	if ( !$fromCache ) {
		return $results;
	}

	// because we may have combined data from 2 different sources, chances
	// are the order of the data is no longer in sync with the order
	// $queries were in - fix that by replacing $queries values with
	// the corresponding $results value
	// note that there may be missing results, hence the intersect ;)
	$order = array_intersect_key( $queries, $results );
	$results = array_replace( $order, $results );

	$keyToQuery = [];
	foreach ( $keysFromCache as $index => $key ) {
		// all redundant data has been stripped, now expand all cache values
		// (we're only doing this now to avoid expanding redundant data)
		$fromCache[$key] = $results[$index];

		// to expand rows, we'll need the $query info mapped to the cache
		// key instead of the $query index
		if ( !isset( $keyToQuery[$key] ) ) {
			$keyToQuery[$key] = $queries[$index];
			$keyToQuery[$key] = UUID::convertUUIDs( $keyToQuery[$key], 'alphadecimal' );
		}
	}

	// expand and replace the stubs in $results with complete data
	$fromCache = $this->rowCompactor->expandCacheResult( $fromCache, $keyToQuery );
	foreach ( $keysFromCache as $index => $cacheKey ) {
		$results[$index] = $fromCache[$cacheKey];
	}

	return $results;
}
337 | |
/**
 * Strip entries that are not needed according to the given $options.
 *
 * findMulti() calls this before expanding cached entries: at that point the
 * results may be mere stubs that still have to be expanded, and expanding
 * more of them than is actually needed would be wasted work.
 *
 * This base implementation keeps everything.
 *
 * @param array[] $results
 * @param array $options
 * @return array[]
 */
protected function filterResults( array $results, array $options = [] ) {
	// Overridden in TopKIndex
	$unfiltered = $results;

	return $unfiltered;
}
354 | |
/**
 * Tell whether the find()-operation for the given attributes has already
 * been resolved, i.e. can be answered without querying any outside
 * cache/database.
 * Knowing that a find() has not yet been resolved may be useful so that
 * additional data may be loaded at once.
 *
 * @param array $attributes Attributes to find()
 * @param array $options Options to find()
 * @return bool
 */
public function found( array $attributes, array $options = [] ) {
	// A single lookup is just a batch of one.
	$batch = [ $attributes ];

	return $this->foundMulti( $batch, $options );
}
369 | |
/**
 * Tell whether the findMulti()-operation for the given queries has already
 * been resolved, i.e. can be answered without querying any outside
 * cache/database.
 * Knowing that a find() has not yet been resolved may be useful so that
 * additional data may be loaded at once.
 *
 * @param array $queries Queries to findMulti()
 * @param array $options Options to findMulti()
 * @return bool
 * @throws DataModelException When a query does not use exactly the indexed columns
 */
public function foundMulti( array $queries, array $options = [] ) {
	// No queries: trivially resolved.
	if ( !$queries ) {
		return true;
	}

	// get cache keys for all queries
	$cacheKeys = $this->getCacheKeys( $queries );

	// Without a has() method the cache cannot tell what is stored locally.
	if ( !method_exists( $this->cache, 'has' ) ) {
		return false;
	}

	// Every key has to be known to the local cache; while walking the keys,
	// remember the first query that produced each distinct key.
	$keyToQuery = [];
	foreach ( $cacheKeys as $position => $cacheKey ) {
		// @phan-suppress-next-line PhanUndeclaredMethod Checked with method_exists above
		if ( !$this->cache->has( $cacheKey ) ) {
			return false;
		}
		if ( !isset( $keyToQuery[$cacheKey] ) ) {
			$keyToQuery[$cacheKey] = $queries[$position];
		}
	}

	// retrieve from cache - this is cheap, it is local storage
	$cached = $this->cache->getMulti( $cacheKeys );
	foreach ( $cached as $cacheKey => $rows ) {
		// Only the first $limit rows of each bucket are of interest.
		$limit = $options['limit'] ?? $this->getLimit();
		$cached[$cacheKey] = array_splice( $rows, 0, $limit );
	}

	// Only a shallow compactor needs a second look: the cached data are
	// then PKs of objects that have to be fetched too.
	if ( !( $this->rowCompactor instanceof ShallowCompactor ) ) {
		return true;
	}

	// test if the keys to be expanded are already in local cache
	$duplicator = $this->rowCompactor->getResultDuplicator( $cached, $keyToQuery );
	$shallowQueries = $duplicator->getUniqueQueries();
	if ( $this->rowCompactor->getShallow()->foundMulti( $shallowQueries ) ) {
		return true;
	}

	return false;
}
431 | |
/**
 * Compute the cache key of every query, keeping the indexes of $queries.
 *
 * @param array $queries
 * @return array Array of [query index => cache key]
 * @throws DataModelException When a query's columns are not exactly the indexed columns
 */
protected function getCacheKeys( $queries ) {
	$keysByIndex = [];
	foreach ( $queries as $position => $attributes ) {
		// Compare column names irrespective of the order they were given in.
		$sorted = $attributes;
		ksort( $sorted );
		if ( array_keys( $sorted ) !== $this->indexedOrdered ) {
			// Report the columns in the order the caller supplied them.
			$given = implode( ', ', array_keys( $attributes ) );
			throw new DataModelException(
				'Cannot answer query for columns: ' . $given, 'process-data'
			);
		}
		$keysByIndex[$position] = $this->cacheKey( $sorted );
	}

	return $keysByIndex;
}
454 | |
/**
 * Fetch from persistent storage the data that was not found in cache.
 *
 * The caller's query options are deliberately not used: what a bucket
 * contains is determined by the constructor options. Query options merely
 * change what part of the bucket is returned (or if the query has to fail
 * over to direct from storage due to being beyond the set of cached values).
 *
 * @param array $queries
 * @return array Non-empty storage results, keyed like $queries
 */
protected function backingStoreFindMulti( array $queries ) {
	// query backing store
	$stored = $this->storage->findMulti( $queries, $this->queryOptions() );

	// Keep only the queries storage had rows for.
	// Nothing found, should we cache failures as well as success?
	// Log something about not finding everything?
	$answered = [];
	foreach ( $stored as $position => $rows ) {
		if ( $rows ) {
			$answered[$position] = $rows;
		}
	}

	return $answered;
}
487 | |
/**
 * Generate the cache key representing the attributes.
 *
 * UUID objects, and UUID::BIN_LEN-long values of columns ending in '_id',
 * are turned into their alphadecimal form first.
 *
 * @param array $attributes
 * @return string
 */
protected function cacheKey( array $attributes ) {
	global $wgFlowCacheVersion;

	$normalized = [];
	foreach ( $attributes as $column => $value ) {
		if ( $value instanceof UUID ) {
			$normalized[$column] = $value->getAlphadecimal();
			continue;
		}
		if ( strlen( $value ) === UUID::BIN_LEN && substr( $column, -3 ) === '_id' ) {
			$normalized[$column] = UUID::create( $value )->getAlphadecimal();
			continue;
		}
		$normalized[$column] = $value;
	}

	// Attributes may arrive in any order; sorting by column name keeps the
	// resulting key independent of that order.
	ksort( $normalized );
	$attributeHash = md5( implode( ':', $normalized ) );

	return $this->cache->makeGlobalKey(
		$this->prefix,
		self::cachedDbId(),
		$attributeHash,
		$wgFlowCacheVersion
	);
}
514 | |
/**
 * Uses $wgFlowDefaultWikiDb when it is configured, and the current wiki's
 * database domain id when that setting is exactly false.
 *
 * @return string The id of the database being cached
 */
public static function cachedDbId() {
	global $wgFlowDefaultWikiDb;

	if ( $wgFlowDefaultWikiDb !== false ) {
		return $wgFlowDefaultWikiDb;
	}

	return WikiMap::getCurrentWikiDbDomain()->getId();
}
526 | } |