Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
97.37% |
148 / 152 |
|
78.57% |
11 / 14 |
CRAP | |
0.00% |
0 / 1 |
CachingResultsSource | |
97.37% |
148 / 152 |
|
78.57% |
11 / 14 |
49 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
1 | |||
getResults | |
100.00% |
24 / 24 |
|
100.00% |
1 / 1 |
9 | |||
canUseStoredResults | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
statusSelected | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
getAndStoreResults | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
canStoreResults | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
3 | |||
storeResults | |
94.74% |
18 / 19 |
|
0.00% |
0 / 1 |
6.01 | |||
getStoredResults | |
100.00% |
22 / 22 |
|
100.00% |
1 / 1 |
4 | |||
checkDependencyMetadata | |
96.30% |
26 / 27 |
|
0.00% |
0 / 1 |
7 | |||
deserializeCheckResult | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
2 | |||
isPossiblyStaleResult | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
2 | |||
getLatestRevisionIds | |
84.62% |
11 / 13 |
|
0.00% |
0 / 1 |
4.06 | |||
hasFalseElements | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
setMicrotimeFunction | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace WikibaseQuality\ConstraintReport\Api; |
4 | |
5 | use DataValues\TimeValue; |
6 | use Wikibase\DataModel\Entity\EntityId; |
7 | use Wikibase\DataModel\Entity\EntityIdParser; |
8 | use Wikibase\Lib\Store\LookupConstants; |
9 | use Wikibase\Lib\Store\Sql\WikiPageEntityMetaDataAccessor; |
10 | use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachedCheckResults; |
11 | use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\CachingMetadata; |
12 | use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\DependencyMetadata; |
13 | use WikibaseQuality\ConstraintReport\ConstraintCheck\Cache\Metadata; |
14 | use WikibaseQuality\ConstraintReport\ConstraintCheck\Helper\LoggingHelper; |
15 | use WikibaseQuality\ConstraintReport\ConstraintCheck\Helper\TimeValueComparer; |
16 | use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\CheckResult; |
17 | use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\CheckResultDeserializer; |
18 | use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\CheckResultSerializer; |
19 | use WikibaseQuality\ConstraintReport\ConstraintCheck\Result\NullResult; |
20 | use Wikimedia\ObjectCache\WANObjectCache; |
21 | |
22 | /** |
23 | * A ResultsSource that wraps another ResultsSource, |
24 | * storing results in a cache |
25 | * and retrieving them from there if the results are still fresh. |
26 | * |
27 | * @author Lucas Werkmeister |
28 | * @license GPL-2.0-or-later |
29 | */ |
class CachingResultsSource implements ResultsSource {

    /**
     * Statuses of the check results that are written to the cache.
     *
     * Stored results can only answer a request whose statuses are a subset of this list
     * (see canUseStoredResults()), and results are only stored if the request asked
     * for at least all of these statuses (see canStoreResults()).
     */
    public const CACHED_STATUSES = [
        CheckResult::STATUS_VIOLATION,
        CheckResult::STATUS_WARNING,
        CheckResult::STATUS_SUGGESTION,
        CheckResult::STATUS_BAD_PARAMETERS,
    ];

    /**
     * @var ResultsSource The wrapped source that cache misses are delegated to.
     */
    private $resultsSource;

    /**
     * @var ResultsCache The cache in which results are stored, keyed by entity ID.
     */
    private $cache;

    /**
     * @var CheckResultSerializer
     */
    private $checkResultSerializer;

    /**
     * @var CheckResultDeserializer
     */
    private $checkResultDeserializer;

    /**
     * @var WikiPageEntityMetaDataAccessor Used to load the latest revision IDs of entities.
     */
    private $wikiPageEntityMetaDataAccessor;

    /**
     * @var EntityIdParser Used to parse the entity IDs found in cached values.
     */
    private $entityIdParser;

    /**
     * @var int Time-to-live passed to the cache when storing a value, in seconds.
     */
    private $ttlInSeconds;

    /**
     * @var string[] Item IDs of constraint types whose cached results may always be stale.
     */
    private $possiblyStaleConstraintTypes;

    /**
     * @var int Maximum number of depended-on entities for which revision IDs are loaded.
     */
    private $maxRevisionIds;

    /**
     * @var LoggingHelper
     */
    private $loggingHelper;

    /**
     * @var TimeValueComparer
     */
    private $timeValueComparer;

    /**
     * @var callable Called with `true` as its only argument to obtain the current time;
     * the result is compared against the cache entry's "as of" time to compute its age.
     * Defaults to the built-in microtime(), see setMicrotimeFunction().
     */
    private $microtime = 'microtime';

    /**
     * @param ResultsSource $resultsSource The ResultsSource that cache misses are delegated to.
     * @param ResultsCache $cache The cache where results can be stored.
     * @param CheckResultSerializer $checkResultSerializer Used to serialize check results.
     * @param CheckResultDeserializer $checkResultDeserializer Used to deserialize check results.
     * @param WikiPageEntityMetaDataAccessor $wikiPageEntityMetaDataAccessor Used to get the latest revision ID.
     * @param EntityIdParser $entityIdParser Used to parse entity IDs in cached objects.
     * @param int $ttlInSeconds Time-to-live of the cached values, in seconds.
     * @param string[] $possiblyStaleConstraintTypes item IDs of constraint types
     * where cached results may always be stale, regardless of invalidation logic
     * @param int $maxRevisionIds The maximum number of revision IDs to check;
     * if a check result depends on more entity IDs than this number, it is not cached.
     * @param LoggingHelper $loggingHelper
     */
    public function __construct(
        ResultsSource $resultsSource,
        ResultsCache $cache,
        CheckResultSerializer $checkResultSerializer,
        CheckResultDeserializer $checkResultDeserializer,
        WikiPageEntityMetaDataAccessor $wikiPageEntityMetaDataAccessor,
        EntityIdParser $entityIdParser,
        $ttlInSeconds,
        array $possiblyStaleConstraintTypes,
        $maxRevisionIds,
        LoggingHelper $loggingHelper
    ) {
        $this->resultsSource = $resultsSource;
        $this->cache = $cache;
        $this->checkResultSerializer = $checkResultSerializer;
        $this->checkResultDeserializer = $checkResultDeserializer;
        $this->wikiPageEntityMetaDataAccessor = $wikiPageEntityMetaDataAccessor;
        $this->entityIdParser = $entityIdParser;
        $this->ttlInSeconds = $ttlInSeconds;
        $this->possiblyStaleConstraintTypes = $possiblyStaleConstraintTypes;
        $this->maxRevisionIds = $maxRevisionIds;
        $this->loggingHelper = $loggingHelper;
        $this->timeValueComparer = new TimeValueComparer();
    }

    /**
     * Get check results, answering from the cache where possible
     * and delegating everything else to the wrapped ResultsSource.
     *
     * If the request is eligible for cached results (see canUseStoredResults()),
     * each entity ID is looked up in the cache first; entity IDs with a usable cache entry
     * are logged as cache hits and removed from the list passed to the wrapped source.
     * The remaining entity IDs (logged as cache misses) and all claim IDs
     * are then fetched via getAndStoreResults().
     *
     * @param EntityId[] $entityIds
     * @param string[] $claimIds
     * @param string[]|null $constraintIds
     * @param string[] $statuses
     * @return CachedCheckResults the combined results, with the metadata of all parts merged
     */
    public function getResults(
        array $entityIds,
        array $claimIds,
        ?array $constraintIds,
        array $statuses
    ) {
        $results = [];
        $metadatas = [];
        if ( $this->canUseStoredResults( $entityIds, $claimIds, $constraintIds, $statuses ) ) {
            $storedEntityIds = [];
            foreach ( $entityIds as $entityId ) {
                $storedResults = $this->getStoredResults( $entityId );
                if ( $storedResults === null ) {
                    continue;
                }
                $this->loggingHelper->logCheckConstraintsCacheHit( $entityId );
                // the cache holds all CACHED_STATUSES, the request may want fewer
                foreach ( $storedResults->getArray() as $checkResult ) {
                    if ( $this->statusSelected( $statuses, $checkResult ) ) {
                        $results[] = $checkResult;
                    }
                }
                $metadatas[] = $storedResults->getMetadata();
                $storedEntityIds[] = $entityId;
            }
            // only the entities without a usable cache entry still need to be checked
            $entityIds = array_values( array_diff( $entityIds, $storedEntityIds ) );
        }
        if ( $entityIds !== [] || $claimIds !== [] ) {
            if ( $entityIds !== [] ) {
                $this->loggingHelper->logCheckConstraintsCacheMisses( $entityIds );
            }
            $response = $this->getAndStoreResults( $entityIds, $claimIds, $constraintIds, $statuses );
            $results = array_merge( $results, $response->getArray() );
            $metadatas[] = $response->getMetadata();
        }
        return new CachedCheckResults(
            $results,
            Metadata::merge( $metadatas )
        );
    }

    /**
     * We can only use cached constraint results
     * if nothing more than the problematic results of a full constraint check were requested:
     * constraint checks for the full entity (not just individual statements),
     * without restricting the set of constraints to check,
     * and with no statuses other than the ones listed in self::CACHED_STATUSES
     * (violation, warning, suggestion and bad parameters).
     *
     * @param EntityId[] $entityIds (not inspected by this check)
     * @param string[] $claimIds
     * @param string[]|null $constraintIds
     * @param string[] $statuses
     * @return bool
     */
    private function canUseStoredResults(
        array $entityIds,
        array $claimIds,
        ?array $constraintIds,
        array $statuses
    ): bool {
        if ( $claimIds !== [] ) {
            return false;
        }
        if ( $constraintIds !== null ) {
            return false;
        }
        if ( array_diff( $statuses, self::CACHED_STATUSES ) !== [] ) {
            return false;
        }
        return true;
    }

    /**
     * Check whether a check result should be used,
     * either because it has the right status
     * or because it is a NullResult whose metadata should be preserved.
     *
     * @param string[] $statuses
     * @param CheckResult $result
     * @return bool
     */
    private function statusSelected( array $statuses, CheckResult $result ): bool {
        return in_array( $result->getStatus(), $statuses, true ) ||
            $result instanceof NullResult;
    }

    /**
     * Get results from the wrapped ResultsSource
     * and, if the request permits it (see canStoreResults()),
     * store them in the cache separately for each requested entity ID.
     *
     * @param EntityId[] $entityIds
     * @param string[] $claimIds
     * @param string[]|null $constraintIds
     * @param string[] $statuses
     * @return CachedCheckResults the results exactly as returned by the wrapped source
     */
    public function getAndStoreResults(
        array $entityIds,
        array $claimIds,
        ?array $constraintIds,
        array $statuses
    ) {
        $results = $this->resultsSource->getResults( $entityIds, $claimIds, $constraintIds, $statuses );

        if ( $this->canStoreResults( $entityIds, $claimIds, $constraintIds, $statuses ) ) {
            foreach ( $entityIds as $entityId ) {
                $this->storeResults( $entityId, $results );
            }
        }

        return $results;
    }

    /**
     * We can only store constraint results
     * if the set of constraints to check was not restricted
     * and all the problematic results were requested.
     * However, it doesn’t matter whether constraint checks on individual statements were requested:
     * we only store results for the mentioned entity IDs,
     * and those will be complete regardless of what’s in the statement IDs.
     * And it also doesn’t matter whether the set of statuses requested
     * was exactly the statuses we cache or a superset of it:
     * as long as all the results we want to cache are there,
     * we can filter out the extraneous ones before we serialize them.
     *
     * @param EntityId[] $entityIds (not inspected by this check)
     * @param string[] $claimIds (not inspected by this check)
     * @param ?string[] $constraintIds
     * @param string[] $statuses
     * @return bool
     */
    private function canStoreResults(
        array $entityIds,
        array $claimIds,
        ?array $constraintIds,
        array $statuses
    ): bool {
        if ( $constraintIds !== null ) {
            return false;
        }
        if ( array_diff( self::CACHED_STATUSES, $statuses ) !== [] ) {
            return false;
        }
        return true;
    }

    /**
     * Store check results for the given entity ID in the cache, if possible.
     *
     * Nothing is stored if the latest revision IDs of the entities that the results depend on
     * cannot be determined (see getLatestRevisionIds()).
     * The stored value is an array with the keys 'results' (serialized check results),
     * 'latestRevisionIds' and, only if the dependency metadata has a future time, 'futureTime'.
     *
     * @param EntityId $entityId The entity ID.
     * @param CachedCheckResults $results A collection of check results with metadata.
     * May include check results for other entity IDs as well,
     * or check results with statuses that we’re not interested in caching.
     */
    private function storeResults( EntityId $entityId, CachedCheckResults $results ): void {
        $latestRevisionIds = $this->getLatestRevisionIds(
            $results->getMetadata()->getDependencyMetadata()->getEntityIds()
        );
        if ( $latestRevisionIds === null ) {
            return;
        }

        $resultSerializations = [];
        foreach ( $results->getArray() as $checkResult ) {
            // skip results that belong to a different entity
            if ( $checkResult->getContextCursor()->getEntityId() !== $entityId->getSerialization() ) {
                continue;
            }
            if ( $this->statusSelected( self::CACHED_STATUSES, $checkResult ) ) {
                $resultSerializations[] = $this->checkResultSerializer->serialize( $checkResult );
            }
        }

        $value = [
            'results' => $resultSerializations,
            'latestRevisionIds' => $latestRevisionIds,
        ];
        $futureTime = $results->getMetadata()->getDependencyMetadata()->getFutureTime();
        if ( $futureTime !== null ) {
            $value['futureTime'] = $futureTime->getArrayValue();
        }

        $this->cache->set( $entityId, $value, $this->ttlInSeconds );
    }

    /**
     * Get the results stored in the cache for the given entity ID,
     * provided that the cache entry exists and its dependency metadata
     * does not indicate staleness (see checkDependencyMetadata()).
     *
     * The returned metadata carries the dependency metadata of the cache entry
     * and caching metadata derived from the age of the entry (rounded up to whole seconds).
     *
     * @param EntityId $entityId
     * @param int $forRevision Requested revision of $entityId
     * If this parameter is not zero, the results are returned if this is the latest revision,
     * otherwise null is returned, since we can't get constraints for past revisions.
     * @return CachedCheckResults|null
     */
    public function getStoredResults(
        EntityId $entityId,
        $forRevision = 0
    ) {
        $cacheInfo = WANObjectCache::PASS_BY_REF;
        $value = $this->cache->get( $entityId, $curTTL, [], $cacheInfo );
        $now = ( $this->microtime )( true );

        $dependencyMetadata = $this->checkDependencyMetadata( $value,
            [ $entityId->getSerialization() => $forRevision ] );
        if ( $dependencyMetadata === null ) {
            return null;
        }

        // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
        $asOf = $cacheInfo[WANObjectCache::KEY_AS_OF];
        $ageInSeconds = (int)ceil( $now - $asOf );
        $cachingMetadata = $ageInSeconds > 0 ?
            CachingMetadata::ofMaximumAgeInSeconds( $ageInSeconds ) :
            CachingMetadata::fresh();

        $results = [];
        foreach ( $value['results'] as $resultSerialization ) {
            $results[] = $this->deserializeCheckResult( $resultSerialization, $cachingMetadata );
        }

        return new CachedCheckResults(
            $results,
            Metadata::merge( [
                Metadata::ofCachingMetadata( $cachingMetadata ),
                Metadata::ofDependencyMetadata( $dependencyMetadata ),
            ] )
        );
    }

    /**
     * Extract the dependency metadata of $value
     * and check that the dependency metadata does not indicate staleness.
     *
     * The value is considered unusable if it is false (nothing was cached),
     * if it has a 'futureTime' that is no longer in the future,
     * or if its 'latestRevisionIds' are not identical
     * to the revision IDs returned by getLatestRevisionIds() for the same entities.
     *
     * @param array|false $value
     * @param int[] $paramRevs Revisions from parameters, id => revision
     * These revisions are used instead of ones recorded in the metadata,
     * so we can serve requests specifying concrete revisions, and if they are not latest,
     * we will reject them.
     * @return DependencyMetadata|null the dependency metadata,
     * or null if $value should no longer be used
     */
    private function checkDependencyMetadata( $value, $paramRevs ): ?DependencyMetadata {
        if ( $value === false ) {
            return null;
        }

        if ( array_key_exists( 'futureTime', $value ) ) {
            $futureTime = TimeValue::newFromArray( $value['futureTime'] );
            if ( !$this->timeValueComparer->isFutureTime( $futureTime ) ) {
                return null;
            }
            $futureTimeDependencyMetadata = DependencyMetadata::ofFutureTime( $futureTime );
        } else {
            $futureTimeDependencyMetadata = DependencyMetadata::blank();
        }

        foreach ( $paramRevs as $id => $revision ) {
            if ( $revision > 0 ) {
                // a requested revision older than the recorded one replaces it,
                // so that the comparison with the actual latest revision below fails
                $value['latestRevisionIds'][$id] = min( $revision, $value['latestRevisionIds'][$id] ?? PHP_INT_MAX );
            }
        }

        $dependedEntityIds = array_map(
            [ $this->entityIdParser, "parse" ],
            array_keys( $value['latestRevisionIds'] )
        );

        // also fails if getLatestRevisionIds() returns null
        if ( $value['latestRevisionIds'] !== $this->getLatestRevisionIds( $dependedEntityIds ) ) {
            return null;
        }

        return array_reduce(
            $dependedEntityIds,
            static function ( DependencyMetadata $metadata, EntityId $entityId ) {
                return DependencyMetadata::merge( [
                    $metadata,
                    DependencyMetadata::ofEntityId( $entityId ),
                ] );
            },
            $futureTimeDependencyMetadata
        );
    }

    /**
     * Deserialize a check result.
     * If the result might be stale after caching
     * (because its dependencies cannot be fully tracked in its dependency metadata),
     * also add $cachingMetadata to it.
     *
     * @param array $resultSerialization
     * @param CachingMetadata $cachingMetadata
     * @return CheckResult
     */
    private function deserializeCheckResult(
        array $resultSerialization,
        CachingMetadata $cachingMetadata
    ): CheckResult {
        $result = $this->checkResultDeserializer->deserialize( $resultSerialization );
        if ( $this->isPossiblyStaleResult( $result ) ) {
            // NOTE(review): the return value is not used, so this presumably relies on
            // withMetadata() updating $result in place – confirm against CheckResult.
            $result->withMetadata(
                Metadata::merge( [
                    $result->getMetadata(),
                    Metadata::ofCachingMetadata( $cachingMetadata ),
                ] )
            );
        }
        return $result;
    }

    /**
     * Whether the given result belongs to one of the constraint types
     * configured as possibly stale. A NullResult is never considered possibly stale.
     *
     * @param CheckResult $result
     * @return bool
     */
    private function isPossiblyStaleResult( CheckResult $result ): bool {
        if ( $result instanceof NullResult ) {
            return false;
        }

        return in_array(
            $result->getConstraint()->getConstraintTypeItemId(),
            $this->possiblyStaleConstraintTypes,
            true
        );
    }

    /**
     * Load the latest revision IDs of the given entities.
     *
     * An empty list of entity IDs is logged and yields an empty array.
     * More entity IDs than the configured maximum are logged and yield null
     * without loading anything.
     *
     * @param EntityId[] $entityIds
     * @return int[]|null array from entity ID serializations to revision ID,
     * or null to indicate that not all revision IDs could be loaded
     */
    private function getLatestRevisionIds( array $entityIds ): ?array {
        if ( $entityIds === [] ) {
            $this->loggingHelper->logEmptyDependencyMetadata();
            return [];
        }
        if ( count( $entityIds ) > $this->maxRevisionIds ) {
            // one of those entities will probably be edited soon, so might as well skip caching
            $this->loggingHelper->logHugeDependencyMetadata( $entityIds, $this->maxRevisionIds );
            return null;
        }

        $latestRevisionIds = $this->wikiPageEntityMetaDataAccessor->loadLatestRevisionIds(
            $entityIds,
            LookupConstants::LATEST_FROM_REPLICA
        );
        if ( $this->hasFalseElements( $latestRevisionIds ) ) {
            return null;
        }
        return $latestRevisionIds;
    }

    /**
     * Whether the array contains the value false (compared strictly).
     *
     * @param array $array
     * @return bool
     */
    private function hasFalseElements( array $array ): bool {
        return in_array( false, $array, true );
    }

    /**
     * Set a custom function to get the current time, instead of microtime().
     *
     * @param callable $microtime
     */
    public function setMicrotimeFunction( callable $microtime ) {
        $this->microtime = $microtime;
    }

}