Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
90.32% |
112 / 124 |
|
72.73% |
8 / 11 |
CRAP | |
0.00% |
0 / 1 |
| CachedPageViewService | |
90.32% |
112 / 124 |
|
72.73% |
8 / 11 |
40.38 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| setLogger | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| setCachedDays | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| supports | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getPageData | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
4 | |||
| getSiteData | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
4 | |||
| getTopPages | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getCacheExpiry | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| getWithCache | |
96.30% |
26 / 27 |
|
0.00% |
0 / 1 |
8 | |||
| getTitlesWithCache | |
85.29% |
58 / 68 |
|
0.00% |
0 / 1 |
15.72 | |||
| extendDateRange | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Extension\PageViewInfo; |
| 4 | |
| 5 | use InvalidArgumentException; |
| 6 | use MediaWiki\Message\Message; |
| 7 | use MediaWiki\Page\PageReference; |
| 8 | use MediaWiki\Status\Status; |
| 9 | use MediaWiki\Title\TitleFormatter; |
| 10 | use Psr\Log\LoggerAwareInterface; |
| 11 | use Psr\Log\LoggerInterface; |
| 12 | use Psr\Log\NullLogger; |
| 13 | use StatusValue; |
| 14 | use Wikimedia\ObjectCache\BagOStuff; |
| 15 | |
/**
 * Wraps a PageViewService and caches the results.
 *
 * Site-wide and top-pages results are cached under one key per metric/scope; per-article
 * results are cached under one key per title. Failed lookups are cached as well (for
 * ERROR_EXPIRY seconds) so that a failing upstream service is not called on every request.
 */
class CachedPageViewService implements PageViewService, LoggerAwareInterface {
	/** Number of seconds for which a failed lookup is cached. */
	private const ERROR_EXPIRY = 1800;

	/** @var LoggerInterface */
	protected $logger;

	/** @var string Cache prefix, in case multiple instances of this service coexist */
	protected $prefix;

	/** @var int Number of days requested from the wrapped service and stored in the cache */
	protected $cachedDays = 30;

	/**
	 * @param PageViewService $service The wrapped service that does the real lookups
	 * @param BagOStuff $cache Cache backend used to store the results
	 * @param TitleFormatter $titleFormatter Used to turn page references into prefixed DB keys
	 * @param string $prefix Cache key component that keeps coexisting instances apart
	 */
	public function __construct(
		private readonly PageViewService $service,
		private readonly BagOStuff $cache,
		private readonly TitleFormatter $titleFormatter,
		string $prefix = ''
	) {
		$this->logger = new NullLogger();
		// The prefix is part of every cache key; without storing it, instances created with
		// different prefixes would silently share (and overwrite) each other's entries.
		$this->prefix = $prefix;
	}

	/**
	 * @param LoggerInterface $logger Replaces the default NullLogger
	 */
	public function setLogger( LoggerInterface $logger ): void {
		$this->logger = $logger;
	}

	/**
	 * Set the number of days that will be cached. To avoid cache fragmentation, the inner service
	 * is always called with this number of days; if necessary, the response will be expanded with
	 * nulls.
	 * @param int $cachedDays
	 */
	public function setCachedDays( $cachedDays ) {
		$this->cachedDays = $cachedDays;
	}

	/** @inheritDoc */
	public function supports( $metric, $scope ) {
		return $this->service->supports( $metric, $scope );
	}

	/** @inheritDoc */
	public function getPageData( array $titles, $days, $metric = self::METRIC_VIEW ) {
		$status = $this->getTitlesWithCache( $metric, $titles );
		$data = $status->getValue();
		// The cache always holds $cachedDays days per title; adapt each row to the requested length.
		foreach ( $data as $title => $titleData ) {
			if ( $days < $this->cachedDays ) {
				// keep only the most recent $days entries
				$data[$title] = array_slice( $titleData, -$days, null, true );
			} elseif ( $days > $this->cachedDays ) {
				// pad the beginning of the range with nulls
				$data[$title] = $this->extendDateRange( $titleData, $days );
			}
		}
		$status->setResult( $status->isOK(), $data );
		return $status;
	}

	/** @inheritDoc */
	public function getSiteData( $days, $metric = self::METRIC_VIEW ) {
		$status = $this->getWithCache( $metric, self::SCOPE_SITE );
		if ( $status->isOK() ) {
			$data = $status->getValue();
			// Same length adjustment as in getPageData(), applied to the single site-wide row.
			if ( $days < $this->cachedDays ) {
				$data = array_slice( $data, -$days, null, true );
			} elseif ( $days > $this->cachedDays ) {
				$data = $this->extendDateRange( $data, $days );
			}
			$status->setResult( true, $data );
		}
		return $status;
	}

	/** @inheritDoc */
	public function getTopPages( $metric = self::METRIC_VIEW ) {
		return $this->getWithCache( $metric, self::SCOPE_TOP );
	}

	/** @inheritDoc */
	public function getCacheExpiry( $metric, $scope ) {
		// add some random delay to avoid cache stampedes
		return $this->service->getCacheExpiry( $metric, $scope ) + mt_rand( 0, 600 );
	}

	/**
	 * Like BagOStuff::getWithSetCallback, but returns a StatusValue like PageViewService calls do.
	 * Returns (and caches) null wrapped in a StatusValue on error.
	 * @param string $metric A METRIC_* constant
	 * @param string $scope A SCOPE_* constant (except SCOPE_ARTICLE which has its own method)
	 * @return StatusValue
	 * @throws InvalidArgumentException On a cache miss when $scope is neither SCOPE_SITE nor
	 *   SCOPE_TOP
	 */
	protected function getWithCache( $metric, $scope ) {
		$key = $this->cache->makeKey(
			'pvi',
			$this->prefix,
			// only site data depends on the number of days
			( $scope === self::SCOPE_SITE ) ? $this->cachedDays : "",
			$metric,
			$scope
		);
		$data = $this->cache->get( $key );

		if ( $data === false ) {
			// no cached data
			/** @var StatusValue $status */
			$status = match ( $scope ) {
				self::SCOPE_SITE => $this->service->getSiteData( $this->cachedDays, $metric ),
				self::SCOPE_TOP => $this->service->getTopPages( $metric ),
				default => throw new InvalidArgumentException( "invalid scope: $scope" ),
			};
			if ( $status->isOK() ) {
				$data = $status->getValue();
				$expiry = $this->getCacheExpiry( $metric, $scope );
			} else {
				// remember the failure for a short while; null marks a cached error
				$data = null;
				$expiry = self::ERROR_EXPIRY;
			}
			$this->cache->set( $key, $data, $expiry );
		} elseif ( $data === null ) {
			// cached error
			$status = StatusValue::newGood( [] );
			$status->fatal( 'pvi-cached-error', Message::durationParam( self::ERROR_EXPIRY ) );
		} else {
			// valid cached data
			$status = StatusValue::newGood( $data );
		}
		return $status;
	}

	/**
	 * The equivalent of getWithCache for multiple titles (ie. for SCOPE_ARTICLE).
	 * Errors are also handled per-article.
	 *
	 * The returned data covers a consecutive prefix of the requested titles: it is cut at the
	 * first title for which neither the cache nor the wrapped service returned anything.
	 * @param string $metric A METRIC_* constant
	 * @param PageReference[] $titles
	 * @return StatusValue
	 * @suppress SecurityCheck-DoubleEscaped
	 */
	protected function getTitlesWithCache( $metric, array $titles ) {
		if ( !$titles ) {
			return StatusValue::newGood( [] );
		}

		// Set up the response array, without any values (false marks "nothing known yet"); this
		// preserves the order of the requested titles. Build the cache key of each title as well.
		$data = $titleToCacheKey = $statuses = [];
		foreach ( $titles as $title ) {
			$dbKey = $this->titleFormatter->getPrefixedDBkey( $title );
			$data[$dbKey] = false;
			$titleToCacheKey[$dbKey] = $this->cache->makeKey(
				'pvi', $this->prefix,
				$this->cachedDays,
				$metric,
				self::SCOPE_ARTICLE,
				md5( $dbKey )
			);
		}

		// Fetch data for all titles from cache. Hopefully we are using a cache which has
		// a cheap getMulti implementation.
		$cacheKeyToTitle = array_flip( $titleToCacheKey );
		$rawData = $this->cache->getMulti( array_keys( $cacheKeyToTitle ) );
		foreach ( $rawData as $key => $value ) {
			// BagOStuff::getMulti is unclear on how missing items should be handled; let's
			// assume some implementations might return that key with a value of false
			if ( $value === false ) {
				continue;
			}
			$cachedTitle = $cacheKeyToTitle[$key];
			// A cached failure is stored as the (null-padded) data plus an '#error' marker.
			$statuses[$cachedTitle] = empty( $value['#error'] )
				? StatusValue::newGood()
				: StatusValue::newFatal(
					'pvi-cached-error-title',
					wfEscapeWikiText( $cachedTitle ),
					Message::durationParam( self::ERROR_EXPIRY )
				);
			unset( $value['#error'] );
			$data[$cachedTitle] = $value;
		}

		// Now get and cache the data for the remaining titles from the real service. It might not
		// return data for all of them.
		$uncachedTitles = array_filter( $titles, function ( PageReference $t ) use ( $data ) {
			return $data[$this->titleFormatter->getPrefixedDBkey( $t )] === false;
		} );
		$uncachedStatus = $this->service->getPageData( $uncachedTitles, $this->cachedDays, $metric );
		foreach ( $uncachedStatus->success as $title => $success ) {
			$titleData = $uncachedStatus->getValue()[$title] ?? null;
			if ( !is_array( $titleData ) || count( $titleData ) < $this->cachedDays ) {
				// PageViewService is expected to return [ date => null ] for all requested dates
				$this->logger->warning( 'Upstream service returned invalid data for {title}', [
					'title' => $title,
					'statusMessage' => Status::wrap( $uncachedStatus )
						->getWikiText( false, false, 'en' ),
				] );
				$titleData = $this->extendDateRange(
					is_array( $titleData ) ? $titleData : [],
					$this->cachedDays
				);
			}
			$data[$title] = $titleData;
			if ( $success ) {
				$statuses[$title] = StatusValue::newGood();
				$expiry = $this->getCacheExpiry( $metric, self::SCOPE_ARTICLE );
			} else {
				// the marker is stored in the cache only; it is stripped again below
				$data[$title]['#error'] = true;
				$statuses[$title] = StatusValue::newFatal(
					'pvi-cached-error-title',
					wfEscapeWikiText( $title ),
					Message::durationParam( self::ERROR_EXPIRY )
				);
				$expiry = self::ERROR_EXPIRY;
			}
			$this->cache->set( $titleToCacheKey[$title], $data[$title], $expiry );
			unset( $data[$title]['#error'] );
		}

		// Almost done; we need to truncate the data at the first "hole" (title not returned
		// either by getMulti or getPageData) so we return a consecutive prefix of the
		// requested titles and do not mess up continuation.
		// The comparison must be strict: a hole at position 0 is falsy but still means
		// "truncate everything", otherwise the false placeholders would leak to the caller.
		$holeIndex = array_search( false, array_values( $data ), true );
		if ( $holeIndex !== false ) {
			$data = array_slice( $data, 0, $holeIndex, true );
		}

		// $statuses was filled cached-first, so its order differs from the request order;
		// select the statuses by title rather than by position.
		$returnedStatuses = [];
		foreach ( array_keys( $data ) as $title ) {
			if ( isset( $statuses[$title] ) ) {
				$returnedStatuses[$title] = $statuses[$title];
			}
		}

		$status = StatusValue::newGood( $data );
		foreach ( $returnedStatuses as $titleStatus ) {
			$status->merge( $titleStatus );
		}
		$status->success = array_map( static function ( StatusValue $s ) {
			return $s->isOK();
		}, $returnedStatuses );
		$status->successCount = count( array_filter( $status->success ) );
		$status->failCount = count( $status->success ) - $status->successCount;
		// the overall result is OK as soon as at least one title succeeded
		$status->setResult( (bool)$status->successCount, $data );
		return $status;
	}

	/**
	 * Add extra days (with a null value) to the beginning of a date range to make it have at least
	 * $days days. Data that already has $days or more items is returned unchanged.
	 * @param array $data YYYY-MM-DD => count, ordered, oldest day first
	 * @param int $days Minimum number of items the result should have
	 * @return array
	 */
	protected function extendDateRange( $data, $days ) {
		// set to noon to avoid skip second and similar problems
		// NOTE(review): for empty $data the first key is null, so the start date comes from
		// parsing 'T00:00Z' alone - presumably the current UTC day; confirm.
		$day = strtotime( array_key_first( $data ) . 'T00:00Z' ) + 12 * 3600;
		for ( $i = $days - count( $data ); $i > 0; $i-- ) {
			$day -= 24 * 3600;
			// array union keeps the new (older) day in front of the existing ones
			$data = [ gmdate( 'Y-m-d', $day ) => null ] + $data;
		}
		return $data;
	}
}