Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 127 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
ApiFeatureUsageQueryEngineSql | |
0.00% |
0 / 127 |
|
0.00% |
0 / 6 |
420 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
2 | |||
enumerate | |
0.00% |
0 / 22 |
|
0.00% |
0 / 1 |
6 | |||
suggestDateRange | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
6 | |||
record | |
0.00% |
0 / 50 |
|
0.00% |
0 / 1 |
42 | |||
getCounterLotteryDelta | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
12 | |||
pingInsertLimiter | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\ApiFeatureUsage; |
4 | |
5 | use BagOStuff; |
6 | use MediaWiki\Status\Status; |
7 | use MediaWiki\User\UserIdentity; |
8 | use MediaWiki\Utils\MWTimestamp; |
9 | use ObjectCacheFactory; |
10 | use Wikimedia\IPUtils; |
11 | use Wikimedia\LightweightObjectStore\ExpirationAwareness; |
12 | use Wikimedia\LightweightObjectStore\StorageAwareness; |
13 | use Wikimedia\Rdbms\IConnectionProvider; |
14 | use Wikimedia\Rdbms\IExpression; |
15 | use Wikimedia\Rdbms\LikeValue; |
16 | use Wikimedia\Rdbms\RawSQLValue; |
17 | use Wikimedia\Rdbms\SelectQueryBuilder; |
18 | use Wikimedia\WRStats\LimitCondition; |
19 | use Wikimedia\WRStats\WRStatsFactory; |
20 | |
/**
 * SQL-backed query engine for API feature usage.
 *
 * Usage is stored in the "api_feature_usage" table as daily, per-agent, per-feature
 * hit counters keyed by (afu_date, afu_feature, afu_agent), where afu_date is an
 * 8-digit "YYYYMMDD" string. Counter updates are sampled so that frequently used
 * features do not trigger a write on every single hit.
 */
class ApiFeatureUsageQueryEngineSql extends ApiFeatureUsageQueryEngine {
	/** Virtual database domain used for all api_feature_usage queries */
	private const VIRTUAL_DOMAIN = 'virtual-apifeatureusage';

	/** Maximum number of bytes of the User-Agent used as the afu_agent value */
	private const AGENT_MAX_BYTES = 255;

	private IConnectionProvider $dbProvider;

	private WRStatsFactory $wrStatsFactory;

	private BagOStuff $cache;

	/**
	 * @param IConnectionProvider $dbProvider
	 * @param WRStatsFactory $wrStatsFactory
	 * @param ObjectCacheFactory $objectCacheFactory
	 * @param array $options Additional options include:
	 *   - updateSampleFactorRatio: target ratio of ((hits per sampled hit) / total hits)
	 *      for updates to daily, per-agent, API feature use counters.
	 *   - minUpdateSampleFactor: minimum number of hits per sampled hit for updates to daily,
	 *      per-agent, API feature use counters.
	 *   - maxUpdateSampleFactor: maximum number of hits per sampled hit for updates to daily,
	 *      per-agent, API feature use counters.
	 *   - insertRateLimits: map with possible "ip" and "subnet" keys. Each value is a tuple
	 *      of (maximum new rows in the time window, the time window in seconds). The "ip"
	 *      entry applies to single client IP addresses. The "subnet" entry applies to the
	 *      /16 CIDR of IPv4 client addresses and the /64 CIDR of IPv6 client addresses.
	 *      These are safety limits to avoid flooding the database due to bots randomizing
	 *      their User-Agent or rotating their IP address.
	 */
	public function __construct(
		IConnectionProvider $dbProvider,
		WRStatsFactory $wrStatsFactory,
		ObjectCacheFactory $objectCacheFactory,
		array $options
	) {
		// Caller-supplied options win; "+=" only fills in the missing keys
		$options += [
			'updateSampleFactorRatio' => 0.1,
			'minUpdateSampleFactor' => 10,
			'maxUpdateSampleFactor' => 1_000,
			'insertRateLimits' => [
				'ip' => [ 30, 60 ]
			]
		];
		parent::__construct( $options );

		$this->dbProvider = $dbProvider;
		$this->wrStatsFactory = $wrStatsFactory;
		$this->cache = $objectCacheFactory->getLocalClusterInstance();
	}

	/** @inheritDoc */
	public function enumerate(
		$agent,
		MWTimestamp $start,
		MWTimestamp $end,
		?array $features = null
	) {
		$dbr = $this->dbProvider->getReplicaDatabase( self::VIRTUAL_DOMAIN );

		$queryBuilder = $dbr->newSelectQueryBuilder()
			->select( [ 'afu_date', 'afu_feature', 'hits' => 'SUM(afu_hits)' ] )
			->from( 'api_feature_usage' )
			// Prefix match on the agent string
			->where( $dbr->expr(
				'afu_agent',
				IExpression::LIKE,
				new LikeValue( $agent, $dbr->anyString() )
			) )
			// afu_date is a fixed-width "YYYYMMDD" string, so string comparison
			// orders the same way as the dates themselves
			->andWhere( $dbr->expr( 'afu_date', '>=', self::getDateKey( $start ) ) )
			->andWhere( $dbr->expr( 'afu_date', '<=', self::getDateKey( $end ) ) );

		// A null or empty feature list means "do not filter by feature"
		if ( $features ) {
			$queryBuilder->andWhere( [ 'afu_feature' => array_values( $features ) ] );
		}

		$res = $queryBuilder
			->groupBy( [ 'afu_date', 'afu_feature' ] )
			->orderBy( [ 'afu_date', 'afu_feature' ], SelectQueryBuilder::SORT_ASC )
			->caller( __METHOD__ )
			->fetchResultSet();

		$ret = [];
		foreach ( $res as $row ) {
			// Pad afu_date into TS_MW so that MWTimestamp can parse it
			$date = new MWTimestamp( $row->afu_date . '000000' );
			$ret[] = [
				'feature' => $row->afu_feature,
				'date' => $date->format( 'Y-m-d' ),
				'count' => $row->hits
			];
		}

		return Status::newGood( $ret );
	}

	/** @inheritDoc */
	public function suggestDateRange() {
		// Default range: from the start of the current UTC day until now
		$start = new MWTimestamp();
		$start->setTimezone( 'UTC' );
		$start->timestamp->setTime( 0, 0, 0 );
		$end = new MWTimestamp();
		$end->setTimezone( 'UTC' );

		// Oldest recorded day, if any
		$dbr = $this->dbProvider->getReplicaDatabase( self::VIRTUAL_DOMAIN );
		$date = $dbr->newSelectQueryBuilder()
			->select( 'afu_date' )
			->from( 'api_feature_usage' )
			->orderBy( 'afu_date', SelectQueryBuilder::SORT_ASC )
			->caller( __METHOD__ )
			->fetchField();

		if ( $date !== false ) {
			// Convert afu_date ("YYYYMMDD") to TS_MW by padding the time portion;
			// a bare 8-digit string is not a valid TS_MW value
			$start->setTimestamp( $date . '000000' );
		}

		return [ $start, $end ];
	}

	/** @inheritDoc */
	public function record(
		string $feature,
		string $userAgent,
		string $ipAddress,
		UserIdentity $userIdentity
	) {
		$now = MWTimestamp::now( TS_MW );
		// Day bucket ("YYYYMMDD") used for both the cache key and the afu_date column
		$date = substr( $now, 0, 8 );
		// Use the same truncated agent for the lookup and the insert so that both
		// always address the same row
		$agent = substr( $userAgent, 0, self::AGENT_MAX_BYTES );

		$key = $this->cache->makeGlobalKey(
			'afu-recent-hits',
			$feature,
			sha1( $userAgent ),
			$date
		);

		$this->cache->watchErrors();
		$hits = $this->cache->get( $key );
		$error = $this->cache->getLastError();
		if ( $error !== StorageAwareness::ERR_NONE ) {
			// Do not risk flooding the DB
			return false;
		}

		if ( $hits === false ) {
			// Cache miss: fall back to the stored counter for today
			$dbr = $this->dbProvider->getReplicaDatabase( self::VIRTUAL_DOMAIN );

			$hits = (int)$dbr->newSelectQueryBuilder()
				->select( 'afu_hits' )
				->from( 'api_feature_usage' )
				->where( [
					'afu_feature' => $feature,
					'afu_agent' => $agent,
					'afu_date' => $date
				] )
				->caller( __METHOD__ )
				->fetchField();

			if ( $hits ) {
				$this->cache->add( $key, $hits, ExpirationAwareness::TTL_HOUR );
			} elseif ( $this->pingInsertLimiter( $ipAddress ) ) {
				// Do not flood the DB due to user agent churn
				return 0;
			}
		}

		// Decide whether this hit is sampled and how many hits it stands for
		$delta = $this->getCounterLotteryDelta( $hits );
		if ( $delta > 0 ) {
			$this->cache->incrWithInit( $key, ExpirationAwareness::TTL_HOUR, $delta, $delta );

			$dbw = $this->dbProvider->getPrimaryDatabase( self::VIRTUAL_DOMAIN );
			// Increment the counter in way that is safe for both primary/replica replication
			// and circular statement-based replication. Do the query in autocommit mode to
			// limit lock contention.
			$fname = __METHOD__;
			$dbw->onTransactionCommitOrIdle(
				static function () use ( $dbw, $feature, $agent, $delta, $date, $fname ) {
					$dbw->newInsertQueryBuilder()
						->insertInto( 'api_feature_usage' )
						->row( [
							'afu_feature' => $feature,
							'afu_agent' => $agent,
							'afu_date' => $date,
							'afu_hits' => $delta
						] )
						->onDuplicateKeyUpdate()
						->uniqueIndexFields( [ 'afu_date', 'afu_feature', 'afu_agent' ] )
						// $delta is always an integer computed by getCounterLotteryDelta()
						->set( [ 'afu_hits' => new RawSQLValue( "afu_hits + $delta" ) ] )
						->caller( $fname )
						->execute();
				}
			);
		}
	}

	/**
	 * Get the "YYYYMMDD" (UTC) day key, as stored in afu_date, for a timestamp
	 *
	 * @param MWTimestamp $timestamp
	 * @return string
	 */
	private static function getDateKey( MWTimestamp $timestamp ): string {
		return substr( $timestamp->getTimestamp( TS_MW ), 0, 8 );
	}

	/**
	 * @param int $dayHitTotal
	 * @return int Number of samples represented by this hit (0 if not sampled)
	 */
	private function getCounterLotteryDelta( $dayHitTotal ) {
		// Always sample the first hit
		if ( $dayHitTotal <= 0 ) {
			return 1;
		}

		// How much to increment the feature use count for each sampled use hit.
		// Clamp to at least 1 so that a misconfigured factor cannot make the
		// mt_rand() range invalid.
		$currentSampleFactor = max(
			1,
			(int)min(
				max(
					ceil( $this->options['updateSampleFactorRatio'] * $dayHitTotal ),
					$this->options['minUpdateSampleFactor']
				),
				$this->options['maxUpdateSampleFactor']
			)
		);

		// Randomly decide whether to sample this feature use hit
		return ( mt_rand( 1, $currentSampleFactor ) === 1 ) ? $currentSampleFactor : 0;
	}

	/**
	 * @param string $ipAddress IP address of user triggering a row insertion
	 * @return bool Whether the row insertion limit was tripped
	 */
	private function pingInsertLimiter( $ipAddress ) {
		if ( $ipAddress === '' ) {
			return false;
		}

		// One limit condition per configured limit type ("ip" and/or "subnet")
		$conds = [];
		foreach ( $this->options['insertRateLimits'] as $type => [ $limit, $window ] ) {
			$conds[$type] = new LimitCondition( $limit, $window );
		}
		$limiter = $this->wrStatsFactory->createRateLimiter(
			$conds,
			[ 'limiter', 'apifeatureusage-counter-init' ]
		);
		$limitBatch = $limiter->createBatch( 1 );
		if ( isset( $conds['ip'] ) ) {
			$limitBatch->globalOp( 'ip', $ipAddress );
		}
		if ( isset( $conds['subnet'] ) ) {
			$subnet = IPUtils::getSubnet( $ipAddress );
			// Skip the subnet limit when no subnet can be derived from the address
			if ( $subnet !== false ) {
				$limitBatch->globalOp( 'subnet', $subnet );
			}
		}

		$batchResult = $limitBatch->tryIncr();

		return !$batchResult->isAllowed();
	}
}