Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 127
0.00% covered (danger)
0.00%
0 / 6
CRAP
0.00% covered (danger)
0.00%
0 / 1
ApiFeatureUsageQueryEngineSql
0.00% covered (danger)
0.00%
0 / 127
0.00% covered (danger)
0.00%
0 / 6
420
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 12
0.00% covered (danger)
0.00%
0 / 1
2
 enumerate
0.00% covered (danger)
0.00%
0 / 22
0.00% covered (danger)
0.00%
0 / 1
6
 suggestDateRange
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
6
 record
0.00% covered (danger)
0.00%
0 / 50
0.00% covered (danger)
0.00%
0 / 1
42
 getCounterLotteryDelta
0.00% covered (danger)
0.00%
0 / 10
0.00% covered (danger)
0.00%
0 / 1
12
 pingInsertLimiter
0.00% covered (danger)
0.00%
0 / 18
0.00% covered (danger)
0.00%
0 / 1
42
1<?php
2
3namespace MediaWiki\Extension\ApiFeatureUsage;
4
5use BagOStuff;
6use MediaWiki\Status\Status;
7use MediaWiki\User\UserIdentity;
8use MediaWiki\Utils\MWTimestamp;
9use ObjectCacheFactory;
10use Wikimedia\IPUtils;
11use Wikimedia\LightweightObjectStore\ExpirationAwareness;
12use Wikimedia\LightweightObjectStore\StorageAwareness;
13use Wikimedia\Rdbms\IConnectionProvider;
14use Wikimedia\Rdbms\IExpression;
15use Wikimedia\Rdbms\LikeValue;
16use Wikimedia\Rdbms\RawSQLValue;
17use Wikimedia\Rdbms\SelectQueryBuilder;
18use Wikimedia\WRStats\LimitCondition;
19use Wikimedia\WRStats\WRStatsFactory;
20
21class ApiFeatureUsageQueryEngineSql extends ApiFeatureUsageQueryEngine {
22    /** @var IConnectionProvider */
23    private $dbProvider;
24    /** @var WRStatsFactory */
25    private $wrStatsFactory;
26    /** @var BagOStuff */
27    private $cache;
28
29    /**
30     * @param IConnectionProvider $dbProvider
31     * @param WRStatsFactory $wrStatsFactory
32     * @param ObjectCacheFactory $objectCacheFactory
33     * @param array $options Additional options include:
34     *   - updateSampleFactorRatio: target ratio of ((hits per sampled hit) / total hits)
35     *      for updates to daily, per-agent, API feature use counters.
36     *   - minUpdateSampleFactor: minimum number of hits per sampled hit for updates to daily,
37     *      per-agent, API feature use counters.
38     *   - maxUpdateSampleFactor: maximum number of hits per sampled hit for updates to daily,
39     *      per-agent, API feature use counters.
40     *   - insertRateLimits: map with possible "ip" and "subnet" keys. Each value is a tuple
41     *      of (maximum new rows in the time window, the time window in seconds). The "ip"
42     *      entry applies to single client IP addresses. The "subnet" entry applies to the
43     *      /16 CIDR of IPv4 client addresses and the /64 CIDR of IPv6 client addresses.
44     *      These are safety limits to avoid flooding the database due to bots randomizing
45     *      their User-Agent or rotating their IP address.
46     */
47    public function __construct(
48        IConnectionProvider $dbProvider,
49        WRStatsFactory $wrStatsFactory,
50        ObjectCacheFactory $objectCacheFactory,
51        array $options
52    ) {
53        $options += [
54            'updateSampleFactorRatio' => 0.1,
55            'minUpdateSampleFactor' => 10,
56            'maxUpdateSampleFactor' => 1_000,
57            'insertRateLimits' => [
58                'ip' => [ 30, 60 ]
59            ]
60        ];
61        parent::__construct( $options );
62
63        $this->dbProvider = $dbProvider;
64        $this->wrStatsFactory = $wrStatsFactory;
65        $this->cache = $objectCacheFactory->getLocalClusterInstance();
66    }
67
68    /** @inheritDoc */
69    public function enumerate(
70        $agent,
71        MWTimestamp $start,
72        MWTimestamp $end,
73        array $features = null
74    ) {
75        $dbr = $this->dbProvider->getReplicaDatabase( 'virtual-apifeatureusage' );
76
77        $res = $dbr->newSelectQueryBuilder()
78            ->select( [ 'afu_date', 'afu_feature', 'hits' => 'SUM(afu_hits)' ] )
79            ->from( 'api_feature_usage' )
80            ->where( $dbr->expr(
81                'afu_agent',
82                IExpression::LIKE,
83                new LikeValue( $agent, $dbr->anyString() )
84            ) )
85            ->groupBy( [ 'afu_date', 'afu_feature' ] )
86            ->orderBy( [ 'afu_date', 'afu_feature' ], SelectQueryBuilder::SORT_ASC )
87            ->caller( __METHOD__ )
88            ->fetchResultSet();
89
90        $ret = [];
91        foreach ( $res as $row ) {
92            // Pad afu_date into TS_MW so that MWTimestamp can parse it
93            $date = new MWTimestamp( $row->afu_date . '000000' );
94            $ret[] = [
95                'feature' => $row->afu_feature,
96                'date' => $date->format( 'Y-m-d' ),
97                'count' => $row->hits
98            ];
99        }
100
101        return Status::newGood( $ret );
102    }
103
104    /** @inheritDoc */
105    public function suggestDateRange() {
106        $start = new MWTimestamp();
107        $start->setTimezone( 'UTC' );
108        $start->timestamp->setTime( 0, 0, 0 );
109        $end = new MWTimestamp();
110        $end->setTimezone( 'UTC' );
111
112        $dbr = $this->dbProvider->getReplicaDatabase( 'virtual-apifeatureusage' );
113        $date = $dbr->newSelectQueryBuilder()
114            ->select( 'afu_date' )
115            ->from( 'api_feature_usage' )
116            ->orderBy( 'afu_date', SelectQueryBuilder::SORT_ASC )
117            ->caller( __METHOD__ )
118            ->fetchField();
119
120        if ( $date !== false ) {
121            // Convert afu_data to TS_MW
122            $start->setTimestamp( $date );
123        }
124
125        return [ $start, $end ];
126    }
127
128    /** @inheritDoc */
129    public function record(
130        string $feature,
131        string $userAgent,
132        string $ipAddress,
133        UserIdentity $userIdentity
134    ) {
135        $now = MWTimestamp::now( TS_MW );
136
137        $key = $this->cache->makeGlobalKey(
138            'afu-recent-hits',
139            $feature,
140            sha1( $userAgent ),
141            substr( $now, 0, 8 )
142        );
143
144        $this->cache->watchErrors();
145        $hits = $this->cache->get( $key );
146        $error = $this->cache->getLastError();
147        if ( $error !== StorageAwareness::ERR_NONE ) {
148            // Do not risk flooding the DB
149            return false;
150        }
151
152        if ( $hits === false ) {
153            $dbr = $this->dbProvider->getReplicaDatabase( 'virtual-apifeatureusage' );
154
155            $hits = (int)$dbr->newSelectQueryBuilder()
156                ->select( 'afu_hits' )
157                ->from( 'api_feature_usage' )
158                ->where( [
159                    'afu_feature' => $feature,
160                    'afu_agent' => substr( $userAgent, 0, 255 ),
161                    'afu_date' => substr( $now, 0, 8 )
162                ] )
163                ->caller( __METHOD__ )
164                ->fetchField();
165
166            if ( $hits ) {
167                $this->cache->add( $key, $hits, ExpirationAwareness::TTL_HOUR );
168            } elseif ( $this->pingInsertLimiter( $ipAddress ) ) {
169                // Do not flood the DB due to user agent churn
170                return 0;
171            }
172        }
173
174        $delta = $this->getCounterLotteryDelta( $hits );
175        if ( $delta > 0 ) {
176            $this->cache->incrWithInit( $key, ExpirationAwareness::TTL_HOUR, $delta, $delta );
177
178            $dbw = $this->dbProvider->getPrimaryDatabase( 'virtual-apifeatureusage' );
179            // Increment the counter in way that is safe for both primary/replica replication
180            // and circular statement-based replication. Do the query in autocommit mode to
181            // limit lock contention.
182            $fname = __METHOD__;
183            $dbw->onTransactionCommitOrIdle(
184                static function () use ( $dbw, $feature, $userAgent, $delta, $now, $fname ) {
185                    $dbw->newInsertQueryBuilder()
186                        ->insertInto( 'api_feature_usage' )
187                        ->row( [
188                            'afu_feature' => $feature,
189                            'afu_agent' => $userAgent,
190                            'afu_date' => substr( $now, 0, 8 ),
191                            'afu_hits' => $delta
192                        ] )
193                        ->onDuplicateKeyUpdate()
194                        ->uniqueIndexFields( [ 'afu_date', 'afu_feature', 'afu_agent' ] )
195                        ->set( [ 'afu_hits' => new RawSQLValue( "afu_hits + $delta" ) ] )
196                        ->caller( $fname )
197                        ->execute();
198                }
199            );
200        }
201    }
202
203    /**
204     * @param int $dayHitTotal
205     * @return int Number of samples represented by this hit (0 if not sampled)
206     */
207    private function getCounterLotteryDelta( $dayHitTotal ) {
208        // Always sample the first hit
209        if ( $dayHitTotal <= 0 ) {
210            return 1;
211        }
212
213        // How much to increment the feature use count for each sampled use hit
214        $currentSampleFactor = (int)min(
215            max(
216                ceil( $this->options['updateSampleFactorRatio'] * $dayHitTotal ),
217                $this->options['minUpdateSampleFactor']
218            ),
219            $this->options['maxUpdateSampleFactor']
220        );
221
222        // Randomly decide whether to sample this feature use hit
223        return ( mt_rand( 1, $currentSampleFactor ) == 1 ) ? $currentSampleFactor : 0;
224    }
225
226    /**
227     * @param string $ipAddress IP address of user triggering a row insertion
228     * @return bool Whether the row insertion limit was tripped
229     */
230    private function pingInsertLimiter( $ipAddress ) {
231        if ( $ipAddress === '' ) {
232            return false;
233        }
234
235        $conds = [];
236        foreach ( $this->options['insertRateLimits'] as $type => [ $limit, $window ] ) {
237            $conds[$type] = new LimitCondition( $limit, $window );
238        }
239        $limiter = $this->wrStatsFactory->createRateLimiter(
240            $conds,
241            [ 'limiter', 'apifeatureusage-counter-init' ]
242        );
243        $limitBatch = $limiter->createBatch( 1 );
244        if ( isset( $conds['ip'] ) ) {
245            $limitBatch->globalOp( 'ip', $ipAddress );
246        }
247        if ( isset( $conds['subnet'] ) ) {
248            $subnet = IPUtils::getSubnet( $ipAddress );
249            if ( $subnet !== false ) {
250                $limitBatch->globalOp( 'subnet', $subnet );
251            }
252        }
253
254        $batchResult = $limitBatch->tryIncr();
255
256        return !$batchResult->isAllowed();
257    }
258}