Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
94 / 94
100.00% covered (success)
100.00%
10 / 10
CRAP
100.00% covered (success)
100.00%
1 / 1
FilterProfiler
100.00% covered (success)
100.00%
94 / 94
100.00% covered (success)
100.00%
10 / 10
17
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
14 / 14
100.00% covered (success)
100.00%
1 / 1
1
 resetFilterProfile
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 getFilterProfile
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
1
 getGroupProfile
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
1
 recordProfilingResult
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
2
 recordStats
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
3
 recordRuntimeProfilingResult
n/a
0 / 0
n/a
0 / 0
1
 recordPerFilterProfiling
100.00% covered (success)
100.00%
19 / 19
100.00% covered (success)
100.00%
1 / 1
4
 recordSlowFilter
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
1
 filterProfileKey
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 filterProfileGroupKey
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace MediaWiki\Extension\AbuseFilter;
4
5use BagOStuff;
6use IBufferingStatsdDataFactory;
7use MediaWiki\Config\ServiceOptions;
8use MediaWiki\Title\Title;
9use Psr\Log\LoggerInterface;
10use Wikimedia\WRStats\LocalEntityKey;
11use Wikimedia\WRStats\WRStatsFactory;
12
/**
 * Creates, stores and retrieves profiling information, both for individual filters and for
 * whole groups of filters.
 *
 * Counters are written and read through WRStats writers/readers obtained from the injected
 * factory; runtime timings go to statsd and slow filters are reported to the logger.
 *
 * @internal
 */
class FilterProfiler {
    public const SERVICE_NAME = 'AbuseFilterFilterProfiler';

    public const CONSTRUCTOR_OPTIONS = [
        'AbuseFilterConditionLimit',
        'AbuseFilterSlowFilterRuntimeLimit',
    ];

    /** Retention time of the profiling data, in seconds */
    private const STATS_STORAGE_PERIOD = BagOStuff::TTL_DAY;

    /** Size of one stats time bucket: a twelfth of the storage period */
    private const STATS_TIME_STEP = self::STATS_STORAGE_PERIOD / 12;

    /** WRStats spec shared by every metric */
    private const STATS_TEMPLATE = [
        'sequences' => [ [
            'timeStep' => self::STATS_TIME_STEP,
            'expiry' => self::STATS_STORAGE_PERIOD,
        ] ],
    ];

    /** Prefix handed to the WRStats factory for every reader and writer */
    private const KEY_PREFIX = 'abusefilter-profile';

    /** @var WRStatsFactory */
    private $statsFactory;

    /** @var ServiceOptions */
    private $options;

    /** @var string */
    private $localWikiID;

    /** @var IBufferingStatsdDataFactory */
    private $statsd;

    /** @var LoggerInterface */
    private $logger;

    /** @var array Metric name => WRStats spec */
    private $statsSpecs;

    /**
     * @param WRStatsFactory $statsFactory Source of the WRStats readers and writers
     * @param ServiceOptions $options Must provide everything in self::CONSTRUCTOR_OPTIONS
     * @param string $localWikiID
     * @param IBufferingStatsdDataFactory $statsd
     * @param LoggerInterface $logger
     */
    public function __construct(
        WRStatsFactory $statsFactory,
        ServiceOptions $options,
        string $localWikiID,
        IBufferingStatsdDataFactory $statsd,
        LoggerInterface $logger
    ) {
        // Validate the options before storing anything
        $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

        $this->statsFactory = $statsFactory;
        $this->options = $options;
        $this->localWikiID = $localWikiID;
        $this->statsd = $statsd;
        $this->logger = $logger;

        // The time metric is the only one that overrides the default resolution
        $timeSpec = [ 'resolution' => 1e-3 ] + self::STATS_TEMPLATE;
        $this->statsSpecs = [
            'count' => self::STATS_TEMPLATE,
            'total' => self::STATS_TEMPLATE,
            'overflow' => self::STATS_TEMPLATE,
            'matches' => self::STATS_TEMPLATE,
            'total-time' => $timeSpec,
            'total-cond' => self::STATS_TEMPLATE
        ];
    }

    /**
     * Reset every metric stored for the given filter.
     *
     * @param int $filter
     */
    public function resetFilterProfile( int $filter ): void {
        $this->statsFactory
            ->createWriter( $this->statsSpecs, self::KEY_PREFIX )
            ->resetAll( [ $this->filterProfileKey( $filter ) ] );
    }

    /**
     * Fetch the profiling totals of a single filter.
     *
     * @param int $filter
     * @return array Totals of the 'count', 'matches', 'total-time' and 'total-cond' metrics
     * @phan-return array{count:int,matches:int,total-time:float,total-cond:int}
     */
    public function getFilterProfile( int $filter ): array {
        $metrics = [ 'count', 'matches', 'total-time', 'total-cond' ];
        $reader = $this->statsFactory->createReader(
            $this->statsSpecs,
            self::KEY_PREFIX
        );
        $entity = $this->filterProfileKey( $filter );
        $range = $reader->latest( self::STATS_STORAGE_PERIOD );
        $rates = $reader->getRates( $metrics, $entity, $range );

        return $reader->total( $rates );
    }

    /**
     * Fetch the profiling totals of a filter group.
     *
     * @param string $group
     * @return array Totals of the 'total', 'overflow', 'total-time', 'total-cond' and
     *   'matches' metrics
     * @phan-return array{total:int,overflow:int,total-time:float,total-cond:int,matches:int}
     */
    public function getGroupProfile( string $group ): array {
        $metrics = [ 'total', 'overflow', 'total-time', 'total-cond', 'matches' ];
        $reader = $this->statsFactory->createReader(
            $this->statsSpecs,
            self::KEY_PREFIX
        );
        $entity = $this->filterProfileGroupKey( $group );
        $range = $reader->latest( self::STATS_STORAGE_PERIOD );
        $rates = $reader->getRates( $metrics, $entity, $range );

        return $reader->total( $rates );
    }

    /**
     * Store the profiling data of one filter run.
     *
     * @param int $filter
     * @param float $time Time taken, in milliseconds
     * @param int $conds
     * @param bool $matched
     */
    private function recordProfilingResult( int $filter, float $time, int $conds, bool $matched ): void {
        $entity = $this->filterProfileKey( $filter );
        $stats = $this->statsFactory->createWriter(
            $this->statsSpecs,
            self::KEY_PREFIX
        );

        $stats->incr( 'count', $entity );
        // Only runs that matched bump the match counter
        if ( $matched ) {
            $stats->incr( 'matches', $entity );
        }
        $stats->incr( 'total-time', $entity, $time );
        $stats->incr( 'total-cond', $entity, $conds );

        $stats->flush();
    }

    /**
     * Update the statistics of a whole filter group.
     *
     * @param string $group
     * @param int $condsUsed The amount of used conditions
     * @param float $totalTime Time taken, in milliseconds
     * @param bool $anyMatch Whether at least one filter matched the action
     */
    public function recordStats( string $group, int $condsUsed, float $totalTime, bool $anyMatch ): void {
        $stats = $this->statsFactory->createWriter(
            $this->statsSpecs,
            self::KEY_PREFIX
        );
        $entity = $this->filterProfileGroupKey( $group );

        $stats->incr( 'total', $entity );
        $stats->incr( 'total-time', $entity, $totalTime );
        $stats->incr( 'total-cond', $entity, $condsUsed );

        // An overflow is counted when more conditions were used than the limit allows
        $condLimit = $this->options->get( 'AbuseFilterConditionLimit' );
        if ( $condsUsed > $condLimit ) {
            $stats->incr( 'overflow', $entity );
        }

        // A single increment per action, no matter how many filters matched
        if ( $anyMatch ) {
            $stats->incr( 'matches', $entity );
        }

        $stats->flush();
    }

    /**
     * Send the runtime profiling data of all filters together to statsd.
     *
     * Note that the filter and condition counts are reported through timing() as well.
     *
     * @param int $totalFilters
     * @param int $totalConditions
     * @param float $runtime
     * @codeCoverageIgnore
     */
    public function recordRuntimeProfilingResult( int $totalFilters, int $totalConditions, float $runtime ): void {
        $metricBase = 'abusefilter.runtime-profile.' . $this->localWikiID . '.';

        $this->statsd->timing( $metricBase . 'runtime', $runtime );
        $this->statsd->timing( $metricBase . 'total_filters', $totalFilters );
        $this->statsd->timing( $metricBase . 'total_conditions', $totalConditions );
    }

    /**
     * Record the profiling data of every filter in $data, and log the slow ones.
     *
     * @param Title $title
     * @param array $data Profiling data, keyed by filter name
     * @phan-param array<string,array{time:float,conds:int,result:bool}> $data
     */
    public function recordPerFilterProfiling( Title $title, array $data ): void {
        $runtimeLimit = $this->options->get( 'AbuseFilterSlowFilterRuntimeLimit' );

        foreach ( $data as $name => $result ) {
            [ $localID, $isGlobal ] = GlobalNameUtils::splitGlobalName( $name );

            // Stats are only stored for local filters.
            // @todo Maybe add a parameter to recordProfilingResult to record global filters
            // data separately (in the foreign wiki)
            if ( !$isGlobal ) {
                $this->recordProfilingResult(
                    $localID,
                    $result['time'],
                    $result['conds'],
                    $result['result']
                );
            }

            // Slow filters are logged whether they are local or global
            if ( $result['time'] > $runtimeLimit ) {
                $this->recordSlowFilter(
                    $title,
                    $name,
                    $result['time'],
                    $result['conds'],
                    $result['result'],
                    $isGlobal
                );
            }
        }
    }

    /**
     * Write the runtime data of a slow filter to the log, for later analysis.
     *
     * @param Title $title
     * @param string $filterId
     * @param float $runtime
     * @param int $totalConditions
     * @param bool $matched
     * @param bool $global
     */
    private function recordSlowFilter(
        Title $title,
        string $filterId,
        float $runtime,
        int $totalConditions,
        bool $matched,
        bool $global
    ): void {
        $context = [
            'wiki' => $this->localWikiID,
            'filter_id' => $filterId,
            'title' => $title->getPrefixedText(),
            'runtime' => $runtime,
            'matched' => $matched,
            'total_conditions' => $totalConditions,
            'global' => $global
        ];

        $this->logger->info(
            'Edit filter {filter_id} on {wiki} is taking longer than expected',
            $context
        );
    }

    /**
     * Build the WRStats entity key under which the data of a single filter is stored.
     *
     * @param int $filter
     * @return LocalEntityKey
     */
    private function filterProfileKey( int $filter ): LocalEntityKey {
        $components = [ 'filter', (string)$filter ];
        return new LocalEntityKey( $components );
    }

    /**
     * Build the WRStats entity key under which the overall data of a filter group is stored.
     *
     * @param string $group
     * @return LocalEntityKey
     */
    private function filterProfileGroupKey( string $group ): LocalEntityKey {
        $components = [ 'group', $group ];
        return new LocalEntityKey( $components );
    }
}