Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
100.00% covered (success)
100.00%
88 / 88
100.00% covered (success)
100.00%
10 / 10
CRAP
100.00% covered (success)
100.00%
1 / 1
FilterProfiler
100.00% covered (success)
100.00%
88 / 88
100.00% covered (success)
100.00%
10 / 10
17
100.00% covered (success)
100.00%
1 / 1
 __construct
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
1
 resetFilterProfile
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 getFilterProfile
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
1
 getGroupProfile
100.00% covered (success)
100.00%
9 / 9
100.00% covered (success)
100.00%
1 / 1
1
 recordProfilingResult
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
2
 recordStats
100.00% covered (success)
100.00%
13 / 13
100.00% covered (success)
100.00%
1 / 1
3
 recordRuntimeProfilingResult
n/a
0 / 0
n/a
0 / 0
1
 recordPerFilterProfiling
100.00% covered (success)
100.00%
19 / 19
100.00% covered (success)
100.00%
1 / 1
4
 recordSlowFilter
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
1
 filterProfileKey
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 filterProfileGroupKey
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
1<?php
2
3namespace MediaWiki\Extension\AbuseFilter;
4
5use MediaWiki\Config\ServiceOptions;
6use MediaWiki\Title\Title;
7use Psr\Log\LoggerInterface;
8use Wikimedia\ObjectCache\BagOStuff;
9use Wikimedia\Stats\IBufferingStatsdDataFactory;
10use Wikimedia\WRStats\LocalEntityKey;
11use Wikimedia\WRStats\WRStatsFactory;
12
/**
 * Creates, stores and retrieves profiling information for individual filters and for
 * groups of filters.
 *
 * Per-filter and per-group counters are written and read through WRStats under
 * self::KEY_PREFIX. Aggregate runtime figures are sent to statsd, and filters exceeding
 * the configured runtime limit are reported through the logger.
 *
 * @internal
 */
class FilterProfiler {
    public const SERVICE_NAME = 'AbuseFilterFilterProfiler';

    public const CONSTRUCTOR_OPTIONS = [
        'AbuseFilterConditionLimit',
        'AbuseFilterSlowFilterRuntimeLimit',
    ];

    /** How long profiling data is kept in cache, in seconds */
    private const STATS_STORAGE_PERIOD = BagOStuff::TTL_DAY;

    /** Size of one stats time bucket: a twelfth of the storage period */
    private const STATS_TIME_STEP = self::STATS_STORAGE_PERIOD / 12;

    /** WRStats spec shared by every metric */
    private const STATS_TEMPLATE = [
        'sequences' => [ [
            'timeStep' => self::STATS_TIME_STEP,
            'expiry' => self::STATS_STORAGE_PERIOD,
        ] ],
    ];

    private const KEY_PREFIX = 'abusefilter-profile';

    /** @var array<string,array> WRStats specs, indexed by metric name */
    private $statsSpecs;

    /**
     * @param WRStatsFactory $statsFactory Source of the WRStats readers and writers
     * @param ServiceOptions $options Must provide the keys in self::CONSTRUCTOR_OPTIONS
     * @param string $localWikiID Used in statsd key names and in slow-filter log entries
     * @param IBufferingStatsdDataFactory $statsd
     * @param LoggerInterface $logger Receives the slow-filter reports
     */
    public function __construct(
        private readonly WRStatsFactory $statsFactory,
        private readonly ServiceOptions $options,
        private readonly string $localWikiID,
        private readonly IBufferingStatsdDataFactory $statsd,
        private readonly LoggerInterface $logger
    ) {
        // Every metric starts out as the shared template; 'total-time' is the only one
        // that carries an additional 'resolution' setting on top of it.
        $specs = array_fill_keys(
            [ 'count', 'total', 'overflow', 'matches', 'total-time', 'total-cond' ],
            self::STATS_TEMPLATE
        );
        $specs['total-time'] = [ 'resolution' => 1e-3 ] + self::STATS_TEMPLATE;

        $this->statsSpecs = $specs;
    }

    /**
     * Reset every metric stored for the given filter.
     *
     * @param int $filter
     */
    public function resetFilterProfile( int $filter ): void {
        $entity = $this->filterProfileKey( $filter );

        $this->statsFactory
            ->createWriter( $this->statsSpecs, self::KEY_PREFIX )
            ->resetAll( [ $entity ] );
    }

    /**
     * Retrieve per-filter statistics.
     *
     * @param int $filter
     * @return array{count:int,matches:int,total-time:float,total-cond:int}
     *  Totals of the listed metrics over the latest STATS_STORAGE_PERIOD seconds
     */
    public function getFilterProfile( int $filter ): array {
        return $this->readTotals(
            [ 'count', 'matches', 'total-time', 'total-cond' ],
            $this->filterProfileKey( $filter )
        );
    }

    /**
     * Retrieve per-group statistics.
     *
     * @param string $group
     * @return array{total:int,overflow:int,total-time:float,total-cond:int,matches:int}
     *  Totals of the listed metrics over the latest STATS_STORAGE_PERIOD seconds
     */
    public function getGroupProfile( string $group ): array {
        return $this->readTotals(
            [ 'total', 'overflow', 'total-time', 'total-cond', 'matches' ],
            $this->filterProfileGroupKey( $group )
        );
    }

    /**
     * Ask a WRStats reader for the rates of the given metrics over the latest
     * STATS_STORAGE_PERIOD seconds and return the reader's totals for them.
     *
     * @param string[] $metrics Names of the metrics to read (keys of the stats specs)
     * @param LocalEntityKey $entity Entity whose data is requested
     * @return array
     */
    private function readTotals( array $metrics, LocalEntityKey $entity ): array {
        $reader = $this->statsFactory->createReader( $this->statsSpecs, self::KEY_PREFIX );
        $window = $reader->latest( self::STATS_STORAGE_PERIOD );
        $rates = $reader->getRates( $metrics, $entity, $window );

        return $reader->total( $rates );
    }

    /**
     * Record the profiling data of a single filter run.
     *
     * @param int $filter
     * @param float $time Time taken, in milliseconds
     * @param int $conds Number of conditions to add to the filter's total
     * @param bool $matched Whether to also count this run as a match
     */
    private function recordProfilingResult( int $filter, float $time, int $conds, bool $matched ): void {
        $writer = $this->statsFactory->createWriter( $this->statsSpecs, self::KEY_PREFIX );
        $entity = $this->filterProfileKey( $filter );

        $writer->incr( 'count', $entity );
        if ( $matched ) {
            $writer->incr( 'matches', $entity );
        }
        $writer->incr( 'total-time', $entity, $time );
        $writer->incr( 'total-cond', $entity, $conds );

        $writer->flush();
    }

    /**
     * Update the statistics of a whole filter group.
     *
     * @param string $group
     * @param int $condsUsed The amount of used conditions
     * @param float $totalTime Time taken, in milliseconds
     * @param bool $anyMatch Whether at least one filter matched the action
     */
    public function recordStats( string $group, int $condsUsed, float $totalTime, bool $anyMatch ): void {
        $entity = $this->filterProfileGroupKey( $group );
        $writer = $this->statsFactory->createWriter( $this->statsSpecs, self::KEY_PREFIX );

        $writer->incr( 'total', $entity );
        $writer->incr( 'total-time', $entity, $totalTime );
        $writer->incr( 'total-cond', $entity, $condsUsed );

        // An action that used more conditions than configured counts as an overflow
        $conditionLimit = $this->options->get( 'AbuseFilterConditionLimit' );
        if ( $condsUsed > $conditionLimit ) {
            $writer->incr( 'overflow', $entity );
        }

        // A single increment, no matter how many filters matched
        if ( $anyMatch ) {
            $writer->incr( 'matches', $entity );
        }

        $writer->flush();
    }

    /**
     * Send the runtime profiling data of all filters together to statsd.
     *
     * @param int $totalFilters
     * @param int $totalConditions
     * @param float $runtime
     * @codeCoverageIgnore
     */
    public function recordRuntimeProfilingResult( int $totalFilters, int $totalConditions, float $runtime ): void {
        $keyPrefix = 'abusefilter.runtime-profile.' . $this->localWikiID . '.';

        // All three figures are reported as statsd timings, in this order
        $samples = [
            'runtime' => $runtime,
            'total_filters' => $totalFilters,
            'total_conditions' => $totalConditions,
        ];
        foreach ( $samples as $metric => $value ) {
            $this->statsd->timing( $keyPrefix . $metric, $value );
        }
    }

    /**
     * Record per-filter profiling for every filter in $data, and report those whose
     * runtime exceeds AbuseFilterSlowFilterRuntimeLimit.
     *
     * @param Title $title
     * @param array<string,array{time:float,conds:int,result:bool}> $data Profiling data,
     *  indexed by filter name
     */
    public function recordPerFilterProfiling( Title $title, array $data ): void {
        $slowLimit = $this->options->get( 'AbuseFilterSlowFilterRuntimeLimit' );

        foreach ( $data as $name => $stats ) {
            [ $id, $isGlobal ] = GlobalNameUtils::splitGlobalName( $name );

            // @todo Maybe add a parameter to recordProfilingResult to record global filters
            // data separately (in the foreign wiki)
            if ( !$isGlobal ) {
                $this->recordProfilingResult( $id, $stats['time'], $stats['conds'], $stats['result'] );
            }

            // Slow filters are reported whether they are local or global
            if ( $stats['time'] > $slowLimit ) {
                $this->recordSlowFilter(
                    $title,
                    $name,
                    $stats['time'],
                    $stats['conds'],
                    $stats['result'],
                    $isGlobal
                );
            }
        }
    }

    /**
     * Log the runtime data of a slow filter for later analysis.
     *
     * @param Title $title
     * @param string $filterId
     * @param float $runtime
     * @param int $totalConditions
     * @param bool $matched
     * @param bool $global
     */
    private function recordSlowFilter(
        Title $title,
        string $filterId,
        float $runtime,
        int $totalConditions,
        bool $matched,
        bool $global
    ): void {
        $context = [
            'wiki' => $this->localWikiID,
            'filter_id' => $filterId,
            'title' => $title->getPrefixedText(),
            'runtime' => $runtime,
            'matched' => $matched,
            'total_conditions' => $totalConditions,
            'global' => $global
        ];

        $this->logger->info(
            'Edit filter {filter_id} on {wiki} is taking longer than expected',
            $context
        );
    }

    /**
     * Build the WRStats entity key under which per-filter profiling data is stored.
     *
     * @param int $filter
     * @return LocalEntityKey
     */
    private function filterProfileKey( int $filter ): LocalEntityKey {
        $components = [ 'filter', (string)$filter ];

        return new LocalEntityKey( $components );
    }

    /**
     * Build the WRStats entity key under which the overall profiling data of a filter
     * group is stored.
     *
     * @param string $group
     * @return LocalEntityKey
     */
    private function filterProfileGroupKey( string $group ): LocalEntityKey {
        $components = [ 'group', $group ];

        return new LocalEntityKey( $components );
    }
}