Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
100.00% |
94 / 94 |
|
100.00% |
10 / 10 |
CRAP | |
100.00% |
1 / 1 |
FilterProfiler | |
100.00% |
94 / 94 |
|
100.00% |
10 / 10 |
17 | |
100.00% |
1 / 1 |
__construct | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
1 | |||
resetFilterProfile | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
getFilterProfile | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
1 | |||
getGroupProfile | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
1 | |||
recordProfilingResult | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
2 | |||
recordStats | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
3 | |||
recordRuntimeProfilingResult | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
recordPerFilterProfiling | |
100.00% |
19 / 19 |
|
100.00% |
1 / 1 |
4 | |||
recordSlowFilter | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
filterProfileKey | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
filterProfileGroupKey | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Extension\AbuseFilter; |
4 | |
5 | use BagOStuff; |
6 | use IBufferingStatsdDataFactory; |
7 | use MediaWiki\Config\ServiceOptions; |
8 | use MediaWiki\Title\Title; |
9 | use Psr\Log\LoggerInterface; |
10 | use Wikimedia\WRStats\LocalEntityKey; |
11 | use Wikimedia\WRStats\WRStatsFactory; |
12 | |
13 | /** |
14 | * This class is used to create, store, and retrieve profiling information for single filters and |
15 | * groups of filters. |
16 | * |
17 | * @internal |
18 | */ |
19 | class FilterProfiler { |
20 | public const SERVICE_NAME = 'AbuseFilterFilterProfiler'; |
21 | |
22 | public const CONSTRUCTOR_OPTIONS = [ |
23 | 'AbuseFilterConditionLimit', |
24 | 'AbuseFilterSlowFilterRuntimeLimit', |
25 | ]; |
26 | |
27 | /** |
28 | * How long to keep profiling data in cache (in seconds) |
29 | */ |
30 | private const STATS_STORAGE_PERIOD = BagOStuff::TTL_DAY; |
31 | |
32 | /** The stats time bucket size (in seconds): one twelfth of the storage period */ |
33 | private const STATS_TIME_STEP = self::STATS_STORAGE_PERIOD / 12; |
34 | |
35 | /** The WRStats spec common to all metrics */ |
36 | private const STATS_TEMPLATE = [ |
37 | 'sequences' => [ [ |
38 | 'timeStep' => self::STATS_TIME_STEP, |
39 | 'expiry' => self::STATS_STORAGE_PERIOD, |
40 | ] ], |
41 | ]; |
42 | |
43 | private const KEY_PREFIX = 'abusefilter-profile'; |
44 | |
45 | /** @var WRStatsFactory */ |
46 | private $statsFactory; |
47 | |
48 | /** @var ServiceOptions */ |
49 | private $options; |
50 | |
51 | /** @var string */ |
52 | private $localWikiID; |
53 | |
54 | /** @var IBufferingStatsdDataFactory */ |
55 | private $statsd; |
56 | |
57 | /** @var LoggerInterface */ |
58 | private $logger; |
59 | |
60 | /** @var array */ |
61 | private $statsSpecs; |
62 | |
63 | /** |
64 | * @param WRStatsFactory $statsFactory |
65 | * @param ServiceOptions $options |
66 | * @param string $localWikiID |
67 | * @param IBufferingStatsdDataFactory $statsd |
68 | * @param LoggerInterface $logger |
69 | */ |
70 | public function __construct( |
71 | WRStatsFactory $statsFactory, |
72 | ServiceOptions $options, |
73 | string $localWikiID, |
74 | IBufferingStatsdDataFactory $statsd, |
75 | LoggerInterface $logger |
76 | ) { |
77 | $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS ); |
78 | $this->statsFactory = $statsFactory; |
79 | $this->options = $options; |
80 | $this->localWikiID = $localWikiID; |
81 | $this->statsd = $statsd; |
82 | $this->logger = $logger; |
83 | $this->statsSpecs = [ |
84 | 'count' => self::STATS_TEMPLATE, |
85 | 'total' => self::STATS_TEMPLATE, |
86 | 'overflow' => self::STATS_TEMPLATE, |
87 | 'matches' => self::STATS_TEMPLATE, |
88 | 'total-time' => [ 'resolution' => 1e-3 ] + self::STATS_TEMPLATE, |
89 | 'total-cond' => self::STATS_TEMPLATE |
90 | ]; |
91 | } |
92 | |
93 | /** |
94 | * @param int $filter |
95 | */ |
96 | public function resetFilterProfile( int $filter ): void { |
97 | $writer = $this->statsFactory->createWriter( |
98 | $this->statsSpecs, |
99 | self::KEY_PREFIX |
100 | ); |
101 | $writer->resetAll( [ $this->filterProfileKey( $filter ) ] ); |
102 | } |
103 | |
104 | /** |
105 | * Retrieve per-filter statistics. |
106 | * |
107 | * @param int $filter |
108 | * @return array Totals over the last STATS_STORAGE_PERIOD seconds, keyed by metric name |
109 | * @phan-return array{count:int,matches:int,total-time:float,total-cond:int} |
110 | */ |
111 | public function getFilterProfile( int $filter ): array { |
112 | $reader = $this->statsFactory->createReader( |
113 | $this->statsSpecs, |
114 | self::KEY_PREFIX |
115 | ); |
116 | return $reader->total( $reader->getRates( |
117 | [ 'count', 'matches', 'total-time', 'total-cond' ], |
118 | $this->filterProfileKey( $filter ), |
119 | $reader->latest( self::STATS_STORAGE_PERIOD ) |
120 | ) ); |
121 | } |
122 | |
123 | /** |
124 | * Retrieve per-group statistics. |
125 | * |
126 | * @param string $group |
127 | * @return array Totals over the last STATS_STORAGE_PERIOD seconds, keyed by metric name |
128 | * @phan-return array{total:int,overflow:int,total-time:float,total-cond:int,matches:int} |
129 | */ |
130 | public function getGroupProfile( string $group ): array { |
131 | $reader = $this->statsFactory->createReader( |
132 | $this->statsSpecs, |
133 | self::KEY_PREFIX |
134 | ); |
135 | return $reader->total( $reader->getRates( |
136 | [ 'total', 'overflow', 'total-time', 'total-cond', 'matches' ], |
137 | $this->filterProfileGroupKey( $group ), |
138 | $reader->latest( self::STATS_STORAGE_PERIOD ) |
139 | ) ); |
140 | } |
141 | |
142 | /** |
143 | * Record per-filter profiling data |
144 | * |
145 | * @param int $filter |
146 | * @param float $time Time taken, in milliseconds |
147 | * @param int $conds |
148 | * @param bool $matched |
149 | */ |
150 | private function recordProfilingResult( int $filter, float $time, int $conds, bool $matched ): void { |
151 | $key = $this->filterProfileKey( $filter ); |
152 | $writer = $this->statsFactory->createWriter( |
153 | $this->statsSpecs, |
154 | self::KEY_PREFIX |
155 | ); |
156 | $writer->incr( 'count', $key ); |
157 | if ( $matched ) { |
158 | $writer->incr( 'matches', $key ); |
159 | } |
160 | $writer->incr( 'total-time', $key, $time ); |
161 | $writer->incr( 'total-cond', $key, $conds ); |
162 | $writer->flush(); |
163 | } |
164 | |
165 | /** |
166 | * Update global statistics |
167 | * |
168 | * @param string $group |
169 | * @param int $condsUsed The amount of used conditions |
170 | * @param float $totalTime Time taken, in milliseconds |
171 | * @param bool $anyMatch Whether at least one filter matched the action |
172 | */ |
173 | public function recordStats( string $group, int $condsUsed, float $totalTime, bool $anyMatch ): void { |
174 | $writer = $this->statsFactory->createWriter( |
175 | $this->statsSpecs, |
176 | self::KEY_PREFIX |
177 | ); |
178 | $key = $this->filterProfileGroupKey( $group ); |
179 | |
180 | $writer->incr( 'total', $key ); |
181 | $writer->incr( 'total-time', $key, $totalTime ); |
182 | $writer->incr( 'total-cond', $key, $condsUsed ); |
183 | |
184 | // Increment overflow counter, if our condition limit overflowed |
185 | if ( $condsUsed > $this->options->get( 'AbuseFilterConditionLimit' ) ) { |
186 | $writer->incr( 'overflow', $key ); |
187 | } |
188 | |
189 | // Increment counter by 1 if there was at least one match |
190 | if ( $anyMatch ) { |
191 | $writer->incr( 'matches', $key ); |
192 | } |
193 | $writer->flush(); |
194 | } |
195 | |
196 | /** |
197 | * Record runtime profiling data for all filters together |
198 | * |
199 | * @param int $totalFilters |
200 | * @param int $totalConditions |
201 | * @param float $runtime |
202 | * @codeCoverageIgnore |
203 | */ |
204 | public function recordRuntimeProfilingResult( int $totalFilters, int $totalConditions, float $runtime ): void { |
205 | $keyPrefix = 'abusefilter.runtime-profile.' . $this->localWikiID . '.'; |
206 | |
207 | $this->statsd->timing( $keyPrefix . 'runtime', $runtime ); |
208 | $this->statsd->timing( $keyPrefix . 'total_filters', $totalFilters ); |
209 | $this->statsd->timing( $keyPrefix . 'total_conditions', $totalConditions ); |
210 | } |
211 | |
212 | /** |
213 | * Record per-filter profiling, for all filters |
214 | * |
215 | * @param Title $title |
216 | * @param array $data Profiling data |
217 | * @phan-param array<string,array{time:float,conds:int,result:bool}> $data |
218 | */ |
219 | public function recordPerFilterProfiling( Title $title, array $data ): void { |
220 | $slowFilterThreshold = $this->options->get( 'AbuseFilterSlowFilterRuntimeLimit' ); |
221 | |
222 | foreach ( $data as $filterName => $params ) { |
223 | [ $filterID, $global ] = GlobalNameUtils::splitGlobalName( $filterName ); |
224 | // @todo Maybe add a parameter to recordProfilingResult to record global filters |
225 | // data separately (in the foreign wiki) |
226 | if ( !$global ) { |
227 | $this->recordProfilingResult( |
228 | $filterID, |
229 | $params['time'], |
230 | $params['conds'], |
231 | $params['result'] |
232 | ); |
233 | } |
234 | |
235 | if ( $params['time'] > $slowFilterThreshold ) { |
236 | $this->recordSlowFilter( |
237 | $title, |
238 | $filterName, |
239 | $params['time'], |
240 | $params['conds'], |
241 | $params['result'], |
242 | $global |
243 | ); |
244 | } |
245 | } |
246 | } |
247 | |
248 | /** |
249 | * Logs slow filter's runtime data for later analysis |
250 | * |
251 | * @param Title $title |
252 | * @param string $filterId Filter name as keyed in the profiling data (may denote a global filter) |
253 | * @param float $runtime |
254 | * @param int $totalConditions |
255 | * @param bool $matched |
256 | * @param bool $global |
257 | */ |
258 | private function recordSlowFilter( |
259 | Title $title, |
260 | string $filterId, |
261 | float $runtime, |
262 | int $totalConditions, |
263 | bool $matched, |
264 | bool $global |
265 | ): void { |
266 | $this->logger->info( |
267 | 'Edit filter {filter_id} on {wiki} is taking longer than expected', |
268 | [ |
269 | 'wiki' => $this->localWikiID, |
270 | 'filter_id' => $filterId, |
271 | 'title' => $title->getPrefixedText(), |
272 | 'runtime' => $runtime, |
273 | 'matched' => $matched, |
274 | 'total_conditions' => $totalConditions, |
275 | 'global' => $global |
276 | ] |
277 | ); |
278 | } |
279 | |
280 | /** |
281 | * Get the WRStats entity key used to store per-filter profiling data. |
282 | * |
283 | * @param int $filter |
284 | * @return LocalEntityKey |
285 | */ |
286 | private function filterProfileKey( int $filter ): LocalEntityKey { |
287 | return new LocalEntityKey( [ 'filter', (string)$filter ] ); |
288 | } |
289 | |
290 | /** |
291 | * WRStats entity key used to store overall profiling data for rule groups |
292 | * |
293 | * @param string $group |
294 | * @return LocalEntityKey |
295 | */ |
296 | private function filterProfileGroupKey( string $group ): LocalEntityKey { |
297 | return new LocalEntityKey( [ 'group', $group ] ); |
298 | } |
299 | } |