Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
100.00% |
88 / 88 |
|
100.00% |
10 / 10 |
CRAP | |
100.00% |
1 / 1 |
| FilterProfiler | |
100.00% |
88 / 88 |
|
100.00% |
10 / 10 |
17 | |
100.00% |
1 / 1 |
| __construct | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
1 | |||
| resetFilterProfile | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
| getFilterProfile | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
1 | |||
| getGroupProfile | |
100.00% |
9 / 9 |
|
100.00% |
1 / 1 |
1 | |||
| recordProfilingResult | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
2 | |||
| recordStats | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
3 | |||
| recordRuntimeProfilingResult | n/a |
0 / 0 |
n/a |
0 / 0 |
1 | |||||
| recordPerFilterProfiling | |
100.00% |
19 / 19 |
|
100.00% |
1 / 1 |
4 | |||
| recordSlowFilter | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
1 | |||
| filterProfileKey | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| filterProfileGroupKey | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace MediaWiki\Extension\AbuseFilter; |
| 4 | |
| 5 | use MediaWiki\Config\ServiceOptions; |
| 6 | use MediaWiki\Title\Title; |
| 7 | use Psr\Log\LoggerInterface; |
| 8 | use Wikimedia\ObjectCache\BagOStuff; |
| 9 | use Wikimedia\Stats\IBufferingStatsdDataFactory; |
| 10 | use Wikimedia\WRStats\LocalEntityKey; |
| 11 | use Wikimedia\WRStats\WRStatsFactory; |
| 12 | |
| 13 | /** |
| 14 | * This class is used to create, store, and retrieve profiling information for single filters and |
| 15 | * groups of filters. |
| 16 | * |
| 17 | * @internal |
| 18 | */ |
| 19 | class FilterProfiler { |
| 20 | public const SERVICE_NAME = 'AbuseFilterFilterProfiler'; |
| 21 | |
| 22 | public const CONSTRUCTOR_OPTIONS = [ |
| 23 | 'AbuseFilterConditionLimit', |
| 24 | 'AbuseFilterSlowFilterRuntimeLimit', |
| 25 | ]; |
| 26 | |
| 27 | /** |
| 28 | * How long to keep profiling data in cache (in seconds) |
| 29 | */ |
| 30 | private const STATS_STORAGE_PERIOD = BagOStuff::TTL_DAY; |
| 31 | |
| 32 | /** The stats time bucket size, in seconds: one twelfth of the storage period */ |
| 33 | private const STATS_TIME_STEP = self::STATS_STORAGE_PERIOD / 12; |
| 34 | |
| 35 | /** The WRStats spec common to all metrics: one sequence using the bucket size and expiry above */ |
| 36 | private const STATS_TEMPLATE = [ |
| 37 | 'sequences' => [ [ |
| 38 | 'timeStep' => self::STATS_TIME_STEP, |
| 39 | 'expiry' => self::STATS_STORAGE_PERIOD, |
| 40 | ] ], |
| 41 | ]; |
| 42 | |
| 43 | private const KEY_PREFIX = 'abusefilter-profile'; |
| 44 | |
| 45 | /** @var array<string,array> WRStats metric specs, keyed by metric name; set in the constructor */ |
| 46 | private $statsSpecs; |
| 47 | |
| 48 | public function __construct( |
| 49 | private readonly WRStatsFactory $statsFactory, |
| 50 | private readonly ServiceOptions $options, |
| 51 | private readonly string $localWikiID, |
| 52 | private readonly IBufferingStatsdDataFactory $statsd, |
| 53 | private readonly LoggerInterface $logger |
| 54 | ) { |
| 55 | $this->statsSpecs = [ |
| 56 | 'count' => self::STATS_TEMPLATE, |
| 57 | 'total' => self::STATS_TEMPLATE, |
| 58 | 'overflow' => self::STATS_TEMPLATE, |
| 59 | 'matches' => self::STATS_TEMPLATE, |
| 60 | 'total-time' => [ 'resolution' => 1e-3 ] + self::STATS_TEMPLATE, |
| 61 | 'total-cond' => self::STATS_TEMPLATE |
| 62 | ]; |
| 63 | } |
| 64 | |
| 65 | public function resetFilterProfile( int $filter ): void { |
| 66 | $writer = $this->statsFactory->createWriter( |
| 67 | $this->statsSpecs, |
| 68 | self::KEY_PREFIX |
| 69 | ); |
| 70 | $writer->resetAll( [ $this->filterProfileKey( $filter ) ] ); |
| 71 | } |
| 72 | |
| 73 | /** |
| 74 | * Retrieve per-filter statistics, totalled over the latest self::STATS_STORAGE_PERIOD seconds. |
| 75 | * |
| 76 | * @param int $filter Filter ID |
| 77 | * @return array{count:int,matches:int,total-time:float,total-cond:int} |
| 78 | * One total for each requested metric: 'count', 'matches', 'total-time' and 'total-cond' |
| 79 | */ |
| 80 | public function getFilterProfile( int $filter ): array { |
| 81 | $reader = $this->statsFactory->createReader( |
| 82 | $this->statsSpecs, |
| 83 | self::KEY_PREFIX |
| 84 | ); |
| 85 | return $reader->total( $reader->getRates( |
| 86 | [ 'count', 'matches', 'total-time', 'total-cond' ], |
| 87 | $this->filterProfileKey( $filter ), |
| 88 | $reader->latest( self::STATS_STORAGE_PERIOD ) |
| 89 | ) ); |
| 90 | } |
| 91 | |
| 92 | /** |
| 93 | * Retrieve per-group statistics, totalled over the latest self::STATS_STORAGE_PERIOD seconds. |
| 94 | * |
| 95 | * @param string $group Filter group name |
| 96 | * @return array{total:int,overflow:int,total-time:float,total-cond:int,matches:int} |
| 97 | * One total for each requested metric: 'total', 'overflow', 'total-time', 'total-cond', 'matches' |
| 98 | */ |
| 99 | public function getGroupProfile( string $group ): array { |
| 100 | $reader = $this->statsFactory->createReader( |
| 101 | $this->statsSpecs, |
| 102 | self::KEY_PREFIX |
| 103 | ); |
| 104 | return $reader->total( $reader->getRates( |
| 105 | [ 'total', 'overflow', 'total-time', 'total-cond', 'matches' ], |
| 106 | $this->filterProfileGroupKey( $group ), |
| 107 | $reader->latest( self::STATS_STORAGE_PERIOD ) |
| 108 | ) ); |
| 109 | } |
| 110 | |
| 111 | /** |
| 112 | * Record profiling data for one filter: increments its counters, then flushes the writer |
| 113 | * |
| 114 | * @param int $filter Filter ID |
| 115 | * @param float $time Time taken, in milliseconds |
| 116 | * @param int $conds Amount added to the filter's 'total-cond' counter |
| 117 | * @param bool $matched Whether the filter matched; if so, 'matches' is incremented as well |
| 118 | */ |
| 119 | private function recordProfilingResult( int $filter, float $time, int $conds, bool $matched ): void { |
| 120 | $key = $this->filterProfileKey( $filter ); |
| 121 | $writer = $this->statsFactory->createWriter( |
| 122 | $this->statsSpecs, |
| 123 | self::KEY_PREFIX |
| 124 | ); |
| 125 | $writer->incr( 'count', $key ); |
| 126 | if ( $matched ) { |
| 127 | $writer->incr( 'matches', $key ); |
| 128 | } |
| 129 | $writer->incr( 'total-time', $key, $time ); |
| 130 | $writer->incr( 'total-cond', $key, $conds ); |
| 131 | $writer->flush(); |
| 132 | } |
| 133 | |
| 134 | /** |
| 135 | * Update the overall statistics of a filter group, then flush the writer |
| 136 | * |
| 137 | * @param string $group Filter group name |
| 138 | * @param int $condsUsed The amount of used conditions |
| 139 | * @param float $totalTime Time taken, in milliseconds |
| 140 | * @param bool $anyMatch Whether at least one filter matched the action |
| 141 | */ |
| 142 | public function recordStats( string $group, int $condsUsed, float $totalTime, bool $anyMatch ): void { |
| 143 | $writer = $this->statsFactory->createWriter( |
| 144 | $this->statsSpecs, |
| 145 | self::KEY_PREFIX |
| 146 | ); |
| 147 | $key = $this->filterProfileGroupKey( $group ); |
| 148 | |
| 149 | $writer->incr( 'total', $key ); |
| 150 | $writer->incr( 'total-time', $key, $totalTime ); |
| 151 | $writer->incr( 'total-cond', $key, $condsUsed ); |
| 152 | |
| 153 | // Increment overflow counter, if our condition limit overflowed |
| 154 | if ( $condsUsed > $this->options->get( 'AbuseFilterConditionLimit' ) ) { |
| 155 | $writer->incr( 'overflow', $key ); |
| 156 | } |
| 157 | |
| 158 | // Increment counter by 1 if there was at least one match |
| 159 | if ( $anyMatch ) { |
| 160 | $writer->incr( 'matches', $key ); |
| 161 | } |
| 162 | $writer->flush(); |
| 163 | } |
| 164 | |
| 165 | /** |
| 166 | * Record runtime profiling data for all filters together, as statsd metrics keyed by the local wiki ID |
| 167 | * |
| 168 | * @param int $totalFilters Sent with timing() like the runtime, although it is a count |
| 169 | * @param int $totalConditions Sent with timing() like the runtime, although it is a count |
| 170 | * @param float $runtime NOTE(review): unit is not stated here, presumably milliseconds; confirm with callers |
| 171 | * @codeCoverageIgnore |
| 172 | */ |
| 173 | public function recordRuntimeProfilingResult( int $totalFilters, int $totalConditions, float $runtime ): void { |
| 174 | $keyPrefix = 'abusefilter.runtime-profile.' . $this->localWikiID . '.'; |
| 175 | |
| 176 | $this->statsd->timing( $keyPrefix . 'runtime', $runtime ); |
| 177 | $this->statsd->timing( $keyPrefix . 'total_filters', $totalFilters ); |
| 178 | $this->statsd->timing( $keyPrefix . 'total_conditions', $totalConditions ); |
| 179 | } |
| 180 | |
| 181 | /** |
| 182 | * Record per-filter profiling for all filters, and log any filter slower than the configured limit |
| 183 | * |
| 184 | * @param Title $title Passed on to the slow-filter log entry |
| 185 | * @param array<string,array{time:float,conds:int,result:bool}> $data Profiling data, keyed by filter name |
| 186 | */ |
| 187 | public function recordPerFilterProfiling( Title $title, array $data ): void { |
| 188 | $slowFilterThreshold = $this->options->get( 'AbuseFilterSlowFilterRuntimeLimit' ); |
| 189 | |
| 190 | foreach ( $data as $filterName => $params ) { |
| 191 | [ $filterID, $global ] = GlobalNameUtils::splitGlobalName( $filterName ); |
| 192 | // @todo Maybe add a parameter to recordProfilingResult to record global filters |
| 193 | // data separately (in the foreign wiki) |
| 194 | if ( !$global ) { |
| 195 | $this->recordProfilingResult( |
| 196 | $filterID, |
| 197 | $params['time'], |
| 198 | $params['conds'], |
| 199 | $params['result'] |
| 200 | ); |
| 201 | } |
| 202 | |
| 203 | if ( $params['time'] > $slowFilterThreshold ) { |
| 204 | $this->recordSlowFilter( |
| 205 | $title, |
| 206 | $filterName, |
| 207 | $params['time'], |
| 208 | $params['conds'], |
| 209 | $params['result'], |
| 210 | $global |
| 211 | ); |
| 212 | } |
| 213 | } |
| 214 | } |
| 215 | |
| 216 | /** |
| 217 | * Logs a slow filter's runtime data (at info level) for later analysis |
| 218 | * |
| 219 | * @param Title $title Its prefixed text is included in the log context |
| 220 | * @param string $filterId The full filter name from the profiling data, not just the numeric ID |
| 221 | * @param float $runtime |
| 222 | * @param int $totalConditions |
| 223 | * @param bool $matched |
| 224 | * @param bool $global The global flag obtained from GlobalNameUtils::splitGlobalName() |
| 225 | */ |
| 226 | private function recordSlowFilter( |
| 227 | Title $title, |
| 228 | string $filterId, |
| 229 | float $runtime, |
| 230 | int $totalConditions, |
| 231 | bool $matched, |
| 232 | bool $global |
| 233 | ): void { |
| 234 | $this->logger->info( |
| 235 | 'Edit filter {filter_id} on {wiki} is taking longer than expected', |
| 236 | [ |
| 237 | 'wiki' => $this->localWikiID, |
| 238 | 'filter_id' => $filterId, |
| 239 | 'title' => $title->getPrefixedText(), |
| 240 | 'runtime' => $runtime, |
| 241 | 'matched' => $matched, |
| 242 | 'total_conditions' => $totalConditions, |
| 243 | 'global' => $global |
| 244 | ] |
| 245 | ); |
| 246 | } |
| 247 | |
| 248 | /** |
| 249 | * Get the WRStats entity key used to store per-filter profiling data. |
| 250 | * |
| 251 | * @param int $filter Filter ID; cast to string as the second key component |
| 252 | * @return LocalEntityKey |
| 253 | */ |
| 254 | private function filterProfileKey( int $filter ): LocalEntityKey { |
| 255 | return new LocalEntityKey( [ 'filter', (string)$filter ] ); |
| 256 | } |
| 257 | |
| 258 | /** |
| 259 | * Get the WRStats entity key used to store overall profiling data for a filter group. |
| 260 | * |
| 261 | * @param string $group Filter group name; used as the second key component |
| 262 | * @return LocalEntityKey |
| 263 | */ |
| 264 | private function filterProfileGroupKey( string $group ): LocalEntityKey { |
| 265 | return new LocalEntityKey( [ 'group', $group ] ); |
| 266 | } |
| 267 | } |