Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
95.10% |
330 / 347 |
|
71.43% |
5 / 7 |
CRAP | |
0.00% |
0 / 1 |
FlaggedRevsStats | |
95.10% |
330 / 347 |
|
71.43% |
5 / 7 |
41 | |
0.00% |
0 / 1 |
getStats | |
100.00% |
51 / 51 |
|
100.00% |
1 / 1 |
15 | |||
updateCache | |
98.82% |
84 / 85 |
|
0.00% |
0 / 1 |
5 | |||
getPerNamespaceTotals | |
100.00% |
25 / 25 |
|
100.00% |
1 / 1 |
2 | |||
dbUnixTime | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
getMeanPendingEditTime | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
1 | |||
getEditReviewTimes | |
89.87% |
142 / 158 |
|
0.00% |
0 / 1 |
15.23 | |||
estimateRevisionRowCount | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | use MediaWiki\MediaWikiServices; |
4 | use Wikimedia\ObjectCache\BagOStuff; |
5 | use Wikimedia\Rdbms\IExpression; |
6 | use Wikimedia\Rdbms\IReadableDatabase; |
7 | use Wikimedia\Rdbms\RawSQLExpression; |
8 | use Wikimedia\Rdbms\SelectQueryBuilder; |
9 | use Wikimedia\Rdbms\Subquery; |
10 | |
11 | /** |
12 | * FlaggedRevs stats functions |
13 | */ |
14 | class FlaggedRevsStats { |
15 | /** |
16 | * @return array of current FR stats |
17 | */ |
18 | public static function getStats() { |
19 | $data = [ |
20 | 'reviewLag-anon-sampleSize' => '-', |
21 | 'reviewLag-anon-average' => '-', |
22 | 'reviewLag-anon-median' => '-', |
23 | 'reviewLag-anon-percentile' => [], |
24 | 'reviewLag-user-sampleSize' => '-', |
25 | 'reviewLag-user-average' => '-', |
26 | 'reviewLag-user-median' => '-', |
27 | 'reviewLag-user-percentile' => [], |
28 | 'totalPages-NS' => [], |
29 | 'reviewedPages-NS' => [], |
30 | 'syncedPages-NS' => [], |
31 | 'pendingLag-average' => '-', |
32 | 'statTimestamp' => '-', |
33 | ]; |
34 | |
35 | $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase(); |
36 | // Latest timestamp recorded |
37 | $timestamp = $dbr->newSelectQueryBuilder() |
38 | ->select( 'MAX(frs_timestamp)' ) |
39 | ->from( 'flaggedrevs_statistics' ) |
40 | ->caller( __METHOD__ ) |
41 | ->fetchField(); |
42 | |
43 | if ( $timestamp !== false ) { |
44 | $data['statTimestamp'] = wfTimestamp( TS_MW, $timestamp ); |
45 | |
46 | $res = $dbr->newSelectQueryBuilder() |
47 | ->select( [ 'frs_stat_key', 'frs_stat_val' ] ) |
48 | ->from( 'flaggedrevs_statistics' ) |
49 | ->where( [ 'frs_timestamp' => $dbr->timestamp( $timestamp ) ] ) |
50 | ->caller( __METHOD__ ) |
51 | ->fetchResultSet(); |
52 | foreach ( $res as $row ) { |
53 | $key = explode( ':', $row->frs_stat_key ); |
54 | switch ( $key[0] ) { |
55 | case 'reviewLag-anon-sampleSize': |
56 | case 'reviewLag-anon-average': |
57 | case 'reviewLag-anon-median': |
58 | case 'reviewLag-user-sampleSize': |
59 | case 'reviewLag-user-average': |
60 | case 'reviewLag-user-median': |
61 | case 'pendingLag-average': |
62 | $data[$key[0]] = (int)$row->frs_stat_val; |
63 | break; |
64 | case 'reviewLag-anon-percentile': // <stat name,percentile> |
65 | case 'reviewLag-user-percentile': // <stat name,percentile> |
66 | $data[$key[0]][$key[1]] = (int)$row->frs_stat_val; |
67 | break; |
68 | case 'totalPages-NS': // <stat name,namespace> |
69 | case 'reviewedPages-NS': // <stat name,namespace> |
70 | case 'syncedPages-NS': // <stat name,namespace> |
71 | $data[$key[0]][$key[1]] = (int)$row->frs_stat_val; |
72 | break; |
73 | } |
74 | } |
75 | } |
76 | |
77 | return $data; |
78 | } |
79 | |
80 | /** |
81 | * Run a stats update and update the DB |
82 | * Note: this can easily be too expensive to run live |
83 | * |
84 | * @return void |
85 | */ |
86 | public static function updateCache() { |
87 | $rNamespaces = FlaggedRevs::getReviewNamespaces(); |
88 | $cache = MediaWikiServices::getInstance() |
89 | ->getObjectCacheFactory()->getLocalClusterInstance(); |
90 | if ( !$rNamespaces ) { |
91 | return; // no SQL errors please :) |
92 | } |
93 | |
94 | // Get total, reviewed, and synced page count for each namespace |
95 | [ $ns_total, $ns_reviewed, $ns_synced ] = self::getPerNamespaceTotals(); |
96 | |
97 | // Getting mean pending edit time |
98 | // @TODO: percentiles? |
99 | $avePET = self::getMeanPendingEditTime(); |
100 | |
101 | # Get wait (till review) time samples for anon edits... |
102 | $reviewDataAnon = self::getEditReviewTimes( $cache, 'anons' ); |
103 | # Get wait (till review) time samples for logged-in user edits... |
104 | $reviewDataUser = self::getEditReviewTimes( $cache, 'users' ); |
105 | |
106 | $dbw = MediaWikiServices::getInstance()->getConnectionProvider()->getPrimaryDatabase(); |
107 | // The timestamp to identify this whole batch of data |
108 | $encDataTimestamp = $dbw->timestamp(); |
109 | |
110 | $dataSet = []; |
111 | // Data range for samples... |
112 | $dataSet[] = [ |
113 | 'frs_stat_key' => 'reviewLag-anon-sampleStartTimestamp', |
114 | 'frs_stat_val' => $reviewDataAnon['sampleStartTS'], // unix |
115 | 'frs_timestamp' => $encDataTimestamp ]; |
116 | $dataSet[] = [ |
117 | 'frs_stat_key' => 'reviewLag-user-sampleStartTimestamp', |
118 | 'frs_stat_val' => $reviewDataUser['sampleStartTS'], // unix |
119 | 'frs_timestamp' => $encDataTimestamp ]; |
120 | $dataSet[] = [ |
121 | 'frs_stat_key' => 'reviewLag-anon-sampleEndTimestamp', |
122 | 'frs_stat_val' => $reviewDataAnon['sampleEndTS'], // unix |
123 | 'frs_timestamp' => $encDataTimestamp ]; |
124 | $dataSet[] = [ |
125 | 'frs_stat_key' => 'reviewLag-user-sampleEndTimestamp', |
126 | 'frs_stat_val' => $reviewDataUser['sampleEndTS'], // unix |
127 | 'frs_timestamp' => $encDataTimestamp ]; |
128 | // All-namespace percentiles... |
129 | foreach ( $reviewDataAnon['percTable'] as $percentile => $seconds ) { |
130 | $dataSet[] = [ |
131 | 'frs_stat_key' => 'reviewLag-anon-percentile:' . (int)$percentile, |
132 | 'frs_stat_val' => $seconds, |
133 | 'frs_timestamp' => $encDataTimestamp ]; |
134 | } |
135 | foreach ( $reviewDataUser['percTable'] as $percentile => $seconds ) { |
136 | $dataSet[] = [ |
137 | 'frs_stat_key' => 'reviewLag-user-percentile:' . (int)$percentile, |
138 | 'frs_stat_val' => $seconds, |
139 | 'frs_timestamp' => $encDataTimestamp ]; |
140 | } |
141 | // Sample sizes... |
142 | $dataSet[] = [ |
143 | 'frs_stat_key' => 'reviewLag-anon-sampleSize', |
144 | 'frs_stat_val' => $reviewDataAnon['sampleSize'], |
145 | 'frs_timestamp' => $encDataTimestamp ]; |
146 | $dataSet[] = [ |
147 | 'frs_stat_key' => 'reviewLag-user-sampleSize', |
148 | 'frs_stat_val' => $reviewDataUser['sampleSize'], |
149 | 'frs_timestamp' => $encDataTimestamp ]; |
150 | |
151 | // All-namespace ave/med review lag & ave pending lag stats... |
152 | $dataSet[] = [ |
153 | 'frs_stat_key' => 'reviewLag-anon-average', |
154 | 'frs_stat_val' => $reviewDataAnon['average'], |
155 | 'frs_timestamp' => $encDataTimestamp ]; |
156 | $dataSet[] = [ |
157 | 'frs_stat_key' => 'reviewLag-user-average', |
158 | 'frs_stat_val' => $reviewDataUser['average'], |
159 | 'frs_timestamp' => $encDataTimestamp ]; |
160 | $dataSet[] = [ |
161 | 'frs_stat_key' => 'reviewLag-anon-median', |
162 | 'frs_stat_val' => $reviewDataAnon['median'], |
163 | 'frs_timestamp' => $encDataTimestamp ]; |
164 | $dataSet[] = [ |
165 | 'frs_stat_key' => 'reviewLag-user-median', |
166 | 'frs_stat_val' => $reviewDataUser['median'], |
167 | 'frs_timestamp' => $encDataTimestamp ]; |
168 | $dataSet[] = [ |
169 | 'frs_stat_key' => 'pendingLag-average', |
170 | 'frs_stat_val' => $avePET, |
171 | 'frs_timestamp' => $encDataTimestamp ]; |
172 | |
173 | // Per-namespace total/reviewed/synced stats... |
174 | foreach ( $rNamespaces as $namespace ) { |
175 | $dataSet[] = [ |
176 | 'frs_stat_key' => 'totalPages-NS:' . (int)$namespace, |
177 | 'frs_stat_val' => $ns_total[$namespace] ?? 0, |
178 | 'frs_timestamp' => $encDataTimestamp ]; |
179 | $dataSet[] = [ |
180 | 'frs_stat_key' => 'reviewedPages-NS:' . (int)$namespace, |
181 | 'frs_stat_val' => $ns_reviewed[$namespace] ?? 0, |
182 | 'frs_timestamp' => $encDataTimestamp ]; |
183 | $dataSet[] = [ |
184 | 'frs_stat_key' => 'syncedPages-NS:' . (int)$namespace, |
185 | 'frs_stat_val' => $ns_synced[$namespace] ?? 0, |
186 | 'frs_timestamp' => $encDataTimestamp ]; |
187 | } |
188 | |
189 | // Save the data... |
190 | $dbw->newInsertQueryBuilder() |
191 | ->insertInto( 'flaggedrevs_statistics' ) |
192 | ->ignore() |
193 | ->rows( $dataSet ) |
194 | ->caller( __METHOD__ ) |
195 | ->execute(); |
196 | } |
197 | |
198 | /** |
199 | * @return int[][] |
200 | */ |
201 | private static function getPerNamespaceTotals() { |
202 | $ns_total = []; |
203 | $ns_reviewed = []; |
204 | $ns_synced = []; |
205 | // Get total, reviewed, and synced page count for each namespace |
206 | $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase( false, 'vslow' ); |
207 | $res = $dbr->newSelectQueryBuilder() |
208 | ->select( [ |
209 | 'page_namespace', |
210 | 'total' => 'COUNT(*)', |
211 | 'reviewed' => 'COUNT(fp_page_id)', |
212 | 'pending' => 'COUNT(fp_pending_since)' |
213 | ] ) |
214 | ->from( 'page' ) |
215 | ->leftJoin( 'flaggedpages', null, 'fp_page_id = page_id' ) |
216 | ->where( [ |
217 | 'page_is_redirect' => 0, |
218 | 'page_namespace' => FlaggedRevs::getReviewNamespaces() |
219 | ] ) |
220 | ->groupBy( 'page_namespace' ) |
221 | ->caller( __METHOD__ ) |
222 | ->fetchResultSet(); |
223 | foreach ( $res as $row ) { |
224 | $ns_total[$row->page_namespace] = (int)$row->total; |
225 | $ns_reviewed[$row->page_namespace] = (int)$row->reviewed; |
226 | $ns_synced[$row->page_namespace] = (int)$row->reviewed - (int)$row->pending; |
227 | } |
228 | return [ $ns_total, $ns_reviewed, $ns_synced ]; |
229 | } |
230 | |
231 | /** |
232 | * @param IReadableDatabase $db |
233 | * @param string $column |
234 | * |
235 | * @return string |
236 | */ |
237 | private static function dbUnixTime( IReadableDatabase $db, $column ) { |
238 | return $db->getType() === 'sqlite' ? "strftime('%s',$column)" : "UNIX_TIMESTAMP($column)"; |
239 | } |
240 | |
241 | /** |
242 | * @return int |
243 | */ |
244 | private static function getMeanPendingEditTime() { |
245 | $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase( false, 'vslow' ); |
246 | $nowUnix = wfTimestamp(); |
247 | $unixTimeCall = self::dbUnixTime( $dbr, 'fp_pending_since' ); |
248 | return (int)$dbr->newSelectQueryBuilder() |
249 | ->select( "AVG( $nowUnix - $unixTimeCall )" ) |
250 | ->from( 'flaggedpages' ) |
251 | ->join( 'page', null, 'fp_page_id = page_id' ) |
252 | ->where( [ |
253 | $dbr->expr( 'fp_pending_since', '!=', null ), |
254 | 'page_namespace' => FlaggedRevs::getReviewNamespaces() // sanity |
255 | ] ) |
256 | ->caller( __METHOD__ ) |
257 | ->fetchField(); |
258 | } |
259 | |
260 | /** |
261 | * Get edit review time statistics (as recent as possible) |
262 | * @param BagOStuff $cache |
263 | * @param string $users string "anons" or "users" |
264 | * @return array associative |
265 | */ |
266 | private static function getEditReviewTimes( $cache, $users ) { |
267 | $result = [ |
268 | 'average' => 0, |
269 | 'median' => 0, |
270 | 'percTable' => [], |
271 | 'sampleSize' => 0, |
272 | 'sampleStartTS' => null, |
273 | 'sampleEndTS' => null |
274 | ]; |
275 | if ( FlaggedRevs::useOnlyIfProtected() ) { |
276 | return $result; // disabled |
277 | } |
278 | |
279 | $rPerTable = []; // review wait percentiles |
280 | # Only go so far back...otherwise we will get garbage values due to |
281 | # the fact that FlaggedRevs wasn't enabled until after a while. |
282 | $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase( false, 'vslow' ); |
283 | $installedUnix = $dbr->newSelectQueryBuilder() |
284 | ->select( self::dbUnixTime( $dbr, 'MIN(log_timestamp)' ) ) |
285 | ->from( 'logging' ) |
286 | ->where( [ 'log_type' => 'review' ] ) |
287 | ->caller( __METHOD__ ) |
288 | ->fetchField(); |
289 | $dbInstalled = $dbr->timestamp( $installedUnix ?: wfTimestamp() ); |
290 | # Skip the most recent recent revs as they are likely to just |
291 | # be WHERE condition misses. This also gives us more data to use. |
292 | # Lastly, we want to avoid bias that would make the time too low |
293 | # since new revisions could not have "took a long time to sight". |
294 | $worstLagTS = $dbr->timestamp(); // now |
295 | $lastTS = $dbInstalled; |
296 | while ( true ) { // should almost always be ~1 pass |
297 | # Get the page with the worst pending lag... |
298 | $row = $dbr->newSelectQueryBuilder() |
299 | ->select( [ 'fp_page_id', 'fr_rev_id', 'fp_pending_since', 'fr_timestamp' ] ) |
300 | ->from( 'flaggedpages' ) |
301 | ->join( 'flaggedrevs', null, [ 'fr_page_id = fp_page_id', 'fr_rev_id = fp_stable' ] ) |
302 | ->where( [ |
303 | $dbr->expr( 'fp_pending_since', '!=', null ), |
304 | $dbr->expr( 'fp_pending_since', '>', $lastTS ), // skip failed rows |
305 | ] ) |
306 | ->orderBy( 'fp_pending_since' ) |
307 | ->caller( __METHOD__ )->fetchRow(); |
308 | if ( !$row ) { |
309 | break; |
310 | } |
311 | # Find the newest revision at the time the page was reviewed, |
312 | # this is the one that *should* have been reviewed. |
313 | $idealRev = (int)$dbr->newSelectQueryBuilder() |
314 | ->select( 'rev_id' ) |
315 | ->from( 'revision' ) |
316 | ->where( [ |
317 | 'rev_page' => $row->fp_page_id, |
318 | $dbr->expr( 'rev_timestamp', '<', $row->fr_timestamp ), |
319 | ] ) |
320 | ->caller( __METHOD__ ) |
321 | ->orderBy( 'rev_timestamp', SelectQueryBuilder::SORT_DESC ) |
322 | ->fetchField(); |
323 | if ( $row->fr_rev_id >= $idealRev ) { |
324 | $worstLagTS = $row->fp_pending_since; |
325 | break; // sane $worstLagTS found |
326 | # Fudge factor to prevent deliberate reviewing of non-current revisions |
327 | # from squeezing the range. Shouldn't effect anything otherwise. |
328 | } else { |
329 | $lastTS = $row->fp_pending_since; // next iteration |
330 | } |
331 | } |
332 | |
333 | $tempUserConfig = MediaWikiServices::getInstance()->getTempUserConfig(); |
334 | |
335 | # User condition (anons/users) |
336 | if ( $users === 'anons' ) { |
337 | $anonConds = [ new RawSQLExpression( 'actor_user IS NULL' ) ]; |
338 | if ( $tempUserConfig->isKnown() ) { |
339 | $anonConds[] = $tempUserConfig->getMatchCondition( $dbr, 'actor_name', IExpression::LIKE ); |
340 | } |
341 | |
342 | $userCondition = $dbr->orExpr( $anonConds ); |
343 | } elseif ( $users === 'users' ) { |
344 | $userConds = [ new RawSQLExpression( 'actor_user IS NOT NULL' ) ]; |
345 | if ( $tempUserConfig->isKnown() ) { |
346 | $userConds[] = $tempUserConfig->getMatchCondition( $dbr, 'actor_name', IExpression::NOT_LIKE ); |
347 | } |
348 | |
349 | $userCondition = $dbr->andExpr( $userConds ); |
350 | } else { |
351 | throw new InvalidArgumentException( 'Invalid $users param given.' ); |
352 | } |
353 | # Avoid having to censor data |
354 | # Note: if no edits pending, $worstLagTS is the cur time just before we checked |
355 | # for the worst lag. Thus, new edits *right* after the check are properly excluded. |
356 | $maxTSUnix = (int)wfTimestamp( TS_UNIX, $worstLagTS ) - 1; // all edits later reviewed |
357 | $dbMaxTS = $dbr->timestamp( $maxTSUnix ); |
358 | # Use a one week time range |
359 | $days = 7; |
360 | $minTSUnix = $maxTSUnix - $days * 86400; |
361 | # Approximate the number rows to scan |
362 | $rows = self::estimateRevisionRowCount( $dbr, $maxTSUnix, 7, $userCondition ); |
363 | # If the range doesn't have many rows (like on small wikis), use 30 days |
364 | if ( $rows < 500 ) { |
365 | $rows = self::estimateRevisionRowCount( $dbr, $maxTSUnix, 30, $userCondition ); |
366 | # If the range doesn't have many rows (like on really tiny wikis), use 90 days |
367 | if ( $rows < 500 ) { |
368 | $days = 90; |
369 | $minTSUnix = $maxTSUnix - $days * 86400; |
370 | } |
371 | } |
372 | |
373 | $sampleSize = 1500; // sample size |
374 | # Sanity check the starting timestamp |
375 | $minTSUnix = max( $minTSUnix, $installedUnix ); |
376 | $dbMinTS = $dbr->timestamp( $minTSUnix ); |
377 | # Get timestamp boundaries |
378 | $timeCondition = [ |
379 | $dbr->expr( 'rev_timestamp', '>=', $dbMinTS ), |
380 | $dbr->expr( 'rev_timestamp', '<=', $dbMaxTS ), |
381 | ]; |
382 | # Get mod for edit spread |
383 | $fname = __METHOD__; |
384 | $edits = $cache->getWithSetCallback( |
385 | $cache->makeKey( 'flaggedrevs', 'rcEditCount', $users, $days ), |
386 | $cache::TTL_WEEK * 2, |
387 | static function () use ( $dbr, $fname, $userCondition, $timeCondition ) { |
388 | return (int)$dbr->newSelectQueryBuilder() |
389 | ->select( 'COUNT(*)' ) |
390 | ->from( 'revision' ) |
391 | ->join( 'page', null, 'page_id = rev_page' ) |
392 | ->join( 'actor', null, 'rev_actor = actor_id' ) |
393 | ->where( $timeCondition ) |
394 | ->andWhere( [ |
395 | $userCondition, |
396 | 'page_namespace' => FlaggedRevs::getReviewNamespaces() |
397 | ] ) |
398 | ->caller( $fname ) |
399 | ->fetchField(); |
400 | } |
401 | ); |
402 | $mod = max( floor( $edits / $sampleSize ), 1 ); # $mod >= 1 |
403 | # For edits that started off pending, how long do they take to get reviewed? |
404 | # Edits started off pending if made when a flagged rev of the page already existed. |
405 | # Get the *first* reviewed rev *after* each edit and get the time difference. |
406 | $res = $dbr->newSelectQueryBuilder() |
407 | ->select( [ |
408 | 'rt' => 'rev_timestamp', // time revision was made |
409 | 'nft' => new Subquery( $dbr->newSelectQueryBuilder() |
410 | ->select( 'MIN(fr_timestamp)' ) |
411 | ->from( 'flaggedrevs' ) |
412 | ->where( [ |
413 | 'fr_page_id = rev_page', |
414 | 'fr_rev_timestamp >= rev_timestamp' |
415 | ] ) |
416 | ->caller( __METHOD__ ) |
417 | ->getSQL() |
418 | ) // time when revision was first reviewed |
419 | ] ) |
420 | ->from( 'revision' ) |
421 | ->join( 'actor', null, 'rev_actor = actor_id' ) |
422 | ->where( [ |
423 | $userCondition, |
424 | "(rev_id % $mod) = 0", |
425 | $dbr->expr( 'rev_parent_id', '>', 0 ), // optimize (exclude new pages) |
426 | 'EXISTS (' . $dbr->newSelectQueryBuilder() |
427 | ->select( '*' ) |
428 | ->from( 'flaggedrevs' ) |
429 | ->where( [ // page was reviewed when this revision was made |
430 | 'fr_page_id = rev_page', |
431 | 'fr_rev_timestamp < rev_timestamp', // before this revision |
432 | 'fr_rev_id < rev_id', // not imported later |
433 | 'fr_timestamp < rev_timestamp', // page reviewed before revision |
434 | ] ) |
435 | ->caller( __METHOD__ ) |
436 | ->getSQL() . |
437 | ')' |
438 | ] ) |
439 | ->andWhere( $timeCondition ) |
440 | ->caller( __METHOD__ ) |
441 | ->fetchResultSet(); |
442 | |
443 | $secondsR = 0; // total wait seconds for edits later reviewed |
444 | $secondsP = 0; // total wait seconds for edits still pending |
445 | $times = []; |
446 | if ( $res->numRows() ) { |
447 | # Get the elapsed times revs were pending (flagged time - edit time) |
448 | foreach ( $res as $row ) { |
449 | $time = (int)wfTimestamp( TS_UNIX, $row->nft ) - (int)wfTimestamp( TS_UNIX, $row->rt ); |
450 | $time = max( $time, 0 ); // sanity |
451 | $secondsR += $time; |
452 | $times[] = $time; |
453 | } |
454 | $sampleSize = count( $times ); |
455 | $aveRT = ( $secondsR + $secondsP ) / $sampleSize; // sample mean |
456 | sort( $times ); // order smallest -> largest |
457 | // Sample median |
458 | $rank = intval( round( count( $times ) / 2 + 0.5 ) - 1 ); |
459 | $medianRT = $times[$rank]; |
460 | // Make percentile tabulation data |
461 | $doPercentiles = [ 35, 45, 55, 65, 75, 85, 90, 95 ]; |
462 | foreach ( $doPercentiles as $percentile ) { |
463 | $rank = intval( round( $percentile * count( $times ) / 100 + 0.5 ) - 1 ); |
464 | $rPerTable[$percentile] = $times[$rank]; |
465 | } |
466 | $result['average'] = $aveRT; |
467 | $result['median'] = $medianRT; |
468 | $result['percTable'] = $rPerTable; |
469 | $result['sampleSize'] = count( $times ); |
470 | $result['sampleStartTS'] = $minTSUnix; |
471 | $result['sampleEndTS'] = $maxTSUnix; |
472 | } |
473 | |
474 | return $result; |
475 | } |
476 | |
477 | /** |
478 | * Convenience function to estimate the number of revisions authored by a given user type |
479 | * within a given timeframe. |
480 | * |
481 | * @param IReadableDatabase $dbr Replica DB connection handle. |
482 | * @param int $maxTSUnix Only consider revisions created before this UNIX timestamp. |
483 | * @param int $days Only consider revisions created at most this many days before $maxTSUnix. |
484 | * @param IExpression|string $userCondition SQL condition to filter revisions by user type. |
485 | * |
486 | * @return int The estimated number of revisions matching the given time and user constraints. |
487 | */ |
488 | private static function estimateRevisionRowCount( |
489 | IReadableDatabase $dbr, |
490 | int $maxTSUnix, |
491 | int $days, |
492 | $userCondition |
493 | ): int { |
494 | $minTSUnix = $maxTSUnix - $days * 86400; |
495 | $dbMinTS = $dbr->timestamp( $minTSUnix ); |
496 | $dbMaxTS = $dbr->timestamp( $maxTSUnix ); |
497 | |
498 | return $dbr->newSelectQueryBuilder() |
499 | ->select( '1' ) |
500 | ->from( 'revision' ) |
501 | ->join( 'actor', null, 'rev_actor = actor_id' ) |
502 | ->where( $userCondition ) |
503 | ->andWhere( [ |
504 | $dbr->expr( 'rev_timestamp', '>=', $dbMinTS ), |
505 | $dbr->expr( 'rev_timestamp', '<=', $dbMaxTS ), |
506 | ] ) |
507 | ->caller( __METHOD__ ) |
508 | ->estimateRowCount(); |
509 | } |
510 | } |