Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
95.10% |
330 / 347 |
|
71.43% |
5 / 7 |
CRAP | |
0.00% |
0 / 1 |
| FlaggedRevsStats | |
95.10% |
330 / 347 |
|
71.43% |
5 / 7 |
41 | |
0.00% |
0 / 1 |
| getStats | |
100.00% |
51 / 51 |
|
100.00% |
1 / 1 |
15 | |||
| updateCache | |
98.82% |
84 / 85 |
|
0.00% |
0 / 1 |
5 | |||
| getPerNamespaceTotals | |
100.00% |
25 / 25 |
|
100.00% |
1 / 1 |
2 | |||
| dbUnixTime | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
2 | |||
| getMeanPendingEditTime | |
100.00% |
13 / 13 |
|
100.00% |
1 / 1 |
1 | |||
| getEditReviewTimes | |
89.87% |
142 / 158 |
|
0.00% |
0 / 1 |
15.23 | |||
| estimateRevisionRowCount | |
100.00% |
14 / 14 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | |
| 3 | use MediaWiki\MediaWikiServices; |
| 4 | use Wikimedia\ObjectCache\BagOStuff; |
| 5 | use Wikimedia\Rdbms\IExpression; |
| 6 | use Wikimedia\Rdbms\IReadableDatabase; |
| 7 | use Wikimedia\Rdbms\RawSQLExpression; |
| 8 | use Wikimedia\Rdbms\SelectQueryBuilder; |
| 9 | use Wikimedia\Rdbms\Subquery; |
| 10 | |
| 11 | /** |
| 12 | * FlaggedRevs stats functions |
| 13 | */ |
| 14 | class FlaggedRevsStats { |
| 15 | /** |
| 16 | * @return array of current FR stats |
| 17 | */ |
| 18 | public static function getStats() { |
| 19 | $data = [ |
| 20 | 'reviewLag-anon-sampleSize' => '-', |
| 21 | 'reviewLag-anon-average' => '-', |
| 22 | 'reviewLag-anon-median' => '-', |
| 23 | 'reviewLag-anon-percentile' => [], |
| 24 | 'reviewLag-user-sampleSize' => '-', |
| 25 | 'reviewLag-user-average' => '-', |
| 26 | 'reviewLag-user-median' => '-', |
| 27 | 'reviewLag-user-percentile' => [], |
| 28 | 'totalPages-NS' => [], |
| 29 | 'reviewedPages-NS' => [], |
| 30 | 'syncedPages-NS' => [], |
| 31 | 'pendingLag-average' => '-', |
| 32 | 'statTimestamp' => '-', |
| 33 | ]; |
| 34 | |
| 35 | $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase(); |
| 36 | // Latest timestamp recorded |
| 37 | $timestamp = $dbr->newSelectQueryBuilder() |
| 38 | ->select( 'MAX(frs_timestamp)' ) |
| 39 | ->from( 'flaggedrevs_statistics' ) |
| 40 | ->caller( __METHOD__ ) |
| 41 | ->fetchField(); |
| 42 | |
| 43 | if ( $timestamp !== false ) { |
| 44 | $data['statTimestamp'] = wfTimestamp( TS_MW, $timestamp ); |
| 45 | |
| 46 | $res = $dbr->newSelectQueryBuilder() |
| 47 | ->select( [ 'frs_stat_key', 'frs_stat_val' ] ) |
| 48 | ->from( 'flaggedrevs_statistics' ) |
| 49 | ->where( [ 'frs_timestamp' => $dbr->timestamp( $timestamp ) ] ) |
| 50 | ->caller( __METHOD__ ) |
| 51 | ->fetchResultSet(); |
| 52 | foreach ( $res as $row ) { |
| 53 | $key = explode( ':', $row->frs_stat_key ); |
| 54 | switch ( $key[0] ) { |
| 55 | case 'reviewLag-anon-sampleSize': |
| 56 | case 'reviewLag-anon-average': |
| 57 | case 'reviewLag-anon-median': |
| 58 | case 'reviewLag-user-sampleSize': |
| 59 | case 'reviewLag-user-average': |
| 60 | case 'reviewLag-user-median': |
| 61 | case 'pendingLag-average': |
| 62 | $data[$key[0]] = (int)$row->frs_stat_val; |
| 63 | break; |
| 64 | case 'reviewLag-anon-percentile': // <stat name,percentile> |
| 65 | case 'reviewLag-user-percentile': // <stat name,percentile> |
| 66 | $data[$key[0]][$key[1]] = (int)$row->frs_stat_val; |
| 67 | break; |
| 68 | case 'totalPages-NS': // <stat name,namespace> |
| 69 | case 'reviewedPages-NS': // <stat name,namespace> |
| 70 | case 'syncedPages-NS': // <stat name,namespace> |
| 71 | $data[$key[0]][$key[1]] = (int)$row->frs_stat_val; |
| 72 | break; |
| 73 | } |
| 74 | } |
| 75 | } |
| 76 | |
| 77 | return $data; |
| 78 | } |
| 79 | |
| 80 | /** |
| 81 | * Run a stats update and update the DB |
| 82 | * Note: this can easily be too expensive to run live |
| 83 | * |
| 84 | * @return void |
| 85 | */ |
| 86 | public static function updateCache() { |
| 87 | $rNamespaces = FlaggedRevs::getReviewNamespaces(); |
| 88 | $cache = MediaWikiServices::getInstance() |
| 89 | ->getObjectCacheFactory()->getLocalClusterInstance(); |
| 90 | if ( !$rNamespaces ) { |
| 91 | return; // no SQL errors please :) |
| 92 | } |
| 93 | |
| 94 | // Get total, reviewed, and synced page count for each namespace |
| 95 | [ $ns_total, $ns_reviewed, $ns_synced ] = self::getPerNamespaceTotals(); |
| 96 | |
| 97 | // Getting mean pending edit time |
| 98 | // @TODO: percentiles? |
| 99 | $avePET = self::getMeanPendingEditTime(); |
| 100 | |
| 101 | # Get wait (till review) time samples for anon edits... |
| 102 | $reviewDataAnon = self::getEditReviewTimes( $cache, 'anons' ); |
| 103 | # Get wait (till review) time samples for logged-in user edits... |
| 104 | $reviewDataUser = self::getEditReviewTimes( $cache, 'users' ); |
| 105 | |
| 106 | $dbw = MediaWikiServices::getInstance()->getConnectionProvider()->getPrimaryDatabase(); |
| 107 | // The timestamp to identify this whole batch of data |
| 108 | $encDataTimestamp = $dbw->timestamp(); |
| 109 | |
| 110 | $dataSet = []; |
| 111 | // Data range for samples... |
| 112 | $dataSet[] = [ |
| 113 | 'frs_stat_key' => 'reviewLag-anon-sampleStartTimestamp', |
| 114 | 'frs_stat_val' => $reviewDataAnon['sampleStartTS'] ?? 0, // unix |
| 115 | 'frs_timestamp' => $encDataTimestamp ]; |
| 116 | $dataSet[] = [ |
| 117 | 'frs_stat_key' => 'reviewLag-user-sampleStartTimestamp', |
| 118 | 'frs_stat_val' => $reviewDataUser['sampleStartTS'] ?? 0, // unix |
| 119 | 'frs_timestamp' => $encDataTimestamp ]; |
| 120 | $dataSet[] = [ |
| 121 | 'frs_stat_key' => 'reviewLag-anon-sampleEndTimestamp', |
| 122 | 'frs_stat_val' => $reviewDataAnon['sampleEndTS'] ?? 0, // unix |
| 123 | 'frs_timestamp' => $encDataTimestamp ]; |
| 124 | $dataSet[] = [ |
| 125 | 'frs_stat_key' => 'reviewLag-user-sampleEndTimestamp', |
| 126 | 'frs_stat_val' => $reviewDataUser['sampleEndTS'] ?? 0, // unix |
| 127 | 'frs_timestamp' => $encDataTimestamp ]; |
| 128 | // All-namespace percentiles... |
| 129 | foreach ( $reviewDataAnon['percTable'] as $percentile => $seconds ) { |
| 130 | $dataSet[] = [ |
| 131 | 'frs_stat_key' => 'reviewLag-anon-percentile:' . (int)$percentile, |
| 132 | 'frs_stat_val' => $seconds, |
| 133 | 'frs_timestamp' => $encDataTimestamp ]; |
| 134 | } |
| 135 | foreach ( $reviewDataUser['percTable'] as $percentile => $seconds ) { |
| 136 | $dataSet[] = [ |
| 137 | 'frs_stat_key' => 'reviewLag-user-percentile:' . (int)$percentile, |
| 138 | 'frs_stat_val' => $seconds, |
| 139 | 'frs_timestamp' => $encDataTimestamp ]; |
| 140 | } |
| 141 | // Sample sizes... |
| 142 | $dataSet[] = [ |
| 143 | 'frs_stat_key' => 'reviewLag-anon-sampleSize', |
| 144 | 'frs_stat_val' => $reviewDataAnon['sampleSize'], |
| 145 | 'frs_timestamp' => $encDataTimestamp ]; |
| 146 | $dataSet[] = [ |
| 147 | 'frs_stat_key' => 'reviewLag-user-sampleSize', |
| 148 | 'frs_stat_val' => $reviewDataUser['sampleSize'], |
| 149 | 'frs_timestamp' => $encDataTimestamp ]; |
| 150 | |
| 151 | // All-namespace ave/med review lag & ave pending lag stats... |
| 152 | $dataSet[] = [ |
| 153 | 'frs_stat_key' => 'reviewLag-anon-average', |
| 154 | 'frs_stat_val' => $reviewDataAnon['average'], |
| 155 | 'frs_timestamp' => $encDataTimestamp ]; |
| 156 | $dataSet[] = [ |
| 157 | 'frs_stat_key' => 'reviewLag-user-average', |
| 158 | 'frs_stat_val' => $reviewDataUser['average'], |
| 159 | 'frs_timestamp' => $encDataTimestamp ]; |
| 160 | $dataSet[] = [ |
| 161 | 'frs_stat_key' => 'reviewLag-anon-median', |
| 162 | 'frs_stat_val' => $reviewDataAnon['median'], |
| 163 | 'frs_timestamp' => $encDataTimestamp ]; |
| 164 | $dataSet[] = [ |
| 165 | 'frs_stat_key' => 'reviewLag-user-median', |
| 166 | 'frs_stat_val' => $reviewDataUser['median'], |
| 167 | 'frs_timestamp' => $encDataTimestamp ]; |
| 168 | $dataSet[] = [ |
| 169 | 'frs_stat_key' => 'pendingLag-average', |
| 170 | 'frs_stat_val' => $avePET, |
| 171 | 'frs_timestamp' => $encDataTimestamp ]; |
| 172 | |
| 173 | // Per-namespace total/reviewed/synced stats... |
| 174 | foreach ( $rNamespaces as $namespace ) { |
| 175 | $dataSet[] = [ |
| 176 | 'frs_stat_key' => 'totalPages-NS:' . (int)$namespace, |
| 177 | 'frs_stat_val' => $ns_total[$namespace] ?? 0, |
| 178 | 'frs_timestamp' => $encDataTimestamp ]; |
| 179 | $dataSet[] = [ |
| 180 | 'frs_stat_key' => 'reviewedPages-NS:' . (int)$namespace, |
| 181 | 'frs_stat_val' => $ns_reviewed[$namespace] ?? 0, |
| 182 | 'frs_timestamp' => $encDataTimestamp ]; |
| 183 | $dataSet[] = [ |
| 184 | 'frs_stat_key' => 'syncedPages-NS:' . (int)$namespace, |
| 185 | 'frs_stat_val' => $ns_synced[$namespace] ?? 0, |
| 186 | 'frs_timestamp' => $encDataTimestamp ]; |
| 187 | } |
| 188 | |
| 189 | // Save the data... |
| 190 | $dbw->newInsertQueryBuilder() |
| 191 | ->insertInto( 'flaggedrevs_statistics' ) |
| 192 | ->ignore() |
| 193 | ->rows( $dataSet ) |
| 194 | ->caller( __METHOD__ ) |
| 195 | ->execute(); |
| 196 | } |
| 197 | |
| 198 | /** |
| 199 | * @return int[][] |
| 200 | */ |
| 201 | private static function getPerNamespaceTotals() { |
| 202 | $ns_total = []; |
| 203 | $ns_reviewed = []; |
| 204 | $ns_synced = []; |
| 205 | // Get total, reviewed, and synced page count for each namespace |
| 206 | $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase( false, 'vslow' ); |
| 207 | $res = $dbr->newSelectQueryBuilder() |
| 208 | ->select( [ |
| 209 | 'page_namespace', |
| 210 | 'total' => 'COUNT(*)', |
| 211 | 'reviewed' => 'COUNT(fp_page_id)', |
| 212 | 'pending' => 'COUNT(fp_pending_since)' |
| 213 | ] ) |
| 214 | ->from( 'page' ) |
| 215 | ->leftJoin( 'flaggedpages', null, 'fp_page_id = page_id' ) |
| 216 | ->where( [ |
| 217 | 'page_is_redirect' => 0, |
| 218 | 'page_namespace' => FlaggedRevs::getReviewNamespaces() |
| 219 | ] ) |
| 220 | ->groupBy( 'page_namespace' ) |
| 221 | ->caller( __METHOD__ ) |
| 222 | ->fetchResultSet(); |
| 223 | foreach ( $res as $row ) { |
| 224 | $ns_total[$row->page_namespace] = (int)$row->total; |
| 225 | $ns_reviewed[$row->page_namespace] = (int)$row->reviewed; |
| 226 | $ns_synced[$row->page_namespace] = (int)$row->reviewed - (int)$row->pending; |
| 227 | } |
| 228 | return [ $ns_total, $ns_reviewed, $ns_synced ]; |
| 229 | } |
| 230 | |
| 231 | /** |
| 232 | * @param IReadableDatabase $db |
| 233 | * @param string $column |
| 234 | * |
| 235 | * @return string |
| 236 | */ |
| 237 | private static function dbUnixTime( IReadableDatabase $db, $column ) { |
| 238 | return $db->getType() === 'sqlite' ? "strftime('%s',$column)" : "UNIX_TIMESTAMP($column)"; |
| 239 | } |
| 240 | |
| 241 | /** |
| 242 | * @return int |
| 243 | */ |
| 244 | private static function getMeanPendingEditTime() { |
| 245 | $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase( false, 'vslow' ); |
| 246 | $nowUnix = wfTimestamp(); |
| 247 | $unixTimeCall = self::dbUnixTime( $dbr, 'fp_pending_since' ); |
| 248 | return (int)$dbr->newSelectQueryBuilder() |
| 249 | ->select( "AVG( $nowUnix - $unixTimeCall )" ) |
| 250 | ->from( 'flaggedpages' ) |
| 251 | ->join( 'page', null, 'fp_page_id = page_id' ) |
| 252 | ->where( [ |
| 253 | $dbr->expr( 'fp_pending_since', '!=', null ), |
| 254 | 'page_namespace' => FlaggedRevs::getReviewNamespaces() // sanity |
| 255 | ] ) |
| 256 | ->caller( __METHOD__ ) |
| 257 | ->fetchField(); |
| 258 | } |
| 259 | |
| 260 | /** |
| 261 | * Get edit review time statistics (as recent as possible) |
| 262 | * @param BagOStuff $cache |
| 263 | * @param string $users string "anons" or "users" |
| 264 | * @return array associative |
| 265 | */ |
| 266 | private static function getEditReviewTimes( $cache, $users ) { |
| 267 | $result = [ |
| 268 | 'average' => 0, |
| 269 | 'median' => 0, |
| 270 | 'percTable' => [], |
| 271 | 'sampleSize' => 0, |
| 272 | 'sampleStartTS' => null, |
| 273 | 'sampleEndTS' => null |
| 274 | ]; |
| 275 | if ( FlaggedRevs::useOnlyIfProtected() ) { |
| 276 | return $result; // disabled |
| 277 | } |
| 278 | |
| 279 | $rPerTable = []; // review wait percentiles |
| 280 | # Only go so far back...otherwise we will get garbage values due to |
| 281 | # the fact that FlaggedRevs wasn't enabled until after a while. |
| 282 | $dbr = MediaWikiServices::getInstance()->getConnectionProvider()->getReplicaDatabase( false, 'vslow' ); |
| 283 | $installedUnix = $dbr->newSelectQueryBuilder() |
| 284 | ->select( self::dbUnixTime( $dbr, 'MIN(log_timestamp)' ) ) |
| 285 | ->from( 'logging' ) |
| 286 | ->where( [ 'log_type' => 'review' ] ) |
| 287 | ->caller( __METHOD__ ) |
| 288 | ->fetchField(); |
| 289 | $dbInstalled = $dbr->timestamp( $installedUnix ?: wfTimestamp() ); |
| 290 | # Skip the most recent recent revs as they are likely to just |
| 291 | # be WHERE condition misses. This also gives us more data to use. |
| 292 | # Lastly, we want to avoid bias that would make the time too low |
| 293 | # since new revisions could not have "took a long time to sight". |
| 294 | $worstLagTS = $dbr->timestamp(); // now |
| 295 | $lastTS = $dbInstalled; |
| 296 | while ( true ) { // should almost always be ~1 pass |
| 297 | # Get the page with the worst pending lag... |
| 298 | $row = $dbr->newSelectQueryBuilder() |
| 299 | ->select( [ 'fp_page_id', 'fr_rev_id', 'fp_pending_since', 'fr_timestamp' ] ) |
| 300 | ->from( 'flaggedpages' ) |
| 301 | ->join( 'flaggedrevs', null, [ 'fr_page_id = fp_page_id', 'fr_rev_id = fp_stable' ] ) |
| 302 | ->where( [ |
| 303 | $dbr->expr( 'fp_pending_since', '!=', null ), |
| 304 | $dbr->expr( 'fp_pending_since', '>', $lastTS ), // skip failed rows |
| 305 | ] ) |
| 306 | ->orderBy( 'fp_pending_since' ) |
| 307 | ->caller( __METHOD__ )->fetchRow(); |
| 308 | if ( !$row ) { |
| 309 | break; |
| 310 | } |
| 311 | # Find the newest revision at the time the page was reviewed, |
| 312 | # this is the one that *should* have been reviewed. |
| 313 | $idealRev = (int)$dbr->newSelectQueryBuilder() |
| 314 | ->select( 'rev_id' ) |
| 315 | ->from( 'revision' ) |
| 316 | ->where( [ |
| 317 | 'rev_page' => $row->fp_page_id, |
| 318 | $dbr->expr( 'rev_timestamp', '<', $row->fr_timestamp ), |
| 319 | ] ) |
| 320 | ->caller( __METHOD__ ) |
| 321 | ->orderBy( 'rev_timestamp', SelectQueryBuilder::SORT_DESC ) |
| 322 | ->fetchField(); |
| 323 | if ( $row->fr_rev_id >= $idealRev ) { |
| 324 | $worstLagTS = $row->fp_pending_since; |
| 325 | break; // sane $worstLagTS found |
| 326 | # Fudge factor to prevent deliberate reviewing of non-current revisions |
| 327 | # from squeezing the range. Shouldn't effect anything otherwise. |
| 328 | } else { |
| 329 | $lastTS = $row->fp_pending_since; // next iteration |
| 330 | } |
| 331 | } |
| 332 | |
| 333 | $tempUserConfig = MediaWikiServices::getInstance()->getTempUserConfig(); |
| 334 | |
| 335 | # User condition (anons/users) |
| 336 | if ( $users === 'anons' ) { |
| 337 | $anonConds = [ new RawSQLExpression( 'actor_user IS NULL' ) ]; |
| 338 | if ( $tempUserConfig->isKnown() ) { |
| 339 | $anonConds[] = $tempUserConfig->getMatchCondition( $dbr, 'actor_name', IExpression::LIKE ); |
| 340 | } |
| 341 | |
| 342 | $userCondition = $dbr->orExpr( $anonConds ); |
| 343 | } elseif ( $users === 'users' ) { |
| 344 | $userConds = [ new RawSQLExpression( 'actor_user IS NOT NULL' ) ]; |
| 345 | if ( $tempUserConfig->isKnown() ) { |
| 346 | $userConds[] = $tempUserConfig->getMatchCondition( $dbr, 'actor_name', IExpression::NOT_LIKE ); |
| 347 | } |
| 348 | |
| 349 | $userCondition = $dbr->andExpr( $userConds ); |
| 350 | } else { |
| 351 | throw new InvalidArgumentException( 'Invalid $users param given.' ); |
| 352 | } |
| 353 | # Avoid having to censor data |
| 354 | # Note: if no edits pending, $worstLagTS is the cur time just before we checked |
| 355 | # for the worst lag. Thus, new edits *right* after the check are properly excluded. |
| 356 | $maxTSUnix = (int)wfTimestamp( TS_UNIX, $worstLagTS ) - 1; // all edits later reviewed |
| 357 | $dbMaxTS = $dbr->timestamp( $maxTSUnix ); |
| 358 | # Use a one week time range |
| 359 | $days = 7; |
| 360 | $minTSUnix = $maxTSUnix - $days * 86400; |
| 361 | # Approximate the number rows to scan |
| 362 | $rows = self::estimateRevisionRowCount( $dbr, $maxTSUnix, 7, $userCondition ); |
| 363 | # If the range doesn't have many rows (like on small wikis), use 30 days |
| 364 | if ( $rows < 500 ) { |
| 365 | $rows = self::estimateRevisionRowCount( $dbr, $maxTSUnix, 30, $userCondition ); |
| 366 | # If the range doesn't have many rows (like on really tiny wikis), use 90 days |
| 367 | if ( $rows < 500 ) { |
| 368 | $days = 90; |
| 369 | $minTSUnix = $maxTSUnix - $days * 86400; |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | $sampleSize = 1500; // sample size |
| 374 | # Sanity check the starting timestamp |
| 375 | $minTSUnix = max( $minTSUnix, $installedUnix ); |
| 376 | $dbMinTS = $dbr->timestamp( $minTSUnix ); |
| 377 | # Get timestamp boundaries |
| 378 | $timeCondition = [ |
| 379 | $dbr->expr( 'rev_timestamp', '>=', $dbMinTS ), |
| 380 | $dbr->expr( 'rev_timestamp', '<=', $dbMaxTS ), |
| 381 | ]; |
| 382 | # Get mod for edit spread |
| 383 | $fname = __METHOD__; |
| 384 | $edits = $cache->getWithSetCallback( |
| 385 | $cache->makeKey( 'flaggedrevs', 'rcEditCount', $users, $days ), |
| 386 | $cache::TTL_WEEK * 2, |
| 387 | static function () use ( $dbr, $fname, $userCondition, $timeCondition ) { |
| 388 | return (int)$dbr->newSelectQueryBuilder() |
| 389 | ->select( 'COUNT(*)' ) |
| 390 | ->from( 'revision' ) |
| 391 | ->join( 'page', null, 'page_id = rev_page' ) |
| 392 | ->join( 'actor', null, 'rev_actor = actor_id' ) |
| 393 | ->where( $timeCondition ) |
| 394 | ->andWhere( [ |
| 395 | $userCondition, |
| 396 | 'page_namespace' => FlaggedRevs::getReviewNamespaces() |
| 397 | ] ) |
| 398 | ->caller( $fname ) |
| 399 | ->fetchField(); |
| 400 | } |
| 401 | ); |
| 402 | $mod = max( floor( $edits / $sampleSize ), 1 ); # $mod >= 1 |
| 403 | # For edits that started off pending, how long do they take to get reviewed? |
| 404 | # Edits started off pending if made when a flagged rev of the page already existed. |
| 405 | # Get the *first* reviewed rev *after* each edit and get the time difference. |
| 406 | $res = $dbr->newSelectQueryBuilder() |
| 407 | ->select( [ |
| 408 | 'rt' => 'rev_timestamp', // time revision was made |
| 409 | 'nft' => new Subquery( $dbr->newSelectQueryBuilder() |
| 410 | ->select( 'MIN(fr_timestamp)' ) |
| 411 | ->from( 'flaggedrevs' ) |
| 412 | ->where( [ |
| 413 | 'fr_page_id = rev_page', |
| 414 | 'fr_rev_timestamp >= rev_timestamp' |
| 415 | ] ) |
| 416 | ->caller( __METHOD__ ) |
| 417 | ->getSQL() |
| 418 | ) // time when revision was first reviewed |
| 419 | ] ) |
| 420 | ->from( 'revision' ) |
| 421 | ->join( 'actor', null, 'rev_actor = actor_id' ) |
| 422 | ->where( [ |
| 423 | $userCondition, |
| 424 | "(rev_id % $mod) = 0", |
| 425 | $dbr->expr( 'rev_parent_id', '>', 0 ), // optimize (exclude new pages) |
| 426 | 'EXISTS (' . $dbr->newSelectQueryBuilder() |
| 427 | ->select( '*' ) |
| 428 | ->from( 'flaggedrevs' ) |
| 429 | ->where( [ // page was reviewed when this revision was made |
| 430 | 'fr_page_id = rev_page', |
| 431 | 'fr_rev_timestamp < rev_timestamp', // before this revision |
| 432 | 'fr_rev_id < rev_id', // not imported later |
| 433 | 'fr_timestamp < rev_timestamp', // page reviewed before revision |
| 434 | ] ) |
| 435 | ->caller( __METHOD__ ) |
| 436 | ->getSQL() . |
| 437 | ')' |
| 438 | ] ) |
| 439 | ->andWhere( $timeCondition ) |
| 440 | ->caller( __METHOD__ ) |
| 441 | ->fetchResultSet(); |
| 442 | |
| 443 | $secondsR = 0; // total wait seconds for edits later reviewed |
| 444 | $secondsP = 0; // total wait seconds for edits still pending |
| 445 | $times = []; |
| 446 | if ( $res->numRows() ) { |
| 447 | # Get the elapsed times revs were pending (flagged time - edit time) |
| 448 | foreach ( $res as $row ) { |
| 449 | $time = (int)wfTimestamp( TS_UNIX, $row->nft ) - (int)wfTimestamp( TS_UNIX, $row->rt ); |
| 450 | $time = max( $time, 0 ); // sanity |
| 451 | $secondsR += $time; |
| 452 | $times[] = $time; |
| 453 | } |
| 454 | $sampleSize = count( $times ); |
| 455 | $aveRT = ( $secondsR + $secondsP ) / $sampleSize; // sample mean |
| 456 | sort( $times ); // order smallest -> largest |
| 457 | // Sample median |
| 458 | $rank = intval( round( count( $times ) / 2 + 0.5 ) - 1 ); |
| 459 | $medianRT = $times[$rank]; |
| 460 | // Make percentile tabulation data |
| 461 | $doPercentiles = [ 35, 45, 55, 65, 75, 85, 90, 95 ]; |
| 462 | foreach ( $doPercentiles as $percentile ) { |
| 463 | $rank = intval( round( $percentile * count( $times ) / 100 + 0.5 ) - 1 ); |
| 464 | $rPerTable[$percentile] = $times[$rank]; |
| 465 | } |
| 466 | $result['average'] = $aveRT; |
| 467 | $result['median'] = $medianRT; |
| 468 | $result['percTable'] = $rPerTable; |
| 469 | $result['sampleSize'] = count( $times ); |
| 470 | $result['sampleStartTS'] = $minTSUnix; |
| 471 | $result['sampleEndTS'] = $maxTSUnix; |
| 472 | } |
| 473 | |
| 474 | return $result; |
| 475 | } |
| 476 | |
| 477 | /** |
| 478 | * Convenience function to estimate the number of revisions authored by a given user type |
| 479 | * within a given timeframe. |
| 480 | * |
| 481 | * @param IReadableDatabase $dbr Replica DB connection handle. |
| 482 | * @param int $maxTSUnix Only consider revisions created before this UNIX timestamp. |
| 483 | * @param int $days Only consider revisions created at most this many days before $maxTSUnix. |
| 484 | * @param IExpression|string $userCondition SQL condition to filter revisions by user type. |
| 485 | * |
| 486 | * @return int The estimated number of revisions matching the given time and user constraints. |
| 487 | */ |
| 488 | private static function estimateRevisionRowCount( |
| 489 | IReadableDatabase $dbr, |
| 490 | int $maxTSUnix, |
| 491 | int $days, |
| 492 | $userCondition |
| 493 | ): int { |
| 494 | $minTSUnix = $maxTSUnix - $days * 86400; |
| 495 | $dbMinTS = $dbr->timestamp( $minTSUnix ); |
| 496 | $dbMaxTS = $dbr->timestamp( $maxTSUnix ); |
| 497 | |
| 498 | return $dbr->newSelectQueryBuilder() |
| 499 | ->select( '1' ) |
| 500 | ->from( 'revision' ) |
| 501 | ->join( 'actor', null, 'rev_actor = actor_id' ) |
| 502 | ->where( $userCondition ) |
| 503 | ->andWhere( [ |
| 504 | $dbr->expr( 'rev_timestamp', '>=', $dbMinTS ), |
| 505 | $dbr->expr( 'rev_timestamp', '<=', $dbMaxTS ), |
| 506 | ] ) |
| 507 | ->caller( __METHOD__ ) |
| 508 | ->estimateRowCount(); |
| 509 | } |
| 510 | } |