Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
37.34% |
59 / 158 |
|
50.00% |
3 / 6 |
CRAP | |
0.00% |
0 / 1 |
| RecentChangesUpdateJob | |
37.58% |
59 / 157 |
|
50.00% |
3 / 6 |
190.41 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
2 | |||
| newPurgeJob | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| newCacheUpdateJob | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| run | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 | |||
| purgeExpiredRows | |
82.35% |
42 / 51 |
|
0.00% |
0 / 1 |
9.45 | |||
| updateActiveUsers | |
0.00% |
0 / 88 |
|
0.00% |
0 / 1 |
110 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * @license GPL-2.0-or-later |
| 4 | * @file |
| 5 | */ |
| 6 | |
| 7 | namespace MediaWiki\RecentChanges; |
| 8 | |
| 9 | use InvalidArgumentException; |
| 10 | use MediaWiki\Deferred\SiteStatsUpdate; |
| 11 | use MediaWiki\HookContainer\HookRunner; |
| 12 | use MediaWiki\JobQueue\Job; |
| 13 | use MediaWiki\MainConfigNames; |
| 14 | use MediaWiki\MediaWikiServices; |
| 15 | use MediaWiki\SpecialPage\SpecialPage; |
| 16 | use MediaWiki\Title\Title; |
| 17 | use Wikimedia\Timestamp\ConvertibleTimestamp; |
| 18 | use Wikimedia\Timestamp\TimestampFormat as TS; |
| 19 | |
/**
 * Job that performs periodic maintenance of recent changes data.
 *
 * Depending on its 'type' parameter, the job either purges expired rows from
 * the recentchanges table ('purge') or refreshes the cached set of active
 * users stored in the querycachetwo table ('cacheUpdate').
 *
 * @since 1.25
 * @ingroup RecentChanges
 * @ingroup JobQueue
 */
class RecentChangesUpdateJob extends Job {
	/**
	 * @param Title $title
	 * @param array $params Job parameters; the 'type' key is required
	 * @throws InvalidArgumentException If the 'type' parameter is missing
	 */
	public function __construct( Title $title, array $params ) {
		parent::__construct( 'recentChangesUpdate', $title, $params );

		if ( !isset( $params['type'] ) ) {
			throw new InvalidArgumentException( "Missing 'type' parameter." );
		}

		$this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
		$this->removeDuplicates = true;
	}

	/**
	 * Create a job that purges expired recentchanges rows.
	 *
	 * @return RecentChangesUpdateJob
	 */
	final public static function newPurgeJob() {
		return new self(
			SpecialPage::getTitleFor( 'Recentchanges' ), [ 'type' => 'purge' ]
		);
	}

	/**
	 * Create a job that refreshes the active users cache.
	 *
	 * @return RecentChangesUpdateJob
	 * @since 1.26
	 */
	final public static function newCacheUpdateJob() {
		return new self(
			SpecialPage::getTitleFor( 'Recentchanges' ), [ 'type' => 'cacheUpdate' ]
		);
	}

	/**
	 * Dispatch to the maintenance task selected by the 'type' parameter.
	 *
	 * @inheritDoc
	 * @throws InvalidArgumentException If 'type' is neither 'purge' nor 'cacheUpdate'
	 */
	public function run() {
		if ( $this->params['type'] === 'purge' ) {
			$this->purgeExpiredRows();
		} elseif ( $this->params['type'] === 'cacheUpdate' ) {
			$this->updateActiveUsers();
		} else {
			throw new InvalidArgumentException(
				"Invalid 'type' parameter '{$this->params['type']}'." );
		}

		return true;
	}

	/**
	 * Delete recentchanges rows whose rc_timestamp is older than the RCMaxAge
	 * setting, in batches of at most UpdateRowsPerQuery rows.
	 *
	 * The work is guarded by a named database lock acquired without waiting;
	 * if the lock is already held, the method returns without doing anything.
	 */
	protected function purgeExpiredRows() {
		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();
		$rcMaxAge = $config->get( MainConfigNames::RCMaxAge );
		$updateRowsPerQuery = $config->get( MainConfigNames::UpdateRowsPerQuery );
		$dbProvider = $services->getConnectionProvider();
		$dbw = $dbProvider->getPrimaryDatabase();
		$lockKey = $dbw->getDomainID() . ':recentchanges-prune';
		if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
			// already in progress
			return;
		}

		try {
			$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );
			$hookContainer = $services->getHookContainer();
			$hookRunner = new HookRunner( $hookContainer );
			$cutoff = $dbw->timestamp( ConvertibleTimestamp::time() - $rcMaxAge );
			// Full rows are only selected when a handler of the legacy hook needs them
			$hasLegacyHook = $hookContainer->isRegistered( 'RecentChangesPurgeRows' );
			if ( $hasLegacyHook ) {
				$query = $dbw->newSelectQueryBuilder()
					->queryInfo( RecentChange::getQueryInfo( RecentChange::STRAIGHT_JOIN_ACTOR ) )
					->where( $dbw->expr( 'rc_timestamp', '<', $cutoff ) )
					->limit( $updateRowsPerQuery )
					->caller( __METHOD__ );
			} else {
				$query = $dbw->newSelectQueryBuilder()
					->select( 'rc_id' )
					->from( 'recentchanges' )
					->where( $dbw->expr( 'rc_timestamp', '<', $cutoff ) )
					->limit( $updateRowsPerQuery )
					->caller( __METHOD__ );
			}
			// Each callback collected here is invoked with the result set of every deleted batch
			$callbacks = [];
			$hookRunner->onRecentChangesPurgeQuery( $query, $callbacks );
			do {
				$res = $query->fetchResultSet();
				$rcIds = [];
				if ( $res->numRows() ) {
					$rows = [];
					foreach ( $res as $row ) {
						$rcIds[] = $row->rc_id;
						if ( $hasLegacyHook ) {
							$rows[] = $row;
						}
					}

					$dbw->newDeleteQueryBuilder()
						->deleteFrom( 'recentchanges' )
						->where( [ 'rc_id' => $rcIds ] )
						->caller( __METHOD__ )->execute();

					foreach ( $callbacks as $callback ) {
						$callback( $res );
					}
					if ( $hasLegacyHook ) {
						$hookRunner->onRecentChangesPurgeRows( $rows );
					}
					// There might be more, so try waiting for replica DBs
					if ( !$dbProvider->commitAndWaitForReplication(
						__METHOD__, $ticket, [ 'timeout' => 3 ]
					) ) {
						// Another job will continue anyway
						break;
					}
				}
			} while ( $rcIds );
		} finally {
			// Release the lock even if a query or a hook handler threw; otherwise
			// later runs would keep bailing out at the non-blocking lock() above.
			$dbw->unlock( $lockKey, __METHOD__ );
		}
	}

	/**
	 * Refresh the 'activeusers' entries in querycachetwo and the matching
	 * freshness timestamp in querycache_info.
	 *
	 * The work is guarded by a named database lock acquired without waiting;
	 * if the lock is already held, the method returns without doing anything.
	 */
	protected function updateActiveUsers() {
		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();

		// Users that made edits within the last this-many days are "active"
		$days = $config->get( MainConfigNames::ActiveUserDays );
		// Pull in the full window of active users in this update
		$window = $days * 86400;

		$rcLookup = $services->getRecentChangeLookup();
		$dbProvider = $services->getConnectionProvider();
		$dbw = $dbProvider->getPrimaryDatabase();
		$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );

		$lockKey = $dbw->getDomainID() . '-activeusers';
		if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
			// Exclusive update (avoids duplicate entries)… it's usually fine to just
			// drop out here, if the Job is already running.
			return;
		}

		try {
			// Long-running queries expected
			$dbw->setSessionOptions( [ 'connTimeout' => 900 ] );

			// Same clock source as purgeExpiredRows()
			$nowUnix = ConvertibleTimestamp::time();
			// Unix time before which an edit no longer counts towards being "active"
			$activeCutoffUnix = $nowUnix - $days * 86400;

			// Get the last-updated timestamp for the cache
			$cTime = $dbw->newSelectQueryBuilder()
				->select( 'qci_timestamp' )
				->from( 'querycache_info' )
				->where( [ 'qci_type' => 'activeusers' ] )
				->caller( __METHOD__ )->fetchField();
			$cTimeUnix = $cTime ? (int)wfTimestamp( TS::UNIX, $cTime ) : 1;

			// Pick the date range to fetch from. This is normally from the last
			// update until the present time, but has a limited window.
			// If the window is limited, multiple runs are needed to fully populate it.
			$sTimestamp = max( $cTimeUnix, $activeCutoffUnix );
			$eTimestamp = min( $sTimestamp + $window, $nowUnix );

			// Get all the users active since the last update
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'actor_name', 'lastedittime' => 'MAX(rc_timestamp)' ] )
				->from( 'recentchanges' )
				->join( 'actor', null, 'actor_id=rc_actor' )
				->where( [
					$dbw->expr( 'actor_user', '!=', null ), // actual accounts
					$dbw->expr( 'rc_source', '=', $rcLookup->getPrimarySources() ),
					$dbw->expr( 'rc_log_type', '=', null )->or( 'rc_log_type', '!=', 'newusers' ),
					$dbw->expr( 'rc_timestamp', '>=', $dbw->timestamp( $sTimestamp ) ),
					$dbw->expr( 'rc_timestamp', '<=', $dbw->timestamp( $eTimestamp ) ),
				] )
				->groupBy( 'actor_name' )
				->orderBy( 'NULL' ) // avoid filesort
				->caller( __METHOD__ )->fetchResultSet();

			// Map of user name => most recent rc_timestamp in the fetched range
			$names = [];
			foreach ( $res as $row ) {
				$names[$row->actor_name] = $row->lastedittime;
			}

			// Find which of the recently active users are already accounted for
			if ( count( $names ) ) {
				$res = $dbw->newSelectQueryBuilder()
					->select( [ 'user_name' => 'qcc_title' ] )
					->from( 'querycachetwo' )
					->where( [
						'qcc_type' => 'activeusers',
						'qcc_namespace' => NS_USER,
						'qcc_title' => array_map( 'strval', array_keys( $names ) ),
						$dbw->expr( 'qcc_value', '>=', $activeCutoffUnix ),
					] )
					->caller( __METHOD__ )->fetchResultSet();
				// Note: In order for this to be actually consistent, we would need
				// to update these rows with the new lastedittime.
				foreach ( $res as $row ) {
					unset( $names[$row->user_name] );
				}
			}

			// Insert the users that need to be added to the list
			if ( count( $names ) ) {
				$newRows = [];
				foreach ( $names as $name => $lastEditTime ) {
					$newRows[] = [
						'qcc_type' => 'activeusers',
						'qcc_namespace' => NS_USER,
						// PHP turns all-digit array keys into integers; cast back to
						// a string, as the lookup query above does with strval
						'qcc_title' => (string)$name,
						'qcc_value' => (int)wfTimestamp( TS::UNIX, $lastEditTime ),
						'qcc_namespacetwo' => 0, // unused
						'qcc_titletwo' => '' // unused
					];
				}
				foreach ( array_chunk( $newRows, 500 ) as $rowBatch ) {
					$dbw->newInsertQueryBuilder()
						->insertInto( 'querycachetwo' )
						->rows( $rowBatch )
						->caller( __METHOD__ )->execute();
					$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
				}
			}

			// If a transaction was already started, it might have an old
			// snapshot, so kludge the timestamp range back as needed.
			$asOfTimestamp = min( $eTimestamp, (int)$dbw->trxTimestamp() );

			// Touch the data freshness timestamp
			$dbw->newReplaceQueryBuilder()
				->replaceInto( 'querycache_info' )
				->row( [
					'qci_type' => 'activeusers',
					'qci_timestamp' => $dbw->timestamp( $asOfTimestamp ), // not always $now
				] )
				->uniqueIndexFields( [ 'qci_type' ] )
				->caller( __METHOD__ )->execute();

			// Rotate out users that have not edited in too long (according to old data set)
			$dbw->newDeleteQueryBuilder()
				->deleteFrom( 'querycachetwo' )
				->where( [
					'qcc_type' => 'activeusers',
					$dbw->expr( 'qcc_value', '<', $activeCutoffUnix ) // TS::UNIX
				] )
				->caller( __METHOD__ )->execute();

			if ( !$config->get( MainConfigNames::MiserMode ) ) {
				SiteStatsUpdate::cacheUpdate( $dbw );
			}
		} finally {
			// Release the lock even if one of the queries threw; otherwise later
			// runs would keep bailing out at the non-blocking lock() above.
			$dbw->unlock( $lockKey, __METHOD__ );
		}
	}
}
| 274 | |
/**
 * Keep the class reachable under its former global (non-namespaced) name.
 *
 * @deprecated class alias since 1.44
 */
class_alias( RecentChangesUpdateJob::class, 'RecentChangesUpdateJob' );