MediaWiki master
RecentChangesUpdateJob.php
Go to the documentation of this file.
1<?php
8
9use InvalidArgumentException;
17use Wikimedia\Timestamp\ConvertibleTimestamp;
18use Wikimedia\Timestamp\TimestampFormat as TS;
19
/**
 * Create a recent-changes maintenance job.
 *
 * @param Title $title Title passed through to the parent job constructor
 * @param array $params Job parameters; must contain a 'type' entry
 * @throws InvalidArgumentException When $params has no 'type' entry
 */
public function __construct( Title $title, array $params ) {
	parent::__construct( 'recentChangesUpdate', $title, $params );

	// The job cannot be dispatched in run() without knowing its type.
	$hasType = isset( $params['type'] );
	if ( !$hasType ) {
		throw new InvalidArgumentException( "Missing 'type' parameter." );
	}

	// NOTE(review): presumably lets the queue collapse identical pending jobs — confirm against Job.
	$this->removeDuplicates = true;
	// NOTE(review): presumably opts out of the runner's explicit transaction round, since the
	// job commits in batches itself — confirm against Job.
	$this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
}
39
/**
 * Build a job of type 'purge', attached to the Recentchanges special page title.
 *
 * @return RecentChangesUpdateJob
 */
final public static function newPurgeJob() {
	$title = SpecialPage::getTitleFor( 'Recentchanges' );
	$params = [ 'type' => 'purge' ];

	return new self( $title, $params );
}
48
/**
 * Build a job of type 'cacheUpdate', attached to the Recentchanges special page title.
 *
 * @return RecentChangesUpdateJob
 */
final public static function newCacheUpdateJob() {
	$title = SpecialPage::getTitleFor( 'Recentchanges' );
	$params = [ 'type' => 'cacheUpdate' ];

	return new self( $title, $params );
}
58
/**
 * Dispatch to the handler that matches the job's 'type' parameter.
 *
 * @return bool Always true when a known type was handled
 * @throws InvalidArgumentException When 'type' is neither 'purge' nor 'cacheUpdate'
 */
public function run() {
	$type = $this->params['type'];

	if ( $type === 'purge' ) {
		$this->purgeExpiredRows();

		return true;
	}

	if ( $type === 'cacheUpdate' ) {
		$this->updateActiveUsers();

		return true;
	}

	throw new InvalidArgumentException(
		"Invalid 'type' parameter '{$this->params['type']}'." );
}
72
/**
 * Delete recentchanges rows older than the RCMaxAge setting, in batches.
 *
 * A named lock keyed on the database domain is requested with a zero timeout;
 * if it cannot be obtained the method returns immediately. Each batch of at
 * most UpdateRowsPerQuery rows is selected, deleted, passed to the purge
 * callbacks and (if registered) the legacy RecentChangesPurgeRows hook, and
 * then commitAndWaitForReplication() is called before the next batch.
 *
 * @return void
 */
protected function purgeExpiredRows() {
	$services = MediaWikiServices::getInstance();
	$config = $services->getMainConfig();
	$rcMaxAge = $config->get( MainConfigNames::RCMaxAge );
	$updateRowsPerQuery = $config->get( MainConfigNames::UpdateRowsPerQuery );
	$dbProvider = $services->getConnectionProvider();
	$dbw = $dbProvider->getPrimaryDatabase();
	$lockKey = $dbw->getDomainID() . ':recentchanges-prune';
	if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
		// already in progress
		return;
	}
	$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );
	$hookContainer = $services->getHookContainer();
	$hookRunner = new HookRunner( $hookContainer );
	$cutoff = $dbw->timestamp( ConvertibleTimestamp::time() - $rcMaxAge );
	$hasLegacyHook = $hookContainer->isRegistered( 'RecentChangesPurgeRows' );

	$query = $dbw->newSelectQueryBuilder();
	if ( $hasLegacyHook ) {
		// The legacy hook receives the fetched rows themselves, so select through
		// RecentChange::getQueryInfo() rather than rc_id alone.
		// NOTE(review): this call was restored from the file's symbol references
		// (getQueryInfo / STRAIGHT_JOIN_ACTOR) — confirm against upstream.
		$query->queryInfo( RecentChange::getQueryInfo( RecentChange::STRAIGHT_JOIN_ACTOR ) );
	} else {
		// Only the IDs are needed to drive the delete.
		$query->select( 'rc_id' )
			->from( 'recentchanges' );
	}
	$query->where( $dbw->expr( 'rc_timestamp', '<', $cutoff ) )
		->limit( $updateRowsPerQuery )
		->caller( __METHOD__ );

	// Hook handlers may adjust the query and register per-batch callbacks.
	$callbacks = [];
	$hookRunner->onRecentChangesPurgeQuery( $query, $callbacks );
	do {
		$res = $query->fetchResultSet();
		$rcIds = [];
		if ( $res->numRows() ) {
			$rows = [];
			foreach ( $res as $row ) {
				$rcIds[] = $row->rc_id;
				if ( $hasLegacyHook ) {
					$rows[] = $row;
				}
			}

			$dbw->newDeleteQueryBuilder()
				->deleteFrom( 'recentchanges' )
				->where( [ 'rc_id' => $rcIds ] )
				->caller( __METHOD__ )->execute();

			foreach ( $callbacks as $callback ) {
				$callback( $res );
			}
			if ( $hasLegacyHook ) {
				$hookRunner->onRecentChangesPurgeRows( $rows );
			}
			// There might be more, so try waiting for replica DBs
			if ( !$dbProvider->commitAndWaitForReplication(
				__METHOD__, $ticket, [ 'timeout' => 3 ]
			) ) {
				// Another job will continue anyway
				break;
			}
		}
		// An empty batch means nothing older than the cutoff is left.
	} while ( $rcIds );

	$dbw->unlock( $lockKey, __METHOD__ );
}
142
/**
 * Refresh the 'activeusers' entries in querycachetwo from recentchanges.
 *
 * Requests a named lock with a zero timeout and returns immediately if it is
 * not obtained. Otherwise it collects the registered users with recent changes
 * in the current window, inserts those not already cached, records the
 * freshness timestamp in querycache_info, removes entries older than the
 * ActiveUserDays window, and (unless MiserMode is set) calls
 * SiteStatsUpdate::cacheUpdate().
 *
 * @return void
 */
protected function updateActiveUsers() {
	$services = MediaWikiServices::getInstance();
	$activeUserDays = $services->getMainConfig()->get(
		MainConfigNames::ActiveUserDays );

	// Users that made edits within this many seconds are "active".
	// The full window of active users is pulled in by this update.
	$window = $activeUserDays * 86400;

	$rcLookup = $services->getRecentChangeLookup();
	$dbProvider = $services->getConnectionProvider();
	$dbw = $dbProvider->getPrimaryDatabase();
	$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );

	$lockKey = $dbw->getDomainID() . '-activeusers';
	if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
		// Exclusive update (avoids duplicate entries)… it's usually fine to just
		// drop out here, if the Job is already running.
		return;
	}

	// Long-running queries expected
	$dbw->setSessionOptions( [ 'connTimeout' => 900 ] );

	// Same clock source as purgeExpiredRows().
	$nowUnix = ConvertibleTimestamp::time();
	// Oldest UNIX time that still counts as "active"; used for the fetch range,
	// the already-cached check and the final rotation.
	$cutoffUnix = $nowUnix - $window;

	// Get the last-updated timestamp for the cache
	$cTime = $dbw->newSelectQueryBuilder()
		->select( 'qci_timestamp' )
		->from( 'querycache_info' )
		->where( [ 'qci_type' => 'activeusers' ] )
		->caller( __METHOD__ )->fetchField();
	$cTimeUnix = $cTime ? (int)wfTimestamp( TS::UNIX, $cTime ) : 1;

	// Pick the date range to fetch from. This is normally from the last
	// update until the present time, but has a limited window.
	// If the window is limited, multiple runs are needed to fully populate it.
	$sTimestamp = max( $cTimeUnix, $cutoffUnix );
	$eTimestamp = min( $sTimestamp + $window, $nowUnix );

	// Get all the users active since the last update
	$res = $dbw->newSelectQueryBuilder()
		->select( [ 'actor_name', 'lastedittime' => 'MAX(rc_timestamp)' ] )
		->from( 'recentchanges' )
		->join( 'actor', null, 'actor_id=rc_actor' )
		->where( [
			$dbw->expr( 'actor_user', '!=', null ), // actual accounts
			$dbw->expr( 'rc_source', '=', $rcLookup->getPrimarySources() ),
			$dbw->expr( 'rc_log_type', '=', null )->or( 'rc_log_type', '!=', 'newusers' ),
			$dbw->expr( 'rc_timestamp', '>=', $dbw->timestamp( $sTimestamp ) ),
			$dbw->expr( 'rc_timestamp', '<=', $dbw->timestamp( $eTimestamp ) ),
		] )
		->groupBy( 'actor_name' )
		->orderBy( 'NULL' ) // avoid filesort
		->caller( __METHOD__ )->fetchResultSet();

	// Map of user name => latest rc_timestamp in the range
	$names = [];
	foreach ( $res as $row ) {
		$names[$row->actor_name] = $row->lastedittime;
	}

	// Find which of the recently active users are already accounted for
	if ( $names ) {
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'user_name' => 'qcc_title' ] )
			->from( 'querycachetwo' )
			->where( [
				'qcc_type' => 'activeusers',
				'qcc_namespace' => NS_USER,
				// PHP turns numeric-string array keys into ints; cast them back
				'qcc_title' => array_map( 'strval', array_keys( $names ) ),
				$dbw->expr( 'qcc_value', '>=', $cutoffUnix ),
			] )
			->caller( __METHOD__ )->fetchResultSet();
		// Note: In order for this to be actually consistent, we would need
		// to update these rows with the new lastedittime.
		foreach ( $res as $row ) {
			unset( $names[$row->user_name] );
		}
	}

	// Insert the users that need to be added to the list
	if ( $names ) {
		$newRows = [];
		foreach ( $names as $name => $lastEditTime ) {
			$newRows[] = [
				'qcc_type' => 'activeusers',
				'qcc_namespace' => NS_USER,
				'qcc_title' => $name,
				'qcc_value' => (int)wfTimestamp( TS::UNIX, $lastEditTime ),
				'qcc_namespacetwo' => 0, // unused
				'qcc_titletwo' => '' // unused
			];
		}
		// Insert in batches of 500, waiting for replication after each one
		foreach ( array_chunk( $newRows, 500 ) as $rowBatch ) {
			$dbw->newInsertQueryBuilder()
				->insertInto( 'querycachetwo' )
				->rows( $rowBatch )
				->caller( __METHOD__ )->execute();
			$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
		}
	}

	// If a transaction was already started, it might have an old
	// snapshot, so kludge the timestamp range back as needed.
	$asOfTimestamp = min( $eTimestamp, (int)$dbw->trxTimestamp() );

	// Touch the data freshness timestamp
	$dbw->newReplaceQueryBuilder()
		->replaceInto( 'querycache_info' )
		->row( [
			'qci_type' => 'activeusers',
			'qci_timestamp' => $dbw->timestamp( $asOfTimestamp ), // not always $now
		] )
		->uniqueIndexFields( [ 'qci_type' ] )
		->caller( __METHOD__ )->execute();

	// Rotate out users that have not edited in too long (according to old data set)
	$dbw->newDeleteQueryBuilder()
		->deleteFrom( 'querycachetwo' )
		->where( [
			'qcc_type' => 'activeusers',
			$dbw->expr( 'qcc_value', '<', $cutoffUnix ) // TS::UNIX
		] )
		->caller( __METHOD__ )->execute();

	if ( !$services->getMainConfig()->get( MainConfigNames::MiserMode ) ) {
		SiteStatsUpdate::cacheUpdate( $dbw );
	}

	$dbw->unlock( $lockKey, __METHOD__ );
}
274}
275
// NOTE(review): presumably a backward-compatibility alias exposing the class under its
// un-namespaced name — the namespace declaration is not visible here; confirm.
class_alias( RecentChangesUpdateJob::class, 'RecentChangesUpdateJob' );
const NS_USER
Definition Defines.php:53
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
Class for handling updates to the site_stats table.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Describe and execute a background job.
Definition Job.php:28
array $params
Array of job parameters.
Definition Job.php:33
A class containing constants representing the names of configuration variables.
const UpdateRowsPerQuery
Name constant for the UpdateRowsPerQuery setting, for use with Config::get()
const RCMaxAge
Name constant for the RCMaxAge setting, for use with Config::get()
const MiserMode
Name constant for the MiserMode setting, for use with Config::get()
const ActiveUserDays
Name constant for the ActiveUserDays setting, for use with Config::get()
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
static getQueryInfo(int $joinFlags=0)
Return the tables, fields, and join conditions to be selected to create a new recentchanges object.
const STRAIGHT_JOIN_ACTOR
Flag for RecentChange::getQueryInfo()
Purge expired rows from the recentchanges table.
run()
Run the job. If this method returns false or completes exceptionally, the job runner will retry execut...
Parent class for all special pages.
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name. If you don't need a full Title object,...
Represents a title within MediaWiki.
Definition Title.php:69