MediaWiki master
RecentChangesUpdateJob.php
Go to the documentation of this file.
1<?php
8
9use InvalidArgumentException;
17use Wikimedia\Timestamp\ConvertibleTimestamp;
18use Wikimedia\Timestamp\TimestampFormat as TS;
19
/**
 * Create a recent-changes maintenance job.
 *
 * @param Title $title Title passed through to the parent job constructor
 * @param array $params Job parameters; must contain a non-null 'type' entry
 *   (run() accepts 'purge' and 'cacheUpdate')
 * @throws InvalidArgumentException If $params has no 'type' entry
 */
public function __construct( Title $title, array $params ) {
	parent::__construct( 'recentChangesUpdate', $title, $params );

	$hasType = isset( $params['type'] );
	if ( !$hasType ) {
		throw new InvalidArgumentException( "Missing 'type' parameter." );
	}

	// NOTE(review): presumably lets the job queue collapse identical queued jobs — confirm against Job
	$this->removeDuplicates = true;
	// NOTE(review): presumably tells the runner not to wrap run() in an explicit transaction round — confirm
	$this->executionFlags = $this->executionFlags | self::JOB_NO_EXPLICIT_TRX_ROUND;
}
38
/**
 * Build a job of type 'purge', attached to the Special:RecentChanges title.
 *
 * @return RecentChangesUpdateJob
 */
final public static function newPurgeJob() {
	$target = SpecialPage::getTitleFor( 'Recentchanges' );
	$jobParams = [ 'type' => 'purge' ];

	return new self( $target, $jobParams );
}
47
/**
 * Build a job of type 'cacheUpdate', attached to the Special:RecentChanges title.
 *
 * @return RecentChangesUpdateJob
 */
final public static function newCacheUpdateJob() {
	$target = SpecialPage::getTitleFor( 'Recentchanges' );
	$jobParams = [ 'type' => 'cacheUpdate' ];

	return new self( $target, $jobParams );
}
57
/**
 * Run the task selected by the 'type' job parameter.
 *
 * 'purge' deletes expired recentchanges rows; 'cacheUpdate' refreshes the
 * active-users cache.
 *
 * @return bool Always true when the selected task returns without throwing
 * @throws InvalidArgumentException If 'type' is neither 'purge' nor 'cacheUpdate'
 */
public function run() {
	if ( $this->params['type'] === 'purge' ) {
		$this->purgeExpiredRows();

		return true;
	}

	if ( $this->params['type'] !== 'cacheUpdate' ) {
		throw new InvalidArgumentException(
			"Invalid 'type' parameter '{$this->params['type']}'." );
	}

	$this->updateActiveUsers();

	return true;
}
71
/**
 * Delete recentchanges rows older than the configured maximum age.
 *
 * Rows are removed in batches of at most UpdateRowsPerQuery, committing and
 * waiting for replication between batches. A named database lock ensures that
 * only one purge runs per database domain; if the lock is already held this
 * method returns immediately without doing anything.
 */
protected function purgeExpiredRows() {
	$services = MediaWikiServices::getInstance();
	$config = $services->getMainConfig();
	$rcMaxAge = $config->get( MainConfigNames::RCMaxAge );
	$updateRowsPerQuery = $config->get( MainConfigNames::UpdateRowsPerQuery );
	$dbProvider = $services->getConnectionProvider();
	$dbw = $dbProvider->getPrimaryDatabase();
	$lockKey = $dbw->getDomainID() . ':recentchanges-prune';
	if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
		// already in progress
		return;
	}

	// From here on the lock is held: release it even if a query or a hook
	// handler throws, so that the failure does not leave the lock behind.
	try {
		$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );
		$hookContainer = $services->getHookContainer();
		$hookRunner = new HookRunner( $hookContainer );
		$cutoff = $dbw->timestamp( ConvertibleTimestamp::time() - $rcMaxAge );
		$hasLegacyHook = $hookContainer->isRegistered( 'RecentChangesPurgeRows' );
		if ( $hasLegacyHook ) {
			// Legacy hook handlers are handed the deleted rows themselves, so
			// select everything needed to build a recentchanges object.
			$query = $dbw->newSelectQueryBuilder()
				->queryInfo( RecentChange::getQueryInfo( RecentChange::STRAIGHT_JOIN_ACTOR ) )
				->where( $dbw->expr( 'rc_timestamp', '<', $cutoff ) )
				->limit( $updateRowsPerQuery )
				->caller( __METHOD__ );
		} else {
			// Only the primary keys are needed to delete the rows
			$query = $dbw->newSelectQueryBuilder()
				->select( 'rc_id' )
				->from( 'recentchanges' )
				->where( $dbw->expr( 'rc_timestamp', '<', $cutoff ) )
				->limit( $updateRowsPerQuery )
				->caller( __METHOD__ );
		}
		// Hook handlers may adjust the query and register callbacks that are
		// invoked with each batch's result set after its rows are deleted.
		// NOTE(review): $callbacks is presumably taken by reference — confirm in HookRunner
		$callbacks = [];
		$hookRunner->onRecentChangesPurgeQuery( $query, $callbacks );
		do {
			$res = $query->fetchResultSet();
			$rcIds = [];
			if ( $res->numRows() ) {
				$rows = [];
				foreach ( $res as $row ) {
					$rcIds[] = $row->rc_id;
					if ( $hasLegacyHook ) {
						$rows[] = $row;
					}
				}

				$dbw->newDeleteQueryBuilder()
					->deleteFrom( 'recentchanges' )
					->where( [ 'rc_id' => $rcIds ] )
					->caller( __METHOD__ )->execute();

				foreach ( $callbacks as $callback ) {
					$callback( $res );
				}
				if ( $hasLegacyHook ) {
					$hookRunner->onRecentChangesPurgeRows( $rows );
				}
				// There might be more, so try waiting for replica DBs
				if ( !$dbProvider->commitAndWaitForReplication(
					__METHOD__, $ticket, [ 'timeout' => 3 ]
				) ) {
					// Another job will continue anyway
					break;
				}
			}
			// An empty batch means nothing older than the cutoff is left
		} while ( $rcIds );
	} finally {
		$dbw->unlock( $lockKey, __METHOD__ );
	}
}
141
/**
 * Refresh the 'activeusers' entries in querycachetwo from recentchanges.
 *
 * Collects registered users with qualifying recent changes since the last
 * cache update (bounded by the ActiveUserDays window), inserts those not
 * already cached, records the new freshness timestamp in querycache_info and
 * removes cached users whose last recorded edit is older than the window.
 * Unless MiserMode is set, the site statistics cache is updated as well.
 *
 * A named database lock makes the update exclusive; if the lock is already
 * held this method returns immediately without doing anything.
 */
protected function updateActiveUsers() {
	$services = MediaWikiServices::getInstance();
	$activeUserDays = $services->getMainConfig()->get(
		MainConfigNames::ActiveUserDays );

	// Users that made edits within this many seconds are "active". This is
	// also the widest time range pulled in by a single update.
	$window = $activeUserDays * 86400;

	$rcLookup = $services->getRecentChangeLookup();
	$dbProvider = $services->getConnectionProvider();
	$dbw = $dbProvider->getPrimaryDatabase();
	$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );

	$lockKey = $dbw->getDomainID() . '-activeusers';
	if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
		// Exclusive update (avoids duplicate entries)… it's usually fine to just
		// drop out here, if the Job is already running.
		return;
	}

	// From here on the lock is held: release it even if a query throws, so
	// that the failure does not leave the lock behind.
	try {
		// Long-running queries expected
		$dbw->setSessionOptions( [ 'connTimeout' => 900 ] );

		// Same clock source as purgeExpiredRows()
		$nowUnix = ConvertibleTimestamp::time();
		// Edits older than this no longer count towards being "active"
		$activeCutoff = $nowUnix - $window;

		// Get the last-updated timestamp for the cache
		$cTime = $dbw->newSelectQueryBuilder()
			->select( 'qci_timestamp' )
			->from( 'querycache_info' )
			->where( [ 'qci_type' => 'activeusers' ] )
			->caller( __METHOD__ )->fetchField();
		$cTimeUnix = $cTime ? (int)wfTimestamp( TS::UNIX, $cTime ) : 1;

		// Pick the date range to fetch from. This is normally from the last
		// update until the present time, but has a limited window.
		// If the window is limited, multiple runs are needed to fully populate it.
		$sTimestamp = max( $cTimeUnix, $activeCutoff );
		$eTimestamp = min( $sTimestamp + $window, $nowUnix );

		// Get all the users active since the last update
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'actor_name', 'lastedittime' => 'MAX(rc_timestamp)' ] )
			->from( 'recentchanges' )
			->join( 'actor', null, 'actor_id=rc_actor' )
			->where( [
				$dbw->expr( 'actor_user', '!=', null ), // actual accounts
				$dbw->expr( 'rc_source', '=', $rcLookup->getPrimarySources() ),
				$dbw->expr( 'rc_log_type', '=', null )->or( 'rc_log_type', '!=', 'newusers' ),
				$dbw->expr( 'rc_timestamp', '>=', $dbw->timestamp( $sTimestamp ) ),
				$dbw->expr( 'rc_timestamp', '<=', $dbw->timestamp( $eTimestamp ) ),
			] )
			->groupBy( 'actor_name' )
			->orderBy( 'NULL' ) // avoid filesort
			->caller( __METHOD__ )->fetchResultSet();

		// Map of user name => timestamp of the latest matching change
		$names = [];
		foreach ( $res as $row ) {
			$names[$row->actor_name] = $row->lastedittime;
		}

		// Find which of the recently active users are already accounted for
		if ( count( $names ) ) {
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'user_name' => 'qcc_title' ] )
				->from( 'querycachetwo' )
				->where( [
					'qcc_type' => 'activeusers',
					'qcc_namespace' => NS_USER,
					// PHP turns numeric-looking names into int array keys
					'qcc_title' => array_map( 'strval', array_keys( $names ) ),
					$dbw->expr( 'qcc_value', '>=', $activeCutoff ),
				] )
				->caller( __METHOD__ )->fetchResultSet();
			// Note: In order for this to be actually consistent, we would need
			// to update these rows with the new lastedittime.
			foreach ( $res as $row ) {
				unset( $names[$row->user_name] );
			}
		}

		// Insert the users that need to be added to the list
		if ( count( $names ) ) {
			$newRows = [];
			foreach ( $names as $name => $lastEditTime ) {
				$newRows[] = [
					'qcc_type' => 'activeusers',
					'qcc_namespace' => NS_USER,
					// Cast back to string: numeric-looking names come out of
					// the array as int keys (see the lookup above)
					'qcc_title' => (string)$name,
					'qcc_value' => (int)wfTimestamp( TS::UNIX, $lastEditTime ),
					'qcc_namespacetwo' => 0, // unused
					'qcc_titletwo' => '' // unused
				];
			}
			// Insert in batches, letting replicas catch up in between
			foreach ( array_chunk( $newRows, 500 ) as $rowBatch ) {
				$dbw->newInsertQueryBuilder()
					->insertInto( 'querycachetwo' )
					->rows( $rowBatch )
					->caller( __METHOD__ )->execute();
				$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
			}
		}

		// If a transaction was already started, it might have an old
		// snapshot, so kludge the timestamp range back as needed.
		$asOfTimestamp = min( $eTimestamp, (int)$dbw->trxTimestamp() );

		// Touch the data freshness timestamp
		$dbw->newReplaceQueryBuilder()
			->replaceInto( 'querycache_info' )
			->row( [
				'qci_type' => 'activeusers',
				'qci_timestamp' => $dbw->timestamp( $asOfTimestamp ), // not always $now
			] )
			->uniqueIndexFields( [ 'qci_type' ] )
			->caller( __METHOD__ )->execute();

		// Rotate out users that have not edited in too long (according to old data set)
		$dbw->newDeleteQueryBuilder()
			->deleteFrom( 'querycachetwo' )
			->where( [
				'qcc_type' => 'activeusers',
				$dbw->expr( 'qcc_value', '<', $activeCutoff ) // TS::UNIX
			] )
			->caller( __METHOD__ )->execute();

		if ( !$services->getMainConfig()->get( MainConfigNames::MiserMode ) ) {
			SiteStatsUpdate::cacheUpdate( $dbw );
		}
	} finally {
		$dbw->unlock( $lockKey, __METHOD__ );
	}
}
273}
274
// Registers the string name 'RecentChangesUpdateJob' as an alias of RecentChangesUpdateJob::class.
// NOTE(review): presumably kept so code using the old un-namespaced class name keeps working — confirm.
276class_alias( RecentChangesUpdateJob::class, 'RecentChangesUpdateJob' );
const NS_USER
Definition Defines.php:53
wfTimestamp( $outputtype=TS::UNIX, $ts=0)
Get a timestamp string in one of various formats.
Class for handling updates to the site_stats table.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Describe and execute a background job.
Definition Job.php:28
array $params
Array of job parameters.
Definition Job.php:33
A class containing constants representing the names of configuration variables.
const UpdateRowsPerQuery
Name constant for the UpdateRowsPerQuery setting, for use with Config::get()
const RCMaxAge
Name constant for the RCMaxAge setting, for use with Config::get()
const MiserMode
Name constant for the MiserMode setting, for use with Config::get()
const ActiveUserDays
Name constant for the ActiveUserDays setting, for use with Config::get()
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
static getQueryInfo(int $joinFlags=0)
Return the tables, fields, and join conditions to be selected to create a new recentchanges object.
const STRAIGHT_JOIN_ACTOR
Flag for RecentChange::getQueryInfo()
Purge expired rows from the recentchanges table.
run()
Run the job. If this method returns false or completes exceptionally, the job runner will retry execut...
Parent class for all special pages.
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name. If you don't need a full Title object,...
Represents a title within MediaWiki.
Definition Title.php:70