MediaWiki master
RecentChangesUpdateJob.php
Go to the documentation of this file.
1<?php
22
23use InvalidArgumentException;
31use Wikimedia\Timestamp\ConvertibleTimestamp;
32
/**
 * Create a recent changes update job.
 *
 * @param Title $title Title handed through to the parent job constructor
 * @param array $params Job parameters; a non-null 'type' entry is required
 * @throws InvalidArgumentException If $params carries no 'type' entry
 */
public function __construct( Title $title, array $params ) {
	parent::__construct( 'recentChangesUpdate', $title, $params );

	$hasType = isset( $params['type'] );
	if ( !$hasType ) {
		throw new InvalidArgumentException( "Missing 'type' parameter." );
	}

	// Mark the job for de-duplication and add the no-explicit-transaction-round flag
	$this->removeDuplicates = true;
	$this->executionFlags = $this->executionFlags | self::JOB_NO_EXPLICIT_TRX_ROUND;
}
51
/**
 * Build a job of type 'purge', which makes run() call purgeExpiredRows().
 *
 * @return self
 */
final public static function newPurgeJob() {
	$title = SpecialPage::getTitleFor( 'Recentchanges' );
	$params = [ 'type' => 'purge' ];

	return new self( $title, $params );
}
60
/**
 * Build a job of type 'cacheUpdate', which makes run() call updateActiveUsers().
 *
 * @return self
 */
final public static function newCacheUpdateJob() {
	$title = SpecialPage::getTitleFor( 'Recentchanges' );
	$params = [ 'type' => 'cacheUpdate' ];

	return new self( $title, $params );
}
70
/**
 * Dispatch to the handler selected by the 'type' job parameter.
 *
 * @return bool Always true when a known type was handled
 * @throws InvalidArgumentException If 'type' is neither 'purge' nor 'cacheUpdate'
 */
public function run() {
	$type = $this->params['type'];

	if ( $type === 'purge' ) {
		$this->purgeExpiredRows();

		return true;
	}

	if ( $type === 'cacheUpdate' ) {
		$this->updateActiveUsers();

		return true;
	}

	throw new InvalidArgumentException(
		"Invalid 'type' parameter '{$type}'." );
}
83
/**
 * Delete rows from the recentchanges table whose rc_timestamp is older than
 * the RCMaxAge setting.
 *
 * Rows are removed in batches of at most UpdateRowsPerQuery. After each batch
 * the RecentChangesPurgeRows hook is run with the deleted rows, and the method
 * commits and waits (up to 3 seconds) for replication before continuing.
 *
 * The work is guarded by a non-blocking named lock on the primary database;
 * if the lock is already held the method returns without doing anything.
 */
protected function purgeExpiredRows() {
	$services = MediaWikiServices::getInstance();
	$config = $services->getMainConfig();
	$rcMaxAge = $config->get( MainConfigNames::RCMaxAge );
	$updateRowsPerQuery = $config->get( MainConfigNames::UpdateRowsPerQuery );

	$dbProvider = $services->getConnectionProvider();
	$dbw = $dbProvider->getPrimaryDatabase();

	$lockKey = $dbw->getDomainID() . ':recentchanges-prune';
	if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
		// already in progress
		return;
	}

	$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );
	$hookRunner = new HookRunner( $services->getHookContainer() );
	// Everything strictly older than this timestamp is expired
	$cutoff = $dbw->timestamp( ConvertibleTimestamp::time() - $rcMaxAge );
	$rcQuery = RecentChange::getQueryInfo();

	do {
		$rcIds = [];
		$rows = [];
		// Select full rows (not just IDs) so the hook handlers receive them
		$res = $dbw->newSelectQueryBuilder()
			->queryInfo( $rcQuery )
			->where( $dbw->expr( 'rc_timestamp', '<', $cutoff ) )
			->limit( $updateRowsPerQuery )
			->caller( __METHOD__ )
			->fetchResultSet();
		foreach ( $res as $row ) {
			$rcIds[] = $row->rc_id;
			$rows[] = $row;
		}

		if ( $rcIds ) {
			$dbw->newDeleteQueryBuilder()
				->deleteFrom( 'recentchanges' )
				->where( [ 'rc_id' => $rcIds ] )
				->caller( __METHOD__ )->execute();
			$hookRunner->onRecentChangesPurgeRows( $rows );
			// There might be more, so try waiting for replica DBs
			if ( !$dbProvider->commitAndWaitForReplication(
				__METHOD__, $ticket, [ 'timeout' => 3 ]
			) ) {
				// Another job will continue anyway
				break;
			}
		}
	} while ( $rcIds );

	$dbw->unlock( $lockKey, __METHOD__ );
}
132
/**
 * Refresh the 'activeusers' entries in querycachetwo from recentchanges.
 *
 * Steps, all performed on the primary database under a non-blocking named lock
 * (the method returns immediately if the lock is already held):
 *  - read the last update time from querycache_info;
 *  - collect registered users with matching recentchanges rows in the range
 *    from that time (bounded by the ActiveUserDays window) up to now;
 *  - insert, in batches of 500, the users not already listed;
 *  - store the new freshness timestamp in querycache_info;
 *  - delete entries whose last edit is older than the window;
 *  - unless MiserMode is enabled, call SiteStatsUpdate::cacheUpdate().
 */
protected function updateActiveUsers() {
	$services = MediaWikiServices::getInstance();
	$config = $services->getMainConfig();
	$activeUserDays = $config->get( MainConfigNames::ActiveUserDays );

	// Users that made edits within this many seconds are "active".
	// The same span is the widest range of changes pulled in by one update.
	$window = $activeUserDays * 86400;

	$dbProvider = $services->getConnectionProvider();
	$dbw = $dbProvider->getPrimaryDatabase();
	$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );

	$lockKey = $dbw->getDomainID() . '-activeusers';
	if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
		// Exclusive update (avoids duplicate entries)… it's usually fine to just
		// drop out here, if the Job is already running.
		return;
	}

	// Long-running queries expected
	$dbw->setSessionOptions( [ 'connTimeout' => 900 ] );

	// Use the same clock source as purgeExpiredRows()
	$nowUnix = ConvertibleTimestamp::time();
	// Edits older than this no longer count as activity
	$cutoffUnix = $nowUnix - $window;

	// Get the last-updated timestamp for the cache
	$cTime = $dbw->newSelectQueryBuilder()
		->select( 'qci_timestamp' )
		->from( 'querycache_info' )
		->where( [ 'qci_type' => 'activeusers' ] )
		->caller( __METHOD__ )->fetchField();
	// No stored timestamp yet: fall back to 1 so the window bound below applies
	$cTimeUnix = $cTime ? (int)wfTimestamp( TS_UNIX, $cTime ) : 1;

	// Pick the date range to fetch from. This is normally from the last
	// update until the present time, but has a limited window.
	// If the window is limited, multiple runs are needed to fully populate it.
	$sTimestamp = max( $cTimeUnix, $cutoffUnix );
	$eTimestamp = min( $sTimestamp + $window, $nowUnix );

	// Get all the users active since the last update
	$res = $dbw->newSelectQueryBuilder()
		->select( [ 'actor_name', 'lastedittime' => 'MAX(rc_timestamp)' ] )
		->from( 'recentchanges' )
		->join( 'actor', null, 'actor_id=rc_actor' )
		->where( [
			$dbw->expr( 'actor_user', '!=', null ), // actual accounts
			$dbw->expr( 'rc_type', '!=', RC_EXTERNAL ), // no wikidata
			$dbw->expr( 'rc_log_type', '=', null )->or( 'rc_log_type', '!=', 'newusers' ),
			$dbw->expr( 'rc_timestamp', '>=', $dbw->timestamp( $sTimestamp ) ),
			$dbw->expr( 'rc_timestamp', '<=', $dbw->timestamp( $eTimestamp ) ),
		] )
		->groupBy( 'actor_name' )
		->orderBy( 'NULL' ) // avoid filesort
		->caller( __METHOD__ )->fetchResultSet();

	// Map of user name => latest rc_timestamp in the fetched range
	$names = [];
	foreach ( $res as $row ) {
		$names[$row->actor_name] = $row->lastedittime;
	}

	// Find which of the recently active users are already accounted for
	if ( count( $names ) ) {
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'user_name' => 'qcc_title' ] )
			->from( 'querycachetwo' )
			->where( [
				'qcc_type' => 'activeusers',
				'qcc_namespace' => NS_USER,
				// Array keys that look numeric are ints in PHP; cast back to strings
				'qcc_title' => array_map( 'strval', array_keys( $names ) ),
				$dbw->expr( 'qcc_value', '>=', $cutoffUnix ),
			] )
			->caller( __METHOD__ )->fetchResultSet();
		// Note: In order for this to be actually consistent, we would need
		// to update these rows with the new lastedittime.
		foreach ( $res as $row ) {
			unset( $names[$row->user_name] );
		}
	}

	// Insert the users that need to be added to the list
	if ( count( $names ) ) {
		$newRows = [];
		foreach ( $names as $name => $lastEditTime ) {
			$newRows[] = [
				'qcc_type' => 'activeusers',
				'qcc_namespace' => NS_USER,
				'qcc_title' => $name,
				'qcc_value' => (int)wfTimestamp( TS_UNIX, $lastEditTime ),
				'qcc_namespacetwo' => 0, // unused
				'qcc_titletwo' => '' // unused
			];
		}
		foreach ( array_chunk( $newRows, 500 ) as $rowBatch ) {
			$dbw->newInsertQueryBuilder()
				->insertInto( 'querycachetwo' )
				->rows( $rowBatch )
				->caller( __METHOD__ )->execute();
			$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
		}
	}

	// If a transaction was already started, it might have an old
	// snapshot, so kludge the timestamp range back as needed.
	$asOfTimestamp = min( $eTimestamp, (int)$dbw->trxTimestamp() );

	// Touch the data freshness timestamp
	$dbw->newReplaceQueryBuilder()
		->replaceInto( 'querycache_info' )
		->row( [
			'qci_type' => 'activeusers',
			'qci_timestamp' => $dbw->timestamp( $asOfTimestamp ), // not always $now
		] )
		->uniqueIndexFields( [ 'qci_type' ] )
		->caller( __METHOD__ )->execute();

	// Rotate out users that have not edited in too long (according to old data set)
	$dbw->newDeleteQueryBuilder()
		->deleteFrom( 'querycachetwo' )
		->where( [
			'qcc_type' => 'activeusers',
			$dbw->expr( 'qcc_value', '<', $cutoffUnix ) // TS_UNIX
		] )
		->caller( __METHOD__ )->execute();

	if ( !$config->get( MainConfigNames::MiserMode ) ) {
		SiteStatsUpdate::cacheUpdate( $dbw );
	}

	$dbw->unlock( $lockKey, __METHOD__ );
}
262}
263
// Register 'RecentChangesUpdateJob' as an alias of this class.
// NOTE(review): presumably keeps the un-namespaced name working for existing
// callers; the namespace declaration is not visible here, so confirm.
class_alias( RecentChangesUpdateJob::class, 'RecentChangesUpdateJob' );
const NS_USER
Definition Defines.php:67
const RC_EXTERNAL
Definition Defines.php:120
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Class for handling updates to the site_stats table.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookContainer for dispatch.
Describe and execute a background job.
Definition Job.php:41
array $params
Array of job parameters.
Definition Job.php:46
A class containing constants representing the names of configuration variables.
const UpdateRowsPerQuery
Name constant for the UpdateRowsPerQuery setting, for use with Config::get()
const RCMaxAge
Name constant for the RCMaxAge setting, for use with Config::get()
const MiserMode
Name constant for the MiserMode setting, for use with Config::get()
const ActiveUserDays
Name constant for the ActiveUserDays setting, for use with Config::get()
Service locator for MediaWiki core services.
static getInstance()
Returns the global default instance of the top level service locator.
static getQueryInfo()
Return the tables, fields, and join conditions to be selected to create a new recentchanges object.
Purge expired rows from the recentchanges table.
Parent class for all special pages.
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name. If you don't need a full Title object,...
Represents a title within MediaWiki.
Definition Title.php:78