MediaWiki master
RecentChangesUpdateJob.php
Go to the documentation of this file.
1<?php
27
/**
 * Create a 'recentChangesUpdate' job.
 *
 * @param Title $title Title passed through to the parent job constructor
 * @param array $params Job parameters; the 'type' entry must be set
 * @throws InvalidArgumentException If $params has no 'type' entry
 */
public function __construct( Title $title, array $params ) {
	parent::__construct( 'recentChangesUpdate', $title, $params );

	$hasType = isset( $params['type'] );
	if ( !$hasType ) {
		throw new InvalidArgumentException( "Missing 'type' parameter." );
	}

	// Mark the job for de-duplication and add the JOB_NO_EXPLICIT_TRX_ROUND flag
	// on top of whatever execution flags are already set.
	$this->removeDuplicates = true;
	$this->executionFlags = $this->executionFlags | self::JOB_NO_EXPLICIT_TRX_ROUND;
}
45
/**
 * Build a job of type 'purge', which run() dispatches to purgeExpiredRows().
 *
 * @return self Job attached to the 'Recentchanges' special page title
 */
final public static function newPurgeJob() {
	$title = SpecialPage::getTitleFor( 'Recentchanges' );
	$params = [ 'type' => 'purge' ];

	return new self( $title, $params );
}
54
/**
 * Build a job of type 'cacheUpdate', which run() dispatches to updateActiveUsers().
 *
 * @return self Job attached to the 'Recentchanges' special page title
 */
final public static function newCacheUpdateJob() {
	$title = SpecialPage::getTitleFor( 'Recentchanges' );
	$params = [ 'type' => 'cacheUpdate' ];

	return new self( $title, $params );
}
64
/**
 * Execute the job by dispatching on the 'type' parameter.
 *
 * 'purge' runs purgeExpiredRows() and 'cacheUpdate' runs updateActiveUsers().
 *
 * @return bool Always true when a known type was handled
 * @throws InvalidArgumentException If the 'type' parameter is not a known type
 */
public function run() {
	$type = $this->params['type'];

	if ( $type === 'purge' ) {
		$this->purgeExpiredRows();

		return true;
	}

	if ( $type === 'cacheUpdate' ) {
		$this->updateActiveUsers();

		return true;
	}

	throw new InvalidArgumentException(
		"Invalid 'type' parameter '{$this->params['type']}'." );
}
77
/**
 * Delete recentchanges rows whose rc_timestamp is older than the RCMaxAge cutoff.
 *
 * Rows are selected and deleted on the primary database in batches of at most
 * UpdateRowsPerQuery rows. Each deleted batch is handed to the
 * RecentChangesPurgeRows hook, after which the method commits and waits for
 * replication before fetching the next batch.
 *
 * The whole run is guarded by a named lock; if the lock cannot be obtained the
 * method returns without doing anything. The lock is released in a `finally`
 * block so that it is not left held when a query or a hook handler throws.
 */
protected function purgeExpiredRows() {
	$services = MediaWikiServices::getInstance();
	$config = $services->getMainConfig();
	$rcMaxAge = $config->get( MainConfigNames::RCMaxAge );
	$updateRowsPerQuery = $config->get( MainConfigNames::UpdateRowsPerQuery );
	$dbProvider = $services->getConnectionProvider();
	$dbw = $dbProvider->getPrimaryDatabase();
	$lockKey = $dbw->getDomainID() . ':recentchanges-prune';
	if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
		// already in progress
		return;
	}

	try {
		$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );
		$hookRunner = new HookRunner( $services->getHookContainer() );
		$cutoff = $dbw->timestamp( time() - $rcMaxAge );
		$rcQuery = RecentChange::getQueryInfo();
		do {
			$rcIds = [];
			$rows = [];
			$res = $dbw->newSelectQueryBuilder()
				->queryInfo( $rcQuery )
				->where( $dbw->expr( 'rc_timestamp', '<', $cutoff ) )
				->limit( $updateRowsPerQuery )
				->caller( __METHOD__ )
				->fetchResultSet();
			foreach ( $res as $row ) {
				$rcIds[] = $row->rc_id;
				$rows[] = $row;
			}
			if ( $rcIds ) {
				$dbw->newDeleteQueryBuilder()
					->deleteFrom( 'recentchanges' )
					->where( [ 'rc_id' => $rcIds ] )
					->caller( __METHOD__ )->execute();
				// Hand the full rows (not only the IDs) of the deleted batch to the hook
				$hookRunner->onRecentChangesPurgeRows( $rows );
				// There might be more, so try waiting for replica DBs
				if ( !$dbProvider->commitAndWaitForReplication(
					__METHOD__, $ticket, [ 'timeout' => 3 ]
				) ) {
					// Another job will continue anyway
					break;
				}
			}
		} while ( $rcIds );
	} finally {
		// Always release the named lock, including when a query or hook throws
		$dbw->unlock( $lockKey, __METHOD__ );
	}
}
126
/**
 * Refresh the 'activeusers' rows in querycachetwo from recentchanges, and
 * touch the matching freshness timestamp in querycache_info.
 *
 * Steps, all on the primary database:
 *  - read the last update time from querycache_info;
 *  - collect, per actor name, the latest rc_timestamp in the range to process;
 *  - drop names that already have a sufficiently recent querycachetwo row;
 *  - insert the remaining names in batches of 500;
 *  - write the new freshness timestamp and delete rows older than the
 *    ActiveUserDays cutoff;
 *  - unless MiserMode is set, call SiteStatsUpdate::cacheUpdate().
 *
 * The run is guarded by a named lock; if the lock cannot be obtained the
 * method returns without doing anything. The lock is released in a `finally`
 * block so that it is not left held when a query throws.
 */
protected function updateActiveUsers() {
	$services = MediaWikiServices::getInstance();
	$config = $services->getMainConfig();
	$activeUserDays = $config->get( MainConfigNames::ActiveUserDays );

	// Users that made edits within the last this-many days are "active"
	$days = $activeUserDays;
	// Pull in the full window of active users in this update
	$window = $activeUserDays * 86400;

	$dbProvider = $services->getConnectionProvider();
	$dbw = $dbProvider->getPrimaryDatabase();
	$ticket = $dbProvider->getEmptyTransactionTicket( __METHOD__ );

	$lockKey = $dbw->getDomainID() . '-activeusers';
	if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
		// Exclusive update (avoids duplicate entries)… it's usually fine to just
		// drop out here, if the Job is already running.
		return;
	}

	try {
		// Long-running queries expected
		$dbw->setSessionOptions( [ 'connTimeout' => 900 ] );

		$nowUnix = time();
		// Oldest last-edit time (TS_UNIX) that still counts as "active"
		$activeCutoffUnix = $nowUnix - $days * 86400;

		// Get the last-updated timestamp for the cache
		$cTime = $dbw->newSelectQueryBuilder()
			->select( 'qci_timestamp' )
			->from( 'querycache_info' )
			->where( [ 'qci_type' => 'activeusers' ] )
			->caller( __METHOD__ )->fetchField();
		$cTimeUnix = $cTime ? (int)wfTimestamp( TS_UNIX, $cTime ) : 1;

		// Pick the date range to fetch from. This is normally from the last
		// update until the present time, but has a limited window.
		// If the window is limited, multiple runs are needed to fully populate it.
		$sTimestamp = max( $cTimeUnix, $activeCutoffUnix );
		$eTimestamp = min( $sTimestamp + $window, $nowUnix );

		// Get all the users active since the last update
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'actor_name', 'lastedittime' => 'MAX(rc_timestamp)' ] )
			->from( 'recentchanges' )
			->join( 'actor', null, 'actor_id=rc_actor' )
			->where( [
				$dbw->expr( 'actor_user', '!=', null ), // actual accounts
				$dbw->expr( 'rc_type', '!=', RC_EXTERNAL ), // no wikidata
				$dbw->expr( 'rc_log_type', '=', null )->or( 'rc_log_type', '!=', 'newusers' ),
				$dbw->expr( 'rc_timestamp', '>=', $dbw->timestamp( $sTimestamp ) ),
				$dbw->expr( 'rc_timestamp', '<=', $dbw->timestamp( $eTimestamp ) ),
			] )
			->groupBy( 'actor_name' )
			->orderBy( 'NULL' ) // avoid filesort
			->caller( __METHOD__ )->fetchResultSet();

		// Map of user name => latest rc_timestamp in the range
		$names = [];
		foreach ( $res as $row ) {
			$names[$row->actor_name] = $row->lastedittime;
		}

		// Find which of the recently active users are already accounted for
		if ( count( $names ) ) {
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'user_name' => 'qcc_title' ] )
				->from( 'querycachetwo' )
				->where( [
					'qcc_type' => 'activeusers',
					'qcc_namespace' => NS_USER,
					// PHP turns numeric-string array keys into ints; compare as strings
					'qcc_title' => array_map( 'strval', array_keys( $names ) ),
					$dbw->expr( 'qcc_value', '>=', $activeCutoffUnix ),
				] )
				->caller( __METHOD__ )->fetchResultSet();
			// Note: In order for this to be actually consistent, we would need
			// to update these rows with the new lastedittime.
			foreach ( $res as $row ) {
				unset( $names[$row->user_name] );
			}
		}

		// Insert the users that need to be added to the list
		if ( count( $names ) ) {
			$newRows = [];
			foreach ( $names as $name => $lastEditTime ) {
				$newRows[] = [
					'qcc_type' => 'activeusers',
					'qcc_namespace' => NS_USER,
					// Array keys of numeric user names are ints; store the title as a
					// string, consistent with the strval() lookup above.
					'qcc_title' => (string)$name,
					'qcc_value' => (int)wfTimestamp( TS_UNIX, $lastEditTime ),
					'qcc_namespacetwo' => 0, // unused
					'qcc_titletwo' => '' // unused
				];
			}
			foreach ( array_chunk( $newRows, 500 ) as $rowBatch ) {
				$dbw->newInsertQueryBuilder()
					->insertInto( 'querycachetwo' )
					->rows( $rowBatch )
					->caller( __METHOD__ )->execute();
				$dbProvider->commitAndWaitForReplication( __METHOD__, $ticket );
			}
		}

		// If a transaction was already started, it might have an old
		// snapshot, so kludge the timestamp range back as needed.
		$asOfTimestamp = min( $eTimestamp, (int)$dbw->trxTimestamp() );

		// Touch the data freshness timestamp
		$dbw->newReplaceQueryBuilder()
			->replaceInto( 'querycache_info' )
			->row( [
				'qci_type' => 'activeusers',
				'qci_timestamp' => $dbw->timestamp( $asOfTimestamp ), // not always $now
			] )
			->uniqueIndexFields( [ 'qci_type' ] )
			->caller( __METHOD__ )->execute();

		// Rotate out users that have not edited in too long (according to old data set)
		$dbw->newDeleteQueryBuilder()
			->deleteFrom( 'querycachetwo' )
			->where( [
				'qcc_type' => 'activeusers',
				$dbw->expr( 'qcc_value', '<', $activeCutoffUnix ) // TS_UNIX
			] )
			->caller( __METHOD__ )->execute();

		if ( !$config->get( MainConfigNames::MiserMode ) ) {
			SiteStatsUpdate::cacheUpdate( $dbw );
		}
	} finally {
		// Always release the named lock, including when a query throws
		$dbw->unlock( $lockKey, __METHOD__ );
	}
}
256}
const NS_USER
Definition Defines.php:66
const RC_EXTERNAL
Definition Defines.php:119
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
array $params
The job parameters.
Describe and execute a background job.
Definition Job.php:40
Class for handling updates to the site_stats table.
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookContainer.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Parent class for all special pages.
Represents a title within MediaWiki.
Definition Title.php:78
Purge expired rows from the recentchanges table.
__construct(Title $title, array $params)