MediaWiki  master
RecentChangesUpdateJob.php
Go to the documentation of this file.
1 <?php
26 
/**
 * Job that performs maintenance work driven by the recentchanges table.
 *
 * The mandatory 'type' job parameter selects the task:
 *  - 'purge': delete recentchanges rows older than the RCMaxAge setting
 *  - 'cacheUpdate': refresh the 'activeusers' rows kept in querycachetwo
 */
class RecentChangesUpdateJob extends Job {
	/**
	 * @param Title $title
	 * @param array $params Job parameters; must contain a 'type' key
	 * @throws InvalidArgumentException If the 'type' parameter is not set
	 */
	public function __construct( Title $title, array $params ) {
		parent::__construct( 'recentChangesUpdate', $title, $params );

		if ( !isset( $params['type'] ) ) {
			// InvalidArgumentException extends Exception, so callers catching
			// the generic type keep working; it also matches what run() throws.
			throw new InvalidArgumentException( "Missing 'type' parameter." );
		}

		$this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
		$this->removeDuplicates = true;
	}

	/**
	 * Create a job of type 'purge', targeted at Special:Recentchanges.
	 *
	 * @return RecentChangesUpdateJob
	 */
	final public static function newPurgeJob() {
		return new self(
			SpecialPage::getTitleFor( 'Recentchanges' ), [ 'type' => 'purge' ]
		);
	}

	/**
	 * Create a job of type 'cacheUpdate', targeted at Special:Recentchanges.
	 *
	 * @return RecentChangesUpdateJob
	 */
	final public static function newCacheUpdateJob() {
		return new self(
			SpecialPage::getTitleFor( 'Recentchanges' ), [ 'type' => 'cacheUpdate' ]
		);
	}

	/**
	 * Dispatch to the task selected by the 'type' parameter.
	 *
	 * @return bool Always true when no exception is thrown
	 * @throws InvalidArgumentException If 'type' is neither 'purge' nor 'cacheUpdate'
	 */
	public function run() {
		$type = $this->params['type'];

		if ( $type === 'purge' ) {
			$this->purgeExpiredRows();
		} elseif ( $type === 'cacheUpdate' ) {
			$this->updateActiveUsers();
		} else {
			throw new InvalidArgumentException(
				"Invalid 'type' parameter '{$this->params['type']}'." );
		}

		return true;
	}

	/**
	 * Delete recentchanges rows older than RCMaxAge seconds, in batches of
	 * UpdateRowsPerQuery rows, passing each deleted batch to the
	 * RecentChangesPurgeRows hook.
	 *
	 * Returns without doing anything if another process already holds the
	 * prune lock.
	 */
	protected function purgeExpiredRows() {
		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();
		$rcMaxAge = $config->get( MainConfigNames::RCMaxAge );
		$updateRowsPerQuery = $config->get( MainConfigNames::UpdateRowsPerQuery );

		$factory = $services->getDBLoadBalancerFactory();
		// Same accessor as updateActiveUsers(), instead of the global wfGetDB()
		$dbw = $factory->getPrimaryDatabase();

		$lockKey = $dbw->getDomainID() . ':recentchanges-prune';
		if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
			// already in progress
			return;
		}

		try {
			$ticket = $factory->getEmptyTransactionTicket( __METHOD__ );
			$hookRunner = new HookRunner( $services->getHookContainer() );
			$cutoff = $dbw->timestamp( time() - $rcMaxAge );
			$rcQuery = RecentChange::getQueryInfo();

			do {
				$rcIds = [];
				$rows = [];
				$res = $dbw->select(
					$rcQuery['tables'],
					$rcQuery['fields'],
					[ 'rc_timestamp < ' . $dbw->addQuotes( $cutoff ) ],
					__METHOD__,
					[ 'LIMIT' => $updateRowsPerQuery ],
					$rcQuery['joins']
				);
				foreach ( $res as $row ) {
					$rcIds[] = $row->rc_id;
					$rows[] = $row;
				}

				if ( $rcIds ) {
					$dbw->newDeleteQueryBuilder()
						->deleteFrom( 'recentchanges' )
						->where( [ 'rc_id' => $rcIds ] )
						->caller( __METHOD__ )->execute();
					$hookRunner->onRecentChangesPurgeRows( $rows );
					// There might be more, so try waiting for replica DBs
					if ( !$factory->commitAndWaitForReplication(
						__METHOD__, $ticket, [ 'timeout' => 3 ]
					) ) {
						// Another job will continue anyway
						break;
					}
				}
			} while ( $rcIds );
		} finally {
			// Release the lock even when a query or hook handler throws, so a
			// failed run cannot keep later purge jobs from acquiring it.
			$dbw->unlock( $lockKey, __METHOD__ );
		}
	}

	/**
	 * Refresh the 'activeusers' rows in querycachetwo from recentchanges and
	 * update the matching freshness timestamp in querycache_info.
	 *
	 * Returns without doing anything if another process already holds the
	 * active-users lock.
	 */
	protected function updateActiveUsers() {
		$services = MediaWikiServices::getInstance();
		$config = $services->getMainConfig();
		$activeUserDays = $config->get( MainConfigNames::ActiveUserDays );

		// Users whose latest edit is within this many days count as "active"
		$days = $activeUserDays;
		// Pull in the full window of active users in this update
		$window = $activeUserDays * 86400;

		$factory = $services->getDBLoadBalancerFactory();
		$dbw = $factory->getPrimaryDatabase();
		$ticket = $factory->getEmptyTransactionTicket( __METHOD__ );

		$lockKey = $dbw->getDomainID() . '-activeusers';
		if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
			// Exclusive update (avoids duplicate entries)… it's usually fine to just
			// drop out here, if the Job is already running.
			return;
		}

		try {
			// Long-running queries expected
			$dbw->setSessionOptions( [ 'connTimeout' => 900 ] );

			$nowUnix = time();
			// Oldest last-edit time (TS_UNIX) that still counts as active
			$activeCutoffUnix = $nowUnix - $days * 86400;

			// Get the last-updated timestamp for the cache
			$cTime = $dbw->newSelectQueryBuilder()
				->select( 'qci_timestamp' )
				->from( 'querycache_info' )
				->where( [ 'qci_type' => 'activeusers' ] )
				->caller( __METHOD__ )->fetchField();
			$cTimeUnix = $cTime ? (int)wfTimestamp( TS_UNIX, $cTime ) : 1;

			// Pick the date range to fetch from. This is normally from the last
			// update until the present time, but has a limited window.
			// If the window is limited, multiple runs are needed to fully populate it.
			$sTimestamp = max( $cTimeUnix, $activeCutoffUnix );
			$eTimestamp = min( $sTimestamp + $window, $nowUnix );

			// Get all the users active since the last update
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'actor_name', 'lastedittime' => 'MAX(rc_timestamp)' ] )
				->from( 'recentchanges' )
				->join( 'actor', null, 'actor_id=rc_actor' )
				->where( [
					'actor_user IS NOT NULL', // actual accounts
					'rc_type != ' . $dbw->addQuotes( RC_EXTERNAL ), // no wikidata
					'rc_log_type IS NULL OR rc_log_type != ' . $dbw->addQuotes( 'newusers' ),
					$dbw->buildComparison( '>=', [ 'rc_timestamp' => $dbw->timestamp( $sTimestamp ) ] ),
					$dbw->buildComparison( '<=', [ 'rc_timestamp' => $dbw->timestamp( $eTimestamp ) ] ),
				] )
				->groupBy( 'actor_name' )
				->orderBy( 'NULL' ) // avoid filesort
				->caller( __METHOD__ )->fetchResultSet();

			// Map of user name => timestamp of that user's latest change in range
			$names = [];
			foreach ( $res as $row ) {
				$names[$row->actor_name] = $row->lastedittime;
			}

			// Find which of the recently active users are already accounted for
			if ( count( $names ) ) {
				$res = $dbw->newSelectQueryBuilder()
					->select( [ 'user_name' => 'qcc_title' ] )
					->from( 'querycachetwo' )
					->where( [
						'qcc_type' => 'activeusers',
						'qcc_namespace' => NS_USER,
						// PHP turns numeric-looking array keys into ints; cast them back
						'qcc_title' => array_map( 'strval', array_keys( $names ) ),
						$dbw->buildComparison( '>=', [ 'qcc_value' => $activeCutoffUnix ] ),
					] )
					->caller( __METHOD__ )->fetchResultSet();
				// Note: In order for this to be actually consistent, we would need
				// to update these rows with the new lastedittime.
				foreach ( $res as $row ) {
					unset( $names[$row->user_name] );
				}
			}

			// Insert the users that need to be added to the list
			if ( count( $names ) ) {
				$newRows = [];
				foreach ( $names as $name => $lastEditTime ) {
					$newRows[] = [
						'qcc_type' => 'activeusers',
						'qcc_namespace' => NS_USER,
						'qcc_title' => $name,
						'qcc_value' => (int)wfTimestamp( TS_UNIX, $lastEditTime ),
						'qcc_namespacetwo' => 0, // unused
						'qcc_titletwo' => '' // unused
					];
				}
				foreach ( array_chunk( $newRows, 500 ) as $rowBatch ) {
					$dbw->insert( 'querycachetwo', $rowBatch, __METHOD__ );
					$factory->commitAndWaitForReplication( __METHOD__, $ticket );
				}
			}

			// If a transaction was already started, it might have an old
			// snapshot, so kludge the timestamp range back as needed.
			$asOfTimestamp = min( $eTimestamp, (int)$dbw->trxTimestamp() );

			// Touch the data freshness timestamp
			$dbw->newReplaceQueryBuilder()
				->replaceInto( 'querycache_info' )
				->rows( [
					'qci_type' => 'activeusers',
					'qci_timestamp' => $dbw->timestamp( $asOfTimestamp ), // not always $now
				] )
				->uniqueIndexFields( [ 'qci_type' ] )
				->caller( __METHOD__ )->execute();

			// Rotate out users that have not edited in too long (according to old data set)
			$dbw->newDeleteQueryBuilder()
				->deleteFrom( 'querycachetwo' )
				->where( [
					'qcc_type' => 'activeusers',
					$dbw->buildComparison( '<', [ 'qcc_value' => $activeCutoffUnix ] ) // TS_UNIX
				] )
				->caller( __METHOD__ )->execute();

			if ( !$config->get( MainConfigNames::MiserMode ) ) {
				// Skipped in miser mode.
				// NOTE(review): confirm SiteStatsUpdate is the class that owns
				// the static cacheUpdate( IDatabase $dbw ) this branch calls.
				SiteStatsUpdate::cacheUpdate( $dbw );
			}
		} finally {
			// Release the lock even when a query throws, so a failed run
			// cannot keep later cache updates from acquiring it.
			$dbw->unlock( $lockKey, __METHOD__ );
		}
	}
}
const NS_USER
Definition: Defines.php:66
const RC_EXTERNAL
Definition: Defines.php:119
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Class to both describe a background job and handle jobs.
Definition: Job.php:40
Title $title
Definition: Job.php:51
array $params
Array of job parameters.
Definition: Job.php:45
This class provides an implementation of the core hook interfaces, forwarding hook calls to HookConta...
Definition: HookRunner.php:568
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Parent class for all special pages.
Definition: SpecialPage.php:66
Represents a title within MediaWiki.
Definition: Title.php:76
static getQueryInfo()
Return the tables, fields, and join conditions to be selected to create a new recentchanges object.
Purge expired rows from the recentchanges table.
__construct(Title $title, array $params)
static cacheUpdate(IDatabase $dbw)
const DB_PRIMARY
Definition: defines.php:28