MediaWiki  master
RecentChangesUpdateJob.php
Go to the documentation of this file.
1 <?php
23 
30 class RecentChangesUpdateJob extends Job {
31  public function __construct( Title $title, array $params ) {
32  parent::__construct( 'recentChangesUpdate', $title, $params );
33 
34  if ( !isset( $params['type'] ) ) {
35  throw new Exception( "Missing 'type' parameter." );
36  }
37 
38  $this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
39  $this->removeDuplicates = true;
40  }
41 
45  final public static function newPurgeJob() {
46  return new self(
47  SpecialPage::getTitleFor( 'Recentchanges' ), [ 'type' => 'purge' ]
48  );
49  }
50 
55  final public static function newCacheUpdateJob() {
56  return new self(
57  SpecialPage::getTitleFor( 'Recentchanges' ), [ 'type' => 'cacheUpdate' ]
58  );
59  }
60 
61  public function run() {
62  if ( $this->params['type'] === 'purge' ) {
63  $this->purgeExpiredRows();
64  } elseif ( $this->params['type'] === 'cacheUpdate' ) {
65  $this->updateActiveUsers();
66  } else {
67  throw new InvalidArgumentException(
68  "Invalid 'type' parameter '{$this->params['type']}'." );
69  }
70 
71  return true;
72  }
73 
74  protected function purgeExpiredRows() {
75  $rcMaxAge = MediaWikiServices::getInstance()->getMainConfig()->get(
76  MainConfigNames::RCMaxAge );
77  $updateRowsPerQuery = MediaWikiServices::getInstance()->getMainConfig()->get(
78  MainConfigNames::UpdateRowsPerQuery );
79  $dbw = wfGetDB( DB_PRIMARY );
80  $lockKey = $dbw->getDomainID() . ':recentchanges-prune';
81  if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
82  // already in progress
83  return;
84  }
85 
86  $factory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
87  $ticket = $factory->getEmptyTransactionTicket( __METHOD__ );
88  $cutoff = $dbw->timestamp( time() - $rcMaxAge );
89  $rcQuery = RecentChange::getQueryInfo();
90  do {
91  $rcIds = [];
92  $rows = [];
93  $res = $dbw->select(
94  $rcQuery['tables'],
95  $rcQuery['fields'],
96  [ 'rc_timestamp < ' . $dbw->addQuotes( $cutoff ) ],
97  __METHOD__,
98  [ 'LIMIT' => $updateRowsPerQuery ],
99  $rcQuery['joins']
100  );
101  foreach ( $res as $row ) {
102  $rcIds[] = $row->rc_id;
103  $rows[] = $row;
104  }
105  if ( $rcIds ) {
106  $dbw->delete( 'recentchanges', [ 'rc_id' => $rcIds ], __METHOD__ );
107  Hooks::runner()->onRecentChangesPurgeRows( $rows );
108  // There might be more, so try waiting for replica DBs
109  if ( !$factory->commitAndWaitForReplication(
110  __METHOD__, $ticket, [ 'timeout' => 3 ]
111  ) ) {
112  // Another job will continue anyway
113  break;
114  }
115  }
116  } while ( $rcIds );
117 
118  $dbw->unlock( $lockKey, __METHOD__ );
119  }
120 
121  protected function updateActiveUsers() {
122  $activeUserDays = MediaWikiServices::getInstance()->getMainConfig()->get(
123  MainConfigNames::ActiveUserDays );
124 
125  // Users that made edits at least this many days ago are "active"
126  $days = $activeUserDays;
127  // Pull in the full window of active users in this update
128  $window = $activeUserDays * 86400;
129 
130  $dbw = wfGetDB( DB_PRIMARY );
131  $factory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
132  $ticket = $factory->getEmptyTransactionTicket( __METHOD__ );
133 
134  $lockKey = $dbw->getDomainID() . '-activeusers';
135  if ( !$dbw->lock( $lockKey, __METHOD__, 0 ) ) {
136  // Exclusive update (avoids duplicate entries)… it's usually fine to just
137  // drop out here, if the Job is already running.
138  return;
139  }
140 
141  // Long-running queries expected
142  $dbw->setSessionOptions( [ 'connTimeout' => 900 ] );
143 
144  $nowUnix = time();
145  // Get the last-updated timestamp for the cache
146  $cTime = $dbw->selectField( 'querycache_info',
147  'qci_timestamp',
148  [ 'qci_type' => 'activeusers' ],
149  __METHOD__
150  );
151  $cTimeUnix = $cTime ? (int)wfTimestamp( TS_UNIX, $cTime ) : 1;
152 
153  // Pick the date range to fetch from. This is normally from the last
154  // update to till the present time, but has a limited window.
155  // If the window is limited, multiple runs are need to fully populate it.
156  $sTimestamp = max( $cTimeUnix, $nowUnix - $days * 86400 );
157  $eTimestamp = min( $sTimestamp + $window, $nowUnix );
158 
159  // Get all the users active since the last update
160  $res = $dbw->select(
161  [ 'recentchanges', 'actor' ],
162  [
163  'actor_name',
164  'lastedittime' => 'MAX(rc_timestamp)'
165  ],
166  [
167  'actor_user IS NOT NULL', // actual accounts
168  'rc_type != ' . $dbw->addQuotes( RC_EXTERNAL ), // no wikidata
169  'rc_log_type IS NULL OR rc_log_type != ' . $dbw->addQuotes( 'newusers' ),
170  'rc_timestamp >= ' . $dbw->addQuotes( $dbw->timestamp( $sTimestamp ) ),
171  'rc_timestamp <= ' . $dbw->addQuotes( $dbw->timestamp( $eTimestamp ) )
172  ],
173  __METHOD__,
174  [
175  'GROUP BY' => 'actor_name',
176  'ORDER BY' => 'NULL' // avoid filesort
177  ],
178  [
179  'actor' => [ 'JOIN', 'actor_id=rc_actor' ]
180  ]
181  );
182  $names = [];
183  foreach ( $res as $row ) {
184  $names[$row->actor_name] = $row->lastedittime;
185  }
186 
187  // Find which of the recently active users are already accounted for
188  if ( count( $names ) ) {
189  $res = $dbw->select( 'querycachetwo',
190  [ 'user_name' => 'qcc_title' ],
191  [
192  'qcc_type' => 'activeusers',
193  'qcc_namespace' => NS_USER,
194  'qcc_title' => array_map( 'strval', array_keys( $names ) ),
195  'qcc_value >= ' . $dbw->addQuotes( $nowUnix - $days * 86400 ), // TS_UNIX
196  ],
197  __METHOD__
198  );
199  // Note: In order for this to be actually consistent, we would need
200  // to update these rows with the new lastedittime.
201  foreach ( $res as $row ) {
202  unset( $names[$row->user_name] );
203  }
204  }
205 
206  // Insert the users that need to be added to the list
207  if ( count( $names ) ) {
208  $newRows = [];
209  foreach ( $names as $name => $lastEditTime ) {
210  $newRows[] = [
211  'qcc_type' => 'activeusers',
212  'qcc_namespace' => NS_USER,
213  'qcc_title' => $name,
214  'qcc_value' => (int)wfTimestamp( TS_UNIX, $lastEditTime ),
215  'qcc_namespacetwo' => 0, // unused
216  'qcc_titletwo' => '' // unused
217  ];
218  }
219  foreach ( array_chunk( $newRows, 500 ) as $rowBatch ) {
220  $dbw->insert( 'querycachetwo', $rowBatch, __METHOD__ );
221  $factory->commitAndWaitForReplication( __METHOD__, $ticket );
222  }
223  }
224 
225  // If a transaction was already started, it might have an old
226  // snapshot, so kludge the timestamp range back as needed.
227  $asOfTimestamp = min( $eTimestamp, (int)$dbw->trxTimestamp() );
228 
229  // Touch the data freshness timestamp
230  $dbw->replace(
231  'querycache_info',
232  'qci_type',
233  [ 'qci_type' => 'activeusers',
234  'qci_timestamp' => $dbw->timestamp( $asOfTimestamp ) ], // not always $now
235  __METHOD__
236  );
237 
238  // Rotate out users that have not edited in too long (according to old data set)
239  $dbw->delete( 'querycachetwo',
240  [
241  'qcc_type' => 'activeusers',
242  'qcc_value < ' . $dbw->addQuotes( $nowUnix - $days * 86400 ) // TS_UNIX
243  ],
244  __METHOD__
245  );
246 
247  $dbw->unlock( $lockKey, __METHOD__ );
248  }
249 }
const NS_USER
Definition: Defines.php:66
const RC_EXTERNAL
Definition: Defines.php:119
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
static runner()
Get a HookRunner instance for calling hooks using the new interfaces.
Definition: Hooks.php:173
Class to both describe a background job and handle jobs.
Definition: Job.php:39
Title $title
Definition: Job.php:50
array $params
Array of job parameters.
Definition: Job.php:44
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
static getQueryInfo()
Return the tables, fields, and join conditions to be selected to create a new recentchanges object.
Puurge expired rows from the recentchanges table.
__construct(Title $title, array $params)
static getTitleFor( $name, $subpage=false, $fragment='')
Get a localised Title object for a specified special page name If you don't need a full Title object,...
Represents a title within MediaWiki.
Definition: Title.php:52
const DB_PRIMARY
Definition: defines.php:28