MediaWiki  master
rebuildrecentchanges.php
Go to the documentation of this file.
1 <?php
26 require_once __DIR__ . '/Maintenance.php';
27 
30 
/** @var int UNIX timestamp; exclusive lower bound of the time range being rebuilt (set in pass 1) */
38  private $cutoffFrom;
/** @var int UNIX timestamp; exclusive upper bound of the time range being rebuilt (set in pass 1) */
40  private $cutoffTo;
41 
/**
 * Register the script description, the optional 'from'/'to' range
 * arguments and the default batch size.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Rebuild recent changes' );

	// Both range bounds are optional, take a value, and share the same help text.
	$rangeHelp = "Only rebuild rows in requested time range (in YYYYMMDDHHMMSS format)";
	foreach ( [ 'from', 'to' ] as $boundName ) {
		$this->addOption( $boundName, $rangeHelp, false, true );
	}

	$this->setBatchSize( 200 );
}
60 
/**
 * Entry point: validate the range options, run the five rebuild passes in
 * order, and purge the feed timestamps unless an explicit range was given.
 */
public function execute() {
	$hasFrom = (bool)$this->hasOption( 'from' );
	$hasTo = (bool)$this->hasOption( 'to' );

	// Exactly one of the two bounds being present is an error.
	if ( $hasFrom !== $hasTo ) {
		$this->fatalError( "Both 'from' and 'to' must be given, or neither" );
	}

	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

	$this->rebuildRecentChangesTablePass1( $lbFactory );
	$this->rebuildRecentChangesTablePass2( $lbFactory );
	$this->rebuildRecentChangesTablePass3( $lbFactory );
	$this->rebuildRecentChangesTablePass4( $lbFactory );
	$this->rebuildRecentChangesTablePass5( $lbFactory );

	// Feed timestamps are only purged when no explicit range was requested.
	$explicitRange = $hasFrom && $hasTo;
	if ( !$explicitRange ) {
		$this->purgeFeeds();
	}

	$this->output( "Done.\n" );
}
80 
/**
 * Rebuild pass 1: Insert recentchanges entries for page revisions.
 *
 * Determines the time window to rebuild (either the 'from'/'to' options or
 * the last $wgRCMaxAge seconds), stores it in $this->cutoffFrom and
 * $this->cutoffTo for the later passes, deletes the existing recentchanges
 * rows inside that window and re-inserts one row per revision found in it.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass1( ILBFactory $lbFactory ) {
	$dbw = $this->getDB( DB_MASTER );
	$commentStore = CommentStore::getStore();
	$actorMigration = ActorMigration::newMigration();

	if ( $this->hasOption( 'from' ) && $this->hasOption( 'to' ) ) {
		$from = wfTimestamp( TS_UNIX, $this->getOption( 'from' ) );
		$to = wfTimestamp( TS_UNIX, $this->getOption( 'to' ) );
		// wfTimestamp() returns false for input it cannot parse; refuse to
		// continue rather than rebuild (and delete) an unintended range.
		if ( $from === false || $to === false ) {
			$this->fatalError( "Invalid 'from' or 'to' timestamp; expected YYYYMMDDHHMMSS format" );
		}
		$this->cutoffFrom = (int)$from;
		$this->cutoffTo = (int)$to;

		$sec = $this->cutoffTo - $this->cutoffFrom;
		$days = $sec / 24 / 3600;
		$this->output( "Rebuilding range of $sec seconds ($days days)\n" );
	} else {
		global $wgRCMaxAge;

		$days = $wgRCMaxAge / 24 / 3600;
		$this->output( "Rebuilding \$wgRCMaxAge=$wgRCMaxAge seconds ($days days)\n" );

		// Read the clock once so both bounds refer to the same instant.
		$now = time();
		$this->cutoffFrom = $now - $wgRCMaxAge;
		$this->cutoffTo = $now;
	}

	$this->output( "Clearing recentchanges table for time range...\n" );
	$rcids = $dbw->selectFieldValues(
		'recentchanges',
		'rc_id',
		[
			'rc_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			'rc_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) )
		],
		__METHOD__
	);
	// Delete by primary key in batches, letting replicas catch up in between.
	foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
		$dbw->delete( 'recentchanges', [ 'rc_id' => $rcidBatch ], __METHOD__ );
		$lbFactory->waitForReplication();
	}

	$this->output( "Loading from page and revision tables...\n" );

	$commentQuery = $commentStore->getJoin( 'rev_comment' );
	$actorQuery = $actorMigration->getJoin( 'rev_user' );
	$res = $dbw->select(
		[ 'revision', 'page' ] + $commentQuery['tables'] + $actorQuery['tables'],
		[
			'rev_timestamp',
			'rev_minor_edit',
			'rev_id',
			'rev_deleted',
			'page_namespace',
			'page_title',
			'page_is_new',
			'page_id'
		] + $commentQuery['fields'] + $actorQuery['fields'],
		[
			'rev_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			'rev_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) )
		],
		__METHOD__,
		[ 'ORDER BY' => 'rev_timestamp DESC' ],
		[
			'page' => [ 'JOIN', 'rev_page=page_id' ],
		] + $commentQuery['joins'] + $actorQuery['joins']
	);

	$this->output( "Inserting from page and revision tables...\n" );
	$inserted = 0;
	foreach ( $res as $row ) {
		$comment = $commentStore->getComment( 'rev_comment', $row );
		$user = User::newFromAnyId( $row->rev_user, $row->rev_user_text, $row->rev_actor );
		$dbw->insert(
			'recentchanges',
			[
				'rc_timestamp' => $row->rev_timestamp,
				'rc_namespace' => $row->page_namespace,
				'rc_title' => $row->page_title,
				'rc_minor' => $row->rev_minor_edit,
				'rc_bot' => 0,
				'rc_new' => $row->page_is_new,
				'rc_cur_id' => $row->page_id,
				'rc_this_oldid' => $row->rev_id,
				// Placeholder; pass 2 fills in the real previous revision id.
				'rc_last_oldid' => 0,
				'rc_type' => $row->page_is_new ? RC_NEW : RC_EDIT,
				'rc_source' => $row->page_is_new ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT,
				'rc_deleted' => $row->rev_deleted
			] + $commentStore->insert( $dbw, 'rc_comment', $comment )
				+ $actorMigration->getInsertValues( $dbw, 'rc_user', $user ),
			__METHOD__
		);
		if ( ( ++$inserted % $this->getBatchSize() ) == 0 ) {
			$lbFactory->waitForReplication();
		}
	}
}
178 
/**
 * Rebuild pass 2: Enhance entries for page revisions with references to
 * the previous revision (rc_last_oldid) and with the old/new text sizes.
 *
 * Rows are walked per page in timestamp order, so the "previous" revision
 * of a row is normally the row processed just before it; only the first
 * row of each page needs a lookup in the revision table.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass2( ILBFactory $lbFactory ) {
	$dbw = $this->getDB( DB_MASTER );

	$this->output( "Updating links and size differences...\n" );

	$rangeConds = [
		"rc_timestamp > " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
		"rc_timestamp < " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) )
	];
	$rcRows = $dbw->select(
		'recentchanges',
		[ 'rc_cur_id', 'rc_this_oldid', 'rc_timestamp' ],
		$rangeConds,
		__METHOD__,
		[ 'ORDER BY' => [ 'rc_cur_id', 'rc_timestamp' ] ]
	);

	// State carried from one row to the next while staying on the same page.
	$currentPageId = 0;
	$prevRevId = 0;
	$prevLen = null;
	$numUpdated = 0;

	foreach ( $rcRows as $rcRow ) {
		$isNew = false;

		if ( $rcRow->rc_cur_id != $currentPageId ) {
			// First row of another page: find the newest revision older than it, if any.
			$currentPageId = (int)$rcRow->rc_cur_id;

			$olderRev = $dbw->selectRow(
				'revision',
				[ 'rev_id', 'rev_len' ],
				[
					'rev_page' => $currentPageId,
					"rev_timestamp < " . $dbw->addQuotes( $rcRow->rc_timestamp )
				],
				__METHOD__,
				[ 'ORDER BY' => 'rev_timestamp DESC' ]
			);

			if ( $olderRev ) {
				$prevRevId = (int)$olderRev->rev_id;
				// The stored length may be NULL; keep it NULL in that case.
				$prevLen = $olderRev->rev_len === null ? null : (int)$olderRev->rev_len;
			} else {
				// No earlier revision, so this is most likely the page creation.
				$prevRevId = 0;
				$prevLen = 0;
				$isNew = true;
			}
		}

		if ( $currentPageId == 0 ) {
			$this->output( "Uhhh, something wrong? No curid\n" );
			continue;
		}

		// Text size of the revision this row describes.
		$thisLen = (int)$dbw->selectField(
			'revision',
			'rev_len',
			[ 'rev_id' => $rcRow->rc_this_oldid ],
			__METHOD__
		);

		$newValues = [
			'rc_last_oldid' => $prevRevId,
			'rc_new' => $isNew ? 1 : 0,
			'rc_type' => $isNew ? RC_NEW : RC_EDIT,
			'rc_source' => $isNew ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT,
			'rc_old_len' => $prevLen,
			'rc_new_len' => $thisLen,
		];
		$rowConds = [
			'rc_cur_id' => $currentPageId,
			'rc_this_oldid' => $rcRow->rc_this_oldid,
			'rc_timestamp' => $rcRow->rc_timestamp // index usage
		];
		$dbw->update( 'recentchanges', $newValues, $rowConds, __METHOD__ );

		// This row becomes the "previous revision" of the next row of the same page.
		$prevRevId = (int)$rcRow->rc_this_oldid;
		$prevLen = $thisLen;

		$numUpdated++;
		if ( $numUpdated % $this->getBatchSize() == 0 ) {
			$lbFactory->waitForReplication();
		}
	}
}
271 
/**
 * Rebuild pass 3: Insert recentchanges entries for action logs.
 *
 * Log types listed in $wgLogRestrictions or $wgFilterLogTypes, and the
 * 'create' type, are skipped.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass3( ILBFactory $lbFactory ) {
	// Without this declaration both variables would be undefined locals.
	global $wgLogRestrictions, $wgFilterLogTypes;

	$dbw = $this->getDB( DB_MASTER );
	$commentStore = CommentStore::getStore();
	$actorMigration = ActorMigration::newMigration();
	$nonRCLogs = array_merge( array_keys( $wgLogRestrictions ),
		array_keys( $wgFilterLogTypes ),
		[ 'create' ] );

	$this->output( "Loading from user and logging tables...\n" );

	$commentQuery = $commentStore->getJoin( 'log_comment' );
	$actorQuery = $actorMigration->getJoin( 'log_user' );
	$res = $dbw->select(
		[ 'logging' ] + $commentQuery['tables'] + $actorQuery['tables'],
		[
			'log_timestamp',
			'log_namespace',
			'log_title',
			'log_page',
			'log_type',
			'log_action',
			'log_id',
			'log_params',
			'log_deleted'
		] + $commentQuery['fields'] + $actorQuery['fields'],
		[
			'log_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			'log_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
			// Some logs don't go in RC since they are private, or are included in the filterable log types.
			'log_type' => array_diff( LogPage::validTypes(), $nonRCLogs ),
		],
		__METHOD__,
		[ 'ORDER BY' => 'log_timestamp DESC' ],
		$commentQuery['joins'] + $actorQuery['joins']
	);

	// Needed to decide whether a NULL log_page may be stored as-is in rc_cur_id.
	$field = $dbw->fieldInfo( 'recentchanges', 'rc_cur_id' );

	$inserted = 0;
	foreach ( $res as $row ) {
		$comment = $commentStore->getComment( 'log_comment', $row );
		$user = User::newFromAnyId( $row->log_user, $row->log_user_text, $row->log_actor );
		$dbw->insert(
			'recentchanges',
			[
				'rc_timestamp' => $row->log_timestamp,
				'rc_namespace' => $row->log_namespace,
				'rc_title' => $row->log_title,
				'rc_minor' => 0,
				'rc_bot' => 0,
				'rc_patrolled' => 1,
				'rc_new' => 0,
				'rc_this_oldid' => 0,
				'rc_last_oldid' => 0,
				'rc_type' => RC_LOG,
				'rc_source' => RecentChange::SRC_LOG,
				'rc_cur_id' => $field->isNullable()
					? $row->log_page
					: (int)$row->log_page, // NULL => 0,
				'rc_log_type' => $row->log_type,
				'rc_log_action' => $row->log_action,
				'rc_logid' => $row->log_id,
				'rc_params' => $row->log_params,
				'rc_deleted' => $row->log_deleted
			] + $commentStore->insert( $dbw, 'rc_comment', $comment )
				+ $actorMigration->getInsertValues( $dbw, 'rc_user', $user ),
			__METHOD__
		);

		if ( ( ++$inserted % $this->getBatchSize() ) == 0 ) {
			$lbFactory->waitForReplication();
		}
	}
}
353 
/**
 * Rebuild pass 4: Mark bot and autopatrolled entries.
 *
 * Sets rc_bot on rows in the rebuilt range made by members of the 'bot'
 * group, and (when $wgUseRCPatrol is on and $wgMiserMode is off) sets
 * rc_patrolled on rows made by members of groups holding 'autopatrol'.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass4( ILBFactory $lbFactory ) {
	// Without this declaration both variables would be undefined locals.
	global $wgUseRCPatrol, $wgMiserMode;

	$dbw = $this->getDB( DB_MASTER );

	$userQuery = User::getQueryInfo();

	# @FIXME: recognize other bot account groups (not the same as users with 'bot' rights)
	# @NOTE: users with 'bot' rights choose when edits are bot edits or not. That information
	# may be lost at this point (aside from joining on the patrol log table entries).
	$botgroups = [ 'bot' ];
	$autopatrolgroups = $wgUseRCPatrol ? MediaWikiServices::getInstance()
		->getPermissionManager()
		->getGroupsWithPermission( 'autopatrol' ) : [];

	# Flag our recent bot edits
	if ( $botgroups ) {
		$this->output( "Flagging bot account edits...\n" );

		# Find all users that are bots
		$res = $dbw->select(
			array_merge( [ 'user_groups' ], $userQuery['tables'] ),
			$userQuery['fields'],
			[ 'ug_group' => $botgroups ],
			__METHOD__,
			[ 'DISTINCT' ],
			[ 'user_groups' => [ 'JOIN', 'user_id = ug_user' ] ] + $userQuery['joins']
		);

		$botusers = [];
		foreach ( $res as $row ) {
			$botusers[] = User::newFromRow( $row );
		}

		# Fill in the rc_bot field
		if ( $botusers ) {
			$actorQuery = ActorMigration::newMigration()->getWhere( $dbw, 'rc_user', $botusers, false );
			$rcids = [];
			// Collect the matching row ids first so the update can run in id batches.
			foreach ( $actorQuery['orconds'] as $cond ) {
				$rcids = array_merge( $rcids, $dbw->selectFieldValues(
					[ 'recentchanges' ] + $actorQuery['tables'],
					'rc_id',
					[
						"rc_timestamp > " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
						"rc_timestamp < " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
						$cond,
					],
					__METHOD__,
					[],
					$actorQuery['joins']
				) );
			}
			$rcids = array_values( array_unique( $rcids ) );

			foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
				$dbw->update(
					'recentchanges',
					[ 'rc_bot' => 1 ],
					[ 'rc_id' => $rcidBatch ],
					__METHOD__
				);
				$lbFactory->waitForReplication();
			}
		}
	}

	# Flag our recent autopatrolled edits
	if ( !$wgMiserMode && $autopatrolgroups ) {
		$patrolusers = [];

		$this->output( "Flagging auto-patrolled edits...\n" );

		# Find all users in RC with autopatrol rights
		$res = $dbw->select(
			array_merge( [ 'user_groups' ], $userQuery['tables'] ),
			$userQuery['fields'],
			[ 'ug_group' => $autopatrolgroups ],
			__METHOD__,
			[ 'DISTINCT' ],
			[ 'user_groups' => [ 'JOIN', 'user_id = ug_user' ] ] + $userQuery['joins']
		);

		foreach ( $res as $row ) {
			$patrolusers[] = User::newFromRow( $row );
		}

		# Fill in the rc_patrolled field
		if ( $patrolusers ) {
			$actorQuery = ActorMigration::newMigration()->getWhere( $dbw, 'rc_user', $patrolusers, false );
			foreach ( $actorQuery['orconds'] as $cond ) {
				$dbw->update(
					'recentchanges',
					[ 'rc_patrolled' => 1 ],
					[
						$cond,
						'rc_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
						'rc_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
					],
					__METHOD__
				);
				$lbFactory->waitForReplication();
			}
		}
	}
}
464 
/**
 * Rebuild pass 5: Delete duplicate entries where we generate both a page
 * revision and a log entry for a single action (upload logs that carry an
 * 'associated_rev_id' in log_search).
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass5( ILBFactory $lbFactory ) {
	// Obtain the connection the same way as the other passes do.
	$dbw = $this->getDB( DB_MASTER );

	$this->output( "Removing duplicate revision and logging entries...\n" );

	$res = $dbw->select(
		[ 'logging', 'log_search' ],
		[ 'ls_value', 'ls_log_id' ],
		[
			'ls_log_id = log_id',
			'ls_field' => 'associated_rev_id',
			'log_type' => 'upload',
			'log_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			'log_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
		],
		__METHOD__
	);

	$updates = 0;
	foreach ( $res as $row ) {
		$rev_id = $row->ls_value;
		$log_id = $row->ls_log_id;

		// Mark the logging row as having an associated rev id
		$dbw->update(
			'recentchanges',
			/*SET*/ [ 'rc_this_oldid' => $rev_id ],
			/*WHERE*/ [ 'rc_logid' => $log_id ],
			__METHOD__
		);

		// Delete the revision row (the one for the same revision that has no log id)
		$dbw->delete(
			'recentchanges',
			/*WHERE*/ [ 'rc_this_oldid' => $rev_id, 'rc_logid' => 0 ],
			__METHOD__
		);

		if ( ( ++$updates % $this->getBatchSize() ) == 0 ) {
			$lbFactory->waitForReplication();
		}
	}
}
514 
/**
 * Purge cached feeds in $wanCache: delete the 'rcfeed' timestamp key of
 * every feed listed in $wgFeedClasses.
 */
private function purgeFeeds() {
	global $wgFeedClasses;

	$this->output( "Deleting feed timestamps.\n" );

	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	// Only the feed names (the array keys) are needed. Good enough for now.
	foreach ( array_keys( $wgFeedClasses ) as $feedName ) {
		$timestampKey = $cache->makeKey( 'rcfeed', $feedName, 'timestamp' );
		$cache->delete( $timestampKey );
	}
}
528 }
529 
// Name the maintenance class defined in this file, then include the file
// whose path is held in the RUN_MAINTENANCE_IF_MAIN constant.
530 $maintClass = RebuildRecentchanges::class;
531 require_once RUN_MAINTENANCE_IF_MAIN;
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:39
Maintenance script that rebuilds recent changes from scratch.
getOption( $name, $default=null)
Get an option, or return the default.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:86
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
static newFromAnyId( $userId, $userName, $actorId, $dbDomain=false)
Static factory method for creation from an ID, name, and/or actor ID.
Definition: User.php:617
rebuildRecentChangesTablePass2(ILBFactory $lbFactory)
Rebuild pass 2: Enhance entries for page revisions with references to the previous revision (rc_last_...
int $cutoffTo
UNIX timestamp.
waitForReplication(array $opts=[])
Waits for the replica DBs to catch up to the current master position.
setBatchSize( $s=0)
Set the batch size.
static getQueryInfo()
Return the tables, fields, and join conditions to be selected to create a new user object...
Definition: User.php:5324
hasOption( $name)
Checks to see if a particular option exists.
$wgLogRestrictions
This restricts log access to those who have a certain right. Users without this will not see it in the...
const DB_MASTER
Definition: defines.php:26
$wgRCMaxAge
Recentchanges items are periodically purged; entries older than this many seconds will go...
purgeFeeds()
Purge cached feeds in $wanCache.
rebuildRecentChangesTablePass1(ILBFactory $lbFactory)
Rebuild pass 1: Insert recentchanges entries for page revisions.
rebuildRecentChangesTablePass5(ILBFactory $lbFactory)
Rebuild pass 5: Delete duplicate entries where we generate both a page revision and a log entry for a...
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
static newMigration()
Static constructor.
$wgMiserMode
Disable database-intensive features.
addDescription( $text)
Set the description text.
rebuildRecentChangesTablePass4(ILBFactory $lbFactory)
Rebuild pass 4: Mark bot and autopatrolled entries.
$wgUseRCPatrol
Use RC Patrolling to check for vandalism (from recent changes and watchlists). New pages and new files...
output( $out, $channel=null)
Throw some output to the user.
int $cutoffFrom
UNIX timestamp.
An interface for generating database load balancers.
Definition: ILBFactory.php:33
static getStore()
static newFromRow( $row, $data=null)
Create a new user object from a user row.
Definition: User.php:717
getBatchSize()
Returns batch size.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
const RC_NEW
Definition: Defines.php:123
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
rebuildRecentChangesTablePass3(ILBFactory $lbFactory)
Rebuild pass 3: Insert recentchanges entries for action logs.
static validTypes()
Get the list of valid log types.
Definition: LogPage.php:198
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
$wgFilterLogTypes
Show/hide links on Special:Log will be shown for these log types.
const RC_EDIT
Definition: Defines.php:122
const RC_LOG
Definition: Defines.php:124
$wgFeedClasses
Available feeds objects.