MediaWiki  master
rebuildrecentchanges.php
Go to the documentation of this file.
1 <?php
26 require_once __DIR__ . '/Maintenance.php';
27 
32 
/**
 * @var int Start of the time range being rebuilt, as a UNIX timestamp.
 *  Set by pass 1 and reused by every later pass.
 */
private $cutoffFrom;

/**
 * @var int End of the time range being rebuilt, as a UNIX timestamp.
 *  Set by pass 1 and reused by every later pass.
 */
private $cutoffTo;
43 
/**
 * Register the script description, the optional 'from'/'to' range
 * options and the default batch size.
 */
public function __construct() {
    parent::__construct();
    $this->addDescription( 'Rebuild recent changes' );

    // Both range options are optional, take an argument and share one help text.
    $rangeHelp = "Only rebuild rows in requested time range (in YYYYMMDDHHMMSS format)";
    foreach ( [ 'from', 'to' ] as $rangeOption ) {
        $this->addOption( $rangeOption, $rangeHelp, false, true );
    }

    $this->setBatchSize( 200 );
}
62 
/**
 * Run the five rebuild passes in order.
 *
 * 'from' and 'to' must be supplied together or not at all. The feed
 * timestamps are only purged when no explicit range was requested.
 */
public function execute() {
    $fromGiven = $this->hasOption( 'from' );
    $toGiven = $this->hasOption( 'to' );

    // Exactly one of the two bounds was supplied.
    if ( ( $fromGiven && !$toGiven ) || ( $toGiven && !$fromGiven ) ) {
        $this->fatalError( "Both 'from' and 'to' must be given, or neither" );
    }

    $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
    $this->rebuildRecentChangesTablePass1( $lbFactory );
    $this->rebuildRecentChangesTablePass2( $lbFactory );
    $this->rebuildRecentChangesTablePass3( $lbFactory );
    $this->rebuildRecentChangesTablePass4( $lbFactory );
    $this->rebuildRecentChangesTablePass5( $lbFactory );

    $rangeRequested = $fromGiven && $toGiven;
    if ( !$rangeRequested ) {
        $this->purgeFeeds();
    }

    $this->output( "Done.\n" );
}
82 
/**
 * Rebuild pass 1: work out the time range, clear the matching
 * recentchanges rows and re-insert one row per revision in the range.
 *
 * @param ILBFactory $lbFactory Not used by this pass
 */
private function rebuildRecentChangesTablePass1( ILBFactory $lbFactory ) {
    $dbw = $this->getDB( DB_PRIMARY );
    $commentStore = MediaWikiServices::getInstance()->getCommentStore();

    $explicitRange = $this->hasOption( 'from' ) && $this->hasOption( 'to' );
    if ( !$explicitRange ) {
        global $wgRCMaxAge;

        $days = $wgRCMaxAge / 24 / 3600;
        $this->output( "Rebuilding \$wgRCMaxAge=$wgRCMaxAge seconds ($days days)\n" );

        $this->cutoffFrom = time() - $wgRCMaxAge;
        $this->cutoffTo = time();
    } else {
        $this->cutoffFrom = (int)wfTimestamp( TS_UNIX, $this->getOption( 'from' ) );
        $this->cutoffTo = (int)wfTimestamp( TS_UNIX, $this->getOption( 'to' ) );

        $rangeSeconds = $this->cutoffTo - $this->cutoffFrom;
        $rangeDays = $rangeSeconds / 24 / 3600;
        $this->output( "Rebuilding range of $rangeSeconds seconds ($rangeDays days)\n" );
    }

    $this->output( "Clearing recentchanges table for time range...\n" );

    // Quoted DB-format cutoffs; both comparisons below are strict, so rows
    // stamped exactly at either cutoff are left alone.
    $quotedFrom = $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) );
    $quotedTo = $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) );

    $staleIds = $dbw->selectFieldValues(
        'recentchanges',
        'rc_id',
        [
            'rc_timestamp > ' . $quotedFrom,
            'rc_timestamp < ' . $quotedTo
        ],
        __METHOD__
    );
    // Delete in batches, letting replicas catch up after each one.
    foreach ( array_chunk( $staleIds, $this->getBatchSize() ) as $idBatch ) {
        $dbw->delete( 'recentchanges', [ 'rc_id' => $idBatch ], __METHOD__ );
        $this->waitForReplication();
    }

    $this->output( "Loading from page and revision tables...\n" );

    $commentQuery = $commentStore->getJoin( 'rev_comment' );
    $actorQuery = ActorMigration::newMigration()->getJoin( 'rev_user' );

    $tables = [ 'revision', 'page' ] + $commentQuery['tables'] + $actorQuery['tables'];
    $fields = [
        'rev_timestamp',
        'rev_minor_edit',
        'rev_id',
        'rev_deleted',
        'page_namespace',
        'page_title',
        'page_is_new',
        'page_id'
    ] + $commentQuery['fields'] + $actorQuery['fields'];
    $joins = [
        'page' => [ 'JOIN', 'rev_page=page_id' ],
    ] + $commentQuery['joins'] + $actorQuery['joins'];

    $revisions = $dbw->select(
        $tables,
        $fields,
        [
            'rev_timestamp > ' . $quotedFrom,
            'rev_timestamp < ' . $quotedTo
        ],
        __METHOD__,
        [ 'ORDER BY' => 'rev_timestamp DESC' ],
        $joins
    );

    $this->output( "Inserting from page and revision tables...\n" );
    $insertedCount = 0;
    foreach ( $revisions as $rev ) {
        $comment = $commentStore->getComment( 'rev_comment', $rev );

        $rcRow = [
            'rc_timestamp' => $rev->rev_timestamp,
            'rc_actor' => $rev->rev_actor,
            'rc_namespace' => $rev->page_namespace,
            'rc_title' => $rev->page_title,
            'rc_minor' => $rev->rev_minor_edit,
            'rc_bot' => 0,
            'rc_new' => $rev->page_is_new,
            'rc_cur_id' => $rev->page_id,
            'rc_this_oldid' => $rev->rev_id,
            'rc_last_oldid' => 0, // placeholder; pass 2 fills in the previous revision
            'rc_type' => $rev->page_is_new ? RC_NEW : RC_EDIT,
            'rc_source' => $rev->page_is_new ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT,
            'rc_deleted' => $rev->rev_deleted
        ];
        $dbw->insert(
            'recentchanges',
            $rcRow + $commentStore->insert( $dbw, 'rc_comment', $comment ),
            __METHOD__
        );

        // Re-point change tags of this revision at the freshly inserted row.
        $newRcId = $dbw->insertId();
        $dbw->update(
            'change_tag',
            [ 'ct_rc_id' => $newRcId ],
            [ 'ct_rev_id' => $rev->rev_id ],
            __METHOD__
        );

        $insertedCount++;
        if ( $insertedCount % $this->getBatchSize() == 0 ) {
            $this->waitForReplication();
        }
    }
}
188 
/**
 * Rebuild pass 2: fill in rc_last_oldid, the new-page flags and the
 * old/new text sizes for the rows in the time range.
 *
 * Rows are walked per page in timestamp order, so each row's predecessor
 * is either the previously visited row of the same page or, for the first
 * row of a page, the newest older revision looked up in `revision`.
 *
 * @param ILBFactory $lbFactory Not used by this pass
 */
private function rebuildRecentChangesTablePass2( ILBFactory $lbFactory ) {
    $dbw = $this->getDB( DB_PRIMARY );

    $this->output( "Updating links and size differences...\n" );

    # Fill in the rc_last_oldid field, which points to the previous edit
    $rcRows = $dbw->select(
        'recentchanges',
        [ 'rc_cur_id', 'rc_this_oldid', 'rc_timestamp' ],
        [
            "rc_timestamp > " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
            "rc_timestamp < " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) )
        ],
        __METHOD__,
        [ 'ORDER BY' => [ 'rc_cur_id', 'rc_timestamp' ] ]
    );

    $currentPageId = 0;
    $previousRevId = 0;
    $previousLen = null;
    $updatedCount = 0;
    foreach ( $rcRows as $rcRow ) {
        $isNew = 0;

        if ( $rcRow->rc_cur_id != $currentPageId ) {
            # Moved on to another page: look up its last edit before this row, if any
            $currentPageId = intval( $rcRow->rc_cur_id );
            $firstTimestamp = $rcRow->rc_timestamp;

            $priorRev = $dbw->selectRow(
                'revision',
                [ 'rev_id', 'rev_len' ],
                [ 'rev_page' => $currentPageId, "rev_timestamp < " . $dbw->addQuotes( $firstTimestamp ) ],
                __METHOD__,
                [ 'ORDER BY' => 'rev_timestamp DESC' ]
            );
            if ( !$priorRev ) {
                # No earlier revision: treat this row as the page creation
                $previousRevId = 0;
                $previousLen = 0;
                $isNew = 1; // probably true
            } else {
                $previousRevId = intval( $priorRev->rev_id );
                # Keep the previous text size when it is recorded
                $previousLen = $priorRev->rev_len !== null ? intval( $priorRev->rev_len ) : null;
            }
        }

        if ( $currentPageId == 0 ) {
            $this->output( "Uhhh, something wrong? No curid\n" );
            continue;
        }

        # Text size of the revision this row points at
        $currentLen = (int)$dbw->selectField(
            'revision',
            'rev_len',
            [ 'rev_id' => $rcRow->rc_this_oldid ],
            __METHOD__
        );

        $dbw->update(
            'recentchanges',
            [
                'rc_last_oldid' => $previousRevId,
                'rc_new' => $isNew,
                'rc_type' => $isNew === 1 ? RC_NEW : RC_EDIT,
                'rc_source' => $isNew === 1 ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT,
                'rc_old_len' => $previousLen,
                'rc_new_len' => $currentLen,
            ],
            [
                'rc_cur_id' => $currentPageId,
                'rc_this_oldid' => $rcRow->rc_this_oldid,
                'rc_timestamp' => $rcRow->rc_timestamp // index usage
            ],
            __METHOD__
        );

        // This row becomes the predecessor of the page's next row.
        $previousRevId = intval( $rcRow->rc_this_oldid );
        $previousLen = $currentLen;

        $updatedCount++;
        if ( $updatedCount % $this->getBatchSize() == 0 ) {
            $this->waitForReplication();
        }
    }
}
281 
/**
 * Rebuild pass 3: insert one recentchanges row per log entry in the
 * time range.
 *
 * Log types listed in $wgLogRestrictions or $wgFilterLogTypes, and the
 * 'create' log, are skipped.
 *
 * @param ILBFactory $lbFactory Not used by this pass
 */
private function rebuildRecentChangesTablePass3( ILBFactory $lbFactory ) {
    // Both settings are read below; without this declaration they would be
    // undefined locals and array_keys() would fail.
    global $wgLogRestrictions, $wgFilterLogTypes;

    $dbw = $this->getDB( DB_PRIMARY );
    $commentStore = MediaWikiServices::getInstance()->getCommentStore();
    $nonRCLogs = array_merge( array_keys( $wgLogRestrictions ),
        array_keys( $wgFilterLogTypes ),
        [ 'create' ] );

    $this->output( "Loading from user and logging tables...\n" );

    $commentQuery = $commentStore->getJoin( 'log_comment' );
    $res = $dbw->select(
        [ 'logging' ] + $commentQuery['tables'],
        [
            'log_timestamp',
            'log_actor',
            'log_namespace',
            'log_title',
            'log_page',
            'log_type',
            'log_action',
            'log_id',
            'log_params',
            'log_deleted'
        ] + $commentQuery['fields'],
        [
            'log_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
            'log_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
            // Some logs don't go in RC since they are private, or are included in the filterable log types.
            'log_type' => array_diff( LogPage::validTypes(), $nonRCLogs ),
        ],
        __METHOD__,
        [ 'ORDER BY' => [ 'log_timestamp DESC', 'log_id DESC' ] ],
        $commentQuery['joins']
    );

    // Whether rc_cur_id accepts NULL is a schema property, so look it up
    // once rather than on every row.
    $field = $dbw->fieldInfo( 'recentchanges', 'rc_cur_id' );
    $curIdNullable = $field->isNullable();

    $inserted = 0;
    foreach ( $res as $row ) {
        $comment = $commentStore->getComment( 'log_comment', $row );
        $dbw->insert(
            'recentchanges',
            [
                'rc_timestamp' => $row->log_timestamp,
                'rc_actor' => $row->log_actor,
                'rc_namespace' => $row->log_namespace,
                'rc_title' => $row->log_title,
                'rc_minor' => 0,
                'rc_bot' => 0,
                'rc_patrolled' => $row->log_type == 'upload' ? 0 : 2,
                'rc_new' => 0,
                'rc_this_oldid' => 0,
                'rc_last_oldid' => 0,
                'rc_type' => RC_LOG,
                'rc_source' => RecentChange::SRC_LOG,
                'rc_cur_id' => $curIdNullable
                    ? $row->log_page
                    : (int)$row->log_page, // NULL => 0,
                'rc_log_type' => $row->log_type,
                'rc_log_action' => $row->log_action,
                'rc_logid' => $row->log_id,
                'rc_params' => $row->log_params,
                'rc_deleted' => $row->log_deleted
            ] + $commentStore->insert( $dbw, 'rc_comment', $comment ),
            __METHOD__
        );

        // Re-point change tags of this log entry at the freshly inserted row.
        $rcid = $dbw->insertId();
        $dbw->update(
            'change_tag',
            [ 'ct_rc_id' => $rcid ],
            [ 'ct_log_id' => $row->log_id ],
            __METHOD__
        );

        if ( ( ++$inserted % $this->getBatchSize() ) == 0 ) {
            $this->waitForReplication();
        }
    }
}
369 
/**
 * Find the rc_id of every recentchanges row in the time range whose
 * actor is a user belonging to at least one of the given groups.
 *
 * @param IDatabase $db Database handle to query
 * @param string[] $groups User group names to match; an empty list yields no rows
 * @param array $conds Extra query conditions. String-keyed entries take
 *  precedence over the built-in ones, list-style entries are added to them.
 * @return array Matching rc_id values (empty when $groups is empty)
 */
private function findRcIdsWithGroups( $db, $groups, $conds = [] ) {
    if ( !count( $groups ) ) {
        return [];
    }

    $rangeAndGroupConds = [
        "rc_timestamp > " . $db->addQuotes( $db->timestamp( $this->cutoffFrom ) ),
        "rc_timestamp < " . $db->addQuotes( $db->timestamp( $this->cutoffTo ) ),
        'ug_group' => $groups
    ];

    // array_merge(), not the "+" union: the union keeps only the left-hand
    // entry on a key clash, so a list-style entry in $conds (numeric key 0)
    // would silently discard the "rc_timestamp >" lower bound. array_merge()
    // renumbers numeric keys and keeps every condition. $conds goes last so
    // that its string-keyed entries still win, as they did with the union.
    $allConds = array_merge( $rangeAndGroupConds, $conds );

    return $db->selectFieldValues(
        [ 'recentchanges', 'actor', 'user_groups' ],
        'rc_id',
        $allConds,
        __METHOD__,
        [ 'DISTINCT' ],
        [
            'actor' => [ 'JOIN', 'actor_id=rc_actor' ],
            'user_groups' => [ 'JOIN', 'ug_user=actor_user' ]
        ]
    );
}
398 
/**
 * Rebuild pass 4: flag edits by bot accounts (rc_bot) and mark edits by
 * members of groups holding the 'autopatrol' right as auto-patrolled.
 *
 * @param ILBFactory $lbFactory Not used by this pass
 */
private function rebuildRecentChangesTablePass4( ILBFactory $lbFactory ) {
    // All four settings are read below; without this declaration they would
    // be undefined locals and every patrol check would evaluate as "off".
    global $wgUseRCPatrol, $wgUseNPPatrol, $wgUseFilePatrol, $wgMiserMode;

    $dbw = $this->getDB( DB_PRIMARY );

    # @FIXME: recognize other bot account groups (not the same as users with 'bot' rights)
    # @NOTE: users with 'bot' rights choose when edits are bot edits or not. That information
    # may be lost at this point (aside from joining on the patrol log table entries).
    $botgroups = [ 'bot' ];
    // Only look up autopatrol groups when some form of patrolling is enabled.
    $autopatrolgroups = ( $wgUseRCPatrol || $wgUseNPPatrol || $wgUseFilePatrol ) ?
        MediaWikiServices::getInstance()->getGroupPermissionsLookup()
            ->getGroupsWithPermission( 'autopatrol' ) : [];

    # Flag our recent bot edits
    // @phan-suppress-next-line PhanRedundantCondition
    if ( $botgroups ) {
        $this->output( "Flagging bot account edits...\n" );

        # Fill in the rc_bot field
        $rcids = $this->findRcIdsWithGroups( $dbw, $botgroups );

        foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
            $dbw->update(
                'recentchanges',
                [ 'rc_bot' => 1 ],
                [ 'rc_id' => $rcidBatch ],
                __METHOD__
            );
            $this->waitForReplication();
        }
    }

    # Flag our recent autopatrolled edits
    if ( !$wgMiserMode && $autopatrolgroups ) {
        $this->output( "Flagging auto-patrolled edits...\n" );

        $conds = [ 'rc_patrolled' => 0 ];
        if ( !$wgUseRCPatrol ) {
            // Without general RC patrol, only new pages and/or uploads are
            // patrollable, so restrict the update to those rows.
            $subConds = [];
            if ( $wgUseNPPatrol ) {
                $subConds[] = 'rc_source = ' . $dbw->addQuotes( RecentChange::SRC_NEW );
            }
            if ( $wgUseFilePatrol ) {
                $subConds[] = 'rc_log_type = ' . $dbw->addQuotes( 'upload' );
            }
            $conds[] = $dbw->makeList( $subConds, IDatabase::LIST_OR );
        }

        $rcids = $this->findRcIdsWithGroups( $dbw, $autopatrolgroups, $conds );
        foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
            $dbw->update(
                'recentchanges',
                [ 'rc_patrolled' => 2 ],
                [ 'rc_id' => $rcidBatch ],
                __METHOD__
            );
            $this->waitForReplication();
        }
    }
}
464 
/**
 * Rebuild pass 5: collapse revision/log duplicates.
 *
 * For each log entry in the time range (other than 'create') that has an
 * 'associated_rev_id' in log_search, the log's recentchanges row is given
 * that revision ID and the plain revision row for it is deleted.
 *
 * @param ILBFactory $lbFactory Not used by this pass
 */
private function rebuildRecentChangesTablePass5( ILBFactory $lbFactory ) {
    $dbw = $this->getDB( DB_PRIMARY );

    $this->output( "Removing duplicate revision and logging entries...\n" );

    $quotedFrom = $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) );
    $quotedTo = $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) );

    $links = $dbw->select(
        [ 'logging', 'log_search' ],
        [ 'ls_value', 'ls_log_id' ],
        [
            'ls_log_id = log_id',
            'ls_field' => 'associated_rev_id',
            'log_type != ' . $dbw->addQuotes( 'create' ),
            'log_timestamp > ' . $quotedFrom,
            'log_timestamp < ' . $quotedTo,
        ],
        __METHOD__
    );

    $processed = 0;
    foreach ( $links as $link ) {
        $revisionId = $link->ls_value;
        $logId = $link->ls_log_id;

        // Record the associated revision on the log entry's row...
        $dbw->update(
            'recentchanges',
            [ 'rc_this_oldid' => $revisionId ],
            [ 'rc_logid' => $logId ],
            __METHOD__
        );

        // ...then drop the revision-only row, identified by rc_logid = 0.
        $dbw->delete(
            'recentchanges',
            [ 'rc_this_oldid' => $revisionId, 'rc_logid' => 0 ],
            __METHOD__
        );

        $processed++;
        if ( $processed % $this->getBatchSize() == 0 ) {
            $this->waitForReplication();
        }
    }
}
514 
/**
 * Delete the cached 'rcfeed' timestamp key of every feed type listed in
 * $wgFeedClasses from the main WAN object cache.
 */
private function purgeFeeds() {
    global $wgFeedClasses;

    $this->output( "Deleting feed timestamps.\n" );

    $cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
    // Only the feed names (array keys) matter here; the class names are unused.
    foreach ( $wgFeedClasses as $feedName => $unusedClassName ) {
        $timestampKey = $cache->makeKey( 'rcfeed', $feedName, 'timestamp' );
        $cache->delete( $timestampKey ); # Good enough for now.
    }
}
528 }
529 
530 $maintClass = RebuildRecentchanges::class;
531 require_once RUN_MAINTENANCE_IF_MAIN;
const RC_NEW
Definition: Defines.php:117
const LIST_OR
Definition: Defines.php:46
const RC_LOG
Definition: Defines.php:118
const RC_EDIT
Definition: Defines.php:116
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
static validTypes()
Get the list of valid log types.
Definition: LogPage.php:208
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:66
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Service locator for MediaWiki core services.
This is not intended to be a long-term part of MediaWiki; it will be deprecated and removed once actor migration is complete.
Maintenance script that rebuilds recent changes from scratch.
execute()
Do the actual work.
__construct()
Default constructor.
$wgUseFilePatrol
Config variable stub for the UseFilePatrol setting, for use by phpdoc and IDEs.
$wgLogRestrictions
Config variable stub for the LogRestrictions setting, for use by phpdoc and IDEs.
$wgUseRCPatrol
Config variable stub for the UseRCPatrol setting, for use by phpdoc and IDEs.
$wgUseNPPatrol
Config variable stub for the UseNPPatrol setting, for use by phpdoc and IDEs.
$wgRCMaxAge
Config variable stub for the RCMaxAge setting, for use by phpdoc and IDEs.
$wgFeedClasses
Config variable stub for the FeedClasses setting, for use by phpdoc and IDEs.
$wgFilterLogTypes
Config variable stub for the FilterLogTypes setting, for use by phpdoc and IDEs.
$wgMiserMode
Config variable stub for the MiserMode setting, for use by phpdoc and IDEs.
Basic database interface for live and lazy-loaded relation database handles.
Definition: IDatabase.php:40
Manager of ILoadBalancer objects and, indirectly, IDatabase connections.
Definition: ILBFactory.php:46
const DB_PRIMARY
Definition: defines.php:28