MediaWiki REL1_34
rebuildrecentchanges.php
Go to the documentation of this file.
1<?php
26require_once __DIR__ . '/Maintenance.php';
27
30
/** @var int UNIX timestamp; lower bound of the window being rebuilt (set in pass 1) */
private $cutoffFrom;

/** @var int UNIX timestamp; upper bound of the window being rebuilt (set in pass 1) */
private $cutoffTo;
41
/**
 * Register the script description, the optional 'from'/'to' range
 * options and the default batch size.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Rebuild recent changes' );

	// Both range bounds are optional, take an argument and share one help text.
	$rangeHelp = "Only rebuild rows in requested time range (in YYYYMMDDHHMMSS format)";
	foreach ( [ 'from', 'to' ] as $boundName ) {
		$this->addOption( $boundName, $rangeHelp, false, true );
	}

	$this->setBatchSize( 200 );
}
60
/**
 * Run the five rebuild passes in order and, for a full (unbounded) rebuild,
 * purge the cached feed timestamps afterwards.
 */
public function execute() {
	$hasFrom = $this->hasOption( 'from' );
	$hasTo = $this->hasOption( 'to' );

	// A range needs both ends; exactly one of them is a usage error.
	if ( $hasFrom xor $hasTo ) {
		$this->fatalError( "Both 'from' and 'to' must be given, or neither" );
	}

	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

	// Pass 1 sets the cutoff window that passes 2-5 read, so order matters.
	$this->rebuildRecentChangesTablePass1( $lbFactory );
	$this->rebuildRecentChangesTablePass2( $lbFactory );
	$this->rebuildRecentChangesTablePass3( $lbFactory );
	$this->rebuildRecentChangesTablePass4( $lbFactory );
	$this->rebuildRecentChangesTablePass5( $lbFactory );

	// Feed timestamps are only purged when no explicit range was requested.
	if ( !$hasFrom || !$hasTo ) {
		$this->purgeFeeds();
	}

	$this->output( "Done.\n" );
}
80
/**
 * Rebuild pass 1: Insert recentchanges entries for page revisions.
 *
 * Resolves the time window into $this->cutoffFrom / $this->cutoffTo (read by
 * the later passes), deletes the recentchanges rows inside that window in
 * batches, then inserts one recentchanges row per revision found in the
 * window and re-points matching change_tag rows at the new row.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass1( ILBFactory $lbFactory ) {
	$dbw = $this->getDB( DB_MASTER );
	$commentStore = CommentStore::getStore();

	if ( $this->hasOption( 'from' ) && $this->hasOption( 'to' ) ) {
		$this->cutoffFrom = wfTimestamp( TS_UNIX, $this->getOption( 'from' ) );
		$this->cutoffTo = wfTimestamp( TS_UNIX, $this->getOption( 'to' ) );

		// wfTimestamp() returns false for input it cannot parse. Stop before
		// anything is deleted instead of continuing with a bogus window.
		if ( $this->cutoffFrom === false || $this->cutoffTo === false ) {
			$this->fatalError( "Invalid 'from' or 'to' timestamp (expected YYYYMMDDHHMMSS format)" );
		}

		$sec = $this->cutoffTo - $this->cutoffFrom;
		$days = $sec / 24 / 3600;
		$this->output( "Rebuilding range of $sec seconds ($days days)\n" );
	} else {
		global $wgRCMaxAge;

		$days = $wgRCMaxAge / 24 / 3600;
		$this->output( "Rebuilding \$wgRCMaxAge=$wgRCMaxAge seconds ($days days)\n" );

		// Read the clock once so the window is exactly $wgRCMaxAge wide.
		$now = time();
		$this->cutoffFrom = $now - $wgRCMaxAge;
		$this->cutoffTo = $now;
	}

	$this->output( "Clearing recentchanges table for time range...\n" );
	// Collect the ids first so the delete can be done in bounded batches.
	$rcids = $dbw->selectFieldValues(
		'recentchanges',
		'rc_id',
		[
			'rc_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			'rc_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) )
		],
		__METHOD__
	);
	foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
		$dbw->delete( 'recentchanges', [ 'rc_id' => $rcidBatch ], __METHOD__ );
		$lbFactory->waitForReplication();
	}

	$this->output( "Loading from page and revision tables...\n" );

	$commentQuery = $commentStore->getJoin( 'rev_comment' );
	$actorQuery = ActorMigration::newMigration()->getJoin( 'rev_user' );
	$res = $dbw->select(
		[ 'revision', 'page' ] + $commentQuery['tables'] + $actorQuery['tables'],
		[
			'rev_timestamp',
			'rev_minor_edit',
			'rev_id',
			'rev_deleted',
			'page_namespace',
			'page_title',
			'page_is_new',
			'page_id'
		] + $commentQuery['fields'] + $actorQuery['fields'],
		[
			'rev_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			'rev_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) )
		],
		__METHOD__,
		[ 'ORDER BY' => 'rev_timestamp DESC' ],
		[
			'page' => [ 'JOIN', 'rev_page=page_id' ],
		] + $commentQuery['joins'] + $actorQuery['joins']
	);

	$this->output( "Inserting from page and revision tables...\n" );
	$inserted = 0;
	$actorMigration = ActorMigration::newMigration();
	foreach ( $res as $row ) {
		$comment = $commentStore->getComment( 'rev_comment', $row );
		$user = User::newFromAnyId( $row->rev_user, $row->rev_user_text, $row->rev_actor );
		$dbw->insert(
			'recentchanges',
			[
				'rc_timestamp' => $row->rev_timestamp,
				'rc_namespace' => $row->page_namespace,
				'rc_title' => $row->page_title,
				'rc_minor' => $row->rev_minor_edit,
				'rc_bot' => 0,
				'rc_new' => $row->page_is_new,
				'rc_cur_id' => $row->page_id,
				'rc_this_oldid' => $row->rev_id,
				'rc_last_oldid' => 0, // placeholder; pass 2 fills in the previous revision
				'rc_type' => $row->page_is_new ? RC_NEW : RC_EDIT,
				'rc_source' => $row->page_is_new ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT,
				'rc_deleted' => $row->rev_deleted
			] + $commentStore->insert( $dbw, 'rc_comment', $comment )
				+ $actorMigration->getInsertValues( $dbw, 'rc_user', $user ),
			__METHOD__
		);

		// Re-attach tags recorded against this revision to the new RC row.
		$rcid = $dbw->insertId();
		$dbw->update(
			'change_tag',
			[ 'ct_rc_id' => $rcid ],
			[ 'ct_rev_id' => $row->rev_id ],
			__METHOD__
		);

		if ( ( ++$inserted % $this->getBatchSize() ) == 0 ) {
			$lbFactory->waitForReplication();
		}
	}
}
187
/**
 * Rebuild pass 2: Enhance entries for page revisions with a reference to the
 * previous revision (rc_last_oldid), the new-page flags and the old/new sizes.
 *
 * Walks the recentchanges rows of the cutoff window page by page in
 * timestamp order, carrying the previous revision id and length forward
 * from one row to the next.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass2( ILBFactory $lbFactory ) {
	$dbw = $this->getDB( DB_MASTER );

	$this->output( "Updating links and size differences...\n" );

	$windowConds = [
		"rc_timestamp > " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
		"rc_timestamp < " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) )
	];

	# Rows come back grouped per page, oldest first within each page
	$rcRows = $dbw->select(
		'recentchanges',
		[ 'rc_cur_id', 'rc_this_oldid', 'rc_timestamp' ],
		$windowConds,
		__METHOD__,
		[ 'ORDER BY' => 'rc_cur_id,rc_timestamp' ]
	);

	$currentPageId = 0;
	$previousRevId = 0;
	$previousLen = null;
	$processed = 0;
	foreach ( $rcRows as $rcRow ) {
		$isNew = 0;

		if ( $rcRow->rc_cur_id != $currentPageId ) {
			# First row of another page: find the revision just before it, if any
			$currentPageId = intval( $rcRow->rc_cur_id );

			$priorRev = $dbw->selectRow(
				'revision',
				[ 'rev_id', 'rev_len' ],
				[
					'rev_page' => $currentPageId,
					"rev_timestamp < " . $dbw->addQuotes( $rcRow->rc_timestamp )
				],
				__METHOD__,
				[ 'ORDER BY' => 'rev_timestamp DESC' ]
			);
			if ( !$priorRev ) {
				# Nothing earlier exists, so treat this row as the page creation
				$previousRevId = 0;
				$previousLen = 0;
				$isNew = 1; // probably true
			} else {
				$previousRevId = intval( $priorRev->rev_id );
				# Keep the size only when the revision row actually records one
				$previousLen = is_null( $priorRev->rev_len ) ? null : intval( $priorRev->rev_len );
			}
		}

		if ( $currentPageId == 0 ) {
			$this->output( "Uhhh, something wrong? No curid\n" );
			continue;
		}

		# Size of the revision this RC row points at
		$currentLen = (int)$dbw->selectField(
			'revision',
			'rev_len',
			[ 'rev_id' => $rcRow->rc_this_oldid ],
			__METHOD__
		);

		$dbw->update(
			'recentchanges',
			[
				'rc_last_oldid' => $previousRevId,
				'rc_new' => $isNew,
				'rc_type' => $isNew ? RC_NEW : RC_EDIT,
				'rc_source' => $isNew === 1 ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT,
				'rc_old_len' => $previousLen,
				'rc_new_len' => $currentLen,
			],
			[
				'rc_cur_id' => $currentPageId,
				'rc_this_oldid' => $rcRow->rc_this_oldid,
				'rc_timestamp' => $rcRow->rc_timestamp // index usage
			],
			__METHOD__
		);

		# This row becomes the "previous" revision for the next row of the page
		$previousRevId = intval( $rcRow->rc_this_oldid );
		$previousLen = $currentLen;

		if ( ( ++$processed % $this->getBatchSize() ) == 0 ) {
			$lbFactory->waitForReplication();
		}
	}
}
280
/**
 * Rebuild pass 3: Insert recentchanges entries for action logs.
 *
 * Selects logging rows inside the cutoff window, skipping log types that
 * are restricted, filterable, or 'create', inserts one RC_LOG row for each
 * and re-points matching change_tag rows at the new row.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass3( ILBFactory $lbFactory ) {
	// Without this declaration both names would be undefined locals.
	global $wgLogRestrictions, $wgFilterLogTypes;

	$dbw = $this->getDB( DB_MASTER );
	$commentStore = CommentStore::getStore();
	$nonRCLogs = array_merge( array_keys( $wgLogRestrictions ),
		array_keys( $wgFilterLogTypes ),
		[ 'create' ] );

	$this->output( "Loading from user and logging tables...\n" );

	$commentQuery = $commentStore->getJoin( 'log_comment' );
	$actorQuery = ActorMigration::newMigration()->getJoin( 'log_user' );
	$res = $dbw->select(
		[ 'logging' ] + $commentQuery['tables'] + $actorQuery['tables'],
		[
			'log_timestamp',
			'log_namespace',
			'log_title',
			'log_page',
			'log_type',
			'log_action',
			'log_id',
			'log_params',
			'log_deleted'
		] + $commentQuery['fields'] + $actorQuery['fields'],
		[
			'log_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			'log_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
			// Some logs don't go in RC since they are private, or are included in the filterable log types.
			'log_type' => array_diff( LogPage::validTypes(), $nonRCLogs ),
		],
		__METHOD__,
		[ 'ORDER BY' => 'log_timestamp DESC' ],
		$commentQuery['joins'] + $actorQuery['joins']
	);

	// Looked up once: decides below whether a NULL log_page may be stored as-is.
	$field = $dbw->fieldInfo( 'recentchanges', 'rc_cur_id' );

	$inserted = 0;
	$actorMigration = ActorMigration::newMigration();
	foreach ( $res as $row ) {
		$comment = $commentStore->getComment( 'log_comment', $row );
		$user = User::newFromAnyId( $row->log_user, $row->log_user_text, $row->log_actor );
		$dbw->insert(
			'recentchanges',
			[
				'rc_timestamp' => $row->log_timestamp,
				'rc_namespace' => $row->log_namespace,
				'rc_title' => $row->log_title,
				'rc_minor' => 0,
				'rc_bot' => 0,
				// Uploads start at 0; every other log type is inserted as 2
				'rc_patrolled' => $row->log_type == 'upload' ? 0 : 2,
				'rc_new' => 0,
				'rc_this_oldid' => 0,
				'rc_last_oldid' => 0,
				'rc_type' => RC_LOG,
				'rc_source' => RecentChange::SRC_LOG,
				'rc_cur_id' => $field->isNullable()
					? $row->log_page
					: (int)$row->log_page, // NULL => 0,
				'rc_log_type' => $row->log_type,
				'rc_log_action' => $row->log_action,
				'rc_logid' => $row->log_id,
				'rc_params' => $row->log_params,
				'rc_deleted' => $row->log_deleted
			] + $commentStore->insert( $dbw, 'rc_comment', $comment )
				+ $actorMigration->getInsertValues( $dbw, 'rc_user', $user ),
			__METHOD__
		);

		// Re-attach tags recorded against this log entry to the new RC row.
		$rcid = $dbw->insertId();
		$dbw->update(
			'change_tag',
			[ 'ct_rc_id' => $rcid ],
			[ 'ct_log_id' => $row->log_id ],
			__METHOD__
		);

		if ( ( ++$inserted % $this->getBatchSize() ) == 0 ) {
			$lbFactory->waitForReplication();
		}
	}
}
370
/**
 * Rebuild pass 4: Mark bot and autopatrolled entries.
 *
 * Sets rc_bot = 1 on rows in the cutoff window made by members of the 'bot'
 * group. When $wgUseRCPatrol is on and $wgMiserMode is off, it also sets
 * rc_patrolled = 2 on still-unpatrolled rows made by members of any group
 * that has the 'autopatrol' permission.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass4( ILBFactory $lbFactory ) {
	// Without this declaration both names would be undefined locals and the
	// autopatrol branch below could never run.
	global $wgUseRCPatrol, $wgMiserMode;

	$dbw = $this->getDB( DB_MASTER );

	$userQuery = User::getQueryInfo();

	# @FIXME: recognize other bot account groups (not the same as users with 'bot' rights)
	# @NOTE: users with 'bot' rights choose when edits are bot edits or not. That information
	# may be lost at this point (aside from joining on the patrol log table entries).
	$botgroups = [ 'bot' ];
	$autopatrolgroups = $wgUseRCPatrol ? MediaWikiServices::getInstance()
		->getPermissionManager()
		->getGroupsWithPermission( 'autopatrol' ) : [];

	# Flag our recent bot edits
	if ( $botgroups ) {
		$this->output( "Flagging bot account edits...\n" );

		# Find all users that are bots
		$res = $dbw->select(
			array_merge( [ 'user_groups' ], $userQuery['tables'] ),
			$userQuery['fields'],
			[ 'ug_group' => $botgroups ],
			__METHOD__,
			[ 'DISTINCT' ],
			[ 'user_groups' => [ 'JOIN', 'user_id = ug_user' ] ] + $userQuery['joins']
		);

		$botusers = [];
		foreach ( $res as $row ) {
			$botusers[] = User::newFromRow( $row );
		}

		# Fill in the rc_bot field
		if ( $botusers ) {
			$actorQuery = ActorMigration::newMigration()->getWhere( $dbw, 'rc_user', $botusers, false );
			$rcids = [];
			// One query per OR-condition; ids are merged and de-duplicated afterwards.
			foreach ( $actorQuery['orconds'] as $cond ) {
				$rcids = array_merge( $rcids, $dbw->selectFieldValues(
					[ 'recentchanges' ] + $actorQuery['tables'],
					'rc_id',
					[
						"rc_timestamp > " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
						"rc_timestamp < " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
						$cond,
					],
					__METHOD__,
					[],
					$actorQuery['joins']
				) );
			}
			$rcids = array_values( array_unique( $rcids ) );

			foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
				$dbw->update(
					'recentchanges',
					[ 'rc_bot' => 1 ],
					[ 'rc_id' => $rcidBatch ],
					__METHOD__
				);
				$lbFactory->waitForReplication();
			}
		}
	}

	# Flag our recent autopatrolled edits
	if ( !$wgMiserMode && $autopatrolgroups ) {
		$patrolusers = [];

		$this->output( "Flagging auto-patrolled edits...\n" );

		# Find all users in RC with autopatrol rights
		$res = $dbw->select(
			array_merge( [ 'user_groups' ], $userQuery['tables'] ),
			$userQuery['fields'],
			[ 'ug_group' => $autopatrolgroups ],
			__METHOD__,
			[ 'DISTINCT' ],
			[ 'user_groups' => [ 'JOIN', 'user_id = ug_user' ] ] + $userQuery['joins']
		);

		foreach ( $res as $row ) {
			$patrolusers[] = User::newFromRow( $row );
		}

		# Fill in the rc_patrolled field
		if ( $patrolusers ) {
			$actorQuery = ActorMigration::newMigration()->getWhere( $dbw, 'rc_user', $patrolusers, false );
			foreach ( $actorQuery['orconds'] as $cond ) {
				$dbw->update(
					'recentchanges',
					[ 'rc_patrolled' => 2 ],
					[
						$cond,
						'rc_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
						'rc_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
						'rc_patrolled' => 0
					],
					__METHOD__
				);
				$lbFactory->waitForReplication();
			}
		}
	}
}
482
/**
 * Rebuild pass 5: Delete duplicate entries where both a page revision row
 * and a log row were generated for the same upload.
 *
 * For every upload log entry in the cutoff window that has an
 * 'associated_rev_id' in log_search, the revision id is copied onto the
 * log-based recentchanges row and the revision-based row (rc_logid = 0)
 * for that revision is deleted.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass5( ILBFactory $lbFactory ) {
	// Use the script's own connection accessor, as every other pass does.
	$dbw = $this->getDB( DB_MASTER );

	$this->output( "Removing duplicate revision and logging entries...\n" );

	$res = $dbw->select(
		[ 'logging', 'log_search' ],
		[ 'ls_value', 'ls_log_id' ],
		[
			'ls_log_id = log_id',
			'ls_field' => 'associated_rev_id',
			'log_type' => 'upload',
			'log_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			'log_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
		],
		__METHOD__
	);

	$updates = 0;
	foreach ( $res as $row ) {
		$rev_id = $row->ls_value;
		$log_id = $row->ls_log_id;

		// Mark the logging row as having an associated rev id
		$dbw->update(
			'recentchanges',
			/*SET*/ [ 'rc_this_oldid' => $rev_id ],
			/*WHERE*/ [ 'rc_logid' => $log_id ],
			__METHOD__
		);

		// Delete the revision row
		$dbw->delete(
			'recentchanges',
			/*WHERE*/ [ 'rc_this_oldid' => $rev_id, 'rc_logid' => 0 ],
			__METHOD__
		);

		if ( ( ++$updates % $this->getBatchSize() ) == 0 ) {
			$lbFactory->waitForReplication();
		}
	}
}
532
/**
 * Purge cached feeds: delete the 'rcfeed' timestamp key of every
 * configured feed from the main WAN object cache.
 */
private function purgeFeeds() {
	global $wgFeedClasses;

	$this->output( "Deleting feed timestamps.\n" );

	$wanCache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	// Only the feed names (the array keys) are needed to build the cache keys.
	foreach ( array_keys( $wgFeedClasses ) as $feedName ) {
		$timestampKey = $wanCache->makeKey( 'rcfeed', $feedName, 'timestamp' );
		$wanCache->delete( $timestampKey ); # Good enough for now.
	}
}
546}
547
// Name the maintenance class defined in this file, then include the
// file named by RUN_MAINTENANCE_IF_MAIN.
$maintClass = RebuildRecentchanges::class;
require_once RUN_MAINTENANCE_IF_MAIN;
getDB()
$wgLogRestrictions
This restricts log access to those who have a certain right. Users without this will not see it in the...
$wgUseRCPatrol
Use RC Patrolling to check for vandalism (from recent changes and watchlists). New pages and new files...
$wgRCMaxAge
Recentchanges items are periodically purged; entries older than this many seconds will go.
$wgFeedClasses
Available feeds objects.
$wgFilterLogTypes
Show/hide links on Special:Log will be shown for these log types.
$wgMiserMode
Disable database-intensive features.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
const RUN_MAINTENANCE_IF_MAIN
static validTypes()
Get the list of valid log types.
Definition LogPage.php:198
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort...
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option exists.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Set the batch size.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Maintenance script that rebuilds recent changes from scratch.
execute()
Do the actual work.
rebuildRecentChangesTablePass5(ILBFactory $lbFactory)
Rebuild pass 5: Delete duplicate entries where we generate both a page revision and a log entry for a...
int $cutoffFrom
UNIX timestamp.
rebuildRecentChangesTablePass3(ILBFactory $lbFactory)
Rebuild pass 3: Insert recentchanges entries for action logs.
rebuildRecentChangesTablePass4(ILBFactory $lbFactory)
Rebuild pass 4: Mark bot and autopatrolled entries.
int $cutoffTo
UNIX timestamp.
__construct()
Default constructor.
rebuildRecentChangesTablePass1(ILBFactory $lbFactory)
Rebuild pass 1: Insert recentchanges entries for page revisions.
rebuildRecentChangesTablePass2(ILBFactory $lbFactory)
Rebuild pass 2: Enhance entries for page revisions with references to the previous revision (rc_last_...
purgeFeeds()
Purge cached feeds in $wanCache.
static getQueryInfo()
Return the tables, fields, and join conditions to be selected to create a new user object.
Definition User.php:5358
static newFromRow( $row, $data=null)
Create a new user object from a user row.
Definition User.php:699
static newFromAnyId( $userId, $userName, $actorId, $dbDomain=false)
Static factory method for creation from an ID, name, and/or actor ID.
Definition User.php:599
const RC_NEW
Definition Defines.php:132
const RC_LOG
Definition Defines.php:133
const RC_EDIT
Definition Defines.php:131
An interface for generating database load balancers.
waitForReplication(array $opts=[])
Waits for the replica DBs to catch up to the current master position.
const DB_MASTER
Definition defines.php:26