MediaWiki fundraising/REL1_35
rebuildrecentchanges.php
Go to the documentation of this file.
1<?php
26require_once __DIR__ . '/Maintenance.php';
27
31
// UNIX timestamp: exclusive lower bound of the time range being rebuilt.
// Assigned in rebuildRecentChangesTablePass1() and read by the later passes.
39 private $cutoffFrom;
// UNIX timestamp: exclusive upper bound of the time range being rebuilt.
// Assigned in rebuildRecentChangesTablePass1() and read by the later passes.
41 private $cutoffTo;
42
/**
 * Default constructor: registers the script description, the optional
 * 'from' / 'to' range options and the default batch size.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Rebuild recent changes' );

	// Both range bounds are optional (third argument) and take a value (fourth argument).
	$rangeHelp = "Only rebuild rows in requested time range (in YYYYMMDDHHMMSS format)";
	foreach ( [ 'from', 'to' ] as $boundName ) {
		$this->addOption( $boundName, $rangeHelp, false, true );
	}

	$this->setBatchSize( 200 );
}
61
/**
 * Do the actual work: run the five rebuild passes in order and, when the
 * whole table (rather than an explicit range) was rebuilt, purge the feeds.
 */
public function execute() {
	$hasFrom = $this->hasOption( 'from' );
	$hasTo = $this->hasOption( 'to' );

	// A range needs both ends; exactly one of the two being set is an error.
	if ( $hasFrom xor $hasTo ) {
		$this->fatalError( "Both 'from' and 'to' must be given, or neither" );
	}

	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

	// Pass 1 sets the cutoff timestamps that passes 2-5 rely on, so order matters.
	$this->rebuildRecentChangesTablePass1( $lbFactory );
	$this->rebuildRecentChangesTablePass2( $lbFactory );
	$this->rebuildRecentChangesTablePass3( $lbFactory );
	$this->rebuildRecentChangesTablePass4( $lbFactory );
	$this->rebuildRecentChangesTablePass5( $lbFactory );

	// Feed timestamps are only dropped when no explicit range was requested.
	if ( !$hasFrom || !$hasTo ) {
		$this->purgeFeeds();
	}

	$this->output( "Done.\n" );
}
81
/**
 * Rebuild pass 1: Insert recentchanges entries for page revisions.
 *
 * Determines the time range to rebuild (from the 'from'/'to' options, or the
 * last $wgRCMaxAge seconds), stores it in $this->cutoffFrom / $this->cutoffTo,
 * deletes the existing recentchanges rows in that range and re-inserts one row
 * per revision found in the range.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass1( ILBFactory $lbFactory ) {
	$dbw = $this->getDB( DB_MASTER );
	$commentStore = CommentStore::getStore();
	$actorMigration = ActorMigration::newMigration();

	if ( $this->hasOption( 'from' ) && $this->hasOption( 'to' ) ) {
		$from = wfTimestamp( TS_UNIX, $this->getOption( 'from' ) );
		$to = wfTimestamp( TS_UNIX, $this->getOption( 'to' ) );

		// wfTimestamp() yields false for input it cannot parse. Letting that through
		// would make the range queries below operate on an unintended window, so
		// refuse to touch the table instead.
		if ( $from === false || $to === false ) {
			$this->fatalError(
				"Invalid 'from' or 'to' timestamp; expected YYYYMMDDHHMMSS format"
			);
		}

		$this->cutoffFrom = $from;
		$this->cutoffTo = $to;

		$sec = $this->cutoffTo - $this->cutoffFrom;
		$days = $sec / 24 / 3600;
		$this->output( "Rebuilding range of $sec seconds ($days days)\n" );
	} else {
		global $wgRCMaxAge;

		$days = $wgRCMaxAge / 24 / 3600;
		$this->output( "Rebuilding \$wgRCMaxAge=$wgRCMaxAge seconds ($days days)\n" );

		// Read the clock once so both bounds are derived from the same instant
		$now = time();
		$this->cutoffFrom = $now - $wgRCMaxAge;
		$this->cutoffTo = $now;
	}

	$this->output( "Clearing recentchanges table for time range...\n" );
	$rcids = $dbw->selectFieldValues(
		'recentchanges',
		'rc_id',
		[
			'rc_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			'rc_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) )
		],
		__METHOD__
	);
	// Delete by primary key in batches, pausing for replication after each batch
	foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
		$dbw->delete( 'recentchanges', [ 'rc_id' => $rcidBatch ], __METHOD__ );
		$lbFactory->waitForReplication();
	}

	$this->output( "Loading from page and revision tables...\n" );

	$commentQuery = $commentStore->getJoin( 'rev_comment' );
	$actorQuery = $actorMigration->getJoin( 'rev_user' );
	$res = $dbw->select(
		[ 'revision', 'page' ] + $commentQuery['tables'] + $actorQuery['tables'],
		[
			'rev_timestamp',
			'rev_minor_edit',
			'rev_id',
			'rev_deleted',
			'page_namespace',
			'page_title',
			'page_is_new',
			'page_id'
		] + $commentQuery['fields'] + $actorQuery['fields'],
		[
			'rev_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			'rev_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) )
		],
		__METHOD__,
		[ 'ORDER BY' => 'rev_timestamp DESC' ],
		[
			'page' => [ 'JOIN', 'rev_page=page_id' ],
		] + $commentQuery['joins'] + $actorQuery['joins']
	);

	$this->output( "Inserting from page and revision tables...\n" );
	$inserted = 0;
	foreach ( $res as $row ) {
		$comment = $commentStore->getComment( 'rev_comment', $row );
		$user = User::newFromAnyId( $row->rev_user, $row->rev_user_text, $row->rev_actor );
		$dbw->insert(
			'recentchanges',
			[
				'rc_timestamp' => $row->rev_timestamp,
				'rc_namespace' => $row->page_namespace,
				'rc_title' => $row->page_title,
				'rc_minor' => $row->rev_minor_edit,
				'rc_bot' => 0,
				'rc_new' => $row->page_is_new,
				'rc_cur_id' => $row->page_id,
				'rc_this_oldid' => $row->rev_id,
				'rc_last_oldid' => 0, // placeholder; pass 2 fills in the real value
				'rc_type' => $row->page_is_new ? RC_NEW : RC_EDIT,
				'rc_source' => $row->page_is_new ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT,
				'rc_deleted' => $row->rev_deleted
			] + $commentStore->insert( $dbw, 'rc_comment', $comment )
				+ $actorMigration->getInsertValues( $dbw, 'rc_user', $user ),
			__METHOD__
		);

		// Point any change tags of this revision at the freshly inserted RC row
		$rcid = $dbw->insertId();
		$dbw->update(
			'change_tag',
			[ 'ct_rc_id' => $rcid ],
			[ 'ct_rev_id' => $row->rev_id ],
			__METHOD__
		);

		if ( ( ++$inserted % $this->getBatchSize() ) == 0 ) {
			$lbFactory->waitForReplication();
		}
	}
}
189
/**
 * Rebuild pass 2: Enhance entries for page revisions with a reference to the
 * previous revision (rc_last_oldid), the new-page flags and the old/new sizes.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass2( ILBFactory $lbFactory ) {
	$dbw = $this->getDB( DB_MASTER );

	$this->output( "Updating links and size differences...\n" );

	# Rows are visited page by page in timestamp order, so each row's
	# predecessor is simply the row handled just before it for the same page
	$rangeConds = [
		"rc_timestamp > " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
		"rc_timestamp < " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) )
	];
	$rcRows = $dbw->select(
		'recentchanges',
		[ 'rc_cur_id', 'rc_this_oldid', 'rc_timestamp' ],
		$rangeConds,
		__METHOD__,
		[ 'ORDER BY' => [ 'rc_cur_id', 'rc_timestamp' ] ]
	);

	$currentPageId = 0;
	$previousRevId = 0;
	$previousLen = null;
	$processed = 0;
	foreach ( $rcRows as $rcRow ) {
		$isNew = 0;

		if ( $rcRow->rc_cur_id != $currentPageId ) {
			# First row of another page: find the newest revision that
			# predates this row, if there is one
			$currentPageId = intval( $rcRow->rc_cur_id );

			$priorRev = $dbw->selectRow(
				'revision',
				[ 'rev_id', 'rev_len' ],
				[
					'rev_page' => $currentPageId,
					"rev_timestamp < " . $dbw->addQuotes( $rcRow->rc_timestamp )
				],
				__METHOD__,
				[ 'ORDER BY' => 'rev_timestamp DESC' ]
			);
			if ( !$priorRev ) {
				# Nothing older exists, so treat this row as the page creation
				$previousRevId = 0;
				$previousLen = 0;
				$isNew = 1;
			} else {
				$previousRevId = intval( $priorRev->rev_id );
				# The size is only carried over when the revision has one recorded
				if ( $priorRev->rev_len !== null ) {
					$previousLen = intval( $priorRev->rev_len );
				} else {
					$previousLen = null;
				}
			}
		}

		if ( $currentPageId == 0 ) {
			$this->output( "Uhhh, something wrong? No curid\n" );
			continue;
		}

		# Size of the revision this RC row describes
		$currentLen = (int)$dbw->selectField(
			'revision',
			'rev_len',
			[ 'rev_id' => $rcRow->rc_this_oldid ],
			__METHOD__
		);

		$newValues = [
			'rc_last_oldid' => $previousRevId,
			'rc_new' => $isNew,
			'rc_type' => $isNew ? RC_NEW : RC_EDIT,
			'rc_source' => $isNew === 1 ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT,
			'rc_old_len' => $previousLen,
			'rc_new_len' => $currentLen,
		];
		$rowConds = [
			'rc_cur_id' => $currentPageId,
			'rc_this_oldid' => $rcRow->rc_this_oldid,
			'rc_timestamp' => $rcRow->rc_timestamp // index usage
		];
		$dbw->update( 'recentchanges', $newValues, $rowConds, __METHOD__ );

		# This row becomes the predecessor of the next row for the same page
		$previousRevId = intval( $rcRow->rc_this_oldid );
		$previousLen = $currentLen;

		if ( ( ++$processed % $this->getBatchSize() ) == 0 ) {
			$lbFactory->waitForReplication();
		}
	}
}
282
/**
 * Rebuild pass 3: Insert recentchanges entries for action logs.
 *
 * Log types listed in $wgLogRestrictions or $wgFilterLogTypes, and the
 * 'create' type, are left out of the rebuilt table.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass3( ILBFactory $lbFactory ) {
	// These configuration globals must be imported explicitly; without this
	// declaration they would be undefined local variables in this method.
	global $wgLogRestrictions, $wgFilterLogTypes;

	$dbw = $this->getDB( DB_MASTER );
	$commentStore = CommentStore::getStore();
	$nonRCLogs = array_merge( array_keys( $wgLogRestrictions ),
		array_keys( $wgFilterLogTypes ),
		[ 'create' ] );

	$this->output( "Loading from user and logging tables...\n" );

	$commentQuery = $commentStore->getJoin( 'log_comment' );
	$actorQuery = ActorMigration::newMigration()->getJoin( 'log_user' );
	$res = $dbw->select(
		[ 'logging' ] + $commentQuery['tables'] + $actorQuery['tables'],
		[
			'log_timestamp',
			'log_namespace',
			'log_title',
			'log_page',
			'log_type',
			'log_action',
			'log_id',
			'log_params',
			'log_deleted'
		] + $commentQuery['fields'] + $actorQuery['fields'],
		[
			'log_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			'log_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
			// Some logs don't go in RC since they are private, or are included in the filterable log types.
			'log_type' => array_diff( LogPage::validTypes(), $nonRCLogs ),
		],
		__METHOD__,
		[ 'ORDER BY' => 'log_timestamp DESC' ],
		$commentQuery['joins'] + $actorQuery['joins']
	);

	// Looked up once so the loop can decide how to store a missing log_page
	$field = $dbw->fieldInfo( 'recentchanges', 'rc_cur_id' );

	$inserted = 0;
	$actorMigration = ActorMigration::newMigration();
	foreach ( $res as $row ) {
		$comment = $commentStore->getComment( 'log_comment', $row );
		$user = User::newFromAnyId( $row->log_user, $row->log_user_text, $row->log_actor );
		$dbw->insert(
			'recentchanges',
			[
				'rc_timestamp' => $row->log_timestamp,
				'rc_namespace' => $row->log_namespace,
				'rc_title' => $row->log_title,
				'rc_minor' => 0,
				'rc_bot' => 0,
				// Only upload entries start out unpatrolled (0); all others get 2
				'rc_patrolled' => $row->log_type == 'upload' ? 0 : 2,
				'rc_new' => 0,
				'rc_this_oldid' => 0,
				'rc_last_oldid' => 0,
				'rc_type' => RC_LOG,
				'rc_source' => RecentChange::SRC_LOG,
				'rc_cur_id' => $field->isNullable()
					? $row->log_page
					: (int)$row->log_page, // NULL => 0,
				'rc_log_type' => $row->log_type,
				'rc_log_action' => $row->log_action,
				'rc_logid' => $row->log_id,
				'rc_params' => $row->log_params,
				'rc_deleted' => $row->log_deleted
			] + $commentStore->insert( $dbw, 'rc_comment', $comment )
				+ $actorMigration->getInsertValues( $dbw, 'rc_user', $user ),
			__METHOD__
		);

		// Point any change tags of this log entry at the freshly inserted RC row
		$rcid = $dbw->insertId();
		$dbw->update(
			'change_tag',
			[ 'ct_rc_id' => $rcid ],
			[ 'ct_log_id' => $row->log_id ],
			__METHOD__
		);

		if ( ( ++$inserted % $this->getBatchSize() ) == 0 ) {
			$lbFactory->waitForReplication();
		}
	}
}
372
/**
 * Rebuild pass 4: Mark bot and autopatrolled entries.
 *
 * Sets rc_bot = 1 on rows by members of the 'bot' group and, unless
 * $wgMiserMode is on, sets rc_patrolled = 2 on unpatrolled rows by members of
 * groups that hold the 'autopatrol' permission.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass4( ILBFactory $lbFactory ) {
	// These configuration globals must be imported explicitly; without this
	// declaration they would be undefined local variables in this method.
	global $wgUseRCPatrol, $wgUseNPPatrol, $wgUseFilePatrol, $wgMiserMode;

	$dbw = $this->getDB( DB_MASTER );

	$userQuery = User::getQueryInfo();

	# @FIXME: recognize other bot account groups (not the same as users with 'bot' rights)
	# @NOTE: users with 'bot' rights choose when edits are bot edits or not. That information
	# may be lost at this point (aside from joining on the patrol log table entries).
	$botgroups = [ 'bot' ];
	$autopatrolgroups = ( $wgUseRCPatrol || $wgUseNPPatrol || $wgUseFilePatrol ) ?
		MediaWikiServices::getInstance()->getPermissionManager()
			->getGroupsWithPermission( 'autopatrol' ) : [];

	# Flag our recent bot edits
	// @phan-suppress-next-line PhanRedundantCondition
	if ( $botgroups ) {
		$this->output( "Flagging bot account edits...\n" );

		# Find all users that are bots
		$res = $dbw->select(
			array_merge( [ 'user_groups' ], $userQuery['tables'] ),
			$userQuery['fields'],
			[ 'ug_group' => $botgroups ],
			__METHOD__,
			[ 'DISTINCT' ],
			[ 'user_groups' => [ 'JOIN', 'user_id = ug_user' ] ] + $userQuery['joins']
		);

		$botusers = [];
		foreach ( $res as $row ) {
			$botusers[] = User::newFromRow( $row );
		}

		# Fill in the rc_bot field
		if ( $botusers ) {
			$actorQuery = ActorMigration::newMigration()->getWhere( $dbw, 'rc_user', $botusers, false );
			$rcids = [];
			// One query per condition; the collected ids are de-duplicated afterwards
			foreach ( $actorQuery['orconds'] as $cond ) {
				$rcids = array_merge( $rcids, $dbw->selectFieldValues(
					[ 'recentchanges' ] + $actorQuery['tables'],
					'rc_id',
					[
						"rc_timestamp > " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
						"rc_timestamp < " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
						$cond,
					],
					__METHOD__,
					[],
					$actorQuery['joins']
				) );
			}
			$rcids = array_values( array_unique( $rcids ) );

			foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
				$dbw->update(
					'recentchanges',
					[ 'rc_bot' => 1 ],
					[ 'rc_id' => $rcidBatch ],
					__METHOD__
				);
				$lbFactory->waitForReplication();
			}
		}
	}

	# Flag our recent autopatrolled edits
	if ( !$wgMiserMode && $autopatrolgroups ) {
		$patrolusers = [];

		$this->output( "Flagging auto-patrolled edits...\n" );

		# Find all users in RC with autopatrol rights
		$res = $dbw->select(
			array_merge( [ 'user_groups' ], $userQuery['tables'] ),
			$userQuery['fields'],
			[ 'ug_group' => $autopatrolgroups ],
			__METHOD__,
			[ 'DISTINCT' ],
			[ 'user_groups' => [ 'JOIN', 'user_id = ug_user' ] ] + $userQuery['joins']
		);

		foreach ( $res as $row ) {
			$patrolusers[] = User::newFromRow( $row );
		}

		# Fill in the rc_patrolled field
		if ( $patrolusers ) {
			$actorQuery = ActorMigration::newMigration()->getWhere( $dbw, 'rc_user', $patrolusers, false );
			foreach ( $actorQuery['orconds'] as $cond ) {
				$conds = [
					$cond,
					'rc_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
					'rc_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
					'rc_patrolled' => 0
				];

				// Without general RC patrol, restrict the update to the row kinds
				// whose specific patrol feature is enabled. At least one of them is
				// on here, because $autopatrolgroups is empty otherwise.
				if ( !$wgUseRCPatrol ) {
					$subConds = [];
					if ( $wgUseNPPatrol ) {
						$subConds[] = 'rc_source = ' . $dbw->addQuotes( RecentChange::SRC_NEW );
					}
					if ( $wgUseFilePatrol ) {
						$subConds[] = 'rc_log_type = ' . $dbw->addQuotes( 'upload' );
					}
					$conds[] = $dbw->makeList( $subConds, IDatabase::LIST_OR );
				}

				$dbw->update(
					'recentchanges',
					[ 'rc_patrolled' => 2 ],
					$conds,
					__METHOD__
				);
				$lbFactory->waitForReplication();
			}
		}
	}
}
498
/**
 * Rebuild pass 5: Delete duplicate entries where both a page revision row and
 * a log entry row were generated for the same action.
 *
 * For every log entry in the range that has an 'associated_rev_id' in
 * log_search, the log's recentchanges row is given that revision id and the
 * separate revision row (rc_logid = 0) is deleted.
 *
 * @param ILBFactory $lbFactory Used to wait for replication between batches
 */
private function rebuildRecentChangesTablePass5( ILBFactory $lbFactory ) {
	// Use the script's own handle, like passes 1-4, rather than wfGetDB()
	$dbw = $this->getDB( DB_MASTER );

	$this->output( "Removing duplicate revision and logging entries...\n" );

	$res = $dbw->select(
		[ 'logging', 'log_search' ],
		[ 'ls_value', 'ls_log_id' ],
		[
			'ls_log_id = log_id',
			'ls_field' => 'associated_rev_id',
			'log_type != ' . $dbw->addQuotes( 'create' ),
			'log_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			'log_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
		],
		__METHOD__
	);

	$updates = 0;
	foreach ( $res as $row ) {
		$rev_id = $row->ls_value;
		$log_id = $row->ls_log_id;

		// Mark the logging row as having an associated rev id
		$dbw->update(
			'recentchanges',
			/*SET*/ [ 'rc_this_oldid' => $rev_id ],
			/*WHERE*/ [ 'rc_logid' => $log_id ],
			__METHOD__
		);

		// Delete the revision row
		$dbw->delete(
			'recentchanges',
			/*WHERE*/ [ 'rc_this_oldid' => $rev_id, 'rc_logid' => 0 ],
			__METHOD__
		);

		if ( ( ++$updates % $this->getBatchSize() ) == 0 ) {
			$lbFactory->waitForReplication();
		}
	}
}
548
/**
 * Purge cached feeds: delete the 'rcfeed' timestamp key of every feed
 * configured in $wgFeedClasses from the main WAN object cache.
 */
private function purgeFeeds() {
	global $wgFeedClasses;

	$this->output( "Deleting feed timestamps.\n" );

	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	// Only the feed names (the array keys) are needed to build the cache keys
	foreach ( array_keys( $wgFeedClasses ) as $feedName ) {
		$timestampKey = $cache->makeKey( 'rcfeed', $feedName, 'timestamp' );
		$cache->delete( $timestampKey ); # Good enough for now.
	}
}
562}
563
// Name the maintenance class defined in this file, then include the file that
// RUN_MAINTENANCE_IF_MAIN points to (presumably the runner that executes
// $maintClass when this script is the entry point; confirm in Maintenance.php).
564$maintClass = RebuildRecentchanges::class;
565require_once RUN_MAINTENANCE_IF_MAIN;
getDB()
$wgUseFilePatrol
Use file patrolling to check new files on Special:Newfiles.
$wgLogRestrictions
This restricts log access to those who have a certain right. Users without this will not see it in the...
$wgUseRCPatrol
Use RC Patrolling to check for vandalism (from recent changes and watchlists). New pages and new files...
$wgUseNPPatrol
Use new page patrolling to check new pages on Special:Newpages.
$wgRCMaxAge
Recentchanges items are periodically purged; entries older than this many seconds will go.
$wgFeedClasses
Available feeds objects.
$wgFilterLogTypes
Show/hide links on Special:Log will be shown for these log types.
$wgMiserMode
Disable database-intensive features.
wfGetDB( $db, $groups=[], $wiki=false)
Get a Database object.
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
const RUN_MAINTENANCE_IF_MAIN
static validTypes()
Get the list of valid log types.
Definition LogPage.php:203
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
Set the batch size.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
MediaWikiServices is the service locator for the application scope of MediaWiki.
Maintenance script that rebuilds recent changes from scratch.
execute()
Do the actual work.
rebuildRecentChangesTablePass5(ILBFactory $lbFactory)
Rebuild pass 5: Delete duplicate entries where we generate both a page revision and a log entry for a...
int $cutoffFrom
UNIX timestamp.
rebuildRecentChangesTablePass3(ILBFactory $lbFactory)
Rebuild pass 3: Insert recentchanges entries for action logs.
rebuildRecentChangesTablePass4(ILBFactory $lbFactory)
Rebuild pass 4: Mark bot and autopatrolled entries.
int $cutoffTo
UNIX timestamp.
__construct()
Default constructor.
rebuildRecentChangesTablePass1(ILBFactory $lbFactory)
Rebuild pass 1: Insert recentchanges entries for page revisions.
rebuildRecentChangesTablePass2(ILBFactory $lbFactory)
Rebuild pass 2: Enhance entries for page revisions with references to the previous revision (rc_last_...
purgeFeeds()
Purge cached feeds in $wanCache.
const RC_NEW
Definition Defines.php:133
const RC_LOG
Definition Defines.php:134
const RC_EDIT
Definition Defines.php:132
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:38
An interface for generating database load balancers.
waitForReplication(array $opts=[])
Waits for the replica DBs to catch up to the current master position.
const DB_MASTER
Definition defines.php:29