MediaWiki REL1_40
rebuildrecentchanges.php
Go to the documentation of this file.
1<?php
26require_once __DIR__ . '/Maintenance.php';
27
31
/** @var int UNIX timestamp: lower bound of the time range being rebuilt (set in pass 1) */
private $cutoffFrom;
/** @var int UNIX timestamp: upper bound of the time range being rebuilt (set in pass 1) */
private $cutoffTo;
42
/**
 * Register the script description, the optional 'from'/'to' range
 * options and the batch size used by the rebuild passes.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Rebuild recent changes' );

	// Both range bounds are optional, take an argument and share one help text.
	$rangeHelp = "Only rebuild rows in requested time range (in YYYYMMDDHHMMSS format)";
	foreach ( [ 'from', 'to' ] as $boundName ) {
		$this->addOption( $boundName, $rangeHelp, false, true );
	}

	$this->setBatchSize( 200 );
}
61
/**
 * Run the five rebuild passes in order, then purge the feed timestamps
 * unless only an explicit from/to range was rebuilt.
 */
public function execute() {
	$hasFrom = $this->hasOption( 'from' );
	$hasTo = $this->hasOption( 'to' );

	// A range needs both ends; exactly one of them being set is an error.
	if ( $hasFrom xor $hasTo ) {
		$this->fatalError( "Both 'from' and 'to' must be given, or neither" );
	}

	$this->rebuildRecentChangesTablePass1();
	$this->rebuildRecentChangesTablePass2();
	$this->rebuildRecentChangesTablePass3();
	$this->rebuildRecentChangesTablePass4();
	$this->rebuildRecentChangesTablePass5();

	$rangeOnly = $hasFrom && $hasTo;
	if ( !$rangeOnly ) {
		$this->purgeFeeds();
	}

	$this->output( "Done.\n" );
}
80
/**
 * Rebuild pass 1: work out the time range, delete the recentchanges rows
 * inside it and re-insert one row per revision found in that range.
 *
 * This pass also sets $this->cutoffFrom and $this->cutoffTo, which the
 * later passes read.
 */
private function rebuildRecentChangesTablePass1() {
	$dbw = $this->getDB( DB_PRIMARY );
	$commentStore = MediaWikiServices::getInstance()->getCommentStore();

	if ( $this->hasOption( 'from' ) && $this->hasOption( 'to' ) ) {
		$from = wfTimestamp( TS_UNIX, $this->getOption( 'from' ) );
		$to = wfTimestamp( TS_UNIX, $this->getOption( 'to' ) );
		if ( $from === false || $to === false ) {
			// wfTimestamp() returns false for input it cannot parse. Casting
			// that to int would silently move the bound to the epoch and make
			// the script clear and rebuild a far larger range than intended.
			$this->fatalError( "Invalid timestamp given for 'from' or 'to'" );
		}
		$this->cutoffFrom = (int)$from;
		$this->cutoffTo = (int)$to;

		$sec = $this->cutoffTo - $this->cutoffFrom;
		$days = $sec / 24 / 3600;
		$this->output( "Rebuilding range of $sec seconds ($days days)\n" );
	} else {
		global $wgRCMaxAge;

		$days = $wgRCMaxAge / 24 / 3600;
		$this->output( "Rebuilding \$wgRCMaxAge=$wgRCMaxAge seconds ($days days)\n" );

		// Sample the clock once so the range is exactly $wgRCMaxAge long.
		$now = time();
		$this->cutoffFrom = $now - $wgRCMaxAge;
		$this->cutoffTo = $now;
	}

	// Quoted DB-format bounds, shared by the delete and the select below.
	$fromQuoted = $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) );
	$toQuoted = $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) );

	$this->output( "Clearing recentchanges table for time range...\n" );
	$rcids = $dbw->selectFieldValues(
		'recentchanges',
		'rc_id',
		[
			'rc_timestamp > ' . $fromQuoted,
			'rc_timestamp < ' . $toQuoted
		],
		__METHOD__
	);
	// Delete by primary key in batches, letting replicas catch up in between.
	foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
		$dbw->delete( 'recentchanges', [ 'rc_id' => $rcidBatch ], __METHOD__ );
		$this->waitForReplication();
	}

	$this->output( "Loading from page and revision tables...\n" );

	$commentQuery = $commentStore->getJoin( 'rev_comment' );
	$actorQuery = ActorMigration::newMigration()->getJoin( 'rev_user' );
	$res = $dbw->select(
		[ 'revision', 'page' ] + $commentQuery['tables'] + $actorQuery['tables'],
		[
			'rev_timestamp',
			'rev_minor_edit',
			'rev_id',
			'rev_deleted',
			'page_namespace',
			'page_title',
			'page_is_new',
			'page_id'
		] + $commentQuery['fields'] + $actorQuery['fields'],
		[
			'rev_timestamp > ' . $fromQuoted,
			'rev_timestamp < ' . $toQuoted
		],
		__METHOD__,
		[ 'ORDER BY' => 'rev_timestamp DESC' ],
		[
			'page' => [ 'JOIN', 'rev_page=page_id' ],
		] + $commentQuery['joins'] + $actorQuery['joins']
	);

	$this->output( "Inserting from page and revision tables...\n" );
	$inserted = 0;
	foreach ( $res as $row ) {
		$comment = $commentStore->getComment( 'rev_comment', $row );
		$dbw->insert(
			'recentchanges',
			[
				'rc_timestamp' => $row->rev_timestamp,
				'rc_actor' => $row->rev_actor,
				'rc_namespace' => $row->page_namespace,
				'rc_title' => $row->page_title,
				'rc_minor' => $row->rev_minor_edit,
				'rc_bot' => 0,
				'rc_new' => $row->page_is_new,
				'rc_cur_id' => $row->page_id,
				'rc_this_oldid' => $row->rev_id,
				'rc_last_oldid' => 0, // placeholder; pass 2 fills in the real value
				'rc_type' => $row->page_is_new ? RC_NEW : RC_EDIT,
				'rc_source' => $row->page_is_new ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT,
				'rc_deleted' => $row->rev_deleted
			] + $commentStore->insert( $dbw, 'rc_comment', $comment ),
			__METHOD__
		);

		// Point the change tags of this revision at the newly inserted RC row.
		$rcid = $dbw->insertId();
		$dbw->update(
			'change_tag',
			[ 'ct_rc_id' => $rcid ],
			[ 'ct_rev_id' => $row->rev_id ],
			__METHOD__
		);

		if ( ( ++$inserted % $this->getBatchSize() ) == 0 ) {
			$this->waitForReplication();
		}
	}
}
184
/**
 * Rebuild pass 2: walk the recentchanges rows of the range page by page,
 * oldest first, and fill in rc_last_oldid, the new-page flags and the
 * old/new text sizes.
 */
private function rebuildRecentChangesTablePass2() {
	$dbw = $this->getDB( DB_PRIMARY );

	$this->output( "Updating links and size differences...\n" );

	# Fill in the rc_last_oldid field, which points to the previous edit
	$rcRows = $dbw->select(
		'recentchanges',
		[ 'rc_cur_id', 'rc_this_oldid', 'rc_timestamp' ],
		[
			"rc_timestamp > " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			"rc_timestamp < " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) )
		],
		__METHOD__,
		[ 'ORDER BY' => [ 'rc_cur_id', 'rc_timestamp' ] ]
	);

	// State carried from one row to the next while staying on the same page.
	$currentPageId = 0;
	$previousRevId = 0;
	$previousLen = null;
	$updateCount = 0;

	foreach ( $rcRows as $rcRow ) {
		$isNew = 0;

		if ( $rcRow->rc_cur_id != $currentPageId ) {
			# Switch! Look up the previous last edit, if any
			$currentPageId = intval( $rcRow->rc_cur_id );
			$firstTimestamp = $rcRow->rc_timestamp;

			$priorRev = $dbw->selectRow(
				'revision',
				[ 'rev_id', 'rev_len' ],
				[ 'rev_page' => $currentPageId, "rev_timestamp < " . $dbw->addQuotes( $firstTimestamp ) ],
				__METHOD__,
				[ 'ORDER BY' => 'rev_timestamp DESC' ]
			);

			if ( !$priorRev ) {
				# No previous edit
				$previousRevId = 0;
				$previousLen = 0;
				$isNew = 1; // probably true
			} else {
				$previousRevId = intval( $priorRev->rev_id );
				# Grab the last text size if available
				if ( $priorRev->rev_len !== null ) {
					$previousLen = intval( $priorRev->rev_len );
				} else {
					$previousLen = null;
				}
			}
		}

		if ( $currentPageId == 0 ) {
			$this->output( "Uhhh, something wrong? No curid\n" );
			continue;
		}

		# Grab the entry's text size
		$currentLen = (int)$dbw->selectField(
			'revision',
			'rev_len',
			[ 'rev_id' => $rcRow->rc_this_oldid ],
			__METHOD__
		);

		$setValues = [
			'rc_last_oldid' => $previousRevId,
			'rc_new' => $isNew,
			'rc_type' => $isNew ? RC_NEW : RC_EDIT,
			'rc_source' => $isNew === 1 ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT,
			'rc_old_len' => $previousLen,
			'rc_new_len' => $currentLen,
		];
		$whereConds = [
			'rc_cur_id' => $currentPageId,
			'rc_this_oldid' => $rcRow->rc_this_oldid,
			'rc_timestamp' => $rcRow->rc_timestamp // index usage
		];
		$dbw->update( 'recentchanges', $setValues, $whereConds, __METHOD__ );

		// This row becomes the "previous edit" for the next row of the same page.
		$previousRevId = intval( $rcRow->rc_this_oldid );
		$previousLen = $currentLen;

		$updateCount++;
		if ( ( $updateCount % $this->getBatchSize() ) == 0 ) {
			$this->waitForReplication();
		}
	}
}
275
/**
 * Rebuild pass 3: insert one recentchanges row per logging row in the
 * range, skipping log types that do not belong in recent changes.
 */
private function rebuildRecentChangesTablePass3() {
	// Without this declaration the two settings would be undefined locals.
	global $wgLogRestrictions, $wgFilterLogTypes;

	$dbw = $this->getDB( DB_PRIMARY );
	$commentStore = MediaWikiServices::getInstance()->getCommentStore();
	$nonRCLogs = array_merge( array_keys( $wgLogRestrictions ),
		array_keys( $wgFilterLogTypes ),
		[ 'create' ] );

	$this->output( "Loading from user and logging tables...\n" );

	$commentQuery = $commentStore->getJoin( 'log_comment' );
	$res = $dbw->select(
		[ 'logging' ] + $commentQuery['tables'],
		[
			'log_timestamp',
			'log_actor',
			'log_namespace',
			'log_title',
			'log_page',
			'log_type',
			'log_action',
			'log_id',
			'log_params',
			'log_deleted'
		] + $commentQuery['fields'],
		[
			'log_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			'log_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
			// Some logs don't go in RC since they are private, or are included in the filterable log types.
			'log_type' => array_diff( LogPage::validTypes(), $nonRCLogs ),
		],
		__METHOD__,
		[ 'ORDER BY' => [ 'log_timestamp DESC', 'log_id DESC' ] ],
		$commentQuery['joins']
	);

	// Whether rc_cur_id accepts NULL is a schema property, so look it up once
	// instead of once per row.
	$field = $dbw->fieldInfo( 'recentchanges', 'rc_cur_id' );
	$curIdNullable = $field->isNullable();

	$inserted = 0;
	foreach ( $res as $row ) {
		$comment = $commentStore->getComment( 'log_comment', $row );
		$dbw->insert(
			'recentchanges',
			[
				'rc_timestamp' => $row->log_timestamp,
				'rc_actor' => $row->log_actor,
				'rc_namespace' => $row->log_namespace,
				'rc_title' => $row->log_title,
				'rc_minor' => 0,
				'rc_bot' => 0,
				'rc_patrolled' => $row->log_type == 'upload' ? 0 : 2,
				'rc_new' => 0,
				'rc_this_oldid' => 0,
				'rc_last_oldid' => 0,
				'rc_type' => RC_LOG,
				'rc_source' => RecentChange::SRC_LOG,
				'rc_cur_id' => $curIdNullable
					? $row->log_page
					: (int)$row->log_page, // NULL => 0,
				'rc_log_type' => $row->log_type,
				'rc_log_action' => $row->log_action,
				'rc_logid' => $row->log_id,
				'rc_params' => $row->log_params,
				'rc_deleted' => $row->log_deleted
			] + $commentStore->insert( $dbw, 'rc_comment', $comment ),
			__METHOD__
		);

		// Point the change tags of this log entry at the newly inserted RC row.
		$rcid = $dbw->insertId();
		$dbw->update(
			'change_tag',
			[ 'ct_rc_id' => $rcid ],
			[ 'ct_log_id' => $row->log_id ],
			__METHOD__
		);

		if ( ( ++$inserted % $this->getBatchSize() ) == 0 ) {
			$this->waitForReplication();
		}
	}
}
361
/**
 * Find the IDs of recentchanges rows inside the current cutoff range whose
 * actor is a member of at least one of the given user groups.
 *
 * @param IDatabase $db Database handle to run the query on
 * @param string[] $groups User group names matched against ug_group
 * @param array $conds Extra WHERE conditions (keyed or list-style)
 * @return array Matching rc_id values; empty if $groups is empty
 */
private function findRcIdsWithGroups( $db, $groups, $conds = [] ) {
	if ( !count( $groups ) ) {
		return [];
	}

	// Combine with array_merge(), not the "+" union operator. With "+", a
	// list-style entry in $conds (integer key 0) silently displaces the
	// lower time bound, which also has key 0, so the query would no longer
	// be limited to the cutoff range. $conds is merged last so that, as
	// before, a string key supplied by the caller takes precedence.
	$where = array_merge(
		[
			"rc_timestamp > " . $db->addQuotes( $db->timestamp( $this->cutoffFrom ) ),
			"rc_timestamp < " . $db->addQuotes( $db->timestamp( $this->cutoffTo ) ),
			'ug_group' => $groups
		],
		$conds
	);

	return $db->selectFieldValues(
		[ 'recentchanges', 'actor', 'user_groups' ],
		'rc_id',
		$where,
		__METHOD__,
		[ 'DISTINCT' ],
		[
			'actor' => [ 'JOIN', 'actor_id=rc_actor' ],
			'user_groups' => [ 'JOIN', 'ug_user=actor_user' ]
		]
	);
}
390
/**
 * Rebuild pass 4: set rc_bot on rows made by members of the bot groups
 * and mark rows made by members of autopatrol groups as auto-patrolled.
 */
private function rebuildRecentChangesTablePass4() {
	// Without this declaration the four settings would be undefined locals.
	global $wgUseRCPatrol, $wgUseNPPatrol, $wgUseFilePatrol, $wgMiserMode;

	$dbw = $this->getDB( DB_PRIMARY );

	# @FIXME: recognize other bot account groups (not the same as users with 'bot' rights)
	# @NOTE: users with 'bot' rights choose when edits are bot edits or not. That information
	# may be lost at this point (aside from joining on the patrol log table entries).
	$botgroups = [ 'bot' ];
	$autopatrolgroups = ( $wgUseRCPatrol || $wgUseNPPatrol || $wgUseFilePatrol ) ?
		MediaWikiServices::getInstance()->getGroupPermissionsLookup()
			->getGroupsWithPermission( 'autopatrol' ) : [];

	# Flag our recent bot edits
	// @phan-suppress-next-line PhanRedundantCondition
	if ( $botgroups ) {
		$this->output( "Flagging bot account edits...\n" );

		# Fill in the rc_bot field
		$rcids = $this->findRcIdsWithGroups( $dbw, $botgroups );

		foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
			$dbw->update(
				'recentchanges',
				[ 'rc_bot' => 1 ],
				[ 'rc_id' => $rcidBatch ],
				__METHOD__
			);
			$this->waitForReplication();
		}
	}

	# Flag our recent autopatrolled edits
	if ( !$wgMiserMode && $autopatrolgroups ) {
		$this->output( "Flagging auto-patrolled edits...\n" );

		$conds = [ 'rc_patrolled' => 0 ];
		if ( !$wgUseRCPatrol ) {
			// Only new-page and/or upload patrolling is enabled, so restrict
			// the rows to the kinds of change that can be patrolled.
			$subConds = [];
			if ( $wgUseNPPatrol ) {
				$subConds[] = 'rc_source = ' . $dbw->addQuotes( RecentChange::SRC_NEW );
			}
			if ( $wgUseFilePatrol ) {
				$subConds[] = 'rc_log_type = ' . $dbw->addQuotes( 'upload' );
			}
			$conds[] = $dbw->makeList( $subConds, IDatabase::LIST_OR );
		}

		$rcids = $this->findRcIdsWithGroups( $dbw, $autopatrolgroups, $conds );
		foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
			$dbw->update(
				'recentchanges',
				[ 'rc_patrolled' => 2 ],
				[ 'rc_id' => $rcidBatch ],
				__METHOD__
			);
			$this->waitForReplication();
		}
	}
}
454
/**
 * Rebuild pass 5: for each log entry in the range that records an
 * associated revision ID, copy that ID onto the log-based recentchanges
 * row and delete the revision-based row with the same revision ID.
 */
private function rebuildRecentChangesTablePass5() {
	$dbw = $this->getDB( DB_PRIMARY );

	$this->output( "Removing duplicate revision and logging entries...\n" );

	$fromQuoted = $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) );
	$toQuoted = $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) );

	$linkedLogs = $dbw->select(
		[ 'logging', 'log_search' ],
		[ 'ls_value', 'ls_log_id' ],
		[
			'ls_log_id = log_id',
			'ls_field' => 'associated_rev_id',
			'log_type != ' . $dbw->addQuotes( 'create' ),
			'log_timestamp > ' . $fromQuoted,
			'log_timestamp < ' . $toQuoted,
		],
		__METHOD__
	);

	$processed = 0;
	foreach ( $linkedLogs as $link ) {
		$revisionId = $link->ls_value;
		$logId = $link->ls_log_id;

		// Mark the logging row as having an associated rev id
		$dbw->update(
			'recentchanges',
			[ 'rc_this_oldid' => $revisionId ],
			[ 'rc_logid' => $logId ],
			__METHOD__
		);

		// Delete the revision row
		$dbw->delete(
			'recentchanges',
			[ 'rc_this_oldid' => $revisionId, 'rc_logid' => 0 ],
			__METHOD__
		);

		$processed++;
		if ( ( $processed % $this->getBatchSize() ) == 0 ) {
			$this->waitForReplication();
		}
	}
}
502
/**
 * Delete the cached 'rcfeed' timestamp key of every feed type listed in
 * $wgFeedClasses from the main WAN object cache.
 */
private function purgeFeeds() {
	global $wgFeedClasses;

	$this->output( "Deleting feed timestamps.\n" );

	$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	// Only the feed names (the array keys) are needed to build the cache keys.
	foreach ( array_keys( $wgFeedClasses ) as $feedName ) {
		$timestampKey = $cache->makeKey( 'rcfeed', $feedName, 'timestamp' );
		$cache->delete( $timestampKey ); # Good enough for now.
	}
}
516}
517
// Name the maintenance class defined in this file, then include the
// maintenance runner (RUN_MAINTENANCE_IF_MAIN).
$maintClass = RebuildRecentchanges::class;
require_once RUN_MAINTENANCE_IF_MAIN;
getDB()
const RC_NEW
Definition Defines.php:117
const RC_LOG
Definition Defines.php:118
const RC_EDIT
Definition Defines.php:116
wfTimestamp( $outputtype=TS_UNIX, $ts=0)
Get a timestamp string in one of various formats.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Service locator for MediaWiki core services.
This is not intended to be a long-term part of MediaWiki; it will be deprecated and removed once actor migration is complete.
Maintenance script that rebuilds recent changes from scratch.
execute()
Do the actual work.
__construct()
Default constructor.
Utility class for creating new RC entries.
$wgUseFilePatrol
Config variable stub for the UseFilePatrol setting, for use by phpdoc and IDEs.
$wgLogRestrictions
Config variable stub for the LogRestrictions setting, for use by phpdoc and IDEs.
$wgUseRCPatrol
Config variable stub for the UseRCPatrol setting, for use by phpdoc and IDEs.
$wgUseNPPatrol
Config variable stub for the UseNPPatrol setting, for use by phpdoc and IDEs.
$wgRCMaxAge
Config variable stub for the RCMaxAge setting, for use by phpdoc and IDEs.
$wgFeedClasses
Config variable stub for the FeedClasses setting, for use by phpdoc and IDEs.
$wgFilterLogTypes
Config variable stub for the FilterLogTypes setting, for use by phpdoc and IDEs.
$wgMiserMode
Config variable stub for the MiserMode setting, for use by phpdoc and IDEs.
Basic database interface for live and lazy-loaded relation database handles.
Definition IDatabase.php:36
const DB_PRIMARY
Definition defines.php:28