Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 329
0.00% covered (danger)
0.00%
0 / 9
CRAP
0.00% covered (danger)
0.00%
0 / 1
RebuildRecentchanges
0.00% covered (danger)
0.00%
0 / 329
0.00% covered (danger)
0.00%
0 / 9
2450
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 15
0.00% covered (danger)
0.00%
0 / 1
2
 execute
0.00% covered (danger)
0.00%
0 / 11
0.00% covered (danger)
0.00%
0 / 1
56
 rebuildRecentChangesTablePass1
0.00% covered (danger)
0.00%
0 / 88
0.00% covered (danger)
0.00%
0 / 1
72
 rebuildRecentChangesTablePass2
0.00% covered (danger)
0.00%
0 / 57
0.00% covered (danger)
0.00%
0 / 1
90
 rebuildRecentChangesTablePass3
0.00% covered (danger)
0.00%
0 / 75
0.00% covered (danger)
0.00%
0 / 1
30
 findRcIdsWithGroups
0.00% covered (danger)
0.00%
0 / 16
0.00% covered (danger)
0.00%
0 / 1
6
 rebuildRecentChangesTablePass4
0.00% covered (danger)
0.00%
0 / 34
0.00% covered (danger)
0.00%
0 / 1
156
 rebuildRecentChangesTablePass5
0.00% covered (danger)
0.00%
0 / 28
0.00% covered (danger)
0.00%
0 / 1
12
 purgeFeeds
0.00% covered (danger)
0.00%
0 / 5
0.00% covered (danger)
0.00%
0 / 1
6
1<?php
2/**
3 * Rebuild recent changes from scratch.  This takes several hours,
4 * depending on the database size and server configuration.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup Maintenance
23 * @todo Document
24 */
25
26// @codeCoverageIgnoreStart
27require_once __DIR__ . '/Maintenance.php';
28// @codeCoverageIgnoreEnd
29
30use MediaWiki\Maintenance\Maintenance;
31use Wikimedia\Rdbms\IDatabase;
32use Wikimedia\Rdbms\IReadableDatabase;
33use Wikimedia\Rdbms\SelectQueryBuilder;
34
/**
 * Maintenance script that rebuilds recent changes from scratch.
 *
 * The rebuild works on a time window (either the explicit --from/--to range or
 * the last $wgRCMaxAge seconds) and runs in five passes:
 *  1. clear the window and re-insert `recentchanges` rows for page revisions,
 *  2. fill in previous-revision links and size differences,
 *  3. insert rows for log entries,
 *  4. flag bot and autopatrolled entries,
 *  5. remove revision rows that duplicate a log entry.
 *
 * @ingroup Maintenance
 */
class RebuildRecentchanges extends Maintenance {
    /** @var int UNIX timestamp; exclusive lower bound of the window, set by pass 1 */
    private $cutoffFrom;
    /** @var int UNIX timestamp; exclusive upper bound of the window, set by pass 1 */
    private $cutoffTo;

    /**
     * Register the script description, the optional --from/--to range options
     * (both take an argument and neither is required) and the default batch size.
     */
    public function __construct() {
        parent::__construct();
        $this->addDescription( 'Rebuild recent changes' );

        $this->addOption(
            'from',
            "Only rebuild rows in requested time range (in YYYYMMDDHHMMSS format)",
            false,
            true
        );
        $this->addOption(
            'to',
            "Only rebuild rows in requested time range (in YYYYMMDDHHMMSS format)",
            false,
            true
        );
        $this->setBatchSize( 200 );
    }

    /**
     * Entry point: validate the option combination, run the five passes in
     * order, and purge the cached feed timestamps when no explicit range was
     * requested.
     */
    public function execute() {
        // The range options are only meaningful as a pair.
        if ( $this->hasOption( 'from' ) !== $this->hasOption( 'to' ) ) {
            $this->fatalError( "Both 'from' and 'to' must be given, or neither" );
        }

        // Pass 1 must run first: it establishes the cutoffs the later passes read.
        $this->rebuildRecentChangesTablePass1();
        $this->rebuildRecentChangesTablePass2();
        $this->rebuildRecentChangesTablePass3();
        $this->rebuildRecentChangesTablePass4();
        $this->rebuildRecentChangesTablePass5();
        if ( !( $this->hasOption( 'from' ) && $this->hasOption( 'to' ) ) ) {
            $this->purgeFeeds();
        }
        $this->output( "Done.\n" );
    }

    /**
     * Rebuild pass 1: Insert `recentchanges` entries for page revisions.
     *
     * Also computes $this->cutoffFrom and $this->cutoffTo, deletes the existing
     * `recentchanges` rows inside that window, and re-points `change_tag` rows
     * of each revision at the newly inserted row.
     */
    private function rebuildRecentChangesTablePass1() {
        $dbw = $this->getPrimaryDB();
        $commentStore = $this->getServiceContainer()->getCommentStore();

        if ( $this->hasOption( 'from' ) && $this->hasOption( 'to' ) ) {
            $from = wfTimestamp( TS_UNIX, $this->getOption( 'from' ) );
            $to = wfTimestamp( TS_UNIX, $this->getOption( 'to' ) );
            // An unparsable timestamp yields false, which would cast to 0 and
            // silently widen the window back to the epoch. Stop before any
            // row is deleted instead.
            if ( $from === false || $to === false ) {
                $this->fatalError( "Could not parse 'from' or 'to'; expected YYYYMMDDHHMMSS format" );
            }
            $this->cutoffFrom = (int)$from;
            $this->cutoffTo = (int)$to;

            $sec = $this->cutoffTo - $this->cutoffFrom;
            $days = $sec / 24 / 3600;
            $this->output( "Rebuilding range of $sec seconds ($days days)\n" );
        } else {
            global $wgRCMaxAge;

            $days = $wgRCMaxAge / 24 / 3600;
            $this->output( "Rebuilding \$wgRCMaxAge=$wgRCMaxAge seconds ($days days)\n" );

            // Read the clock once so both ends of the window share one instant.
            $now = time();
            $this->cutoffFrom = $now - $wgRCMaxAge;
            $this->cutoffTo = $now;
        }

        $this->output( "Clearing recentchanges table for time range...\n" );
        $rcids = $dbw->newSelectQueryBuilder()
            ->select( 'rc_id' )
            ->from( 'recentchanges' )
            ->where( $dbw->expr( 'rc_timestamp', '>', $dbw->timestamp( $this->cutoffFrom ) ) )
            ->andWhere( $dbw->expr( 'rc_timestamp', '<', $dbw->timestamp( $this->cutoffTo ) ) )
            ->caller( __METHOD__ )->fetchFieldValues();
        // Delete by primary key in batches, waiting for replication after each one.
        foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
            $dbw->newDeleteQueryBuilder()
                ->deleteFrom( 'recentchanges' )
                ->where( [ 'rc_id' => $rcidBatch ] )
                ->caller( __METHOD__ )->execute();
            $this->waitForReplication();
        }

        $this->output( "Loading from page and revision tables...\n" );

        $res = $dbw->newSelectQueryBuilder()
            ->select(
                [
                    'rev_timestamp',
                    'rev_minor_edit',
                    'rev_id',
                    'rev_deleted',
                    'page_namespace',
                    'page_title',
                    'page_is_new',
                    'page_id',
                    'rev_comment_text' => 'comment_rev_comment.comment_text',
                    'rev_comment_data' => 'comment_rev_comment.comment_data',
                    'rev_comment_cid' => 'comment_rev_comment.comment_id',
                    'rev_user' => 'actor_rev_user.actor_user',
                    'rev_user_text' => 'actor_rev_user.actor_name',
                    'rev_actor' => 'rev_actor',
                ]
            )
            ->from( 'revision' )
            ->join( 'page', null, 'rev_page=page_id' )
            ->join( 'comment', 'comment_rev_comment', 'comment_rev_comment.comment_id = rev_comment_id' )
            ->join( 'actor', 'actor_rev_user', 'actor_rev_user.actor_id = rev_actor' )
            ->where(
                [
                    $dbw->expr( 'rev_timestamp', '>', $dbw->timestamp( $this->cutoffFrom ) ),
                    $dbw->expr( 'rev_timestamp', '<', $dbw->timestamp( $this->cutoffTo ) )
                ]
            )
            ->orderBy( 'rev_timestamp', SelectQueryBuilder::SORT_DESC )
            ->caller( __METHOD__ )->fetchResultSet();

        $this->output( "Inserting from page and revision tables...\n" );
        $inserted = 0;
        foreach ( $res as $row ) {
            $comment = $commentStore->getComment( 'rev_comment', $row );
            // rc_last_oldid, rc_new, rc_type and rc_source are provisional
            // here; pass 2 overwrites them for every row in the window.
            $dbw->newInsertQueryBuilder()
                ->insertInto( 'recentchanges' )
                ->row( [
                    'rc_timestamp' => $row->rev_timestamp,
                    'rc_actor' => $row->rev_actor,
                    'rc_namespace' => $row->page_namespace,
                    'rc_title' => $row->page_title,
                    'rc_minor' => $row->rev_minor_edit,
                    'rc_bot' => 0,
                    'rc_new' => $row->page_is_new,
                    'rc_cur_id' => $row->page_id,
                    'rc_this_oldid' => $row->rev_id,
                    'rc_last_oldid' => 0, // is this ok?
                    'rc_type' => $row->page_is_new ? RC_NEW : RC_EDIT,
                    'rc_source' => $row->page_is_new ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT,
                    'rc_deleted' => $row->rev_deleted
                    ] + $commentStore->insert( $dbw, 'rc_comment', $comment ) )
                ->caller( __METHOD__ )->execute();

            // Re-link the revision's change tags to the freshly inserted row.
            $rcid = $dbw->insertId();
            $dbw->newUpdateQueryBuilder()
                ->update( 'change_tag' )
                ->set( [ 'ct_rc_id' => $rcid ] )
                ->where( [ 'ct_rev_id' => $row->rev_id ] )
                ->caller( __METHOD__ )->execute();

            if ( ( ++$inserted % $this->getBatchSize() ) == 0 ) {
                $this->waitForReplication();
            }
        }
    }

    /**
     * Rebuild pass 2: Enhance entries for page revisions with references to the previous revision
     * (rc_last_oldid, rc_new etc.) and size differences (rc_old_len, rc_new_len).
     *
     * Rows are walked ordered by page and then timestamp, so the "previous
     * revision" state is carried from one row to the next within a page and
     * only looked up in `revision` when the page changes.
     */
    private function rebuildRecentChangesTablePass2() {
        $dbw = $this->getPrimaryDB();

        $this->output( "Updating links and size differences...\n" );

        # Fill in the rc_last_oldid field, which points to the previous edit
        $res = $dbw->newSelectQueryBuilder()
            ->select( [ 'rc_cur_id', 'rc_this_oldid', 'rc_timestamp' ] )
            ->from( 'recentchanges' )
            ->where( $dbw->expr( 'rc_timestamp', '>', $dbw->timestamp( $this->cutoffFrom ) ) )
            ->andWhere( $dbw->expr( 'rc_timestamp', '<', $dbw->timestamp( $this->cutoffTo ) ) )
            ->orderBy( [ 'rc_cur_id', 'rc_timestamp' ] )
            ->caller( __METHOD__ )->fetchResultSet();

        $lastCurId = 0;
        $lastOldId = 0;
        $lastSize = null;
        $updated = 0;
        foreach ( $res as $row ) {
            $new = 0;

            if ( $row->rc_cur_id != $lastCurId ) {
                # Switch! Look up the previous last edit, if any
                $lastCurId = intval( $row->rc_cur_id );
                $emit = $row->rc_timestamp;

                // Newest revision of this page that is older than the first
                // in-window row.
                $revRow = $dbw->newSelectQueryBuilder()
                    ->select( [ 'rev_id', 'rev_len' ] )
                    ->from( 'revision' )
                    ->where( [ 'rev_page' => $lastCurId, $dbw->expr( 'rev_timestamp', '<', $emit ) ] )
                    ->orderBy( 'rev_timestamp DESC' )
                    ->caller( __METHOD__ )->fetchRow();
                if ( $revRow ) {
                    $lastOldId = intval( $revRow->rev_id );
                    # Grab the last text size if available
                    $lastSize = $revRow->rev_len !== null ? intval( $revRow->rev_len ) : null;
                } else {
                    # No previous edit
                    $lastOldId = 0;
                    $lastSize = 0;
                    $new = 1; // probably true
                }
            }

            if ( $lastCurId == 0 ) {
                $this->output( "Uhhh, something wrong? No curid\n" );
            } else {
                # Grab the entry's text size
                $size = (int)$dbw->newSelectQueryBuilder()
                    ->select( 'rev_len' )
                    ->from( 'revision' )
                    ->where( [ 'rev_id' => $row->rc_this_oldid ] )
                    ->caller( __METHOD__ )->fetchField();

                $dbw->newUpdateQueryBuilder()
                    ->update( 'recentchanges' )
                    ->set( [
                        'rc_last_oldid' => $lastOldId,
                        'rc_new' => $new,
                        'rc_type' => $new ? RC_NEW : RC_EDIT,
                        'rc_source' => $new ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT,
                        'rc_old_len' => $lastSize,
                        'rc_new_len' => $size,
                    ] )
                    ->where( [
                        'rc_cur_id' => $lastCurId,
                        'rc_this_oldid' => $row->rc_this_oldid,
                        // index usage
                        'rc_timestamp' => $row->rc_timestamp,
                    ] )
                    ->caller( __METHOD__ )->execute();

                // This row becomes the "previous revision" for the next row
                // of the same page.
                $lastOldId = intval( $row->rc_this_oldid );
                $lastSize = $size;

                if ( ( ++$updated % $this->getBatchSize() ) == 0 ) {
                    $this->waitForReplication();
                }
            }
        }
    }

    /**
     * Rebuild pass 3: Insert `recentchanges` entries for action logs.
     *
     * Log types listed in $wgLogRestrictions or $wgFilterLogTypes, and the
     * 'create' type, are left out. `change_tag` rows of each log entry are
     * re-pointed at the newly inserted row.
     */
    private function rebuildRecentChangesTablePass3() {
        global $wgLogRestrictions, $wgFilterLogTypes;

        // NOTE(review): this pass uses getDB( DB_PRIMARY ) while the others use
        // getPrimaryDB(); presumably because fieldInfo() is called on the
        // handle below. Confirm before unifying.
        $dbw = $this->getDB( DB_PRIMARY );
        $commentStore = $this->getServiceContainer()->getCommentStore();
        $nonRCLogs = array_merge(
            array_keys( $wgLogRestrictions ),
            array_keys( $wgFilterLogTypes ),
            [ 'create' ]
        );

        // Some logs don't go in RC since they are private, or are included in the filterable log types.
        $rcLogTypes = array_diff( LogPage::validTypes(), $nonRCLogs );
        if ( !$rcLogTypes ) {
            // An empty value list cannot be used as a query condition, and
            // there would be nothing to insert anyway.
            $this->output( "No log types are eligible for recent changes, skipping log entries.\n" );
            return;
        }

        $this->output( "Loading from user and logging tables...\n" );

        $res = $dbw->newSelectQueryBuilder()
            ->select(
                [
                    'log_timestamp',
                    'log_actor',
                    'log_namespace',
                    'log_title',
                    'log_page',
                    'log_type',
                    'log_action',
                    'log_id',
                    'log_params',
                    'log_deleted',
                    'log_comment_text' => 'comment_log_comment.comment_text',
                    'log_comment_data' => 'comment_log_comment.comment_data',
                    'log_comment_cid' => 'comment_log_comment.comment_id',
                ]
            )
            ->from( 'logging' )
            ->join( 'comment', 'comment_log_comment', 'comment_log_comment.comment_id = log_comment_id' )
            ->where(
                [
                    $dbw->expr( 'log_timestamp', '>', $dbw->timestamp( $this->cutoffFrom ) ),
                    $dbw->expr( 'log_timestamp', '<', $dbw->timestamp( $this->cutoffTo ) ),
                    'log_type' => $rcLogTypes,
                ]
            )
            ->orderBy( [ 'log_timestamp DESC', 'log_id DESC' ] )
            ->caller( __METHOD__ )->fetchResultSet();

        // Looked up once: decides below whether a NULL log_page may be stored as-is.
        $field = $dbw->fieldInfo( 'recentchanges', 'rc_cur_id' );

        $inserted = 0;
        foreach ( $res as $row ) {
            $comment = $commentStore->getComment( 'log_comment', $row );
            $dbw->newInsertQueryBuilder()
                ->insertInto( 'recentchanges' )
                ->row( [
                    'rc_timestamp' => $row->log_timestamp,
                    'rc_actor' => $row->log_actor,
                    'rc_namespace' => $row->log_namespace,
                    'rc_title' => $row->log_title,
                    'rc_minor' => 0,
                    'rc_bot' => 0,
                    // Uploads start at 0 so pass 4 can still pick them up
                    // (it only considers rows with rc_patrolled = 0).
                    'rc_patrolled' => $row->log_type == 'upload' ? 0 : 2,
                    'rc_new' => 0,
                    'rc_this_oldid' => 0,
                    'rc_last_oldid' => 0,
                    'rc_type' => RC_LOG,
                    'rc_source' => RecentChange::SRC_LOG,
                    'rc_cur_id' => $field->isNullable()
                        ? $row->log_page
                        : (int)$row->log_page, // NULL => 0,
                    'rc_log_type' => $row->log_type,
                    'rc_log_action' => $row->log_action,
                    'rc_logid' => $row->log_id,
                    'rc_params' => $row->log_params,
                    'rc_deleted' => $row->log_deleted
                    ] + $commentStore->insert( $dbw, 'rc_comment', $comment ) )
                ->caller( __METHOD__ )->execute();

            // Re-link the log entry's change tags to the freshly inserted row.
            $rcid = $dbw->insertId();
            $dbw->newUpdateQueryBuilder()
                ->update( 'change_tag' )
                ->set( [ 'ct_rc_id' => $rcid ] )
                ->where( [ 'ct_log_id' => $row->log_id ] )
                ->caller( __METHOD__ )->execute();

            if ( ( ++$inserted % $this->getBatchSize() ) == 0 ) {
                $this->waitForReplication();
            }
        }
    }

    /**
     * Find rc_id values that have a user with one of the specified groups
     *
     * Only rows inside the current cutoff window are considered. Returns an
     * empty list without querying when no groups are given.
     *
     * @param IReadableDatabase $db
     * @param string[] $groups
     * @param array $conds Extra query conditions
     * @return int[]
     */
    private function findRcIdsWithGroups( $db, $groups, $conds = [] ) {
        if ( !count( $groups ) ) {
            return [];
        }
        return $db->newSelectQueryBuilder()
            ->select( 'rc_id' )
            // A user in several matching groups would otherwise repeat rows.
            ->distinct()
            ->from( 'recentchanges' )
            ->join( 'actor', null, 'actor_id=rc_actor' )
            ->join( 'user_groups', null, 'ug_user=actor_user' )
            ->where( $conds )
            ->andWhere( [
                $db->expr( 'rc_timestamp', '>', $db->timestamp( $this->cutoffFrom ) ),
                $db->expr( 'rc_timestamp', '<', $db->timestamp( $this->cutoffTo ) ),
                'ug_group' => $groups
            ] )
            ->caller( __METHOD__ )->fetchFieldValues();
    }

    /**
     * Rebuild pass 4: Mark bot and autopatrolled entries.
     *
     * Rows by users in the 'bot' group get rc_bot = 1. Unless $wgMiserMode is
     * set, unpatrolled rows by users in a group holding the 'autopatrol'
     * permission get rc_patrolled = 2, restricted to the kinds of change for
     * which patrolling is enabled.
     */
    private function rebuildRecentChangesTablePass4() {
        global $wgUseRCPatrol, $wgUseNPPatrol, $wgUseFilePatrol, $wgMiserMode;

        $dbw = $this->getPrimaryDB();

        # @FIXME: recognize other bot account groups (not the same as users with 'bot' rights)
        # @NOTE: users with 'bot' rights choose when edits are bot edits or not. That information
        # may be lost at this point (aside from joining on the patrol log table entries).
        $botgroups = [ 'bot' ];
        $autopatrolgroups = ( $wgUseRCPatrol || $wgUseNPPatrol || $wgUseFilePatrol ) ?
            $this->getServiceContainer()->getGroupPermissionsLookup()
            ->getGroupsWithPermission( 'autopatrol' ) : [];

        # Flag our recent bot edits
        // @phan-suppress-next-line PhanRedundantCondition
        if ( $botgroups ) {
            $this->output( "Flagging bot account edits...\n" );

            # Fill in the rc_bot field
            $rcids = $this->findRcIdsWithGroups( $dbw, $botgroups );

            foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
                $dbw->newUpdateQueryBuilder()
                    ->update( 'recentchanges' )
                    ->set( [ 'rc_bot' => 1 ] )
                    ->where( [ 'rc_id' => $rcidBatch ] )
                    ->caller( __METHOD__ )->execute();
                $this->waitForReplication();
            }
        }

        # Flag our recent autopatrolled edits
        if ( !$wgMiserMode && $autopatrolgroups ) {
            $this->output( "Flagging auto-patrolled edits...\n" );

            $conds = [ 'rc_patrolled' => 0 ];
            if ( !$wgUseRCPatrol ) {
                // Without general RC patrol, only new pages and/or uploads are
                // patrollable. $autopatrolgroups is non-empty only when at
                // least one patrol setting is on, so $subConds cannot end up
                // empty here.
                $subConds = [];
                if ( $wgUseNPPatrol ) {
                    $subConds[] = $dbw->expr( 'rc_source', '=', RecentChange::SRC_NEW );
                }
                if ( $wgUseFilePatrol ) {
                    $subConds[] = $dbw->expr( 'rc_log_type', '=', 'upload' );
                }
                $conds[] = $dbw->makeList( $subConds, IDatabase::LIST_OR );
            }

            $rcids = $this->findRcIdsWithGroups( $dbw, $autopatrolgroups, $conds );
            foreach ( array_chunk( $rcids, $this->getBatchSize() ) as $rcidBatch ) {
                $dbw->newUpdateQueryBuilder()
                    ->update( 'recentchanges' )
                    ->set( [ 'rc_patrolled' => 2 ] )
                    ->where( [ 'rc_id' => $rcidBatch ] )
                    ->caller( __METHOD__ )->execute();
                $this->waitForReplication();
            }
        }
    }

    /**
     * Rebuild pass 5: Delete duplicate entries where we generate both a page revision and a log
     * entry for a single action (upload, move, protect, import, etc.).
     *
     * The pairing comes from `log_search` rows with ls_field = 'associated_rev_id':
     * the log row is kept and given the revision ID, the revision-only row is removed.
     */
    private function rebuildRecentChangesTablePass5() {
        $dbw = $this->getPrimaryDB();

        $this->output( "Removing duplicate revision and logging entries...\n" );

        $res = $dbw->newSelectQueryBuilder()
            ->select( [ 'ls_value', 'ls_log_id' ] )
            ->from( 'logging' )
            ->join( 'log_search', null, 'ls_log_id = log_id' )
            ->where( [
                'ls_field' => 'associated_rev_id',
                $dbw->expr( 'log_type', '!=', 'create' ),
                $dbw->expr( 'log_timestamp', '>', $dbw->timestamp( $this->cutoffFrom ) ),
                $dbw->expr( 'log_timestamp', '<', $dbw->timestamp( $this->cutoffTo ) ),
            ] )
            ->caller( __METHOD__ )->fetchResultSet();

        $updates = 0;
        foreach ( $res as $row ) {
            $revId = $row->ls_value;
            $logId = $row->ls_log_id;

            // Mark the logging row as having an associated rev id
            $dbw->newUpdateQueryBuilder()
                ->update( 'recentchanges' )
                ->set( [ 'rc_this_oldid' => $revId ] )
                ->where( [ 'rc_logid' => $logId ] )
                ->caller( __METHOD__ )->execute();

            // Delete the revision row (rc_logid = 0 keeps the log row just updated above)
            $dbw->newDeleteQueryBuilder()
                ->deleteFrom( 'recentchanges' )
                ->where( [ 'rc_this_oldid' => $revId, 'rc_logid' => 0 ] )
                ->caller( __METHOD__ )->execute();

            if ( ( ++$updates % $this->getBatchSize() ) == 0 ) {
                $this->waitForReplication();
            }
        }
    }

    /**
     * Purge cached feeds in $wanCache
     *
     * Deletes the 'rcfeed' timestamp key of every feed name configured in $wgFeedClasses.
     */
    private function purgeFeeds() {
        global $wgFeedClasses;

        $this->output( "Deleting feed timestamps.\n" );

        $wanCache = $this->getServiceContainer()->getMainWANObjectCache();
        // Only the feed names (array keys) matter here, not the class names.
        foreach ( array_keys( $wgFeedClasses ) as $feed ) {
            $wanCache->delete( $wanCache->makeKey( 'rcfeed', $feed, 'timestamp' ) ); # Good enough for now.
        }
    }
}
518
// @codeCoverageIgnoreStart
// Name the class for the maintenance runner, then include the runner
// (presumably a no-op unless this file is the script being executed).
$maintClass = RebuildRecentchanges::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd