Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
0.00% covered (danger)
0.00%
0 / 111
0.00% covered (danger)
0.00%
0 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
PopulateGlobalEditCount
0.00% covered (danger)
0.00%
0 / 105
0.00% covered (danger)
0.00%
0 / 4
380
0.00% covered (danger)
0.00%
0 / 1
 __construct
0.00% covered (danger)
0.00%
0 / 4
0.00% covered (danger)
0.00%
0 / 1
2
 init
0.00% covered (danger)
0.00%
0 / 2
0.00% covered (danger)
0.00%
0 / 1
2
 execute
0.00% covered (danger)
0.00%
0 / 90
0.00% covered (danger)
0.00%
0 / 1
210
 showProgress
0.00% covered (danger)
0.00%
0 / 9
0.00% covered (danger)
0.00%
0 / 1
12
1<?php
2
3namespace MediaWiki\Extension\CentralAuth\Maintenance;
4
5use Maintenance;
6use MediaWiki\Extension\CentralAuth\CentralAuthDatabaseManager;
7use MediaWiki\Extension\CentralAuth\CentralAuthServices;
8use MediaWiki\MediaWikiServices;
9use RuntimeException;
10
// Resolve the MediaWiki installation directory: use the MW_INSTALL_PATH
// environment variable when it is set, otherwise fall back to the directory
// three levels above this file.
$IP = getenv( 'MW_INSTALL_PATH' );
$IP = ( $IP !== false ) ? $IP : __DIR__ . '/../../..';
require_once $IP . '/maintenance/Maintenance.php';
16
/**
 * Maintenance script that recomputes the global edit count of every global
 * account and stores it in the global_edit_count table.
 *
 * The globaluser table is walked in gu_id ranges of READ_BATCH_SIZE. For each
 * range the attached local accounts are looked up, their user_editcount values
 * are summed per global ID from each wiki's replica, and rows whose stored
 * gec_count differs from the computed sum are inserted or updated on the
 * central primary database in write batches of getBatchSize() rows.
 */
class PopulateGlobalEditCount extends Maintenance {
    /** Number of consecutive gu_id values covered by one central read query */
    private const READ_BATCH_SIZE = 1000;

    /** @var CentralAuthDatabaseManager */
    private $databaseManager;

    /** @var int Unix timestamp of the last progress line; used to throttle output */
    private $lastReportTime = 0;

    public function __construct() {
        parent::__construct();
        $this->requireExtension( 'CentralAuth' );

        // Batch size for write queries
        $this->setBatchSize( 100 );
        $this->addOption( 'start', 'gu_id value to start at', false, true );
    }

    /**
     * Fetch the CentralAuth database manager from the service container.
     */
    private function init() {
        $services = MediaWikiServices::getInstance();
        $this->databaseManager = CentralAuthServices::getDatabaseManager( $services );
    }

    /**
     * Walk all global accounts from the --start ID (default 0) up to the
     * highest gu_id, and bring global_edit_count in line with the sum of the
     * local edit counts.
     *
     * @throws RuntimeException If a local user row comes back for an ID that
     *   was not requested for that wiki
     */
    public function execute() {
        $this->init();
        $dbcr = $this->databaseManager->getCentralReplicaDB();
        $dbcw = $this->databaseManager->getCentralPrimaryDB();
        // When the query yields no usable value, the cast turns it into 0
        $lastId = (int)$dbcr->newSelectQueryBuilder()
            ->select( 'MAX(gu_id)' )
            ->from( 'globaluser' )
            ->caller( __METHOD__ )
            ->fetchField();

        $numGlobalAccounts = 0;
        $numUpdated = 0;

        $start = (int)$this->getOption( 'start', 0 );
        // The bound is inclusive: a batch that begins exactly at $lastId still
        // has to run, otherwise the account with the highest gu_id is skipped
        // whenever ( $lastId - $start ) is a multiple of READ_BATCH_SIZE.
        for ( $batchStartId = $start; $batchStartId <= $lastId; $batchStartId += self::READ_BATCH_SIZE ) {
            $this->showProgress( $batchStartId, $lastId );
            $batchEndId = $batchStartId + self::READ_BATCH_SIZE - 1;
            $res = $dbcr->newSelectQueryBuilder()
                ->select( [
                    'lu_global_id',
                    'lu_local_id',
                    'lu_wiki',
                    'gec_count'
                ] )
                // The globaluser table is just needed for batch ordering
                ->from( 'globaluser' )
                ->join( 'localuser', null, [ 'lu_name=gu_name' ] )
                ->leftJoin( 'global_edit_count', null, [ 'gu_id=gec_user' ] )
                ->where( [
                    "gu_id BETWEEN $batchStartId AND $batchEndId",
                    "lu_global_id <> 0",
                ] )
                ->orderBy( [ 'gu_id', 'lu_wiki' ] )
                ->caller( __METHOD__ )
                ->fetchResultSet();

            /** @var int[][] $localIds All local IDs by wiki */
            $localIds = [];
            /** @var int[][] $globalIds The global ID for each wiki/local ID */
            $globalIds = [];
            /** @var (int|null)[] $originalCounts The gec_count for each global ID, null if no row exists */
            $originalCounts = [];
            /** @var int[] $countsByGlobalId The final computed counts */
            $countsByGlobalId = [];

            // Read the global result into arrays
            foreach ( $res as $row ) {
                $wiki = $row->lu_wiki;
                $localId = (int)$row->lu_local_id;
                $globalId = (int)$row->lu_global_id;

                $localIds[$wiki][] = $localId;
                $globalIds[$wiki][$localId] = $globalId;
                // isset() is false for a stored null, so an entry without a
                // gec_count is simply re-evaluated by later rows of the same user
                if ( !isset( $originalCounts[$globalId] ) ) {
                    $originalCounts[$globalId] = $row->gec_count === null ? null : (int)$row->gec_count;
                }
                $countsByGlobalId[$globalId] = 0;
            }

            $numGlobalAccounts += count( $originalCounts );

            $countsByGlobalId = $this->sumLocalEditCounts( $localIds, $globalIds, $countsByGlobalId );

            // Find users with a count mismatch
            $updates = [];
            $inserts = [];
            foreach ( $countsByGlobalId as $id => $count ) {
                if ( $originalCounts[$id] === $count ) {
                    continue;
                }
                if ( $originalCounts[$id] === null ) {
                    // No global_edit_count row yet
                    $inserts[] = [
                        'gec_user' => $id,
                        'gec_count' => $count
                    ];
                } else {
                    $updates[$id] = $count;
                }
            }

            // Do the writes in small batches
            $this->insertCounts( $dbcw, $inserts );
            $this->updateCounts( $dbcw, $updates );

            $numUpdated += count( $updates ) + count( $inserts );
        }

        $this->showProgress( $lastId, $lastId );
        $this->output( "Complete. Updated $numUpdated of $numGlobalAccounts edit counts.\n" );
    }

    /**
     * Add up user_editcount per global ID, using one query per wiki.
     *
     * @param int[][] $localIds Local user IDs to look up, keyed by wiki
     * @param int[][] $globalIds Global ID keyed by wiki, then by local user ID
     * @param int[] $countsByGlobalId Running totals keyed by global ID
     * @return int[] The totals with every returned local edit count added in
     * @throws RuntimeException If a returned user_id has no known global ID
     */
    private function sumLocalEditCounts( array $localIds, array $globalIds, array $countsByGlobalId ): array {
        foreach ( $localIds as $wiki => $ids ) {
            $dblr = $this->databaseManager->getLocalDB( DB_REPLICA, $wiki );
            $res = $dblr->newSelectQueryBuilder()
                ->select( [
                    'user_id',
                    'user_editcount'
                ] )
                ->from( 'user' )
                ->where( [ 'user_id' => $ids ] )
                ->caller( __METHOD__ )
                ->fetchResultSet();

            foreach ( $res as $row ) {
                if ( !isset( $globalIds[$wiki][$row->user_id] ) ) {
                    throw new RuntimeException( "lost user ID {$row->user_id}" );
                }
                $countsByGlobalId[$globalIds[$wiki][$row->user_id]] += (int)$row->user_editcount;
            }
        }

        return $countsByGlobalId;
    }

    /**
     * Insert new global_edit_count rows, one transaction per write batch.
     *
     * @param \Wikimedia\Rdbms\IDatabase $dbcw Central primary database
     * @param array[] $inserts Rows with gec_user and gec_count keys
     */
    private function insertCounts( $dbcw, array $inserts ): void {
        foreach ( array_chunk( $inserts, $this->getBatchSize() ) as $insertBatch ) {
            $this->beginTransaction( $dbcw, __METHOD__ );
            $dbcw->newInsertQueryBuilder()
                ->insertInto( 'global_edit_count' )
                ->rows( $insertBatch )
                ->caller( __METHOD__ )
                ->execute();
            $this->commitTransaction( $dbcw, __METHOD__ );
        }
    }

    /**
     * Overwrite gec_count on existing rows, one transaction per write batch.
     *
     * @param \Wikimedia\Rdbms\IDatabase $dbcw Central primary database
     * @param int[] $updates New count keyed by global user ID
     */
    private function updateCounts( $dbcw, array $updates ): void {
        // preserve_keys = true: the keys are the global user IDs
        foreach ( array_chunk( $updates, $this->getBatchSize(), true ) as $updateBatch ) {
            $this->beginTransaction( $dbcw, __METHOD__ );
            foreach ( $updateBatch as $id => $count ) {
                $dbcw->newUpdateQueryBuilder()
                    ->update( 'global_edit_count' )
                    ->set( [ 'gec_count' => $count ] )
                    ->where( [ 'gec_user' => $id ] )
                    ->caller( __METHOD__ )
                    ->execute();
            }
            $this->commitTransaction( $dbcw, __METHOD__ );
        }
    }

    /**
     * Print a progress line, at most once every 10 seconds and never in
     * quiet mode.
     *
     * @param int $position
     * @param int $end
     */
    private function showProgress( $position, $end ) {
        $now = time();
        if ( !$this->isQuiet() && $now - $this->lastReportTime >= 10 ) {
            // $end is 0 when globaluser is empty; dividing by it would throw
            // DivisionByZeroError, so report an empty run as fully done.
            $percent = $end > 0 ? $position / $end * 100 : 100.0;
            $this->output( sprintf(
                "... %d / %d (%-.2f%%)\n",
                $position,
                $end,
                $percent
            ) );
            $this->lastReportTime = $now;
        }
    }
}
183
// Record this script's class name in $maintClass, then include the file named
// by the RUN_MAINTENANCE_IF_MAIN constant. Presumably that file reads
// $maintClass to decide which class to run — confirm against Maintenance.php.
$maintClass = PopulateGlobalEditCount::class;
require RUN_MAINTENANCE_IF_MAIN;