MediaWiki master
backfillInterwikiRightsLog.php
Go to the documentation of this file.
1<?php
9declare( strict_types = 1 );
10
23use Wikimedia\Timestamp\ConvertibleTimestamp;
24use Wikimedia\Timestamp\TimestampFormat as TS;
25
26// @codeCoverageIgnoreStart
27require_once __DIR__ . '/Maintenance.php';
28// @codeCoverageIgnoreEnd
29
// Separator between the username and the wiki ID in interwiki rights log titles.
// Set in execute() from the UserrightsInterwikiDelimiter config setting and used both to
// build the title filter and to split the username off the log target.
36 private string $interwikiDelimiter;
37
/**
 * Declare the script's description, its positional argument, its options
 * and the default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Backfill interwiki rights log from the specified wiki' );

	// Positional argument 0: upper bound for the timestamps of the copied entries
	$this->addArg(
		'before',
		'Only interwiki rights logs before this timestamp will be processed'
	);

	// --remote-wiki is mandatory and takes a value
	$this->addOption(
		'remote-wiki',
		'The wiki to read logs from',
		required: true,
		withArg: true
	);
	$this->addOption( 'dry-run', 'Perform a dry run, copy nothing' );

	$this->setBatchSize( 200 );
}
46
/**
 * Copy interwiki rights log entries that concern users of the current wiki from the
 * log of the remote wiki (--remote-wiki) into the log of the current wiki.
 *
 * Source entries are read in batches ordered by (log_timestamp, log_id). For each
 * entry the target username is brought up to date using the local rename log, legacy
 * parameters are converted to the current schema, and an equivalent ManualLogEntry is
 * built with the performer marked as external to this wiki. Entries for which a local
 * 'rights' entry with the same timestamp and target already exists are skipped.
 *
 * With --dry-run nothing is inserted; only the counters are reported.
 */
public function execute() {
	$dryRun = $this->hasOption( 'dry-run' );
	$sourceWiki = $this->getOption( 'remote-wiki' );

	// convert() returns false when it cannot parse its input. Bail out instead of
	// building the queries below around a bogus cutoff.
	$cutoffTimestamp = ConvertibleTimestamp::convert( TS::MW, $this->getArg( 0 ) );
	if ( $cutoffTimestamp === false ) {
		$this->fatalError( "Invalid timestamp given for the 'before' argument." );
	}

	$currentWiki = WikiMap::getCurrentWikiId();
	if ( $sourceWiki === $currentWiki ) {
		$this->output( "Source wiki must be different from the current wiki.\n" );
		return;
	}

	$sourceDb = $this->getReplicaDB( $sourceWiki );
	$this->interwikiDelimiter = $this->getConfig()->get( MainConfigNames::UserrightsInterwikiDelimiter );
	// Matches log titles that end with "<delimiter><current wiki ID>"
	$titlePattern = new LikeValue( $sourceDb->anyString(), $this->interwikiDelimiter . $currentWiki );

	if ( $dryRun ) {
		$this->output( "DRY RUN: No changes will be made\n" );
	}

	// Oldest rights log entry on the source wiki; used as the starting point for paging
	$minTimestamp = $sourceDb->newSelectQueryBuilder()
		->select( 'log_timestamp' )
		->from( 'logging' )
		->where( [
			'log_type' => 'rights',
			'log_action' => 'rights',
		] )
		->orderBy( 'log_timestamp', SelectQueryBuilder::SORT_ASC )
		->caller( __METHOD__ )
		->fetchField();

	if ( $minTimestamp === false ) {
		$this->output( "No source data found, exiting\n" );
		return;
	}

	// Legacy entries store group lists as comma-separated strings. An empty string
	// means "no groups" and must become [], not [ '' ].
	$parseLegacyGroups = static fn ( string $list ): array =>
		trim( $list ) === '' ? [] : array_map( trim( ... ), explode( ',', $list ) );

	$lastLogId = 0;
	$lastTimestamp = $minTimestamp;
	$count = 0;
	$skipped = 0;
	$minInsertedId = null;
	$maxInsertedId = null;
	while ( true ) {
		$rows = DatabaseLogEntry::newSelectQueryBuilder( $sourceDb )
			->where( [
				'log_type' => 'rights',
				'log_action' => 'rights',
				$sourceDb->expr( 'log_title', IExpression::LIKE, $titlePattern ),
				$sourceDb->expr( 'log_timestamp', '<', $sourceDb->timestamp( $cutoffTimestamp ) ),
			] )
			->where(
				// Resume right after the last entry of the previous batch
				$sourceDb->buildComparison( '>', [
					'log_timestamp' => $sourceDb->timestamp( $lastTimestamp ),
					'log_id' => $lastLogId,
				] )
			)
			->orderBy( [ 'log_timestamp', 'log_id' ], SelectQueryBuilder::SORT_ASC )
			->limit( $this->getBatchSize() )
			->caller( __METHOD__ )
			->fetchResultSet();

		if ( $rows->numRows() === 0 ) {
			break;
		}
		$this->output( "Processing batch of {$rows->numRows()} log entries...\n" );

		$this->beginTransactionRound( __METHOD__ );

		$originalEntries = [];
		$targetUserNames = [];
		foreach ( $rows as $row ) {
			$entry = DatabaseLogEntry::newFromRow( $row );
			$originalEntries[] = $entry;
			$targetUserNames[] = $this->getTargetUserName( $entry );
		}

		$renames = $this->getRenames( $targetUserNames );

		$logsToInsert = [];
		// For deduplication query
		$timestampsPresent = [];
		foreach ( $originalEntries as $originalEntry ) {
			// Advance the paging cursor first, so that skipped entries are not fetched again
			$lastLogId = $originalEntry->getId();
			$lastTimestamp = $originalEntry->getTimestamp();

			$targetName = $this->getTargetUserName( $originalEntry );
			$targetNewName = $this->getUpToDateUserName( $targetName, $originalEntry->getTimestamp(), $renames );
			if ( $targetNewName !== $targetName ) {
				$this->output( "Renaming $targetName to $targetNewName in entry $lastLogId\n" );
			}
			$targetName = $targetNewName;
			$localTarget = Title::newFromText( $targetName, $originalEntry->getTarget()->getNamespace() );
			if ( $localTarget === null ) {
				// No valid title can be built from this name; don't abort the whole run for it
				$this->output( "Skipping entry $lastLogId: cannot build a valid title for '$targetName'\n" );
				$skipped++;
				continue;
			}

			$params = $originalEntry->getParameters();
			if ( $originalEntry->isLegacy() && count( $params ) > 1 ) {
				// We must ensure that the inserted log entry is in the current form, so that we don't
				// create yet another params schema
				$params = [
					'4::oldgroups' => $parseLegacyGroups( $params[0] ),
					'5::newgroups' => $parseLegacyGroups( $params[1] ),
				];
			}

			$performerName = $originalEntry->getPerformerIdentity()->getName();
			$performer = UserIdentityValue::newExternal( $sourceWiki, $performerName );

			$logEntry = new ManualLogEntry( 'rights', 'rights' );
			$logEntry->setTimestamp( $originalEntry->getTimestamp() );
			$logEntry->setPerformer( $performer );
			$logEntry->setTarget( $localTarget );
			$logEntry->setComment( $originalEntry->getComment() );
			$logEntry->setParameters( $params );
			$logEntry->setDeleted( $originalEntry->getDeleted() );
			$logsToInsert[] = $logEntry;
			$timestampsPresent[] = $logEntry->getTimestamp();
		}

		// keyed by timestamp => array of target users
		$existingChanges = [];
		// Every entry of the batch may have been skipped above; don't query with an empty list
		if ( $timestampsPresent ) {
			$existingRows = DatabaseLogEntry::newSelectQueryBuilder( $this->getReplicaDB() )
				->where( [
					'log_type' => 'rights',
					'log_action' => 'rights',
					'log_timestamp' => array_map(
						$this->getReplicaDB()->timestamp( ... ),
						$timestampsPresent
					),
				] )
				->caller( __METHOD__ )
				->fetchResultSet();

			foreach ( $existingRows as $row ) {
				$entry = DatabaseLogEntry::newFromRow( $row );
				$existingChanges[ $entry->getTimestamp() ][] = $entry->getTarget()->getText();
			}
		}

		foreach ( $logsToInsert as $logEntry ) {
			// If the target user's rights were already changed at the same timestamp, skip so that we don't
			// duplicate entries. This leaves room for false positives, where the user's rights are changed by
			// different users at the same time. It's unlikely and we accept this risk here.
			// The comparison is strict: loosely compared, numeric-looking names such as "0123" and "123"
			// would be treated as the same user.
			if (
				isset( $existingChanges[ $logEntry->getTimestamp() ] )
				&& in_array(
					$logEntry->getTarget()->getText(),
					$existingChanges[ $logEntry->getTimestamp() ],
					true
				)
			) {
				$skipped++;
				continue;
			}

			if ( !$dryRun ) {
				$id = $logEntry->insert();

				if ( $minInsertedId === null ) {
					$minInsertedId = $id;
				}
				$maxInsertedId = $id;
			}
			$count++;
		}

		$this->commitTransactionRound( __METHOD__ );
	}

	$this->output( "Skipped $skipped log entries.\n" );
	if ( $dryRun ) {
		$this->output( "Would insert $count log entries.\n" );
	} else {
		LoggerFactory::getInstance( 'logentry' )->info(
			'Backfilled {count} interwiki rights log entries from {sourceWiki}.',
			[
				'count' => $count,
				'sourceWiki' => $sourceWiki,
				'minInsertedId' => $minInsertedId,
				'maxInsertedId' => $maxInsertedId,
			]
		);

		$minInsertedId ??= '(null)';
		$maxInsertedId ??= '(null)';
		$this->output( "Inserted $count log entries, with ids between $minInsertedId and $maxInsertedId.\n" );
	}
}
232
/**
 * Get the username part of a log entry's target, i.e. everything in the target's
 * text form that precedes the first interwiki delimiter.
 *
 * @param LogEntry $logEntry
 * @return string
 */
private function getTargetUserName( LogEntry $logEntry ): string {
	$fullTarget = $logEntry->getTarget()->getText();
	// Only the first segment is of interest, so there is no need to split any further
	[ $userName ] = explode( $this->interwikiDelimiter, $fullTarget, 2 );
	return $userName;
}
237
/**
 * Follow the chain of renames that happened after $timestamp and return the name
 * the user ended up with.
 *
 * At each step the first listed rename of the current name that is newer than the
 * current timestamp is applied, and the search continues from that rename's timestamp.
 *
 * @param string $originalName Username at the time of $timestamp, in text or DB key form
 * @param string $timestamp Point in time at which $originalName was valid
 * @param array $renames Map of old name => [ rename timestamp => new name ]. getRenames()
 *   keys this map by DB key (underscores) and stores the new names in text form (spaces).
 * @return string The most recent known name, or $originalName if no later rename applies
 */
private function getUpToDateUserName( string $originalName, string $timestamp, array $renames ): string {
	$currentName = $originalName;

	while ( true ) {
		// Names arrive here in text form, while the map is keyed by DB key. Look up the
		// underscore form first and fall back to the literal name for maps keyed otherwise.
		$renamesOfUser = $renames[ strtr( $currentName, ' ', '_' ) ] ?? $renames[ $currentName ] ?? null;
		if ( $renamesOfUser === null ) {
			break;
		}

		$renameFound = false;
		foreach ( $renamesOfUser as $renameTimestamp => $newName ) {
			if ( $renameTimestamp > $timestamp ) {
				$currentName = $newName;
				// Later links of the chain must be newer than this rename
				$timestamp = $renameTimestamp;
				$renameFound = true;
				break;
			}
		}

		if ( !$renameFound ) {
			break;
		}
	}

	return $currentName;
}
255
/**
 * Look up the local rename log for the given users and for every name they were
 * subsequently renamed to.
 *
 * @param string[] $originalUserNames Usernames to start from, in text or DB key form
 * @return array[] Map of old name (DB key form) => [ rename timestamp => new name (text form) ]
 */
private function getRenames( array $originalUserNames ): array {
	$dbr = $this->getReplicaDB();
	$toDbKey = static fn ( $name ) => strtr( $name, ' ', '_' );

	// Names still to be looked up, in the title form (with underscores).
	// The space form is used only for the values of the returned map.
	$pending = array_map( $toDbKey, $originalUserNames );
	$renames = [];

	while ( $pending ) {
		// Query at most 100 distinct names at a time
		$pending = array_unique( $pending );
		$batch = array_splice( $pending, 0, 100 );

		$conds = [
			'log_namespace' => NS_USER,
			'log_title' => $batch,
			'log_type' => 'renameuser',
		];
		$renameLogs = DatabaseLogEntry::newSelectQueryBuilder( $dbr )
			->where( $conds )
			->orderBy( 'log_timestamp' )
			->caller( __METHOD__ )
			->fetchResultSet();

		foreach ( $renameLogs as $row ) {
			$entry = DatabaseLogEntry::newFromRow( $row );

			$oldKey = $entry->getTarget()->getDBkey();
			$renamedAt = $entry->getTimestamp();
			$logParams = $entry->getParameters();
			// Take the new name from the '5::newuser' parameter, or from the first positional one
			$newKey = $toDbKey( $logParams['5::newuser'] ?? $logParams[0] ?? '' );

			if ( $newKey !== '' ) {
				$renames[$oldKey][$renamedAt] = strtr( $newKey, '_', ' ' );
				if ( !isset( $renames[$newKey] ) ) {
					// Follow up on the next renames affecting the same user
					$pending[] = $newKey;
				}
			}
			// Otherwise: invalid log entry, ignore
		}
	}

	return $renames;
}
305}
306
307// @codeCoverageIgnoreStart
// Script bootstrap: name the maintenance class defined in this file, then include the
// runner. NOTE(review): judging by the constant's name, the runner presumably executes the
// class only when this file is the script's entry point - confirm against Maintenance.php.
308$maintClass = BackfillInterwikiRightsLog::class;
309require_once RUN_MAINTENANCE_IF_MAIN;
310// @codeCoverageIgnoreEnd
const NS_USER
Definition Defines.php:53
Maintenance script to copy interwiki rights changes from the log on the remote wiki to the current wiki.
Create PSR-3 logger objects.
A value class to process existing log entries.
Class for creating new log entries and inserting them into the database.
A class containing constants representing the names of configuration variables.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
getArg( $argId=0, $default=null)
Get an argument.
getBatchSize()
Returns batch size.
output( $out, $channel=null)
Throw some output to the user.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
commitTransactionRound( $fname)
Commit a transactional batch of DB operations and wait for replica DB servers to catch up.
getReplicaDB(string|false $virtualDomain=false)
beginTransactionRound( $fname)
Start a transactional batch of DB operations.
addDescription( $text)
Set the description text.
Represents a title within MediaWiki.
Definition Title.php:69
Value object representing a user's identity.
Tools for dealing with other locally-hosted wikis.
Definition WikiMap.php:19
Content of like value.
Definition LikeValue.php:14
Build SELECT queries with a fluent interface.
An individual log entry.
Definition LogEntry.php:24
array $params
The job parameters.