MediaWiki  master
userDupes.inc
Go to the documentation of this file.
1 <?php
28 
/**
 * Look for duplicate user table entries and optionally prune them.
 *
 * Duplicate rows are detected by grouping the user table on user_name. For
 * each duplicated name the first row returned is kept as the canonical
 * record; revisions attributed to the other rows are re-pointed at it and
 * the other rows are deleted (only when running in "fix" mode).
 *
 * All progress messages go through the output callback given to the
 * constructor.
 */
class UserDupes {
	/** @var IMaintainableDatabase Handle every query is issued on */
	private $db;

	/** @var int Number of duplicate records that had (or need) edits reassigned */
	private $reassigned;

	/** @var int Number of duplicate records that were (or could be) deleted */
	private $trimmed;

	/** @var int Number of duplicate records that still had edits after reassignment */
	private $failed;

	/** @var callable Receives every status message as a single string argument */
	private $outputCallback;

	/**
	 * @param IMaintainableDatabase &$database Database handle to work on
	 * @param callable $outputCallback Called with one string per status message
	 */
	public function __construct( &$database, $outputCallback ) {
		$this->db = $database;
		$this->outputCallback = $outputCallback;
	}

	/**
	 * Output some text via the output callback provided.
	 *
	 * @param string $str Text to print
	 */
	private function out( $str ) {
		call_user_func( $this->outputCallback, $str );
	}

	/**
	 * Check if this database's user table has already had a unique
	 * user_name index applied.
	 *
	 * @return bool True when a user_name index exists and is unique; false
	 *  when it is non-unique or missing (a warning is printed if missing)
	 */
	public function hasUniqueIndex() {
		$info = $this->db->indexInfo( 'user', 'user_name', __METHOD__ );
		if ( !$info ) {
			$this->out( "WARNING: doesn't seem to have user_name index at all!\n" );

			return false;
		}

		# The flag is inverted relative to what we are asking:
		# 'Non_unique' is 0 for *unique* indexes and 1 for *non-unique* ones.
		return ( $info[0]->Non_unique == 0 );
	}

	/**
	 * Checks the database for duplicate user account records and removes
	 * them in preparation for application of a unique index on user_name.
	 *
	 * The user and revision tables are write-locked for the duration of
	 * the run.
	 *
	 * @return bool True if the table is clean or the duplicates were
	 *  resolved; false if some duplicate could not be removed
	 */
	public function clearDupes() {
		return $this->checkDupes( true );
	}

	/**
	 * Checks the database for duplicate user account records in
	 * preparation for application of a unique index on user_name.
	 *
	 * With $doDelete false this is a dry run that only reports what would
	 * be done. With $doDelete true, edits are reassigned and duplicate rows
	 * are deleted.
	 *
	 * @param bool $doDelete Pass true to actually remove things from the
	 *  database; false to just check
	 * @return bool True if the unique index already exists or can now be
	 *  applied safely; false if duplicates remain
	 */
	private function checkDupes( $doDelete = false ) {
		$dbDomain = WikiMap::getCurrentWikiDbDomain()->getId();
		if ( $this->hasUniqueIndex() ) {
			// Routed through the callback like every other message, so the
			// caller keeps control over where output goes.
			$this->out( "$dbDomain already has a unique index on its user table.\n" );

			return true;
		}

		$this->lock();
		try {
			$this->out( "Checking for duplicate accounts...\n" );
			$dupes = $this->getDupes();
			$count = count( $dupes );

			$this->out( "Found $count accounts with duplicate records on $dbDomain.\n" );
			$this->trimmed = 0;
			$this->reassigned = 0;
			$this->failed = 0;
			foreach ( $dupes as $name ) {
				$this->examine( $name, $doDelete );
			}
		} finally {
			// Release the table locks even when one of the queries above
			// throws, so a failed run does not leave the tables locked.
			$this->unlock();
		}

		$this->out( "\n" );

		// @phan-suppress-next-line PhanSuspiciousValueComparison
		if ( $this->reassigned > 0 ) {
			if ( $doDelete ) {
				$this->out( "$this->reassigned duplicate accounts had edits "
					. "reassigned to a canonical record id.\n" );
			} else {
				$this->out( "$this->reassigned duplicate accounts need to have edits reassigned.\n" );
			}
		}

		// @phan-suppress-next-line PhanSuspiciousValueComparison
		if ( $this->trimmed > 0 ) {
			if ( $doDelete ) {
				$this->out(
					"$this->trimmed duplicate user records were deleted from $dbDomain.\n" );
			} else {
				$this->out(
					"$this->trimmed duplicate user accounts were found on $dbDomain " .
					"which can be removed safely.\n"
				);
			}
		}

		// @phan-suppress-next-line PhanSuspiciousValueComparison
		if ( $this->failed > 0 ) {
			$this->out( "Something terribly awry; $this->failed duplicate accounts were not removed.\n" );

			return false;
		}

		// @phan-suppress-next-line PhanSuspiciousValueComparison
		if ( $this->trimmed == 0 || $doDelete ) {
			$this->out( "It is now safe to apply the unique index on user_name.\n" );

			return true;
		}

		$this->out( "Run this script again with the --fix option to automatically delete them.\n" );

		return false;
	}

	/**
	 * Write-lock the user and revision tables so nobody else can modify
	 * them while duplicates are being examined and removed.
	 */
	private function lock() {
		$set = [ 'user', 'revision' ];
		$names = array_map( [ $this, 'lockTable' ], $set );
		$tables = implode( ',', $names );

		$this->db->query( "LOCK TABLES $tables", __METHOD__ );
	}

	/**
	 * Build the LOCK TABLES clause for a single table.
	 *
	 * @param string $table Unqualified table name
	 * @return string The database's name for the table followed by ' WRITE'
	 */
	private function lockTable( $table ) {
		return $this->db->tableName( $table ) . ' WRITE';
	}

	/**
	 * Release the locks taken by lock().
	 */
	private function unlock() {
		$this->db->query( "UNLOCK TABLES", __METHOD__ );
	}

	/**
	 * Grab usernames for which multiple records are present in the database.
	 *
	 * @return string[] One entry per user_name that occurs more than once
	 */
	private function getDupes() {
		$user = $this->db->tableName( 'user' );
		$result = $this->db->query(
			"SELECT user_name,COUNT(*) AS n
				FROM $user
			GROUP BY user_name
			HAVING n > 1", __METHOD__ );

		$list = [];
		foreach ( $result as $row ) {
			$list[] = $row->user_name;
		}

		return $list;
	}

	/**
	 * Examine user records for the given name. Try to see which record
	 * will be the one that actually gets used, then check remaining records
	 * for edits. If the dupes have no edits, we can safely remove them.
	 *
	 * Updates the $reassigned, $trimmed and $failed counters.
	 *
	 * @param string $name User name whose records are examined
	 * @param bool $doDelete True to reassign edits and delete the duplicate
	 *  rows; false to only report
	 */
	private function examine( $name, $doDelete ) {
		$result = $this->db->select( 'user',
			[ 'user_id' ],
			[ 'user_name' => $name ],
			__METHOD__ );

		$firstRow = $this->db->fetchObject( $result );
		if ( !$firstRow ) {
			// Nothing matched (e.g. the rows vanished since getDupes() ran);
			// there is no canonical record to keep, so there is nothing to do.
			$this->out( "No records found for '$name'; skipping.\n" );

			return;
		}
		$firstId = $firstRow->user_id;
		$this->out( "Record that will be used for '$name' is user_id=$firstId\n" );

		foreach ( $result as $row ) {
			$dupeId = $row->user_id;
			if ( (string)$dupeId === (string)$firstId ) {
				// foreach may rewind the result set and hand back the row
				// already consumed by fetchObject() above. The canonical
				// record must never be treated as its own duplicate.
				continue;
			}
			$this->out( "... dupe id $dupeId: " );
			$edits = $this->editCount( $dupeId );
			if ( $edits > 0 ) {
				$this->reassigned++;
				$this->out( "has $edits edits! " );
				if ( $doDelete ) {
					$this->reassignEdits( $dupeId, $firstId );
					// Re-count to confirm the reassignment really emptied
					// the duplicate before its user row is deleted.
					$newEdits = $this->editCount( $dupeId );
					if ( $newEdits == 0 ) {
						$this->out( "confirmed cleaned. " );
					} else {
						$this->failed++;
						$this->out( "WARNING! $newEdits remaining edits for $dupeId; NOT deleting user.\n" );
						continue;
					}
				} else {
					$this->out( "(will need to reassign edits on fix)" );
				}
			} else {
				$this->out( "ok, no edits. " );
			}
			$this->trimmed++;
			if ( $doDelete ) {
				$this->trimAccount( $dupeId );
			}
			$this->out( "\n" );
		}
	}

	/**
	 * Count the number of edits attributed to this user.
	 *
	 * Counts rows of the revision table whose rev_user matches.
	 *
	 * @param int $userid User id to count revisions for
	 * @return int
	 */
	private function editCount( $userid ) {
		return intval( $this->db->selectField(
			'revision',
			'COUNT(*)',
			[ 'rev_user' => $userid ],
			__METHOD__ ) );
	}

	/**
	 * Re-attribute every revision of one user id to another user id.
	 *
	 * @param int $from User id the revisions currently belong to
	 * @param int $to User id the revisions are moved to
	 */
	private function reassignEdits( $from, $to ) {
		$this->out( 'reassigning... ' );
		$this->db->update( 'revision',
			[ 'rev_user' => $to ],
			[ 'rev_user' => $from ],
			__METHOD__ );
		$this->out( "ok. " );
	}

	/**
	 * Remove a user account line.
	 *
	 * @param int $userid Id of the user row to delete
	 */
	private function trimAccount( $userid ) {
		$this->out( "deleting..." );
		$this->db->delete( 'user', [ 'user_id' => $userid ], __METHOD__ );
		$this->out( " ok" );
	}
}
UserDupes\$outputCallback
callable $outputCallback
Definition: userDupes.inc:49
UserDupes\lock
lock()
We don't want anybody to mess with our stuff...
Definition: userDupes.inc:187
WikiMap\getCurrentWikiDbDomain
static getCurrentWikiDbDomain()
Definition: WikiMap.php:293
UserDupes\lockTable
lockTable( $table)
Definition: userDupes.inc:195
UserDupes\$db
IMaintainableDatabase $db
Definition: userDupes.inc:41
UserDupes\clearDupes
clearDupes()
Checks the database for duplicate user account records and removes them in preparation for application of a unique index on user_name.
Definition: userDupes.inc:98
UserDupes\editCount
editCount( $userid)
Count the number of edits attributed to this user.
Definition: userDupes.inc:281
UserDupes\$reassigned
int $reassigned
Definition: userDupes.inc:43
UserDupes\checkDupes
checkDupes( $doDelete=false)
Checks the database for duplicate user account records in preparation for application of a unique index on user_name.
Definition: userDupes.inc:116
UserDupes\unlock
unlock()
Definition: userDupes.inc:202
UserDupes\examine
examine( $name, $doDelete)
Examine user records for the given name.
Definition: userDupes.inc:233
UserDupes\hasUniqueIndex
hasUniqueIndex()
Check if this database's user table has already had a unique user_name index applied.
Definition: userDupes.inc:73
UserDupes\trimAccount
trimAccount( $userid)
Remove a user account line.
Definition: userDupes.inc:306
UserDupes\getDupes
getDupes()
Grab usernames for which multiple records are present in the database.
Definition: userDupes.inc:210
UserDupes\$failed
int $failed
Definition: userDupes.inc:47
UserDupes\out
out( $str)
Output some text via the output callback provided.
Definition: userDupes.inc:64
UserDupes
Look for duplicate user table entries and optionally prune them.
Definition: userDupes.inc:37
UserDupes\$trimmed
int $trimmed
Definition: userDupes.inc:45
UserDupes\reassignEdits
reassignEdits( $from, $to)
Definition: userDupes.inc:293
UserDupes\__construct
__construct(&$database, $outputCallback)
Definition: userDupes.inc:55
Wikimedia\Rdbms\IMaintainableDatabase
Advanced database interface for IDatabase handles that include maintenance methods.
Definition: IMaintainableDatabase.php:38