MediaWiki  1.34.0
userDupes.inc
Go to the documentation of this file.
1 <?php
28 
/**
 * Look for duplicate user table entries and optionally prune them.
 *
 * A "duplicate" is any user_name that appears on more than one row of the
 * user table. For each such name the first row returned by the database is
 * kept as the canonical record; every other row has its revisions pointed
 * at the canonical user_id and is then deleted (when running in fix mode).
 */
class UserDupes {
	/** @var IMaintainableDatabase Database handle all queries are issued on */
	private $db;

	/** @var int Number of duplicate rows that had at least one revision attributed to them */
	private $reassigned;

	/** @var int Number of duplicate rows that were deleted (or could be, in dry-run mode) */
	private $trimmed;

	/** @var int Number of duplicate rows that still had revisions after reassignment */
	private $failed;

	/** @var callable Receives every progress message as a single string argument */
	private $outputCallback;

	/**
	 * @param IMaintainableDatabase &$database Database handle to operate on
	 * @param callable $outputCallback Called with one string for each message
	 */
	public function __construct( &$database, $outputCallback ) {
		$this->db = $database;
		$this->outputCallback = $outputCallback;
	}

	/**
	 * Output some text via the output callback provided.
	 *
	 * @param string $str Text to print
	 */
	private function out( $str ) {
		call_user_func( $this->outputCallback, $str );
	}

	/**
	 * Check if this database's user table has already had a unique
	 * user_name index applied.
	 *
	 * @return bool True if the user_name index exists and is unique;
	 *   false if it is non-unique or missing (a warning is printed when missing)
	 */
	public function hasUniqueIndex() {
		$info = $this->db->indexInfo( 'user', 'user_name', __METHOD__ );
		if ( !$info ) {
			$this->out( "WARNING: doesn't seem to have user_name index at all!\n" );

			return false;
		}

		# 'Non_unique' is 0 for *unique* indexes and 1 for *non-unique*
		# ones, so the flag has to be read inverted.
		return ( $info[0]->Non_unique == 0 );
	}

	/**
	 * Checks the database for duplicate user account records and removes
	 * them, reassigning their revisions to the canonical record first.
	 *
	 * @return bool True if the table is (now) free of duplicates, false if
	 *   some duplicate could not be cleaned up
	 */
	public function clearDupes() {
		return $this->checkDupes( true );
	}

	/**
	 * Checks the database for duplicate user account records and reports
	 * on them; with $doDelete the duplicates are also cleaned up.
	 *
	 * The user and revision tables are write-locked for the duration of the
	 * scan and are always unlocked again, even if a query throws.
	 *
	 * @param bool $doDelete Pass true to actually reassign edits and delete
	 *   duplicate rows; false only reports what would be done
	 * @return bool True if a unique index can now be applied safely
	 */
	private function checkDupes( $doDelete = false ) {
		$dbDomain = WikiMap::getCurrentWikiDbDomain()->getId();
		if ( $this->hasUniqueIndex() ) {
			// Routed through the callback like every other message, so
			// callers that capture or silence output see this one too.
			$this->out( "$dbDomain already has a unique index on its user table.\n" );

			return true;
		}

		$this->lock();
		try {
			$this->out( "Checking for duplicate accounts...\n" );
			$dupes = $this->getDupes();
			$count = count( $dupes );

			$this->out( "Found $count accounts with duplicate records on $dbDomain.\n" );
			$this->trimmed = 0;
			$this->reassigned = 0;
			$this->failed = 0;
			foreach ( $dupes as $name ) {
				$this->examine( $name, $doDelete );
			}
		} finally {
			// Never leave the tables locked behind a failed run.
			$this->unlock();
		}

		$this->out( "\n" );

		if ( $this->reassigned > 0 ) {
			if ( $doDelete ) {
				$this->out( "{$this->reassigned} duplicate accounts had edits "
					. "reassigned to a canonical record id.\n" );
			} else {
				$this->out( "{$this->reassigned} duplicate accounts need to have edits reassigned.\n" );
			}
		}

		if ( $this->trimmed > 0 ) {
			if ( $doDelete ) {
				$this->out(
					"{$this->trimmed} duplicate user records were deleted from $dbDomain.\n" );
			} else {
				$this->out(
					"{$this->trimmed} duplicate user accounts were found on $dbDomain " .
					"which can be removed safely.\n"
				);
			}
		}

		if ( $this->failed > 0 ) {
			$this->out( "Something terribly awry; {$this->failed} duplicate accounts were not removed.\n" );

			return false;
		}

		if ( $this->trimmed == 0 || $doDelete ) {
			$this->out( "It is now safe to apply the unique index on user_name.\n" );

			return true;
		}

		$this->out( "Run this script again with the --fix option to automatically delete them.\n" );

		return false;
	}

	/**
	 * We don't want anybody to mess with our stuff...
	 *
	 * Issues a LOCK TABLES statement taking WRITE locks on the user and
	 * revision tables.
	 */
	private function lock() {
		$set = [ 'user', 'revision' ];
		$names = array_map( [ $this, 'lockTable' ], $set );
		$tables = implode( ',', $names );

		$this->db->query( "LOCK TABLES $tables", __METHOD__ );
	}

	/**
	 * Build the LOCK TABLES fragment for one table.
	 *
	 * @param string $table Unqualified table name
	 * @return string The database's name for the table followed by ' WRITE'
	 */
	private function lockTable( $table ) {
		return $this->db->tableName( $table ) . ' WRITE';
	}

	/**
	 * Release the locks taken by lock().
	 */
	private function unlock() {
		$this->db->query( "UNLOCK TABLES", __METHOD__ );
	}

	/**
	 * Grab usernames for which multiple records are present in the database.
	 *
	 * @return string[] One entry per user_name that occurs more than once
	 */
	private function getDupes() {
		$user = $this->db->tableName( 'user' );
		$result = $this->db->query(
			"SELECT user_name,COUNT(*) AS n
				FROM $user
			GROUP BY user_name
			  HAVING n > 1", __METHOD__ );

		$list = [];
		foreach ( $result as $row ) {
			$list[] = $row->user_name;
		}

		return $list;
	}

	/**
	 * Examine user records for the given name. Try to see which record
	 * will be the one that is kept, and which can be removed.
	 *
	 * The first row yielded by the result is the canonical record; only the
	 * rows after it are treated as duplicates. The result is consumed solely
	 * through a single foreach so that the canonical row can never be seen a
	 * second time as a "dupe" (and deleted), whatever the rewind behaviour
	 * of the result iterator is. An empty result is a no-op.
	 *
	 * @param string $name Username with more than one user row
	 * @param bool $doDelete True to reassign edits and delete the duplicates,
	 *   false to only report
	 */
	private function examine( $name, $doDelete ) {
		$result = $this->db->select( 'user',
			[ 'user_id' ],
			[ 'user_name' => $name ],
			__METHOD__ );

		$firstId = null;
		foreach ( $result as $row ) {
			if ( $firstId === null ) {
				// First row: this is the record we keep.
				$firstId = $row->user_id;
				$this->out( "Record that will be used for '$name' is user_id=$firstId\n" );
				continue;
			}

			$dupeId = $row->user_id;
			$this->out( "... dupe id $dupeId: " );
			$edits = $this->editCount( $dupeId );
			if ( $edits > 0 ) {
				$this->reassigned++;
				$this->out( "has $edits edits! " );
				if ( $doDelete ) {
					$this->reassignEdits( $dupeId, $firstId );
					// Re-count to confirm nothing is still attributed to
					// the row we are about to delete.
					$newEdits = $this->editCount( $dupeId );
					if ( $newEdits == 0 ) {
						$this->out( "confirmed cleaned. " );
					} else {
						$this->failed++;
						$this->out( "WARNING! $newEdits remaining edits for $dupeId; NOT deleting user.\n" );
						continue;
					}
				} else {
					$this->out( "(will need to reassign edits on fix)" );
				}
			} else {
				$this->out( "ok, no edits. " );
			}
			$this->trimmed++;
			if ( $doDelete ) {
				$this->trimAccount( $dupeId );
			}
			$this->out( "\n" );
		}
	}

	/**
	 * Count the number of edits attributed to this user.
	 *
	 * @param int $userid user_id to look up in revision.rev_user
	 * @return int Number of matching revision rows
	 */
	private function editCount( $userid ) {
		return intval( $this->db->selectField(
			'revision',
			'COUNT(*)',
			[ 'rev_user' => $userid ],
			__METHOD__ ) );
	}

	/**
	 * Point every revision attributed to one user id at another user id.
	 *
	 * @param int $from user_id currently stored in rev_user
	 * @param int $to user_id to store instead
	 */
	private function reassignEdits( $from, $to ) {
		$this->out( 'reassigning... ' );
		$this->db->update( 'revision',
			[ 'rev_user' => $to ],
			[ 'rev_user' => $from ],
			__METHOD__ );
		$this->out( "ok. " );
	}

	/**
	 * Remove a user account line.
	 *
	 * @param int $userid user_id of the row to delete from the user table
	 */
	private function trimAccount( $userid ) {
		$this->out( "deleting..." );
		$this->db->delete( 'user', [ 'user_id' => $userid ], __METHOD__ );
		$this->out( " ok" );
	}
}
UserDupes\lock
lock()
We don't want anybody to mess with our stuff...
Definition: userDupes.inc:179
WikiMap\getCurrentWikiDbDomain
static getCurrentWikiDbDomain()
Definition: WikiMap.php:292
UserDupes\$reassigned
$reassigned
Definition: userDupes.inc:42
UserDupes\lockTable
lockTable( $table)
Definition: userDupes.inc:187
UserDupes\$db
IMaintainableDatabase $db
Definition: userDupes.inc:41
UserDupes\clearDupes
clearDupes()
Checks the database for duplicate user account records and removes them in preparation for application...
Definition: userDupes.inc:94
UserDupes\$trimmed
$trimmed
Definition: userDupes.inc:43
UserDupes\editCount
editCount( $userid)
Count the number of edits attributed to this user.
Definition: userDupes.inc:273
UserDupes\checkDupes
checkDupes( $doDelete=false)
Checks the database for duplicate user account records in preparation for application of a unique ind...
Definition: userDupes.inc:112
UserDupes\unlock
unlock()
Definition: userDupes.inc:194
UserDupes\examine
examine( $name, $doDelete)
Examine user records for the given name.
Definition: userDupes.inc:225
UserDupes\hasUniqueIndex
hasUniqueIndex()
Check if this database's user table has already had a unique user_name index applied.
Definition: userDupes.inc:69
UserDupes\trimAccount
trimAccount( $userid)
Remove a user account line.
Definition: userDupes.inc:298
UserDupes\getDupes
getDupes()
Grab usernames for which multiple records are present in the database.
Definition: userDupes.inc:202
UserDupes\$outputCallback
$outputCallback
Definition: userDupes.inc:45
UserDupes\$failed
$failed
Definition: userDupes.inc:44
UserDupes\out
out( $str)
Output some text via the output callback provided.
Definition: userDupes.inc:60
UserDupes
Look for duplicate user table entries and optionally prune them.
Definition: userDupes.inc:37
UserDupes\reassignEdits
reassignEdits( $from, $to)
Definition: userDupes.inc:285
UserDupes\__construct
__construct(&$database, $outputCallback)
Definition: userDupes.inc:51
Wikimedia\Rdbms\IMaintainableDatabase
Advanced database interface for IDatabase handles that include maintenance methods.
Definition: IMaintainableDatabase.php:38