MediaWiki  master
populateIpChanges.php
Go to the documentation of this file.
1 <?php
27 require_once __DIR__ . '/Maintenance.php';
28 
30 use Wikimedia\IPUtils;
31 
/**
 * Set up the script: register its description and command-line options.
 *
 * Options registered, in order: 'rev-id', 'max-rev-id' and 'throttle'
 * (each optional and taking a value), followed by the 'force' flag.
 */
public function __construct() {
	parent::__construct();

	$description = <<<TEXT
This script will find all rows in the revision table where the user is an IP,
and copy relevant fields to the ip_changes table. This backfilled data will
then be available when querying for IP ranges at Special:Contributions.
TEXT;
	$this->addDescription( $description );

	// Optional options that take an argument, keyed by option name.
	// Registration order follows the order of this table.
	$valueOptions = [
		'rev-id' => 'The rev_id to start copying from. Default: 0',
		'max-rev-id' => 'The rev_id to stop at. Default: result of MAX(rev_id)',
		'throttle' => 'Wait this many milliseconds after copying each batch of revisions. Default: 0',
	];
	foreach ( $valueOptions as $optionName => $optionHelp ) {
		// Third argument: not required. Fourth argument: takes a value.
		$this->addOption( $optionName, $optionHelp, false, true );
	}

	// Plain flag: neither required nor taking a value (addOption defaults).
	$this->addOption( 'force', 'Run regardless of whether the database says it\'s been run already' );
}
64 
/**
 * Copy revisions made by IP users from the revision table to ip_changes.
 *
 * Walks the rev_id range from the 'rev-id' option (default 0) up to the
 * 'max-rev-id' option (default: MAX(rev_id) of the revision table) in
 * batches. For each batch, rows flagged as anonymous by ActorMigration are
 * read from the replica, filtered to those whose user text is a valid IP,
 * and inserted into ip_changes with the IGNORE option. After every
 * non-empty batch the script waits for replication and then sleeps for
 * 'throttle' milliseconds.
 *
 * Calls fatalError() if the ip_changes table does not exist.
 *
 * @return bool Always true, including when the revision table is empty.
 */
public function doDBUpdates() {
	$dbw = $this->getDB( DB_MASTER );

	if ( !$dbw->tableExists( 'ip_changes' ) ) {
		$this->fatalError( 'ip_changes table does not exist' );
	}

	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );

	// All numeric options are normalised to int up front, so that later
	// arithmetic never operates on a raw command-line string.
	$throttle = intval( $this->getOption( 'throttle', 0 ) );
	$maxRevId = intval( $this->getOption( 'max-rev-id', 0 ) );
	$start = intval( $this->getOption( 'rev-id', 0 ) );
	$end = $maxRevId > 0
		? $maxRevId
		: $dbw->selectField( 'revision', 'MAX(rev_id)', '', __METHOD__ );

	if ( empty( $end ) ) {
		$this->output( "No revisions found, aborting.\n" );
		return true;
	}

	// selectField() may hand back the id as a string; keep range math in ints.
	$end = (int)$end;

	$blockStart = $start;
	$attempted = 0;
	$inserted = 0;

	$this->output( "Copying IP revisions to ip_changes, from rev_id $start to rev_id $end\n" );

	$actorMigration = ActorMigration::newMigration();
	$actorQuery = $actorMigration->getJoin( 'rev_user' );
	$revUserIsAnon = $actorMigration->isAnon( $actorQuery['fields']['rev_user'] );

	while ( $blockStart <= $end ) {
		// BETWEEN is inclusive on both ends, so a full batch covers
		// getBatchSize() + 1 consecutive rev_ids; the next batch starts
		// at $blockEnd + 1, so no id is visited twice.
		$blockEnd = min( $blockStart + $this->getBatchSize(), $end );
		$rows = $dbr->select(
			[ 'revision' ] + $actorQuery['tables'],
			[ 'rev_id', 'rev_timestamp', 'rev_user_text' => $actorQuery['fields']['rev_user_text'] ],
			[ "rev_id BETWEEN " . (int)$blockStart . " AND " . (int)$blockEnd, $revUserIsAnon ],
			__METHOD__,
			[],
			$actorQuery['joins']
		);

		$numRows = $rows->numRows();

		// Nothing anonymous in this id range: move straight to the next one
		// without waiting for replication or throttling.
		if ( $numRows === 0 ) {
			$blockStart = $blockEnd + 1;
			continue;
		}

		$this->output( "...checking $numRows revisions for IP edits that need copying, " .
			"between rev_ids $blockStart and $blockEnd\n" );

		$insertRows = [];
		foreach ( $rows as $row ) {
			// Make sure this is really an IP, e.g. not maintenance user or imported revision.
			if ( IPUtils::isValid( $row->rev_user_text ) ) {
				$insertRows[] = [
					'ipc_rev_id' => $row->rev_id,
					'ipc_rev_timestamp' => $row->rev_timestamp,
					'ipc_hex' => IPUtils::toHex( $row->rev_user_text ),
				];

				$attempted++;
			}
		}

		if ( $insertRows ) {
			// IGNORE: the affected-row count below is what tells us how many
			// of the attempted rows were actually written.
			$dbw->insert( 'ip_changes', $insertRows, __METHOD__, [ 'IGNORE' ] );

			$inserted += $dbw->affectedRows();
		}

		$lbFactory->waitForReplication();

		// usleep() rejects negative values on PHP 8, and a zero-length
		// sleep is pointless, so only pause for a positive throttle.
		if ( $throttle > 0 ) {
			usleep( $throttle * 1000 );
		}

		$blockStart = $blockEnd + 1;
	}

	$this->output( "Attempted to insert $attempted IP revisions, $inserted actually done.\n" );

	return true;
}
147 
/**
 * Get the update key name to go in the update log table.
 *
 * @return string The fixed key identifying this script's update.
 */
protected function getUpdateKey() {
	$updateKey = 'populate ip_changes';

	return $updateKey;
}
151 }
152 
153 $maintClass = PopulateIpChanges::class;
154 require_once RUN_MAINTENANCE_IF_MAIN;
RUN_MAINTENANCE_IF_MAIN
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:38
MediaWiki\MediaWikiServices
MediaWikiServices is the service locator for the application scope of MediaWiki.
Definition: MediaWikiServices.php:149
Maintenance\fatalError
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Definition: Maintenance.php:480
Maintenance\addDescription
addDescription( $text)
Set the description text.
Definition: Maintenance.php:323
IP
Pre-librarized class name for IPUtils.
Definition: IP.php:80
ActorMigration\newMigration
static newMigration()
Static constructor.
Definition: ActorMigration.php:139
$dbr
$dbr
Definition: testCompression.php:54
PopulateIpChanges\__construct
__construct()
Default constructor.
Definition: populateIpChanges.php:40
PopulateIpChanges
Maintenance script that will find all rows in the revision table where rev_user = 0 (user is an IP),...
Definition: populateIpChanges.php:39
LoggedUpdateMaintenance
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
Definition: LoggedUpdateMaintenance.php:26
Maintenance\addOption
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
Definition: Maintenance.php:241
DB_REPLICA
const DB_REPLICA
Definition: defines.php:25
DB_MASTER
const DB_MASTER
Definition: defines.php:26
$maintClass
$maintClass
Definition: populateIpChanges.php:153
Maintenance\getDB
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
Definition: Maintenance.php:1350
PopulateIpChanges\getUpdateKey
getUpdateKey()
Get the update key name to go in the update log table.
Definition: populateIpChanges.php:148
PopulateIpChanges\doDBUpdates
doDBUpdates()
Do the actual work.
Definition: populateIpChanges.php:65
Maintenance\getOption
getOption( $name, $default=null)
Get an option, or return the default.
Definition: Maintenance.php:277
Maintenance\getBatchSize
getBatchSize()
Returns batch size.
Definition: Maintenance.php:362
Maintenance\output
output( $out, $channel=null)
Throw some output to the user.
Definition: Maintenance.php:429