Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 82 |
|
0.00% |
0 / 3 |
CRAP | |
0.00% |
0 / 1 |
PopulateIpChanges | |
0.00% |
0 / 79 |
|
0.00% |
0 / 3 |
156 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
2 | |||
doDBUpdates | |
0.00% |
0 / 60 |
|
0.00% |
0 / 1 |
110 | |||
getUpdateKey | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * Find all revisions by logged out users and copy the rev_id, |
4 | * rev_timestamp, and a hex representation of IP address to the |
5 | * new ip_changes table. This table is used to efficiently query for |
6 | * contributions within an IP range. |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify |
9 | * it under the terms of the GNU General Public License as published by |
10 | * the Free Software Foundation; either version 2 of the License, or |
11 | * (at your option) any later version. |
12 | * |
13 | * This program is distributed in the hope that it will be useful, |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | * GNU General Public License for more details. |
17 | * |
18 | * You should have received a copy of the GNU General Public License along |
19 | * with this program; if not, write to the Free Software Foundation, Inc., |
20 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
21 | * http://www.gnu.org/copyleft/gpl.html |
22 | * |
23 | * @file |
24 | * @ingroup Maintenance |
25 | */ |
26 | |
27 | require_once __DIR__ . '/Maintenance.php'; |
28 | |
29 | use Wikimedia\IPUtils; |
30 | |
31 | /** |
32 | * Maintenance script that will find all rows in the revision table where |
33 | * rev_actor refers to an IP actor, and copy relevant fields to ip_changes so |
34 | * that historical data will be available when querying for IP ranges. |
35 | * |
36 | * @ingroup Maintenance |
37 | */ |
class PopulateIpChanges extends LoggedUpdateMaintenance {
	/**
	 * Register the script description and its command-line options
	 * (rev-id, max-rev-id, throttle and force).
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( <<<TEXT
This script will find all rows in the revision table where the user is an IP,
and copy relevant fields to the ip_changes table. This backfilled data will
then be available when querying for IP ranges at Special:Contributions.
TEXT
		);
		$this->addOption( 'rev-id', 'The rev_id to start copying from. Default: 0', false, true );
		$this->addOption(
			'max-rev-id',
			'The rev_id to stop at. Default: result of MAX(rev_id)',
			false,
			true
		);
		$this->addOption(
			'throttle',
			'Wait this many milliseconds after copying each batch of revisions. Default: 0',
			false,
			true
		);
		$this->addOption( 'force', 'Run regardless of whether the database says it\'s been run already' );
	}

	/**
	 * Walk the revision table in rev_id windows and copy every revision whose
	 * actor has no user ID and whose actor name is a valid IP address into the
	 * ip_changes table.
	 *
	 * Rows are read from a replica connection and written to the primary with
	 * an INSERT IGNORE, so rows that already exist in ip_changes are skipped
	 * and only counted as "attempted".
	 *
	 * @return bool Always true, both when there is nothing to copy and after
	 *  the full range has been processed.
	 */
	public function doDBUpdates() {
		$dbw = $this->getDB( DB_PRIMARY );

		if ( !$dbw->tableExists( 'ip_changes', __METHOD__ ) ) {
			$this->fatalError( 'ip_changes table does not exist' );
		}

		$dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );

		// Normalize every numeric option to an integer up front, so that the
		// range arithmetic below never operates on raw command-line strings.
		$throttle = intval( $this->getOption( 'throttle', 0 ) );
		$maxRevId = intval( $this->getOption( 'max-rev-id', 0 ) );
		$start = intval( $this->getOption( 'rev-id', 0 ) );

		// Without an explicit upper bound, stop at the highest rev_id currently
		// known to the primary. An empty table yields no usable value, which the
		// cast turns into 0 and the check below treats as "nothing to do".
		$end = $maxRevId > 0
			? $maxRevId
			: (int)$dbw->newSelectQueryBuilder()
				->select( 'MAX(rev_id)' )
				->from( 'revision' )
				->caller( __METHOD__ )->fetchField();

		if ( !$end ) {
			$this->output( "No revisions found, aborting.\n" );
			return true;
		}

		$blockStart = $start;
		$attempted = 0;
		$inserted = 0;

		$this->output( "Copying IP revisions to ip_changes, from rev_id $start to rev_id $end\n" );

		while ( $blockStart <= $end ) {
			// Both bounds are inclusive; the last window is clamped to $end.
			$blockEnd = min( $blockStart + $this->getBatchSize(), $end );
			$rows = $dbr->newSelectQueryBuilder()
				->select( [ 'rev_id', 'rev_timestamp', 'actor_name' ] )
				->from( 'revision' )
				->join( 'actor', null, 'actor_id = rev_actor' )
				->where( [
					$dbr->expr( 'rev_id', '>=', $blockStart ),
					$dbr->expr( 'rev_id', '<=', $blockEnd ),
					'actor_user' => null,
				] )
				->caller( __METHOD__ )
				->fetchResultSet();

			$numRows = $rows->numRows();

			if ( $numRows === 0 ) {
				// Nothing by an anonymous actor in this window: move on without
				// waiting for replication or throttling, since nothing was written.
				$blockStart = $blockEnd + 1;
				continue;
			}

			$this->output( "...checking $numRows revisions for IP edits that need copying, " .
				"between rev_ids $blockStart and $blockEnd\n" );

			$insertRows = [];
			foreach ( $rows as $row ) {
				// Make sure this is really an IP, e.g. not maintenance user or imported revision.
				if ( IPUtils::isValid( $row->actor_name ) ) {
					$insertRows[] = [
						'ipc_rev_id' => $row->rev_id,
						'ipc_rev_timestamp' => $row->rev_timestamp,
						'ipc_hex' => IPUtils::toHex( $row->actor_name ),
					];

					$attempted++;
				}
			}

			if ( $insertRows ) {
				$dbw->newInsertQueryBuilder()
					->insertInto( 'ip_changes' )
					->ignore()
					->rows( $insertRows )
					->caller( __METHOD__ )->execute();

				$inserted += $dbw->affectedRows();
			}

			$this->waitForReplication();

			// The option is given in milliseconds, usleep() expects microseconds.
			// Skip the call for zero or negative values: a negative argument
			// makes usleep() throw on PHP 8.
			if ( $throttle > 0 ) {
				usleep( $throttle * 1000 );
			}

			$blockStart = $blockEnd + 1;
		}

		$this->output( "Attempted to insert $attempted IP revisions, $inserted actually done.\n" );

		return true;
	}

	/**
	 * Key identifying this update.
	 *
	 * NOTE(review): presumably the parent class uses this key to record that
	 * the script has already been run (see the 'force' option) — confirm
	 * against LoggedUpdateMaintenance.
	 *
	 * @return string
	 */
	protected function getUpdateKey() {
		return 'populate ip_changes';
	}
}
156 | |
157 | $maintClass = PopulateIpChanges::class; |
158 | require_once RUN_MAINTENANCE_IF_MAIN; |