Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 122 |
|
0.00% |
0 / 7 |
CRAP | |
0.00% |
0 / 1 |
FindMissingActors | |
0.00% |
0 / 119 |
|
0.00% |
0 / 7 |
756 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
2 | |||
getTables | |
0.00% |
0 / 18 |
|
0.00% |
0 / 1 |
20 | |||
getTableInfo | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
getNewActorId | |
0.00% |
0 / 20 |
|
0.00% |
0 / 1 |
56 | |||
execute | |
0.00% |
0 / 24 |
|
0.00% |
0 / 1 |
56 | |||
findBadActors | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
42 | |||
overwriteActorIDs | |
0.00% |
0 / 13 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * This program is free software; you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License as published by |
5 | * the Free Software Foundation; either version 2 of the License, or |
6 | * (at your option) any later version. |
7 | * |
8 | * This program is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | * GNU General Public License for more details. |
12 | * |
13 | * You should have received a copy of the GNU General Public License along |
14 | * with this program; if not, write to the Free Software Foundation, Inc., |
15 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | * http://www.gnu.org/copyleft/gpl.html |
17 | * |
18 | * @file |
19 | * @ingroup Maintenance |
20 | */ |
21 | |
22 | use MediaWiki\MainConfigNames; |
23 | use MediaWiki\User\ActorNormalization; |
24 | use MediaWiki\User\UserFactory; |
25 | use MediaWiki\User\UserNameUtils; |
26 | use MediaWiki\User\UserRigorOptions; |
27 | |
28 | require_once __DIR__ . '/Maintenance.php'; |
29 | |
30 | /** |
31 | * Maintenance script for finding and replacing invalid actor IDs, see T261325 and T307738. |
32 | * |
33 | * @ingroup Maintenance |
34 | */ |
class FindMissingActors extends Maintenance {

	// Services are looked up in execute(), not in the constructor.
	private UserFactory $userFactory;
	private UserNameUtils $userNameUtils;
	private ActorNormalization $actorNormalization;

	/**
	 * Lazily built map of supported field names to table info, see getTables().
	 *
	 * Must default to null: a typed property without a default value is
	 * "uninitialized", and reading it before assignment throws an Error.
	 *
	 * @var array[]|null
	 */
	private ?array $tables = null;

	/**
	 * Declares the command line options and sets the default batch size.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Find and fix invalid actor IDs.' );
		$this->addOption( 'field', 'The name of a database field to process',
			true, true );
		$this->addOption( 'type', 'Which type of invalid actors to find or fix, '
			. 'missing or broken (with empty actor_name which can\'t be associated '
			. 'with an existing user).',
			false, true );
		$this->addOption( 'skip', 'A comma-separated list of actor IDs to skip.',
			false, true );
		$this->addOption( 'overwrite-with', 'Replace invalid actors with this user. '
			. 'Typically, this would be "Unknown user", but it could be any reserved '
			. 'system user (per $wgReservedUsernames) or locally registered user. '
			. 'If not given, invalid actors will only be listed, not fixed. '
			. 'You will be prompted for confirmation before data is written. ',
			false, true );

		$this->setBatchSize( 1000 );
	}

	/**
	 * Returns the supported actor fields, building the list on first use.
	 *
	 * Each entry is keyed by the field name accepted by --field and holds
	 * [ table name, actor field, row ID field ]. Which block table is offered
	 * depends on the BlockTargetMigrationStage setting.
	 *
	 * @return array[]
	 */
	private function getTables() {
		if ( $this->tables === null ) {
			$tables = [
				'ar_actor' => [ 'archive', 'ar_actor', 'ar_id' ],
				'img_actor' => [ 'image', 'img_actor', 'img_name' ],
				'oi_actor' => [ 'oldimage', 'oi_actor', 'oi_archive_name' ], // no index on oi_archive_name!
				'fa_actor' => [ 'filearchive', 'fa_actor', 'fa_id' ],
				'rc_actor' => [ 'recentchanges', 'rc_actor', 'rc_id' ],
				'log_actor' => [ 'logging', 'log_actor', 'log_id' ],
				'rev_actor' => [ 'revision', 'rev_actor', 'rev_id' ],
			];
			$stage = $this->getServiceContainer()->getMainConfig()
				->get( MainConfigNames::BlockTargetMigrationStage );
			if ( $stage & SCHEMA_COMPAT_WRITE_OLD ) {
				$tables['ipb_by_actor'] = [ 'ipblocks', 'ipb_by_actor', 'ipb_id' ]; // no index on ipb_by_actor!
			}
			if ( $stage & SCHEMA_COMPAT_WRITE_NEW ) {
				$tables['bl_by_actor'] = [ 'block', 'bl_by_actor', 'bl_id' ]; // no index on bl_by_actor!
			}
			$this->tables = $tables;
		}
		return $this->tables;
	}

	/**
	 * Looks up the table info for a field name.
	 *
	 * @param string $field
	 * @return array|null [ table name, actor field, row ID field ],
	 *   or null if the field is not supported.
	 */
	private function getTableInfo( $field ) {
		$tables = $this->getTables();
		return $tables[$field] ?? null;
	}

	/**
	 * Returns the actor ID of the user specified with the --overwrite-with option,
	 * or null if --overwrite-with is not set.
	 *
	 * Existing users and reserved system users are supported.
	 * If the user does not have an actor ID yet, one will be assigned.
	 *
	 * Exits via fatalError() if the name is invalid, belongs to no known user,
	 * or no actor ID could be acquired.
	 *
	 * @return int|null
	 */
	private function getNewActorId() {
		$name = $this->getOption( 'overwrite-with' );

		if ( $name === null ) {
			return null;
		}

		$user = $this->userFactory->newFromName( $name );

		if ( !$user ) {
			$this->fatalError( "Not a valid user name: '$name'" );
		}

		$name = $this->userNameUtils->getCanonical( $name, UserRigorOptions::RIGOR_NONE );

		if ( $user->isRegistered() ) {
			$this->output( "Using existing user: '$user'\n" );
		} elseif ( !$this->userNameUtils->isValid( $name ) ) {
			$this->fatalError( "Not a valid user name: '$name'" );
		} elseif ( !$this->userNameUtils->isUsable( $name ) ) {
			// Valid but not usable: treated as a reserved system user name.
			$this->output( "Using system user: '$name'\n" );
		} else {
			// A usable name without a registered account cannot be a target.
			$this->fatalError( "Unknown user: '$name'" );
		}

		$dbw = $this->getPrimaryDB();
		$actorId = $this->actorNormalization->acquireActorId( $user, $dbw );

		if ( !$actorId ) {
			$this->fatalError( "Failed to acquire an actor ID for user '$user'" );
		}

		$this->output( "Replacement actor ID is $actorId.\n" );
		return $actorId;
	}

	/**
	 * Entry point: validates the options, lists one batch of rows with invalid
	 * actor IDs and, if --overwrite-with was given and the operator confirms,
	 * rewrites the actor ID of those rows.
	 */
	public function execute() {
		$services = $this->getServiceContainer();
		$this->userFactory = $services->getUserFactory();
		$this->userNameUtils = $services->getUserNameUtils();
		$this->actorNormalization = $services->getActorNormalization();
		$this->setDBProvider( $services->getConnectionProvider() );

		$field = $this->getOption( 'field' );
		if ( !$this->getTableInfo( $field ) ) {
			$this->fatalError( "Unknown field: $field.\n" );
		}

		$type = $this->getOption( 'type', 'missing' );
		if ( $type !== 'missing' && $type !== 'broken' ) {
			$this->fatalError( "Unknown type: $type.\n" );
		}

		$skip = $this->parseIntList( $this->getOption( 'skip', '' ) );
		$overwrite = $this->getNewActorId();

		$bad = $this->findBadActors( $field, $type, $skip );

		// Only write when there is something to fix and a replacement was requested.
		if ( $bad && $overwrite ) {
			$this->output( "\n" );
			$this->output( "Do you want to OVERWRITE the listed actor IDs?\n" );
			$this->output( "Information about the invalid IDs will be lost!\n" );
			$this->output( "\n" );
			$confirm = self::readconsole( 'Type "yes" to continue: ' );

			if ( $confirm === 'yes' ) {
				$this->overwriteActorIDs( $field, array_keys( $bad ), $overwrite );
			} else {
				$this->fatalError( 'Aborted.' );
			}
		}

		$this->output( "Done.\n" );
	}

	/**
	 * Find rows that have bad actor IDs.
	 *
	 * At most one batch (see getBatchSize()) of rows is returned per call.
	 *
	 * @param string $field the database field in which to detect bad actor IDs.
	 * @param string $type type of bad actors, missing or broken.
	 * @param int[] $skip bad actor IDs not to replace.
	 *
	 * @return int[] the bad actor ID found in each row, keyed by the row's ID,
	 *   identifying rows in which the actor ID needs to be replaced.
	 */
	private function findBadActors( $field, $type, $skip ) {
		[ $table, $actorField, $idField ] = $this->getTableInfo( $field );
		$this->output( "Finding invalid actor IDs in $table.$actorField...\n" );

		$dbr = $this->getServiceContainer()->getDBLoadBalancer()->getConnectionRef( DB_REPLICA, 'vslow' );

		/*
		We are building an SQL query like this one here, performing a left join
		to detect rows in $table that lack a matching row in the actor table.

		In this example, $field is 'log_actor', so $table is 'logging',
		$actorField is 'log_actor', and $idField is 'log_id'.
		Further, $type is 'missing', $skip is [ 1, 2, 3, 4 ] and the batch size is 1000.

		SELECT log_actor, log_id
		FROM logging
		LEFT JOIN actor ON log_actor = actor_id
		WHERE actor_id IS NULL
		AND log_actor NOT IN (1, 2, 3, 4)
		LIMIT 1000;

		For $type 'broken', the condition on actor_id is replaced by actor_name = ''.
		*/

		$queryBuilder = $dbr->newSelectQueryBuilder()
			->select( [ $actorField, $idField ] )
			->from( $table )
			->leftJoin( 'actor', null, [ "$actorField = actor_id" ] )
			->where( $type === 'missing' ? [ 'actor_id' => null ] : [ 'actor_name' => '' ] )
			->limit( $this->getBatchSize() );

		if ( $skip ) {
			$queryBuilder->andWhere( $dbr->expr( $actorField, '!=', $skip ) );
		}

		$res = $queryBuilder->caller( __METHOD__ )->fetchResultSet();
		$count = $res->numRows();

		$bad = [];

		if ( $count ) {
			$this->output( "\t\tID\tACTOR\n" );
		}

		foreach ( $res as $row ) {
			$id = $row->$idField;
			$actor = (int)( $row->$actorField );

			$bad[$id] = $actor;
			$this->output( "\t\t$id\t$actor\n" );
		}

		$this->output( "\tFound $count invalid actor IDs.\n" );

		if ( $count >= $this->getBatchSize() ) {
			$this->output( "\tBatch size reached, run again after fixing the current batch.\n" );
		}

		return $bad;
	}

	/**
	 * Overwrite the actor ID in a given set of rows.
	 *
	 * All rows are updated with a single UPDATE on the primary database,
	 * after which the script waits for replication.
	 *
	 * @param string $field the database field in which to replace IDs.
	 * @param array $ids The row IDs of the rows in which the actor ID should be replaced
	 * @param int $overwrite The actor ID to write to the rows identified by $ids.
	 *
	 * @return int the number of rows affected by the update.
	 */
	private function overwriteActorIDs( $field, array $ids, int $overwrite ) {
		[ $table, $actorField, $idField ] = $this->getTableInfo( $field );

		$count = count( $ids );
		$this->output( "OVERWRITING $count actor IDs in $table.$actorField with $overwrite...\n" );

		$dbw = $this->getPrimaryDB();

		$dbw->newUpdateQueryBuilder()
			->update( $table )
			->set( [ $actorField => $overwrite ] )
			->where( [ $idField => $ids ] )
			->caller( __METHOD__ )->execute();

		// From here on $count is the number of rows actually changed.
		$count = $dbw->affectedRows();

		$this->waitForReplication();
		$this->output( "\tUpdated $count rows.\n" );

		return $count;
	}

}
285 | |
// Name the class implementing this script, then load the file named by
// RUN_MAINTENANCE_IF_MAIN (defined outside this file; presumably the runner
// that executes $maintClass when this script is the entry point).
$maintClass = FindMissingActors::class;
require_once RUN_MAINTENANCE_IF_MAIN;