5require_once __DIR__ .
'/Maintenance.php';
20 parent::__construct();
22 'Clean up duplicate ar_rev_id, both within archive and between archive and revision.'
32 $this->
output(
"Deduplicating ar_rev_id...\n" );
36 $this->
output(
"New install, nothing to do here.\n" );
42 $minId = $dbw->selectField(
'archive',
'MIN(ar_rev_id)', [], __METHOD__ );
43 $maxId = $dbw->selectField(
'archive',
'MAX(ar_rev_id)', [], __METHOD__ );
46 $revActorQuery = ActorMigration::newMigration()->getJoin(
'rev_user' );
48 for ( $id = $minId; $id <= $maxId; $id += $batchSize ) {
49 $endId = min( $maxId, $id + $batchSize - 1 );
58 [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ],
65 [
'rev_id >= ' . (
int)$id,
'rev_id <= ' . (
int)$endId ],
67 [
'LOCK IN SHARE MODE' ]
72 [
'archive',
'revision' ] + $revActorQuery[
'tables'],
73 [
'rev_id',
'rev_timestamp',
'rev_sha1' ] + $revActorQuery[
'fields'],
74 [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ],
77 [
'revision' => [
'JOIN',
'ar_rev_id = rev_id' ] ] + $revActorQuery[
'joins']
80 foreach (
$res as $row ) {
81 $revRows[$row->rev_id] = $row;
84 $arRevIds = $dbw->selectFieldValues(
87 [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ],
89 [
'GROUP BY' =>
'ar_rev_id',
'HAVING' =>
'COUNT(*) > 1' ]
91 $arRevIds = array_values( array_unique( array_merge( $arRevIds, array_keys( $revRows ) ) ) );
97 $this->
output(
"... $id-$endId\n" );
102 "Finished deduplicating ar_rev_id. $this->deleted rows deleted, "
103 .
"$this->reassigned assigned new IDs.\n"
118 [
'ar_id',
'ar_rev_id',
'ar_namespace',
'ar_title',
'ar_actor',
119 'ar_timestamp',
'ar_sha1' ],
120 [
'ar_rev_id' => $arRevIds ],
128 foreach (
$res as $row ) {
130 if ( isset( $revRows[$row->ar_rev_id] ) ) {
131 $revRow = $revRows[$row->ar_rev_id];
134 if ( !isset( $seen[$revRow->rev_id] ) ) {
135 $seen[$revRow->rev_id] = [
136 'first' =>
"revision row",
142 if ( $row->ar_timestamp === $revRow->rev_timestamp &&
143 $row->ar_sha1 === $revRow->rev_sha1 &&
144 $row->ar_actor === $revRow->rev_actor
147 "Row $row->ar_id duplicates revision row for rev_id $revRow->rev_id, deleting\n"
149 $toDelete[] = $row->ar_id;
155 if ( !isset( $seen[$row->ar_rev_id] ) ) {
157 $seen[$row->ar_rev_id] = [
158 'first' =>
"archive row $row->ar_id",
161 } elseif ( !isset( $seen[$row->ar_rev_id][$key] ) ) {
163 $seen[$row->ar_rev_id][$key] = $row->ar_id;
165 "Row $row->ar_id conflicts with {$seen[$row->ar_rev_id]['first']} "
166 .
"for rev_id $row->ar_rev_id, reassigning\n"
168 $toReassign[] = $row->ar_id;
172 "Row $row->ar_id duplicates archive row {$seen[$row->ar_rev_id][$key]} "
173 .
"for rev_id $row->ar_rev_id, deleting\n"
175 $toDelete[] = $row->ar_id;
181 $dbw->
delete(
'archive', [
'ar_id' => $toDelete ], __METHOD__ );
195 return implode(
"\n", [
207require_once RUN_MAINTENANCE_IF_MAIN;
Maintenance script that cleans up archive rows with duplicated ar_rev_id, both within archive and between archive and revision.
__construct()
Default constructor.
getUpdateKey()
Get the update key name to go in the update log table.
doDBUpdates()
Do the actual work.
getSeenKey( $row)
Make a key identifying a "unique" change from a row.
processArRevIds(IDatabase $dbw, array $arRevIds, array $revRows)
Process a set of ar_rev_ids.
Class for scripts that perform database maintenance and want to log the update in updatelog so we can later skip it.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
static checkMysqlAutoIncrementBug(IDatabase $dbw)
Check for (and work around) a MySQL auto-increment bug.
static reassignArRevIds(IDatabase $dbw, array $arIds, array $conds=[])
Assign new ar_rev_ids to a set of ar_ids.
static isNewInstall(IDatabase $dbw)