5 require_once __DIR__ .
'/Maintenance.php';
23 parent::__construct();
25 'Clean up duplicate ar_rev_id, both within archive and between archive and revision.'
35 $this->
output(
"Deduplicating ar_rev_id...\n" );
39 $this->
output(
"New install, nothing to do here.\n" );
45 $minId = $dbw->selectField(
'archive',
'MIN(ar_rev_id)', [], __METHOD__ );
46 $maxId = $dbw->selectField(
'archive',
'MAX(ar_rev_id)', [], __METHOD__ );
52 for ( $id = $minId; $id <= $maxId; $id += $batchSize ) {
53 $endId = min( $maxId, $id + $batchSize - 1 );
62 [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ],
69 [
'rev_id >= ' . (
int)$id,
'rev_id <= ' . (
int)$endId ],
71 [
'LOCK IN SHARE MODE' ]
76 [
'archive',
'revision' ] + $revActorQuery[
'tables'],
77 [
'rev_id',
'rev_timestamp',
'rev_sha1' ] + $revActorQuery[
'fields'],
78 [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ],
81 [
'revision' => [
'JOIN',
'ar_rev_id = rev_id' ] ] + $revActorQuery[
'joins']
84 foreach (
$res as $row ) {
85 $revRows[$row->rev_id] = $row;
88 $arRevIds = $dbw->selectFieldValues(
91 [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ],
93 [
'GROUP BY' =>
'ar_rev_id',
'HAVING' =>
'COUNT(*) > 1' ]
95 $arRevIds = array_values( array_unique( array_merge( $arRevIds, array_keys( $revRows ) ) ) );
101 $this->
output(
"... $id-$endId\n" );
106 "Finished deduplicating ar_rev_id. $this->deleted rows deleted, "
107 .
"$this->reassigned assigned new IDs.\n"
121 [
'archive' ] + $this->arActorQuery[
'tables'],
122 [
'ar_id',
'ar_rev_id',
'ar_namespace',
'ar_title',
'ar_timestamp',
'ar_sha1' ]
123 + $this->arActorQuery[
'fields'],
124 [
'ar_rev_id' => $arRevIds ],
127 $this->arActorQuery[
'joins']
134 foreach (
$res as $row ) {
136 if ( isset( $revRows[$row->ar_rev_id] ) ) {
137 $revRow = $revRows[$row->ar_rev_id];
140 if ( !isset( $seen[$revRow->rev_id] ) ) {
141 $seen[$revRow->rev_id] = [
142 'first' =>
"revision row",
148 if ( $row->ar_timestamp === $revRow->rev_timestamp &&
149 $row->ar_sha1 === $revRow->rev_sha1 &&
150 $row->ar_user === $revRow->rev_user &&
151 $row->ar_user_text === $revRow->rev_user_text
154 "Row $row->ar_id duplicates revision row for rev_id $revRow->rev_id, deleting\n"
156 $toDelete[] = $row->ar_id;
162 if ( !isset( $seen[$row->ar_rev_id] ) ) {
164 $seen[$row->ar_rev_id] = [
165 'first' =>
"archive row $row->ar_id",
168 } elseif ( !isset( $seen[$row->ar_rev_id][$key] ) ) {
170 $seen[$row->ar_rev_id][$key] = $row->ar_id;
172 "Row $row->ar_id conflicts with {$seen[$row->ar_rev_id]['first']} "
173 .
"for rev_id $row->ar_rev_id, reassigning\n"
175 $toReassign[] = $row->ar_id;
179 "Row $row->ar_id duplicates archive row {$seen[$row->ar_rev_id][$key]} "
180 .
"for rev_id $row->ar_rev_id, deleting\n"
182 $toDelete[] = $row->ar_id;
188 $dbw->
delete(
'archive', [
'ar_id' => $toDelete ], __METHOD__ );
202 return implode(
"\n", [