5require_once __DIR__ .
'/Maintenance.php';
17 private $reassigned = 0;
20 parent::__construct();
22 'Clean up duplicate ar_rev_id, both within archive and between archive and revision.'
32 $this->
output(
"Deduplicating ar_rev_id...\n" );
36 $this->
output(
"New install, nothing to do here.\n" );
43 ->select(
'MIN(ar_rev_id)' )
46 ->caller( __METHOD__ )
49 ->select(
'MAX(ar_rev_id)' )
52 ->caller( __METHOD__ )
56 $revActorQuery = ActorMigration::newMigration()->getJoin(
'rev_user' );
58 for ( $id = $minId; $id <= $maxId; $id += $batchSize ) {
59 $endId = min( $maxId, $id + $batchSize - 1 );
68 ->where( [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ] )
69 ->caller( __METHOD__ )
75 ->where( [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ] )
76 ->caller( __METHOD__ )
82 [
'archive',
'revision' ] + $revActorQuery[
'tables'],
83 [
'rev_id',
'rev_timestamp',
'rev_sha1' ] + $revActorQuery[
'fields'],
84 [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ],
87 [
'revision' => [
'JOIN',
'ar_rev_id = rev_id' ] ] + $revActorQuery[
'joins']
90 foreach (
$res as $row ) {
91 $revRows[$row->rev_id] = $row;
95 ->select(
'ar_rev_id' )
97 ->where( [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ] )
98 ->caller( __METHOD__ )
99 ->groupBy(
'ar_rev_id' )
100 ->having(
'COUNT(*) > 1' )
101 ->fetchFieldValues();
102 $arRevIds = array_values( array_unique( array_merge( $arRevIds, array_keys( $revRows ) ) ) );
105 $this->processArRevIds( $dbw, $arRevIds, $revRows );
108 $this->
output(
"... $id-$endId\n" );
113 "Finished deduplicating ar_rev_id. $this->deleted rows deleted, "
114 .
"$this->reassigned assigned new IDs.\n"
125 private function processArRevIds(
IDatabase $dbw, array $arRevIds, array $revRows ) {
128 ->select( [
'ar_id',
'ar_rev_id',
'ar_namespace',
'ar_title',
'ar_actor',
129 'ar_timestamp',
'ar_sha1' ] )
131 ->where( [
'ar_rev_id' => $arRevIds ] )
132 ->caller( __METHOD__ )
139 foreach (
$res as $row ) {
141 if ( isset( $revRows[$row->ar_rev_id] ) ) {
142 $revRow = $revRows[$row->ar_rev_id];
145 if ( !isset( $seen[$revRow->rev_id] ) ) {
146 $seen[$revRow->rev_id] = [
147 'first' =>
"revision row",
153 if ( $row->ar_timestamp === $revRow->rev_timestamp &&
154 $row->ar_sha1 === $revRow->rev_sha1 &&
155 $row->ar_actor === $revRow->rev_actor
158 "Row $row->ar_id duplicates revision row for rev_id $revRow->rev_id, deleting\n"
160 $toDelete[] = $row->ar_id;
165 $key = $this->getSeenKey( $row );
166 if ( !isset( $seen[$row->ar_rev_id] ) ) {
168 $seen[$row->ar_rev_id] = [
169 'first' =>
"archive row $row->ar_id",
172 } elseif ( !isset( $seen[$row->ar_rev_id][$key] ) ) {
174 $seen[$row->ar_rev_id][$key] = $row->ar_id;
176 "Row $row->ar_id conflicts with {$seen[$row->ar_rev_id]['first']} "
177 .
"for rev_id $row->ar_rev_id, reassigning\n"
179 $toReassign[] = $row->ar_id;
183 "Row $row->ar_id duplicates archive row {$seen[$row->ar_rev_id][$key]} "
184 .
"for rev_id $row->ar_rev_id, deleting\n"
186 $toDelete[] = $row->ar_id;
192 $dbw->
delete(
'archive', [
'ar_id' => $toDelete ], __METHOD__ );
205 private function getSeenKey( $row ) {
206 return implode(
"\n", [
218require_once RUN_MAINTENANCE_IF_MAIN;
Maintenance script that cleans up archive rows with duplicated ar_rev_id, both within archive and between archive and revision.
__construct()
Default constructor.
getUpdateKey()
Get the update key name to go in the update log table.
doDBUpdates()
Do the actual work.
Class for scripts that perform database maintenance and want to log the update in updatelog so we can later skip it.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
static checkMysqlAutoIncrementBug(IDatabase $dbw)
Check for (and work around) a MySQL auto-increment bug.
static reassignArRevIds(IDatabase $dbw, array $arIds, array $conds=[])
Assign new ar_rev_ids to a set of ar_ids.
static isNewInstall(IDatabase $dbw)