5require_once __DIR__ .
'/Maintenance.php';
26 parent::__construct();
28 'Clean up duplicate ar_rev_id, both within archive and between archive and revision.'
38 $this->
output(
"Deduplicating ar_rev_id...\n" );
42 $this->
output(
"New install, nothing to do here.\n" );
48 $minId = $dbw->selectField(
'archive',
'MIN(ar_rev_id)', [], __METHOD__ );
49 $maxId = $dbw->selectField(
'archive',
'MAX(ar_rev_id)', [], __METHOD__ );
52 $this->arActorQuery = ActorMigration::newMigration()->getJoin(
'ar_user' );
53 $revActorQuery = ActorMigration::newMigration()->getJoin(
'rev_user' );
55 for ( $id = $minId; $id <= $maxId; $id += $batchSize ) {
56 $endId = min( $maxId, $id + $batchSize - 1 );
65 [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ],
72 [
'rev_id >= ' . (
int)$id,
'rev_id <= ' . (
int)$endId ],
74 [
'LOCK IN SHARE MODE' ]
79 [
'archive',
'revision' ] + $revActorQuery[
'tables'],
80 [
'rev_id',
'rev_timestamp',
'rev_sha1' ] + $revActorQuery[
'fields'],
81 [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ],
84 [
'revision' => [
'JOIN',
'ar_rev_id = rev_id' ] ] + $revActorQuery[
'joins']
87 foreach (
$res as $row ) {
88 $revRows[$row->rev_id] = $row;
91 $arRevIds = $dbw->selectFieldValues(
94 [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ],
96 [
'GROUP BY' =>
'ar_rev_id',
'HAVING' =>
'COUNT(*) > 1' ]
98 $arRevIds = array_values( array_unique( array_merge( $arRevIds, array_keys( $revRows ) ) ) );
104 $this->
output(
"... $id-$endId\n" );
109 "Finished deduplicating ar_rev_id. $this->deleted rows deleted, "
110 .
"$this->reassigned assigned new IDs.\n"
124 [
'archive' ] + $this->arActorQuery[
'tables'],
125 [
'ar_id',
'ar_rev_id',
'ar_namespace',
'ar_title',
'ar_timestamp',
'ar_sha1' ]
126 + $this->arActorQuery[
'fields'],
127 [
'ar_rev_id' => $arRevIds ],
130 $this->arActorQuery[
'joins']
137 foreach (
$res as $row ) {
139 if ( isset( $revRows[$row->ar_rev_id] ) ) {
140 $revRow = $revRows[$row->ar_rev_id];
143 if ( !isset( $seen[$revRow->rev_id] ) ) {
144 $seen[$revRow->rev_id] = [
145 'first' =>
"revision row",
151 if ( $row->ar_timestamp === $revRow->rev_timestamp &&
152 $row->ar_sha1 === $revRow->rev_sha1 &&
153 $row->ar_user === $revRow->rev_user &&
154 $row->ar_user_text === $revRow->rev_user_text
157 "Row $row->ar_id duplicates revision row for rev_id $revRow->rev_id, deleting\n"
159 $toDelete[] = $row->ar_id;
165 if ( !isset( $seen[$row->ar_rev_id] ) ) {
167 $seen[$row->ar_rev_id] = [
168 'first' =>
"archive row $row->ar_id",
171 } elseif ( !isset( $seen[$row->ar_rev_id][$key] ) ) {
173 $seen[$row->ar_rev_id][$key] = $row->ar_id;
175 "Row $row->ar_id conflicts with {$seen[$row->ar_rev_id]['first']} "
176 .
"for rev_id $row->ar_rev_id, reassigning\n"
178 $toReassign[] = $row->ar_id;
182 "Row $row->ar_id duplicates archive row {$seen[$row->ar_rev_id][$key]} "
183 .
"for rev_id $row->ar_rev_id, deleting\n"
185 $toDelete[] = $row->ar_id;
191 $dbw->
delete(
'archive', [
'ar_id' => $toDelete ], __METHOD__ );
205 return implode(
"\n", [
const RUN_MAINTENANCE_IF_MAIN
Maintenance script that cleans up archive rows with duplicated ar_rev_id, both within archive and between archive and revision.
__construct()
Default constructor.
array[]|null $arActorQuery
@phan-var array{tables:string[],fields:string[],joins:array}|null
getUpdateKey()
Get the update key name to go in the update log table.
doDBUpdates()
Do the actual work.
getSeenKey( $row)
Make a key identifying a "unique" change from a row.
processArRevIds(IDatabase $dbw, array $arRevIds, array $revRows)
Process a set of ar_rev_ids.
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
setBatchSize( $s=0)
Set the batch size.
static checkMysqlAutoIncrementBug(IDatabase $dbw)
Check for (and work around) a MySQL auto-increment bug.
static reassignArRevIds(IDatabase $dbw, array $arIds, array $conds=[])
Assign new ar_rev_ids to a set of ar_ids.
static isNewInstall(IDatabase $dbw)