5require_once __DIR__ .
'/Maintenance.php';
23 parent::__construct();
25 'Clean up duplicate ar_rev_id, both within archive and between archive and revision.'
35 $this->
output(
"Deduplicating ar_rev_id...\n" );
40 $minId = $dbw->selectField(
'archive',
'MIN(ar_rev_id)', [], __METHOD__ );
41 $maxId = $dbw->selectField(
'archive',
'MAX(ar_rev_id)', [], __METHOD__ );
44 $this->arActorQuery = ActorMigration::newMigration()->getJoin(
'ar_user' );
45 $revActorQuery = ActorMigration::newMigration()->getJoin(
'rev_user' );
47 for ( $id = $minId; $id <= $maxId; $id += $batchSize ) {
48 $endId = min( $maxId, $id + $batchSize - 1 );
57 [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ],
64 [
'rev_id >= ' . (
int)$id,
'rev_id <= ' . (
int)$endId ],
66 [
'LOCK IN SHARE MODE' ]
71 [
'archive',
'revision' ] + $revActorQuery[
'tables'],
72 [
'rev_id',
'rev_timestamp',
'rev_sha1' ] + $revActorQuery[
'fields'],
73 [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ],
76 [
'revision' => [
'JOIN',
'ar_rev_id = rev_id' ] ] + $revActorQuery[
'joins']
79 foreach (
$res as $row ) {
80 $revRows[$row->rev_id] = $row;
83 $arRevIds = $dbw->selectFieldValues(
86 [
'ar_rev_id >= ' . (
int)$id,
'ar_rev_id <= ' . (
int)$endId ],
88 [
'GROUP BY' =>
'ar_rev_id',
'HAVING' =>
'COUNT(*) > 1' ]
90 $arRevIds = array_values( array_unique( array_merge( $arRevIds, array_keys( $revRows ) ) ) );
96 $this->
output(
"... $id-$endId\n" );
101 "Finished deduplicating ar_rev_id. $this->deleted rows deleted, "
102 .
"$this->reassigned assigned new IDs.\n"
116 [
'archive' ] + $this->arActorQuery[
'tables'],
117 [
'ar_id',
'ar_rev_id',
'ar_namespace',
'ar_title',
'ar_timestamp',
'ar_sha1' ]
118 + $this->arActorQuery[
'fields'],
119 [
'ar_rev_id' => $arRevIds ],
122 $this->arActorQuery[
'joins']
129 foreach (
$res as $row ) {
131 if ( isset( $revRows[$row->ar_rev_id] ) ) {
132 $revRow = $revRows[$row->ar_rev_id];
135 if ( !isset( $seen[$revRow->rev_id] ) ) {
136 $seen[$revRow->rev_id] = [
137 'first' =>
"revision row",
143 if ( $row->ar_timestamp === $revRow->rev_timestamp &&
144 $row->ar_sha1 === $revRow->rev_sha1 &&
145 $row->ar_user === $revRow->rev_user &&
146 $row->ar_user_text === $revRow->rev_user_text
149 "Row $row->ar_id duplicates revision row for rev_id $revRow->rev_id, deleting\n"
151 $toDelete[] = $row->ar_id;
157 if ( !isset( $seen[$row->ar_rev_id] ) ) {
159 $seen[$row->ar_rev_id] = [
160 'first' =>
"archive row $row->ar_id",
163 } elseif ( !isset( $seen[$row->ar_rev_id][$key] ) ) {
165 $seen[$row->ar_rev_id][$key] = $row->ar_id;
167 "Row $row->ar_id conflicts with {$seen[$row->ar_rev_id]['first']} "
168 .
"for rev_id $row->ar_rev_id, reassigning\n"
170 $toReassign[] = $row->ar_id;
174 "Row $row->ar_id duplicates archive row {$seen[$row->ar_rev_id][$key]} "
175 .
"for rev_id $row->ar_rev_id, deleting\n"
177 $toDelete[] = $row->ar_id;
183 $dbw->
delete(
'archive', [
'ar_id' => $toDelete ], __METHOD__ );
197 return implode(
"\n", [
Maintenance script that cleans up archive rows with duplicated ar_rev_id, both within archive and between archive and revision.
__construct()
Default constructor.
getUpdateKey()
Get the update key name to go in the update log table.
doDBUpdates()
Do the actual work.
getSeenKey( $row)
Make a key identifying a "unique" change from a row.
processArRevIds(IDatabase $dbw, array $arRevIds, array $revRows)
Process a set of ar_rev_ids.
Class for scripts that perform database maintenance and want to log the update in updatelog so we can tell whether the update has already been applied.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
output( $out, $channel=null)
Throw some output to the user.
getDB( $db, $groups=[], $wiki=false)
Returns a database to be used by current maintenance script.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
setBatchSize( $s=0)
Set the batch size.
static checkMysqlAutoIncrementBug(IDatabase $dbw)
Check for (and work around) a MySQL auto-increment bug.
static reassignArRevIds(IDatabase $dbw, array $arIds, array $conds=[])
Assign new ar_rev_ids to a set of ar_ids.
require_once RUN_MAINTENANCE_IF_MAIN
The wiki should then use memcached to cache various data. To use multiple servers, just add more items to the array. To increase the weight of a server, make its entry a sub-array: array( "192.168.0.1:11211", 2 ).