MediaWiki  master
populateRevisionSha1.php
Go to the documentation of this file.
1 <?php
25 require_once __DIR__ . '/Maintenance.php';
26 
28 
/**
 * Set up the script: register its description and a default batch size of 200.
 */
public function __construct() {
	parent::__construct();

	// Default number of ids handled per batch by doSha1Updates().
	$this->setBatchSize( 200 );
	$this->addDescription( 'Populates the rev_sha1 and ar_sha1 fields' );
}
41 
/**
 * Key identifying this update.
 *
 * @return string
 */
protected function getUpdateKey() {
	$updateKey = 'populate rev_sha1';

	return $updateKey;
}
45 
/**
 * Populate the SHA-1 columns of the revision and archive tables.
 *
 * Runs three passes: revision rows, archive rows that have an ar_rev_id,
 * and finally legacy archive rows without one.
 *
 * @return bool False if the rev_sha1 column is missing, true after all passes ran
 */
protected function doDBUpdates() {
	$dbw = $this->getDB( DB_MASTER );

	// Verify that the schema is in the state this script expects.
	if ( !$dbw->tableExists( 'revision' ) ) {
		$this->fatalError( "revision table does not exist" );
	} elseif ( !$dbw->tableExists( 'archive' ) ) {
		$this->fatalError( "archive table does not exist" );
	} elseif ( !$dbw->fieldExists( 'revision', 'rev_sha1', __METHOD__ ) ) {
		$this->output( "rev_sha1 column does not exist\n\n", true );

		return false;
	}

	$store = MediaWikiServices::getInstance()->getRevisionStore();

	// Pass 1: the revision table.
	$this->output( "Populating rev_sha1 column\n" );
	$revisionQuery = $store->getQueryInfo();
	$revisionRows = $this->doSha1Updates( $store, 'revision', 'rev_id', $revisionQuery, 'rev' );

	// Pass 2: archive rows that carry an ar_rev_id.
	$this->output( "Populating ar_sha1 column\n" );
	$archiveQuery = $store->getArchiveQueryInfo();
	$archiveRows = $this->doSha1Updates( $store, 'archive', 'ar_rev_id', $archiveQuery, 'ar' );

	// Pass 3: archive rows whose ar_rev_id is NULL.
	$this->output( "Populating ar_sha1 column legacy rows\n" );
	$legacyRows = $this->doSha1LegacyUpdates( $store );
	$archiveRows += $legacyRows;

	$summary = "rev_sha1 and ar_sha1 population complete "
		. "[$revisionRows revision rows, $archiveRows archive rows].\n";
	$this->output( $summary );

	return true;
}
77 
/**
 * Walk a table in id ranges of one batch each and fill in empty SHA-1 values.
 *
 * @param mixed $revStore Revision store, handed on to upgradeRow()
 * @param string $table Table to scan and update
 * @param string $idCol Numeric id column used to split the table into ranges
 * @param array $queryInfo Query description with 'tables', 'fields' and 'joins' keys
 * @param string $prefix Column prefix; the SHA-1 column is "{$prefix}_sha1"
 * @return int Number of rows for which upgradeRow() reported success
 */
protected function doSha1Updates( $revStore, $table, $idCol, $queryInfo, $prefix ) {
	$dbw = $this->getDB( DB_MASTER );
	$step = $this->getBatchSize();

	$start = $dbw->selectField( $table, "MIN($idCol)", '', __METHOD__ );
	$end = $dbw->selectField( $table, "MAX($idCol)", '', __METHOD__ );
	if ( !( $start && $end ) ) {
		$this->output( "...$table table seems to be empty.\n" );

		return 0;
	}

	// Push the upper bound out by almost one batch so that the last,
	// possibly partial, range still satisfies the loop condition.
	$end += $step - 1;

	$updated = 0;
	for (
		$blockStart = $start, $blockEnd = $start + $step - 1;
		$blockEnd <= $end;
		$blockStart += $step, $blockEnd += $step
	) {
		$this->output( "...doing $idCol from $blockStart to $blockEnd\n" );

		// Only rows inside the current id range whose SHA-1 is still empty.
		$cond = implode( ' AND ', [
			"$idCol BETWEEN " . (int)$blockStart . " AND " . (int)$blockEnd,
			"$idCol IS NOT NULL",
			"{$prefix}_sha1 = ''",
		] );
		$rows = $dbw->select(
			$queryInfo['tables'],
			$queryInfo['fields'],
			$cond,
			__METHOD__,
			[],
			$queryInfo['joins']
		);

		// One transaction per id range.
		$this->beginTransaction( $dbw, __METHOD__ );
		foreach ( $rows as $row ) {
			if ( $this->upgradeRow( $revStore, $row, $table, $idCol, $prefix ) ) {
				$updated++;
			}
		}
		$this->commitTransaction( $dbw, __METHOD__ );
	}

	return $updated;
}
125 
/**
 * Fill in ar_sha1 for legacy archive rows, i.e. rows whose ar_rev_id is NULL
 * and whose ar_sha1 is still empty.
 *
 * Rows are processed in a single result set; the transaction is committed
 * and reopened after every 100 processed rows.
 *
 * @param mixed $revStore Revision store providing getArchiveQueryInfo();
 *  also handed on to upgradeLegacyArchiveRow()
 * @return int Number of rows for which upgradeLegacyArchiveRow() reported success
 */
protected function doSha1LegacyUpdates( $revStore ) {
	$count = 0;
	$db = $this->getDB( DB_MASTER );
	$arQuery = $revStore->getArchiveQueryInfo();
	$res = $db->select( $arQuery['tables'], $arQuery['fields'],
		[ 'ar_rev_id IS NULL', 'ar_sha1' => '' ], __METHOD__, [], $arQuery['joins'] );

	// Rows handled between two commits.
	$rowsPerCommit = 100;

	$updateSize = 0;
	$this->beginTransaction( $db, __METHOD__ );
	foreach ( $res as $row ) {
		if ( $this->upgradeLegacyArchiveRow( $revStore, $row ) ) {
			++$count;
		}
		// Counts every processed row, including ones that failed to upgrade.
		if ( ++$updateSize >= $rowsPerCommit ) {
			$updateSize = 0;
			$this->commitTransaction( $db, __METHOD__ );
			$this->output( "Committed row with ar_timestamp={$row->ar_timestamp}\n" );
			$this->beginTransaction( $db, __METHOD__ );
		}
	}
	// Commit whatever is left since the last intermediate commit.
	$this->commitTransaction( $db, __METHOD__ );

	return $count;
}
154 
/**
 * Compute the SHA-1 for one revision or archive row and store it.
 *
 * @param mixed $revStore Revision store used to build a revision object from the row
 * @param stdClass $row Row selected by doSha1Updates()
 * @param string $table Table being updated; 'archive' selects the archive-row constructor
 * @param string $idCol Id column used to address the row in the UPDATE
 * @param string $prefix Column prefix; the SHA-1 column is "{$prefix}_sha1"
 * @return bool False if the revision data could not be loaded, true after the update
 */
protected function upgradeRow( $revStore, $row, $table, $idCol, $prefix ) {
	$dbw = $this->getDB( DB_MASTER );

	// Build a revision object from the row and ask it for its SHA-1.
	try {
		if ( $table === 'archive' ) {
			$revision = $revStore->newRevisionFromArchiveRow( $row );
		} else {
			$revision = $revStore->newRevisionFromRow( $row );
		}
		$hash = $revision->getSha1();
	} catch ( Exception $e ) {
		$this->output( "Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );

		return false; // T24624? T22757?
	}

	$sha1Column = "{$prefix}_sha1";
	$rowId = $row->$idCol;
	$dbw->update(
		$table,
		[ $sha1Column => $hash ],
		[ $idCol => $rowId ],
		__METHOD__
	);

	return true;
}
185 
/**
 * Compute the SHA-1 for one legacy archive row (no ar_rev_id) and store it.
 *
 * @param mixed $revStore Revision store used to build a revision object from the row
 * @param stdClass $row Archive row selected by doSha1LegacyUpdates()
 * @return bool False if the revision text could not be loaded, true after the update
 */
protected function upgradeLegacyArchiveRow( $revStore, $row ) {
	$dbw = $this->getDB( DB_MASTER );

	// Build a revision object from the archive row and ask it for its SHA-1.
	try {
		$hash = $revStore->newRevisionFromArchiveRow( $row )->getSha1();
	} catch ( Exception $e ) {
		$this->output( "Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );

		return false; // T24624? T22757?
	}

	# The row cannot be addressed by id here, so it is matched on
	# (namespace, title, timestamp), which should be near unique.
	# Any duplicates on those should also have duplicated text anyway.
	$match = [
		'ar_namespace' => $row->ar_namespace,
		'ar_title' => $row->ar_title,
		'ar_timestamp' => $row->ar_timestamp,
		'ar_len' => $row->ar_len // extra sanity
	];
	$dbw->update( 'archive', [ 'ar_sha1' => $hash ], $match, __METHOD__ );

	return true;
}
218 }
219 
// Name the maintenance class defined in this file, then include the
// bootstrap file referenced by the RUN_MAINTENANCE_IF_MAIN constant.
220 $maintClass = PopulateRevisionSha1::class;
221 require_once RUN_MAINTENANCE_IF_MAIN;
commitTransaction(IDatabase $dbw, $fname)
Commit the transaction on a DB handle and wait for replica DBs to catch up.
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:39
upgradeLegacyArchiveRow( $revStore, $row)
upgradeRow( $revStore, $row, $table, $idCol, $prefix)
setBatchSize( $s=0)
Set the batch size.
const DB_MASTER
Definition: defines.php:26
addDescription( $text)
Set the description text.
doSha1Updates( $revStore, $table, $idCol, $queryInfo, $prefix)
output( $out, $channel=null)
Throw some output to the user.
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
getBatchSize()
Returns batch size.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Maintenance script that fills the rev_sha1 and ar_sha1 columns of revision and archive tables for rev...
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
beginTransaction(IDatabase $dbw, $fname)
Begin a transaction on a DB.