25 require_once __DIR__ .
'/Maintenance.php';
37 parent::__construct();
38 $this->
addDescription(
'Populates the rev_sha1 and ar_sha1 fields' );
43 return 'populate rev_sha1';
// Pre-flight checks: a missing 'revision' or 'archive' table is reported through
// fatalError(); a missing rev_sha1 column is only reported through output().
49 if ( !$db->tableExists(
'revision' ) ) {
50 $this->
fatalError(
"revision table does not exist" );
51 } elseif ( !$db->tableExists(
'archive' ) ) {
52 $this->
fatalError(
"archive table does not exist" );
53 } elseif ( !$db->fieldExists(
'revision',
'rev_sha1', __METHOD__ ) ) {
54 $this->
output(
"rev_sha1 column does not exist\n\n",
true );
// Revision store from the service container; it provides getQueryInfo() and
// getArchiveQueryInfo() for the passes below.
58 $revStore = MediaWikiServices::getInstance()->getRevisionStore();
60 $this->
output(
"Populating rev_sha1 column\n" );
62 $revStore->getQueryInfo(),
'rev'
65 $this->
output(
"Populating ar_sha1 column\n" );
// Archive pass keyed on ar_rev_id; the result is kept in $ac and reported in the summary.
66 $ac = $this->
doSha1Updates( $revStore,
'archive',
'ar_rev_id',
67 $revStore->getArchiveQueryInfo(),
'ar'
69 $this->
output(
"Populating ar_sha1 column legacy rows\n" );
// Final summary line: $rc and $ac are printed as the revision and archive row counts.
// NOTE(review): the assignment of $rc is not visible in this excerpt.
72 $this->
output(
"rev_sha1 and ar_sha1 population complete "
73 .
"[$rc revision rows, $ac archive rows].\n" );
/**
 * Fill in empty {$prefix}_sha1 values for one table by walking $idCol in
 * fixed-size ID ranges.
 *
 * MIN($idCol) and MAX($idCol) bound the walk. For each range, rows whose ID lies
 * inside the range and whose {$prefix}_sha1 is the empty string are selected and
 * each one is handed to upgradeRow().
 *
 * @param object $revStore Revision store; in the visible code it is only forwarded to upgradeRow()
 * @param string $table Table to scan (the visible caller passes 'archive')
 * @param string $idCol ID column used for batching (the visible caller passes 'ar_rev_id')
 * @param array $queryInfo Query info; its 'tables', 'fields' and 'joins' entries are read
 * @param string $prefix Column prefix, e.g. 'ar', used to build "{$prefix}_sha1"
 * @return mixed NOTE(review): the return statement is not visible in this excerpt;
 *   the caller prints the result as a row count -- confirm
 */
86 protected function doSha1Updates( $revStore, $table, $idCol, $queryInfo, $prefix ) {
// The smallest and largest IDs present bound the batch walk.
89 $start = $db->selectField( $table,
"MIN($idCol)",
'', __METHOD__ );
90 $end = $db->selectField( $table,
"MAX($idCol)",
'', __METHOD__ );
// A falsy MIN or MAX is treated as "nothing to do".
91 if ( !$start || !$end ) {
92 $this->
output(
"...$table table seems to be empty.\n" );
// Pad $end by (batch size - 1) so that the `$blockEnd <= $end` test below still
// admits the final window, i.e. the one that contains the real maximum ID.
99 $end += $batchSize - 1;
100 $blockStart = $start;
101 $blockEnd = $start + $batchSize - 1;
102 while ( $blockEnd <= $end ) {
103 $this->
output(
"...doing $idCol from $blockStart to $blockEnd\n" );
// Range bounds are cast to int before being concatenated into the condition.
// Only rows whose sha1 column is still the empty string are matched.
105 $cond =
"$idCol BETWEEN " . (int)$blockStart .
" AND " . (
int)$blockEnd .
106 " AND $idCol IS NOT NULL AND {$prefix}_sha1 = ''";
108 $queryInfo[
'tables'], $queryInfo[
'fields'], $cond, __METHOD__, [], $queryInfo[
'joins']
112 foreach (
$res as $row ) {
113 if ( $this->
upgradeRow( $revStore, $row, $table, $idCol, $prefix ) ) {
// Slide the window forward to the next ID range.
119 $blockStart += $batchSize;
120 $blockEnd += $batchSize;
// Legacy pass: fetch, in a single query, archive rows that have a NULL ar_rev_id
// and an empty ar_sha1.
133 $arQuery = $revStore->getArchiveQueryInfo();
134 $res = $db->select( $arQuery[
'tables'], $arQuery[
'fields'],
135 [
'ar_rev_id IS NULL',
'ar_sha1' =>
'' ], __METHOD__, [], $arQuery[
'joins'] );
139 foreach (
$res as $row ) {
// Counter-based checkpoint: this branch runs once $updateSize reaches 100.
// NOTE(review): the rest of the branch body is not visible in this excerpt.
143 if ( ++$updateSize >= 100 ) {
// NOTE(review): "Commited" in the message below is a misspelling of "Committed";
// it is runtime output, so it is left unchanged here.
146 $this->
output(
"Commited row with ar_timestamp={$row->ar_timestamp}\n" );
/**
 * Compute the SHA-1 for a single row and pass it on for storage in the row's
 * {$prefix}_sha1 column.
 *
 * A revision object is built from the row -- newRevisionFromArchiveRow() when
 * $table is 'archive', newRevisionFromRow() otherwise -- and its getSha1() value
 * is used. If that raises an Exception, a "Data of revision ... unavailable!"
 * line is written via output().
 *
 * @param object $revStore Revision store used to build the revision object
 * @param object $row Result row; $row->$idCol is read
 * @param string $table Table name; only the comparison against 'archive' is visible
 * @param string $idCol Name of the ID column identifying the row
 * @param string $prefix Column prefix used to build "{$prefix}_sha1"
 * @return mixed NOTE(review): return statements are not visible in this excerpt;
 *   doSha1Updates() uses the result as an if-condition -- presumably a bool, confirm
 */
163 protected function upgradeRow( $revStore, $row, $table, $idCol, $prefix ) {
// Archive rows and live revision rows need different factory methods.
168 $rev = ( $table ===
'archive' )
169 ? $revStore->newRevisionFromArchiveRow( $row )
170 : $revStore->newRevisionFromRow( $row );
171 $sha1 = $rev->getSha1();
172 }
catch ( Exception $e ) {
173 $this->
output(
"Data of revision with {$idCol}={$row->$idCol} unavailable!\n" );
// Values to set, followed by the condition matching this row's ID.
// NOTE(review): the call receiving these two arrays falls outside this excerpt.
178 [
"{$prefix}_sha1" => $sha1 ],
179 [ $idCol => $row->$idCol ],
// Build a revision object from the archive row and take its SHA-1; an Exception
// here is reported via output() with the row's ar_timestamp.
196 $rev = $revStore->newRevisionFromArchiveRow( $row );
197 $sha1 = $rev->getSha1();
198 }
catch ( Exception $e ) {
199 $this->
output(
"Text of revision with timestamp {$row->ar_timestamp} unavailable!\n" );
203 # The archive table has no primary key, but (namespace, title, timestamp) should be nearly unique.
204 # Any rows that collide on those should also have identical text anyway.
// ar_len is matched as well, in addition to namespace, title and timestamp.
205 $db->update(
'archive',
206 [
'ar_sha1' => $sha1 ],
208 'ar_namespace' => $row->ar_namespace,
209 'ar_title' => $row->ar_title,
210 'ar_timestamp' => $row->ar_timestamp,
211 'ar_len' => $row->ar_len