MediaWiki master
populateImageSha1.php
Go to the documentation of this file.
1<?php
26
27require_once __DIR__ . '/Maintenance.php';
28
/**
 * Register the script description and its command-line options.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Populate the img_sha1 field' );

	// Plain flags: neither required nor taking an argument.
	$this->addOption( 'force', "Recalculate sha1 for rows that already have a value" );
	$this->addOption( 'multiversiononly', "Calculate only for files with several versions" );

	// Optional options that take an argument ($required = false, $withArg = true).
	$methodHelp = "Use 'pipe' to pipe to mysql command line,\n" .
		"\t\tdefault uses Database class";
	$this->addOption( 'method', $methodHelp, false, true );

	$fileHelp = 'Fix for a specific file, without File: namespace prefixed';
	$this->addOption( 'file', $fileHelp, false, true );
}
49
/**
 * Get the update key name to go in the update log table.
 *
 * @return string
 */
protected function getUpdateKey() {
	$updateKey = 'populate img_sha1';

	return $updateKey;
}
53
/**
 * Message to show when the update was already done and is being skipped.
 *
 * @return string
 */
protected function updateSkippedMessage() {
	$message = 'img_sha1 column of image table already populated.';

	return $message;
}
57
/**
 * Entry point of the script.
 *
 * When the run is restricted with --file or --multiversiononly, the update
 * is performed directly; otherwise execution is delegated to the parent
 * class.
 */
public function execute() {
	$restrictedRun = $this->getOption( 'file' ) || $this->hasOption( 'multiversiononly' );

	if ( !$restrictedRun ) {
		parent::execute();
		return;
	}

	// skip update log checks/saves
	$this->doDBUpdates();
}
65
/**
 * Compute the SHA-1 of the selected files and store it in img_sha1 / oi_sha1.
 *
 * Rows are selected from the image table (a single file with --file, rows
 * with an empty img_sha1 by default, or every row with --force), or from the
 * distinct names in oldimage with --multiversiononly. For each selected file
 * the current version and every old version returned by getHistory() are
 * hashed. With --method=pipe the UPDATE statements are written to a `mysql`
 * command-line process instead of being run through the Database class.
 *
 * @return bool True when the run was not limited to a single file and the
 *  pipe (if one was used) exited cleanly; false otherwise, so that a partial
 *  or failed run is not recorded as complete.
 */
public function doDBUpdates() {
	$method = $this->getOption( 'method', 'normal' );
	$file = $this->getOption( 'file', '' );
	$force = $this->getOption( 'force' );
	$isRegen = ( $force || $file != '' ); // forced recalculation?

	$t = -microtime( true );
	$dbw = $this->getPrimaryDB();
	if ( $file != '' ) {
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'img_name' ] )
			->from( 'image' )
			->where( [ 'img_name' => $file ] )
			->caller( __METHOD__ )->fetchResultSet();
		if ( !$res->numRows() ) {
			$this->fatalError( "No such file: $file" );
		}
		$this->output( "Populating img_sha1 field for specified files\n" );
	} else {
		if ( $this->hasOption( 'multiversiononly' ) ) {
			$conds = [];
			$this->output( "Populating and recalculating img_sha1 field for versioned files\n" );
		} elseif ( $force ) {
			$conds = [];
			$this->output( "Populating and recalculating img_sha1 field\n" );
		} else {
			$conds = [ 'img_sha1' => '' ];
			$this->output( "Populating img_sha1 field\n" );
		}
		if ( $this->hasOption( 'multiversiononly' ) ) {
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'img_name' => 'DISTINCT(oi_name)' ] )
				->from( 'oldimage' )
				->where( $conds )
				->caller( __METHOD__ )->fetchResultSet();
		} else {
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'img_name' ] )
				->from( 'image' )
				->where( $conds )
				->caller( __METHOD__ )->fetchResultSet();
		}
	}

	$imageTable = $dbw->tableName( 'image' );
	$oldImageTable = $dbw->tableName( 'oldimage' );

	// Stays null unless the pipe method is selected; every later use is
	// guarded by a truthiness check on this handle.
	$pipe = null;
	if ( $method == 'pipe' ) {
		// Opening a pipe allows the SHA-1 operation to be done in parallel
		// with the database write operation, because the writes are queued
		// in the pipe buffer. This can improve performance by up to a
		// factor of 2.
		// NOTE(review): the password is passed on the command line (-p...),
		// where it may be visible to other local users — confirm acceptable.
		$config = $this->getConfig();
		$cmd = 'mysql -u' . Shell::escape( $config->get( MainConfigNames::DBuser ) ) .
			' -h' . Shell::escape( $config->get( MainConfigNames::DBserver ) ) .
			' -p' . Shell::escape( $config->get( MainConfigNames::DBpassword ),
				$config->get( MainConfigNames::DBname ) );
		$this->output( "Using pipe method\n" );
		$pipe = popen( $cmd, 'w' );
		if ( !$pipe ) {
			// popen() returns false on failure; writing to it would be an error.
			$this->fatalError( "Unable to open a pipe to the mysql command line client" );
		}
	}

	// The local repo does not change between rows, so look it up once.
	$localRepo = $this->getServiceContainer()->getRepoGroup()->getLocalRepo();

	$numRows = $res->numRows();
	$i = 0;
	foreach ( $res as $row ) {
		if ( $i % $this->getBatchSize() == 0 ) {
			$this->output( sprintf(
				"Done %d of %d, %5.3f%% \r", $i, $numRows, $i / $numRows * 100 ) );
			$this->waitForReplication();
		}
		// Count the row now so that rows skipped below still advance the
		// progress counter and do not re-trigger the batch branch.
		$i++;

		// Use a dedicated variable: $file holds the --file option value and
		// is needed again for the return value at the end.
		$localFile = $localRepo->newFile( $row->img_name );
		if ( !$localFile ) {
			continue;
		}

		// Upgrade the current file version...
		$sha1 = $localFile->getRepo()->getFileSha1( $localFile->getPath() );
		if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
			if ( $isRegen && $localFile->getSha1() !== $sha1 ) {
				// The population was probably done already. If the old SHA1
				// does not match, then both fix the SHA1 and the metadata.
				$localFile->upgradeRow();
			} else {
				$sql = "UPDATE $imageTable SET img_sha1=" . $dbw->addQuotes( $sha1 ) .
					" WHERE img_name=" . $dbw->addQuotes( $localFile->getName() );
				if ( $pipe ) {
					fwrite( $pipe, "$sql;\n" );
				} else {
					$dbw->query( $sql, __METHOD__ );
				}
			}
		}
		// Upgrade the old file versions...
		foreach ( $localFile->getHistory() as $oldFile ) {
			'@phan-var OldLocalFile $oldFile';
			$sha1 = $oldFile->getRepo()->getFileSha1( $oldFile->getPath() );
			if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
				if ( $isRegen && $oldFile->getSha1() !== $sha1 ) {
					// The population was probably done already. If the old SHA1
					// does not match, then both fix the SHA1 and the metadata.
					$oldFile->upgradeRow();
				} else {
					$sql = "UPDATE $oldImageTable SET oi_sha1=" . $dbw->addQuotes( $sha1 ) .
						" WHERE (oi_name=" . $dbw->addQuotes( $oldFile->getName() ) . " AND" .
						" oi_archive_name=" . $dbw->addQuotes( $oldFile->getArchiveName() ) . ")";
					if ( $pipe ) {
						fwrite( $pipe, "$sql;\n" );
					} else {
						$dbw->query( $sql, __METHOD__ );
					}
				}
			}
		}
	}

	$pipeStatus = 0;
	if ( $pipe ) {
		fflush( $pipe );
		// pclose() returns the termination status of the process (-1 on error).
		$pipeStatus = pclose( $pipe );
	}
	$t += microtime( true );
	$this->output( sprintf( "\nDone %d files in %.1f seconds\n", $numRows, $t ) );

	if ( $pipeStatus !== 0 ) {
		$this->output( sprintf(
			"mysql pipe exited with status %d; some updates may not have been applied\n",
			$pipeStatus
		) );
		return false;
	}

	return $file == ''; // we only updated *some* files, don't log
}
198}
199
// Name the maintenance class defined in this file, then include the runner.
// NOTE(review): presumably the runner only executes the script when this
// file is the entry point — confirm against RUN_MAINTENANCE_IF_MAIN.
$maintClass = PopulateImageSha1::class;
require_once RUN_MAINTENANCE_IF_MAIN;
Class for scripts that perform database maintenance and want to log the update in updatelog so we can...
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getServiceContainer()
Returns the main service container.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
A class containing constants representing the names of configuration variables.
Executes shell commands.
Definition Shell.php:46
Maintenance script to populate the img_sha1 field.
execute()
Do the actual work.
doDBUpdates()
Do the actual work.
__construct()
Default constructor.
getUpdateKey()
Get the update key name to go in the update log table.
updateSkippedMessage()
Message to show that the update was done already and was just skipped.