MediaWiki  master
populateImageSha1.php
Go to the documentation of this file.
1 <?php
26 
27 require_once __DIR__ . '/Maintenance.php';
28 
/**
 * Default constructor.
 *
 * Sets the script description and registers its command-line options:
 * two plain flags (force, multiversiononly) and two options that take a
 * value (method, file).
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Populate the img_sha1 field' );

	// Plain flags: neither required nor followed by an argument.
	$flagHelp = [
		'force' => 'Recalculate sha1 for rows that already have a value',
		'multiversiononly' => 'Calculate only for files with several versions',
	];
	foreach ( $flagHelp as $flagName => $helpText ) {
		$this->addOption( $flagName, $helpText );
	}

	// Options carrying a value: not required ($required = false), with an
	// argument ($withArg = true).
	$methodHelp = "Use 'pipe' to pipe to mysql command line,\n\t\tdefault uses Database class";
	$this->addOption( 'method', $methodHelp, false, true );

	$fileHelp = 'Fix for a specific file, without File: namespace prefixed';
	$this->addOption( 'file', $fileHelp, false, true );
}
49 
/**
 * Get the update key name to go in the update log table.
 *
 * @return string
 */
protected function getUpdateKey() {
	$updateKey = 'populate img_sha1';

	return $updateKey;
}
53 
/**
 * Message to show that the update was done already and was just skipped.
 *
 * @return string
 */
protected function updateSkippedMessage() {
	$message = 'img_sha1 column of image table already populated.';

	return $message;
}
57 
/**
 * Do the actual work.
 *
 * A run restricted by --file or --multiversiononly calls doDBUpdates()
 * directly, which skips the update log checks/saves; any other run is
 * delegated to the parent implementation.
 */
public function execute() {
	$isRestrictedRun = $this->getOption( 'file' ) || $this->hasOption( 'multiversiononly' );

	if ( !$isRestrictedRun ) {
		parent::execute();
		return;
	}

	// skip update log checks/saves
	$this->doDBUpdates();
}
65 
/**
 * Do the actual work: hash the selected files and store the results.
 *
 * File names are selected from the image table (a single name with --file,
 * rows with an empty img_sha1 by default, every row with --force) or, with
 * --multiversiononly, as distinct oi_name values from the oldimage table.
 * For each name the current version and every old version returned by
 * getHistory() are hashed through the file repo. When a forced or
 * single-file run finds a stored hash that differs from the computed one,
 * the row is fixed via upgradeRow(); otherwise the hash is written with a
 * plain UPDATE, either through the database handle or, with
 * --method=pipe, through a pipe to the mysql command line client.
 *
 * @return bool True when no --file option was given; false for a
 *  single-file run, which only updated *some* files and must not be logged.
 */
public function doDBUpdates() {
	$method = $this->getOption( 'method', 'normal' );
	$file = $this->getOption( 'file', '' );
	$force = $this->getOption( 'force' );
	$isRegen = ( $force || $file != '' ); // forced recalculation?

	$t = -microtime( true );
	$dbw = $this->getDB( DB_PRIMARY );
	if ( $file != '' ) {
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'img_name' ] )
			->from( 'image' )
			->where( [ 'img_name' => $file ] )
			->caller( __METHOD__ )->fetchResultSet();
		// The result set is an object, so it is truthy even when nothing
		// matched; an unknown file name shows up as zero rows instead.
		if ( !$res->numRows() ) {
			$this->fatalError( "No such file: $file" );
		}
		$this->output( "Populating img_sha1 field for specified files\n" );
	} elseif ( $this->hasOption( 'multiversiononly' ) ) {
		$conds = [];
		$this->output( "Populating and recalculating img_sha1 field for versioned files\n" );
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'img_name' => 'DISTINCT(oi_name)' ] )
			->from( 'oldimage' )
			->where( $conds )
			->caller( __METHOD__ )->fetchResultSet();
	} else {
		if ( $force ) {
			$conds = [];
			$this->output( "Populating and recalculating img_sha1 field\n" );
		} else {
			$conds = [ 'img_sha1' => '' ];
			$this->output( "Populating img_sha1 field\n" );
		}
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'img_name' ] )
			->from( 'image' )
			->where( $conds )
			->caller( __METHOD__ )->fetchResultSet();
	}

	$imageTable = $dbw->tableName( 'image' );
	$oldImageTable = $dbw->tableName( 'oldimage' );

	if ( $method == 'pipe' ) {
		// Opening a pipe allows the SHA-1 operation to be done in parallel
		// with the database write operation, because the writes are queued
		// in the pipe buffer. This can improve performance by up to a
		// factor of 2.
		$config = $this->getConfig();
		$cmd = 'mysql -u' . Shell::escape( $config->get( MainConfigNames::DBuser ) ) .
			' -h' . Shell::escape( $config->get( MainConfigNames::DBserver ) ) .
			' -p' . Shell::escape( $config->get( MainConfigNames::DBpassword ),
				$config->get( MainConfigNames::DBname ) );
		$this->output( "Using pipe method\n" );
		$pipe = popen( $cmd, 'w' );
		// popen() yields false when the process cannot be started; stop here
		// rather than handing false to fwrite()/fflush()/pclose() below.
		if ( !$pipe ) {
			$this->fatalError( "Could not open a pipe to the mysql command line client" );
		}
	}

	$numRows = $res->numRows();
	$batchSize = $this->getBatchSize();
	$i = 0;
	foreach ( $res as $row ) {
		if ( $i % $batchSize == 0 ) {
			$this->output( sprintf(
				"Done %d of %d, %5.3f%% \r", $i, $numRows, $i / $numRows * 100 ) );
			$this->waitForReplication();
		}

		// Kept in its own variable: $file still holds the --file option and
		// decides the return value at the end of this method.
		$localFile = $this->getServiceContainer()->getRepoGroup()->getLocalRepo()
			->newFile( $row->img_name );
		if ( !$localFile ) {
			continue;
		}

		// Upgrade the current file version...
		$sha1 = $localFile->getRepo()->getFileSha1( $localFile->getPath() );
		if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
			if ( $isRegen && $localFile->getSha1() !== $sha1 ) {
				// The population was probably done already. If the old SHA1
				// does not match, then both fix the SHA1 and the metadata.
				$localFile->upgradeRow();
			} else {
				$sql = "UPDATE $imageTable SET img_sha1=" . $dbw->addQuotes( $sha1 ) .
					" WHERE img_name=" . $dbw->addQuotes( $localFile->getName() );
				if ( $method == 'pipe' ) {
					// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable
					// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal pipe is set when used
					fwrite( $pipe, "$sql;\n" );
				} else {
					$dbw->query( $sql, __METHOD__ );
				}
			}
		}
		// Upgrade the old file versions...
		foreach ( $localFile->getHistory() as $oldFile ) {
			'@phan-var OldLocalFile $oldFile';
			$sha1 = $oldFile->getRepo()->getFileSha1( $oldFile->getPath() );
			if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
				if ( $isRegen && $oldFile->getSha1() !== $sha1 ) {
					// The population was probably done already. If the old SHA1
					// does not match, then both fix the SHA1 and the metadata.
					$oldFile->upgradeRow();
				} else {
					$sql = "UPDATE $oldImageTable SET oi_sha1=" . $dbw->addQuotes( $sha1 ) .
						" WHERE (oi_name=" . $dbw->addQuotes( $oldFile->getName() ) . " AND" .
						" oi_archive_name=" . $dbw->addQuotes( $oldFile->getArchiveName() ) . ")";
					if ( $method == 'pipe' ) {
						// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable
						// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal
						fwrite( $pipe, "$sql;\n" );
					} else {
						$dbw->query( $sql, __METHOD__ );
					}
				}
			}
		}
		$i++;
	}
	if ( $method == 'pipe' ) {
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal,PhanPossiblyUndeclaredVariable
		fflush( $pipe );
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal,PhanPossiblyUndeclaredVariable
		pclose( $pipe );
	}
	$t += microtime( true );
	$this->output( sprintf( "\nDone %d files in %.1f seconds\n", $numRows, $t ) );

	return !$file; // we only updated *some* files, don't log
}
198 }
199 
200 $maintClass = PopulateImageSha1::class;
201 require_once RUN_MAINTENANCE_IF_MAIN;
Class for scripts that perform database maintenance and want to log the update in updatelog so we can later skip it.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getServiceContainer()
Returns the main service container.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
A class containing constants representing the names of configuration variables.
Executes shell commands.
Definition: Shell.php:46
Maintenance script to populate the img_sha1 field.
execute()
Do the actual work.
doDBUpdates()
Do the actual work.
__construct()
Default constructor.
getUpdateKey()
Get the update key name to go in the update log table.
updateSkippedMessage()
Message to show that the update was done already and was just skipped.
const DB_PRIMARY
Definition: defines.php:28
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42