MediaWiki  master
populateImageSha1.php
Go to the documentation of this file.
1 <?php
27 
28 require_once __DIR__ . '/Maintenance.php';
29 
/**
 * Set up the script: register its description and command-line options.
 *
 * 'force' and 'multiversiononly' are plain switches; 'method' and 'file'
 * are optional options that take an argument.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Populate the img_sha1 field' );

	// Switches (no argument).
	$this->addOption( 'force', "Recalculate sha1 for rows that already have a value" );
	$this->addOption( 'multiversiononly', "Calculate only for files with several versions" );

	// Options with a value: third argument = not required, fourth = takes an argument.
	$methodHelp = "Use 'pipe' to pipe to mysql command line,\n\t\tdefault uses Database class";
	$this->addOption( 'method', $methodHelp, false, true );

	$fileHelp = 'Fix for a specific file, without File: namespace prefixed';
	$this->addOption( 'file', $fileHelp, false, true );
}
50 
/**
 * Get the update key name to go in the update log table.
 *
 * @return string
 */
protected function getUpdateKey() {
	$updateKey = 'populate img_sha1';

	return $updateKey;
}
54 
/**
 * Message to show when the update was done already and is being skipped.
 *
 * @return string
 */
protected function updateSkippedMessage() {
	$message = 'img_sha1 column of image table already populated.';

	return $message;
}
58 
/**
 * Entry point of the script.
 *
 * A run limited by the 'file' or 'multiversiononly' option calls
 * doDBUpdates() directly, which skips the update log checks/saves;
 * any other run is delegated to the parent implementation.
 */
public function execute() {
	$isPartialRun = $this->getOption( 'file' ) || $this->hasOption( 'multiversiononly' );

	if ( !$isPartialRun ) {
		parent::execute();
		return;
	}

	$this->doDBUpdates(); // skip update log checks/saves
}
66 
/**
 * Compute the SHA-1 of the selected files and of their old versions, and
 * store it in img_sha1 / oi_sha1.
 *
 * Which files are selected depends on the options:
 *  - 'file': only that file (fatal error when it is not in the image table);
 *  - 'multiversiononly': every name found in the oldimage table;
 *  - 'force': every row of the image table;
 *  - otherwise: only image rows whose img_sha1 is still empty.
 *
 * With 'method' set to 'pipe' the UPDATE statements are written to a
 * mysql command line client instead of being run through the Database class.
 *
 * @return bool True when the run was not restricted to one file and the
 *  writes went through, i.e. the update may be logged as done; false when
 *  only a specific file was processed or the piped mysql client failed.
 */
public function doDBUpdates() {
	$method = $this->getOption( 'method', 'normal' );
	// Keep the requested name in its own variable. The loop below works with
	// file objects; reusing one variable for both made the return value depend
	// on the last file object instead of on the 'file' option.
	$fileName = $this->getOption( 'file', '' );
	$singleFile = ( $fileName != '' );
	$force = $this->getOption( 'force' );
	$multiVersionOnly = $this->hasOption( 'multiversiononly' );
	$isRegen = ( $force || $singleFile ); // forced recalculation?
	$fname = __METHOD__;

	$t = -microtime( true );
	$dbw = $this->getDB( DB_PRIMARY );
	if ( $singleFile ) {
		$res = $dbw->select(
			'image',
			[ 'img_name' ],
			[ 'img_name' => $fileName ],
			$fname
		);
		// The result is an object even when nothing matched, so the row
		// count is what tells us whether the file exists.
		if ( !$res || $res->numRows() === 0 ) {
			$this->fatalError( "No such file: $fileName" );
		}
		$this->output( "Populating img_sha1 field for specified files\n" );
	} else {
		if ( $multiVersionOnly ) {
			$conds = [];
			$this->output( "Populating and recalculating img_sha1 field for versioned files\n" );
		} elseif ( $force ) {
			$conds = [];
			$this->output( "Populating and recalculating img_sha1 field\n" );
		} else {
			$conds = [ 'img_sha1' => '' ];
			$this->output( "Populating img_sha1 field\n" );
		}
		if ( $multiVersionOnly ) {
			$res = $dbw->select( 'oldimage',
				[ 'img_name' => 'DISTINCT(oi_name)' ], $conds, $fname );
		} else {
			$res = $dbw->select( 'image', [ 'img_name' ], $conds, $fname );
		}
	}

	$imageTable = $dbw->tableName( 'image' );
	$oldImageTable = $dbw->tableName( 'oldimage' );

	$pipe = null;
	if ( $method === 'pipe' ) {
		// Opening a pipe allows the SHA-1 operation to be done in parallel
		// with the database write operation, because the writes are queued
		// in the pipe buffer. This can improve performance by up to a
		// factor of 2.
		$config = $this->getConfig();
		// NOTE(review): Shell::escape() receives two arguments in the last call;
		// this relies on it quoting each one and joining them with a space, so
		// the password stays attached to -p and the database name follows as a
		// separate word - confirm against Shell.php.
		// NOTE(review): the password ends up on the mysql command line.
		$cmd = 'mysql -u' . Shell::escape( $config->get( MainConfigNames::DBuser ) ) .
			' -h' . Shell::escape( $config->get( MainConfigNames::DBserver ) ) .
			' -p' . Shell::escape( $config->get( MainConfigNames::DBpassword ),
				$config->get( MainConfigNames::DBname ) );
		$this->output( "Using pipe method\n" );
		$pipe = popen( $cmd, 'w' );
		if ( $pipe === false ) {
			// Without this check every later fwrite() would be handed false.
			$this->fatalError( "Could not open a pipe to the mysql command line client" );
		}
	}

	// Send one UPDATE statement either down the pipe or through the Database class.
	$applySql = static function ( $sql ) use ( $dbw, $pipe, $fname ) {
		if ( $pipe !== null ) {
			fwrite( $pipe, "$sql;\n" );
		} else {
			$dbw->query( $sql, $fname );
		}
	};

	$localRepo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
	$batchSize = $this->getBatchSize();
	$numRows = $res->numRows();
	$i = 0;
	foreach ( $res as $row ) {
		if ( $i % $batchSize == 0 ) {
			$this->output( sprintf(
				"Done %d of %d, %5.3f%% \r", $i, $numRows, $i / $numRows * 100 ) );
			$this->waitForReplication();
		}

		$localFile = $localRepo->newFile( $row->img_name );
		if ( !$localFile ) {
			continue;
		}

		// Upgrade the current file version...
		$sha1 = $localFile->getRepo()->getFileSha1( $localFile->getPath() );
		if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
			if ( $isRegen && $localFile->getSha1() !== $sha1 ) {
				// The population was probably done already. If the old SHA1
				// does not match, then both fix the SHA1 and the metadata.
				$localFile->upgradeRow();
			} else {
				$applySql(
					"UPDATE $imageTable SET img_sha1=" . $dbw->addQuotes( $sha1 ) .
					" WHERE img_name=" . $dbw->addQuotes( $localFile->getName() )
				);
			}
		}

		// Upgrade the old file versions...
		foreach ( $localFile->getHistory() as $oldFile ) {
			'@phan-var OldLocalFile $oldFile';
			$sha1 = $oldFile->getRepo()->getFileSha1( $oldFile->getPath() );
			if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
				if ( $isRegen && $oldFile->getSha1() !== $sha1 ) {
					// The population was probably done already. If the old SHA1
					// does not match, then both fix the SHA1 and the metadata.
					$oldFile->upgradeRow();
				} else {
					$applySql(
						"UPDATE $oldImageTable SET oi_sha1=" . $dbw->addQuotes( $sha1 ) .
						" WHERE (oi_name=" . $dbw->addQuotes( $oldFile->getName() ) . " AND" .
						" oi_archive_name=" . $dbw->addQuotes( $oldFile->getArchiveName() ) . ")"
					);
				}
			}
		}
		$i++;
	}

	if ( $pipe !== null ) {
		fflush( $pipe );
		$exitStatus = pclose( $pipe );
		if ( $exitStatus !== 0 ) {
			// The queued statements may not have been applied, so do not
			// report the population as complete.
			$this->output( "\nmysql command line client exited with status $exitStatus\n" );
			return false;
		}
	}
	$t += microtime( true );
	$this->output( sprintf( "\nDone %d files in %.1f seconds\n", $numRows, $t ) );

	return !$singleFile; // we only updated *some* files, don't log
}
193 }
194 
// NOTE(review): presumably $maintClass names the class that the file pulled in
// through RUN_MAINTENANCE_IF_MAIN should instantiate and run - confirm against Maintenance.php.
195 $maintClass = PopulateImageSha1::class;
196 require_once RUN_MAINTENANCE_IF_MAIN;
Class for scripts that perform database maintenance and want to log the update in updatelog so we can later skip it.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Executes shell commands.
Definition: Shell.php:46
Maintenance script to populate the img_sha1 field.
execute()
Do the actual work.
doDBUpdates()
Do the actual work.
__construct()
Default constructor.
getUpdateKey()
Get the update key name to go in the update log table.
updateSkippedMessage()
Message to show that the update was done already and was just skipped.
const DB_PRIMARY
Definition: defines.php:28
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42