MediaWiki  master
populateImageSha1.php
Go to the documentation of this file.
1 <?php
27 
28 require_once __DIR__ . '/Maintenance.php';
29 
/**
 * Register the script description and its command line options.
 *
 * 'force' and 'multiversiononly' are plain switches; 'method' and 'file'
 * are optional and take a value.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Populate the img_sha1 field' );

	// Switches without an argument, registered in declaration order.
	$switches = [
		'force' => "Recalculate sha1 for rows that already have a value",
		'multiversiononly' => "Calculate only for files with several versions",
	];
	foreach ( $switches as $switchName => $switchHelp ) {
		$this->addOption( $switchName, $switchHelp );
	}

	// Optional options that carry a value (required = false, withArg = true).
	$methodHelp = "Use 'pipe' to pipe to mysql command line,\n" .
		"\t\tdefault uses Database class";
	$this->addOption( 'method', $methodHelp, false, true );

	$fileHelp = 'Fix for a specific file, without File: namespace prefixed';
	$this->addOption( 'file', $fileHelp, false, true );
}
50 
/**
 * Name under which this update is recorded in the update log.
 *
 * @return string
 */
protected function getUpdateKey() {
	$updateKey = 'populate img_sha1';

	return $updateKey;
}
54 
/**
 * Text shown when the update was already done and is therefore skipped.
 *
 * @return string
 */
protected function updateSkippedMessage() {
	$skippedMessage = 'img_sha1 column of image table already populated.';

	return $skippedMessage;
}
58 
/**
 * Run the script.
 *
 * Partial runs (a specific --file, or --multiversiononly) call
 * doDBUpdates() directly so that the update log checks/saves performed by
 * the parent class are skipped. Every other run goes through the parent.
 */
public function execute() {
	// Compare against the empty string instead of relying on truthiness:
	// a file literally named "0" is a valid target and must be handled as
	// a partial run, the same way doDBUpdates() decides whether a file
	// name was supplied.
	$hasFile = (string)$this->getOption( 'file', '' ) !== '';

	if ( $hasFile || $this->hasOption( 'multiversiononly' ) ) {
		$this->doDBUpdates(); // skip update log checks/saves
	} else {
		parent::execute();
	}
}
66 
/**
 * Populate (or recalculate) img_sha1 for current files and oi_sha1 for
 * their old versions.
 *
 * Options read:
 *  - 'method': 'pipe' queues the UPDATE statements through the mysql
 *    command line client; any other value runs them via the Database class.
 *  - 'file': restrict the run to one file name.
 *  - 'force': also process rows that already have a value.
 *  - 'multiversiononly': take the file names from the oldimage table.
 *
 * @return bool True when no specific file was requested, false when only
 *  a single file was updated (such a partial run must not be logged).
 */
public function doDBUpdates() {
	$method = $this->getOption( 'method', 'normal' );
	$fileName = (string)$this->getOption( 'file', '' );
	$singleFile = $fileName !== '';
	$force = $this->getOption( 'force' );
	$isRegen = ( $force || $singleFile ); // forced recalculation?

	$t = -microtime( true );
	$dbw = $this->getDB( DB_PRIMARY );
	if ( $singleFile ) {
		$res = $dbw->select(
			'image',
			[ 'img_name' ],
			[ 'img_name' => $fileName ],
			__METHOD__
		);
		// The result object is truthy even when no row matched, so the
		// row count is what tells us whether the file exists.
		if ( $res->numRows() === 0 ) {
			$this->fatalError( "No such file: $fileName" );
		}
		$this->output( "Populating img_sha1 field for specified files\n" );
	} else {
		$multiVersionOnly = $this->hasOption( 'multiversiononly' );
		if ( $multiVersionOnly ) {
			$conds = [];
			$this->output( "Populating and recalculating img_sha1 field for versioned files\n" );
		} elseif ( $force ) {
			$conds = [];
			$this->output( "Populating and recalculating img_sha1 field\n" );
		} else {
			$conds = [ 'img_sha1' => '' ];
			$this->output( "Populating img_sha1 field\n" );
		}
		if ( $multiVersionOnly ) {
			$res = $dbw->select( 'oldimage',
				[ 'img_name' => 'DISTINCT(oi_name)' ], $conds, __METHOD__ );
		} else {
			$res = $dbw->select( 'image', [ 'img_name' ], $conds, __METHOD__ );
		}
	}

	$imageTable = $dbw->tableName( 'image' );
	$oldImageTable = $dbw->tableName( 'oldimage' );

	// Stays null unless the pipe method is selected and the pipe opened.
	$pipe = null;
	if ( $method === 'pipe' ) {
		// Opening a pipe allows the SHA-1 operation to be done in parallel
		// with the database write operation, because the writes are queued
		// in the pipe buffer. This can improve performance by up to a
		// factor of 2.
		$config = $this->getConfig();
		$cmd = 'mysql -u' . Shell::escape( $config->get( MainConfigNames::DBuser ) ) .
			' -h' . Shell::escape( $config->get( MainConfigNames::DBserver ) ) .
			' -p' . Shell::escape( $config->get( MainConfigNames::DBpassword ),
				$config->get( MainConfigNames::DBname ) );
		$this->output( "Using pipe method\n" );
		$handle = popen( $cmd, 'w' );
		if ( $handle === false ) {
			// Without a pipe every later write would fail; stop early.
			$this->fatalError( "Unable to open a pipe to the mysql command line client" );
		}
		$pipe = $handle;
	}

	$numRows = $res->numRows();
	$i = 0;
	// Loop-invariant lookups, fetched once.
	$services = MediaWikiServices::getInstance();
	$lbFactory = $services->getDBLoadBalancerFactory();
	$localRepo = $services->getRepoGroup()->getLocalRepo();
	$batchSize = $this->getBatchSize();

	foreach ( $res as $row ) {
		if ( $i % $batchSize == 0 ) {
			$this->output( sprintf(
				"Done %d of %d, %5.3f%% \r", $i, $numRows, $i / $numRows * 100 ) );
			$lbFactory->waitForReplication();
		}

		// Use a dedicated variable: the 'file' option value must stay
		// intact because it decides the return value below.
		$localFile = $localRepo->newFile( $row->img_name );
		if ( !$localFile ) {
			continue;
		}

		// Upgrade the current file version...
		$sha1 = $localFile->getRepo()->getFileSha1( $localFile->getPath() );
		if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
			if ( $isRegen && $localFile->getSha1() !== $sha1 ) {
				// The population was probably done already. If the old SHA1
				// does not match, then both fix the SHA1 and the metadata.
				$localFile->upgradeRow();
			} else {
				$sql = "UPDATE $imageTable SET img_sha1=" . $dbw->addQuotes( $sha1 ) .
					" WHERE img_name=" . $dbw->addQuotes( $localFile->getName() );
				if ( $pipe ) {
					fwrite( $pipe, "$sql;\n" );
				} else {
					$dbw->query( $sql, __METHOD__ );
				}
			}
		}

		// Upgrade the old file versions...
		foreach ( $localFile->getHistory() as $oldFile ) {
			'@phan-var OldLocalFile $oldFile';
			$sha1 = $oldFile->getRepo()->getFileSha1( $oldFile->getPath() );
			if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
				if ( $isRegen && $oldFile->getSha1() !== $sha1 ) {
					// The population was probably done already. If the old SHA1
					// does not match, then both fix the SHA1 and the metadata.
					$oldFile->upgradeRow();
				} else {
					$sql = "UPDATE $oldImageTable SET oi_sha1=" . $dbw->addQuotes( $sha1 ) .
						" WHERE (oi_name=" . $dbw->addQuotes( $oldFile->getName() ) . " AND" .
						" oi_archive_name=" . $dbw->addQuotes( $oldFile->getArchiveName() ) . ")";
					if ( $pipe ) {
						fwrite( $pipe, "$sql;\n" );
					} else {
						$dbw->query( $sql, __METHOD__ );
					}
				}
			}
		}
		$i++;
	}

	if ( $pipe ) {
		fflush( $pipe );
		pclose( $pipe );
	}
	$t += microtime( true );
	$this->output( sprintf( "\nDone %d files in %.1f seconds\n", $numRows, $t ) );

	return !$singleFile; // we only updated *some* files, don't log
}
194 }
195 
// Name the class defined in this file, then pull in the runner include.
// NOTE(review): presumably the include runs $maintClass when this file is
// the script entry point — confirm against Maintenance.php.
196 $maintClass = PopulateImageSha1::class;
197 require_once RUN_MAINTENANCE_IF_MAIN;
Class for scripts that perform database maintenance and want to log the update in updatelog so we can later skip it.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
output( $out, $channel=null)
Throw some output to the user.
hasOption( $name)
Checks to see if a particular option was set.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
A class containing constants representing the names of configuration variables.
Service locator for MediaWiki core services.
Executes shell commands.
Definition: Shell.php:46
Maintenance script to populate the img_sha1 field.
execute()
Do the actual work.
doDBUpdates()
Do the actual work.
__construct()
Default constructor.
getUpdateKey()
Get the update key name to go in the update log table.
updateSkippedMessage()
Message to show that the update was done already and was just skipped.
const DB_PRIMARY
Definition: defines.php:28
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42