MediaWiki  master
populateImageSha1.php
Go to the documentation of this file.
1 <?php
26 
27 require_once __DIR__ . '/Maintenance.php';
28 
/**
 * Set up the script description and register its command-line options.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Populate the img_sha1 field' );

	// Plain switches: present or absent, no argument.
	$this->addOption( 'force', "Recalculate sha1 for rows that already have a value" );
	$this->addOption( 'multiversiononly', "Calculate only for files with several versions" );

	// Optional options that expect an argument (required = false, withArg = true).
	$methodHelp = "Use 'pipe' to pipe to mysql command line,\n\t\tdefault uses Database class";
	$this->addOption( 'method', $methodHelp, false, true );
	$this->addOption( 'file', 'Fix for a specific file, without File: namespace prefixed', false, true );
}
49 
/**
 * Key that identifies this update.
 *
 * @return string
 */
protected function getUpdateKey() {
	$updateKey = 'populate img_sha1';

	return $updateKey;
}
53 
/**
 * Message reported when this update is skipped.
 *
 * @return string
 */
protected function updateSkippedMessage() {
	$skippedMessage = 'img_sha1 column of image table already populated.';

	return $skippedMessage;
}
57 
/**
 * Entry point of the script.
 *
 * A run limited by --file or --multiversiononly calls doDBUpdates() directly,
 * skipping the update log checks/saves; any other run goes through the
 * parent implementation.
 */
public function execute() {
	$isFullRun = !$this->getOption( 'file' ) && !$this->hasOption( 'multiversiononly' );

	if ( $isFullRun ) {
		parent::execute();
		return;
	}

	// skip update log checks/saves
	$this->doDBUpdates();
}
65 
/**
 * Compute SHA-1 hashes for stored files and write them to the image and
 * oldimage tables.
 *
 * Which rows are processed depends on the options: a single file (--file),
 * files that have old versions (--multiversiononly), every file (--force),
 * or only files whose img_sha1 is still empty (default). With --method=pipe
 * the UPDATE statements are written to a mysql command-line process instead
 * of being executed through the Database class.
 *
 * @return bool True when the run covered the table as a whole; false when it
 *  was restricted by --file or --multiversiononly (only *some* files were
 *  updated, so the run must not be logged as complete).
 */
public function doDBUpdates() {
	$method = $this->getOption( 'method', 'normal' );
	// The --file option gets its own variable: the loop below works with file
	// objects, and the return value must still reflect the option itself.
	$fileName = $this->getOption( 'file', '' );
	$singleFile = ( $fileName != '' );
	$multiVersionOnly = $this->hasOption( 'multiversiononly' );
	$force = $this->getOption( 'force' );
	$isRegen = ( $force || $singleFile ); // forced recalculation?

	$t = -microtime( true );
	$dbw = $this->getDB( DB_MASTER );
	if ( $singleFile ) {
		$res = $dbw->select(
			'image',
			[ 'img_name' ],
			[ 'img_name' => $fileName ],
			__METHOD__
		);
		// A successful query yields a result object even when nothing
		// matched, so the row count has to be checked as well.
		if ( !$res || $res->numRows() === 0 ) {
			$this->fatalError( "No such file: $fileName" );
		}
		$this->output( "Populating img_sha1 field for specified files\n" );
	} else {
		if ( $multiVersionOnly ) {
			$conds = [];
			$this->output( "Populating and recalculating img_sha1 field for versioned files\n" );
		} elseif ( $force ) {
			$conds = [];
			$this->output( "Populating and recalculating img_sha1 field\n" );
		} else {
			$conds = [ 'img_sha1' => '' ];
			$this->output( "Populating img_sha1 field\n" );
		}
		if ( $multiVersionOnly ) {
			$res = $dbw->select( 'oldimage',
				[ 'img_name' => 'DISTINCT(oi_name)' ], $conds, __METHOD__ );
		} else {
			$res = $dbw->select( 'image', [ 'img_name' ], $conds, __METHOD__ );
		}
	}

	$imageTable = $dbw->tableName( 'image' );
	$oldImageTable = $dbw->tableName( 'oldimage' );

	$pipe = null;
	if ( $method == 'pipe' ) {
		// Opening a pipe allows the SHA-1 operation to be done in parallel
		// with the database write operation, because the writes are queued
		// in the pipe buffer. This can improve performance by up to a
		// factor of 2.
		// NOTE(review): the database password is passed as a command-line
		// argument here; confirm that is acceptable on the target host.
		$config = $this->getConfig();
		$cmd = 'mysql -u' . Shell::escape( $config->get( 'DBuser' ) ) .
			' -h' . Shell::escape( $config->get( 'DBserver' ) ) .
			' -p' . Shell::escape( $config->get( 'DBpassword' ), $config->get( 'DBname' ) );
		$this->output( "Using pipe method\n" );
		$pipe = popen( $cmd, 'w' );
		if ( !$pipe ) {
			$this->fatalError( "Could not open a pipe to the mysql command line" );
		}
	}

	// Loop-invariant lookups, done once instead of per row.
	$repo = MediaWikiServices::getInstance()->getRepoGroup()->getLocalRepo();
	$batchSize = $this->getBatchSize();

	$numRows = $res->numRows();
	$i = 0;
	foreach ( $res as $row ) {
		if ( $i % $batchSize == 0 ) {
			$this->output( sprintf(
				"Done %d of %d, %5.3f%% \r", $i, $numRows, $i / $numRows * 100 ) );
			wfWaitForSlaves();
		}

		$localFile = $repo->newFile( $row->img_name );
		if ( !$localFile ) {
			continue;
		}

		// Upgrade the current file version...
		$sha1 = $localFile->getRepo()->getFileSha1( $localFile->getPath() );
		if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
			if ( $isRegen && $localFile->getSha1() !== $sha1 ) {
				// The population was probably done already. If the old SHA1
				// does not match, then both fix the SHA1 and the metadata.
				$localFile->upgradeRow();
			} else {
				$sql = "UPDATE $imageTable SET img_sha1=" . $dbw->addQuotes( $sha1 ) .
					" WHERE img_name=" . $dbw->addQuotes( $localFile->getName() );
				if ( $method == 'pipe' ) {
					fwrite( $pipe, "$sql;\n" );
				} else {
					$dbw->query( $sql, __METHOD__ );
				}
			}
		}
		// Upgrade the old file versions...
		foreach ( $localFile->getHistory() as $oldFile ) {
			/** @var OldLocalFile $oldFile */
			'@phan-var OldLocalFile $oldFile';
			$sha1 = $oldFile->getRepo()->getFileSha1( $oldFile->getPath() );
			if ( strval( $sha1 ) !== '' ) { // file on disk and hashed properly
				if ( $isRegen && $oldFile->getSha1() !== $sha1 ) {
					// The population was probably done already. If the old SHA1
					// does not match, then both fix the SHA1 and the metadata.
					$oldFile->upgradeRow();
				} else {
					$sql = "UPDATE $oldImageTable SET oi_sha1=" . $dbw->addQuotes( $sha1 ) .
						" WHERE (oi_name=" . $dbw->addQuotes( $oldFile->getName() ) . " AND" .
						" oi_archive_name=" . $dbw->addQuotes( $oldFile->getArchiveName() ) . ")";
					if ( $method == 'pipe' ) {
						fwrite( $pipe, "$sql;\n" );
					} else {
						$dbw->query( $sql, __METHOD__ );
					}
				}
			}
		}
		$i++;
	}
	if ( $method == 'pipe' ) {
		fflush( $pipe );
		pclose( $pipe );
	}
	$t += microtime( true );
	$this->output( sprintf( "\nDone %d files in %.1f seconds\n", $numRows, $t ) );

	// we only updated *some* files in a restricted run, don't log
	return !$singleFile && !$multiVersionOnly;
}
185 }
186 
187 $maintClass = PopulateImageSha1::class; // class name of the maintenance script defined in this file
188 require_once RUN_MAINTENANCE_IF_MAIN; // constant comes from Maintenance.php; presumably starts $maintClass when this file is the entry point — confirm
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42
const RUN_MAINTENANCE_IF_MAIN
Definition: Maintenance.php:39
getOption( $name, $default=null)
Get an option, or return the default.
hasOption( $name)
Checks to see if a particular option exists.
const DB_MASTER
Definition: defines.php:26
get( $name)
Get a configuration variable such as "Sitename" or "UploadMaintenance".
wfWaitForSlaves( $ifWritesSince=null, $wiki=false, $cluster=false, $timeout=null)
Waits for the replica DBs to catch up to the master position.
addDescription( $text)
Set the description text.
output( $out, $channel=null)
Throw some output to the user.
Class for scripts that perform database maintenance and want to log the update in updatelog so we can later skip it.
getBatchSize()
Returns batch size.
Maintenance script to populate the img_sha1 field.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Config $config
Accessible via getConfig()
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.