MediaWiki  master
migrateFileRepoLayout.php
Go to the documentation of this file.
1 <?php
24 require_once __DIR__ . '/Maintenance.php';
25 
/**
 * Register the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Copy files in repo to a different layout.' );

	// Both layout options are required, take a value, and share the same wording.
	$layoutOptions = [ 'oldlayout' => 'Old', 'newlayout' => 'New' ];
	foreach ( $layoutOptions as $optionName => $label ) {
		$this->addOption( $optionName, "{$label} layout; one of 'name' or 'sha1'", true, true );
	}

	// Optional lower bound, given as a timestamp value.
	$this->addOption( 'since', "Copy only files from after this timestamp", false, true );

	$this->setBatchSize( 50 );
}
42 
/**
 * Copy every file version of the repo from the old storage layout to the new one.
 *
 * Validates the 'oldlayout' and 'newlayout' options, then walks the `image`
 * table (current version plus the history of each file) and the
 * `filearchive` table (deleted versions) in batches. One backend 'copy'
 * operation is queued per file version and the queue is flushed through
 * runBatch() whenever it reaches the batch size.
 *
 * Rows for which no SHA-1 can be determined are reported with error() and
 * skipped; they do not abort the run.
 */
public function execute() {
	$validLayouts = [ 'name', 'sha1' ];

	$oldLayout = $this->getOption( 'oldlayout' );
	if ( !in_array( $oldLayout, $validLayouts, true ) ) {
		$this->fatalError( "Invalid old layout." );
	}
	$newLayout = $this->getOption( 'newlayout' );
	if ( !in_array( $newLayout, $validLayouts, true ) ) {
		$this->fatalError( "Invalid new layout." );
	}
	$since = $this->getOption( 'since' );

	$repo = $this->getRepo();

	$be = $repo->getBackend();
	if ( $be instanceof FileBackendDBRepoWrapper ) {
		// avoid path translations for this script
		$be = $be->getInternalBackend();
	}

	$dbw = $repo->getPrimaryDB();

	$origBase = $be->getContainerStoragePath( "{$repo->getName()}-original" );
	$deletedBase = $be->getContainerStoragePath( "{$repo->getName()}-deleted" );
	$startTime = wfTimestampNow();

	// Do current and archived versions...
	$conds = [];
	if ( $since ) {
		$conds[] = 'img_timestamp >= ' . $dbw->addQuotes( $dbw->timestamp( $since ) );
	}

	$batchSize = $this->getBatchSize();
	$batch = [];
	$lastName = '';
	do {
		// Page through the table by name, resuming after the last name seen
		$res = $dbw->select( 'image',
			[ 'img_name', 'img_sha1' ],
			array_merge( [ 'img_name > ' . $dbw->addQuotes( $lastName ) ], $conds ),
			__METHOD__,
			[ 'LIMIT' => $batchSize, 'ORDER BY' => 'img_name' ]
		);

		foreach ( $res as $row ) {
			$lastName = $row->img_name;
			$file = $repo->newFile( $row->img_name );
			// Check in case SHA1 rows are not populated for some files
			$sha1 = strlen( $row->img_sha1 ) ? $row->img_sha1 : $file->getSha1();

			if ( !strlen( $sha1 ) ) {
				// No "continue" here: the old versions below are still processed
				$this->error( "Image SHA-1 not known for {$row->img_name}." );
			} else {
				$spath = $oldLayout === 'sha1'
					? $this->getSha1LayoutPath( $origBase, $sha1 )
					: $file->getPath();
				$dpath = $newLayout === 'sha1'
					? $this->getSha1LayoutPath( $origBase, $sha1 )
					: $file->getPath();

				$this->prepareDestinationDir( $be, $dpath );

				$batch[] = [ 'op' => 'copy', 'overwrite' => true,
					'src' => $spath, 'dst' => $dpath, 'img' => $row->img_name ];
			}

			foreach ( $file->getHistory() as $ofile ) {
				$sha1 = $ofile->getSha1();
				if ( !strlen( $sha1 ) ) {
					$this->error( "Image SHA-1 not set for {$ofile->getArchiveName()}." );
					continue;
				}

				if ( $oldLayout === 'sha1' ) {
					$spath = $this->getSha1LayoutPath( $origBase, $sha1 );
				} elseif ( $ofile->isDeleted( File::DELETED_FILE ) ) {
					// Old versions flagged DELETED_FILE are read from the deleted container
					$spath = $deletedBase .
						'/' . $repo->getDeletedHashPath( $sha1 ) .
						$sha1 . '.' . $ofile->getExtension();
				} else {
					$spath = $ofile->getPath();
				}

				$dpath = $newLayout === 'sha1'
					? $this->getSha1LayoutPath( $origBase, $sha1 )
					: $ofile->getPath();

				$this->prepareDestinationDir( $be, $dpath );

				$batch[] = [ 'op' => 'copy', 'overwrite' => true,
					'src' => $spath, 'dst' => $dpath, 'img' => $ofile->getArchiveName() ];
			}

			if ( count( $batch ) >= $batchSize ) {
				$this->runBatch( $batch, $be );
				$batch = [];
			}
		}
	} while ( $res->numRows() );

	if ( count( $batch ) ) {
		$this->runBatch( $batch, $be );
	}

	// Do deleted versions...
	$conds = [];
	if ( $since ) {
		$conds[] = 'fa_deleted_timestamp >= ' . $dbw->addQuotes( $dbw->timestamp( $since ) );
	}

	$batch = [];
	$lastId = 0;
	do {
		// Page through the table by ID, resuming after the last ID seen
		$res = $dbw->select( 'filearchive', [ 'fa_storage_key', 'fa_id', 'fa_name' ],
			array_merge( [ 'fa_id > ' . $dbw->addQuotes( $lastId ) ], $conds ),
			__METHOD__,
			[ 'LIMIT' => $batchSize, 'ORDER BY' => 'fa_id' ]
		);

		foreach ( $res as $row ) {
			$lastId = $row->fa_id;
			$sha1Key = $row->fa_storage_key;
			if ( !strlen( $sha1Key ) ) {
				$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
				continue;
			}

			// The hash is the part of the key before the first dot. strpos()
			// returns false when there is no dot; in that case the whole key
			// is used as the hash rather than truncating it to an empty string.
			$dotPos = strpos( $sha1Key, '.' );
			$sha1 = $dotPos === false ? $sha1Key : substr( $sha1Key, 0, $dotPos );
			if ( !strlen( $sha1 ) ) {
				// Key starts with a dot: no hash to build a path from
				$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
				continue;
			}

			$deletedPath = $deletedBase .
				'/' . $repo->getDeletedHashPath( $sha1Key ) . $sha1Key;

			$spath = $oldLayout === 'sha1'
				? $this->getSha1LayoutPath( $origBase, $sha1 )
				: $deletedPath;
			$dpath = $newLayout === 'sha1'
				? $this->getSha1LayoutPath( $origBase, $sha1 )
				: $deletedPath;

			$this->prepareDestinationDir( $be, $dpath );

			$batch[] = [ 'op' => 'copy', 'src' => $spath, 'dst' => $dpath,
				'overwriteSame' => true, 'img' => "(ID {$row->fa_id}) {$row->fa_name}" ];

			if ( count( $batch ) >= $batchSize ) {
				$this->runBatch( $batch, $be );
				$batch = [];
			}
		}
	} while ( $res->numRows() );

	if ( count( $batch ) ) {
		$this->runBatch( $batch, $be );
	}

	$this->output( "Done (started $startTime)\n" );
}

/**
 * Build the storage path of a file in the 'sha1' layout: three directory
 * levels named after the first three characters of the hash, then the hash.
 *
 * @param string $base Storage path of the container holding the files
 * @param string $sha1 File hash; callers only pass non-empty values
 * @return string
 */
private function getSha1LayoutPath( $base, $sha1 ) {
	return "{$base}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
}

/**
 * Ask the backend to prepare the parent directory of a destination path and
 * report a failed status through error() without aborting.
 *
 * @param FileBackend $be
 * @param string $dpath Destination storage path of the file to be copied
 */
private function prepareDestinationDir( FileBackend $be, $dpath ) {
	$status = $be->prepare( [
		'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
	if ( !$status->isOK() ) {
		$this->error( print_r( $status->getErrors(), true ) );
	}
}
219 
/**
 * Fetch the local repo from the repo group of the main service container.
 *
 * @return mixed Whatever RepoGroup::getLocalRepo() returns
 */
protected function getRepo() {
	$repoGroup = $this->getServiceContainer()->getRepoGroup();

	return $repoGroup->getLocalRepo();
}
223 
/**
 * Announce and execute one batch of backend operations.
 *
 * Each operation is listed by its 'img' label and 'dst' path before the
 * whole batch is handed to the backend with read-only mode bypassed. A
 * failed status is reported through error(), matching how execute()
 * reports failed backend statuses, and does not abort the script.
 *
 * @param array[] $ops Operations; each entry must have 'img' and 'dst' keys
 * @param FileBackend $be Backend that performs the operations
 */
protected function runBatch( array $ops, FileBackend $be ) {
	$this->output( "Migrating file batch:\n" );
	foreach ( $ops as $op ) {
		$this->output( "\"{$op['img']}\" (dest: {$op['dst']})\n" );
	}

	$status = $be->doOperations( $ops, [ 'bypassReadOnly' => true ] );
	if ( !$status->isOK() ) {
		// Failures go to the error channel rather than the regular output
		$this->error( print_r( $status->getErrors(), true ) );
	}

	$this->output( "Batch done\n\n" );
}
241 }
242 
// Class name of the maintenance script defined in this file.
// NOTE(review): presumably read by the file required below — confirm in Maintenance.php.
243 $maintClass = MigrateFileRepoLayout::class;
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is a constant defined outside this file; it appears
// to name the bootstrap file that runs $maintClass when this script is the entry point — confirm.
244 require_once RUN_MAINTENANCE_IF_MAIN;
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
Proxy backend that manages file layout rewriting for FileRepo.
Base class for all file backend classes (including multi-write backends).
Definition: FileBackend.php:99
doOperations(array $ops, array $opts=[])
This is the main entry point into the backend for write operations.
const DELETED_FILE
Definition: File.php:74
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
Definition: Maintenance.php:66
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
getServiceContainer()
Returns the main service container.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Copy all files in FileRepo to an originals container using SHA1 paths.
runBatch(array $ops, FileBackend $be)
__construct()
Default constructor.
execute()
Do the actual work.
if(PHP_SAPI !='cli-server') if(!isset( $_SERVER['SCRIPT_FILENAME'])) $file
Item class for a filearchive table row.
Definition: router.php:42