MediaWiki master
migrateFileRepoLayout.php
Go to the documentation of this file.
1<?php
26
27// @codeCoverageIgnoreStart
28require_once __DIR__ . '/Maintenance.php';
29// @codeCoverageIgnoreEnd
30
/**
 * Register the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Copy files in repo to a different layout.' );

	// Option name => [ help text, required? ]; every option takes an argument.
	$optionSpecs = [
		'oldlayout' => [ "Old layout; one of 'name' or 'sha1'", true ],
		'newlayout' => [ "New layout; one of 'name' or 'sha1'", true ],
		'since' => [ "Copy only files from after this timestamp", false ],
	];
	foreach ( $optionSpecs as $optionName => $spec ) {
		$this->addOption( $optionName, $spec[0], $spec[1], true );
	}

	$this->setBatchSize( 50 );
}
47
/**
 * Copy every file of the local repo from the old storage layout to the new one.
 *
 * Two passes are made, each paging through the database in chunks of the
 * configured batch size:
 *  1. rows of the `image` table (current versions) together with the
 *     history of each file (archived versions), paged by `img_name`;
 *  2. rows of the `filearchive` table (deleted versions), paged by `fa_id`.
 *
 * For each version a 'copy' operation from the old-layout path to the
 * new-layout path is queued; queued operations are flushed via runBatch()
 * whenever at least one batch worth of them has accumulated, and once more
 * at the end of each pass. Versions whose SHA-1 cannot be determined are
 * reported with error() and skipped.
 *
 * The script terminates through fatalError() when either layout option is
 * not one of 'name' or 'sha1'.
 */
public function execute() {
	$oldLayout = $this->getOption( 'oldlayout' );
	if ( !in_array( $oldLayout, [ 'name', 'sha1' ], true ) ) {
		$this->fatalError( "Invalid old layout." );
	}
	$newLayout = $this->getOption( 'newlayout' );
	if ( !in_array( $newLayout, [ 'name', 'sha1' ], true ) ) {
		$this->fatalError( "Invalid new layout." );
	}
	$since = $this->getOption( 'since' );

	$repo = $this->getRepo();

	$be = $repo->getBackend();
	if ( $be instanceof FileBackendDBRepoWrapper ) {
		// avoid path translations for this script
		$be = $be->getInternalBackend();
	}

	$dbw = $repo->getPrimaryDB();

	// Container roots do not change while the script runs; resolve them once.
	$origBase = $be->getContainerStoragePath( "{$repo->getName()}-original" );
	$deletedBase = $be->getContainerStoragePath( "{$repo->getName()}-deleted" );
	$startTime = wfTimestampNow();

	// Do current and archived versions...
	$conds = [];
	if ( $since ) {
		$conds[] = $dbw->expr( 'img_timestamp', '>=', $dbw->timestamp( $since ) );
	}

	$batchSize = $this->getBatchSize();
	$batch = [];
	$lastName = '';
	do {
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'img_name', 'img_sha1' ] )
			->from( 'image' )
			->where( $dbw->expr( 'img_name', '>', $lastName ) )
			->andWhere( $conds )
			->orderBy( 'img_name' )
			->limit( $batchSize )
			->caller( __METHOD__ )->fetchResultSet();

		foreach ( $res as $row ) {
			// Remember where to resume the next page
			$lastName = $row->img_name;
			$file = $repo->newFile( $row->img_name );
			// Check in case SHA1 rows are not populated for some files
			$sha1 = $row->img_sha1 !== '' ? $row->img_sha1 : $file->getSha1();

			// NOTE(review): getSha1() presumably may return a non-string
			// (e.g. false) when the hash is unavailable; treat that the
			// same as an empty hash instead of building a bogus path.
			if ( !is_string( $sha1 ) || $sha1 === '' ) {
				$this->error( "Image SHA-1 not known for {$row->img_name}." );
			} else {
				if ( $oldLayout === 'sha1' ) {
					$spath = $this->getSha1LayoutPath( $origBase, $sha1 );
				} else {
					$spath = $file->getPath();
				}

				if ( $newLayout === 'sha1' ) {
					$dpath = $this->getSha1LayoutPath( $origBase, $sha1 );
				} else {
					$dpath = $file->getPath();
				}

				$this->prepareDestination( $be, $dpath );

				$batch[] = [ 'op' => 'copy', 'overwrite' => true,
					'src' => $spath, 'dst' => $dpath, 'img' => $row->img_name ];
			}

			// Archived versions are handled even when the current version
			// had to be skipped above.
			foreach ( $file->getHistory() as $ofile ) {
				$sha1 = $ofile->getSha1();
				if ( !is_string( $sha1 ) || $sha1 === '' ) {
					$this->error( "Image SHA-1 not set for {$ofile->getArchiveName()}." );
					continue;
				}

				if ( $oldLayout === 'sha1' ) {
					$spath = $this->getSha1LayoutPath( $origBase, $sha1 );
				} elseif ( $ofile->isDeleted( File::DELETED_FILE ) ) {
					// Revision-deleted archived versions are read from the
					// "-deleted" container rather than from the archive path.
					$spath = $deletedBase .
						'/' . $repo->getDeletedHashPath( $sha1 ) .
						$sha1 . '.' . $ofile->getExtension();
				} else {
					$spath = $ofile->getPath();
				}

				if ( $newLayout === 'sha1' ) {
					$dpath = $this->getSha1LayoutPath( $origBase, $sha1 );
				} else {
					$dpath = $ofile->getPath();
				}

				$this->prepareDestination( $be, $dpath );

				$batch[] = [ 'op' => 'copy', 'overwrite' => true,
					'src' => $spath, 'dst' => $dpath, 'img' => $ofile->getArchiveName() ];
			}

			if ( count( $batch ) >= $batchSize ) {
				$this->runBatch( $batch, $be );
				$batch = [];
			}
		}
	} while ( $res->numRows() );

	if ( count( $batch ) ) {
		$this->runBatch( $batch, $be );
	}

	// Do deleted versions...
	$conds = [];
	if ( $since ) {
		$conds[] = $dbw->expr( 'fa_deleted_timestamp', '>=', $dbw->timestamp( $since ) );
	}

	$batch = [];
	$lastId = 0;
	do {
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'fa_storage_key', 'fa_id', 'fa_name' ] )
			->from( 'filearchive' )
			->where( $dbw->expr( 'fa_id', '>', $lastId ) )
			->andWhere( $conds )
			->orderBy( 'fa_id' )
			->limit( $batchSize )
			->caller( __METHOD__ )->fetchResultSet();

		foreach ( $res as $row ) {
			// Remember where to resume the next page
			$lastId = $row->fa_id;
			// Cast so that a NULL column value is handled like an empty key
			$sha1Key = (string)$row->fa_storage_key;
			if ( $sha1Key === '' ) {
				$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
				continue;
			}

			// The hash is the part of the storage key before the first dot.
			// strpos() returns false when there is no dot; in that case the
			// whole key is taken as the hash rather than silently producing
			// an empty one.
			$dotPos = strpos( $sha1Key, '.' );
			$sha1 = $dotPos === false ? $sha1Key : substr( $sha1Key, 0, $dotPos );
			if ( $sha1 === '' ) {
				$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
				continue;
			}

			if ( $oldLayout === 'sha1' ) {
				$spath = $this->getSha1LayoutPath( $origBase, $sha1 );
			} else {
				$spath = $deletedBase .
					'/' . $repo->getDeletedHashPath( $sha1Key ) . $sha1Key;
			}

			if ( $newLayout === 'sha1' ) {
				$dpath = $this->getSha1LayoutPath( $origBase, $sha1 );
			} else {
				$dpath = $deletedBase .
					'/' . $repo->getDeletedHashPath( $sha1Key ) . $sha1Key;
			}

			$this->prepareDestination( $be, $dpath );

			$batch[] = [ 'op' => 'copy', 'src' => $spath, 'dst' => $dpath,
				'overwriteSame' => true, 'img' => "(ID {$row->fa_id}) {$row->fa_name}" ];

			if ( count( $batch ) >= $batchSize ) {
				$this->runBatch( $batch, $be );
				$batch = [];
			}
		}
	} while ( $res->numRows() );

	if ( count( $batch ) ) {
		$this->runBatch( $batch, $be );
	}

	$this->output( "Done (started $startTime)\n" );
}

/**
 * Build the storage path of a file in the 'sha1' layout: the first three
 * characters of the hash each form one directory level below the base,
 * followed by the full hash as the file name.
 *
 * @param string $origBase Storage path of the "-original" container
 * @param string $sha1 Non-empty hash of the file content
 * @return string
 */
private function getSha1LayoutPath( $origBase, $sha1 ) {
	return "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
}

/**
 * Ask the backend to prepare the directory that will hold $dpath and
 * report a failure with error(). A failure does not abort the migration.
 *
 * @param FileBackend $be
 * @param string $dpath Destination storage path of the file to be copied
 */
private function prepareDestination( FileBackend $be, $dpath ) {
	$status = $be->prepare( [
		'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
	if ( !$status->isOK() ) {
		$this->error( $status );
	}
}
229
/**
 * Fetch the local repo from the repo group of the main service container.
 *
 * @return mixed Whatever getLocalRepo() yields; execute() uses it as the
 *  file repo to migrate
 */
protected function getRepo() {
	$services = $this->getServiceContainer();
	$repoGroup = $services->getRepoGroup();

	return $repoGroup->getLocalRepo();
}
233
/**
 * Announce each queued operation, then hand the whole list to the backend
 * in a single doOperations() call that bypasses read-only mode. A failed
 * status is reported with error(); the method returns normally either way.
 *
 * @param array[] $ops Operations to run; each needs at least the 'img'
 *  (label used in the output) and 'dst' (destination path) keys
 * @param FileBackend $be Backend that performs the operations
 */
protected function runBatch( array $ops, FileBackend $be ) {
	$this->output( "Migrating file batch:\n" );
	foreach ( $ops as $operation ) {
		$label = $operation['img'];
		$destination = $operation['dst'];
		$this->output( "\"{$label}\" (dest: {$destination})\n" );
	}

	$result = $be->doOperations( $ops, [ 'bypassReadOnly' => true ] );
	$failed = !$result->isOK();
	if ( $failed ) {
		$this->error( $result );
	}

	$this->output( "Batch done\n\n" );
}
251}
252
253// @codeCoverageIgnoreStart
254$maintClass = MigrateFileRepoLayout::class;
255require_once RUN_MAINTENANCE_IF_MAIN;
256// @codeCoverageIgnoreEnd
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
Proxy backend that manages file layout rewriting for FileRepo.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getBatchSize()
Returns batch size.
output( $out, $channel=null)
Throw some output to the user.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
error( $err, $die=0)
Throw an error to the user.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Copy all files in FileRepo to an originals container using SHA1 paths.
runBatch(array $ops, FileBackend $be)
__construct()
Default constructor.
execute()
Do the actual work.
Base class for all file backend classes (including multi-write backends).
doOperations(array $ops, array $opts=[])
This is the main entry point into the backend for write operations.