MediaWiki master
migrateFileRepoLayout.php
Go to the documentation of this file.
1<?php
25
26require_once __DIR__ . '/Maintenance.php';
27
/**
 * Declare the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	// Flags shared by the two layout options: each must be supplied and
	// each takes a value.
	$required = true;
	$withArg = true;

	$this->addDescription( 'Copy files in repo to a different layout.' );
	$this->addOption(
		'oldlayout',
		"Old layout; one of 'name' or 'sha1'",
		$required,
		$withArg
	);
	$this->addOption(
		'newlayout',
		"New layout; one of 'name' or 'sha1'",
		$required,
		$withArg
	);
	// The timestamp filter is optional but, when given, also takes a value.
	$this->addOption(
		'since',
		"Copy only files from after this timestamp",
		false,
		$withArg
	);

	$this->setBatchSize( 50 );
}
44
/**
 * Copy file versions from the old storage layout to the new one.
 *
 * Works in two passes, each paging through the database in batches of
 * getBatchSize() rows:
 *  1. rows of the `image` table, together with the history entries
 *     returned by each file's getHistory();
 *  2. rows of the `filearchive` table (deleted versions).
 *
 * For every version a 'copy' operation is queued; queued operations are
 * handed to runBatch() whenever at least one batch size worth of them has
 * accumulated, and once more at the end of each pass.
 *
 * Versions whose SHA-1 cannot be determined are reported via error() and
 * skipped. A failure to prepare a destination directory is reported but
 * does not prevent the copy from being queued.
 */
public function execute() {
	$validLayouts = [ 'name', 'sha1' ];

	// Strict comparison so that a non-string option value can never be
	// accepted through loose type juggling.
	$oldLayout = $this->getOption( 'oldlayout' );
	if ( !in_array( $oldLayout, $validLayouts, true ) ) {
		$this->fatalError( "Invalid old layout." );
	}
	$newLayout = $this->getOption( 'newlayout' );
	if ( !in_array( $newLayout, $validLayouts, true ) ) {
		$this->fatalError( "Invalid new layout." );
	}
	$since = $this->getOption( 'since' );

	$repo = $this->getRepo();

	$be = $repo->getBackend();
	if ( $be instanceof FileBackendDBRepoWrapper ) {
		// avoid path translations for this script
		$be = $be->getInternalBackend();
	}

	$dbw = $repo->getPrimaryDB();

	// Container roots used while building paths; neither changes during the run.
	$origBase = $be->getContainerStoragePath( "{$repo->getName()}-original" );
	$deletedBase = $be->getContainerStoragePath( "{$repo->getName()}-deleted" );
	$startTime = wfTimestampNow();

	// Do current and archived versions...
	$conds = [];
	if ( $since ) {
		$conds[] = $dbw->expr( 'img_timestamp', '>=', $dbw->timestamp( $since ) );
	}

	$batchSize = $this->getBatchSize();
	$batch = [];
	$lastName = '';
	do {
		// Page through the table by name, resuming after the last name seen.
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'img_name', 'img_sha1' ] )
			->from( 'image' )
			->where( $dbw->expr( 'img_name', '>', $lastName ) )
			->andWhere( $conds )
			->orderBy( 'img_name' )
			->limit( $batchSize )
			->caller( __METHOD__ )->fetchResultSet();

		foreach ( $res as $row ) {
			$lastName = $row->img_name;
			$file = $repo->newFile( $row->img_name );
			// Check in case SHA1 rows are not populated for some files
			$sha1 = strlen( $row->img_sha1 ) ? $row->img_sha1 : $file->getSha1();

			if ( !strlen( $sha1 ) ) {
				$this->error( "Image SHA-1 not known for {$row->img_name}." );
			} else {
				$spath = $oldLayout === 'sha1'
					? $this->getSha1LayoutPath( $origBase, $sha1 )
					: $file->getPath();
				$dpath = $newLayout === 'sha1'
					? $this->getSha1LayoutPath( $origBase, $sha1 )
					: $file->getPath();

				$this->prepareDestinationDir( $be, $dpath );

				$batch[] = [ 'op' => 'copy', 'overwrite' => true,
					'src' => $spath, 'dst' => $dpath, 'img' => $row->img_name ];
			}

			// History entries are processed even when the current version
			// itself had to be skipped above.
			foreach ( $file->getHistory() as $ofile ) {
				$sha1 = $ofile->getSha1();
				if ( !strlen( $sha1 ) ) {
					$this->error( "Image SHA-1 not set for {$ofile->getArchiveName()}." );
					continue;
				}

				if ( $oldLayout === 'sha1' ) {
					$spath = $this->getSha1LayoutPath( $origBase, $sha1 );
				} elseif ( $ofile->isDeleted( File::DELETED_FILE ) ) {
					// File-deleted versions are read from the "-deleted" container.
					$spath = $deletedBase .
						'/' . $repo->getDeletedHashPath( $sha1 ) .
						$sha1 . '.' . $ofile->getExtension();
				} else {
					$spath = $ofile->getPath();
				}

				$dpath = $newLayout === 'sha1'
					? $this->getSha1LayoutPath( $origBase, $sha1 )
					: $ofile->getPath();

				$this->prepareDestinationDir( $be, $dpath );

				$batch[] = [ 'op' => 'copy', 'overwrite' => true,
					'src' => $spath, 'dst' => $dpath, 'img' => $ofile->getArchiveName() ];
			}

			if ( count( $batch ) >= $batchSize ) {
				$this->runBatch( $batch, $be );
				$batch = [];
			}
		}
	} while ( $res->numRows() );

	if ( count( $batch ) ) {
		$this->runBatch( $batch, $be );
	}

	// Do deleted versions...
	$conds = [];
	if ( $since ) {
		$conds[] = $dbw->expr( 'fa_deleted_timestamp', '>=', $dbw->timestamp( $since ) );
	}

	$batch = [];
	$lastId = 0;
	do {
		// Page through the table by ID, resuming after the last ID seen.
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'fa_storage_key', 'fa_id', 'fa_name' ] )
			->from( 'filearchive' )
			->where( $dbw->expr( 'fa_id', '>', $lastId ) )
			->andWhere( $conds )
			->orderBy( 'fa_id' )
			->limit( $batchSize )
			->caller( __METHOD__ )->fetchResultSet();

		foreach ( $res as $row ) {
			$lastId = $row->fa_id;
			$sha1Key = $row->fa_storage_key;
			if ( !strlen( $sha1Key ) ) {
				$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
				continue;
			}

			// The SHA-1 is the part of the storage key before the first dot.
			// strpos() returns false when the key has no dot; in that case
			// the whole key is used instead of an empty string.
			// NOTE(review): assumes a dot-less key is a bare SHA-1 — confirm.
			$dotPos = strpos( $sha1Key, '.' );
			$sha1 = $dotPos === false ? $sha1Key : substr( $sha1Key, 0, $dotPos );
			if ( $sha1 === '' ) {
				// Key starts with a dot: there is no hash to build a path from.
				$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
				continue;
			}

			$sha1Path = $this->getSha1LayoutPath( $origBase, $sha1 );
			$deletedPath = $deletedBase .
				'/' . $repo->getDeletedHashPath( $sha1Key ) . $sha1Key;

			$spath = $oldLayout === 'sha1' ? $sha1Path : $deletedPath;
			$dpath = $newLayout === 'sha1' ? $sha1Path : $deletedPath;

			$this->prepareDestinationDir( $be, $dpath );

			$batch[] = [ 'op' => 'copy', 'src' => $spath, 'dst' => $dpath,
				'overwriteSame' => true, 'img' => "(ID {$row->fa_id}) {$row->fa_name}" ];

			if ( count( $batch ) >= $batchSize ) {
				$this->runBatch( $batch, $be );
				$batch = [];
			}
		}
	} while ( $res->numRows() );

	if ( count( $batch ) ) {
		$this->runBatch( $batch, $be );
	}

	$this->output( "Done (started $startTime)\n" );
}

/**
 * Build the storage path of a file in the 'sha1' layout:
 * <base>/<1st char>/<2nd char>/<3rd char>/<sha1>.
 *
 * @param string $origBase Storage path of the "-original" container
 * @param string $sha1 Non-empty SHA-1 string identifying the file
 * @return string
 */
private function getSha1LayoutPath( string $origBase, string $sha1 ): string {
	return "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
}

/**
 * Ask the backend to prepare the directory that will hold $dpath and
 * report a failure through error(). Failures are not fatal.
 *
 * @param FileBackend $be Backend that will receive the copy
 * @param string $dpath Destination storage path of the file
 */
private function prepareDestinationDir( FileBackend $be, string $dpath ) {
	$status = $be->prepare( [
		'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
	if ( !$status->isOK() ) {
		$this->error( $status );
	}
}
226
/**
 * Fetch the repository this script operates on: the local repo of the
 * repo group provided by the service container.
 *
 * @return mixed Whatever the repo group's getLocalRepo() returns
 */
protected function getRepo() {
	$repoGroup = $this->getServiceContainer()->getRepoGroup();

	return $repoGroup->getLocalRepo();
}
230
/**
 * Print the operations of a batch, then submit them to the backend in a
 * single doOperations() call. A non-OK result is reported via error().
 *
 * @param array[] $ops Operations to run; each entry is read for its
 *  'img' and 'dst' keys when printing
 * @param FileBackend $be Backend that executes the operations
 */
protected function runBatch( array $ops, FileBackend $be ) {
	$this->output( "Migrating file batch:\n" );

	// One progress line per queued operation.
	foreach ( $ops as $op ) {
		$line = "\"{$op['img']}\" (dest: {$op['dst']})\n";
		$this->output( $line );
	}

	$opts = [ 'bypassReadOnly' => true ];
	$result = $be->doOperations( $ops, $opts );
	if ( !$result->isOK() ) {
		$this->error( $result );
	}

	$this->output( "Batch done\n\n" );
}
248}
249
// Record which maintenance class this script provides, then include the
// file named by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): presumably that file runs $maintClass when this script is
// the command-line entry point — confirm against Maintenance.php.
250$maintClass = MigrateFileRepoLayout::class;
251require_once RUN_MAINTENANCE_IF_MAIN;
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
Proxy backend that manages file layout rewriting for FileRepo.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
getServiceContainer()
Returns the main service container.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Copy all files in FileRepo to an originals container using SHA1 paths.
runBatch(array $ops, FileBackend $be)
__construct()
Default constructor.
execute()
Do the actual work.
Base class for all file backend classes (including multi-write backends).
doOperations(array $ops, array $opts=[])
This is the main entry point into the backend for write operations.