MediaWiki master
migrateFileRepoLayout.php
Go to the documentation of this file.
1<?php
25
26// @codeCoverageIgnoreStart
27require_once __DIR__ . '/Maintenance.php';
28// @codeCoverageIgnoreEnd
29
/**
 * Register the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Copy files in repo to a different layout.' );

	// Both layout options are required and take an argument.
	$layoutOptions = [
		'oldlayout' => "Old layout; one of 'name' or 'sha1'",
		'newlayout' => "New layout; one of 'name' or 'sha1'",
	];
	foreach ( $layoutOptions as $optionName => $optionHelp ) {
		$this->addOption( $optionName, $optionHelp, true, true );
	}

	// Optional lower bound on the timestamps of the files to copy.
	$this->addOption( 'since', "Copy only files from after this timestamp", false, true );

	$this->setBatchSize( 50 );
}
46
/**
 * Copy every file version in the repo from the old layout to the new one.
 *
 * Three groups are handled in turn: current versions (rows of `image`),
 * their old versions (via $file->getHistory()), and deleted versions (rows
 * of `filearchive`). Copy operations are queued and handed to runBatch()
 * once at least getBatchSize() of them have accumulated.
 *
 * Problems with individual files are reported through error() and the run
 * continues; an invalid 'oldlayout' or 'newlayout' value ends the script
 * through fatalError().
 */
public function execute() {
	$validLayouts = [ 'name', 'sha1' ];

	$oldLayout = $this->getOption( 'oldlayout' );
	if ( !in_array( $oldLayout, $validLayouts, true ) ) {
		$this->fatalError( "Invalid old layout." );
	}
	$newLayout = $this->getOption( 'newlayout' );
	if ( !in_array( $newLayout, $validLayouts, true ) ) {
		$this->fatalError( "Invalid new layout." );
	}
	$since = $this->getOption( 'since' );

	$repo = $this->getRepo();

	$be = $repo->getBackend();
	if ( $be instanceof FileBackendDBRepoWrapper ) {
		// avoid path translations for this script
		$be = $be->getInternalBackend();
	}

	$dbw = $repo->getPrimaryDB();

	$origBase = $be->getContainerStoragePath( "{$repo->getName()}-original" );
	$deletedBase = $be->getContainerStoragePath( "{$repo->getName()}-deleted" );
	$startTime = wfTimestampNow();

	// Storage path of a file in the 'sha1' layout: three single-character
	// directory levels taken from the start of the hash, then the full hash.
	$sha1LayoutPath = static function ( $sha1 ) use ( $origBase ) {
		return "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
	};

	// Do current and archived versions...
	$conds = [];
	if ( $since ) {
		$conds[] = $dbw->expr( 'img_timestamp', '>=', $dbw->timestamp( $since ) );
	}

	$batchSize = $this->getBatchSize();
	$batch = [];
	$lastName = '';
	do {
		// Page through the table by name, resuming after the last row seen
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'img_name', 'img_sha1' ] )
			->from( 'image' )
			->where( $dbw->expr( 'img_name', '>', $lastName ) )
			->andWhere( $conds )
			->orderBy( 'img_name' )
			->limit( $batchSize )
			->caller( __METHOD__ )->fetchResultSet();

		foreach ( $res as $row ) {
			$lastName = $row->img_name;
			$file = $repo->newFile( $row->img_name );
			// Check in case SHA1 rows are not populated for some files
			$sha1 = strlen( $row->img_sha1 ) ? $row->img_sha1 : $file->getSha1();

			if ( !strlen( $sha1 ) ) {
				$this->error( "Image SHA-1 not known for {$row->img_name}." );
			} else {
				$spath = $oldLayout === 'sha1' ? $sha1LayoutPath( $sha1 ) : $file->getPath();
				$dpath = $newLayout === 'sha1' ? $sha1LayoutPath( $sha1 ) : $file->getPath();

				$status = $be->prepare( [
					'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
				if ( !$status->isOK() ) {
					$this->error( $status );
				}

				$batch[] = [ 'op' => 'copy', 'overwrite' => true,
					'src' => $spath, 'dst' => $dpath, 'img' => $row->img_name ];
			}

			// Old versions are processed even when the current version was skipped
			foreach ( $file->getHistory() as $ofile ) {
				$sha1 = $ofile->getSha1();
				if ( !strlen( $sha1 ) ) {
					$this->error( "Image SHA-1 not set for {$ofile->getArchiveName()}." );
					continue;
				}

				if ( $oldLayout === 'sha1' ) {
					$spath = $sha1LayoutPath( $sha1 );
				} elseif ( $ofile->isDeleted( File::DELETED_FILE ) ) {
					// Read this version from the "-deleted" container instead
					$spath = $deletedBase .
						'/' . $repo->getDeletedHashPath( $sha1 ) .
						$sha1 . '.' . $ofile->getExtension();
				} else {
					$spath = $ofile->getPath();
				}

				$dpath = $newLayout === 'sha1' ? $sha1LayoutPath( $sha1 ) : $ofile->getPath();

				$status = $be->prepare( [
					'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
				if ( !$status->isOK() ) {
					$this->error( $status );
				}
				$batch[] = [ 'op' => 'copy', 'overwrite' => true,
					'src' => $spath, 'dst' => $dpath, 'img' => $ofile->getArchiveName() ];
			}

			if ( count( $batch ) >= $batchSize ) {
				$this->runBatch( $batch, $be );
				$batch = [];
			}
		}
	} while ( $res->numRows() );

	if ( count( $batch ) ) {
		$this->runBatch( $batch, $be );
	}

	// Do deleted versions...
	$conds = [];
	if ( $since ) {
		$conds[] = $dbw->expr( 'fa_deleted_timestamp', '>=', $dbw->timestamp( $since ) );
	}

	$batch = [];
	$lastId = 0;
	do {
		// Page through the table by ID, resuming after the last row seen
		$res = $dbw->newSelectQueryBuilder()
			->select( [ 'fa_storage_key', 'fa_id', 'fa_name' ] )
			->from( 'filearchive' )
			->where( $dbw->expr( 'fa_id', '>', $lastId ) )
			->andWhere( $conds )
			->orderBy( 'fa_id' )
			->limit( $batchSize )
			->caller( __METHOD__ )->fetchResultSet();

		foreach ( $res as $row ) {
			$lastId = $row->fa_id;
			$sha1Key = $row->fa_storage_key;
			if ( !strlen( $sha1Key ) ) {
				$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
				continue;
			}

			// The hash is the part of the key before the first '.'. When the key
			// has no '.', strpos() returns false; use the whole key as the hash
			// rather than letting substr() cut it down to an empty string.
			$dotPos = strpos( $sha1Key, '.' );
			$sha1 = $dotPos === false ? $sha1Key : substr( $sha1Key, 0, $dotPos );
			if ( $sha1 === '' ) {
				// A key starting with '.' has no usable hash to build a path from
				$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
				continue;
			}

			// Path of this version in the 'name' layout ("-deleted" container)
			$deletedPath = $deletedBase .
				'/' . $repo->getDeletedHashPath( $sha1Key ) . $sha1Key;

			$spath = $oldLayout === 'sha1' ? $sha1LayoutPath( $sha1 ) : $deletedPath;
			$dpath = $newLayout === 'sha1' ? $sha1LayoutPath( $sha1 ) : $deletedPath;

			$status = $be->prepare( [
				'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
			if ( !$status->isOK() ) {
				$this->error( $status );
			}

			$batch[] = [ 'op' => 'copy', 'src' => $spath, 'dst' => $dpath,
				'overwriteSame' => true, 'img' => "(ID {$row->fa_id}) {$row->fa_name}" ];

			if ( count( $batch ) >= $batchSize ) {
				$this->runBatch( $batch, $be );
				$batch = [];
			}
		}
	} while ( $res->numRows() );

	if ( count( $batch ) ) {
		$this->runBatch( $batch, $be );
	}

	$this->output( "Done (started $startTime)\n" );
}
228
/**
 * Fetch the local repo from the service container's repo group.
 *
 * @return mixed Whatever getLocalRepo() returns; execute() uses it as the file repo
 */
protected function getRepo() {
	$services = $this->getServiceContainer();
	$repoGroup = $services->getRepoGroup();

	return $repoGroup->getLocalRepo();
}
232
/**
 * Print the queued operations, then run them on the backend in one call.
 *
 * @param array[] $ops Operations to run; each must carry 'img' and 'dst'
 *   entries, which are used for the progress listing
 * @param FileBackend $be Backend that performs the operations
 */
protected function runBatch( array $ops, FileBackend $be ) {
	$this->output( "Migrating file batch:\n" );
	foreach ( $ops as $operation ) {
		$this->output( sprintf( "\"%s\" (dest: %s)\n", $operation['img'], $operation['dst'] ) );
	}

	$backendOptions = [ 'bypassReadOnly' => true ];
	$result = $be->doOperations( $ops, $backendOptions );
	if ( !$result->isOK() ) {
		// Report the failure and carry on
		$this->error( $result );
	}

	$this->output( "Batch done\n\n" );
}
250}
251
252// @codeCoverageIgnoreStart
// Name this script's class in $maintClass, then load the file referenced by
// RUN_MAINTENANCE_IF_MAIN (presumably it runs that class when this file is the
// entry point — confirm against Maintenance.php).
253$maintClass = MigrateFileRepoLayout::class;
254require_once RUN_MAINTENANCE_IF_MAIN;
255// @codeCoverageIgnoreEnd
wfTimestampNow()
Convenience function; returns MediaWiki timestamp for the present time.
Proxy backend that manages file layout rewriting for FileRepo.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
error( $err, $die=0)
Throw an error to the user.
output( $out, $channel=null)
Throw some output to the user.
getServiceContainer()
Returns the main service container.
getBatchSize()
Returns batch size.
addDescription( $text)
Set the description text.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
Copy all files in FileRepo to an originals container using SHA1 paths.
runBatch(array $ops, FileBackend $be)
__construct()
Default constructor.
execute()
Do the actual work.
Base class for all file backend classes (including multi-write backends).
doOperations(array $ops, array $opts=[])
This is the main entry point into the backend for write operations.