Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
55.47% |
71 / 128 |
|
25.00% |
1 / 4 |
CRAP | |
0.00% |
0 / 1 |
MigrateFileRepoLayout | |
55.47% |
71 / 128 |
|
25.00% |
1 / 4 |
122.43 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
execute | |
51.75% |
59 / 114 |
|
0.00% |
0 / 1 |
108.87 | |||
getRepo | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
runBatch | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 |
1 | <?php |
2 | /** |
3 | * Copy all files in FileRepo to an originals container using SHA1 paths. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Maintenance |
22 | */ |
23 | |
24 | use Wikimedia\FileBackend\FileBackend; |
25 | |
26 | // @codeCoverageIgnoreStart |
27 | require_once __DIR__ . '/Maintenance.php'; |
28 | // @codeCoverageIgnoreEnd |
29 | |
/**
 * Copy all files in a FileRepo from one storage layout to another.
 *
 * Both the source and the destination layout are one of:
 *  - 'name': the path reported by the file object (or the deleted-zone
 *    path derived from the storage key for deleted versions)
 *  - 'sha1': "<repo>-original" container, sharded by the first three
 *    characters of the SHA-1
 *
 * Current versions, archived (old) versions and deleted versions are copied.
 * Files are copied, never moved, so the source layout stays intact.
 *
 * This script should be run while the repo is still set to the old layout.
 *
 * @ingroup Maintenance
 */
class MigrateFileRepoLayout extends Maintenance {
	/** Layout names accepted by the --oldlayout and --newlayout options */
	private const LAYOUTS = [ 'name', 'sha1' ];

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Copy files in repo to a different layout.' );
		$this->addOption( 'oldlayout', "Old layout; one of 'name' or 'sha1'", true, true );
		$this->addOption( 'newlayout', "New layout; one of 'name' or 'sha1'", true, true );
		$this->addOption( 'since', "Copy only files from after this timestamp", false, true );
		$this->setBatchSize( 50 );
	}

	/**
	 * Validate the options, then copy current/archived versions (driven by the
	 * image table) followed by deleted versions (driven by the filearchive table).
	 *
	 * Copy operations are accumulated and flushed through runBatch() once at
	 * least one batch size worth of operations has been collected.
	 */
	public function execute() {
		$oldLayout = $this->getOption( 'oldlayout' );
		// Strict comparison: only the exact strings 'name' and 'sha1' are valid
		if ( !in_array( $oldLayout, self::LAYOUTS, true ) ) {
			$this->fatalError( "Invalid old layout." );
		}
		$newLayout = $this->getOption( 'newlayout' );
		if ( !in_array( $newLayout, self::LAYOUTS, true ) ) {
			$this->fatalError( "Invalid new layout." );
		}
		$since = $this->getOption( 'since' );

		$repo = $this->getRepo();

		$be = $repo->getBackend();
		if ( $be instanceof FileBackendDBRepoWrapper ) {
			// avoid path translations for this script
			$be = $be->getInternalBackend();
		}

		$dbw = $repo->getPrimaryDB();

		$origBase = $be->getContainerStoragePath( "{$repo->getName()}-original" );
		// Loop-invariant base path of the deleted zone
		$deletedBase = $be->getContainerStoragePath( "{$repo->getName()}-deleted" );
		$startTime = wfTimestampNow();

		// Do current and archived versions...
		$conds = [];
		if ( $since ) {
			$conds[] = $dbw->expr( 'img_timestamp', '>=', $dbw->timestamp( $since ) );
		}

		$batchSize = $this->getBatchSize();
		$batch = [];
		$lastName = '';
		do {
			// Keyset pagination on img_name
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'img_name', 'img_sha1' ] )
				->from( 'image' )
				->where( $dbw->expr( 'img_name', '>', $lastName ) )
				->andWhere( $conds )
				->orderBy( 'img_name' )
				->limit( $batchSize )
				->caller( __METHOD__ )->fetchResultSet();

			foreach ( $res as $row ) {
				$lastName = $row->img_name;
				/** @var LocalFile $file */
				$file = $repo->newFile( $row->img_name );
				// Check in case SHA1 rows are not populated for some files
				$sha1 = strlen( $row->img_sha1 ) ? $row->img_sha1 : $file->getSha1();

				if ( !strlen( $sha1 ) ) {
					// Skip the current version only; old versions are still processed below
					$this->error( "Image SHA-1 not known for {$row->img_name}." );
				} else {
					$spath = $oldLayout === 'sha1'
						? $this->getSha1Path( $origBase, $sha1 )
						: $file->getPath();
					$dpath = $newLayout === 'sha1'
						? $this->getSha1Path( $origBase, $sha1 )
						: $file->getPath();

					$this->prepareDestination( $be, $dpath );

					$batch[] = [ 'op' => 'copy', 'overwrite' => true,
						'src' => $spath, 'dst' => $dpath, 'img' => $row->img_name ];
				}

				foreach ( $file->getHistory() as $ofile ) {
					$sha1 = $ofile->getSha1();
					if ( !strlen( $sha1 ) ) {
						$this->error( "Image SHA-1 not set for {$ofile->getArchiveName()}." );
						continue;
					}

					if ( $oldLayout === 'sha1' ) {
						$spath = $this->getSha1Path( $origBase, $sha1 );
					} elseif ( $ofile->isDeleted( File::DELETED_FILE ) ) {
						// Hidden old versions live in the deleted zone under their storage
						// key, which is "<sha1>.<ext>", or just "<sha1>" when the file has
						// no extension (so no trailing dot must be appended in that case).
						$ext = $ofile->getExtension();
						$key = strlen( $ext ) ? "{$sha1}.{$ext}" : $sha1;
						$spath = $deletedBase . '/' . $repo->getDeletedHashPath( $sha1 ) . $key;
					} else {
						$spath = $ofile->getPath();
					}

					$dpath = $newLayout === 'sha1'
						? $this->getSha1Path( $origBase, $sha1 )
						: $ofile->getPath();

					$this->prepareDestination( $be, $dpath );

					$batch[] = [ 'op' => 'copy', 'overwrite' => true,
						'src' => $spath, 'dst' => $dpath, 'img' => $ofile->getArchiveName() ];
				}

				if ( count( $batch ) >= $batchSize ) {
					$this->runBatch( $batch, $be );
					$batch = [];
				}
			}
		} while ( $res->numRows() );

		if ( count( $batch ) ) {
			$this->runBatch( $batch, $be );
		}

		// Do deleted versions...
		$conds = [];
		if ( $since ) {
			$conds[] = $dbw->expr( 'fa_deleted_timestamp', '>=', $dbw->timestamp( $since ) );
		}

		$batch = [];
		$lastId = 0;
		do {
			// Keyset pagination on fa_id
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'fa_storage_key', 'fa_id', 'fa_name' ] )
				->from( 'filearchive' )
				->where( $dbw->expr( 'fa_id', '>', $lastId ) )
				->andWhere( $conds )
				->orderBy( 'fa_id' )
				->limit( $batchSize )
				->caller( __METHOD__ )->fetchResultSet();

			foreach ( $res as $row ) {
				$lastId = $row->fa_id;
				$sha1Key = $row->fa_storage_key;
				if ( !strlen( $sha1Key ) ) {
					$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
					continue;
				}

				// The SHA-1 is the part of the storage key before the first dot. A key
				// without any dot (file without extension) is the SHA-1 itself; strpos()
				// returns false in that case, which must not be fed to substr().
				$dotPos = strpos( $sha1Key, '.' );
				$sha1 = $dotPos === false ? $sha1Key : substr( $sha1Key, 0, $dotPos );
				if ( $sha1 === '' ) {
					// Key starts with a dot: there is no hash to build a path from
					$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
					continue;
				}

				$spath = $oldLayout === 'sha1'
					? $this->getSha1Path( $origBase, $sha1 )
					: $deletedBase . '/' . $repo->getDeletedHashPath( $sha1Key ) . $sha1Key;
				$dpath = $newLayout === 'sha1'
					? $this->getSha1Path( $origBase, $sha1 )
					: $deletedBase . '/' . $repo->getDeletedHashPath( $sha1Key ) . $sha1Key;

				$this->prepareDestination( $be, $dpath );

				$batch[] = [ 'op' => 'copy', 'src' => $spath, 'dst' => $dpath,
					'overwriteSame' => true, 'img' => "(ID {$row->fa_id}) {$row->fa_name}" ];

				if ( count( $batch ) >= $batchSize ) {
					$this->runBatch( $batch, $be );
					$batch = [];
				}
			}
		} while ( $res->numRows() );

		if ( count( $batch ) ) {
			$this->runBatch( $batch, $be );
		}

		$this->output( "Done (started $startTime)\n" );
	}

	/**
	 * Get the repo whose files are migrated.
	 *
	 * @return mixed The local repo as returned by the RepoGroup service
	 */
	protected function getRepo() {
		return $this->getServiceContainer()->getRepoGroup()->getLocalRepo();
	}

	/**
	 * Build the SHA-1 layout path of a file: the originals container, three
	 * single-character shard directories, then the full hash.
	 *
	 * @param string $origBase Storage path of the "<repo>-original" container
	 * @param string $sha1 Non-empty hash of the file
	 * @return string
	 */
	private function getSha1Path( string $origBase, string $sha1 ): string {
		return "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
	}

	/**
	 * Prepare the parent directory of a destination path in the backend.
	 *
	 * A failure is reported but is not fatal; the copy operation is still queued
	 * by the caller.
	 *
	 * @param FileBackend $be
	 * @param string|false $dpath Destination storage path
	 */
	private function prepareDestination( FileBackend $be, $dpath ): void {
		$status = $be->prepare( [
			'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
		if ( !$status->isOK() ) {
			$this->error( $status );
		}
	}

	/**
	 * Print the files of a batch and run its copy operations on the backend.
	 *
	 * Each operation carries an extra 'img' entry that is only used for the
	 * progress output here. A failed batch is reported but does not abort the script.
	 *
	 * @param array[] $ops
	 * @param FileBackend $be
	 */
	protected function runBatch( array $ops, FileBackend $be ) {
		$this->output( "Migrating file batch:\n" );
		foreach ( $ops as $op ) {
			$this->output( "\"{$op['img']}\" (dest: {$op['dst']})\n" );
		}

		$status = $be->doOperations( $ops, [ 'bypassReadOnly' => true ] );
		if ( !$status->isOK() ) {
			$this->error( $status );
		}

		$this->output( "Batch done\n\n" );
	}
}
251 | |
// Entry point: name the maintenance class defined in this file, then include
// the file referenced by the RUN_MAINTENANCE_IF_MAIN constant. Excluded from
// coverage measurement.
// @codeCoverageIgnoreStart
$maintClass = MigrateFileRepoLayout::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd