Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
54.20% |
71 / 131 |
|
25.00% |
1 / 4 |
CRAP | |
0.00% |
0 / 1 |
MigrateFileRepoLayout | |
55.47% |
71 / 128 |
|
25.00% |
1 / 4 |
122.43 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
execute | |
51.75% |
59 / 114 |
|
0.00% |
0 / 1 |
108.87 | |||
getRepo | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
runBatch | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 |
1 | <?php |
2 | /** |
3 | * Copy all files in FileRepo to an originals container using SHA1 paths. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Maintenance |
22 | */ |
23 | |
24 | require_once __DIR__ . '/Maintenance.php'; |
25 | |
/**
 * Copy all files of the local FileRepo from one storage layout to another.
 *
 * Two layouts are supported for both the source and the destination:
 *  - 'name': the path reported by the File object itself, or, for deleted
 *    versions, a path inside the "<repo>-deleted" container
 *  - 'sha1': "<repo>-original/<a>/<b>/<c>/<sha1>", where a, b and c are the
 *    first three characters of the SHA-1
 *
 * Current versions, their archived (old) versions and rows of the filearchive
 * table are all processed. Files are copied, never moved.
 *
 * This script should be run while the repo is still set to the old layout.
 *
 * @ingroup Maintenance
 */
class MigrateFileRepoLayout extends Maintenance {
	/** Values accepted by the --oldlayout and --newlayout options */
	private const LAYOUTS = [ 'name', 'sha1' ];

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Copy files in repo to a different layout.' );
		$this->addOption( 'oldlayout', "Old layout; one of 'name' or 'sha1'", true, true );
		$this->addOption( 'newlayout', "New layout; one of 'name' or 'sha1'", true, true );
		$this->addOption( 'since', "Copy only files from after this timestamp", false, true );
		$this->setBatchSize( 50 );
	}

	/**
	 * Validate the options, then queue and run copy operations for current,
	 * archived and deleted file versions.
	 *
	 * Problems with individual files (unknown SHA-1, failure to prepare the
	 * destination directory) are reported via error() and do not abort the run.
	 */
	public function execute() {
		$oldLayout = $this->getOption( 'oldlayout' );
		if ( !in_array( $oldLayout, self::LAYOUTS, true ) ) {
			$this->fatalError( "Invalid old layout." );
		}
		$newLayout = $this->getOption( 'newlayout' );
		if ( !in_array( $newLayout, self::LAYOUTS, true ) ) {
			$this->fatalError( "Invalid new layout." );
		}
		$since = $this->getOption( 'since' );

		$repo = $this->getRepo();

		$be = $repo->getBackend();
		if ( $be instanceof FileBackendDBRepoWrapper ) {
			// avoid path translations for this script
			$be = $be->getInternalBackend();
		}

		$dbw = $repo->getPrimaryDB();

		$origBase = $be->getContainerStoragePath( "{$repo->getName()}-original" );
		// Loop-invariant, so computed once rather than for every deleted version
		$deletedBase = $be->getContainerStoragePath( "{$repo->getName()}-deleted" );
		$startTime = wfTimestampNow();

		// Do current and archived versions...
		$conds = [];
		if ( $since ) {
			$conds[] = $dbw->expr( 'img_timestamp', '>=', $dbw->timestamp( $since ) );
		}

		$batchSize = $this->getBatchSize();
		$batch = [];
		// Page through the image table in img_name order
		$lastName = '';
		do {
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'img_name', 'img_sha1' ] )
				->from( 'image' )
				->where( $dbw->expr( 'img_name', '>', $lastName ) )
				->andWhere( $conds )
				->orderBy( 'img_name' )
				->limit( $batchSize )
				->caller( __METHOD__ )->fetchResultSet();

			foreach ( $res as $row ) {
				$lastName = $row->img_name;
				/** @var LocalFile $file */
				$file = $repo->newFile( $row->img_name );
				// Check in case SHA1 rows are not populated for some files
				$sha1 = strlen( $row->img_sha1 ) ? $row->img_sha1 : $file->getSha1();

				if ( !strlen( $sha1 ) ) {
					// Only the current version is skipped; its history is still processed below
					$this->error( "Image SHA-1 not known for {$row->img_name}." );
				} else {
					$sha1Path = $this->getSha1Path( $origBase, $sha1 );
					$spath = $oldLayout === 'sha1' ? $sha1Path : $file->getPath();
					$dpath = $newLayout === 'sha1' ? $sha1Path : $file->getPath();

					$this->prepareDestination( $be, $dpath );

					$batch[] = [ 'op' => 'copy', 'overwrite' => true,
						'src' => $spath, 'dst' => $dpath, 'img' => $row->img_name ];
				}

				foreach ( $file->getHistory() as $ofile ) {
					$sha1 = $ofile->getSha1();
					if ( !strlen( $sha1 ) ) {
						$this->error( "Image SHA-1 not set for {$ofile->getArchiveName()}." );
						continue;
					}

					$sha1Path = $this->getSha1Path( $origBase, $sha1 );

					if ( $oldLayout === 'sha1' ) {
						$spath = $sha1Path;
					} elseif ( $ofile->isDeleted( File::DELETED_FILE ) ) {
						// Old versions flagged DELETED_FILE are read from the deleted container
						$spath = $deletedBase .
							'/' . $repo->getDeletedHashPath( $sha1 ) .
							$sha1 . '.' . $ofile->getExtension();
					} else {
						$spath = $ofile->getPath();
					}

					$dpath = $newLayout === 'sha1' ? $sha1Path : $ofile->getPath();

					$this->prepareDestination( $be, $dpath );

					$batch[] = [ 'op' => 'copy', 'overwrite' => true,
						'src' => $spath, 'dst' => $dpath, 'img' => $ofile->getArchiveName() ];
				}

				// Flush only between images, so one image's versions share a batch
				if ( count( $batch ) >= $batchSize ) {
					$this->runBatch( $batch, $be );
					$batch = [];
				}
			}
		} while ( $res->numRows() );

		if ( count( $batch ) ) {
			$this->runBatch( $batch, $be );
		}

		// Do deleted versions...
		$conds = [];
		if ( $since ) {
			$conds[] = $dbw->expr( 'fa_deleted_timestamp', '>=', $dbw->timestamp( $since ) );
		}

		$batch = [];
		// Page through the filearchive table in fa_id order
		$lastId = 0;
		do {
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'fa_storage_key', 'fa_id', 'fa_name' ] )
				->from( 'filearchive' )
				->where( $dbw->expr( 'fa_id', '>', $lastId ) )
				->andWhere( $conds )
				->orderBy( 'fa_id' )
				->limit( $batchSize )
				->caller( __METHOD__ )->fetchResultSet();

			foreach ( $res as $row ) {
				$lastId = $row->fa_id;
				$sha1Key = (string)$row->fa_storage_key;
				// The SHA-1 is the part of the storage key before the first dot.
				// strpos() returns false for a key without a dot; in that case
				// the whole key is used instead of truncating it to ''.
				$dotPos = strpos( $sha1Key, '.' );
				$sha1 = $dotPos === false ? $sha1Key : substr( $sha1Key, 0, $dotPos );
				if ( !strlen( $sha1 ) ) {
					$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
					continue;
				}

				$sha1Path = $this->getSha1Path( $origBase, $sha1 );
				$deletedPath = $deletedBase .
					'/' . $repo->getDeletedHashPath( $sha1Key ) . $sha1Key;

				$spath = $oldLayout === 'sha1' ? $sha1Path : $deletedPath;
				$dpath = $newLayout === 'sha1' ? $sha1Path : $deletedPath;

				$this->prepareDestination( $be, $dpath );

				// Unlike the loops above, this uses 'overwriteSame' rather than 'overwrite'
				$batch[] = [ 'op' => 'copy', 'src' => $spath, 'dst' => $dpath,
					'overwriteSame' => true, 'img' => "(ID {$row->fa_id}) {$row->fa_name}" ];

				if ( count( $batch ) >= $batchSize ) {
					$this->runBatch( $batch, $be );
					$batch = [];
				}
			}
		} while ( $res->numRows() );

		if ( count( $batch ) ) {
			$this->runBatch( $batch, $be );
		}

		$this->output( "Done (started $startTime)\n" );
	}

	/**
	 * Get the repo whose files are migrated.
	 *
	 * @return LocalRepo The local repo from the service container's RepoGroup
	 */
	protected function getRepo() {
		return $this->getServiceContainer()->getRepoGroup()->getLocalRepo();
	}

	/**
	 * Build the 'sha1' layout storage path for a file.
	 *
	 * @param string $origBase Storage path of the "<repo>-original" container
	 * @param string $sha1 Non-empty SHA-1 of the file; its first three
	 *  characters become nested directory names
	 * @return string
	 */
	private function getSha1Path( string $origBase, string $sha1 ): string {
		return "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
	}

	/**
	 * Prepare the directory that will contain a destination file.
	 *
	 * A failure is reported via error() but is not fatal; the caller still
	 * queues the copy operation.
	 *
	 * @param FileBackend $be
	 * @param string $dpath Destination storage path of the file
	 */
	private function prepareDestination( FileBackend $be, string $dpath ): void {
		$status = $be->prepare( [
			'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
		if ( !$status->isOK() ) {
			$this->error( print_r( $status->getErrors(), true ) );
		}
	}

	/**
	 * List the queued operations and run them against the backend.
	 *
	 * @param array[] $ops Copy operations; each also carries an 'img' key
	 *  holding a human-readable label used in the output
	 * @param FileBackend $be
	 */
	protected function runBatch( array $ops, FileBackend $be ) {
		$this->output( "Migrating file batch:\n" );
		foreach ( $ops as $op ) {
			$this->output( "\"{$op['img']}\" (dest: {$op['dst']})\n" );
		}

		$status = $be->doOperations( $ops, [ 'bypassReadOnly' => true ] );
		if ( !$status->isOK() ) {
			// Report through error(), like every other failure in this script
			$this->error( print_r( $status->getErrors(), true ) );
		}

		$this->output( "Batch done\n\n" );
	}
}
247 | |
// Name the maintenance class defined in this file, then include the
// RUN_MAINTENANCE_IF_MAIN bootstrap file.
$maintClass = MigrateFileRepoLayout::class;
require_once RUN_MAINTENANCE_IF_MAIN;