Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
55.47% |
71 / 128 |
|
25.00% |
1 / 4 |
CRAP | |
0.00% |
0 / 1 |
MigrateFileRepoLayout | |
55.47% |
71 / 128 |
|
25.00% |
1 / 4 |
122.43 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
6 / 6 |
|
100.00% |
1 / 1 |
1 | |||
execute | |
51.75% |
59 / 114 |
|
0.00% |
0 / 1 |
108.87 | |||
getRepo | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
runBatch | |
85.71% |
6 / 7 |
|
0.00% |
0 / 1 |
3.03 |
1 | <?php |
2 | /** |
3 | * Copy all files in FileRepo to an originals container using SHA1 paths. |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Maintenance |
22 | */ |
23 | |
24 | use MediaWiki\Maintenance\Maintenance; |
25 | use Wikimedia\FileBackend\FileBackend; |
26 | |
27 | // @codeCoverageIgnoreStart |
28 | require_once __DIR__ . '/Maintenance.php'; |
29 | // @codeCoverageIgnoreEnd |
30 | |
/**
 * Copy all files in FileRepo to an originals container using SHA1 paths.
 *
 * This script should be run while the repo is still set to the old layout.
 *
 * Three kinds of file versions are handled, in this order: the current
 * version of each row in the `image` table, the old versions returned by
 * the file's history, and the rows of the `filearchive` table.
 *
 * @ingroup Maintenance
 */
class MigrateFileRepoLayout extends Maintenance {
	/** Layout names accepted by the --oldlayout and --newlayout options. */
	private const LAYOUTS = [ 'name', 'sha1' ];

	/**
	 * Declare the script description, its options and the default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Copy files in repo to a different layout.' );
		$this->addOption( 'oldlayout', "Old layout; one of 'name' or 'sha1'", true, true );
		$this->addOption( 'newlayout', "New layout; one of 'name' or 'sha1'", true, true );
		$this->addOption( 'since', "Copy only files from after this timestamp", false, true );
		$this->setBatchSize( 50 );
	}

	/**
	 * Validate the options, then queue and run 'copy' operations from the
	 * old-layout path to the new-layout path for every file version.
	 *
	 * Rows are paged through by primary key (img_name, then fa_id) so each
	 * query only fetches one batch worth of rows.
	 */
	public function execute() {
		$oldLayout = $this->getOption( 'oldlayout' );
		// Strict comparison: only the exact strings 'name' and 'sha1' are valid
		if ( !in_array( $oldLayout, self::LAYOUTS, true ) ) {
			$this->fatalError( "Invalid old layout." );
		}
		$newLayout = $this->getOption( 'newlayout' );
		if ( !in_array( $newLayout, self::LAYOUTS, true ) ) {
			$this->fatalError( "Invalid new layout." );
		}
		$since = $this->getOption( 'since' );

		$repo = $this->getRepo();

		$be = $repo->getBackend();
		if ( $be instanceof FileBackendDBRepoWrapper ) {
			// avoid path translations for this script
			$be = $be->getInternalBackend();
		}

		$dbw = $repo->getPrimaryDB();

		$origBase = $be->getContainerStoragePath( "{$repo->getName()}-original" );
		$startTime = wfTimestampNow();

		// Do current and archived versions...
		$conds = [];
		if ( $since ) {
			$conds[] = $dbw->expr( 'img_timestamp', '>=', $dbw->timestamp( $since ) );
		}

		$batchSize = $this->getBatchSize();
		$batch = [];
		$lastName = '';
		do {
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'img_name', 'img_sha1' ] )
				->from( 'image' )
				->where( $dbw->expr( 'img_name', '>', $lastName ) )
				->andWhere( $conds )
				->orderBy( 'img_name' )
				->limit( $batchSize )
				->caller( __METHOD__ )->fetchResultSet();

			foreach ( $res as $row ) {
				$lastName = $row->img_name;
				/** @var LocalFile $file */
				$file = $repo->newFile( $row->img_name );
				// Check in case SHA1 rows are not populated for some files
				$sha1 = strlen( $row->img_sha1 ) ? $row->img_sha1 : $file->getSha1();

				if ( !strlen( $sha1 ) ) {
					// Only the current version is skipped; old versions are still handled below
					$this->error( "Image SHA-1 not known for {$row->img_name}." );
				} else {
					$spath = $oldLayout === 'sha1'
						? $this->getSha1Path( $origBase, $sha1 )
						: $file->getPath();
					$dpath = $newLayout === 'sha1'
						? $this->getSha1Path( $origBase, $sha1 )
						: $file->getPath();

					$this->prepareDestination( $be, $dpath );

					$batch[] = [ 'op' => 'copy', 'overwrite' => true,
						'src' => $spath, 'dst' => $dpath, 'img' => $row->img_name ];
				}

				foreach ( $file->getHistory() as $ofile ) {
					$sha1 = $ofile->getSha1();
					if ( !strlen( $sha1 ) ) {
						$this->error( "Image SHA-1 not set for {$ofile->getArchiveName()}." );
						continue;
					}

					if ( $oldLayout === 'sha1' ) {
						$spath = $this->getSha1Path( $origBase, $sha1 );
					} elseif ( $ofile->isDeleted( File::DELETED_FILE ) ) {
						// Old version whose file is hidden: read it from the "deleted" container
						$spath = $this->getDeletedPath(
							$be, $repo, $sha1, $sha1 . '.' . $ofile->getExtension() );
					} else {
						$spath = $ofile->getPath();
					}

					$dpath = $newLayout === 'sha1'
						? $this->getSha1Path( $origBase, $sha1 )
						: $ofile->getPath();

					$this->prepareDestination( $be, $dpath );

					$batch[] = [ 'op' => 'copy', 'overwrite' => true,
						'src' => $spath, 'dst' => $dpath, 'img' => $ofile->getArchiveName() ];
				}

				if ( count( $batch ) >= $batchSize ) {
					$this->runBatch( $batch, $be );
					$batch = [];
				}
			}
		} while ( $res->numRows() );

		// Flush whatever is left over from the last page
		if ( count( $batch ) ) {
			$this->runBatch( $batch, $be );
		}

		// Do deleted versions...
		$conds = [];
		if ( $since ) {
			$conds[] = $dbw->expr( 'fa_deleted_timestamp', '>=', $dbw->timestamp( $since ) );
		}

		$batch = [];
		$lastId = 0;
		do {
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'fa_storage_key', 'fa_id', 'fa_name' ] )
				->from( 'filearchive' )
				->where( $dbw->expr( 'fa_id', '>', $lastId ) )
				->andWhere( $conds )
				->orderBy( 'fa_id' )
				->limit( $batchSize )
				->caller( __METHOD__ )->fetchResultSet();

			foreach ( $res as $row ) {
				$lastId = $row->fa_id;
				$sha1Key = $row->fa_storage_key;
				if ( !strlen( $sha1Key ) ) {
					$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
					continue;
				}

				// The SHA-1 is the part of the storage key before the first '.'.
				// A key without any '.' (e.g. a file with no extension) is taken
				// to be the bare SHA-1; strpos() returns false in that case, which
				// must not be passed on to substr() as a length.
				$dotPos = strpos( $sha1Key, '.' );
				$sha1 = $dotPos === false ? $sha1Key : substr( $sha1Key, 0, $dotPos );
				if ( $sha1 === '' ) {
					// Key starts with '.': no usable hash to build a path from
					$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
					continue;
				}

				$spath = $oldLayout === 'sha1'
					? $this->getSha1Path( $origBase, $sha1 )
					: $this->getDeletedPath( $be, $repo, $sha1Key, $sha1Key );
				$dpath = $newLayout === 'sha1'
					? $this->getSha1Path( $origBase, $sha1 )
					: $this->getDeletedPath( $be, $repo, $sha1Key, $sha1Key );

				$this->prepareDestination( $be, $dpath );

				$batch[] = [ 'op' => 'copy', 'src' => $spath, 'dst' => $dpath,
					'overwriteSame' => true, 'img' => "(ID {$row->fa_id}) {$row->fa_name}" ];

				if ( count( $batch ) >= $batchSize ) {
					$this->runBatch( $batch, $be );
					$batch = [];
				}
			}
		} while ( $res->numRows() );

		if ( count( $batch ) ) {
			$this->runBatch( $batch, $be );
		}

		$this->output( "Done (started $startTime)\n" );
	}

	/**
	 * Get the repo whose files are migrated: the local repo of the repo group.
	 *
	 * Kept as a separate protected method so it can be overridden.
	 */
	protected function getRepo() {
		return $this->getServiceContainer()->getRepoGroup()->getLocalRepo();
	}

	/**
	 * Build the path of a file in the SHA-1 layout: the first three characters
	 * of the hash are used as three nested directory levels.
	 *
	 * @param string|null $origBase Storage path of the "-original" container
	 * @param string $sha1 Non-empty SHA-1 of the file
	 * @return string
	 */
	private function getSha1Path( $origBase, $sha1 ) {
		return "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
	}

	/**
	 * Build the path of a file inside the repo's "-deleted" container.
	 *
	 * @param FileBackend $be
	 * @param mixed $repo Repo as returned by getRepo()
	 * @param string $hashKey Key handed to the repo to compute the hash directories
	 * @param string $storageKey File name within the hash directory
	 * @return string
	 */
	private function getDeletedPath( FileBackend $be, $repo, $hashKey, $storageKey ) {
		return $be->getContainerStoragePath( "{$repo->getName()}-deleted" ) .
			'/' . $repo->getDeletedHashPath( $hashKey ) . $storageKey;
	}

	/**
	 * Prepare the parent directory of a copy destination, bypassing read-only
	 * mode. A failure is reported but does not stop the migration.
	 *
	 * @param FileBackend $be
	 * @param string $dpath Destination storage path
	 */
	private function prepareDestination( FileBackend $be, $dpath ) {
		$status = $be->prepare( [
			'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
		if ( !$status->isOK() ) {
			$this->error( $status );
		}
	}

	/**
	 * List the queued operations on the output, then run them on the backend.
	 * A failed batch is reported but does not stop the script.
	 *
	 * @param array[] $ops Operations with at least the 'img' and 'dst' keys set
	 * @param FileBackend $be
	 */
	protected function runBatch( array $ops, FileBackend $be ) {
		$this->output( "Migrating file batch:\n" );
		foreach ( $ops as $op ) {
			$this->output( "\"{$op['img']}\" (dest: {$op['dst']})\n" );
		}

		$status = $be->doOperations( $ops, [ 'bypassReadOnly' => true ] );
		if ( !$status->isOK() ) {
			$this->error( $status );
		}

		$this->output( "Batch done\n\n" );
	}
}
252 | |
253 | // @codeCoverageIgnoreStart |
254 | $maintClass = MigrateFileRepoLayout::class; |
255 | require_once RUN_MAINTENANCE_IF_MAIN; |
256 | // @codeCoverageIgnoreEnd |