Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
55.47% covered (warning)
55.47%
71 / 128
25.00% covered (danger)
25.00%
1 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
MigrateFileRepoLayout
55.47% covered (warning)
55.47%
71 / 128
25.00% covered (danger)
25.00%
1 / 4
122.43
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 execute
51.75% covered (warning)
51.75%
59 / 114
0.00% covered (danger)
0.00%
0 / 1
108.87
 getRepo
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 runBatch
85.71% covered (warning)
85.71%
6 / 7
0.00% covered (danger)
0.00%
0 / 1
3.03
1<?php
2/**
3 * Copy all files in FileRepo to an originals container using SHA1 paths.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Maintenance
22 */
23
24use Wikimedia\FileBackend\FileBackend;
25
26// @codeCoverageIgnoreStart
27require_once __DIR__ . '/Maintenance.php';
28// @codeCoverageIgnoreEnd
29
/**
 * Copy all files in FileRepo to an originals container using SHA1 paths.
 *
 * This script should be run while the repo is still set to the old layout.
 *
 * The work is done in two passes, each paged by the configured batch size:
 *  - rows of the `image` table, together with each file's history entries;
 *  - rows of the `filearchive` table (deleted versions).
 * For every version a 'copy' operation from the old-layout path to the
 * new-layout path is queued, and queued operations are handed to the file
 * backend in batches.
 *
 * @ingroup Maintenance
 */
class MigrateFileRepoLayout extends Maintenance {
    /**
     * Register the script description, its command line options
     * (oldlayout, newlayout, since) and a default batch size of 50.
     */
    public function __construct() {
        parent::__construct();
        $this->addDescription( 'Copy files in repo to a different layout.' );
        $this->addOption( 'oldlayout', "Old layout; one of 'name' or 'sha1'", true, true );
        $this->addOption( 'newlayout', "New layout; one of 'name' or 'sha1'", true, true );
        $this->addOption( 'since', "Copy only files from after this timestamp", false, true );
        $this->setBatchSize( 50 );
    }

    /**
     * Validate the layout options, then queue and run copy operations for
     * current, archived and deleted file versions.
     *
     * Failures reported by the backend are printed via error() and do not
     * stop the run; only an invalid layout option is fatal.
     */
    public function execute() {
        $oldLayout = $this->getOption( 'oldlayout' );
        if ( !in_array( $oldLayout, [ 'name', 'sha1' ], true ) ) {
            $this->fatalError( "Invalid old layout." );
        }
        $newLayout = $this->getOption( 'newlayout' );
        if ( !in_array( $newLayout, [ 'name', 'sha1' ], true ) ) {
            $this->fatalError( "Invalid new layout." );
        }
        $since = $this->getOption( 'since' );

        $repo = $this->getRepo();

        $be = $repo->getBackend();
        if ( $be instanceof FileBackendDBRepoWrapper ) {
            // avoid path translations for this script
            $be = $be->getInternalBackend();
        }

        $dbw = $repo->getPrimaryDB();

        $origBase = $be->getContainerStoragePath( "{$repo->getName()}-original" );
        $startTime = wfTimestampNow();

        // Do current and archived versions...
        $conds = [];
        if ( $since ) {
            $conds[] = $dbw->expr( 'img_timestamp', '>=', $dbw->timestamp( $since ) );
        }

        $batchSize = $this->getBatchSize();
        $batch = [];
        // Paging cursor: each query resumes after the last image name seen
        $lastName = '';
        do {
            $res = $dbw->newSelectQueryBuilder()
                ->select( [ 'img_name', 'img_sha1' ] )
                ->from( 'image' )
                ->where( $dbw->expr( 'img_name', '>', $lastName ) )
                ->andWhere( $conds )
                ->orderBy( 'img_name' )
                ->limit( $batchSize )
                ->caller( __METHOD__ )->fetchResultSet();

            foreach ( $res as $row ) {
                $lastName = $row->img_name;
                /** @var LocalFile $file */
                $file = $repo->newFile( $row->img_name );
                // Check in case SHA1 rows are not populated for some files
                $sha1 = strlen( $row->img_sha1 ) ? $row->img_sha1 : $file->getSha1();

                if ( !strlen( $sha1 ) ) {
                    // The current version is skipped, but the history below is still processed
                    $this->error( "Image SHA-1 not known for {$row->img_name}." );
                } else {
                    if ( $oldLayout === 'sha1' ) {
                        $spath = $this->getSha1Path( $origBase, $sha1 );
                    } else {
                        $spath = $file->getPath();
                    }

                    if ( $newLayout === 'sha1' ) {
                        $dpath = $this->getSha1Path( $origBase, $sha1 );
                    } else {
                        $dpath = $file->getPath();
                    }

                    $this->prepareDestinationDir( $be, $dpath );

                    $batch[] = [ 'op' => 'copy', 'overwrite' => true,
                        'src' => $spath, 'dst' => $dpath, 'img' => $row->img_name ];
                }

                foreach ( $file->getHistory() as $ofile ) {
                    $sha1 = $ofile->getSha1();
                    if ( !strlen( $sha1 ) ) {
                        $this->error( "Image SHA-1 not set for {$ofile->getArchiveName()}." );
                        continue;
                    }

                    if ( $oldLayout === 'sha1' ) {
                        $spath = $this->getSha1Path( $origBase, $sha1 );
                    } elseif ( $ofile->isDeleted( File::DELETED_FILE ) ) {
                        // Versions flagged DELETED_FILE are read from the "-deleted" container
                        $spath = $be->getContainerStoragePath( "{$repo->getName()}-deleted" ) .
                            '/' . $repo->getDeletedHashPath( $sha1 ) .
                            $sha1 . '.' . $ofile->getExtension();
                    } else {
                        $spath = $ofile->getPath();
                    }

                    if ( $newLayout === 'sha1' ) {
                        $dpath = $this->getSha1Path( $origBase, $sha1 );
                    } else {
                        $dpath = $ofile->getPath();
                    }

                    $this->prepareDestinationDir( $be, $dpath );

                    $batch[] = [ 'op' => 'copy', 'overwrite' => true,
                        'src' => $spath, 'dst' => $dpath, 'img' => $ofile->getArchiveName() ];
                }

                // Flushed once per image row, so a batch may exceed $batchSize
                // when a file has many history entries
                if ( count( $batch ) >= $batchSize ) {
                    $this->runBatch( $batch, $be );
                    $batch = [];
                }
            }
        } while ( $res->numRows() );

        if ( count( $batch ) ) {
            $this->runBatch( $batch, $be );
        }

        // Do deleted versions...
        $conds = [];
        if ( $since ) {
            $conds[] = $dbw->expr( 'fa_deleted_timestamp', '>=', $dbw->timestamp( $since ) );
        }

        $batch = [];
        // Paging cursor: each query resumes after the last filearchive ID seen
        $lastId = 0;
        do {
            $res = $dbw->newSelectQueryBuilder()
                ->select( [ 'fa_storage_key', 'fa_id', 'fa_name' ] )
                ->from( 'filearchive' )
                ->where( $dbw->expr( 'fa_id', '>', $lastId ) )
                ->andWhere( $conds )
                ->orderBy( 'fa_id' )
                ->limit( $batchSize )
                ->caller( __METHOD__ )->fetchResultSet();

            foreach ( $res as $row ) {
                $lastId = $row->fa_id;
                $sha1Key = $row->fa_storage_key;
                if ( !strlen( $sha1Key ) ) {
                    $this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
                    continue;
                }

                // The SHA-1 is the part of the storage key before the first '.'.
                // strpos() returns false when there is no '.', which substr() would
                // treat as length 0; use the whole key in that case instead.
                $dotPos = strpos( $sha1Key, '.' );
                $sha1 = $dotPos === false ? $sha1Key : substr( $sha1Key, 0, $dotPos );
                if ( !strlen( $sha1 ) ) {
                    // Key starts with '.', so there is no hash to build a path from
                    $this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
                    continue;
                }

                if ( $oldLayout === 'sha1' ) {
                    $spath = $this->getSha1Path( $origBase, $sha1 );
                } else {
                    $spath = $this->getDeletedKeyPath( $be, $repo, $sha1Key );
                }

                if ( $newLayout === 'sha1' ) {
                    $dpath = $this->getSha1Path( $origBase, $sha1 );
                } else {
                    $dpath = $this->getDeletedKeyPath( $be, $repo, $sha1Key );
                }

                $this->prepareDestinationDir( $be, $dpath );

                // 'img' is only a label for the progress output in runBatch()
                $batch[] = [ 'op' => 'copy', 'src' => $spath, 'dst' => $dpath,
                    'overwriteSame' => true, 'img' => "(ID {$row->fa_id}) {$row->fa_name}" ];

                if ( count( $batch ) >= $batchSize ) {
                    $this->runBatch( $batch, $be );
                    $batch = [];
                }
            }
        } while ( $res->numRows() );

        if ( count( $batch ) ) {
            $this->runBatch( $batch, $be );
        }

        $this->output( "Done (started $startTime)\n" );
    }

    /**
     * Get the repo whose files are migrated: the local repo from the
     * service container's repo group.
     */
    protected function getRepo() {
        return $this->getServiceContainer()->getRepoGroup()->getLocalRepo();
    }

    /**
     * Print the destination of every queued operation, then submit the whole
     * batch to the backend. A non-OK status is reported but not fatal.
     *
     * @param array[] $ops Operations with 'op', 'src', 'dst' and an 'img' label
     * @param FileBackend $be
     */
    protected function runBatch( array $ops, FileBackend $be ) {
        $this->output( "Migrating file batch:\n" );
        foreach ( $ops as $op ) {
            $this->output( "\"{$op['img']}\" (dest: {$op['dst']})\n" );
        }

        $status = $be->doOperations( $ops, [ 'bypassReadOnly' => true ] );
        if ( !$status->isOK() ) {
            $this->error( $status );
        }

        $this->output( "Batch done\n\n" );
    }

    /**
     * Build the SHA-1 layout path: the hash stored under three directory
     * levels named after its first three characters.
     *
     * @param string $origBase Storage path of the "-original" container
     * @param string $sha1 Non-empty hash string
     * @return string
     */
    private function getSha1Path( $origBase, $sha1 ) {
        return "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
    }

    /**
     * Build the path of a deleted version inside the "-deleted" container
     * from its storage key.
     *
     * @param FileBackend $be
     * @param mixed $repo Repo object as returned by getRepo()
     * @param string $sha1Key Value of fa_storage_key
     * @return string
     */
    private function getDeletedKeyPath( FileBackend $be, $repo, $sha1Key ) {
        return $be->getContainerStoragePath( "{$repo->getName()}-deleted" ) .
            '/' . $repo->getDeletedHashPath( $sha1Key ) . $sha1Key;
    }

    /**
     * Ask the backend to prepare the parent directory of a destination path
     * and report a non-OK status. The caller still queues the copy afterwards.
     *
     * @param FileBackend $be
     * @param string $dpath Destination storage path
     */
    private function prepareDestinationDir( FileBackend $be, $dpath ) {
        $status = $be->prepare( [
            'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
        if ( !$status->isOK() ) {
            $this->error( $status );
        }
    }
}
251
252// @codeCoverageIgnoreStart
// Script bootstrap: store this file's maintenance class name in $maintClass, then
// include the file named by the RUN_MAINTENANCE_IF_MAIN constant (defined outside
// this file; presumably it runs $maintClass when invoked directly -- TODO confirm).
253$maintClass = MigrateFileRepoLayout::class;
254require_once RUN_MAINTENANCE_IF_MAIN;
255// @codeCoverageIgnoreEnd