Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
55.47% covered (warning)
55.47%
71 / 128
25.00% covered (danger)
25.00%
1 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
MigrateFileRepoLayout
55.47% covered (warning)
55.47%
71 / 128
25.00% covered (danger)
25.00%
1 / 4
122.43
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 execute
51.75% covered (warning)
51.75%
59 / 114
0.00% covered (danger)
0.00%
0 / 1
108.87
 getRepo
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 runBatch
85.71% covered (warning)
85.71%
6 / 7
0.00% covered (danger)
0.00%
0 / 1
3.03
1<?php
2/**
3 * Copy all files in FileRepo to an originals container using SHA1 paths.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Maintenance
22 */
23
24use MediaWiki\Maintenance\Maintenance;
25use Wikimedia\FileBackend\FileBackend;
26
27// @codeCoverageIgnoreStart
28require_once __DIR__ . '/Maintenance.php';
29// @codeCoverageIgnoreEnd
30
31/**
32 * Copy all files in FileRepo to an originals container using SHA1 paths.
33 *
34 * This script should be run while the repo is still set to the old layout.
35 *
36 * @ingroup Maintenance
37 */
38class MigrateFileRepoLayout extends Maintenance {
39    public function __construct() {
40        parent::__construct();
41        $this->addDescription( 'Copy files in repo to a different layout.' );
42        $this->addOption( 'oldlayout', "Old layout; one of 'name' or 'sha1'", true, true );
43        $this->addOption( 'newlayout', "New layout; one of 'name' or 'sha1'", true, true );
44        $this->addOption( 'since', "Copy only files from after this timestamp", false, true );
45        $this->setBatchSize( 50 );
46    }
47
48    public function execute() {
49        $oldLayout = $this->getOption( 'oldlayout' );
50        if ( !in_array( $oldLayout, [ 'name', 'sha1' ] ) ) {
51            $this->fatalError( "Invalid old layout." );
52        }
53        $newLayout = $this->getOption( 'newlayout' );
54        if ( !in_array( $newLayout, [ 'name', 'sha1' ] ) ) {
55            $this->fatalError( "Invalid new layout." );
56        }
57        $since = $this->getOption( 'since' );
58
59        $repo = $this->getRepo();
60
61        $be = $repo->getBackend();
62        if ( $be instanceof FileBackendDBRepoWrapper ) {
63            // avoid path translations for this script
64            $be = $be->getInternalBackend();
65        }
66
67        $dbw = $repo->getPrimaryDB();
68
69        $origBase = $be->getContainerStoragePath( "{$repo->getName()}-original" );
70        $startTime = wfTimestampNow();
71
72        // Do current and archived versions...
73        $conds = [];
74        if ( $since ) {
75            $conds[] = $dbw->expr( 'img_timestamp', '>=', $dbw->timestamp( $since ) );
76        }
77
78        $batchSize = $this->getBatchSize();
79        $batch = [];
80        $lastName = '';
81        do {
82            $res = $dbw->newSelectQueryBuilder()
83                ->select( [ 'img_name', 'img_sha1' ] )
84                ->from( 'image' )
85                ->where( $dbw->expr( 'img_name', '>', $lastName ) )
86                ->andWhere( $conds )
87                ->orderBy( 'img_name' )
88                ->limit( $batchSize )
89                ->caller( __METHOD__ )->fetchResultSet();
90
91            foreach ( $res as $row ) {
92                $lastName = $row->img_name;
93                /** @var LocalFile $file */
94                $file = $repo->newFile( $row->img_name );
95                // Check in case SHA1 rows are not populated for some files
96                $sha1 = strlen( $row->img_sha1 ) ? $row->img_sha1 : $file->getSha1();
97
98                if ( !strlen( $sha1 ) ) {
99                    $this->error( "Image SHA-1 not known for {$row->img_name}." );
100                } else {
101                    if ( $oldLayout === 'sha1' ) {
102                        $spath = "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
103                    } else {
104                        $spath = $file->getPath();
105                    }
106
107                    if ( $newLayout === 'sha1' ) {
108                        $dpath = "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
109                    } else {
110                        $dpath = $file->getPath();
111                    }
112
113                    $status = $be->prepare( [
114                        'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
115                    if ( !$status->isOK() ) {
116                        $this->error( $status );
117                    }
118
119                    $batch[] = [ 'op' => 'copy', 'overwrite' => true,
120                        'src' => $spath, 'dst' => $dpath, 'img' => $row->img_name ];
121                }
122
123                foreach ( $file->getHistory() as $ofile ) {
124                    $sha1 = $ofile->getSha1();
125                    if ( !strlen( $sha1 ) ) {
126                        $this->error( "Image SHA-1 not set for {$ofile->getArchiveName()}." );
127                        continue;
128                    }
129
130                    if ( $oldLayout === 'sha1' ) {
131                        $spath = "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
132                    } elseif ( $ofile->isDeleted( File::DELETED_FILE ) ) {
133                        $spath = $be->getContainerStoragePath( "{$repo->getName()}-deleted" ) .
134                            '/' . $repo->getDeletedHashPath( $sha1 ) .
135                            $sha1 . '.' . $ofile->getExtension();
136                    } else {
137                        $spath = $ofile->getPath();
138                    }
139
140                    if ( $newLayout === 'sha1' ) {
141                        $dpath = "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
142                    } else {
143                        $dpath = $ofile->getPath();
144                    }
145
146                    $status = $be->prepare( [
147                        'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
148                    if ( !$status->isOK() ) {
149                        $this->error( $status );
150                    }
151                    $batch[] = [ 'op' => 'copy', 'overwrite' => true,
152                        'src' => $spath, 'dst' => $dpath, 'img' => $ofile->getArchiveName() ];
153                }
154
155                if ( count( $batch ) >= $batchSize ) {
156                    $this->runBatch( $batch, $be );
157                    $batch = [];
158                }
159            }
160        } while ( $res->numRows() );
161
162        if ( count( $batch ) ) {
163            $this->runBatch( $batch, $be );
164        }
165
166        // Do deleted versions...
167        $conds = [];
168        if ( $since ) {
169            $conds[] = $dbw->expr( 'fa_deleted_timestamp', '>=', $dbw->timestamp( $since ) );
170        }
171
172        $batch = [];
173        $lastId = 0;
174        do {
175            $res = $dbw->newSelectQueryBuilder()
176                ->select( [ 'fa_storage_key', 'fa_id', 'fa_name' ] )
177                ->from( 'filearchive' )
178                ->where( $dbw->expr( 'fa_id', '>', $lastId ) )
179                ->andWhere( $conds )
180                ->orderBy( 'fa_id' )
181                ->limit( $batchSize )
182                ->caller( __METHOD__ )->fetchResultSet();
183
184            foreach ( $res as $row ) {
185                $lastId = $row->fa_id;
186                $sha1Key = $row->fa_storage_key;
187                if ( !strlen( $sha1Key ) ) {
188                    $this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
189                    continue;
190                }
191                $sha1 = substr( $sha1Key, 0, strpos( $sha1Key, '.' ) );
192
193                if ( $oldLayout === 'sha1' ) {
194                    $spath = "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
195                } else {
196                    $spath = $be->getContainerStoragePath( "{$repo->getName()}-deleted" ) .
197                        '/' . $repo->getDeletedHashPath( $sha1Key ) . $sha1Key;
198                }
199
200                if ( $newLayout === 'sha1' ) {
201                    $dpath = "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
202                } else {
203                    $dpath = $be->getContainerStoragePath( "{$repo->getName()}-deleted" ) .
204                        '/' . $repo->getDeletedHashPath( $sha1Key ) . $sha1Key;
205                }
206
207                $status = $be->prepare( [
208                    'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
209                if ( !$status->isOK() ) {
210                    $this->error( $status );
211                }
212
213                $batch[] = [ 'op' => 'copy', 'src' => $spath, 'dst' => $dpath,
214                    'overwriteSame' => true, 'img' => "(ID {$row->fa_id}{$row->fa_name}" ];
215
216                if ( count( $batch ) >= $batchSize ) {
217                    $this->runBatch( $batch, $be );
218                    $batch = [];
219                }
220            }
221        } while ( $res->numRows() );
222
223        if ( count( $batch ) ) {
224            $this->runBatch( $batch, $be );
225        }
226
227        $this->output( "Done (started $startTime)\n" );
228    }
229
230    protected function getRepo() {
231        return $this->getServiceContainer()->getRepoGroup()->getLocalRepo();
232    }
233
234    /**
235     * @param array[] $ops
236     * @param FileBackend $be
237     */
238    protected function runBatch( array $ops, FileBackend $be ) {
239        $this->output( "Migrating file batch:\n" );
240        foreach ( $ops as $op ) {
241            $this->output( "\"{$op['img']}\" (dest: {$op['dst']})\n" );
242        }
243
244        $status = $be->doOperations( $ops, [ 'bypassReadOnly' => true ] );
245        if ( !$status->isOK() ) {
246            $this->error( $status );
247        }
248
249        $this->output( "Batch done\n\n" );
250    }
251}
252
253// @codeCoverageIgnoreStart
254$maintClass = MigrateFileRepoLayout::class;
255require_once RUN_MAINTENANCE_IF_MAIN;
256// @codeCoverageIgnoreEnd