Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
55.47% covered (warning)
55.47%
71 / 128
25.00% covered (danger)
25.00%
1 / 4
CRAP
0.00% covered (danger)
0.00%
0 / 1
MigrateFileRepoLayout
55.47% covered (warning)
55.47%
71 / 128
25.00% covered (danger)
25.00%
1 / 4
122.43
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
6 / 6
100.00% covered (success)
100.00%
1 / 1
1
 execute
51.75% covered (warning)
51.75%
59 / 114
0.00% covered (danger)
0.00%
0 / 1
108.87
 getRepo
0.00% covered (danger)
0.00%
0 / 1
0.00% covered (danger)
0.00%
0 / 1
2
 runBatch
85.71% covered (warning)
85.71%
6 / 7
0.00% covered (danger)
0.00%
0 / 1
3.03
1<?php
2/**
3 * Copy all files in FileRepo to an originals container using SHA1 paths.
4 *
5 * @license GPL-2.0-or-later
6 * @file
7 * @ingroup Maintenance
8 */
9
10use MediaWiki\FileRepo\File\File;
11use MediaWiki\FileRepo\File\LocalFile;
12use MediaWiki\FileRepo\FileBackendDBRepoWrapper;
13use MediaWiki\FileRepo\LocalRepo;
14use MediaWiki\Maintenance\Maintenance;
15use Wikimedia\FileBackend\FileBackend;
16
17// @codeCoverageIgnoreStart
18require_once __DIR__ . '/Maintenance.php';
19// @codeCoverageIgnoreEnd
20
/**
 * Copy all files in FileRepo to an originals container using SHA1 paths.
 *
 * This script should be run while the repo is still set to the old layout.
 *
 * The script walks the `image` table (current versions plus each file's
 * history) and then the `filearchive` table (deleted versions), and queues one
 * backend "copy" operation per file version from its old-layout path to its
 * new-layout path.
 *
 * @ingroup Maintenance
 */
class MigrateFileRepoLayout extends Maintenance {
    /** Layout names accepted by the 'oldlayout' and 'newlayout' options. */
    private const LAYOUTS = [ 'name', 'sha1' ];

    public function __construct() {
        parent::__construct();
        $this->addDescription( 'Copy files in repo to a different layout.' );
        $this->addOption( 'oldlayout', "Old layout; one of 'name' or 'sha1'", true, true );
        $this->addOption( 'newlayout', "New layout; one of 'name' or 'sha1'", true, true );
        $this->addOption( 'since', "Copy only files from after this timestamp", false, true );
        $this->setBatchSize( 50 );
    }

    /**
     * Validate the layout options, then copy every current, archived and
     * deleted file version from its old-layout path to its new-layout path.
     *
     * Copies are queued and flushed through runBatch() whenever the queue
     * reaches the configured batch size, and once more at the end of each pass.
     */
    public function execute() {
        $oldLayout = $this->getOption( 'oldlayout' );
        if ( !in_array( $oldLayout, self::LAYOUTS, true ) ) {
            $this->fatalError( "Invalid old layout." );
        }
        $newLayout = $this->getOption( 'newlayout' );
        if ( !in_array( $newLayout, self::LAYOUTS, true ) ) {
            $this->fatalError( "Invalid new layout." );
        }
        $since = $this->getOption( 'since' );

        $repo = $this->getRepo();

        $be = $repo->getBackend();
        if ( $be instanceof FileBackendDBRepoWrapper ) {
            // avoid path translations for this script
            $be = $be->getInternalBackend();
        }

        $dbw = $repo->getPrimaryDB();

        $origBase = $be->getContainerStoragePath( "{$repo->getName()}-original" );
        // Loop-invariant, so resolve it once instead of once per deleted file.
        $deletedBase = $be->getContainerStoragePath( "{$repo->getName()}-deleted" );
        $startTime = wfTimestampNow();
        $batchSize = $this->getBatchSize();

        // Do current and archived versions...
        $conds = [];
        if ( $since ) {
            $conds[] = $dbw->expr( 'img_timestamp', '>=', $dbw->timestamp( $since ) );
        }

        $batch = [];
        // Paginate by img_name: each query resumes after the last name seen.
        $lastName = '';
        do {
            $res = $dbw->newSelectQueryBuilder()
                ->select( [ 'img_name', 'img_sha1' ] )
                ->from( 'image' )
                ->where( $dbw->expr( 'img_name', '>', $lastName ) )
                ->andWhere( $conds )
                ->orderBy( 'img_name' )
                ->limit( $batchSize )
                ->caller( __METHOD__ )->fetchResultSet();

            foreach ( $res as $row ) {
                $lastName = $row->img_name;
                /** @var LocalFile $file */
                $file = $repo->newFile( $row->img_name );
                // Check in case SHA1 rows are not populated for some files.
                // The string casts turn a missing hash (null, or the false that
                // getSha1() is documented to return on failure) into '' so it is
                // reported below instead of being indexed as if it were a hash.
                $sha1 = (string)$row->img_sha1;
                if ( $sha1 === '' ) {
                    $sha1 = (string)$file->getSha1();
                }

                if ( $sha1 === '' ) {
                    $this->error( "Image SHA-1 not known for {$row->img_name}." );
                } else {
                    $spath = $oldLayout === 'sha1'
                        ? $this->getSha1Path( $origBase, $sha1 )
                        : $file->getPath();
                    $dpath = $newLayout === 'sha1'
                        ? $this->getSha1Path( $origBase, $sha1 )
                        : $file->getPath();

                    $batch[] = $this->prepareCopyOp(
                        $be, $spath, $dpath, $row->img_name, 'overwrite' );
                }

                // The old versions are processed even when the current
                // version's SHA-1 is unknown.
                foreach ( $file->getHistory() as $ofile ) {
                    $sha1 = (string)$ofile->getSha1();
                    if ( $sha1 === '' ) {
                        $this->error( "Image SHA-1 not set for {$ofile->getArchiveName()}." );
                        continue;
                    }

                    if ( $oldLayout === 'sha1' ) {
                        $spath = $this->getSha1Path( $origBase, $sha1 );
                    } elseif ( $ofile->isDeleted( File::DELETED_FILE ) ) {
                        // Old version whose file is flagged deleted: read it
                        // from the "-deleted" container instead.
                        $spath = $deletedBase .
                            '/' . $repo->getDeletedHashPath( $sha1 ) .
                            $sha1 . '.' . $ofile->getExtension();
                    } else {
                        $spath = $ofile->getPath();
                    }

                    $dpath = $newLayout === 'sha1'
                        ? $this->getSha1Path( $origBase, $sha1 )
                        : $ofile->getPath();

                    $batch[] = $this->prepareCopyOp(
                        $be, $spath, $dpath, $ofile->getArchiveName(), 'overwrite' );
                }

                if ( count( $batch ) >= $batchSize ) {
                    $this->runBatch( $batch, $be );
                    $batch = [];
                }
            }
        } while ( $res->numRows() );

        if ( count( $batch ) ) {
            $this->runBatch( $batch, $be );
        }

        // Do deleted versions...
        $conds = [];
        if ( $since ) {
            $conds[] = $dbw->expr( 'fa_deleted_timestamp', '>=', $dbw->timestamp( $since ) );
        }

        $batch = [];
        // Paginate by fa_id: each query resumes after the last id seen.
        $lastId = 0;
        do {
            $res = $dbw->newSelectQueryBuilder()
                ->select( [ 'fa_storage_key', 'fa_id', 'fa_name' ] )
                ->from( 'filearchive' )
                ->where( $dbw->expr( 'fa_id', '>', $lastId ) )
                ->andWhere( $conds )
                ->orderBy( 'fa_id' )
                ->limit( $batchSize )
                ->caller( __METHOD__ )->fetchResultSet();

            foreach ( $res as $row ) {
                $lastId = $row->fa_id;
                $sha1Key = (string)$row->fa_storage_key;
                // The hash is the part of the storage key before the first '.'.
                // explode() returns the whole key when there is no dot, whereas
                // substr()/strpos() would silently yield '' in that case.
                $sha1 = explode( '.', $sha1Key, 2 )[0];
                if ( $sha1 === '' ) {
                    $this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
                    continue;
                }

                if ( $oldLayout === 'sha1' ) {
                    $spath = $this->getSha1Path( $origBase, $sha1 );
                } else {
                    $spath = $deletedBase .
                        '/' . $repo->getDeletedHashPath( $sha1Key ) . $sha1Key;
                }

                if ( $newLayout === 'sha1' ) {
                    $dpath = $this->getSha1Path( $origBase, $sha1 );
                } else {
                    $dpath = $deletedBase .
                        '/' . $repo->getDeletedHashPath( $sha1Key ) . $sha1Key;
                }

                $batch[] = $this->prepareCopyOp(
                    $be, $spath, $dpath,
                    "(ID {$row->fa_id}) {$row->fa_name}", 'overwriteSame' );

                if ( count( $batch ) >= $batchSize ) {
                    $this->runBatch( $batch, $be );
                    $batch = [];
                }
            }
        } while ( $res->numRows() );

        if ( count( $batch ) ) {
            $this->runBatch( $batch, $be );
        }

        $this->output( "Done (started $startTime)\n" );
    }

    /**
     * Get the repo whose files are migrated: the local repo from the service
     * container's repo group.
     */
    protected function getRepo(): LocalRepo {
        return $this->getServiceContainer()->getRepoGroup()->getLocalRepo();
    }

    /**
     * Build the SHA-1 layout path of a file: the hash itself, nested under
     * three directory levels named after its first three characters.
     *
     * @param string $origBase Storage path of the "-original" container
     * @param string $sha1 Non-empty hash; expected to have at least three characters
     * @return string
     */
    private function getSha1Path( string $origBase, string $sha1 ): string {
        return "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
    }

    /**
     * Prepare the destination directory for a copy and build the matching
     * "copy" operation for runBatch().
     *
     * A failure to prepare the directory is reported but the operation is
     * still returned, so the caller queues it regardless.
     *
     * @param FileBackend $be
     * @param string $spath Source storage path
     * @param string $dpath Destination storage path
     * @param string $img Label printed for this file by runBatch()
     * @param string $overwriteFlag Operation flag to enable; 'overwrite' or 'overwriteSame'
     * @return array
     */
    private function prepareCopyOp(
        FileBackend $be, $spath, $dpath, $img, string $overwriteFlag
    ): array {
        $status = $be->prepare( [
            'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
        if ( !$status->isOK() ) {
            $this->error( $status );
        }

        return [ 'op' => 'copy', $overwriteFlag => true,
            'src' => $spath, 'dst' => $dpath, 'img' => $img ];
    }

    /**
     * Print the files of a batch, then run its operations on the backend.
     *
     * A failed status is reported through error(); the script carries on.
     *
     * @param array[] $ops Operations; each has at least 'img' (display label) and 'dst'
     * @param FileBackend $be
     */
    protected function runBatch( array $ops, FileBackend $be ) {
        $this->output( "Migrating file batch:\n" );
        foreach ( $ops as $op ) {
            $this->output( "\"{$op['img']}\" (dest: {$op['dst']})\n" );
        }

        $status = $be->doOperations( $ops, [ 'bypassReadOnly' => true ] );
        if ( !$status->isOK() ) {
            $this->error( $status );
        }

        $this->output( "Batch done\n\n" );
    }
}
242
243// @codeCoverageIgnoreStart
244$maintClass = MigrateFileRepoLayout::class;
245require_once RUN_MAINTENANCE_IF_MAIN;
246// @codeCoverageIgnoreEnd