Code Coverage

| | Lines | Lines | Functions and Methods | Functions and Methods | CRAP | Classes and Traits | Classes and Traits |
|---|---|---|---|---|---|---|---|
| Total | 55.47% | 71 / 128 | 25.00% | 1 / 4 | | 0.00% | 0 / 1 |
| MigrateFileRepoLayout | 55.47% | 71 / 128 | 25.00% | 1 / 4 | 122.43 | 0.00% | 0 / 1 |
| `__construct` | 100.00% | 6 / 6 | 100.00% | 1 / 1 | 1 | | |
| `execute` | 51.75% | 59 / 114 | 0.00% | 0 / 1 | 108.87 | | |
| `getRepo` | 0.00% | 0 / 1 | 0.00% | 0 / 1 | 2 | | |
| `runBatch` | 85.71% | 6 / 7 | 0.00% | 0 / 1 | 3.03 | | |

| 1 | <?php |
| 2 | /** |
| 3 | * Copy all files in FileRepo to an originals container using SHA1 paths. |
| 4 | * |
| 5 | * @license GPL-2.0-or-later |
| 6 | * @file |
| 7 | * @ingroup Maintenance |
| 8 | */ |
| 9 | |
| 10 | use MediaWiki\FileRepo\File\File; |
| 11 | use MediaWiki\FileRepo\File\LocalFile; |
| 12 | use MediaWiki\FileRepo\FileBackendDBRepoWrapper; |
| 13 | use MediaWiki\FileRepo\LocalRepo; |
| 14 | use MediaWiki\Maintenance\Maintenance; |
| 15 | use Wikimedia\FileBackend\FileBackend; |
| 16 | |
| 17 | // @codeCoverageIgnoreStart |
| 18 | require_once __DIR__ . '/Maintenance.php'; |
| 19 | // @codeCoverageIgnoreEnd |
| 20 | |
/**
 * Copy all files of the local FileRepo from one storage layout to another.
 *
 * Two layouts are understood:
 *  - 'name': the paths reported by the file objects themselves
 *    (File::getPath()), with deleted files living in the
 *    "<repo name>-deleted" container.
 *  - 'sha1': "<repo name>-original" container paths of the form
 *    <c0>/<c1>/<c2>/<sha1>, where c0..c2 are the first three characters
 *    of the SHA-1 value.
 *
 * Current, archived (old) and deleted file versions are all copied.
 * Files are copied, never moved, so the source layout stays intact.
 *
 * This script should be run while the repo is still set to the old layout.
 *
 * @ingroup Maintenance
 */
class MigrateFileRepoLayout extends Maintenance {
	/** Layout identifiers accepted by --oldlayout and --newlayout. */
	private const LAYOUTS = [ 'name', 'sha1' ];

	/**
	 * Declare the script description, its options and the default batch size.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Copy files in repo to a different layout.' );
		$this->addOption( 'oldlayout', "Old layout; one of 'name' or 'sha1'", true, true );
		$this->addOption( 'newlayout', "New layout; one of 'name' or 'sha1'", true, true );
		$this->addOption( 'since', "Copy only files from after this timestamp", false, true );
		$this->setBatchSize( 50 );
	}

	/**
	 * Run the migration.
	 *
	 * First walks the `image` table (ordered by name) and queues a copy for
	 * the current version of each file plus every version returned by
	 * getHistory(); then walks the `filearchive` table (ordered by ID) and
	 * queues a copy for each deleted version. Queued copies are flushed to
	 * the backend through runBatch() whenever the queue reaches the batch
	 * size, and once more at the end of each pass.
	 *
	 * Files whose SHA-1 is unknown are reported and skipped. A failure to
	 * prepare a destination directory is reported, but the copy is still
	 * queued so that the backend reports the definitive result.
	 */
	public function execute() {
		$oldLayout = $this->getOption( 'oldlayout' );
		if ( !in_array( $oldLayout, self::LAYOUTS, true ) ) {
			$this->fatalError( "Invalid old layout." );
		}
		$newLayout = $this->getOption( 'newlayout' );
		if ( !in_array( $newLayout, self::LAYOUTS, true ) ) {
			$this->fatalError( "Invalid new layout." );
		}
		$since = $this->getOption( 'since' );

		$repo = $this->getRepo();

		$be = $repo->getBackend();
		if ( $be instanceof FileBackendDBRepoWrapper ) {
			// avoid path translations for this script
			$be = $be->getInternalBackend();
		}

		$dbw = $repo->getPrimaryDB();

		$origBase = $be->getContainerStoragePath( "{$repo->getName()}-original" );
		$startTime = wfTimestampNow();

		// Do current and archived versions...
		$conds = [];
		if ( $since ) {
			$conds[] = $dbw->expr( 'img_timestamp', '>=', $dbw->timestamp( $since ) );
		}

		$batchSize = $this->getBatchSize();
		$batch = [];
		// Keyset pagination: each query resumes after the last name seen.
		$lastName = '';
		do {
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'img_name', 'img_sha1' ] )
				->from( 'image' )
				->where( $dbw->expr( 'img_name', '>', $lastName ) )
				->andWhere( $conds )
				->orderBy( 'img_name' )
				->limit( $batchSize )
				->caller( __METHOD__ )->fetchResultSet();

			foreach ( $res as $row ) {
				$lastName = $row->img_name;
				/** @var LocalFile $file */
				$file = $repo->newFile( $row->img_name );
				// Check in case SHA1 rows are not populated for some files
				$sha1 = $row->img_sha1 !== '' ? $row->img_sha1 : $file->getSha1();

				if ( $sha1 === '' ) {
					$this->error( "Image SHA-1 not known for {$row->img_name}." );
				} else {
					$spath = $oldLayout === 'sha1'
						? $this->buildSha1LayoutPath( $origBase, $sha1 )
						: $file->getPath();
					$dpath = $newLayout === 'sha1'
						? $this->buildSha1LayoutPath( $origBase, $sha1 )
						: $file->getPath();

					$this->prepareDestinationDir( $be, $dpath );

					$batch[] = [ 'op' => 'copy', 'overwrite' => true,
						'src' => $spath, 'dst' => $dpath, 'img' => $row->img_name ];
				}

				// Old versions are handled even when the current version was skipped.
				foreach ( $file->getHistory() as $ofile ) {
					$sha1 = $ofile->getSha1();
					if ( $sha1 === '' ) {
						$this->error( "Image SHA-1 not set for {$ofile->getArchiveName()}." );
						continue;
					}

					if ( $oldLayout === 'sha1' ) {
						$spath = $this->buildSha1LayoutPath( $origBase, $sha1 );
					} elseif ( $ofile->isDeleted( File::DELETED_FILE ) ) {
						// The file content of this old version is suppressed, so in
						// the 'name' layout it is read from the deleted container.
						$spath = $this->buildDeletedZonePath(
							$be, $repo, $sha1, $sha1 . '.' . $ofile->getExtension()
						);
					} else {
						$spath = $ofile->getPath();
					}

					$dpath = $newLayout === 'sha1'
						? $this->buildSha1LayoutPath( $origBase, $sha1 )
						: $ofile->getPath();

					$this->prepareDestinationDir( $be, $dpath );

					$batch[] = [ 'op' => 'copy', 'overwrite' => true,
						'src' => $spath, 'dst' => $dpath, 'img' => $ofile->getArchiveName() ];
				}

				if ( count( $batch ) >= $batchSize ) {
					$this->runBatch( $batch, $be );
					$batch = [];
				}
			}
		} while ( $res->numRows() );

		if ( count( $batch ) ) {
			$this->runBatch( $batch, $be );
		}

		// Do deleted versions...
		$conds = [];
		if ( $since ) {
			$conds[] = $dbw->expr( 'fa_deleted_timestamp', '>=', $dbw->timestamp( $since ) );
		}

		$batch = [];
		// Keyset pagination: each query resumes after the last ID seen.
		$lastId = 0;
		do {
			$res = $dbw->newSelectQueryBuilder()
				->select( [ 'fa_storage_key', 'fa_id', 'fa_name' ] )
				->from( 'filearchive' )
				->where( $dbw->expr( 'fa_id', '>', $lastId ) )
				->andWhere( $conds )
				->orderBy( 'fa_id' )
				->limit( $batchSize )
				->caller( __METHOD__ )->fetchResultSet();

			foreach ( $res as $row ) {
				$lastId = $row->fa_id;
				$sha1Key = $row->fa_storage_key;
				if ( $sha1Key === '' ) {
					$this->error( "Image SHA-1 not set for file #{$row->fa_id} (deleted)." );
					continue;
				}
				$sha1 = $this->extractSha1FromStorageKey( $sha1Key );

				$spath = $oldLayout === 'sha1'
					? $this->buildSha1LayoutPath( $origBase, $sha1 )
					: $this->buildDeletedZonePath( $be, $repo, $sha1Key, $sha1Key );
				$dpath = $newLayout === 'sha1'
					? $this->buildSha1LayoutPath( $origBase, $sha1 )
					: $this->buildDeletedZonePath( $be, $repo, $sha1Key, $sha1Key );

				$this->prepareDestinationDir( $be, $dpath );

				// Unlike the passes above, this op uses 'overwriteSame'
				// rather than 'overwrite'.
				$batch[] = [ 'op' => 'copy', 'src' => $spath, 'dst' => $dpath,
					'overwriteSame' => true, 'img' => "(ID {$row->fa_id}) {$row->fa_name}" ];

				if ( count( $batch ) >= $batchSize ) {
					$this->runBatch( $batch, $be );
					$batch = [];
				}
			}
		} while ( $res->numRows() );

		if ( count( $batch ) ) {
			$this->runBatch( $batch, $be );
		}

		$this->output( "Done (started $startTime)\n" );
	}

	/**
	 * Get the repo whose files are migrated.
	 *
	 * Kept as a separate protected method so that it can be overridden.
	 *
	 * @return LocalRepo
	 */
	protected function getRepo(): LocalRepo {
		return $this->getServiceContainer()->getRepoGroup()->getLocalRepo();
	}

	/**
	 * Print the queued operations and execute them on the backend.
	 *
	 * A failed status is reported via error(); it does not abort the script.
	 *
	 * @param array[] $ops Copy operations; each carries an extra 'img' label
	 *  (used only for the progress output) next to 'op', 'src' and 'dst'
	 * @param FileBackend $be
	 */
	protected function runBatch( array $ops, FileBackend $be ) {
		$this->output( "Migrating file batch:\n" );
		foreach ( $ops as $op ) {
			$this->output( "\"{$op['img']}\" (dest: {$op['dst']})\n" );
		}

		$status = $be->doOperations( $ops, [ 'bypassReadOnly' => true ] );
		if ( !$status->isOK() ) {
			$this->error( $status );
		}

		$this->output( "Batch done\n\n" );
	}

	/**
	 * Build the 'sha1' layout path of a file: <base>/<c0>/<c1>/<c2>/<sha1>.
	 *
	 * @param string $origBase Storage path of the "<repo name>-original" container
	 * @param string $sha1 SHA-1 value of the file; expected to be at least
	 *  three characters long
	 * @return string
	 */
	private function buildSha1LayoutPath( $origBase, $sha1 ) {
		return "{$origBase}/{$sha1[0]}/{$sha1[1]}/{$sha1[2]}/{$sha1}";
	}

	/**
	 * Build the path of a file inside the "<repo name>-deleted" container.
	 *
	 * @param FileBackend $be
	 * @param LocalRepo $repo
	 * @param string $hashKey Key handed to LocalRepo::getDeletedHashPath()
	 * @param string $fileName File name appended after the hash path
	 * @return string
	 */
	private function buildDeletedZonePath( FileBackend $be, LocalRepo $repo, $hashKey, $fileName ) {
		return $be->getContainerStoragePath( "{$repo->getName()}-deleted" ) .
			'/' . $repo->getDeletedHashPath( $hashKey ) . $fileName;
	}

	/**
	 * Make sure the directory of a destination path exists, reporting
	 * (but not aborting on) failure.
	 *
	 * @param FileBackend $be
	 * @param string $dpath Destination storage path of a file
	 */
	private function prepareDestinationDir( FileBackend $be, $dpath ) {
		$status = $be->prepare( [
			'dir' => dirname( $dpath ), 'bypassReadOnly' => true ] );
		if ( !$status->isOK() ) {
			$this->error( $status );
		}
	}

	/**
	 * Strip the extension from a filearchive storage key.
	 *
	 * The part before the first "." is taken as the SHA-1. A key without
	 * any "." (a file with no extension) is returned unchanged; cutting at
	 * strpos()'s `false` result would give an empty string and a broken
	 * "//"-style destination path.
	 *
	 * @param string $storageKey Non-empty fa_storage_key value
	 * @return string
	 */
	private function extractSha1FromStorageKey( $storageKey ) {
		$dotPos = strpos( $storageKey, '.' );

		return $dotPos === false ? $storageKey : substr( $storageKey, 0, $dotPos );
	}
}
| 242 | |
| 243 | // @codeCoverageIgnoreStart |
| 244 | $maintClass = MigrateFileRepoLayout::class; |
| 245 | require_once RUN_MAINTENANCE_IF_MAIN; |
| 246 | // @codeCoverageIgnoreEnd |