Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
93.01% |
173 / 186 |
|
33.33% |
1 / 3 |
CRAP | |
0.00% |
0 / 1 |
| MigrateFileTables | |
93.01% |
173 / 186 |
|
33.33% |
1 / 3 |
19.12 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
11 / 11 |
|
100.00% |
1 / 1 |
1 | |||
| execute | |
93.94% |
62 / 66 |
|
0.00% |
0 / 1 |
8.01 | |||
| handleFile | |
91.74% |
100 / 109 |
|
0.00% |
0 / 1 |
10.06 | |||
| 1 | <?php |
| 2 | /** |
| 3 | * Maintenance script to migrate from the image/oldimage tables to file/filerevision. |
| 4 | * |
| 5 | * @license GPL-2.0-or-later |
| 6 | * @file |
| 7 | * @ingroup Maintenance |
| 8 | */ |
| 9 | |
| 10 | // @codeCoverageIgnoreStart |
| 11 | require_once __DIR__ . '/Maintenance.php'; |
| 12 | // @codeCoverageIgnoreEnd |
| 13 | |
| 14 | use MediaWiki\Maintenance\Maintenance; |
| 15 | use Wikimedia\Rdbms\IMaintainableDatabase; |
| 16 | use Wikimedia\Rdbms\SelectQueryBuilder; |
| 17 | |
| 18 | /** |
| 19 | * Maintenance script to migrate from the image/oldimage tables to file/filerevision. |
| 20 | * |
| 21 | * @ingroup Maintenance |
| 22 | */ |
| 23 | class MigrateFileTables extends Maintenance { |
| 24 | |
| 25 | /** |
| 26 | * @var IMaintainableDatabase |
| 27 | */ |
| 28 | protected $dbw; |
| 29 | |
| 30 | public function __construct() { |
| 31 | parent::__construct(); |
| 32 | |
| 33 | $this->addDescription( 'Script to migrate from image/oldimage tables to file/filerevision' ); |
| 34 | $this->setBatchSize( 200 ); |
| 35 | |
| 36 | $this->addOption( 'start', 'Name of file to start with', false, true ); |
| 37 | $this->addOption( 'end', 'Name of file to end with', false, true ); |
| 38 | $this->addOption( |
| 39 | 'sleep', |
| 40 | 'Time to sleep between each batch (in seconds). Default: 0', |
| 41 | false, |
| 42 | true |
| 43 | ); |
| 44 | } |
| 45 | |
| 46 | public function execute() { |
| 47 | $verbose = $this->hasOption( 'verbose' ); |
| 48 | $start = $this->getOption( 'start', false ); |
| 49 | $sleep = (int)$this->getOption( 'sleep', 0 ); |
| 50 | |
| 51 | $dbw = $this->getPrimaryDB(); |
| 52 | $queryBuilderTemplate = $dbw->newSelectQueryBuilder() |
| 53 | ->select( |
| 54 | [ |
| 55 | 'img_name', |
| 56 | 'img_size', |
| 57 | 'img_width', |
| 58 | 'img_height', |
| 59 | 'img_metadata', |
| 60 | 'img_bits', |
| 61 | 'img_media_type', |
| 62 | 'img_major_mime', |
| 63 | 'img_minor_mime', |
| 64 | 'img_timestamp', |
| 65 | 'img_sha1', |
| 66 | 'img_actor', |
| 67 | 'img_metadata', |
| 68 | 'img_description_id', |
| 69 | 'img_description_text' => 'comment_img_description.comment_text', |
| 70 | 'img_description_data' => 'comment_img_description.comment_data', |
| 71 | 'img_description_cid' => 'comment_img_description.comment_id' |
| 72 | ] |
| 73 | ) |
| 74 | ->from( 'image' ) |
| 75 | ->join( |
| 76 | 'comment', |
| 77 | 'comment_img_description', |
| 78 | 'comment_img_description.comment_id = img_description_id' |
| 79 | ); |
| 80 | $totalRowsInserted = 0; |
| 81 | $filesHandled = 0; |
| 82 | $batchSize = intval( $this->getBatchSize() ); |
| 83 | if ( $batchSize <= 0 ) { |
| 84 | $this->fatalError( "Batch size is too low...", 12 ); |
| 85 | } |
| 86 | $end = $this->getOption( 'end', false ); |
| 87 | if ( $end !== false ) { |
| 88 | $queryBuilderTemplate->andWhere( $dbw->expr( 'img_name', '<=', $end ) ); |
| 89 | } |
| 90 | $queryBuilderTemplate |
| 91 | ->orderBy( 'img_name', SelectQueryBuilder::SORT_ASC ) |
| 92 | ->limit( $batchSize ); |
| 93 | |
| 94 | $batchCondition = []; |
| 95 | // Batch condition: seeded with img_name >= start (if given), then replaced by img_name > 'foo' after each batch |
| 96 | if ( $start !== false ) { |
| 97 | $batchCondition[] = $dbw->expr( 'img_name', '>=', $start ); |
| 98 | } |
| 99 | do { |
| 100 | $queryBuilder = clone $queryBuilderTemplate; |
| 101 | $res = $queryBuilder->andWhere( $batchCondition ) |
| 102 | ->caller( __METHOD__ )->fetchResultSet(); |
| 103 | if ( $res->numRows() > 0 ) { |
| 104 | $row1 = $res->current(); |
| 105 | $this->output( "Processing next {$res->numRows()} row(s) starting with {$row1->img_name}.\n" ); |
| 106 | $res->rewind(); |
| 107 | } |
| 108 | |
| 109 | foreach ( $res as $row ) { |
| 110 | $rowsInserted = $this->handleFile( $row ); |
| 111 | $filesHandled += 1; |
| 112 | $totalRowsInserted += $rowsInserted; |
| 113 | |
| 114 | $this->output( "Migrated File:{$row->img_name}. Inserted $rowsInserted rows.\n" ); |
| 115 | } |
| 116 | if ( $res->numRows() > 0 ) { |
| 117 | // @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item |
| 118 | $batchCondition = [ $dbw->expr( 'img_name', '>', $row->img_name ) ]; |
| 119 | } |
| 120 | $this->waitForReplication(); |
| 121 | if ( $sleep ) { |
| 122 | sleep( $sleep ); |
| 123 | } |
| 124 | } while ( $res->numRows() === $batchSize ); |
| 125 | |
| 126 | $this->output( "\nFinished migration for $filesHandled files. " |
| 127 | . "$totalRowsInserted rows have been inserted into filerevision table.\n" ); |
| 128 | } |
| 129 | |
| 130 | private function handleFile( stdClass $row ): int { |
| 131 | $repo = $this->getServiceContainer()->getRepoGroup() |
| 132 | ->newCustomLocalRepo(); |
| 133 | $dbw = $this->getPrimaryDB(); |
| 134 | $rowsInserted = 0; |
| 135 | |
| 136 | // LocalFile doesn't like it when the row holds img_description_id |
| 137 | $imgDescriptionId = $row->img_description_id; |
| 138 | unset( $row->img_description_id ); |
| 139 | |
| 140 | $file = $repo->newFileFromRow( $row ); |
| 141 | |
| 142 | // Lock everything we can |
| 143 | $file->acquireFileLock(); |
| 144 | $dbw->startAtomic( __METHOD__ ); |
| 145 | $dbw->newSelectQueryBuilder() |
| 146 | ->select( '*' ) |
| 147 | ->forUpdate() |
| 148 | ->from( 'image' ) |
| 149 | ->where( [ 'img_name' => $row->img_name ] ) |
| 150 | ->caller( __METHOD__ )->fetchRow(); |
| 151 | $oldimageRows = $dbw->newSelectQueryBuilder() |
| 152 | ->select( '*' ) |
| 153 | ->forUpdate() |
| 154 | ->from( 'oldimage' ) |
| 155 | ->where( [ 'oi_name' => $row->img_name ] ) |
| 156 | ->orderBy( 'oi_timestamp', 'ASC' ) |
| 157 | ->caller( __METHOD__ )->fetchResultSet(); |
| 158 | $dbw->newSelectQueryBuilder() |
| 159 | ->select( '*' ) |
| 160 | ->forUpdate() |
| 161 | ->from( 'file' ) |
| 162 | ->where( [ 'file_name' => $row->img_name ] ) |
| 163 | ->caller( __METHOD__ )->fetchRow(); |
| 164 | |
| 165 | // Make sure the row exists in file table |
| 166 | $fileId = $file->acquireFileIdFromName(); |
| 167 | $fileRevisionRows = $dbw->newSelectQueryBuilder() |
| 168 | ->select( '*' ) |
| 169 | ->forUpdate() |
| 170 | ->from( 'filerevision' ) |
| 171 | ->where( [ 'fr_file' => $fileId ] ) |
| 172 | ->caller( __METHOD__ )->fetchResultSet(); |
| 173 | |
| 174 | // Make sure the filerevision rows exist |
| 175 | foreach ( $oldimageRows as $oldimageRow ) { |
| 176 | $timestamp = $oldimageRow->oi_timestamp; |
| 177 | $sha1 = $oldimageRow->oi_sha1; |
| 178 | |
| 179 | $alreadyDone = false; |
| 180 | foreach ( $fileRevisionRows as $fileRevisionRow ) { |
| 181 | if ( |
| 182 | $timestamp === $fileRevisionRow->fr_timestamp && |
| 183 | $sha1 === $fileRevisionRow->fr_sha1 |
| 184 | ) { |
| 185 | // This assumes the combination of oi_timestamp and oi_sha1 |
| 186 | // is always unique, which is not the case in production. |
| 187 | // However, all such cases were duplicate old uploads, so we |
| 188 | // are willing to simply insert one row only. See T67264 |
| 189 | $alreadyDone = true; |
| 190 | break; |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | if ( $alreadyDone ) { |
| 195 | continue; |
| 196 | } |
| 197 | |
| 198 | $dbw->newInsertQueryBuilder() |
| 199 | ->insertInto( 'filerevision' ) |
| 200 | ->row( |
| 201 | [ |
| 202 | 'fr_file' => $fileId, |
| 203 | 'fr_size' => $oldimageRow->oi_size, |
| 204 | 'fr_width' => $oldimageRow->oi_width, |
| 205 | 'fr_height' => $oldimageRow->oi_height, |
| 206 | 'fr_metadata' => $oldimageRow->oi_metadata, |
| 207 | 'fr_bits' => $oldimageRow->oi_bits, |
| 208 | 'fr_description_id' => $oldimageRow->oi_description_id, |
| 209 | 'fr_actor' => $oldimageRow->oi_actor, |
| 210 | 'fr_timestamp' => $oldimageRow->oi_timestamp, |
| 211 | 'fr_sha1' => $oldimageRow->oi_sha1, |
| 212 | 'fr_archive_name' => $oldimageRow->oi_archive_name, |
| 213 | 'fr_deleted' => $oldimageRow->oi_deleted, |
| 214 | ] |
| 215 | ) |
| 216 | ->caller( __METHOD__ )->execute(); |
| 217 | $rowsInserted += 1; |
| 218 | } |
| 219 | |
| 220 | // Make sure the image row (most current version) is there |
| 221 | $timestamp = $row->img_timestamp; |
| 222 | $sha1 = $row->img_sha1; |
| 223 | |
| 224 | $alreadyDone = false; |
| 225 | foreach ( $fileRevisionRows as $fileRevisionRow ) { |
| 226 | if ( |
| 227 | $timestamp === $fileRevisionRow->fr_timestamp && |
| 228 | $sha1 === $fileRevisionRow->fr_sha1 |
| 229 | ) { |
| 230 | $alreadyDone = true; |
| 231 | break; |
| 232 | } |
| 233 | } |
| 234 | |
| 235 | if ( !$alreadyDone ) { |
| 236 | $dbw->newInsertQueryBuilder() |
| 237 | ->insertInto( 'filerevision' ) |
| 238 | ->row( |
| 239 | [ |
| 240 | 'fr_file' => $fileId, |
| 241 | 'fr_size' => $row->img_size, |
| 242 | 'fr_width' => $row->img_width, |
| 243 | 'fr_height' => $row->img_height, |
| 244 | 'fr_metadata' => $row->img_metadata, |
| 245 | 'fr_bits' => $row->img_bits, |
| 246 | 'fr_description_id' => $imgDescriptionId, |
| 247 | 'fr_actor' => $row->img_actor, |
| 248 | 'fr_timestamp' => $row->img_timestamp, |
| 249 | 'fr_sha1' => $row->img_sha1, |
| 250 | 'fr_archive_name' => '', |
| 251 | 'fr_deleted' => 0, |
| 252 | ] |
| 253 | ) |
| 254 | ->caller( __METHOD__ )->execute(); |
| 255 | $rowsInserted += 1; |
| 256 | } |
| 257 | |
| 258 | // Make sure file has the latest filerevision |
| 259 | $latestFrId = $dbw->newSelectQueryBuilder() |
| 260 | ->select( 'fr_id' ) |
| 261 | ->from( 'filerevision' ) |
| 262 | ->where( [ 'fr_file' => $fileId ] ) |
| 263 | ->orderBy( 'fr_timestamp', 'DESC' ) |
| 264 | ->caller( __METHOD__ )->fetchField(); |
| 265 | $dbw->newUpdateQueryBuilder() |
| 266 | ->update( 'file' ) |
| 267 | ->set( [ 'file_latest' => $latestFrId ] ) |
| 268 | ->where( [ 'file_id' => $fileId ] ) |
| 269 | ->caller( __METHOD__ )->execute(); |
| 270 | |
| 271 | $dbw->endAtomic( __METHOD__ ); |
| 272 | $file->releaseFileLock(); |
| 273 | return $rowsInserted; |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | // @codeCoverageIgnoreStart |
| 278 | $maintClass = MigrateFileTables::class; |
| 279 | require_once RUN_MAINTENANCE_IF_MAIN; |
| 280 | // @codeCoverageIgnoreEnd |