Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
93.01% covered (success)
93.01%
173 / 186
33.33% covered (danger)
33.33%
1 / 3
CRAP
0.00% covered (danger)
0.00%
0 / 1
MigrateFileTables
93.01% covered (success)
93.01%
173 / 186
33.33% covered (danger)
33.33%
1 / 3
19.12
0.00% covered (danger)
0.00%
0 / 1
 __construct
100.00% covered (success)
100.00%
11 / 11
100.00% covered (success)
100.00%
1 / 1
1
 execute
93.94% covered (success)
93.94%
62 / 66
0.00% covered (danger)
0.00%
0 / 1
8.01
 handleFile
91.74% covered (success)
91.74%
100 / 109
0.00% covered (danger)
0.00%
0 / 1
10.06
1<?php
2/**
3 * Maintenance script to migrate from the image/oldimage tables to file/filerevision.
4 *
5 * @license GPL-2.0-or-later
6 * @file
7 * @ingroup Maintenance
8 */
9
10// @codeCoverageIgnoreStart
11require_once __DIR__ . '/Maintenance.php';
12// @codeCoverageIgnoreEnd
13
14use MediaWiki\Maintenance\Maintenance;
15use Wikimedia\Rdbms\IMaintainableDatabase;
16use Wikimedia\Rdbms\SelectQueryBuilder;
17
/**
 * Maintenance script to migrate from the image/oldimage tables to the
 * file/filerevision tables.
 *
 * @ingroup Maintenance
 */
class MigrateFileTables extends Maintenance {

    /**
     * @var IMaintainableDatabase
     */
    protected $dbw;

    /**
     * Register the script description, the default batch size (200) and the
     * 'start', 'end' and 'sleep' command line options.
     */
    public function __construct() {
        parent::__construct();

        $this->addDescription( 'Script to migrate from image/oldimage tables to file/filerevision' );
        $this->setBatchSize( 200 );

        $this->addOption( 'start', 'Name of file to start with', false, true );
        $this->addOption( 'end', 'Name of file to end with', false, true );
        $this->addOption(
            'sleep',
            'Time to sleep between each batch (in seconds). Default: 0',
            false,
            true
        );
    }

    /**
     * Walk the image table in img_name order, one batch at a time, and
     * migrate every selected file via handleFile().
     *
     * The 'start' and 'end' options are inclusive bounds on img_name. After
     * each batch the script waits for replication and then sleeps for the
     * number of seconds given by 'sleep', if any. The loop stops as soon as a
     * batch comes back with fewer rows than the batch size.
     */
    public function execute() {
        $start = $this->getOption( 'start', false );
        $sleep = (int)$this->getOption( 'sleep', 0 );

        $dbw = $this->getPrimaryDB();
        // The img_description_* aliases come from the joined comment row;
        // the whole row is later handed to newFileFromRow() in handleFile().
        $queryBuilderTemplate = $dbw->newSelectQueryBuilder()
            ->select(
                [
                    'img_name',
                    'img_size',
                    'img_width',
                    'img_height',
                    'img_metadata',
                    'img_bits',
                    'img_media_type',
                    'img_major_mime',
                    'img_minor_mime',
                    'img_timestamp',
                    'img_sha1',
                    'img_actor',
                    'img_description_id',
                    'img_description_text' => 'comment_img_description.comment_text',
                    'img_description_data' => 'comment_img_description.comment_data',
                    'img_description_cid' => 'comment_img_description.comment_id'
                ]
            )
            ->from( 'image' )
            ->join(
                'comment',
                'comment_img_description',
                'comment_img_description.comment_id = img_description_id'
            );
        $totalRowsInserted = 0;
        $filesHandled = 0;
        $batchSize = intval( $this->getBatchSize() );
        if ( $batchSize <= 0 ) {
            $this->fatalError( "Batch size is too low...", 12 );
        }
        $end = $this->getOption( 'end', false );
        if ( $end !== false ) {
            $queryBuilderTemplate->andWhere( $dbw->expr( 'img_name', '<=', $end ) );
        }
        $queryBuilderTemplate
            ->orderBy( 'img_name', SelectQueryBuilder::SORT_ASC )
            ->limit( $batchSize );

        // Lower bound for the first batch only (inclusive --start, if given).
        $batchCondition = [];
        if ( $start !== false ) {
            $batchCondition[] = $dbw->expr( 'img_name', '>=', $start );
        }
        do {
            // Clone so that each batch's cursor condition does not pile up on the template.
            $queryBuilder = clone $queryBuilderTemplate;
            $res = $queryBuilder->andWhere( $batchCondition )
                ->caller( __METHOD__ )->fetchResultSet();
            if ( $res->numRows() > 0 ) {
                $row1 = $res->current();
                $this->output( "Processing next {$res->numRows()} row(s) starting with {$row1->img_name}.\n" );
                $res->rewind();
            }

            foreach ( $res as $row ) {
                $rowsInserted = $this->handleFile( $row );
                $filesHandled += 1;
                $totalRowsInserted += $rowsInserted;

                $this->output( "Migrated File:{$row->img_name}. Inserted $rowsInserted rows.\n" );
            }
            if ( $res->numRows() > 0 ) {
                // For the WHERE img_name > 'foo' condition that comes after doing a batch:
                // the next batch resumes strictly after the last name handled here.
                // @phan-suppress-next-line PhanPossiblyUndeclaredVariable rows contains at least one item
                $batchCondition = [ $dbw->expr( 'img_name', '>', $row->img_name ) ];
            }
            $this->waitForReplication();
            if ( $sleep ) {
                sleep( $sleep );
            }
        } while ( $res->numRows() === $batchSize );

        $this->output( "\nFinished migration for $filesHandled files. "
            . "$totalRowsInserted rows have been inserted into filerevision table.\n" );
    }

    /**
     * Migrate a single file: insert a filerevision row for every oldimage row
     * and for the current image row that is not there yet, then point
     * file.file_latest at the filerevision row with the newest fr_timestamp.
     *
     * A revision is identified by its (timestamp, sha1) pair. Rows already
     * present in filerevision are skipped, so re-running the script on the
     * same file inserts nothing.
     *
     * @param stdClass $row Row of the image table as selected by execute(),
     *   including the img_description_* fields
     * @return int Number of rows inserted into filerevision
     */
    private function handleFile( stdClass $row ): int {
        $repo = $this->getServiceContainer()->getRepoGroup()
            ->newCustomLocalRepo();
        $dbw = $this->getPrimaryDB();
        $rowsInserted = 0;

        // LocalFile doesn't like it when the row holds img_description_id
        $imgDescriptionId = $row->img_description_id;
        unset( $row->img_description_id );

        $file = $repo->newFileFromRow( $row );

        // Lock everything we can
        $file->acquireFileLock();
        try {
            $dbw->startAtomic( __METHOD__ );
            // The image and file selects are issued only for their FOR UPDATE
            // row locks; their results are intentionally discarded.
            $dbw->newSelectQueryBuilder()
                ->select( '*' )
                ->forUpdate()
                ->from( 'image' )
                ->where( [ 'img_name' => $row->img_name ] )
                ->caller( __METHOD__ )->fetchRow();
            $oldimageRows = $dbw->newSelectQueryBuilder()
                ->select( '*' )
                ->forUpdate()
                ->from( 'oldimage' )
                ->where( [ 'oi_name' => $row->img_name ] )
                ->orderBy( 'oi_timestamp', 'ASC' )
                ->caller( __METHOD__ )->fetchResultSet();
            $dbw->newSelectQueryBuilder()
                ->select( '*' )
                ->forUpdate()
                ->from( 'file' )
                ->where( [ 'file_name' => $row->img_name ] )
                ->caller( __METHOD__ )->fetchRow();

            // Make sure the row exists in file table
            $fileId = $file->acquireFileIdFromName();
            $fileRevisionRows = $dbw->newSelectQueryBuilder()
                ->select( '*' )
                ->forUpdate()
                ->from( 'filerevision' )
                ->where( [ 'fr_file' => $fileId ] )
                ->caller( __METHOD__ )->fetchResultSet();

            // Index the revisions that are already migrated by "timestamp|sha1"
            // so that each membership check is a single array lookup.
            $existingKeys = [];
            foreach ( $fileRevisionRows as $fileRevisionRow ) {
                $existingKeys[$fileRevisionRow->fr_timestamp . '|' . $fileRevisionRow->fr_sha1] = true;
            }

            // Make sure the filerevision rows exist.
            // The combination of oi_timestamp and oi_sha1 is not always unique
            // in production, but all such cases were duplicate old uploads and
            // we are willing to simply insert one row only (see T67264). The
            // lookup is therefore extended with every row inserted here, so a
            // duplicate within oldimage is skipped on the first run as well.
            $seenKeys = $existingKeys;
            foreach ( $oldimageRows as $oldimageRow ) {
                $key = $oldimageRow->oi_timestamp . '|' . $oldimageRow->oi_sha1;
                if ( isset( $seenKeys[$key] ) ) {
                    continue;
                }

                $dbw->newInsertQueryBuilder()
                    ->insertInto( 'filerevision' )
                    ->row(
                        [
                            'fr_file' => $fileId,
                            'fr_size' => $oldimageRow->oi_size,
                            'fr_width' => $oldimageRow->oi_width,
                            'fr_height' => $oldimageRow->oi_height,
                            'fr_metadata' => $oldimageRow->oi_metadata,
                            'fr_bits' => $oldimageRow->oi_bits,
                            'fr_description_id' => $oldimageRow->oi_description_id,
                            'fr_actor' => $oldimageRow->oi_actor,
                            'fr_timestamp' => $oldimageRow->oi_timestamp,
                            'fr_sha1' => $oldimageRow->oi_sha1,
                            'fr_archive_name' => $oldimageRow->oi_archive_name,
                            'fr_deleted' => $oldimageRow->oi_deleted,
                        ]
                    )
                    ->caller( __METHOD__ )->execute();
                $seenKeys[$key] = true;
                $rowsInserted += 1;
            }

            // Make sure the image row (most current version) is there.
            // This is checked only against the rows that were in filerevision
            // before this call, not against the oldimage rows inserted above.
            $currentKey = $row->img_timestamp . '|' . $row->img_sha1;
            if ( !isset( $existingKeys[$currentKey] ) ) {
                $dbw->newInsertQueryBuilder()
                    ->insertInto( 'filerevision' )
                    ->row(
                        [
                            'fr_file' => $fileId,
                            'fr_size' => $row->img_size,
                            'fr_width' => $row->img_width,
                            'fr_height' => $row->img_height,
                            'fr_metadata' => $row->img_metadata,
                            'fr_bits' => $row->img_bits,
                            'fr_description_id' => $imgDescriptionId,
                            'fr_actor' => $row->img_actor,
                            'fr_timestamp' => $row->img_timestamp,
                            'fr_sha1' => $row->img_sha1,
                            'fr_archive_name' => '',
                            'fr_deleted' => 0,
                        ]
                    )
                    ->caller( __METHOD__ )->execute();
                $rowsInserted += 1;
            }

            // Make sure file has the latest filerevision
            $latestFrId = $dbw->newSelectQueryBuilder()
                ->select( 'fr_id' )
                ->from( 'filerevision' )
                ->where( [ 'fr_file' => $fileId ] )
                ->orderBy( 'fr_timestamp', 'DESC' )
                ->caller( __METHOD__ )->fetchField();
            $dbw->newUpdateQueryBuilder()
                ->update( 'file' )
                ->set( [ 'file_latest' => $latestFrId ] )
                ->where( [ 'file_id' => $fileId ] )
                ->caller( __METHOD__ )->execute();

            $dbw->endAtomic( __METHOD__ );
        } finally {
            // Release the file lock even when a query above throws, so a
            // failure does not leave the file locked.
            $file->releaseFileLock();
        }
        return $rowsInserted;
    }
}
276
277// @codeCoverageIgnoreStart
278$maintClass = MigrateFileTables::class;
279require_once RUN_MAINTENANCE_IF_MAIN;
280// @codeCoverageIgnoreEnd