MediaWiki master
migrateFileTables.php
Go to the documentation of this file.
<?php
// Pull in the Maintenance base class; bootstrap code is excluded from coverage.
// @codeCoverageIgnoreStart
require_once __DIR__ . '/Maintenance.php';
// @codeCoverageIgnoreEnd

/**
 * Maintenance script to migrate from image/oldimage tables to
 * file/filerevision.
 *
 * For every row of the image table the script makes sure a file row exists,
 * copies the oldimage rows and the current image row into filerevision, and
 * points file_latest at the newest revision.
 */
class MigrateFileTables extends Maintenance {

	/** @var IMaintainableDatabase */
	protected $dbw;

	/**
	 * Register the description, the default batch size (200) and the
	 * optional --start, --end and --sleep options, each of which takes
	 * an argument.
	 */
	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Script to migrate from image/oldimage tables to file/filerevision' );
		$this->setBatchSize( 200 );

		$this->addOption( 'start', 'Name of file to start with', false, true );
		$this->addOption( 'end', 'Name of file to end with', false, true );
		$this->addOption(
			'sleep',
			'Time to sleep between each batch (in seconds). Default: 0',
			false,
			true
		);
	}

	/**
	 * Migrate the rows of the image table, in batches ordered by img_name.
	 *
	 * The --start and --end bounds are both inclusive. Each image row is
	 * selected together with its upload comment and passed to handleFile().
	 * After every batch the script waits for replication and, if --sleep was
	 * given, sleeps that many seconds. The loop ends with the first batch
	 * that returns fewer rows than the batch size.
	 */
	public function execute() {
		$start = $this->getOption( 'start', false );
		$end = $this->getOption( 'end', false );
		$sleep = (int)$this->getOption( 'sleep', 0 );

		// Validate before touching the database.
		$batchSize = intval( $this->getBatchSize() );
		if ( $batchSize <= 0 ) {
			$this->fatalError( "Batch size is too low...", 12 );
		}

		$dbw = $this->getPrimaryDB();
		$queryBuilderTemplate = $dbw->newSelectQueryBuilder()
			->select(
				[
					'img_name',
					'img_size',
					'img_width',
					'img_height',
					'img_metadata',
					'img_bits',
					'img_media_type',
					'img_major_mime',
					'img_minor_mime',
					'img_timestamp',
					'img_sha1',
					'img_actor',
					'img_description_id',
					'img_description_text' => 'comment_img_description.comment_text',
					'img_description_data' => 'comment_img_description.comment_data',
					'img_description_cid' => 'comment_img_description.comment_id'
				]
			)
			->from( 'image' )
			->join(
				'comment',
				'comment_img_description',
				'comment_img_description.comment_id = img_description_id'
			);
		if ( $end !== false ) {
			$queryBuilderTemplate->andWhere( $dbw->expr( 'img_name', '<=', $end ) );
		}
		$queryBuilderTemplate
			->orderBy( 'img_name', SelectQueryBuilder::SORT_ASC )
			->limit( $batchSize );

		// Lower bound of the next batch: the inclusive --start value for the
		// first batch, afterwards "strictly after the last file handled".
		$batchCondition = [];
		if ( $start !== false ) {
			$batchCondition[] = $dbw->expr( 'img_name', '>=', $start );
		}

		$totalRowsInserted = 0;
		$filesHandled = 0;
		do {
			// Clone so the per-batch condition never accumulates on the template.
			$res = ( clone $queryBuilderTemplate )
				->andWhere( $batchCondition )
				->caller( __METHOD__ )->fetchResultSet();
			$numRows = $res->numRows();
			if ( $numRows > 0 ) {
				$firstRow = $res->current();
				$this->output( "Processing next $numRows row(s) starting with {$firstRow->img_name}.\n" );
				$res->rewind();
			}

			$lastName = null;
			foreach ( $res as $row ) {
				$rowsInserted = $this->handleFile( $row );
				$filesHandled += 1;
				$totalRowsInserted += $rowsInserted;
				$lastName = $row->img_name;

				$this->output( "Migrated File:{$row->img_name}. Inserted $rowsInserted rows.\n" );
			}
			if ( $lastName !== null ) {
				$batchCondition = [ $dbw->expr( 'img_name', '>', $lastName ) ];
			}
			$this->waitForReplication();
			if ( $sleep ) {
				sleep( $sleep );
			}
		} while ( $numRows === $batchSize );

		$this->output( "\nFinished migration for $filesHandled files. "
			. "$totalRowsInserted rows have been inserted into filerevision table.\n" );
	}

	/**
	 * Migrate a single file.
	 *
	 * Under the file lock and inside one atomic section this locks the
	 * image, oldimage, file and filerevision rows of the file, inserts a
	 * filerevision row for every oldimage row and for the current image row
	 * unless a revision with the same timestamp and sha1 already existed,
	 * and finally sets file_latest to the revision with the newest
	 * fr_timestamp.
	 *
	 * @param stdClass $row Image row as selected by execute(). Its
	 *  img_description_id property is removed as a side effect.
	 * @return int Number of rows inserted into filerevision
	 */
	private function handleFile( stdClass $row ): int {
		$repo = $this->getServiceContainer()->getRepoGroup()
			->newCustomLocalRepo();
		$dbw = $this->getPrimaryDB();
		$rowsInserted = 0;

		// LocalFile doesn't like it when the row holds img_description_id
		$imgDescriptionId = $row->img_description_id;
		unset( $row->img_description_id );

		$file = $repo->newFileFromRow( $row );

		// Lock everything we can
		$file->acquireFileLock();
		try {
			// NOTE(review): if something throws below, the atomic section is
			// left open exactly as before this change; presumably the
			// script's error handling rolls the transaction back - confirm.
			$dbw->startAtomic( __METHOD__ );
			// The next query only takes a row lock; its result is not used.
			$dbw->newSelectQueryBuilder()
				->select( '*' )
				->forUpdate()
				->from( 'image' )
				->where( [ 'img_name' => $row->img_name ] )
				->caller( __METHOD__ )->fetchRow();
			$oldimageRows = $dbw->newSelectQueryBuilder()
				->select( '*' )
				->forUpdate()
				->from( 'oldimage' )
				->where( [ 'oi_name' => $row->img_name ] )
				->orderBy( 'oi_timestamp', 'ASC' )
				->caller( __METHOD__ )->fetchResultSet();
			// Lock-only query as well.
			$dbw->newSelectQueryBuilder()
				->select( '*' )
				->forUpdate()
				->from( 'file' )
				->where( [ 'file_name' => $row->img_name ] )
				->caller( __METHOD__ )->fetchRow();

			// Make sure the row exists in file table
			$fileId = $file->acquireFileIdFromName();
			$fileRevisionRows = $dbw->newSelectQueryBuilder()
				->select( '*' )
				->forUpdate()
				->from( 'filerevision' )
				->where( [ 'fr_file' => $fileId ] )
				->caller( __METHOD__ )->fetchResultSet();

			// Index the revisions that existed before this call by
			// (timestamp, sha1) so each candidate needs a single lookup.
			// Assumes neither value contains '|' - TODO confirm.
			$revisionKey = static function ( $timestamp, $sha1 ): string {
				return $timestamp . '|' . $sha1;
			};
			$alreadyMigrated = [];
			foreach ( $fileRevisionRows as $fileRevisionRow ) {
				$key = $revisionKey( $fileRevisionRow->fr_timestamp, $fileRevisionRow->fr_sha1 );
				$alreadyMigrated[$key] = true;
			}

			// Make sure the filerevision rows exist
			foreach ( $oldimageRows as $oldimageRow ) {
				// This assume the combination of oi_timestamp and oi_sha1
				// will be always unique which is not the case in production
				// but also all of them were duplicate old uploads and we are
				// willing to simply insert one row only. See T67264
				// NOTE(review): rows inserted in this loop are not added to
				// the index, so duplicates that are both still missing are
				// both inserted, as in the previous implementation.
				$key = $revisionKey( $oldimageRow->oi_timestamp, $oldimageRow->oi_sha1 );
				if ( isset( $alreadyMigrated[$key] ) ) {
					continue;
				}

				$dbw->newInsertQueryBuilder()
					->insertInto( 'filerevision' )
					->row(
						[
							'fr_file' => $fileId,
							'fr_size' => $oldimageRow->oi_size,
							'fr_width' => $oldimageRow->oi_width,
							'fr_height' => $oldimageRow->oi_height,
							'fr_metadata' => $oldimageRow->oi_metadata,
							'fr_bits' => $oldimageRow->oi_bits,
							'fr_description_id' => $oldimageRow->oi_description_id,
							'fr_actor' => $oldimageRow->oi_actor,
							'fr_timestamp' => $oldimageRow->oi_timestamp,
							'fr_sha1' => $oldimageRow->oi_sha1,
							'fr_archive_name' => $oldimageRow->oi_archive_name,
							'fr_deleted' => $oldimageRow->oi_deleted,
						]
					)
					->caller( __METHOD__ )->execute();
				$rowsInserted += 1;
			}

			// Make sure the image row (most current version) is there
			$currentKey = $revisionKey( $row->img_timestamp, $row->img_sha1 );
			if ( !isset( $alreadyMigrated[$currentKey] ) ) {
				$dbw->newInsertQueryBuilder()
					->insertInto( 'filerevision' )
					->row(
						[
							'fr_file' => $fileId,
							'fr_size' => $row->img_size,
							'fr_width' => $row->img_width,
							'fr_height' => $row->img_height,
							'fr_metadata' => $row->img_metadata,
							'fr_bits' => $row->img_bits,
							'fr_description_id' => $imgDescriptionId,
							'fr_actor' => $row->img_actor,
							'fr_timestamp' => $row->img_timestamp,
							'fr_sha1' => $row->img_sha1,
							'fr_archive_name' => '',
							'fr_deleted' => 0,
						]
					)
					->caller( __METHOD__ )->execute();
				$rowsInserted += 1;
			}

			// Make sure file has the latest filerevision
			$latestFrId = $dbw->newSelectQueryBuilder()
				->select( 'fr_id' )
				->from( 'filerevision' )
				->where( [ 'fr_file' => $fileId ] )
				->orderBy( 'fr_timestamp', 'DESC' )
				->caller( __METHOD__ )->fetchField();
			$dbw->newUpdateQueryBuilder()
				->update( 'file' )
				->set( [ 'file_latest' => $latestFrId ] )
				->where( [ 'file_id' => $fileId ] )
				->caller( __METHOD__ )->execute();

			$dbw->endAtomic( __METHOD__ );
		} finally {
			// Release the file lock on every exit path, including exceptions.
			$file->releaseFileLock();
		}

		return $rowsInserted;
	}
}

// Register this class as the script to run when the file is executed directly.
// @codeCoverageIgnoreStart
$maintClass = MigrateFileTables::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getBatchSize()
Returns batch size.
output( $out, $channel=null)
Throw some output to the user.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
waitForReplication()
Wait for replica DB servers to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
getServiceContainer()
Returns the main service container.
getPrimaryDB(string|false $virtualDomain=false)
addDescription( $text)
Set the description text.
Maintenance script to refresh image metadata fields.
IMaintainableDatabase $dbw
execute()
Do the actual work.
__construct()
Default constructor.
Build SELECT queries with a fluent interface.
endAtomic( $fname=__METHOD__)
Ends an atomic section of SQL statements.
newUpdateQueryBuilder()
Get an UpdateQueryBuilder bound to this connection.
startAtomic( $fname=__METHOD__, $cancelable=self::ATOMIC_NOT_CANCELABLE)
Begin an atomic section of SQL statements.
newInsertQueryBuilder()
Get an InsertQueryBuilder bound to this connection.
Advanced database interface for IDatabase handles that include maintenance methods.
newSelectQueryBuilder()
Create an empty SelectQueryBuilder which can be used to run queries against this connection.
expr(string $field, string $op, $value)
See Expression::__construct()