MediaWiki master
migrateFileTables.php
Go to the documentation of this file.
1<?php
24// @codeCoverageIgnoreStart
25require_once __DIR__ . '/Maintenance.php';
26// @codeCoverageIgnoreEnd
27
31
38
42 protected $dbw;
43
/**
 * Set up the script: description, default batch size of 200, and the
 * optional command-line options understood by execute().
 */
public function __construct() {
	parent::__construct();

	$this->addDescription( 'Script to migrate from image/oldimage tables to file/filerevision' );
	$this->setBatchSize( 200 );

	// Every option here is optional and takes a value; they are registered
	// in this order: start, end, sleep.
	$valueOptions = [
		'start' => 'Name of file to start with',
		'end' => 'Name of file to end with',
		'sleep' => 'Time to sleep between each batch (in seconds). Default: 0',
	];
	foreach ( $valueOptions as $optionName => $helpText ) {
		$this->addOption( $optionName, $helpText, false, true );
	}
}
59
/**
 * Migrate files from the image table in batches ordered by img_name.
 *
 * Each batch selects up to the configured batch size of image rows (joined
 * with their description comment), passes every row to handleFile(), then
 * waits for replication and optionally sleeps. The 'start' bound (inclusive)
 * applies to the first batch only; later batches resume strictly after the
 * last file name processed. The 'end' bound (inclusive) applies to every
 * batch. The loop stops on the first batch that comes back short.
 */
public function execute() {
	$start = $this->getOption( 'start', false );
	$end = $this->getOption( 'end', false );
	$sleep = (int)$this->getOption( 'sleep', 0 );

	$batchSize = intval( $this->getBatchSize() );
	if ( $batchSize <= 0 ) {
		$this->fatalError( "Batch size is too low...", 12 );
	}

	$dbw = $this->getPrimaryDB();
	$queryBuilderTemplate = $dbw->newSelectQueryBuilder()
		->select(
			[
				'img_name',
				'img_size',
				'img_width',
				'img_height',
				'img_metadata',
				'img_bits',
				'img_media_type',
				'img_major_mime',
				'img_minor_mime',
				'img_timestamp',
				'img_sha1',
				'img_actor',
				'img_description_id',
				'img_description_text' => 'comment_img_description.comment_text',
				'img_description_data' => 'comment_img_description.comment_data',
				'img_description_cid' => 'comment_img_description.comment_id'
			]
		)
		->from( 'image' )
		->join(
			'comment',
			'comment_img_description',
			'comment_img_description.comment_id = img_description_id'
		);
	if ( $end !== false ) {
		$queryBuilderTemplate->andWhere( $dbw->expr( 'img_name', '<=', $end ) );
	}
	$queryBuilderTemplate
		->orderBy( 'img_name', SelectQueryBuilder::SORT_ASC )
		->limit( $batchSize );

	// Lower bound for the next batch. It starts as the optional inclusive
	// 'start' bound and becomes "img_name > <last name seen>" after each batch.
	$batchCondition = [];
	if ( $start !== false ) {
		$batchCondition[] = $dbw->expr( 'img_name', '>=', $start );
	}

	$totalRowsInserted = 0;
	$filesHandled = 0;
	do {
		// Clone so the per-batch condition never accumulates on the template.
		$res = ( clone $queryBuilderTemplate )
			->andWhere( $batchCondition )
			->caller( __METHOD__ )->fetchResultSet();
		$numRows = $res->numRows();
		if ( $numRows > 0 ) {
			$row1 = $res->current();
			$this->output( "Processing next {$numRows} row(s) starting with {$row1->img_name}.\n" );
			$res->rewind();
		}

		// Track the pagination cursor explicitly instead of relying on the
		// loop variable surviving the foreach.
		$lastName = null;
		foreach ( $res as $row ) {
			$rowsInserted = $this->handleFile( $row );
			$filesHandled += 1;
			$totalRowsInserted += $rowsInserted;
			$lastName = $row->img_name;

			$this->output( "Migrated File:{$row->img_name}. Inserted $rowsInserted rows.\n" );
		}
		if ( $lastName !== null ) {
			$batchCondition = [ $dbw->expr( 'img_name', '>', $lastName ) ];
		}

		$this->waitForReplication();
		// Only sleep for a positive duration: sleep() rejects negative values.
		if ( $sleep > 0 ) {
			sleep( $sleep );
		}
	} while ( $numRows === $batchSize );

	$this->output( "\nFinished migration for $filesHandled files. "
		. "$totalRowsInserted rows have been inserted into filerevision table.\n" );
}
143
/**
 * Make sure one file and all of its revisions exist in file/filerevision.
 *
 * Takes the file lock, then inside one atomic section: locks the related
 * image, oldimage, file and filerevision rows with FOR UPDATE selects,
 * inserts a filerevision row for every oldimage row and for the current
 * image row that has no filerevision row with the same timestamp and sha1,
 * and finally points file_latest at the newest filerevision row.
 *
 * @param stdClass $row Row from the image table as selected by execute();
 *  its img_description_id property is removed by this method.
 * @return int Number of rows inserted into filerevision
 */
private function handleFile( stdClass $row ) {
	$repo = $this->getServiceContainer()->getRepoGroup()
		->newCustomLocalRepo();
	$dbw = $this->getPrimaryDB();
	$rowsInserted = 0;

	// LocalFile doesn't like it when the row holds img_description_id
	$imgDescriptionId = $row->img_description_id;
	unset( $row->img_description_id );

	$file = $repo->newFileFromRow( $row );

	// Lock everything we can
	// NOTE(review): the result of acquireFileLock() is not checked - confirm
	// whether a failed lock should abort the migration of this file.
	$file->acquireFileLock();
	try {
		$dbw->startAtomic( __METHOD__ );
		// Selected only to take the row lock; the result is not used.
		$dbw->newSelectQueryBuilder()
			->select( '*' )
			->forUpdate()
			->from( 'image' )
			->where( [ 'img_name' => $row->img_name ] )
			->caller( __METHOD__ )->fetchRow();
		$oldimageRows = $dbw->newSelectQueryBuilder()
			->select( '*' )
			->forUpdate()
			->from( 'oldimage' )
			->where( [ 'oi_name' => $row->img_name ] )
			->orderBy( 'oi_timestamp', 'ASC' )
			->caller( __METHOD__ )->fetchResultSet();
		// Selected only to take the row lock; the result is not used.
		$dbw->newSelectQueryBuilder()
			->select( '*' )
			->forUpdate()
			->from( 'file' )
			->where( [ 'file_name' => $row->img_name ] )
			->caller( __METHOD__ )->fetchRow();

		// Make sure the row exists in file table
		$fileId = $file->acquireFileIdFromName();
		$fileRevisionRows = $dbw->newSelectQueryBuilder()
			->select( '*' )
			->forUpdate()
			->from( 'filerevision' )
			->where( [ 'fr_file' => $fileId ] )
			->caller( __METHOD__ )->fetchResultSet();

		// True when a filerevision row fetched above already has this
		// timestamp and sha1. Rows inserted below are not part of that
		// result set, so they are not taken into account.
		// This assumes the combination of timestamp and sha1 is always
		// unique, which is not the case in production, but all of those
		// were duplicate old uploads and we are willing to simply insert
		// one row only. See T67264
		$isMigrated = static function ( $timestamp, $sha1 ) use ( $fileRevisionRows ): bool {
			foreach ( $fileRevisionRows as $fileRevisionRow ) {
				if (
					$timestamp === $fileRevisionRow->fr_timestamp &&
					$sha1 === $fileRevisionRow->fr_sha1
				) {
					return true;
				}
			}
			return false;
		};

		// Make sure the filerevision rows exist
		foreach ( $oldimageRows as $oldimageRow ) {
			if ( $isMigrated( $oldimageRow->oi_timestamp, $oldimageRow->oi_sha1 ) ) {
				continue;
			}

			$dbw->newInsertQueryBuilder()
				->insertInto( 'filerevision' )
				->row(
					[
						'fr_file' => $fileId,
						'fr_size' => $oldimageRow->oi_size,
						'fr_width' => $oldimageRow->oi_width,
						'fr_height' => $oldimageRow->oi_height,
						'fr_metadata' => $oldimageRow->oi_metadata,
						'fr_bits' => $oldimageRow->oi_bits,
						'fr_description_id' => $oldimageRow->oi_description_id,
						'fr_actor' => $oldimageRow->oi_actor,
						'fr_timestamp' => $oldimageRow->oi_timestamp,
						'fr_sha1' => $oldimageRow->oi_sha1,
						'fr_archive_name' => $oldimageRow->oi_archive_name,
						'fr_deleted' => $oldimageRow->oi_deleted,
					]
				)
				->caller( __METHOD__ )->execute();
			$rowsInserted += 1;
		}

		// Make sure the image row (most current version) is there
		if ( !$isMigrated( $row->img_timestamp, $row->img_sha1 ) ) {
			$dbw->newInsertQueryBuilder()
				->insertInto( 'filerevision' )
				->row(
					[
						'fr_file' => $fileId,
						'fr_size' => $row->img_size,
						'fr_width' => $row->img_width,
						'fr_height' => $row->img_height,
						'fr_metadata' => $row->img_metadata,
						'fr_bits' => $row->img_bits,
						'fr_description_id' => $imgDescriptionId,
						'fr_actor' => $row->img_actor,
						'fr_timestamp' => $row->img_timestamp,
						'fr_sha1' => $row->img_sha1,
						'fr_archive_name' => '',
						'fr_deleted' => 0,
					]
				)
				->caller( __METHOD__ )->execute();
			$rowsInserted += 1;
		}

		// Make sure file has the latest filerevision
		$latestFrId = $dbw->newSelectQueryBuilder()
			->select( 'fr_id' )
			->from( 'filerevision' )
			->where( [ 'fr_file' => $fileId ] )
			->orderBy( 'fr_timestamp', 'DESC' )
			->caller( __METHOD__ )->fetchField();
		$dbw->newUpdateQueryBuilder()
			->update( 'file' )
			->set( [ 'file_latest' => $latestFrId ] )
			->where( [ 'file_id' => $fileId ] )
			->caller( __METHOD__ )->execute();

		$dbw->endAtomic( __METHOD__ );
	} finally {
		// Release the file lock even when a query above throws.
		$file->releaseFileLock();
	}
	return $rowsInserted;
}
289}
290
291// @codeCoverageIgnoreStart
292$maintClass = MigrateFileTables::class;
293require_once RUN_MAINTENANCE_IF_MAIN;
294// @codeCoverageIgnoreEnd
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getBatchSize()
Returns batch size.
output( $out, $channel=null)
Throw some output to the user.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
waitForReplication()
Wait for replica DB servers to catch up.
hasOption( $name)
Checks to see if a particular option was set.
getOption( $name, $default=null)
Get an option, or return the default.
getServiceContainer()
Returns the main service container.
addDescription( $text)
Set the description text.
Maintenance script to migrate from image/oldimage tables to file/filerevision.
IMaintainableDatabase $dbw
execute()
Do the actual work.
__construct()
Default constructor.
Build SELECT queries with a fluent interface.
endAtomic( $fname=__METHOD__)
Ends an atomic section of SQL statements.
newUpdateQueryBuilder()
Get an UpdateQueryBuilder bound to this connection.
startAtomic( $fname=__METHOD__, $cancelable=self::ATOMIC_NOT_CANCELABLE)
Begin an atomic section of SQL statements.
newInsertQueryBuilder()
Get an InsertQueryBuilder bound to this connection.
Advanced database interface for IDatabase handles that include maintenance methods.
newSelectQueryBuilder()
Create an empty SelectQueryBuilder which can be used to run queries against this connection.
expr(string $field, string $op, $value)
See Expression::__construct()