MediaWiki master
moveToExternal.php
Go to the documentation of this file.
1<?php
28use Wikimedia\AtEase\AtEase;
31
32// @codeCoverageIgnoreStart
33require_once __DIR__ . '/../Maintenance.php';
34// @codeCoverageIgnoreEnd
35
/** @var ResolveStubs Created in execute(); used by resolveStubs() to resolve individual stub rows */
38 private $resolveStubs;
/** @var mixed Value of --reporting-interval (default 100); progress is printed every this many blocks or stubs */
40 private $reportingInterval;
/** @var int First old_id to process, from --start (default 1) */
42 private $minID;
/** @var int Last old_id to process, from --end, or MAX(old_id) of the text table when --end is absent */
44 private $maxID;
/** @var string First positional argument: the external store type, e.g. "DB" or "mwstore" */
46 private $esType;
/** @var string Second positional argument: the store location, e.g. "cluster12" or "global-swift" */
48 private $esLocation;
/** @var int From --threshold (default 0); plain-text rows shorter than this many bytes are not moved */
50 private $threshold;
/** @var bool False when --skip-gzip is given, true otherwise */
52 private $gzip;
/** @var mixed Value of --skip-resolve; when truthy, HistoryBlobStub rows are not queued for resolution */
54 private $skipResolve;
/** @var mixed The LegacyEncoding config value, set only when --iconv is given and the config is non-empty; otherwise left unset (null) */
56 private $legacyEncoding;
/** @var mixed Value of --dry-run (default false); when truthy, intended moves are printed instead of written */
58 private $dryRun;
/** @var UndoLog Created in execute() from the --undo filename; performs the text row updates */
60 private $undoLog;
61
/**
 * Register the default batch size, the command-line options and the two
 * positional arguments of the script.
 */
public function __construct() {
	parent::__construct();

	$this->setBatchSize( 1000 );

	// Options that take a value: name => [ description, short name or false ]
	$valueOptions = [
		'start' => [ 'start old_id', 's' ],
		'end' => [ 'end old_id', 'e' ],
		'threshold' => [ 'minimum size in bytes', false ],
		'reporting-interval' => [ 'show a message after this many revisions', false ],
		'undo' => [ 'filename for undo SQL', false ],
	];
	foreach ( $valueOptions as $optName => [ $optDescription, $optShortName ] ) {
		$this->addOption( $optName, $optDescription, false, true, $optShortName );
	}

	// Plain on/off switches: name => description
	$switches = [
		'skip-gzip' => 'Don\'t compress individual revisions',
		'skip-resolve' => 'Don\'t replace HistoryBlobStub objects with direct external store pointers',
		'iconv' => 'Resolve legacy character encoding',
		'dry-run' => 'Don\'t modify any rows',
	];
	foreach ( $switches as $switchName => $switchDescription ) {
		$this->addOption( $switchName, $switchDescription );
	}

	$this->addArg( 'type', 'The external store type, e.g. "DB" or "mwstore"' );
	$this->addArg( 'location', 'e.g. "cluster12" or "global-swift"' );
}
83
/**
 * Read the command-line arguments and options into instance state, open
 * the undo log, and run the move.
 *
 * Terminates through fatalError() when compression is wanted but
 * gzdeflate() is unavailable, or when the undo log cannot be opened.
 *
 * @return bool Result of doMoveToExternal()
 */
public function execute() {
	$this->resolveStubs = new ResolveStubs;
	$this->esType = $this->getArg( 0 ); // e.g. "DB" or "mwstore"
	$this->esLocation = $this->getArg( 1 ); // e.g. "cluster12" or "global-swift"
	$dbw = $this->getPrimaryDB();

	// Without --end, process up to the highest old_id currently in the table
	$maxID = $this->getOption( 'end' ) ?? $dbw->newSelectQueryBuilder()
		->select( 'MAX(old_id)' )
		->from( 'text' )
		->caller( __METHOD__ )->fetchField();
	$this->maxID = (int)$maxID;
	$this->minID = (int)$this->getOption( 'start', 1 );

	// Cast like the other numeric options: the value is later used as the
	// right-hand side of a modulo, and 0 is treated as "no progress reports".
	$this->reportingInterval = (int)$this->getOption( 'reporting-interval', 100 );
	$this->threshold = (int)$this->getOption( 'threshold', 0 );

	if ( $this->getOption( 'skip-gzip' ) ) {
		$this->gzip = false;
	} elseif ( !function_exists( 'gzdeflate' ) ) {
		$this->fatalError( "gzdeflate() not found. " .
			"Please run with --skip-gzip if you don't want to compress revisions." );
	} else {
		$this->gzip = true;
	}

	$this->skipResolve = $this->getOption( 'skip-resolve' );

	if ( $this->getOption( 'iconv' ) ) {
		$legacyEncoding = $this->getConfig()->get( MainConfigNames::LegacyEncoding );
		if ( $legacyEncoding ) {
			$this->legacyEncoding = $legacyEncoding;
		} else {
			$this->output( "iconv requested but the wiki has no legacy encoding\n" );
		}
	}
	$this->dryRun = $this->getOption( 'dry-run', false );

	$undo = $this->getOption( 'undo' );
	try {
		$this->undoLog = new UndoLog( $undo, $dbw );
	} catch ( RuntimeException $e ) {
		// Keep the underlying reason so the operator can see why the open failed
		$this->fatalError( "Unable to open undo log: " . $e->getMessage() );
	}
	$this->resolveStubs->setUndoLog( $this->undoLog );

	return $this->doMoveToExternal();
}
132
/**
 * Walk the text table in blocks of getBatchSize() ids from minID to maxID
 * and move each selected row's text to external storage.
 *
 * Rows flagged 'error' are skipped. HistoryBlobStub rows are collected and
 * passed to resolveStubs() at the end unless skipResolve is set. Plain text
 * shorter than the threshold is left in place. In dry-run mode the intended
 * move is printed and nothing is written.
 *
 * @return bool False if any row could not be processed, true otherwise
 */
private function doMoveToExternal(): bool {
	$success = true;
	$dbr = $this->getReplicaDB();

	$count = $this->maxID - $this->minID + 1;
	$blockSize = $this->getBatchSize();
	$numBlocks = ceil( $count / $blockSize );
	print "Moving text rows from {$this->minID} to {$this->maxID} to external storage\n";

	$esFactory = $this->getServiceContainer()->getExternalStoreFactory();
	$extStore = $esFactory->getStore( $this->esType );
	$numMoved = 0;
	$stubIDs = [];

	for ( $block = 0; $block < $numBlocks; $block++ ) {
		$blockStart = $block * $blockSize + $this->minID;
		$blockEnd = $blockStart + $blockSize - 1;

		// Progress report (and replication wait) every reportingInterval blocks
		if ( $this->reportingInterval && !( $block % $this->reportingInterval ) ) {
			$this->output( "oldid=$blockStart, moved=$numMoved\n" );
			$this->waitForReplication();
		}

		$res = $dbr->newSelectQueryBuilder()
			->select( [ 'old_id', 'old_flags', 'old_text' ] )
			->from( 'text' )
			->where( $this->getConditions( $blockStart, $blockEnd, $dbr ) )
			->caller( __METHOD__ )->fetchResultSet();
		foreach ( $res as $row ) {
			$text = $row->old_text;
			$id = $row->old_id;
			$flags = SqlBlobStore::explodeFlags( $row->old_flags );
			[ $text, $flags ] = $this->resolveText( $text, $flags );

			if ( $text === false ) {
				// There is no usable text for this row. Skip it instead of
				// letting false reach unserialize()/strlen()/compress(),
				// where it would be treated as an empty string.
				$success = false;
				continue;
			}

			if ( in_array( 'error', $flags, true ) ) {
				continue;
			} elseif ( in_array( 'object', $flags, true ) ) {
				// NOTE(review): unserialize() on database content can instantiate
				// arbitrary classes; consider restricting allowed_classes.
				$obj = unserialize( $text );
				if ( $obj instanceof HistoryBlobStub ) {
					// Handle later, after CGZ resolution
					if ( !$this->skipResolve ) {
						$stubIDs[] = $id;
					}
					continue;
				} elseif ( $obj instanceof HistoryBlobCurStub ) {
					// Copy cur text to ES
					$newText = $obj->getText();
					if ( $newText === false ) {
						print "Warning: Could not fetch revision blob {$id}: {$text}\n";
						$success = false;
						continue;
					}

					[ $decoded, $flags ] = $this->resolveLegacyEncoding( $newText, [] );
					if ( $decoded === false ) {
						print "Warning: Could not decode legacy-encoded gzip'ed revision blob {$id}: {$newText}\n";
						$success = false;
						continue;
					}

					[ $text, $flags ] = $this->compress( $decoded, $flags );
					if ( $text === false ) {
						print "Warning: Could not compress revision blob {$id}: {$decoded}\n";
						$success = false;
						continue;
					}
				} elseif ( $obj instanceof ConcatenatedGzipHistoryBlob ) {
					// Store as is
				} else {
					// unserialize() yields a non-object (e.g. false) for corrupt
					// data, and get_class() would throw on that.
					$className = is_object( $obj ) ? get_class( $obj ) : gettype( $obj );
					print "Warning: old_id=$id unrecognised object class \"$className\"\n";
					$success = false;
					continue;
				}
			} elseif ( strlen( $text ) < $this->threshold ) {
				// Don't move small revisions
				continue;
			} else {
				[ $decoded, $flags ] = $this->resolveLegacyEncoding( $text, $flags );
				if ( $decoded === false ) {
					// Decoding failed; do not pass false on to compress()
					print "Warning: Could not decode legacy-encoded gzip'ed revision blob {$id}: {$text}\n";
					$success = false;
					continue;
				}
				[ $newText, $flags ] = $this->compress( $decoded, $flags );
				if ( $newText === false ) {
					print "Warning: Could not compress revision blob {$id}: {$decoded}\n";
					$success = false;
					continue;
				}
				$text = $newText;
			}
			$flags[] = 'external';
			$flagsString = implode( ',', $flags );

			if ( $this->dryRun ) {
				// Show the first 30 bytes with control and non-ASCII bytes escaped
				$this->output( "Move $id => $flagsString " .
					addcslashes( substr( $text, 0, 30 ), "\0..\x1f\x7f..\xff" ) .
					"\n"
				);
				continue;
			}

			$url = $extStore->store( $this->esLocation, $text );
			if ( !$url ) {
				$this->fatalError( "Error writing to external storage" );
			}
			// The fetched row is passed as the update condition
			$moved = $this->undoLog->update(
				'text',
				[ 'old_flags' => $flagsString, 'old_text' => $url ],
				(array)$row,
				__METHOD__
			);
			if ( $moved ) {
				$numMoved++;
			} else {
				print "Update of old_id $id failed, affected zero rows\n";
				$success = false;
			}
		}
	}

	if ( count( $stubIDs ) ) {
		$this->resolveStubs( $stubIDs );
	}

	return $success;
}
256
/**
 * Deflate the text and add the 'gzip' flag, unless compression is turned
 * off or the flags already contain 'gzip'.
 *
 * @param string $text
 * @param array $flags
 * @return array [ text, flags ]; when compression ran, text is whatever gzdeflate() returned
 */
private function compress( string $text, array $flags ): array {
	if ( !$this->gzip || in_array( 'gzip', $flags ) ) {
		// Nothing to do: hand both values back untouched
		return [ $text, $flags ];
	}

	$flags[] = 'gzip';
	$deflated = gzdeflate( $text );

	return [ $deflated, $flags ];
}
264
/**
 * Convert text from the configured legacy encoding to UTF-8.
 *
 * The input is returned unchanged when no legacy encoding is set, when the
 * flags already contain 'utf-8' or 'utf8', or when the text is gzipped but
 * compression is disabled for this run.
 *
 * @param string $text
 * @param array $flags
 * @return array [ text, flags ]; text is false when inflating or converting failed
 */
private function resolveLegacyEncoding( string $text, array $flags ): array {
	$alreadyUtf8 = in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags );
	if ( $this->legacyEncoding === null || $alreadyUtf8 ) {
		return [ $text, $flags ];
	}

	// A gzipped entry has to be inflated first so that iconv sees the text
	// rather than the binary deflate stream.
	if ( in_array( 'gzip', $flags ) ) {
		if ( !$this->gzip ) {
			return [ $text, $flags ];
		}
		$flags = array_diff( $flags, [ 'gzip' ] );
		$inflated = gzinflate( $text );
		if ( $inflated === false ) {
			return [ false, $flags ];
		}
		$text = $inflated;
	}

	AtEase::suppressWarnings();
	$converted = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $text );
	AtEase::restoreWarnings();

	if ( $converted === false ) {
		return [ false, $flags ];
	}

	$flags[] = 'utf-8';
	return [ $converted, $flags ];
}
293
/**
 * Re-read the collected stub rows in batches and pass each one to the
 * ResolveStubs helper, printing progress and a final tally.
 *
 * @param array $stubIDs old_id values of the rows to resolve
 */
private function resolveStubs( array $stubIDs ) {
	if ( $this->dryRun ) {
		print "Note: resolving stubs in dry run mode is expected to fail, " .
			"because the main blobs have not been moved to external storage.\n";
	}

	$dbr = $this->getReplicaDB();
	$stubCount = count( $stubIDs );
	$this->output( "Resolving $stubCount stubs\n" );

	$resolved = 0;
	$processed = 0;
	$batches = array_chunk( $stubIDs, $this->getBatchSize() );
	foreach ( $batches as $batchIDs ) {
		$rows = $dbr->newSelectQueryBuilder()
			->select( [ 'old_id', 'old_flags', 'old_text' ] )
			->from( 'text' )
			->where( [ 'old_id' => $batchIDs ] )
			->caller( __METHOD__ )->fetchResultSet();

		foreach ( $rows as $row ) {
			if ( $this->resolveStubs->resolveStub( $row, $this->dryRun ) ) {
				$resolved++;
			}
			$processed++;

			// Report and let replicas catch up every reportingInterval rows
			$atInterval = $this->reportingInterval
				&& $processed % $this->reportingInterval == 0;
			if ( $atInterval ) {
				$this->output( "$processed stubs processed\n" );
				$this->waitForReplication();
			}
		}
	}

	$this->output( "$resolved of $processed stubs resolved\n" );
}
323
/**
 * Build the WHERE conditions selecting the rows of one block that are not
 * yet flagged as external.
 *
 * @param int $blockStart First old_id of the block (inclusive)
 * @param int $blockEnd Last old_id of the block (inclusive)
 * @param mixed $dbr Database handle used to build the expressions
 * @return array Conditions for the select query builder's where()
 */
protected function getConditions( $blockStart, $blockEnd, $dbr ) {
	return [
		$dbr->expr( 'old_id', '>=', $blockStart ),
		// Upper bound of the block: this must be <=, otherwise every row at
		// or above $blockEnd is selected instead of the block's own range.
		$dbr->expr( 'old_id', '<=', $blockEnd ),
		$dbr->expr( 'old_flags', IExpression::NOT_LIKE,
			new LikeValue( $dbr->anyString(), 'external', $dbr->anyString() ) ),
	];
}
332
/**
 * Give the row's text and flags a chance to be adjusted before processing.
 *
 * This implementation returns both values unchanged. The caller treats a
 * returned text of false as a failure for that row.
 *
 * @param mixed $text
 * @param mixed $flags
 * @return array [ text, flags ]
 */
protected function resolveText( $text, $flags ) {
	$unchanged = [ $text, $flags ];

	return $unchanged;
}
336}
337
338// @codeCoverageIgnoreStart
// Name this script's class, then include the file named by RUN_MAINTENANCE_IF_MAIN.
// NOTE(review): that constant is defined outside this file; presumably it runs
// $maintClass when the script is invoked directly — confirm.
339$maintClass = MoveToExternal::class;
340require_once RUN_MAINTENANCE_IF_MAIN;
341// @codeCoverageIgnoreEnd
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:81
Concatenated gzip (CGZ) storage Improves compression ratio by concatenating like objects before gzipp...
To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the leftover cur table as the b...
Pointer object for an item within a CGZ blob stored in the text table.
A class containing constants representing the names of configuration variables.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effo...
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
getArg( $argId=0, $default=null)
Get an argument.
getBatchSize()
Returns batch size.
output( $out, $channel=null)
Throw some output to the user.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
waitForReplication()
Wait for replica DB servers to catch up.
getOption( $name, $default=null)
Get an option, or return the default.
getServiceContainer()
Returns the main service container.
Update a database while optionally writing SQL that reverses the update to a file.
Definition UndoLog.php:11
Service for storing and loading Content objects representing revision data blobs.
__construct()
Default constructor.
getConditions( $blockStart, $blockEnd, $dbr)
execute()
Do the actual work. All child classes will need to implement this. Returns bool|null|void: True for success,...
resolveText( $text, $flags)
Content of like value.
Definition LikeValue.php:14
$maintClass