35 private $resolveStubs;
37 private $reportingInterval;
53 private $legacyEncoding;
60 parent::__construct();
64 $this->
addOption(
'start',
'start old_id',
false,
true,
's' );
65 $this->
addOption(
'end',
'end old_id',
false,
true,
'e' );
66 $this->
addOption(
'threshold',
'minimum size in bytes',
false,
true );
68 'show a message after this many revisions',
false,
true );
69 $this->
addOption(
'undo',
'filename for undo SQL',
false,
true );
71 $this->
addOption(
'skip-gzip',
'Don\'t compress individual revisions' );
73 'Don\'t replace HistoryBlobStub objects with direct external store pointers' );
74 $this->
addOption(
'iconv',
'Resolve legacy character encoding' );
75 $this->
addOption(
'dry-run',
'Don\'t modify any rows' );
77 $this->
addArg(
'type',
'The external store type, e.g. "DB" or "mwstore"' );
78 $this->
addArg(
'location',
'e.g. "cluster12" or "global-swift"' );
83 $this->esType = $this->
getArg( 0 );
84 $this->esLocation = $this->
getArg( 1 );
88 if ( $maxID ===
null ) {
89 $maxID = $dbw->newSelectQueryBuilder()
90 ->select(
'MAX(old_id)' )
92 ->caller( __METHOD__ )->fetchField();
94 $this->maxID = (int)$maxID;
95 $this->minID = (int)$this->
getOption(
'start', 1 );
97 $this->reportingInterval = $this->
getOption(
'reporting-interval', 100 );
98 $this->threshold = (int)$this->
getOption(
'threshold', 0 );
102 } elseif ( !function_exists(
'gzdeflate' ) ) {
103 $this->
fatalError(
"gzdeflate() not found. " .
104 "Please run with --skip-gzip if you don't want to compress revisions." );
109 $this->skipResolve = $this->
getOption(
'skip-resolve' );
112 $legacyEncoding = $this->
getConfig()->get( MainConfigNames::LegacyEncoding );
113 if ( $legacyEncoding ) {
114 $this->legacyEncoding = $legacyEncoding;
116 $this->
output(
"iconv requested but the wiki has no legacy encoding\n" );
119 $this->dryRun = $this->
getOption(
'dry-run',
false );
123 $this->undoLog =
new UndoLog( $undo, $dbw );
124 }
catch ( RuntimeException $e ) {
125 $this->
fatalError(
"Unable to open undo log" );
127 $this->resolveStubs->setUndoLog( $this->undoLog );
129 return $this->doMoveToExternal();
132 private function doMoveToExternal() {
136 $count = $this->maxID - $this->minID + 1;
138 $numBlocks = ceil( $count / $blockSize );
139 print
"Moving text rows from {$this->minID} to {$this->maxID} to external storage\n";
142 $extStore = $esFactory->getStore( $this->esType );
146 for ( $block = 0; $block < $numBlocks; $block++ ) {
147 $blockStart = $block * $blockSize + $this->minID;
148 $blockEnd = $blockStart + $blockSize - 1;
150 if ( $this->reportingInterval && !( $block % $this->reportingInterval ) ) {
151 $this->
output(
"oldid=$blockStart, moved=$numMoved\n" );
155 $res = $dbr->newSelectQueryBuilder()
156 ->select( [
'old_id',
'old_flags',
'old_text' ] )
158 ->where( $this->
getConditions( $blockStart, $blockEnd, $dbr ) )
159 ->caller( __METHOD__ )->fetchResultSet();
160 foreach ( $res as $row ) {
161 $text = $row->old_text;
163 $flags = SqlBlobStore::explodeFlags( $row->old_flags );
164 [ $text, $flags ] = $this->
resolveText( $text, $flags );
166 if ( $text ===
false ) {
170 if ( in_array(
'error', $flags ) ) {
172 } elseif ( in_array(
'object', $flags ) ) {
173 $obj = unserialize( $text );
176 if ( !$this->skipResolve ) {
182 $newText = $obj->getText();
183 if ( $newText ===
false ) {
184 print
"Warning: Could not fetch revision blob {$id}: {$text}\n";
189 [ $text, $flags ] = $this->resolveLegacyEncoding( $newText, [] );
191 if ( $text ===
false ) {
// Warn that the legacy-encoded blob could not be decoded.
// Note: "\'" is not an escape sequence inside a double-quoted PHP string, so the
// backslash would be printed literally; a plain apostrophe is what is intended.
print "Warning: Could not decode legacy-encoded gzip'ed revision blob {$id}: {$newText}\n";
197 [ $text, $flags ] = $this->compress( $text, $flags );
201 $className = get_class( $obj );
202 print
"Warning: old_id=$id unrecognised object class \"$className\"\n";
206 } elseif ( strlen( $text ) < $this->threshold ) {
210 [ $text, $flags ] = $this->resolveLegacyEncoding( $text, $flags );
211 [ $newText, $flags ] = $this->compress( $text, $flags );
212 if ( $newText ===
false ) {
213 print
"Warning: Could not compress revision blob {$id}: {$text}\n";
219 $flags[] =
'external';
220 $flagsString = implode(
',', $flags );
222 if ( $this->dryRun ) {
223 $this->
output(
"Move $id => $flagsString " .
224 addcslashes( substr( $text, 0, 30 ),
"\0..\x1f\x7f..\xff" ) .
230 $url = $extStore->store( $this->esLocation, $text );
232 $this->
fatalError(
"Error writing to external storage" );
234 $moved = $this->undoLog->update(
236 [
'old_flags' => $flagsString,
'old_text' => $url ],
243 print
"Update of old_id $id failed, affected zero rows\n";
249 if ( count( $stubIDs ) ) {
250 $this->resolveStubs( $stubIDs );
256 private function compress( $text, $flags ) {
257 if ( $this->gzip && !in_array(
'gzip', $flags ) ) {
259 $text = gzdeflate( $text );
261 return [ $text, $flags ];
264 private function resolveLegacyEncoding( $text, $flags ) {
265 if ( $this->legacyEncoding !==
null
266 && !in_array(
'utf-8', $flags )
267 && !in_array(
'utf8', $flags )
270 if ( in_array(
'gzip', $flags ) ) {
271 if ( !$this->gzip ) {
272 return [ $text, $flags ];
274 $flags = array_diff( $flags, [
'gzip' ] );
275 $newText = gzinflate( $text );
276 if ( $newText ===
false ) {
277 return [
false, $flags ];
281 AtEase::suppressWarnings();
282 $newText = iconv( $this->legacyEncoding,
'UTF-8//IGNORE', $text );
283 AtEase::restoreWarnings();
284 if ( $newText ===
false ) {
285 return [
false, $flags ];
290 return [ $text, $flags ];
293 private function resolveStubs( $stubIDs ) {
294 if ( $this->dryRun ) {
295 print
"Note: resolving stubs in dry run mode is expected to fail, " .
296 "because the main blobs have not been moved to external storage.\n";
300 $this->
output(
"Resolving " . count( $stubIDs ) .
" stubs\n" );
303 foreach ( array_chunk( $stubIDs, $this->
getBatchSize() ) as $stubBatch ) {
304 $res = $dbr->newSelectQueryBuilder()
305 ->select( [
'old_id',
'old_flags',
'old_text' ] )
307 ->where( [
'old_id' => $stubBatch ] )
308 ->caller( __METHOD__ )->fetchResultSet();
309 foreach ( $res as $row ) {
310 $numResolved += $this->resolveStubs->resolveStub( $row, $this->dryRun ) ? 1 : 0;
312 if ( $this->reportingInterval
313 && $numTotal % $this->reportingInterval == 0
315 $this->
output(
"$numTotal stubs processed\n" );
320 $this->
output(
"$numResolved of $numTotal stubs resolved\n" );
// Restrict the query to the current block: $blockStart <= old_id <= $blockEnd.
$dbr->expr( 'old_id', '>=', $blockStart ),
// The upper bound must be "<=": with ">=" the two conditions collapse to
// old_id >= $blockEnd, which selects rows past the block instead of inside it.
$dbr->expr( 'old_id', '<=', $blockEnd ),
// Exclude rows whose old_flags already contain "external".
$dbr->expr( 'old_flags', IExpression::NOT_LIKE,
	new LikeValue( $dbr->anyString(), 'external', $dbr->anyString() ) ),
333 return [ $text, $flags ];
Concatenated gzip (CGZ) storage. Improves compression ratio by concatenating like objects before gzipping.
To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the leftover cur table as the backend.
Pointer object for an item within a CGZ blob stored in the text table.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
getServiceContainer()
Returns the main service container.
getArg( $argId=0, $default=null)
Get an argument.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.