33 private $resolveStubs;
35 private $reportingInterval;
51 private $legacyEncoding;
58 parent::__construct();
62 $this->
addOption(
'start',
'start old_id',
false,
true,
's' );
63 $this->
addOption(
'end',
'end old_id',
false,
true,
'e' );
64 $this->
addOption(
'threshold',
'minimum size in bytes',
false,
true );
66 'show a message after this many revisions',
false,
true );
67 $this->
addOption(
'undo',
'filename for undo SQL',
false,
true );
69 $this->
addOption(
'skip-gzip',
'Don\'t compress individual revisions' );
71 'Don\'t replace HistoryBlobStub objects with direct external store pointers' );
72 $this->
addOption(
'iconv',
'Resolve legacy character encoding' );
73 $this->
addOption(
'dry-run',
'Don\'t modify any rows' );
75 $this->
addArg(
'type',
'The external store type, e.g. "DB" or "mwstore"' );
76 $this->
addArg(
'location',
'e.g. "cluster12" or "global-swift"' );
81 $this->esType = $this->
getArg( 0 );
82 $this->esLocation = $this->
getArg( 1 );
86 if ( $maxID ===
null ) {
87 $maxID = $dbw->newSelectQueryBuilder()
88 ->select(
'MAX(old_id)' )
90 ->caller( __METHOD__ )->fetchField();
92 $this->maxID = (int)$maxID;
93 $this->minID = (int)$this->
getOption(
'start', 1 );
95 $this->reportingInterval = $this->
getOption(
'reporting-interval', 100 );
96 $this->threshold = (int)$this->
getOption(
'threshold', 0 );
100 } elseif ( !function_exists(
'gzdeflate' ) ) {
101 $this->
fatalError(
"gzdeflate() not found. " .
102 "Please run with --skip-gzip if you don't want to compress revisions." );
107 $this->skipResolve = $this->
getOption(
'skip-resolve' );
110 $legacyEncoding = $this->
getConfig()->get( MainConfigNames::LegacyEncoding );
111 if ( $legacyEncoding ) {
112 $this->legacyEncoding = $legacyEncoding;
114 $this->
output(
"iconv requested but the wiki has no legacy encoding\n" );
117 $this->dryRun = $this->
getOption(
'dry-run',
false );
121 $this->undoLog =
new UndoLog( $undo, $dbw );
122 }
catch ( RuntimeException $e ) {
123 $this->
fatalError(
"Unable to open undo log" );
125 $this->resolveStubs->setUndoLog( $this->undoLog );
127 return $this->doMoveToExternal();
// Moves text rows with old_id between $this->minID and $this->maxID to
// external storage, walking the id range in blocks of $blockSize.
// Only part of this method is visible in this listing (the embedded source
// line numbers skip), so the notes below describe the visible statements only.
130 private function doMoveToExternal() {
// The id range is inclusive at both ends, hence the +1.
134 $count = $this->maxID - $this->minID + 1;
136 $numBlocks = ceil( $count / $blockSize );
// NOTE(review): this message uses print while other messages in this method
// go through $this->output() — confirm whether that is intentional.
137 print
"Moving text rows from {$this->minID} to {$this->maxID} to external storage\n";
// The store is looked up by type (esType); the location (esLocation) is
// passed later, on each store() call.
140 $extStore = $esFactory->getStore( $this->esType );
144 for ( $block = 0; $block < $numBlocks; $block++ ) {
// Inclusive id bounds of the current block.
145 $blockStart = $block * $blockSize + $this->minID;
146 $blockEnd = $blockStart + $blockSize - 1;
// Progress line on every reportingInterval-th block; a falsy interval
// disables progress output.
148 if ( $this->reportingInterval && !( $block % $this->reportingInterval ) ) {
149 $this->
output(
"oldid=$blockStart, moved=$numMoved\n" );
// Fetch the rows of this block, filtered by getConditions().
153 $res = $dbr->newSelectQueryBuilder()
154 ->select( [
'old_id',
'old_flags',
'old_text' ] )
156 ->where( $this->
getConditions( $blockStart, $blockEnd, $dbr ) )
157 ->caller( __METHOD__ )->fetchResultSet();
158 foreach ( $res as $row ) {
159 $text = $row->old_text;
161 $flags = SqlBlobStore::explodeFlags( $row->old_flags );
162 [ $text, $flags ] = $this->
resolveText( $text, $flags );
// resolveText() signals failure with a false text.
164 if ( $text ===
false ) {
// Rows are dispatched on their flags: 'error', then 'object', then size.
168 if ( in_array(
'error', $flags ) ) {
170 } elseif ( in_array(
'object', $flags ) ) {
// NOTE(review): unserialize() runs on text read from the database —
// confirm this data is trusted.
171 $obj = unserialize( $text );
174 if ( !$this->skipResolve ) {
180 $newText = $obj->getText();
181 if ( $newText ===
false ) {
182 print
"Warning: Could not fetch revision blob {$id}: {$text}\n";
// The object's text is re-encoded starting from an empty flag list.
187 [ $text, $flags ] = $this->resolveLegacyEncoding( $newText, [] );
189 if ( $text ===
false ) {
// NOTE(review): in a double-quoted PHP string, backslash + single quote is
// not an escape sequence, so the backslash is printed literally in this
// warning — likely unintended.
190 print
"Warning: Could not decode legacy-encoded gzip\'ed revision blob {$id}: {$newText}\n";
195 [ $text, $flags ] = $this->compress( $text, $flags );
199 $className = get_class( $obj );
200 print
"Warning: old_id=$id unrecognised object class \"$className\"\n";
// Size check against the --threshold option; the branch body is not
// visible here (presumably such rows are left in place — confirm).
204 } elseif ( strlen( $text ) < $this->threshold ) {
208 [ $text, $flags ] = $this->resolveLegacyEncoding( $text, $flags );
209 [ $newText, $flags ] = $this->compress( $text, $flags );
210 if ( $newText ===
false ) {
211 print
"Warning: Could not compress revision blob {$id}: {$text}\n";
// Mark the row as external and serialise the flags back to a
// comma-separated string.
217 $flags[] =
'external';
218 $flagsString = implode(
',', $flags );
// Dry run: only report the move, showing the first 30 bytes of the text
// with control and high bytes escaped.
220 if ( $this->dryRun ) {
221 $this->
output(
"Move $id => $flagsString " .
222 addcslashes( substr( $text, 0, 30 ),
"\0..\x1f\x7f..\xff" ) .
// Write the blob to the configured location; the returned URL becomes the
// row's new old_text below.
228 $url = $extStore->store( $this->esLocation, $text );
// The condition guarding this fatal error is not visible in this listing.
230 $this->
fatalError(
"Error writing to external storage" );
// The row update goes through the undo log object rather than directly to
// the database handle.
232 $moved = $this->undoLog->update(
234 [
'old_flags' => $flagsString,
'old_text' => $url ],
241 print
"Update of old_id $id failed, affected zero rows\n";
// Stub ids collected earlier (collection not visible here) are resolved at the end.
247 if ( count( $stubIDs ) ) {
248 $this->resolveStubs( $stubIDs );
// Deflates $text with gzdeflate() when gzip is enabled ($this->gzip) and
// $flags does not already contain 'gzip'. Returns [ $text, $flags ].
// NOTE(review): source line 256 is missing from this listing; presumably it
// records the 'gzip' flag — confirm against the full file.
254 private function compress( $text, $flags ) {
255 if ( $this->gzip && !in_array(
'gzip', $flags ) ) {
257 $text = gzdeflate( $text );
259 return [ $text, $flags ];
// Converts $text from the configured legacy encoding to UTF-8.
// Returns [ $text, $flags ]; the text element is false when inflating or
// converting fails. Parts of this method are missing from this listing.
262 private function resolveLegacyEncoding( $text, $flags ) {
// Conversion is only attempted when a legacy encoding is configured and the
// row is not already flagged as UTF-8 under either spelling. Further
// conditions may exist on lines not visible here.
263 if ( $this->legacyEncoding !==
null
264 && !in_array(
'utf-8', $flags )
265 && !in_array(
'utf8', $flags )
268 if ( in_array(
'gzip', $flags ) ) {
// With gzip disabled, a gzip-flagged row is returned unchanged.
269 if ( !$this->gzip ) {
270 return [ $text, $flags ];
// Drop the 'gzip' flag and inflate the text.
272 $flags = array_diff( $flags, [
'gzip' ] );
273 $newText = gzinflate( $text );
274 if ( $newText ===
false ) {
275 return [
false, $flags ];
// Warnings from iconv() are suppressed; failure is detected through its
// false return value.
// NOTE(review): iconv() reads $text here; how the inflated $newText is
// handed back to $text is not visible in this listing — confirm.
279 AtEase::suppressWarnings();
280 $newText = iconv( $this->legacyEncoding,
'UTF-8//IGNORE', $text );
281 AtEase::restoreWarnings();
282 if ( $newText ===
false ) {
283 return [
false, $flags ];
288 return [ $text, $flags ];
// Resolves the given stub rows in batches and reports how many were resolved.
// $stubIDs is a list of old_id values. Parts of this method (including the
// initialisation of the counters) are missing from this listing.
291 private function resolveStubs( $stubIDs ) {
// In dry-run mode a note explains why resolution is expected to fail.
292 if ( $this->dryRun ) {
293 print
"Note: resolving stubs in dry run mode is expected to fail, " .
294 "because the main blobs have not been moved to external storage.\n";
298 $this->
output(
"Resolving " . count( $stubIDs ) .
" stubs\n" );
// The ids are split into chunks of getBatchSize(), one query per chunk.
301 foreach ( array_chunk( $stubIDs, $this->
getBatchSize() ) as $stubBatch ) {
302 $res = $dbr->newSelectQueryBuilder()
303 ->select( [
'old_id',
'old_flags',
'old_text' ] )
305 ->where( [
'old_id' => $stubBatch ] )
306 ->caller( __METHOD__ )->fetchResultSet();
307 foreach ( $res as $row ) {
// A truthy result from resolveStub() counts as one resolved stub; the dry-run
// flag is passed through to it.
308 $numResolved += $this->resolveStubs->resolveStub( $row, $this->dryRun ) ? 1 : 0;
// Progress line whenever $numTotal is a multiple of reportingInterval.
310 if ( $this->reportingInterval
311 && $numTotal % $this->reportingInterval == 0
313 $this->
output(
"$numTotal stubs processed\n" );
// Final summary.
318 $this->
output(
"$numResolved of $numTotal stubs resolved\n" );
323 "old_id BETWEEN $blockStart AND $blockEnd",
324 'old_flags NOT ' . $dbr->buildLike( $dbr->anyString(),
'external', $dbr->anyString() ),
329 return [ $text, $flags ];
Concatenated gzip (CGZ) storage. Improves compression ratio by concatenating like objects before gzipping.
To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the leftover cur table as the backend.
Pointer object for an item within a CGZ blob stored in the text table.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
waitForReplication()
Wait for replica DBs to catch up.
getServiceContainer()
Returns the main service container.
getArg( $argId=0, $default=null)
Get an argument.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.