// Helper object; this script calls setUndoLog() and resolveStub() on it.
38 private $resolveStubs;
// Progress is reported whenever the block/stub counter is a multiple of this
// value; read from the 'reporting-interval' option (default 100).
40 private $reportingInterval;
// Source encoding passed to iconv(); copied from the
// MainConfigNames::LegacyEncoding setting when that setting is non-empty.
56 private $legacyEncoding;
63 parent::__construct();
67 $this->
addOption(
'start',
'start old_id',
false,
true,
's' );
68 $this->
addOption(
'end',
'end old_id',
false,
true,
'e' );
69 $this->
addOption(
'threshold',
'minimum size in bytes',
false,
true );
71 'show a message after this many revisions',
false,
true );
72 $this->
addOption(
'undo',
'filename for undo SQL',
false,
true );
74 $this->
addOption(
'skip-gzip',
'Don\'t compress individual revisions' );
76 'Don\'t replace HistoryBlobStub objects with direct external store pointers' );
77 $this->
addOption(
'iconv',
'Resolve legacy character encoding' );
78 $this->
addOption(
'dry-run',
'Don\'t modify any rows' );
80 $this->
addArg(
'type',
'The external store type, e.g. "DB" or "mwstore"' );
81 $this->
addArg(
'location',
'e.g. "cluster12" or "global-swift"' );
87 $this->esType = $this->
getArg( 0 );
88 $this->esLocation = $this->
getArg( 1 );
91 $maxID = $this->
getOption(
'end' ) ?? $dbw->newSelectQueryBuilder()
92 ->select(
'MAX(old_id)' )
94 ->caller( __METHOD__ )->fetchField();
95 $this->maxID = (int)$maxID;
96 $this->minID = (int)$this->
getOption(
'start', 1 );
98 $this->reportingInterval = $this->
getOption(
'reporting-interval', 100 );
99 $this->threshold = (int)$this->
getOption(
'threshold', 0 );
103 } elseif ( !function_exists(
'gzdeflate' ) ) {
104 $this->
fatalError(
"gzdeflate() not found. " .
105 "Please run with --skip-gzip if you don't want to compress revisions." );
110 $this->skipResolve = $this->
getOption(
'skip-resolve' );
113 $legacyEncoding = $this->
getConfig()->get( MainConfigNames::LegacyEncoding );
114 if ( $legacyEncoding ) {
115 $this->legacyEncoding = $legacyEncoding;
117 $this->
output(
"iconv requested but the wiki has no legacy encoding\n" );
120 $this->dryRun = $this->
getOption(
'dry-run',
false );
124 $this->undoLog =
new UndoLog( $undo, $dbw );
125 }
catch ( RuntimeException $e ) {
126 $this->
fatalError(
"Unable to open undo log" );
128 $this->resolveStubs->setUndoLog( $this->undoLog );
130 return $this->doMoveToExternal();
/**
 * Walk the text rows with old_id between $this->minID and $this->maxID in
 * blocks of $blockSize ids, and move each eligible row to external storage.
 *
 * Per row, as far as this listing shows: the text is resolved, rows flagged
 * 'error' or 'object' and rows shorter than $this->threshold get their own
 * branches, and the remaining text goes through resolveLegacyEncoding() and
 * compress(). The 'external' flag is appended; in dry-run mode the move is
 * only reported, otherwise the text is written with $extStore->store() and
 * the row is updated through $this->undoLog->update(). Collected stub ids
 * are passed to resolveStubs() at the end.
 *
 * NOTE(review): this listing omits many original lines (see the gaps in the
 * embedded numbers), including the bodies of several branches.
 *
 * @return bool
 */
133 private function doMoveToExternal(): bool {
// Number of ids in the inclusive range [minID, maxID].
137 $count = $this->maxID - $this->minID + 1;
139 $numBlocks = ceil( $count / $blockSize );
140 print
"Moving text rows from {$this->minID} to {$this->maxID} to external storage\n";
143 $extStore = $esFactory->getStore( $this->esType );
147 for ( $block = 0; $block < $numBlocks; $block++ ) {
// Inclusive id range covered by this block.
148 $blockStart = $block * $blockSize + $this->minID;
149 $blockEnd = $blockStart + $blockSize - 1;
// Progress line on every reportingInterval-th block (including block 0).
151 if ( $this->reportingInterval && !( $block % $this->reportingInterval ) ) {
152 $this->
output(
"oldid=$blockStart, moved=$numMoved\n" );
// Row selection for the block is delegated to getConditions().
156 $res = $dbr->newSelectQueryBuilder()
157 ->select( [
'old_id',
'old_flags',
'old_text' ] )
159 ->where( $this->
getConditions( $blockStart, $blockEnd, $dbr ) )
160 ->caller( __METHOD__ )->fetchResultSet();
161 foreach ( $res as $row ) {
162 $text = $row->old_text;
164 $flags = SqlBlobStore::explodeFlags( $row->old_flags );
165 [ $text, $flags ] = $this->
resolveText( $text, $flags );
// resolveText() signals failure with a false text.
167 if ( $text ===
false ) {
171 if ( in_array(
'error', $flags ) ) {
173 } elseif ( in_array(
'object', $flags ) ) {
// NOTE(review): unserialize() runs on text read from the database; confirm
// that this content is trusted.
174 $obj = unserialize( $text );
177 if ( !$this->skipResolve ) {
183 $newText = $obj->getText();
184 if ( $newText ===
false ) {
185 print
"Warning: Could not fetch revision blob {$id}: {$text}\n";
// The fetched text is re-encoded starting from an empty flag list.
190 [ $text, $flags ] = $this->resolveLegacyEncoding( $newText, [] );
192 if ( $text ===
false ) {
// NOTE(review): \' is not an escape sequence inside a double-quoted PHP
// string, so this message prints a literal backslash before the apostrophe.
193 print
"Warning: Could not decode legacy-encoded gzip\'ed revision blob {$id}: {$newText}\n";
198 [ $text, $flags ] = $this->compress( $text, $flags );
202 $className = get_class( $obj );
203 print
"Warning: old_id=$id unrecognised object class \"$className\"\n";
// Rows smaller than the --threshold size take this branch; its body is not
// shown in this listing (presumably the row is skipped; TODO confirm).
207 } elseif ( strlen( $text ) < $this->threshold ) {
211 [ $text, $flags ] = $this->resolveLegacyEncoding( $text, $flags );
212 [ $newText, $flags ] = $this->compress( $text, $flags );
213 if ( $newText ===
false ) {
214 print
"Warning: Could not compress revision blob {$id}: {$text}\n";
// Mark the row as pointing to external storage and serialise the flags
// as a comma-separated string.
220 $flags[] =
'external';
221 $flagsString = implode(
',', $flags );
// Dry run: print the id, the new flags and an escaped preview of the
// first 30 bytes of the text.
223 if ( $this->dryRun ) {
224 $this->
output(
"Move $id => $flagsString " .
225 addcslashes( substr( $text, 0, 30 ),
"\0..\x1f\x7f..\xff" ) .
// Real run: write the blob to the configured store location.
231 $url = $extStore->store( $this->esLocation, $text );
233 $this->
fatalError(
"Error writing to external storage" );
// The row update goes through the undo log object; old_text is set to the
// value returned by store().
235 $moved = $this->undoLog->update(
237 [
'old_flags' => $flagsString,
'old_text' =>
$url ],
244 print
"Update of old_id $id failed, affected zero rows\n";
// Resolve any stubs collected during the run.
250 if ( count( $stubIDs ) ) {
251 $this->resolveStubs( $stubIDs );
/**
 * Deflate a blob with gzdeflate() when gzip mode is enabled ($this->gzip)
 * and the flags do not already contain 'gzip'.
 *
 * NOTE(review): this listing skips original lines (the embedded numbers jump
 * 258 -> 260 -> 262), so any update of $flags is not visible here.
 *
 * @param string $text Blob text
 * @param array $flags Flag names attached to the blob
 * @return array Two elements: the (possibly deflated) text, then the flags
 */
257 private function compress(
string $text, array $flags ): array {
258 if ( $this->gzip && !in_array(
'gzip', $flags ) ) {
260 $text = gzdeflate( $text );
262 return [ $text, $flags ];
/**
 * Convert a blob from the wiki's legacy encoding to UTF-8 with iconv().
 *
 * The conversion is only attempted when $this->legacyEncoding is set and the
 * flags contain neither 'utf-8' nor 'utf8' (further conditions may sit on
 * lines missing from this listing). A blob flagged 'gzip' is returned
 * untouched when gzip mode is off; otherwise the flag is removed and the
 * text is inflated before conversion.
 *
 * @param string $text Blob text
 * @param array $flags Flag names attached to the blob
 * @return array Two elements: the text, or false when gzinflate() or iconv()
 *  fails, followed by the flags
 */
265 private function resolveLegacyEncoding(
string $text, array $flags ): array {
266 if ( $this->legacyEncoding !== null
267 && !in_array(
'utf-8', $flags )
268 && !in_array(
'utf8', $flags )
271 if ( in_array(
'gzip', $flags ) ) {
// Gzip mode is off: leave compressed blobs exactly as they are.
272 if ( !$this->gzip ) {
273 return [ $text, $flags ];
275 $flags = array_diff( $flags, [
'gzip' ] );
276 $newText = gzinflate( $text );
277 if ( $newText ===
false ) {
278 return [
false, $flags ];
// iconv() warnings are suppressed; failure is detected from the false
// return value instead.
282 AtEase::suppressWarnings();
283 $newText = iconv( $this->legacyEncoding,
'UTF-8//IGNORE', $text );
284 AtEase::restoreWarnings();
285 if ( $newText ===
false ) {
286 return [
false, $flags ];
291 return [ $text, $flags ];
/**
 * Resolve the given stub rows in batches of getBatchSize() ids.
 *
 * Each batch is read from the replica database and every row is handed to
 * $this->resolveStubs->resolveStub(); progress and a final summary are
 * written with output().
 *
 * NOTE(review): the initialisation of $numResolved and $numTotal, and the
 * increment of $numTotal, are on lines missing from this listing.
 *
 * @param array $stubIDs old_id values of the stub rows
 */
294 private function resolveStubs( array $stubIDs ) {
295 if ( $this->dryRun ) {
296 print
"Note: resolving stubs in dry run mode is expected to fail, " .
297 "because the main blobs have not been moved to external storage.\n";
300 $dbr = $this->getReplicaDB();
301 $this->output(
"Resolving " . count( $stubIDs ) .
" stubs\n" );
304 foreach ( array_chunk( $stubIDs, $this->getBatchSize() ) as $stubBatch ) {
305 $res = $dbr->newSelectQueryBuilder()
306 ->select( [
'old_id',
'old_flags',
'old_text' ] )
308 ->where( [
'old_id' => $stubBatch ] )
309 ->caller( __METHOD__ )->fetchResultSet();
310 foreach ( $res as $row ) {
// Count the row as resolved when resolveStub() returns a truthy value; the
// dry-run flag is passed through to it.
311 $numResolved += $this->resolveStubs->resolveStub( $row, $this->dryRun ) ? 1 : 0;
// Progress line each time the total is a multiple of reportingInterval.
313 if ( $this->reportingInterval
314 && $numTotal % $this->reportingInterval == 0
316 $this->output(
"$numTotal stubs processed\n" );
317 $this->waitForReplication();
321 $this->output(
"$numResolved of $numTotal stubs resolved\n" );
// Conditions selecting one block of text rows that still live in the local
// text table. The caller passes an inclusive id range
// ($blockEnd = $blockStart + $blockSize - 1), so both bounds are inclusive.
$dbr->expr( 'old_id', '>=', $blockStart ),
// Upper bound of the block. This used '>=', which skipped the rows inside
// the block and matched every later row, including ids above the block end.
$dbr->expr( 'old_id', '<=', $blockEnd ),
// Skip rows whose flags already contain 'external'.
$dbr->expr( 'old_flags', IExpression::NOT_LIKE,
	new LikeValue( $dbr->anyString(), 'external', $dbr->anyString() ) ),
334 return [ $text, $flags ];
Concatenated gzip (CGZ) storage. Improves compression ratio by concatenating like objects before gzipping.
To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the leftover cur table as the b...
Pointer object for an item within a CGZ blob stored in the text table.