34 private $resolveStubs;
36 private $reportingInterval;
52 private $legacyEncoding;
59 parent::__construct();
63 $this->
addOption(
'start',
'start old_id',
false,
true,
's' );
64 $this->
addOption(
'end',
'end old_id',
false,
true,
'e' );
65 $this->
addOption(
'threshold',
'minimum size in bytes',
false,
true );
67 'show a message after this many revisions',
false,
true );
68 $this->
addOption(
'undo',
'filename for undo SQL',
false,
true );
70 $this->
addOption(
'skip-gzip',
'Don\'t compress individual revisions' );
72 'Don\'t replace HistoryBlobStub objects with direct external store pointers' );
73 $this->
addOption(
'iconv',
'Resolve legacy character encoding' );
74 $this->
addOption(
'dry-run',
'Don\'t modify any rows' );
76 $this->
addArg(
'type',
'The external store type, e.g. "DB" or "mwstore"' );
77 $this->
addArg(
'location',
'e.g. "cluster12" or "global-swift"' );
82 $this->esType = $this->
getArg( 0 );
83 $this->esLocation = $this->
getArg( 1 );
87 if ( $maxID ===
null ) {
88 $maxID = $dbw->selectField(
'text',
'MAX(old_id)',
'', __METHOD__ );
90 $this->maxID = (int)$maxID;
91 $this->minID = (int)$this->
getOption(
'start', 1 );
93 $this->reportingInterval = $this->
getOption(
'reporting-interval', 100 );
94 $this->threshold = (int)$this->
getOption(
'threshold', 0 );
98 } elseif ( !function_exists(
'gzdeflate' ) ) {
100 "Please run with --skip-gzip if you don't want to compress revisions." );
105 $this->skipResolve = $this->
getOption(
'skip-resolve' );
108 $legacyEncoding = $this->
getConfig()->get( MainConfigNames::LegacyEncoding );
109 if ( $legacyEncoding ) {
110 $this->legacyEncoding = $legacyEncoding;
112 $this->
output(
"iconv requested but the wiki has no legacy encoding\n" );
115 $this->dryRun = $this->
getOption(
'dry-run',
false );
119 $this->undoLog =
new UndoLog( $undo, $dbw );
120 }
catch ( RuntimeException $e ) {
121 $this->
fatalError(
"Unable to open undo log" );
123 $this->resolveStubs->setUndoLog( $this->undoLog );
125 $this->doMoveToExternal();
128 private function doMoveToExternal() {
131 $count = $this->maxID - $this->minID + 1;
133 $numBlocks = ceil( $count / $blockSize );
134 print
"Moving text rows from {$this->minID} to {$this->maxID} to external storage\n";
136 $esFactory = MediaWikiServices::getInstance()->getExternalStoreFactory();
137 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
138 $extStore = $esFactory->getStore( $this->esType );
142 for ( $block = 0; $block < $numBlocks; $block++ ) {
143 $blockStart = $block * $blockSize + $this->minID;
144 $blockEnd = $blockStart + $blockSize - 1;
146 if ( $this->reportingInterval && !( $block % $this->reportingInterval ) ) {
147 $this->
output(
"oldid=$blockStart, moved=$numMoved\n" );
148 $lbFactory->waitForReplication();
151 $res =
$dbr->select(
'text', [
'old_id',
'old_flags',
'old_text' ],
153 "old_id BETWEEN $blockStart AND $blockEnd",
154 'old_flags NOT ' .
$dbr->buildLike(
$dbr->anyString(),
'external',
$dbr->anyString() ),
157 foreach (
$res as $row ) {
158 $text = $row->old_text;
160 $flags = SqlBlobStore::explodeFlags( $row->old_flags );
162 if ( in_array(
'error', $flags ) ) {
164 } elseif ( in_array(
'object', $flags ) ) {
165 $obj = unserialize( $text );
168 if ( !$this->skipResolve ) {
174 [ $text, $flags ] = $this->compress( $obj->getText(), [
'utf-8' ] );
178 $className = get_class( $obj );
179 print
"Warning: old_id=$id unrecognised object class \"$className\"\n";
182 } elseif ( strlen( $text ) < $this->threshold ) {
186 [ $text, $flags ] = $this->resolveLegacyEncoding( $text, $flags );
187 [ $text, $flags ] = $this->compress( $text, $flags );
189 $flags[] =
'external';
190 $flagsString = implode(
',', $flags );
192 if ( $this->dryRun ) {
193 $this->
output(
"Move $id => $flagsString " .
194 addcslashes( substr( $text, 0, 30 ),
"\0..\x1f\x7f..\xff" ) .
200 $url = $extStore->store( $this->esLocation, $text );
202 $this->
fatalError(
"Error writing to external storage" );
204 $moved = $this->undoLog->update(
206 [
'old_flags' => $flagsString,
'old_text' => $url ],
213 print
"Update of old_id $id failed, affected zero rows\n";
218 if ( count( $stubIDs ) ) {
219 $this->resolveStubs( $stubIDs );
223 private function compress( $text, $flags ) {
224 if ( $this->gzip && !in_array(
'gzip', $flags ) ) {
226 $text = gzdeflate( $text );
228 return [ $text, $flags ];
231 private function resolveLegacyEncoding( $text, $flags ) {
232 if ( $this->legacyEncoding !==
null
233 && !in_array(
'utf-8', $flags )
234 && !in_array(
'utf8', $flags )
236 AtEase::suppressWarnings();
237 $text = iconv( $this->legacyEncoding,
'UTF-8//IGNORE', $text );
238 AtEase::restoreWarnings();
241 return [ $text, $flags ];
244 private function resolveStubs( $stubIDs ) {
245 if ( $this->dryRun ) {
246 print
"Note: resolving stubs in dry run mode is expected to fail, " .
247 "because the main blobs have not been moved to external storage.\n";
251 $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
252 $this->
output(
"Resolving " . count( $stubIDs ) .
" stubs\n" );
255 foreach ( array_chunk( $stubIDs, $this->
getBatchSize() ) as $stubBatch ) {
258 [
'old_id',
'old_flags',
'old_text' ],
259 [
'old_id' => $stubBatch ],
262 foreach (
$res as $row ) {
263 $numResolved += $this->resolveStubs->resolveStub( $row, $this->dryRun ) ? 1 : 0;
265 if ( $this->reportingInterval
266 && $numTotal % $this->reportingInterval == 0
268 $this->
output(
"$numTotal stubs processed\n" );
269 $lbFactory->waitForReplication();
273 $this->
output(
"$numResolved of $numTotal stubs resolved\n" );
Concatenated gzip (CGZ) storage. Improves compression ratio by concatenating like objects before gzipping.
To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the leftover cur table as the b...
Pointer object for an item within a CGZ blob stored in the text table.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
getArg( $argId=0, $default=null)
Get an argument.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.