27 use Wikimedia\AtEase\AtEase;
29 require_once __DIR__ .
'/../Maintenance.php';
33 private $resolveStubs;
35 private $reportingInterval;
51 private $legacyEncoding;
58 parent::__construct();
62 $this->
addOption(
'start',
'start old_id',
false,
true,
's' );
63 $this->
addOption(
'end',
'end old_id',
false,
true,
'e' );
64 $this->
addOption(
'threshold',
'minimum size in bytes',
false,
true );
66 'show a message after this many revisions',
false,
true );
67 $this->
addOption(
'undo',
'filename for undo SQL',
false,
true );
69 $this->
addOption(
'skip-gzip',
'Don\'t compress individual revisions' );
71 'Don\'t replace HistoryBlobStub objects with direct external store pointers' );
72 $this->
addOption(
'iconv',
'Resolve legacy character encoding' );
73 $this->
addOption(
'dry-run',
'Don\'t modify any rows' );
75 $this->
addArg(
'type',
'The external store type, e.g. "DB" or "mwstore"' );
76 $this->
addArg(
'location',
'e.g. "cluster12" or "global-swift"' );
81 $this->esType = $this->
getArg( 0 );
82 $this->esLocation = $this->
getArg( 1 );
86 if ( $maxID ===
null ) {
87 $maxID = $dbw->newSelectQueryBuilder()
88 ->select(
'MAX(old_id)' )
90 ->caller( __METHOD__ )->fetchField();
92 $this->maxID = (int)$maxID;
93 $this->minID = (int)$this->
getOption(
'start', 1 );
95 $this->reportingInterval = $this->
getOption(
'reporting-interval', 100 );
96 $this->threshold = (int)$this->
getOption(
'threshold', 0 );
100 } elseif ( !function_exists(
'gzdeflate' ) ) {
101 $this->
fatalError(
"gzdeflate() not found. " .
102 "Please run with --skip-gzip if you don't want to compress revisions." );
107 $this->skipResolve = $this->
getOption(
'skip-resolve' );
110 $legacyEncoding = $this->
getConfig()->get( MainConfigNames::LegacyEncoding );
111 if ( $legacyEncoding ) {
112 $this->legacyEncoding = $legacyEncoding;
114 $this->
output(
"iconv requested but the wiki has no legacy encoding\n" );
117 $this->dryRun = $this->
getOption(
'dry-run',
false );
121 $this->undoLog =
new UndoLog( $undo, $dbw );
122 }
catch ( RuntimeException $e ) {
123 $this->
fatalError(
"Unable to open undo log" );
125 $this->resolveStubs->setUndoLog( $this->undoLog );
127 $this->doMoveToExternal();
130 private function doMoveToExternal() {
133 $count = $this->maxID - $this->minID + 1;
135 $numBlocks = ceil( $count / $blockSize );
136 print
"Moving text rows from {$this->minID} to {$this->maxID} to external storage\n";
140 $extStore = $esFactory->getStore( $this->esType );
144 for ( $block = 0; $block < $numBlocks; $block++ ) {
145 $blockStart = $block * $blockSize + $this->minID;
146 $blockEnd = $blockStart + $blockSize - 1;
148 if ( $this->reportingInterval && !( $block % $this->reportingInterval ) ) {
149 $this->
output(
"oldid=$blockStart, moved=$numMoved\n" );
150 $lbFactory->waitForReplication();
153 $res = $dbr->newSelectQueryBuilder()
154 ->select( [
'old_id',
'old_flags',
'old_text' ] )
156 ->where( $this->
getConditions( $blockStart, $blockEnd, $dbr ) )
157 ->caller( __METHOD__ )->fetchResultSet();
158 foreach ( $res as $row ) {
159 $text = $row->old_text;
161 $flags = SqlBlobStore::explodeFlags( $row->old_flags );
162 [ $text, $flags ] = $this->
resolveText( $text, $flags );
164 if ( in_array(
'error', $flags ) ) {
166 } elseif ( in_array(
'object', $flags ) ) {
167 $obj = unserialize( $text );
170 if ( !$this->skipResolve ) {
176 [ $text, $flags ] = $this->resolveLegacyEncoding( $obj->getText(), [] );
177 [ $text, $flags ] = $this->compress( $text, $flags );
181 $className = get_class( $obj );
182 print
"Warning: old_id=$id unrecognised object class \"$className\"\n";
185 } elseif ( strlen( $text ) < $this->threshold ) {
189 [ $text, $flags ] = $this->resolveLegacyEncoding( $text, $flags );
190 [ $text, $flags ] = $this->compress( $text, $flags );
192 $flags[] =
'external';
193 $flagsString = implode(
',', $flags );
195 if ( $this->dryRun ) {
196 $this->
output(
"Move $id => $flagsString " .
197 addcslashes( substr( $text, 0, 30 ),
"\0..\x1f\x7f..\xff" ) .
203 $url = $extStore->store( $this->esLocation, $text );
205 $this->
fatalError(
"Error writing to external storage" );
207 $moved = $this->undoLog->update(
209 [
'old_flags' => $flagsString,
'old_text' => $url ],
216 print
"Update of old_id $id failed, affected zero rows\n";
221 if ( count( $stubIDs ) ) {
222 $this->resolveStubs( $stubIDs );
226 private function compress( $text, $flags ) {
227 if ( $this->gzip && !in_array(
'gzip', $flags ) ) {
229 $text = gzdeflate( $text );
231 return [ $text, $flags ];
234 private function resolveLegacyEncoding( $text, $flags ) {
235 if ( $this->legacyEncoding !==
null
236 && !in_array(
'utf-8', $flags )
237 && !in_array(
'utf8', $flags )
240 if ( in_array(
'gzip', $flags ) ) {
241 if ( !$this->gzip ) {
242 return [ $text, $flags ];
244 $flags = array_diff( $flags, [
'gzip' ] );
245 $text = gzinflate( $text );
247 AtEase::suppressWarnings();
248 $text = iconv( $this->legacyEncoding,
'UTF-8//IGNORE', $text );
249 AtEase::restoreWarnings();
252 return [ $text, $flags ];
255 private function resolveStubs( $stubIDs ) {
256 if ( $this->dryRun ) {
257 print
"Note: resolving stubs in dry run mode is expected to fail, " .
258 "because the main blobs have not been moved to external storage.\n";
263 $this->
output(
"Resolving " . count( $stubIDs ) .
" stubs\n" );
266 foreach ( array_chunk( $stubIDs, $this->
getBatchSize() ) as $stubBatch ) {
267 $res = $dbr->newSelectQueryBuilder()
268 ->select( [
'old_id',
'old_flags',
'old_text' ] )
270 ->where( [
'old_id' => $stubBatch ] )
271 ->caller( __METHOD__ )->fetchResultSet();
272 foreach ( $res as $row ) {
273 $numResolved += $this->resolveStubs->resolveStub( $row, $this->dryRun ) ? 1 : 0;
275 if ( $this->reportingInterval
276 && $numTotal % $this->reportingInterval == 0
278 $this->
output(
"$numTotal stubs processed\n" );
279 $lbFactory->waitForReplication();
283 $this->
output(
"$numResolved of $numTotal stubs resolved\n" );
288 "old_id BETWEEN $blockStart AND $blockEnd",
289 'old_flags NOT ' . $dbr->buildLike( $dbr->anyString(),
'external', $dbr->anyString() ),
294 return [ $text, $flags ];
299 require_once RUN_MAINTENANCE_IF_MAIN;
Concatenated gzip (CGZ) storage. Improves compression ratio by concatenating like objects before gzipping.
To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the leftover cur table as the backend.
Pointer object for an item within a CGZ blob stored in the text table.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
getDB( $db, $groups=[], $dbDomain=false)
Returns a database to be used by current maintenance script.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
getServiceContainer()
Returns the main service container.
getBatchSize()
Returns batch size.
getArg( $argId=0, $default=null)
Get an argument.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
A class containing constants representing the names of configuration variables.
Update a database while optionally writing SQL that reverses the update to a file.
__construct()
Default constructor.
getConditions( $blockStart, $blockEnd, $dbr)
execute()
Do the actual work.
resolveText( $text, $flags)