MediaWiki REL1_40
moveToExternal.php
Go to the documentation of this file.
1<?php
28use Wikimedia\AtEase\AtEase;
29
30require_once __DIR__ . '/../Maintenance.php';
31
/** @var ResolveStubs Helper created in execute(); used to resolve HistoryBlobStub rows */
34 private $resolveStubs;
/** Value of --reporting-interval (default 100); a falsy value disables progress messages */
36 private $reportingInterval;
/** @var int First old_id to process, from --start (default 1) */
38 private $minID;
/** @var int Last old_id to process, from --end, or MAX(old_id) of the text table when omitted */
40 private $maxID;
/** First positional argument: the external store type, e.g. "DB" or "mwstore" */
42 private $esType;
/** Second positional argument: the store location, e.g. "cluster12" or "global-swift" */
44 private $esLocation;
/** @var int Value of --threshold; plain text rows shorter than this many bytes are not moved */
46 private $threshold;
/** @var bool Whether compress() should gzdeflate text; false when --skip-gzip is given */
48 private $gzip;
/** Value of --skip-resolve; when truthy, HistoryBlobStub rows are not queued for resolution */
50 private $skipResolve;
/** Legacy encoding name from the LegacyEncoding config; only assigned when --iconv is given and the config value is set */
52 private $legacyEncoding;
/** Value of --dry-run; when truthy, rows are reported instead of being stored and updated */
54 private $dryRun;
/** @var UndoLog Performs the text row updates; constructed with the --undo filename and the primary DB */
56 private $undoLog;
57
/**
 * Register the batch size, options and positional arguments of the script.
 *
 * Options and arguments are registered in the same order as they are
 * listed below.
 */
public function __construct() {
	parent::__construct();

	$this->setBatchSize( 1000 );

	// Options that take a value: [ name, description, short name or false ]
	$valueOptions = [
		[ 'start', 'start old_id', 's' ],
		[ 'end', 'end old_id', 'e' ],
		[ 'threshold', 'minimum size in bytes', false ],
		[ 'reporting-interval', 'show a message after this many revisions', false ],
		[ 'undo', 'filename for undo SQL', false ],
	];
	foreach ( $valueOptions as [ $optName, $optDescription, $optShortName ] ) {
		$this->addOption( $optName, $optDescription, false, true, $optShortName );
	}

	// Boolean switches: name => description
	$switches = [
		'skip-gzip' => 'Don\'t compress individual revisions',
		'skip-resolve' =>
			'Don\'t replace HistoryBlobStub objects with direct external store pointers',
		'iconv' => 'Resolve legacy character encoding',
		'dry-run' => 'Don\'t modify any rows',
	];
	foreach ( $switches as $switchName => $switchDescription ) {
		$this->addOption( $switchName, $switchDescription );
	}

	// Positional arguments: name => description
	$positionalArgs = [
		'type' => 'The external store type, e.g. "DB" or "mwstore"',
		'location' => 'e.g. "cluster12" or "global-swift"',
	];
	foreach ( $positionalArgs as $argName => $argDescription ) {
		$this->addArg( $argName, $argDescription );
	}
}
79
/**
 * Read the command line options and arguments into the object's state,
 * open the undo log and start moving rows.
 *
 * Terminates the script via fatalError() when compression is requested
 * but gzdeflate() is unavailable, or when the undo log cannot be opened.
 */
public function execute() {
	$this->resolveStubs = new ResolveStubs;
	$this->esType = $this->getArg( 0 ); // e.g. "DB" or "mwstore"
	$this->esLocation = $this->getArg( 1 ); // e.g. "cluster12" or "global-swift"
	$dbw = $this->getDB( DB_PRIMARY );

	// Without --end, process up to the highest old_id currently in the text table
	$maxID = $this->getOption( 'end' );
	if ( $maxID === null ) {
		$maxID = $dbw->selectField( 'text', 'MAX(old_id)', '', __METHOD__ );
	}
	$this->maxID = (int)$maxID;
	$this->minID = (int)$this->getOption( 'start', 1 );

	// Cast like the other numeric options: the value is later used as the
	// right-hand operand of "%", which throws a TypeError on PHP 8 when
	// given a non-numeric string. A non-numeric value becomes 0, which
	// disables progress reporting.
	$this->reportingInterval = (int)$this->getOption( 'reporting-interval', 100 );
	$this->threshold = (int)$this->getOption( 'threshold', 0 );

	if ( $this->getOption( 'skip-gzip' ) ) {
		$this->gzip = false;
	} elseif ( !function_exists( 'gzdeflate' ) ) {
		$this->fatalError( "gzdeflate() not found. " .
			"Please run with --skip-gzip if you don't want to compress revisions." );
	} else {
		$this->gzip = true;
	}

	$this->skipResolve = $this->getOption( 'skip-resolve' );

	// legacyEncoding is only assigned when --iconv is given AND the wiki
	// actually has a legacy encoding configured
	if ( $this->getOption( 'iconv' ) ) {
		$legacyEncoding = $this->getConfig()->get( MainConfigNames::LegacyEncoding );
		if ( $legacyEncoding ) {
			$this->legacyEncoding = $legacyEncoding;
		} else {
			$this->output( "iconv requested but the wiki has no legacy encoding\n" );
		}
	}
	$this->dryRun = $this->getOption( 'dry-run', false );

	$undo = $this->getOption( 'undo' );
	try {
		$this->undoLog = new UndoLog( $undo, $dbw );
	} catch ( RuntimeException $e ) {
		// Include the underlying reason so the operator can tell why the
		// undo log could not be opened
		$this->fatalError( "Unable to open undo log: " . $e->getMessage() );
	}
	$this->resolveStubs->setUndoLog( $this->undoLog );

	$this->doMoveToExternal();
}
127
/**
 * Move text rows in the old_id range [minID, maxID] to external storage.
 *
 * The range is walked in blocks of getBatchSize() ids. For every row that
 * is not already flagged "external":
 *  - rows flagged "error" are skipped;
 *  - rows flagged "object" are unserialized: HistoryBlobStub rows are
 *    queued for resolveStubs() (unless --skip-resolve), HistoryBlobCurStub
 *    rows have their text copied, ConcatenatedGzipHistoryBlob rows are
 *    stored as is, anything else is skipped with a warning;
 *  - plain text rows shorter than the threshold are skipped, the rest are
 *    optionally converted from the legacy encoding and compressed.
 * The resulting blob is written to the external store and the text row is
 * updated through the undo log to hold the returned URL. In dry run mode
 * the intended move is printed instead.
 */
private function doMoveToExternal() {
	$dbr = $this->getDB( DB_REPLICA );

	$count = $this->maxID - $this->minID + 1;
	$blockSize = $this->getBatchSize();
	$numBlocks = ceil( $count / $blockSize );
	print "Moving text rows from {$this->minID} to {$this->maxID} to external storage\n";

	$esFactory = MediaWikiServices::getInstance()->getExternalStoreFactory();
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$extStore = $esFactory->getStore( $this->esType );
	$numMoved = 0;
	$stubIDs = [];

	for ( $block = 0; $block < $numBlocks; $block++ ) {
		$blockStart = $block * $blockSize + $this->minID;
		$blockEnd = $blockStart + $blockSize - 1;

		// Every reportingInterval blocks: report progress and let replication catch up
		if ( $this->reportingInterval && !( $block % $this->reportingInterval ) ) {
			$this->output( "oldid=$blockStart, moved=$numMoved\n" );
			$lbFactory->waitForReplication();
		}

		$res = $dbr->select( 'text', [ 'old_id', 'old_flags', 'old_text' ],
			[
				"old_id BETWEEN $blockStart AND $blockEnd",
				'old_flags NOT ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
			], __METHOD__
		);
		foreach ( $res as $row ) {
			$text = $row->old_text;
			$id = $row->old_id;
			$flags = SqlBlobStore::explodeFlags( $row->old_flags );

			if ( in_array( 'error', $flags ) ) {
				continue;
			} elseif ( in_array( 'object', $flags ) ) {
				$obj = unserialize( $text );
				if ( !is_object( $obj ) ) {
					// unserialize() returns false for corrupt data. Skip the
					// row here: get_class() below throws a TypeError for a
					// non-object on PHP 8, which would abort the whole run.
					print "Warning: old_id=$id does not contain a valid serialized object\n";
					continue;
				} elseif ( $obj instanceof HistoryBlobStub ) {
					// Handle later, after CGZ resolution
					if ( !$this->skipResolve ) {
						$stubIDs[] = $id;
					}
					continue;
				} elseif ( $obj instanceof HistoryBlobCurStub ) {
					// Copy cur text to ES
					[ $text, $flags ] = $this->compress( $obj->getText(), [ 'utf-8' ] );
				} elseif ( $obj instanceof ConcatenatedGzipHistoryBlob ) {
					// Store as is
				} else {
					$className = get_class( $obj );
					print "Warning: old_id=$id unrecognised object class \"$className\"\n";
					continue;
				}
			} elseif ( strlen( $text ) < $this->threshold ) {
				// Don't move small revisions
				continue;
			} else {
				[ $text, $flags ] = $this->resolveLegacyEncoding( $text, $flags );
				[ $text, $flags ] = $this->compress( $text, $flags );
			}
			$flags[] = 'external';
			$flagsString = implode( ',', $flags );

			if ( $this->dryRun ) {
				// Show the new flags and an escaped preview of the first 30 bytes
				$this->output( "Move $id => $flagsString " .
					addcslashes( substr( $text, 0, 30 ), "\0..\x1f\x7f..\xff" ) .
					"\n"
				);
				continue;
			}

			$url = $extStore->store( $this->esLocation, $text );
			if ( !$url ) {
				$this->fatalError( "Error writing to external storage" );
			}
			// The row as it was read is passed along with the new values
			$moved = $this->undoLog->update(
				'text',
				[ 'old_flags' => $flagsString, 'old_text' => $url ],
				(array)$row,
				__METHOD__
			);
			if ( $moved ) {
				$numMoved++;
			} else {
				print "Update of old_id $id failed, affected zero rows\n";
			}
		}
	}

	if ( count( $stubIDs ) ) {
		$this->resolveStubs( $stubIDs );
	}
}
222
/**
 * Deflate the text and add the "gzip" flag, unless compression is
 * disabled or the flags already contain "gzip".
 *
 * @param string $text
 * @param string[] $flags
 * @return array [ text, flags ]
 */
private function compress( $text, $flags ) {
	$needsCompression = $this->gzip && !in_array( 'gzip', $flags );
	if ( !$needsCompression ) {
		return [ $text, $flags ];
	}

	$flags[] = 'gzip';
	return [ gzdeflate( $text ), $flags ];
}
230
/**
 * Convert text from the wiki's legacy encoding to UTF-8 and add the
 * "utf-8" flag.
 *
 * Nothing is changed when no legacy encoding is set, when the flags
 * already contain "utf-8" or "utf8", or when the conversion fails.
 *
 * @param string $text
 * @param string[] $flags
 * @return array [ text, flags ]
 */
private function resolveLegacyEncoding( $text, $flags ) {
	if ( $this->legacyEncoding !== null
		&& !in_array( 'utf-8', $flags )
		&& !in_array( 'utf8', $flags )
	) {
		AtEase::suppressWarnings();
		$converted = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $text );
		AtEase::restoreWarnings();
		// iconv() returns false on failure. Keep the original bytes and
		// flags in that case, rather than flagging the row as UTF-8 and
		// passing false on to be stored in place of the text.
		if ( $converted !== false ) {
			$text = $converted;
			$flags[] = 'utf-8';
		}
	}
	return [ $text, $flags ];
}
243
/**
 * Resolve the given stub rows, one batch of getBatchSize() ids at a time.
 *
 * Each row is re-read from the replica and handed to the ResolveStubs
 * helper together with the dry run setting. Progress is reported, and
 * replication waited for, every reportingInterval stubs.
 *
 * @param array $stubIDs old_id values of the rows to resolve
 */
private function resolveStubs( $stubIDs ) {
	if ( $this->dryRun ) {
		print "Note: resolving stubs in dry run mode is expected to fail, " .
			"because the main blobs have not been moved to external storage.\n";
	}

	$replica = $this->getDB( DB_REPLICA );
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	$this->output( "Resolving " . count( $stubIDs ) . " stubs\n" );

	$numResolved = 0;
	$numTotal = 0;
	$batches = array_chunk( $stubIDs, $this->getBatchSize() );
	foreach ( $batches as $batchIDs ) {
		$stubRows = $replica->select(
			'text',
			[ 'old_id', 'old_flags', 'old_text' ],
			[ 'old_id' => $batchIDs ],
			__METHOD__
		);
		foreach ( $stubRows as $stubRow ) {
			if ( $this->resolveStubs->resolveStub( $stubRow, $this->dryRun ) ) {
				$numResolved += 1;
			}
			$numTotal++;

			$reportNow = $this->reportingInterval
				&& $numTotal % $this->reportingInterval == 0;
			if ( $reportNow ) {
				$this->output( "$numTotal stubs processed\n" );
				$lbFactory->waitForReplication();
			}
		}
	}

	$this->output( "$numResolved of $numTotal stubs resolved\n" );
}
275}
276
// Name this script's class in $maintClass, then include the file named by
// RUN_MAINTENANCE_IF_MAIN (presumably what runs the script when this file is
// the entry point; confirm against Maintenance.php).
277$maintClass = MoveToExternal::class;
278require_once RUN_MAINTENANCE_IF_MAIN;
getDB()
Concatenated gzip (CGZ) storage. Improves compression ratio by concatenating like objects before gzipping.
To speed up conversion from 1.4 to 1.5 schema, text rows can refer to the leftover cur table as the backend.
Pointer object for an item within a CGZ blob stored in the text table.
Abstract maintenance class for quickly writing and churning out maintenance scripts with minimal effort.
addArg( $arg, $description, $required=true, $multi=false)
Add some args that are needed.
output( $out, $channel=null)
Throw some output to the user.
getBatchSize()
Returns batch size.
getArg( $argId=0, $default=null)
Get an argument.
addOption( $name, $description, $required=false, $withArg=false, $shortName=false, $multiOccurrence=false)
Add a parameter to the script.
getOption( $name, $default=null)
Get an option, or return the default.
setBatchSize( $s=0)
fatalError( $msg, $exitCode=1)
Output a message and terminate the current script.
A class containing constants representing the names of configuration variables.
Update a database while optionally writing SQL that reverses the update to a file.
Definition UndoLog.php:11
Service locator for MediaWiki core services.
Service for storing and loading Content objects representing revision data blobs.
__construct()
Default constructor.
execute()
Do the actual work.
$maintClass
const DB_REPLICA
Definition defines.php:26
const DB_PRIMARY
Definition defines.php:28