Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 188 |
|
0.00% |
0 / 8 |
CRAP | |
0.00% |
0 / 1 |
MoveToExternal | |
0.00% |
0 / 185 |
|
0.00% |
0 / 8 |
2352 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 33 |
|
0.00% |
0 / 1 |
56 | |||
doMoveToExternal | |
0.00% |
0 / 86 |
|
0.00% |
0 / 1 |
420 | |||
compress | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
resolveLegacyEncoding | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
72 | |||
resolveStubs | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
56 | |||
getConditions | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
resolveText | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * Move text from the text table to external storage |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Maintenance ExternalStorage |
22 | */ |
23 | |
24 | use MediaWiki\MainConfigNames; |
25 | use MediaWiki\Maintenance\UndoLog; |
26 | use MediaWiki\Storage\SqlBlobStore; |
27 | use Wikimedia\AtEase\AtEase; |
28 | use Wikimedia\Rdbms\IExpression; |
29 | use Wikimedia\Rdbms\LikeValue; |
30 | |
31 | require_once __DIR__ . '/../Maintenance.php'; |
32 | |
/**
 * Maintenance script that moves blobs from the text table to external storage.
 *
 * Rows in the requested old_id range whose old_flags do not contain "external"
 * are read in batches, optionally converted from the wiki's legacy encoding and
 * gzip-compressed, written to the chosen external store, and then updated
 * through the undo log so that old_text holds the returned URL.
 * HistoryBlobStub rows are collected during the main pass and resolved
 * afterwards unless --skip-resolve is given.
 *
 * @ingroup Maintenance ExternalStorage
 */
class MoveToExternal extends Maintenance {
	/** @var ResolveStubs Helper used to resolve HistoryBlobStub rows */
	private $resolveStubs;
	/** @var int Number of blocks/stubs between progress messages; 0 disables them */
	private $reportingInterval;
	/** @var int First old_id to process (--start) */
	private $minID;
	/** @var int Last old_id to process (--end, or MAX(old_id)) */
	private $maxID;
	/** @var string External store type, first positional argument */
	private $esType;
	/** @var string External store location, second positional argument */
	private $esLocation;
	/** @var int Rows with text shorter than this many bytes are not moved */
	private $threshold;
	/** @var bool Whether blobs are compressed with gzdeflate() before storing */
	private $gzip;
	/** @var bool Whether HistoryBlobStub rows are left alone */
	private $skipResolve;
	/** @var string|null Legacy encoding to convert from, or null for no conversion */
	private $legacyEncoding;
	/** @var bool Whether to only report what would be moved */
	private $dryRun;
	/** @var UndoLog */
	private $undoLog;

	/**
	 * Declare the command line options and arguments of the script.
	 */
	public function __construct() {
		parent::__construct();

		$this->setBatchSize( 1000 );

		$this->addOption( 'start', 'start old_id', false, true, 's' );
		$this->addOption( 'end', 'end old_id', false, true, 'e' );
		$this->addOption( 'threshold', 'minimum size in bytes', false, true );
		$this->addOption( 'reporting-interval',
			'show a message after this many revisions', false, true );
		$this->addOption( 'undo', 'filename for undo SQL', false, true );

		$this->addOption( 'skip-gzip', 'Don\'t compress individual revisions' );
		$this->addOption( 'skip-resolve',
			'Don\'t replace HistoryBlobStub objects with direct external store pointers' );
		$this->addOption( 'iconv', 'Resolve legacy character encoding' );
		$this->addOption( 'dry-run', 'Don\'t modify any rows' );

		$this->addArg( 'type', 'The external store type, e.g. "DB" or "mwstore"' );
		$this->addArg( 'location', 'e.g. "cluster12" or "global-swift"' );
	}

	/**
	 * Read the options into the object's state, open the undo log and run the move.
	 *
	 * @return bool The result of doMoveToExternal(): true if no row failed
	 */
	public function execute() {
		$this->resolveStubs = new ResolveStubs;
		$this->esType = $this->getArg( 0 ); // e.g. "DB" or "mwstore"
		$this->esLocation = $this->getArg( 1 ); // e.g. "cluster12" or "global-swift"
		$dbw = $this->getPrimaryDB();

		// Without --end, process up to the highest old_id currently in the table
		$maxID = $this->getOption( 'end' );
		if ( $maxID === null ) {
			$maxID = $dbw->newSelectQueryBuilder()
				->select( 'MAX(old_id)' )
				->from( 'text' )
				->caller( __METHOD__ )->fetchField();
		}
		$this->maxID = (int)$maxID;
		$this->minID = (int)$this->getOption( 'start', 1 );

		// Option values arrive as strings; normalise to the declared int type so that
		// the modulo arithmetic in the reporting checks never sees a non-numeric string
		$this->reportingInterval = (int)$this->getOption( 'reporting-interval', 100 );
		$this->threshold = (int)$this->getOption( 'threshold', 0 );

		if ( $this->getOption( 'skip-gzip' ) ) {
			$this->gzip = false;
		} elseif ( !function_exists( 'gzdeflate' ) ) {
			$this->fatalError( "gzdeflate() not found. " .
				"Please run with --skip-gzip if you don't want to compress revisions." );
		} else {
			$this->gzip = true;
		}

		$this->skipResolve = (bool)$this->getOption( 'skip-resolve' );

		if ( $this->getOption( 'iconv' ) ) {
			$legacyEncoding = $this->getConfig()->get( MainConfigNames::LegacyEncoding );
			if ( $legacyEncoding ) {
				$this->legacyEncoding = $legacyEncoding;
			} else {
				$this->output( "iconv requested but the wiki has no legacy encoding\n" );
			}
		}
		$this->dryRun = (bool)$this->getOption( 'dry-run', false );

		$undo = $this->getOption( 'undo' );
		try {
			$this->undoLog = new UndoLog( $undo, $dbw );
		} catch ( RuntimeException $e ) {
			$this->fatalError( "Unable to open undo log" );
		}
		$this->resolveStubs->setUndoLog( $this->undoLog );

		return $this->doMoveToExternal();
	}

	/**
	 * Walk the [minID, maxID] range in batch-sized blocks and move each eligible
	 * row to external storage.
	 *
	 * Rows flagged "error" are skipped. Rows flagged "object" are unserialized:
	 * HistoryBlobStub rows are queued for resolveStubs(), HistoryBlobCurStub rows
	 * have their text fetched, converted and compressed, and
	 * ConcatenatedGzipHistoryBlob rows are stored as they are. All other rows are
	 * converted and compressed unless they are shorter than the threshold.
	 *
	 * @return bool False if any row could not be processed, true otherwise
	 */
	private function doMoveToExternal() {
		$success = true;
		$dbr = $this->getReplicaDB();

		$count = $this->maxID - $this->minID + 1;
		$blockSize = $this->getBatchSize();
		$numBlocks = (int)ceil( $count / $blockSize );
		print "Moving text rows from {$this->minID} to {$this->maxID} to external storage\n";

		$esFactory = $this->getServiceContainer()->getExternalStoreFactory();
		$extStore = $esFactory->getStore( $this->esType );
		$numMoved = 0;
		$stubIDs = [];

		for ( $block = 0; $block < $numBlocks; $block++ ) {
			$blockStart = $block * $blockSize + $this->minID;
			// Clamp the final block so that rows beyond the requested end are not touched
			$blockEnd = min( $blockStart + $blockSize - 1, $this->maxID );

			if ( $this->reportingInterval && !( $block % $this->reportingInterval ) ) {
				$this->output( "oldid=$blockStart, moved=$numMoved\n" );
				$this->waitForReplication();
			}

			$res = $dbr->newSelectQueryBuilder()
				->select( [ 'old_id', 'old_flags', 'old_text' ] )
				->from( 'text' )
				->where( $this->getConditions( $blockStart, $blockEnd, $dbr ) )
				->caller( __METHOD__ )->fetchResultSet();
			foreach ( $res as $row ) {
				$text = $row->old_text;
				$id = $row->old_id;
				$flags = SqlBlobStore::explodeFlags( $row->old_flags );
				[ $text, $flags ] = $this->resolveText( $text, $flags );

				if ( $text === false ) {
					// Nothing usable to store; treat it like every other per-row failure
					$success = false;
					continue;
				}

				if ( in_array( 'error', $flags, true ) ) {
					continue;
				} elseif ( in_array( 'object', $flags, true ) ) {
					// NOTE(review): unserialize() is applied to text table content, which
					// is trusted here; do not reuse this on data from other sources.
					$obj = unserialize( $text );
					if ( $obj instanceof HistoryBlobStub ) {
						// Handle later, after CGZ resolution
						if ( !$this->skipResolve ) {
							$stubIDs[] = $id;
						}
						continue;
					} elseif ( $obj instanceof HistoryBlobCurStub ) {
						// Copy cur text to ES
						$newText = $obj->getText();
						if ( $newText === false ) {
							print "Warning: Could not fetch revision blob {$id}: {$text}\n";
							$success = false;
							continue;
						}

						[ $text, $flags ] = $this->resolveLegacyEncoding( $newText, [] );

						if ( $text === false ) {
							print "Warning: Could not decode legacy-encoded gzip'ed revision blob {$id}: {$newText}\n";
							$success = false;
							continue;
						}

						[ $text, $flags ] = $this->compress( $text, $flags );
					} elseif ( $obj instanceof ConcatenatedGzipHistoryBlob ) {
						// Store as is
					} else {
						// unserialize() yields false for corrupt data, and get_class()
						// rejects non-objects, so fall back to the value's type name
						$className = is_object( $obj ) ? get_class( $obj ) : gettype( $obj );
						print "Warning: old_id=$id unrecognised object class \"$className\"\n";
						$success = false;
						continue;
					}
				} elseif ( strlen( $text ) < $this->threshold ) {
					// Don't move small revisions
					continue;
				} else {
					[ $text, $flags ] = $this->resolveLegacyEncoding( $text, $flags );
					if ( $text === false ) {
						// A failed conversion must not reach compress(): gzdeflate() would
						// treat false as an empty string and the row would be overwritten
						// with an empty blob
						print "Warning: Could not decode legacy-encoded revision blob {$id}\n";
						$success = false;
						continue;
					}
					[ $newText, $flags ] = $this->compress( $text, $flags );
					if ( $newText === false ) {
						print "Warning: Could not compress revision blob {$id}: {$text}\n";
						$success = false;
						continue;
					}
					$text = $newText;
				}
				$flags[] = 'external';
				$flagsString = implode( ',', $flags );

				if ( $this->dryRun ) {
					$this->output( "Move $id => $flagsString " .
						addcslashes( substr( $text, 0, 30 ), "\0..\x1f\x7f..\xff" ) .
						"\n"
					);
					continue;
				}

				$url = $extStore->store( $this->esLocation, $text );
				if ( !$url ) {
					$this->fatalError( "Error writing to external storage" );
				}
				$moved = $this->undoLog->update(
					'text',
					[ 'old_flags' => $flagsString, 'old_text' => $url ],
					(array)$row,
					__METHOD__
				);
				if ( $moved ) {
					$numMoved++;
				} else {
					print "Update of old_id $id failed, affected zero rows\n";
					$success = false;
				}
			}
		}

		if ( count( $stubIDs ) ) {
			$this->resolveStubs( $stubIDs );
		}

		return $success;
	}

	/**
	 * Compress a blob with gzdeflate() if compression is enabled and the blob is
	 * not already flagged as gzip.
	 *
	 * @param string $text
	 * @param string[] $flags
	 * @return array [ text, flags ]; text is whatever gzdeflate() returned when
	 *  compression was applied, otherwise the input text
	 */
	private function compress( $text, $flags ) {
		if ( $this->gzip && !in_array( 'gzip', $flags, true ) ) {
			$flags[] = 'gzip';
			$text = gzdeflate( $text );
		}
		return [ $text, $flags ];
	}

	/**
	 * Convert a blob from the legacy encoding to UTF-8 when a legacy encoding is
	 * set and the blob is not already flagged "utf-8" or "utf8".
	 *
	 * @param string $text
	 * @param string[] $flags
	 * @return array [ text, flags ]; text is false if inflating or converting failed
	 */
	private function resolveLegacyEncoding( $text, $flags ) {
		if ( $this->legacyEncoding !== null
			&& !in_array( 'utf-8', $flags, true )
			&& !in_array( 'utf8', $flags, true )
		) {
			// First decompress the entry so we don't try to convert a binary gzip to utf-8
			if ( in_array( 'gzip', $flags, true ) ) {
				if ( !$this->gzip ) {
					// Compression is disabled for this run, so leave the gzip'ed blob
					// untouched rather than inflate it
					return [ $text, $flags ];
				}
				$flags = array_diff( $flags, [ 'gzip' ] );
				$newText = gzinflate( $text );
				if ( $newText === false ) {
					return [ false, $flags ];
				}
				$text = $newText;
			}
			AtEase::suppressWarnings();
			$newText = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $text );
			AtEase::restoreWarnings();
			if ( $newText === false ) {
				return [ false, $flags ];
			}
			$text = $newText;
			$flags[] = 'utf-8';
		}
		return [ $text, $flags ];
	}

	/**
	 * Re-read the queued stub rows in batches and hand each one to
	 * ResolveStubs::resolveStub(), reporting progress along the way.
	 *
	 * @param int[] $stubIDs old_id values of the HistoryBlobStub rows to resolve
	 */
	private function resolveStubs( $stubIDs ) {
		if ( $this->dryRun ) {
			print "Note: resolving stubs in dry run mode is expected to fail, " .
				"because the main blobs have not been moved to external storage.\n";
		}

		$dbr = $this->getReplicaDB();
		$this->output( "Resolving " . count( $stubIDs ) . " stubs\n" );
		$numResolved = 0;
		$numTotal = 0;
		foreach ( array_chunk( $stubIDs, $this->getBatchSize() ) as $stubBatch ) {
			$res = $dbr->newSelectQueryBuilder()
				->select( [ 'old_id', 'old_flags', 'old_text' ] )
				->from( 'text' )
				->where( [ 'old_id' => $stubBatch ] )
				->caller( __METHOD__ )->fetchResultSet();
			foreach ( $res as $row ) {
				$numResolved += $this->resolveStubs->resolveStub( $row, $this->dryRun ) ? 1 : 0;
				$numTotal++;
				if ( $this->reportingInterval
					&& $numTotal % $this->reportingInterval == 0
				) {
					$this->output( "$numTotal stubs processed\n" );
					$this->waitForReplication();
				}
			}
		}
		$this->output( "$numResolved of $numTotal stubs resolved\n" );
	}

	/**
	 * Build the WHERE conditions selecting one block of rows: old_id within
	 * [$blockStart, $blockEnd] and old_flags not containing "external".
	 *
	 * @param int $blockStart First old_id of the block (inclusive)
	 * @param int $blockEnd Last old_id of the block (inclusive)
	 * @param object $dbr Database handle providing expr() and anyString()
	 * @return array Conditions suitable for SelectQueryBuilder::where()
	 */
	protected function getConditions( $blockStart, $blockEnd, $dbr ) {
		return [
			$dbr->expr( 'old_id', '>=', $blockStart ),
			$dbr->expr( 'old_id', '<=', $blockEnd ),
			$dbr->expr( 'old_flags', IExpression::NOT_LIKE,
				new LikeValue( $dbr->anyString(), 'external', $dbr->anyString() ) ),
		];
	}

	/**
	 * Hook for subclasses to transform a row's text and flags before it is
	 * processed. This implementation returns both unchanged.
	 *
	 * @param string $text
	 * @param string[] $flags
	 * @return array [ text, flags ]; a text of false makes the caller skip the row
	 *  and report failure
	 */
	protected function resolveText( $text, $flags ) {
		return [ $text, $flags ];
	}
}
336 | |
337 | $maintClass = MoveToExternal::class; |
338 | require_once RUN_MAINTENANCE_IF_MAIN; |