Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 183 |
|
0.00% |
0 / 8 |
CRAP | |
0.00% |
0 / 1 |
MoveToExternal | |
0.00% |
0 / 183 |
|
0.00% |
0 / 8 |
2256 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 15 |
|
0.00% |
0 / 1 |
2 | |||
execute | |
0.00% |
0 / 31 |
|
0.00% |
0 / 1 |
42 | |||
doMoveToExternal | |
0.00% |
0 / 86 |
|
0.00% |
0 / 1 |
420 | |||
compress | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
12 | |||
resolveLegacyEncoding | |
0.00% |
0 / 19 |
|
0.00% |
0 / 1 |
72 | |||
resolveStubs | |
0.00% |
0 / 21 |
|
0.00% |
0 / 1 |
56 | |||
getConditions | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
2 | |||
resolveText | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 |
1 | <?php |
2 | /** |
3 | * Move text from the text table to external storage |
4 | * |
5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by |
7 | * the Free Software Foundation; either version 2 of the License, or |
8 | * (at your option) any later version. |
9 | * |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | * GNU General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU General Public License along |
16 | * with this program; if not, write to the Free Software Foundation, Inc., |
17 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | * http://www.gnu.org/copyleft/gpl.html |
19 | * |
20 | * @file |
21 | * @ingroup Maintenance ExternalStorage |
22 | */ |
23 | |
24 | use MediaWiki\MainConfigNames; |
25 | use MediaWiki\Maintenance\UndoLog; |
26 | use MediaWiki\Storage\SqlBlobStore; |
27 | use Wikimedia\AtEase\AtEase; |
28 | use Wikimedia\Rdbms\IExpression; |
29 | use Wikimedia\Rdbms\LikeValue; |
30 | |
31 | // @codeCoverageIgnoreStart |
32 | require_once __DIR__ . '/../Maintenance.php'; |
33 | // @codeCoverageIgnoreEnd |
34 | |
/**
 * Maintenance script that moves text rows from the text table to external storage.
 *
 * Rows in the requested old_id range that are not already flagged "external" are
 * written to the selected external store, and the text row is updated so that
 * old_text holds the returned URL and old_flags gains the "external" flag.
 * Rows holding a HistoryBlobStub are collected and handed to ResolveStubs after
 * the main pass, unless --skip-resolve was given.
 *
 * @ingroup Maintenance ExternalStorage
 */
class MoveToExternal extends Maintenance {
	/** @var ResolveStubs Helper used to resolve HistoryBlobStub rows after the main pass */
	private $resolveStubs;
	/** @var int Progress is reported every this many blocks (or stubs); 0 disables reporting */
	private $reportingInterval;
	/** @var int First old_id to process (inclusive) */
	private $minID;
	/** @var int Last old_id to process (inclusive) */
	private $maxID;
	/** @var string External store type, e.g. "DB" or "mwstore" */
	private $esType;
	/** @var string External store location, e.g. "cluster12" or "global-swift" */
	private $esLocation;
	/** @var int Plain text rows shorter than this many bytes are left in place */
	private $threshold;
	/** @var bool Whether to gzdeflate blobs that are not already gzipped */
	private $gzip;
	/** @var bool Whether to leave HistoryBlobStub rows untouched */
	private $skipResolve;
	/** @var string|null Legacy encoding to convert from, or null for no conversion */
	private $legacyEncoding;
	/** @var bool Whether to only report what would be moved */
	private $dryRun;
	/** @var UndoLog Log through which all row updates are performed */
	private $undoLog;

	public function __construct() {
		parent::__construct();

		$this->setBatchSize( 1000 );

		$this->addOption( 'start', 'start old_id', false, true, 's' );
		$this->addOption( 'end', 'end old_id', false, true, 'e' );
		$this->addOption( 'threshold', 'minimum size in bytes', false, true );
		$this->addOption( 'reporting-interval',
			'show a message after this many revisions', false, true );
		$this->addOption( 'undo', 'filename for undo SQL', false, true );

		$this->addOption( 'skip-gzip', 'Don\'t compress individual revisions' );
		$this->addOption( 'skip-resolve',
			'Don\'t replace HistoryBlobStub objects with direct external store pointers' );
		$this->addOption( 'iconv', 'Resolve legacy character encoding' );
		$this->addOption( 'dry-run', 'Don\'t modify any rows' );

		$this->addArg( 'type', 'The external store type, e.g. "DB" or "mwstore"' );
		$this->addArg( 'location', 'e.g. "cluster12" or "global-swift"' );
	}

	/**
	 * Read the command line options into properties and run the move.
	 *
	 * @return bool False if any row could not be processed, true otherwise
	 */
	public function execute() {
		$this->resolveStubs = new ResolveStubs;
		$this->esType = $this->getArg( 0 ); // e.g. "DB" or "mwstore"
		$this->esLocation = $this->getArg( 1 ); // e.g. "cluster12" or "global-swift"
		$dbw = $this->getPrimaryDB();

		// Without --end, process up to the highest old_id currently in the table
		$maxID = $this->getOption( 'end' ) ?? $dbw->newSelectQueryBuilder()
			->select( 'MAX(old_id)' )
			->from( 'text' )
			->caller( __METHOD__ )->fetchField();
		$this->maxID = (int)$maxID;
		$this->minID = (int)$this->getOption( 'start', 1 );

		// Cast so that the modulo arithmetic below never sees a non-numeric string
		$this->reportingInterval = (int)$this->getOption( 'reporting-interval', 100 );
		$this->threshold = (int)$this->getOption( 'threshold', 0 );

		if ( $this->getOption( 'skip-gzip' ) ) {
			$this->gzip = false;
		} elseif ( !function_exists( 'gzdeflate' ) ) {
			$this->fatalError( "gzdeflate() not found. " .
				"Please run with --skip-gzip if you don't want to compress revisions." );
		} else {
			$this->gzip = true;
		}

		$this->skipResolve = (bool)$this->getOption( 'skip-resolve' );

		if ( $this->getOption( 'iconv' ) ) {
			$legacyEncoding = $this->getConfig()->get( MainConfigNames::LegacyEncoding );
			if ( $legacyEncoding ) {
				$this->legacyEncoding = $legacyEncoding;
			} else {
				$this->output( "iconv requested but the wiki has no legacy encoding\n" );
			}
		}
		$this->dryRun = (bool)$this->getOption( 'dry-run', false );

		$undo = $this->getOption( 'undo' );
		try {
			$this->undoLog = new UndoLog( $undo, $dbw );
		} catch ( RuntimeException $e ) {
			$this->fatalError( "Unable to open undo log" );
		}
		$this->resolveStubs->setUndoLog( $this->undoLog );

		return $this->doMoveToExternal();
	}

	/**
	 * Walk the old_id range in batches and move every eligible row.
	 *
	 * @return bool False if any row could not be processed, true otherwise
	 */
	private function doMoveToExternal() {
		$success = true;
		$dbr = $this->getReplicaDB();

		$count = $this->maxID - $this->minID + 1;
		$blockSize = $this->getBatchSize();
		$numBlocks = ceil( $count / $blockSize );
		print "Moving text rows from {$this->minID} to {$this->maxID} to external storage\n";

		$esFactory = $this->getServiceContainer()->getExternalStoreFactory();
		$extStore = $esFactory->getStore( $this->esType );
		$numMoved = 0;
		$stubIDs = [];

		for ( $block = 0; $block < $numBlocks; $block++ ) {
			$blockStart = $block * $blockSize + $this->minID;
			$blockEnd = $blockStart + $blockSize - 1;

			if ( $this->reportingInterval && !( $block % $this->reportingInterval ) ) {
				$this->output( "oldid=$blockStart, moved=$numMoved\n" );
				$this->waitForReplication();
			}

			$res = $dbr->newSelectQueryBuilder()
				->select( [ 'old_id', 'old_flags', 'old_text' ] )
				->from( 'text' )
				->where( $this->getConditions( $blockStart, $blockEnd, $dbr ) )
				->caller( __METHOD__ )->fetchResultSet();
			foreach ( $res as $row ) {
				$text = $row->old_text;
				$id = $row->old_id;
				$flags = SqlBlobStore::explodeFlags( $row->old_flags );
				[ $text, $flags ] = $this->resolveText( $text, $flags );

				if ( $text === false ) {
					// The hook could not produce the text. Skip the row, otherwise a
					// false value would be compressed and stored in place of the content.
					$success = false;
					continue;
				}

				if ( in_array( 'error', $flags ) ) {
					continue;
				} elseif ( in_array( 'object', $flags ) ) {
					$obj = unserialize( $text );
					if ( $obj instanceof HistoryBlobStub ) {
						// Handle later, after CGZ resolution
						if ( !$this->skipResolve ) {
							$stubIDs[] = $id;
						}
						continue;
					} elseif ( $obj instanceof HistoryBlobCurStub ) {
						// Copy cur text to ES
						$newText = $obj->getText();
						if ( $newText === false ) {
							print "Warning: Could not fetch revision blob {$id}: {$text}\n";
							$success = false;
							continue;
						}

						// The fetched text replaces the stub, so start from empty flags
						[ $text, $flags ] = $this->resolveLegacyEncoding( $newText, [] );

						if ( $text === false ) {
							print "Warning: Could not decode legacy-encoded gzip'ed revision blob {$id}: {$newText}\n";
							$success = false;
							continue;
						}

						[ $newText, $flags ] = $this->compress( $text, $flags );
						if ( $newText === false ) {
							print "Warning: Could not compress revision blob {$id}: {$text}\n";
							$success = false;
							continue;
						}
						$text = $newText;
					} elseif ( $obj instanceof ConcatenatedGzipHistoryBlob ) {
						// Store as is
					} else {
						$className = get_class( $obj );
						print "Warning: old_id=$id unrecognised object class \"$className\"\n";
						$success = false;
						continue;
					}
				} elseif ( strlen( $text ) < $this->threshold ) {
					// Don't move small revisions
					continue;
				} else {
					[ $newText, $flags ] = $this->resolveLegacyEncoding( $text, $flags );
					if ( $newText === false ) {
						// Must be caught here: gzdeflate() would turn false into a
						// valid (empty) compressed blob and hide the failure.
						print "Warning: Could not decode legacy-encoded gzip'ed revision blob {$id}: {$text}\n";
						$success = false;
						continue;
					}
					$text = $newText;

					[ $newText, $flags ] = $this->compress( $text, $flags );
					if ( $newText === false ) {
						print "Warning: Could not compress revision blob {$id}: {$text}\n";
						$success = false;
						continue;
					}
					$text = $newText;
				}
				$flags[] = 'external';
				$flagsString = implode( ',', $flags );

				if ( $this->dryRun ) {
					// Show an escaped preview of the first 30 bytes instead of writing
					$this->output( "Move $id => $flagsString " .
						addcslashes( substr( $text, 0, 30 ), "\0..\x1f\x7f..\xff" ) .
						"\n"
					);
					continue;
				}

				$url = $extStore->store( $this->esLocation, $text );
				if ( !$url ) {
					$this->fatalError( "Error writing to external storage" );
				}
				// The update goes through the undo log; the original row is passed
				// along with the new values.
				$moved = $this->undoLog->update(
					'text',
					[ 'old_flags' => $flagsString, 'old_text' => $url ],
					(array)$row,
					__METHOD__
				);
				if ( $moved ) {
					$numMoved++;
				} else {
					print "Update of old_id $id failed, affected zero rows\n";
					$success = false;
				}
			}
		}

		if ( count( $stubIDs ) ) {
			$this->resolveStubs( $stubIDs );
		}

		return $success;
	}

	/**
	 * Compress the text with gzdeflate() if compression is enabled and the
	 * flags do not already contain "gzip".
	 *
	 * @param string $text
	 * @param string[] $flags
	 * @return array [ string|false $text, string[] $flags ]; the text is whatever
	 *   gzdeflate() returned when compression was applied
	 */
	private function compress( $text, $flags ) {
		if ( $this->gzip && !in_array( 'gzip', $flags ) ) {
			$flags[] = 'gzip';
			$text = gzdeflate( $text );
		}
		return [ $text, $flags ];
	}

	/**
	 * Convert the text from the configured legacy encoding to UTF-8.
	 *
	 * Nothing is done when no legacy encoding is set or when the flags already
	 * contain "utf-8" or "utf8". Gzipped text is inflated first (and the "gzip"
	 * flag removed), unless compression is disabled, in which case the entry is
	 * returned untouched.
	 *
	 * @param string $text
	 * @param string[] $flags
	 * @return array [ string|false $text, string[] $flags ]; the text is false if
	 *   inflating or converting failed
	 */
	private function resolveLegacyEncoding( $text, $flags ) {
		if ( $this->legacyEncoding !== null
			&& !in_array( 'utf-8', $flags )
			&& !in_array( 'utf8', $flags )
		) {
			// First decompress the entry so we don't try to convert a binary gzip to utf-8
			if ( in_array( 'gzip', $flags ) ) {
				if ( !$this->gzip ) {
					return [ $text, $flags ];
				}
				$flags = array_diff( $flags, [ 'gzip' ] );
				$newText = gzinflate( $text );
				if ( $newText === false ) {
					return [ false, $flags ];
				}
				$text = $newText;
			}
			AtEase::suppressWarnings();
			$newText = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $text );
			AtEase::restoreWarnings();
			if ( $newText === false ) {
				return [ false, $flags ];
			}
			$text = $newText;
			$flags[] = 'utf-8';
		}
		return [ $text, $flags ];
	}

	/**
	 * Re-read the collected stub rows in batches and pass each one to
	 * ResolveStubs::resolveStub(), reporting progress along the way.
	 *
	 * @param int[] $stubIDs old_id values of rows holding a HistoryBlobStub
	 */
	private function resolveStubs( $stubIDs ) {
		if ( $this->dryRun ) {
			print "Note: resolving stubs in dry run mode is expected to fail, " .
				"because the main blobs have not been moved to external storage.\n";
		}

		$dbr = $this->getReplicaDB();
		$this->output( "Resolving " . count( $stubIDs ) . " stubs\n" );
		$numResolved = 0;
		$numTotal = 0;
		foreach ( array_chunk( $stubIDs, $this->getBatchSize() ) as $stubBatch ) {
			$res = $dbr->newSelectQueryBuilder()
				->select( [ 'old_id', 'old_flags', 'old_text' ] )
				->from( 'text' )
				->where( [ 'old_id' => $stubBatch ] )
				->caller( __METHOD__ )->fetchResultSet();
			foreach ( $res as $row ) {
				$numResolved += $this->resolveStubs->resolveStub( $row, $this->dryRun ) ? 1 : 0;
				$numTotal++;
				if ( $this->reportingInterval
					&& $numTotal % $this->reportingInterval == 0
				) {
					$this->output( "$numTotal stubs processed\n" );
					$this->waitForReplication();
				}
			}
		}
		$this->output( "$numResolved of $numTotal stubs resolved\n" );
	}

	/**
	 * Build the WHERE conditions selecting the rows of one block: old_id within
	 * [$blockStart, $blockEnd] and old_flags not containing "external".
	 *
	 * @param int $blockStart First old_id of the block (inclusive)
	 * @param int $blockEnd Last old_id of the block (inclusive)
	 * @param mixed $dbr Database handle used to build the expressions; the caller
	 *   in this class passes the result of getReplicaDB()
	 * @return array Conditions suitable for SelectQueryBuilder::where()
	 */
	protected function getConditions( $blockStart, $blockEnd, $dbr ) {
		return [
			$dbr->expr( 'old_id', '>=', $blockStart ),
			// Upper bound of the block; this must be "<=", not ">="
			$dbr->expr( 'old_id', '<=', $blockEnd ),
			$dbr->expr( 'old_flags', IExpression::NOT_LIKE,
				new LikeValue( $dbr->anyString(), 'external', $dbr->anyString() ) ),
		];
	}

	/**
	 * Hook for subclasses to transform a row's text and flags before it is
	 * examined. This implementation returns both unchanged.
	 *
	 * @param string $text
	 * @param string[] $flags
	 * @return array [ string|false $text, string[] $flags ]; the caller treats a
	 *   false text as a failure and skips rows carrying the "error" flag
	 */
	protected function resolveText( $text, $flags ) {
		return [ $text, $flags ];
	}
}
335 | |
// Script bootstrap, excluded from coverage measurement.
// $maintClass names the maintenance class defined in this file; the file behind
// RUN_MAINTENANCE_IF_MAIN is defined elsewhere and presumably runs that class
// when this script is the entry point (TODO confirm against Maintenance.php).
// @codeCoverageIgnoreStart
$maintClass = MoveToExternal::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd